From: "Sam James" <sam@gentoo.org>
To: gentoo-commits@lists.gentoo.org
Subject: [gentoo-commits] proj/gentoo-functions:master commit in: /
Date: Fri, 2 Aug 2024 23:14:12 +0000 (UTC) [thread overview]
Message-ID: <1722615672.282fbd3bc2cca32b1a83c28cb9649de46cf404da.sam@gentoo> (raw)
commit: 282fbd3bc2cca32b1a83c28cb9649de46cf404da
Author: Kerin Millar <kfm <AT> plushkava <DOT> net>
AuthorDate: Thu Aug 1 06:30:19 2024 +0000
Commit: Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Fri Aug 2 16:21:12 2024 +0000
URL: https://gitweb.gentoo.org/proj/gentoo-functions.git/commit/?id=282fbd3b
Render quote_args() robust and implement a test case
Coerce the effective character set as being C (US-ASCII) in the course
of executing awk(1). Some implementations are strict and will otherwise
fail in situations where the bytes cannot be decoded.
$ uname -o
Darwin
$ echo "$LC_ALL"
en_GB.UTF-8
$ printf '\200' | awk '/[\001-\037\177-\377]/'
awk: towc: multibyte conversion failure on: ''
In the above case, awk aborts because it has a need to decode the input,
which turns out not to be valid UTF-8. Now, it is rather beyond the
purview of quote_args() to guarantee that its parameters adhere to any
particular character encoding. Fortunately, for it to contend with
strings on a byte-by-byte basis is acceptable.
Refactor the code somewhat. The behaviour has been adjusted so as to be
virtually identical to that of the "${*@Q}" expansion in bash, with the
exception that the ESC character is rendered as $'\e' instead of $'\E'.
Such an exception is necessary for POSIX-1.2024 conformance, wherein
dollar-single-quotes are now a standard feature (see section 2.2.4 of
the Shell Command Language).
Revise the comment preceding the function so as to accurately document
its behaviour.
Finally, add a test case. It works by calling quote_args for every
possible single-byte string before calculating a CRC checksum for the
cumulative output and comparing it against a pre-determined value.
Signed-off-by: Kerin Millar <kfm <AT> plushkava.net>
functions.sh | 65 ++++++++++++++++++++++++++++++++++++----------------------
test-functions | 20 ++++++++++++++++++
2 files changed, 60 insertions(+), 25 deletions(-)
diff --git a/functions.sh b/functions.sh
index faacdca..036e3a7 100644
--- a/functions.sh
+++ b/functions.sh
@@ -425,47 +425,62 @@ parallel_run()
#
# Prints the positional parameters in a format that may be reused as shell
# input. For each considered, it shall be determined whether its value contains
-# any non-printable characters in lieu of the US-ASCII character set. If no such
-# characters are found, the value shall have each instance of <apostrophe> be
-# replaced by <apostrophe><backslash><apostrophe><apostrophe> before being
-# enclosed by a pair of <apostrophe> characters. Otherwise, non-printable
-# characters shall be replaced by octal escape sequences, <apostrophe> by
-# <backslash><apostrophe> and <backslash> by <backslash><backslash>, prior to
-# the value being given a prefix of <dollar-sign><apostrophe> and a suffix of
-# <apostrophe>, per POSIX-1.2024. Finally, the resulting values shall be printed
-# as <space> separated. The latter quoting strategy can be suppressed by setting
-# the POSIXLY_CORRECT variable as non-empty in the environment.
+# any bytes that are either outside the scope of the US-ASCII character set or
+# which are considered as non-printable. If no such bytes are found, the value
+# shall have each instance of <apostrophe> be replaced by <apostrophe>
+# <backslash> <apostrophe> <apostrophe> before being enclosed by a pair of
+# <apostrophe> characters. However, as a special case, a value consisting of a
+# single <apostrophe> shall be replaced by <backslash> <apostrophe>.
+#
+# If any such bytes are found, the value shall instead be requoted in a manner
+# that conforms with section 2.2.4 of the Shell Command Language, wherein the
+# use of dollar-single-quotes sequences is described. Such sequences are
+# standard as of POSIX-1.2024. However, as of August 2024, many implementations
+# lack support for this feature. So as to mitigate this state of affairs, the
+# use of dollar-single-quotes may be suppressed by setting POSIXLY_CORRECT as a
+# non-empty string.
#
quote_args()
{
- awk -v q=\' -f - -- "$@" <<-'EOF'
+ LC_ALL=C awk -v q=\' -f - -- "$@" <<-'EOF'
+ function init_table() {
+ # Iterate over ranges \001-\037 and \177-\377.
+ for (i = 1; i <= 255; i += (i == 31 ? 96 : 1)) {
+ char = sprintf("%c", i)
+ seq_by[char] = sprintf("%03o", i)
+ }
+ seq_by["\007"] = "a"
+ seq_by["\010"] = "b"
+ seq_by["\011"] = "t"
+ seq_by["\012"] = "n"
+ seq_by["\013"] = "v"
+ seq_by["\014"] = "f"
+ seq_by["\015"] = "r"
+ seq_by["\033"] = "e"
+ seq_by["\047"] = "'"
+ seq_by["\134"] = "\\"
+ }
BEGIN {
strictly_posix = length(ENVIRON["POSIXLY_CORRECT"])
argc = ARGC
ARGC = 1
for (arg_idx = 1; arg_idx < argc; arg_idx++) {
arg = ARGV[arg_idx]
- if (strictly_posix || arg !~ /[\001-\037\177]/) {
+ if (arg == q) {
+ word = "\\" q
+ } else if (strictly_posix || arg !~ /[\001-\037\177-\377]/) {
gsub(q, q "\\" q q, arg)
word = q arg q
} else {
- # Use $'' quoting per POSIX-1.2024
- if (! ("\001" in ord_by)) {
- for (i = 1; i < 32; i++) {
- char = sprintf("%c", i)
- ord_by[char] = i
- }
- ord_by["\177"] = 127
+ # Use $'' quoting per POSIX-1.2024.
+ if (! ("\001" in seq_by)) {
+ init_table()
}
word = "$'"
for (i = 1; i <= length(arg); i++) {
char = substr(arg, i, 1)
- if (char == "\\") {
- word = word "\\\\"
- } else if (char == q) {
- word = word "\\'"
- } else if (char in ord_by) {
- word = word "\\" sprintf("%03o", ord_by[char])
+ if (char in seq_by) {
+ word = word "\\" seq_by[char]
} else {
word = word char
}
diff --git a/test-functions b/test-functions
index f37477c..ef2aa98 100755
--- a/test-functions
+++ b/test-functions
@@ -882,6 +882,25 @@ test_contains_any() {
iterate_tests 5 "$@"
}
+test_quote_args() {
+ testnum=$((testnum + 1))
+ retval=0
+ i=0
+ while [ "$(( i += 1 ))" -le 255 ]; do
+ fmt=$(printf '\%o' "$i")
+ str=$(printf "$fmt.")
+ POSIXLY_CORRECT= quote_args "${str%.}" || break
+ done | cksum | {
+ read -r cksum _
+ if [ "${cksum}" != "380900690" ]; then
+ printf 'not '
+ retval=1
+ fi
+ printf 'ok %d - quote_args output test (expected cksum 380900690, got %s)\n' "${testnum}" "${cksum}"
+ return "${retval}"
+ }
+}
+
iterate_tests() {
slice_width=$1
shift
@@ -959,6 +978,7 @@ else
#test_substr || rc=1
test_contains_all || rc=1
test_contains_any || rc=1
+ test_quote_args || rc=1
fi
cleanup_tmpdir
next reply other threads:[~2024-08-02 23:14 UTC|newest]
Thread overview: 286+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-02 23:14 Sam James [this message]
-- strict thread matches above, loose matches on Subject: below --
2025-05-13 0:30 [gentoo-commits] proj/gentoo-functions:master commit in: / Sam James
2025-05-13 0:30 Sam James
2025-05-13 0:30 Sam James
2025-05-13 0:30 Sam James
2025-05-13 0:30 Sam James
2024-10-05 7:25 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-10-05 4:15 Sam James
2024-08-11 10:23 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-11 10:11 Sam James
2024-08-05 20:39 Sam James
2024-08-05 20:39 Sam James
2024-08-05 2:03 Sam James
2024-08-05 2:02 Sam James
2024-08-05 2:02 Sam James
2024-08-05 2:02 Sam James
2024-08-05 2:02 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-08-02 23:14 Sam James
2024-07-08 3:00 Sam James
2024-07-08 2:31 Sam James
2024-07-08 2:31 Sam James
2024-07-07 5:55 Sam James
2024-07-07 5:55 Sam James
2024-07-07 5:55 Sam James
2024-07-07 5:55 Sam James
2024-07-07 5:55 Sam James
2024-07-07 5:55 Sam James
2024-07-07 5:55 Sam James
2024-07-07 5:55 Sam James
2024-07-07 5:55 Sam James
2024-07-07 5:55 Sam James
2024-06-25 4:06 Sam James
2024-06-25 4:06 Sam James
2024-06-25 4:06 Sam James
2024-06-25 4:06 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-06-21 13:14 Sam James
2024-05-24 6:05 Sam James
2024-05-24 1:18 Sam James
2024-05-24 1:18 Sam James
2024-05-22 1:12 Sam James
2024-05-22 1:12 Sam James
2024-05-22 1:12 Sam James
2024-05-22 1:12 Sam James
2024-05-22 1:12 Sam James
2024-05-22 1:12 Sam James
2024-05-22 1:12 Sam James
2024-05-22 1:12 Sam James
2024-05-22 1:12 Sam James
2024-05-19 15:27 Sam James
2024-05-19 15:27 Sam James
2024-05-19 15:27 Sam James
2024-05-19 15:27 Sam James
2024-05-18 16:07 Sam James
2024-05-18 16:06 Sam James
2024-05-18 16:06 Sam James
2024-05-18 15:34 Sam James
2024-05-18 15:32 Sam James
2024-05-18 15:32 Sam James
2024-05-18 14:04 Sam James
2024-05-18 14:04 Sam James
2024-05-18 14:04 Sam James
2024-05-18 14:04 Sam James
2024-05-18 14:04 Sam James
2024-05-18 14:04 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-17 4:03 Sam James
2024-05-15 10:28 Sam James
2024-05-15 10:28 Sam James
2024-05-14 0:18 Sam James
2024-05-14 0:15 Sam James
2024-05-14 0:12 Sam James
2024-05-14 0:12 Sam James
2024-05-14 0:08 Sam James
2024-05-14 0:08 Sam James
2024-05-14 0:05 Sam James
2024-05-14 0:05 Sam James
2024-05-14 0:05 Sam James
2024-05-14 0:05 Sam James
2024-05-14 0:05 Sam James
2024-02-16 21:35 Sam James
2023-06-11 16:47 Sam James
2023-06-11 16:47 Sam James
2023-06-11 16:47 Sam James
2023-06-11 16:47 Sam James
2023-06-11 16:47 Sam James
2023-06-11 16:47 Sam James
2023-06-11 16:47 Sam James
2023-06-11 16:47 Sam James
2023-06-10 7:23 Sam James
2023-06-10 7:23 Sam James
2023-06-10 6:04 Sam James
2023-06-10 4:22 Sam James
2023-06-10 4:22 Sam James
2023-06-10 4:22 Sam James
2023-06-10 4:22 Sam James
2023-06-10 4:22 Sam James
2023-06-10 4:22 Sam James
2023-06-10 4:22 Sam James
2023-06-10 4:22 Sam James
2023-06-10 4:22 Sam James
2023-06-10 4:22 Sam James
2023-06-09 11:17 Sam James
2023-06-09 11:11 Sam James
2023-06-09 11:02 Sam James
2023-06-09 11:02 Sam James
2023-06-09 11:02 Sam James
2023-06-09 11:02 Sam James
2023-06-07 11:13 Sam James
2023-06-07 11:13 Sam James
2023-06-07 11:13 Sam James
2023-06-07 11:13 Sam James
2023-06-07 11:13 Sam James
2023-06-07 11:13 Sam James
2023-06-07 11:13 Sam James
2023-06-07 11:13 Sam James
2023-06-07 11:13 Sam James
2023-02-19 16:14 Sam James
2023-02-19 16:14 Sam James
2023-02-19 16:14 Sam James
2023-02-19 16:14 Sam James
2023-02-19 16:14 Sam James
2023-02-19 16:14 Sam James
2023-02-19 16:14 Sam James
2023-02-19 16:14 Sam James
2023-02-19 16:14 Sam James
2023-02-17 7:44 Sam James
2023-02-17 7:44 Sam James
2023-02-17 7:44 Sam James
2023-02-17 1:33 Sam James
2023-02-17 1:33 Sam James
2023-02-17 1:33 Sam James
2023-02-15 8:18 Sam James
2023-02-15 7:48 Sam James
2023-02-15 7:46 Sam James
2023-02-15 7:46 Sam James
2023-02-15 7:46 Sam James
2023-02-15 7:46 Sam James
2023-02-15 7:46 Sam James
2023-02-15 7:46 Sam James
2023-02-15 2:24 Sam James
2023-02-15 2:24 Sam James
2023-02-15 2:24 Sam James
2023-02-14 3:40 Sam James
2023-02-14 3:40 Sam James
2023-02-14 3:40 Sam James
2023-02-14 3:40 Sam James
2023-02-14 0:09 Sam James
2023-02-14 0:09 Sam James
2023-02-13 21:37 Sam James
2023-02-13 21:37 Sam James
2023-02-13 21:37 Sam James
2023-02-13 21:37 Sam James
2023-02-13 21:37 Sam James
2023-02-13 21:37 Sam James
2023-02-13 21:37 Sam James
2023-02-13 21:37 Sam James
2023-02-12 18:53 Sam James
2023-02-12 18:53 Sam James
2023-02-12 6:53 Sam James
2023-02-12 6:53 Sam James
2023-02-12 6:53 Sam James
2023-02-11 1:43 Sam James
2023-02-11 1:43 Sam James
2023-02-10 6:09 Sam James
2023-02-10 6:09 Sam James
2023-02-10 6:09 Sam James
2023-02-09 3:54 Sam James
2023-02-09 3:54 Sam James
2023-02-08 3:37 Sam James
2023-02-08 1:06 Sam James
2023-02-08 0:03 Sam James
2023-02-08 0:03 Sam James
2023-02-07 23:47 Sam James
2023-02-07 23:42 Sam James
2023-02-07 23:42 Sam James
2023-02-07 23:42 Sam James
2023-02-07 23:42 Sam James
2023-02-07 1:08 Sam James
2023-02-07 1:08 Sam James
2023-02-06 13:47 Sam James
2023-02-06 4:32 Sam James
2023-02-06 4:23 Sam James
2023-02-06 4:19 Sam James
2023-02-06 4:10 Sam James
2023-02-06 4:10 Sam James
2023-02-06 3:59 Sam James
2023-02-06 3:59 Sam James
2023-02-06 3:59 Sam James
2022-07-30 5:48 Sam James
2022-07-29 2:03 Sam James
2022-07-29 2:03 Sam James
2022-07-29 2:03 Sam James
2021-08-30 21:14 Mike Gilbert
2021-08-30 21:14 Mike Gilbert
2020-11-19 18:20 Mike Gilbert
2020-11-19 18:20 Mike Gilbert
2020-11-19 18:20 Mike Gilbert
2020-01-26 23:19 Mike Gilbert
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1722615672.282fbd3bc2cca32b1a83c28cb9649de46cf404da.sam@gentoo \
--to=sam@gentoo.org \
--cc=gentoo-commits@lists.gentoo.org \
--cc=gentoo-dev@lists.gentoo.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox