diff options
author | Kerin Millar <kfm@plushkava.net> | 2024-08-01 07:30:19 +0100 |
---|---|---|
committer | Kerin Millar <kfm@plushkava.net> | 2024-08-02 17:21:12 +0100 |
commit | 282fbd3bc2cca32b1a83c28cb9649de46cf404da (patch) | |
tree | 072757464d5f9d04574d52d55d2c0df4462fe7d8 | |
parent | Treat EINFO_LOG as false if equal to RC_SERVICE (diff) | |
download | gentoo-functions-282fbd3bc2cca32b1a83c28cb9649de46cf404da.tar.gz gentoo-functions-282fbd3bc2cca32b1a83c28cb9649de46cf404da.tar.bz2 gentoo-functions-282fbd3bc2cca32b1a83c28cb9649de46cf404da.zip |
Render quote_args() robust and implement a test case
Coerce the effective character set as being C (US-ASCII) in the course
of executing awk(1). Some implementations are strict and will otherwise
fail in situations where the bytes cannot be decoded.
$ uname -o
Darwin
$ echo "$LC_ALL"
en_GB.UTF-8
$ printf '\200' | awk '/[\001-\037\177-\377]/'
awk: towc: multibyte conversion failure on: ''
In the above case, awk aborts because it needs to decode the input,
which turns out not to be valid UTF-8. Now, it is rather beyond the
purview of quote_args() to guarantee that its parameters adhere to any
particular character encoding. Fortunately, for it to contend with
strings on a byte-by-byte basis is acceptable.
Refactor the code somewhat. The behaviour has been adjusted so as to be
virtually identical to that of the "${*@Q}" expansion in bash, with the
exception that the ESC character is rendered as $'\e' instead of $'\E'.
Such an exception is necessary for POSIX.1-2024 conformance, wherein
dollar-single-quotes are now a standard feature (see section 2.2.4 of
the Shell Command Language).
Revise the comment preceding the function so as to accurately document
its behaviour.
Finally, add a test case. It works by calling quote_args for every
possible single-byte string before calculating a CRC checksum for the
cumulative output and comparing it against a pre-determined value.
Signed-off-by: Kerin Millar <kfm@plushkava.net>
-rw-r--r-- | functions.sh | 65 | ||||
-rwxr-xr-x | test-functions | 20 |
2 files changed, 60 insertions, 25 deletions
diff --git a/functions.sh b/functions.sh index faacdca..036e3a7 100644 --- a/functions.sh +++ b/functions.sh @@ -425,47 +425,62 @@ parallel_run() # # Prints the positional parameters in a format that may be reused as shell # input. For each considered, it shall be determined whether its value contains -# any non-printable characters in lieu of the US-ASCII character set. If no such -# characters are found, the value shall have each instance of <apostrophe> be -# replaced by <apostrophe><backslash><apostrophe><apostrophe> before being -# enclosed by a pair of <apostrophe> characters. Otherwise, non-printable -# characters shall be replaced by octal escape sequences, <apostrophe> by -# <backslash><apostrophe> and <backslash> by <backslash><backslash>, prior to -# the value being given a prefix of <dollar-sign><apostrophe> and a suffix of -# <apostrophe>, per POSIX-1.2024. Finally, the resulting values shall be printed -# as <space> separated. The latter quoting strategy can be suppressed by setting -# the POSIXLY_CORRECT variable as non-empty in the environment. +# any bytes that are either outside the scope of the US-ASCII character set or +# which are considered as non-printable. If no such bytes are found, the value +# shall have each instance of <apostrophe> be replaced by <apostrophe> +# <backslash> <apostrophe> <apostrophe> before being enclosed by a pair of +# <apostrophe> characters. However, as a special case, a value consisting of a +# single <apostrophe> shall be replaced by <backslash> <apostrophe>. +# +# If any such bytes are found, the value shall instead be requoted in a manner +# that conforms with section 2.2.4 of the Shell Command Language, wherein the +# the use of dollar-single-quotes sequences is described. Such sequences are +# standard as of POSIX-1.2024. However, as of August 2024, many implementations +# lack support for this feature. 
So as to mitigate this state of affairs, the +# use of dollar-single-quotes may be suppressed by setting POSIXLY_CORRECT as a +# non-empty string. # quote_args() { - awk -v q=\' -f - -- "$@" <<-'EOF' + LC_ALL=C awk -v q=\' -f - -- "$@" <<-'EOF' + function init_table() { + # Iterate over ranges \001-\037 and \177-\377. + for (i = 1; i <= 255; i += (i == 31 ? 96 : 1)) { + char = sprintf("%c", i) + seq_by[char] = sprintf("%03o", i) + } + seq_by["\007"] = "a" + seq_by["\010"] = "b" + seq_by["\011"] = "t" + seq_by["\012"] = "n" + seq_by["\013"] = "v" + seq_by["\014"] = "f" + seq_by["\015"] = "r" + seq_by["\033"] = "e" + seq_by["\047"] = "'" + seq_by["\134"] = "\\" + } BEGIN { strictly_posix = length(ENVIRON["POSIXLY_CORRECT"]) argc = ARGC ARGC = 1 for (arg_idx = 1; arg_idx < argc; arg_idx++) { arg = ARGV[arg_idx] - if (strictly_posix || arg !~ /[\001-\037\177]/) { + if (arg == q) { + word = "\\" q + } else if (strictly_posix || arg !~ /[\001-\037\177-\377]/) { gsub(q, q "\\" q q, arg) word = q arg q } else { - # Use $'' quoting per POSIX-1.2024 - if (! ("\001" in ord_by)) { - for (i = 1; i < 32; i++) { - char = sprintf("%c", i) - ord_by[char] = i - } - ord_by["\177"] = 127 + # Use $'' quoting per POSIX-1.2024. + if (! 
("\001" in seq_by)) { + init_table() } word = "$'" for (i = 1; i <= length(arg); i++) { char = substr(arg, i, 1) - if (char == "\\") { - word = word "\\\\" - } else if (char == q) { - word = word "\\'" - } else if (char in ord_by) { - word = word "\\" sprintf("%03o", ord_by[char]) + if (char in seq_by) { + word = word "\\" seq_by[char] } else { word = word char } diff --git a/test-functions b/test-functions index f37477c..ef2aa98 100755 --- a/test-functions +++ b/test-functions @@ -882,6 +882,25 @@ test_contains_any() { iterate_tests 5 "$@" } +test_quote_args() { + testnum=$((testnum + 1)) + retval=0 + i=0 + while [ "$(( i += 1 ))" -le 255 ]; do + fmt=$(printf '\%o' "$i") + str=$(printf "$fmt.") + POSIXLY_CORRECT= quote_args "${str%.}" || break + done | cksum | { + read -r cksum _ + if [ "${cksum}" != "380900690" ]; then + printf 'not ' + retval=1 + fi + printf 'ok %d - quote_args output test (expected cksum 380900690, got %s)\n' "${testnum}" "${cksum}" + return "${retval}" + } +} + iterate_tests() { slice_width=$1 shift @@ -959,6 +978,7 @@ else #test_substr || rc=1 test_contains_all || rc=1 test_contains_any || rc=1 + test_quote_args || rc=1 fi cleanup_tmpdir |