cat: with -E fix handling of \r\n spanning buffers

This commit is contained in:
liningjie 2023-11-28 09:34:48 +08:00
parent ab2fd67db6
commit 53b4f9d706
2 changed files with 182 additions and 1 deletions

View File

@ -0,0 +1,177 @@
From a8fcc3805260a017c0f5bf8e621f9f9f9147fb2e Mon Sep 17 00:00:00 2001
From: Pádraig Brady <P@draigBrady.com>
Date: Tue, 28 Nov 2023 10:22:52 +0800
Subject: [PATCH] cat: with -E fix handling of \r\n spanning buffers
We must delay handling when \r is the last character
of the buffer being processed, as the next character
may or may not be \n.
* src/cat.c (pending_cr): A new global to record whether
the last character processed (in -E mode) is '\r'.
(cat): Honor pending_cr when processing the start of the buffer.
(main): Honor pending_cr if no more files to process.
* tests/misc/cat-E.sh: Add test cases.
Fixes https://bugs.gnu.org/49925
---
src/cat.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
tests/local.mk | 1 +
tests/misc/cat-E.sh | 42 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 83 insertions(+), 5 deletions(-)
create mode 100755 tests/misc/cat-E.sh
diff --git a/src/cat.c b/src/cat.c
index b132a7d..c5c8ccf 100644
--- a/src/cat.c
+++ b/src/cat.c
@@ -78,6 +78,9 @@ static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3;
/* Preserves the 'cat' function's local 'newlines' between invocations. */
static int newlines2 = 0;
+/* Whether there is a pending CR to process. */
+static bool pending_cr = false;
+
void
usage (int status)
{
@@ -397,9 +400,16 @@ cat (
}
/* Output a currency symbol if requested (-e). */
-
if (show_ends)
- *bpout++ = '$';
+ {
+ if (pending_cr)
+ {
+ *bpout++ = '^';
+ *bpout++ = 'M';
+ pending_cr = false;
+ }
+ *bpout++ = '$';
+ }
/* Output the newline. */
@@ -409,6 +419,14 @@ cat (
}
while (ch == '\n');
+ /* Here CH cannot contain a newline character. */
+
+ if (pending_cr)
+ {
+ *bpout++ = '\r';
+ pending_cr = false;
+ }
+
/* Are we at the beginning of a line, and line numbers are requested? */
if (newlines >= 0 && number)
@@ -417,8 +435,6 @@ cat (
bpout = stpcpy (bpout, line_num_print);
}
- /* Here CH cannot contain a newline character. */
-
/* The loops below continue until a newline character is found,
which means that the buffer is empty or that a proper newline
has been found. */
@@ -486,7 +502,20 @@ cat (
*bpout++ = ch + 64;
}
else if (ch != '\n')
- *bpout++ = ch;
+ {
+ if (ch == '\r' && *bpin == '\n' && show_ends)
+ {
+ if (bpin == eob)
+ pending_cr = true;
+ else
+ {
+ *bpout++ = '^';
+ *bpout++ = 'M';
+ }
+ }
+ else
+ *bpout++ = ch;
+ }
else
{
newlines = -1;
@@ -760,6 +789,12 @@ main (int argc, char **argv)
}
while (++argind < argc);
+ if (pending_cr)
+ {
+ if (full_write (STDOUT_FILENO, "\r", 1) != 1)
+ die (EXIT_FAILURE, errno, _("write error"));
+ }
+
if (have_read_stdin && close (STDIN_FILENO) < 0)
die (EXIT_FAILURE, errno, _("closing standard input"));
diff --git a/tests/local.mk b/tests/local.mk
index 1af761e..64c62df 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -278,6 +278,7 @@ all_tests = \
tests/misc/wc-nbsp.sh \
tests/misc/wc-parallel.sh \
tests/misc/wc-proc.sh \
+ tests/misc/cat-E.sh \
tests/misc/cat-proc.sh \
tests/misc/cat-buf.sh \
tests/misc/cat-self.sh \
diff --git a/tests/misc/cat-E.sh b/tests/misc/cat-E.sh
new file mode 100755
index 0000000..1131eb3
--- /dev/null
+++ b/tests/misc/cat-E.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# Copyright (C) 2021 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ cat
+
+# \r followed by \n is displayed as ^M$
+# Up to and including 8.32 the $ would have displayed at the start of the line
+# overwriting the first character
+printf 'a\rb\r\nc\n\r\nd\r' > 'in' || framework_failure_
+printf 'a\rb^M$\nc$\n^M$\nd\r' > 'exp' || framework_failure_
+cat -E 'in' > out || fail=1
+compare exp out || fail=1
+
+# Ensure \r\n spanning files (or buffers) is handled
+printf '1\r' > in2 || framework_failure_
+printf '\n2\r\n' > in2b || framework_failure_
+printf '1^M$\n2^M$\n' > 'exp' || framework_failure_
+cat -E 'in2' 'in2b' > out || fail=1
+compare exp out || fail=1
+
+# Ensure \r at end of buffer is handled
+printf '1\r' > in2 || framework_failure_
+printf '2\r\n' > in2b || framework_failure_
+printf '1\r2^M$\n' > 'exp' || framework_failure_
+cat -E 'in2' 'in2b' > out || fail=1
+compare exp out || fail=1
+
+Exit $fail
--
2.33.0

View File

@ -1,6 +1,6 @@
Name: coreutils
Version: 8.32
Release: 7
Release: 8
License: GPLv3+
Summary: A set of basic GNU tools commonly used in shell scripts
Url: https://www.gnu.org/software/coreutils/
@ -39,6 +39,7 @@ Patch21: backport-ls-avoid-triggering-automounts.patch
Patch22: backport-stat-only-automount-with-cached-never.patch
Patch23: test-skip-overlay-filesystem-because-of-no-inotify_add_watch.patch
Patch24: 0001-basenc-fix-bug49741-using-wrong-decoding-buffer-leng.patch
Patch25: 0002-cat-with-E-fix-handling-of-r-n-spanning-buffers.patch
Conflicts: filesystem < 3
# To avoid clobbering installs
@ -157,6 +158,9 @@ fi
%{_mandir}/man*/*
%changelog
* Tue Nov 28 2023 liningjie <liningjie@xfusion.com> - 8.32-8
- cat: with -E fix handling of \r\n spanning buffers
* Sat Nov 25 2023 liningjie <liningjie@xfusion.com> - 8.32-7
- fix basenc using wrong decoding buffer length