coreutils/0002-cat-with-E-fix-handling-of-r-n-spanning-buffers.patch

178 lines
5.6 KiB
Diff

From a8fcc3805260a017c0f5bf8e621f9f9f9147fb2e Mon Sep 17 00:00:00 2001
From: Pádraig Brady <P@draigBrady.com>
Date: Tue, 28 Nov 2023 10:22:52 +0800
Subject: [PATCH] cat: with -E fix handling of \r\n spanning buffers

We must delay handling when \r is the last character
of the buffer being processed, as the next character
may or may not be \n.

* src/cat.c (pending_cr): A new global to record whether
the last character processed (in -E mode) is '\r'.
(cat): Honor pending_cr when processing the start of the buffer.
(main): Honor pending_cr if no more files to process.
* tests/misc/cat-E.sh: Add test cases.
Fixes https://bugs.gnu.org/49925
---
src/cat.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
tests/local.mk | 1 +
tests/misc/cat-E.sh | 42 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 83 insertions(+), 5 deletions(-)
create mode 100755 tests/misc/cat-E.sh
diff --git a/src/cat.c b/src/cat.c
index b132a7d..c5c8ccf 100644
--- a/src/cat.c
+++ b/src/cat.c
@@ -78,6 +78,9 @@ static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3;
/* Preserves the 'cat' function's local 'newlines' between invocations. */
static int newlines2 = 0;
+/* Whether there is a pending CR to process. */
+static bool pending_cr = false;
+
void
usage (int status)
{
@@ -397,9 +400,16 @@ cat (
}
/* Output a currency symbol if requested (-e). */
-
if (show_ends)
- *bpout++ = '$';
+ {
+ if (pending_cr)
+ {
+ *bpout++ = '^';
+ *bpout++ = 'M';
+ pending_cr = false;
+ }
+ *bpout++ = '$';
+ }
/* Output the newline. */
@@ -409,6 +419,14 @@ cat (
}
while (ch == '\n');
+ /* Here CH cannot contain a newline character. */
+
+ if (pending_cr)
+ {
+ *bpout++ = '\r';
+ pending_cr = false;
+ }
+
/* Are we at the beginning of a line, and line numbers are requested? */
if (newlines >= 0 && number)
@@ -417,8 +435,6 @@ cat (
bpout = stpcpy (bpout, line_num_print);
}
- /* Here CH cannot contain a newline character. */
-
/* The loops below continue until a newline character is found,
which means that the buffer is empty or that a proper newline
has been found. */
@@ -486,7 +502,20 @@ cat (
*bpout++ = ch + 64;
}
else if (ch != '\n')
- *bpout++ = ch;
+ {
+ if (ch == '\r' && *bpin == '\n' && show_ends)
+ {
+ if (bpin == eob)
+ pending_cr = true;
+ else
+ {
+ *bpout++ = '^';
+ *bpout++ = 'M';
+ }
+ }
+ else
+ *bpout++ = ch;
+ }
else
{
newlines = -1;
@@ -760,6 +789,12 @@ main (int argc, char **argv)
}
while (++argind < argc);
+ if (pending_cr)
+ {
+ if (full_write (STDOUT_FILENO, "\r", 1) != 1)
+ die (EXIT_FAILURE, errno, _("write error"));
+ }
+
if (have_read_stdin && close (STDIN_FILENO) < 0)
die (EXIT_FAILURE, errno, _("closing standard input"));
diff --git a/tests/local.mk b/tests/local.mk
index 1af761e..64c62df 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -278,6 +278,7 @@ all_tests = \
tests/misc/wc-nbsp.sh \
tests/misc/wc-parallel.sh \
tests/misc/wc-proc.sh \
+ tests/misc/cat-E.sh \
tests/misc/cat-proc.sh \
tests/misc/cat-buf.sh \
tests/misc/cat-self.sh \
diff --git a/tests/misc/cat-E.sh b/tests/misc/cat-E.sh
new file mode 100755
index 0000000..1131eb3
--- /dev/null
+++ b/tests/misc/cat-E.sh
@@ -0,0 +1,42 @@
+#!/bin/sh
+# Copyright (C) 2021 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ cat
+
+# \r followed by \n is displayed as ^M$
+# Up to and including 8.32 the $ would have displayed at the start of the line
+# overwriting the first character
+printf 'a\rb\r\nc\n\r\nd\r' > 'in' || framework_failure_
+printf 'a\rb^M$\nc$\n^M$\nd\r' > 'exp' || framework_failure_
+cat -E 'in' > out || fail=1
+compare exp out || fail=1
+
+# Ensure \r\n spanning files (or buffers) is handled
+printf '1\r' > in2 || framework_failure_
+printf '\n2\r\n' > in2b || framework_failure_
+printf '1^M$\n2^M$\n' > 'exp' || framework_failure_
+cat -E 'in2' 'in2b' > out || fail=1
+compare exp out || fail=1
+
+# Ensure \r at end of buffer is handled
+printf '1\r' > in2 || framework_failure_
+printf '2\r\n' > in2b || framework_failure_
+printf '1\r2^M$\n' > 'exp' || framework_failure_
+cat -E 'in2' 'in2b' > out || fail=1
+compare exp out || fail=1
+
+Exit $fail
--
2.33.0