Fix ras-mc-ctl script

This commit is contained in:
shixuantong 2023-03-21 20:19:18 +08:00
parent b7af7e8dde
commit b3ff53efe4
3 changed files with 515 additions and 1 deletions

View File

@ -0,0 +1,454 @@
From 546cf713f667437fb6e283cc3dc090679eb47d08 Mon Sep 17 00:00:00 2001
From: Subhendu Saha <subhends@akamai.com>
Date: Tue, 12 Jan 2021 03:29:55 -0500
Subject: [PATCH] Fix ras-mc-ctl script.
When rasdaemon is compiled without enabling aer, mce, devlink,
etc., those tables are not created in the database file. Then
ras-mc-ctl script breaks trying to query data from non-existent
tables.
Signed-off-by: Subhendu Saha subhends@akamai.com
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
util/ras-mc-ctl.in | 384 ++++++++++++++++++++++++---------------------
1 file changed, 208 insertions(+), 176 deletions(-)
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index 665a042..be9d983 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -41,6 +41,18 @@ my $sysconfdir = "@sysconfdir@";
my $dmidecode = find_prog ("dmidecode");
my $modprobe = find_prog ("modprobe") or exit (1);
+my $has_aer = 0;
+my $has_devlink = 0;
+my $has_disk_errors = 0;
+my $has_extlog = 0;
+my $has_mce = 0;
+
+@WITH_AER_TRUE@$has_aer = 1;
+@WITH_DEVLINK_TRUE@$has_devlink = 1;
+@WITH_DISKERROR_TRUE@$has_disk_errors = 1;
+@WITH_EXTLOG_TRUE@$has_extlog = 1;
+@WITH_MCE_TRUE@$has_mce = 1;
+
my %conf = ();
my %bus = ();
my %dimm_size = ();
@@ -1143,86 +1155,96 @@ sub summary
$query_handle->finish;
# PCIe AER aer_event errors
- $query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg";
- $query_handle = $dbh->prepare($query);
- $query_handle->execute();
- $query_handle->bind_columns(\($err_type, $msg, $count));
- $out = "";
- while($query_handle->fetch()) {
- $out .= "\t$count $err_type errors: $msg\n";
- }
- if ($out ne "") {
- print "PCIe AER events summary:\n$out\n";
- } else {
- print "No PCIe AER errors.\n\n";
+ if ($has_aer == 1) {
+ $query = "select err_type, err_msg, count(*) from aer_event group by err_type, err_msg";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($err_type, $msg, $count));
+ $out = "";
+ while($query_handle->fetch()) {
+ $out .= "\t$count $err_type errors: $msg\n";
+ }
+ if ($out ne "") {
+ print "PCIe AER events summary:\n$out\n";
+ } else {
+ print "No PCIe AER errors.\n\n";
+ }
+ $query_handle->finish;
}
- $query_handle->finish;
# extlog errors
- $query = "select etype, severity, count(*) from extlog_event group by etype, severity";
- $query_handle = $dbh->prepare($query);
- $query_handle->execute();
- $query_handle->bind_columns(\($etype, $severity, $count));
- $out = "";
- while($query_handle->fetch()) {
- $etype_string = get_extlog_type($etype);
- $severity_string = get_extlog_severity($severity);
- $out .= "\t$count $etype_string $severity_string errors\n";
- }
- if ($out ne "") {
- print "Extlog records summary:\n$out";
- } else {
- print "No Extlog errors.\n\n";
+ if ($has_extlog == 1) {
+ $query = "select etype, severity, count(*) from extlog_event group by etype, severity";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($etype, $severity, $count));
+ $out = "";
+ while($query_handle->fetch()) {
+ $etype_string = get_extlog_type($etype);
+ $severity_string = get_extlog_severity($severity);
+ $out .= "\t$count $etype_string $severity_string errors\n";
+ }
+ if ($out ne "") {
+ print "Extlog records summary:\n$out";
+ } else {
+ print "No Extlog errors.\n\n";
+ }
+ $query_handle->finish;
}
- $query_handle->finish;
# devlink errors
- $query = "select dev_name, count(*) from devlink_event group by dev_name";
- $query_handle = $dbh->prepare($query);
- $query_handle->execute();
- $query_handle->bind_columns(\($dev_name, $count));
- $out = "";
- while($query_handle->fetch()) {
- $out .= "\t$dev_name has $count errors\n";
- }
- if ($out ne "") {
- print "Devlink records summary:\n$out";
- } else {
- print "No devlink errors.\n";
+ if ($has_devlink == 1) {
+ $query = "select dev_name, count(*) from devlink_event group by dev_name";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($dev_name, $count));
+ $out = "";
+ while($query_handle->fetch()) {
+ $out .= "\t$dev_name has $count errors\n";
+ }
+ if ($out ne "") {
+ print "Devlink records summary:\n$out";
+ } else {
+ print "No devlink errors.\n";
+ }
+ $query_handle->finish;
}
- $query_handle->finish;
# Disk errors
- $query = "select dev, count(*) from disk_errors group by dev";
- $query_handle = $dbh->prepare($query);
- $query_handle->execute();
- $query_handle->bind_columns(\($dev, $count));
- $out = "";
- while($query_handle->fetch()) {
- $out .= "\t$dev has $count errors\n";
- }
- if ($out ne "") {
- print "Disk errors summary:\n$out";
- } else {
- print "No disk errors.\n";
+ if ($has_disk_errors == 1) {
+ $query = "select dev, count(*) from disk_errors group by dev";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($dev, $count));
+ $out = "";
+ while($query_handle->fetch()) {
+ $out .= "\t$dev has $count errors\n";
+ }
+ if ($out ne "") {
+ print "Disk errors summary:\n$out";
+ } else {
+ print "No disk errors.\n";
+ }
+ $query_handle->finish;
}
- $query_handle->finish;
# MCE mce_record errors
- $query = "select error_msg, count(*) from mce_record group by error_msg";
- $query_handle = $dbh->prepare($query);
- $query_handle->execute();
- $query_handle->bind_columns(\($msg, $count));
- $out = "";
- while($query_handle->fetch()) {
- $out .= "\t$count $msg errors\n";
- }
- if ($out ne "") {
- print "MCE records summary:\n$out";
- } else {
- print "No MCE errors.\n";
+ if ($has_mce == 1) {
+ $query = "select error_msg, count(*) from mce_record group by error_msg";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($msg, $count));
+ $out = "";
+ while($query_handle->fetch()) {
+ $out .= "\t$count $msg errors\n";
+ }
+ if ($out ne "") {
+ print "MCE records summary:\n$out";
+ } else {
+ print "No MCE errors.\n";
+ }
+ $query_handle->finish;
}
- $query_handle->finish;
undef($dbh);
}
@@ -1259,128 +1281,138 @@ sub errors
$query_handle->finish;
# PCIe AER aer_event errors
- $query = "select id, timestamp, dev_name, err_type, err_msg from aer_event order by id";
- $query_handle = $dbh->prepare($query);
- $query_handle->execute();
- $query_handle->bind_columns(\($id, $time, $devname, $type, $msg));
- $out = "";
- while($query_handle->fetch()) {
- $out .= "$id $time $devname $type error: $msg\n";
- }
- if ($out ne "") {
- print "PCIe AER events:\n$out\n";
- } else {
- print "No PCIe AER errors.\n\n";
+ if ($has_aer == 1) {
+ $query = "select id, timestamp, dev_name, err_type, err_msg from aer_event order by id";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($id, $time, $devname, $type, $msg));
+ $out = "";
+ while($query_handle->fetch()) {
+ $out .= "$id $time $devname $type error: $msg\n";
+ }
+ if ($out ne "") {
+ print "PCIe AER events:\n$out\n";
+ } else {
+ print "No PCIe AER errors.\n\n";
+ }
+ $query_handle->finish;
}
- $query_handle->finish;
# Extlog errors
- $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id";
- $query_handle = $dbh->prepare($query);
- $query_handle->execute();
- $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data));
- $out = "";
- while($query_handle->fetch()) {
- $etype_string = get_extlog_type($etype);
- $severity_string = get_extlog_severity($severity);
- $out .= "$id $timestamp error: ";
- $out .= "type=$etype_string, ";
- $out .= "severity=$severity_string, ";
- $out .= sprintf "address=0x%08x, ", $addr;
- $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id);
- $out .= "fru_text='$fru_text', ";
- $out .= get_cper_data_text($cper_data) if ($cper_data);
- $out .= "\n";
- }
- if ($out ne "") {
- print "Extlog events:\n$out\n";
- } else {
- print "No Extlog errors.\n\n";
+ if ($has_extlog == 1) {
+ $query = "select id, timestamp, etype, severity, address, fru_id, fru_text, cper_data from extlog_event order by id";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($id, $timestamp, $etype, $severity, $addr, $fru_id, $fru_text, $cper_data));
+ $out = "";
+ while($query_handle->fetch()) {
+ $etype_string = get_extlog_type($etype);
+ $severity_string = get_extlog_severity($severity);
+ $out .= "$id $timestamp error: ";
+ $out .= "type=$etype_string, ";
+ $out .= "severity=$severity_string, ";
+ $out .= sprintf "address=0x%08x, ", $addr;
+ $out .= sprintf "fru_id=%s, ", get_uuid_le($fru_id);
+ $out .= "fru_text='$fru_text', ";
+ $out .= get_cper_data_text($cper_data) if ($cper_data);
+ $out .= "\n";
+ }
+ if ($out ne "") {
+ print "Extlog events:\n$out\n";
+ } else {
+ print "No Extlog errors.\n\n";
+ }
+ $query_handle->finish;
}
- $query_handle->finish;
# devlink errors
- $query = "select id, timestamp, bus_name, dev_name, driver_name, reporter_name, msg from devlink_event order by id";
- $query_handle = $dbh->prepare($query);
- $query_handle->execute();
- $query_handle->bind_columns(\($id, $timestamp, $bus_name, $dev_name, $driver_name, $reporter_name, $msg));
- $out = "";
- while($query_handle->fetch()) {
- $out .= "$id $timestamp error: ";
- $out .= "bus_name=$bus_name, ";
- $out .= "dev_name=$dev_name, ";
- $out .= "driver_name=$driver_name, ";
- $out .= "reporter_name=$reporter_name, ";
- $out .= "message='$msg', ";
- $out .= "\n";
- }
- if ($out ne "") {
- print "Devlink events:\n$out\n";
- } else {
- print "No devlink errors.\n\n";
+ if ($has_devlink == 1) {
+ $query = "select id, timestamp, bus_name, dev_name, driver_name, reporter_name, msg from devlink_event order by id";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($id, $timestamp, $bus_name, $dev_name, $driver_name, $reporter_name, $msg));
+ $out = "";
+ while($query_handle->fetch()) {
+ $out .= "$id $timestamp error: ";
+ $out .= "bus_name=$bus_name, ";
+ $out .= "dev_name=$dev_name, ";
+ $out .= "driver_name=$driver_name, ";
+ $out .= "reporter_name=$reporter_name, ";
+ $out .= "message='$msg', ";
+ $out .= "\n";
+ }
+ if ($out ne "") {
+ print "Devlink events:\n$out\n";
+ } else {
+ print "No devlink errors.\n\n";
+ }
+ $query_handle->finish;
}
- $query_handle->finish;
# Disk errors
- $query = "select id, timestamp, dev, sector, nr_sector, error, rwbs, cmd from disk_errors order by id";
- $query_handle = $dbh->prepare($query);
- $query_handle->execute();
- $query_handle->bind_columns(\($id, $timestamp, $dev, $sector, $nr_sector, $error, $rwbs, $cmd));
- $out = "";
- while($query_handle->fetch()) {
- $out .= "$id $timestamp error: ";
- $out .= "dev=$dev, ";
- $out .= "sector=$sector, ";
- $out .= "nr_sector=$nr_sector, ";
- $out .= "error='$error', ";
- $out .= "rwbs='$rwbs', ";
- $out .= "cmd='$cmd', ";
- $out .= "\n";
- }
- if ($out ne "") {
- print "Disk errors\n$out\n";
- } else {
- print "No disk errors.\n\n";
+ if ($has_disk_errors == 1) {
+ $query = "select id, timestamp, dev, sector, nr_sector, error, rwbs, cmd from disk_errors order by id";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($id, $timestamp, $dev, $sector, $nr_sector, $error, $rwbs, $cmd));
+ $out = "";
+ while($query_handle->fetch()) {
+ $out .= "$id $timestamp error: ";
+ $out .= "dev=$dev, ";
+ $out .= "sector=$sector, ";
+ $out .= "nr_sector=$nr_sector, ";
+ $out .= "error='$error', ";
+ $out .= "rwbs='$rwbs', ";
+ $out .= "cmd='$cmd', ";
+ $out .= "\n";
+ }
+ if ($out ne "") {
+ print "Disk errors\n$out\n";
+ } else {
+ print "No disk errors.\n\n";
+ }
+ $query_handle->finish;
}
- $query_handle->finish;
# MCE mce_record errors
- $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id";
- $query_handle = $dbh->prepare($query);
- $query_handle->execute();
- $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location));
- $out = "";
- while($query_handle->fetch()) {
- $out .= "$id $time error: $msg";
- $out .= ", CPU $cpuvendor" if ($cpuvendor);
- $out .= ", bank $bank_name" if ($bank_name);
- $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg);
- $out .= ", mci $mcistatus_msg" if ($mcistatus_msg);
- $out .= ", $mc_location" if ($mc_location);
- $out .= ", $user_action" if ($user_action);
- $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap);
- $out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus);
- $out .= sprintf ", status=0x%08x", $status if ($status);
- $out .= sprintf ", addr=0x%08x", $addr if ($addr);
- $out .= sprintf ", misc=0x%08x", $misc if ($misc);
- $out .= sprintf ", ip=0x%08x", $ip if ($ip);
- $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc);
- $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime);
- $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu);
- $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid);
- $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid);
- $out .= sprintf ", socketid=0x%08x", $socketid if ($socketid);
- $out .= sprintf ", cs=0x%08x", $cs if ($cs);
- $out .= sprintf ", bank=0x%08x", $bank if ($bank);
-
- $out .= "\n";
- }
- if ($out ne "") {
- print "MCE events:\n$out\n";
- } else {
- print "No MCE errors.\n\n";
+ if ($has_mce == 1) {
+ $query = "select id, timestamp, mcgcap, mcgstatus, status, addr, misc, ip, tsc, walltime, cpu, cpuid, apicid, socketid, cs, bank, cpuvendor, bank_name, error_msg, mcgstatus_msg, mcistatus_msg, user_action, mc_location from mce_record order by id";
+ $query_handle = $dbh->prepare($query);
+ $query_handle->execute();
+ $query_handle->bind_columns(\($id, $time, $mcgcap,$mcgstatus, $status, $addr, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $msg, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location));
+ $out = "";
+ while($query_handle->fetch()) {
+ $out .= "$id $time error: $msg";
+ $out .= ", CPU $cpuvendor" if ($cpuvendor);
+ $out .= ", bank $bank_name" if ($bank_name);
+ $out .= ", mcg $mcgstatus_msg" if ($mcgstatus_msg);
+ $out .= ", mci $mcistatus_msg" if ($mcistatus_msg);
+ $out .= ", $mc_location" if ($mc_location);
+ $out .= ", $user_action" if ($user_action);
+ $out .= sprintf ", mcgcap=0x%08x", $mcgcap if ($mcgcap);
+ $out .= sprintf ", mcgstatus=0x%08x", $mcgstatus if ($mcgstatus);
+ $out .= sprintf ", status=0x%08x", $status if ($status);
+ $out .= sprintf ", addr=0x%08x", $addr if ($addr);
+ $out .= sprintf ", misc=0x%08x", $misc if ($misc);
+ $out .= sprintf ", ip=0x%08x", $ip if ($ip);
+ $out .= sprintf ", tsc=0x%08x", $tsc if ($tsc);
+ $out .= sprintf ", walltime=0x%08x", $walltime if ($walltime);
+ $out .= sprintf ", cpu=0x%08x", $cpu if ($cpu);
+ $out .= sprintf ", cpuid=0x%08x", $cpuid if ($cpuid);
+ $out .= sprintf ", apicid=0x%08x", $apicid if ($apicid);
+ $out .= sprintf ", socketid=0x%08x", $socketid if ($socketid);
+ $out .= sprintf ", cs=0x%08x", $cs if ($cs);
+ $out .= sprintf ", bank=0x%08x", $bank if ($bank);
+
+ $out .= "\n";
+ }
+ if ($out ne "") {
+ print "MCE events:\n$out\n";
+ } else {
+ print "No MCE errors.\n\n";
+ }
+ $query_handle->finish;
}
- $query_handle->finish;
undef($dbh);
}
--
2.27.0

View File

@ -0,0 +1,52 @@
From 059a901e97f4091e31c50ce55027daf707638f8d Mon Sep 17 00:00:00 2001
From: dann frazier <dann.frazier@canonical.com>
Date: Tue, 21 Apr 2020 15:56:04 -0600
Subject: [PATCH] ras-mc-ctl: PCIe AER: display PCIe dev name
Storage of PCIe dev name was added in commit 8e96ca2c1c59 ("rasdaemon:
store PCIe dev name and TLP header for the aer event"). This makes
ras-mc-ctl extract and emit it like so:
PCIe AER events:
1 2020-04-16 22:09:48 +0000 0000:0b:00.0 Corrected error: Receiver Error
2 2020-04-16 22:23:24 +0000 0000:0b:00.0 Corrected error: Receiver Error
3 2020-04-17 23:00:37 +0000 0000:d9:01.0 Corrected error: Advisory Non-Fatal, BIT15
4 2020-04-17 23:21:52 +0000 0000:d9:01.0 Corrected error: Advisory Non-Fatal
5 2020-04-18 02:04:24 +0000 0000:5e:00.0 Corrected error: Receiver Error
Signed-off-by: Dann Frazier <dann.frazier@canonical.com>
Tested-by: Shiju Jose <shiju.jose@huawei.com>
---
util/ras-mc-ctl.in | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/util/ras-mc-ctl.in b/util/ras-mc-ctl.in
index 8d6d866..665a042 100755
--- a/util/ras-mc-ctl.in
+++ b/util/ras-mc-ctl.in
@@ -1230,7 +1230,7 @@ sub summary
sub errors
{
require DBI;
- my ($query, $query_handle, $id, $time, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out);
+ my ($query, $query_handle, $id, $time, $devname, $count, $type, $msg, $label, $mc, $top, $mid, $low, $addr, $grain, $syndrome, $detail, $out);
my ($mcgcap,$mcgstatus, $status, $misc, $ip, $tsc, $walltime, $cpu, $cpuid, $apicid, $socketid, $cs, $bank, $cpuvendor, $bank_name, $mcgstatus_msg, $mcistatus_msg, $user_action, $mc_location);
my ($timestamp, $etype, $severity, $etype_string, $severity_string, $fru_id, $fru_text, $cper_data);
my ($bus_name, $dev_name, $driver_name, $reporter_name);
@@ -1259,13 +1259,13 @@ sub errors
$query_handle->finish;
# PCIe AER aer_event errors
- $query = "select id, timestamp, err_type, err_msg from aer_event order by id";
+ $query = "select id, timestamp, dev_name, err_type, err_msg from aer_event order by id";
$query_handle = $dbh->prepare($query);
$query_handle->execute();
- $query_handle->bind_columns(\($id, $time, $type, $msg));
+ $query_handle->bind_columns(\($id, $time, $devname, $type, $msg));
$out = "";
while($query_handle->fetch()) {
- $out .= "$id $time $type error: $msg\n";
+ $out .= "$id $time $devname $type error: $msg\n";
}
if ($out ne "") {
print "PCIe AER events:\n$out\n";

View File

@ -1,6 +1,6 @@
Name: rasdaemon
Version: 0.6.6
Release: 8
Release: 9
License: GPLv2
Summary: Utility to get Platform Reliability, Availability and Serviceability (RAS) reports via the Kernel tracing events
URL: https://github.com/mchehab/rasdaemon.git
@ -37,6 +37,8 @@ Patch15: 0007-add-trace-print-and-add-sqlite-store.patch
Patch16: 0008-modify-cpu-parse-for-adapting-to-new-bios-version.patch
Patch17: bugfix-modify-the-way-counting-cpu-logical-index.patch
Patch18: bugfix-fix-where-local-variables-are-not-initialized.patch
Patch19: backport-ras-mc-ctl-PCIe-AER-display-PCIe-dev-name.patch
Patch20: backport-Fix-ras-mc-ctl-script.patch
%description
The rasdaemon program is a daemon which monitors the platform
@ -83,6 +85,12 @@ rm INSTALL %{buildroot}/usr/include/*.h
/usr/bin/systemctl enable rasdaemon.service >/dev/null 2>&1 || :
%changelog
* Tue Mar 21 2023 shixuantong <shixuantong1@huawei.com> - 0.6.6-9
- Type:bugfix
- ID:NA
- SUG:NA
- DESC:Fix ras-mc-ctl script
* Wed Dec 15 2021 luoshengwei<luoshengwei@huawei.com> - 0.6.6-8
- Type:bugfix
- ID:NA