!360 [TSV110] Fix incorrect port of tsv110 pipeline

From: @eastb233 
Reviewed-by: @li-yancheng 
Signed-off-by: @li-yancheng
This commit is contained in:
openeuler-ci-bot 2023-11-27 11:52:00 +00:00 committed by Gitee
commit b095a38ab2
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
6 changed files with 663 additions and 97 deletions

View File

@ -1,53 +1,34 @@
diff -urpN a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
--- a/gcc/config/aarch64/aarch64.c 2019-04-15 14:50:25.866378665 +0800
+++ b/gcc/config/aarch64/aarch64.c 2019-04-15 14:49:21.986376983 +0800
@@ -554,6 +554,31 @@ static const struct tune_params generic_
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
};
From a29529542553e0c49cf1efe0808fc4a4733dc674 Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Wed, 22 Nov 2023 17:18:35 +0800
Subject: [PATCH 2/4] [Backport][AArch64] Add tsv110 pipeline scheduling
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8108dfde82ad6ec43613107b2c156999e6a5cbe7
Committed on behalf of Wu Yuan.
---
gcc/config/aarch64/aarch64-cores.def | 2 +-
gcc/config/aarch64/aarch64.md | 1 +
gcc/config/aarch64/tsv110.md | 708 +++++++++++++++++++++++++++
3 files changed, 710 insertions(+), 1 deletion(-)
create mode 100644 gcc/config/aarch64/tsv110.md
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index ea7052388..6911f9704 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -88,6 +88,6 @@ AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH
/* V8.2 Architecture Processors. */
+static const struct tune_params tsv110_tunings =
+{
+ &cortexa57_extra_costs,
+ &generic_addrcost_table,
+ &generic_regmove_cost,
+ &generic_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ 4, /* memmov_cost */
+ 4, /* issue_rate */
+ AARCH64_FUSE_NOTHING, /* fusible_ops */
+ 16, /* function_align. */
+ 16, /* jump_align. */
+ 8, /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+};
+
static const struct tune_params cortexa35_tunings =
{
&cortexa53_extra_costs,
diff -urpN a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
--- a/gcc/config/aarch64/aarch64-cores.def 2017-02-15 08:09:28.845771000 +0800
+++ b/gcc/config/aarch64/aarch64-cores.def 2019-04-15 14:49:21.986376983 +0800
@@ -78,6 +78,8 @@ AARCH64_CORE("xgene1", xgene1, x
AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, tsv110, 0x48, 0xd01, -1)
+
/* V8 big.LITTLE implementations. */
AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
--- a/gcc/config/aarch64/aarch64.md 2019-04-15 14:50:25.870378665 +0800
+++ b/gcc/config/aarch64/aarch64.md 2019-04-15 14:49:21.986376983 +0800
#undef AARCH64_CORE
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5b5c401f8..0d6ed8c5c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -226,6 +226,7 @@
(include "thunderx.md")
(include "../arm/xgene1.md")
@ -56,19 +37,11 @@ diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff -urpN a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
--- a/gcc/config/aarch64/aarch64-tune.md 2017-02-15 08:09:28.845771000 +0800
+++ b/gcc/config/aarch64/aarch64-tune.md 2019-04-15 14:49:21.986376983 +0800
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
+ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,tsv110,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff -urpN a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
--- a/gcc/config/aarch64/tsv110.md 1970-01-01 08:00:00.000000000 +0800
+++ b/gcc/config/aarch64/tsv110.md 2019-04-15 14:55:30.420081420 +0800
diff --git a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
new file mode 100644
index 000000000..33fc72ab4
--- /dev/null
+++ b/gcc/config/aarch64/tsv110.md
@@ -0,0 +1,708 @@
+;; tsv110 pipeline description
+;; Copyright (C) 2018 Free Software Foundation, Inc.
@ -688,7 +661,7 @@ diff -urpN a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
+
+(define_insn_reservation "tsv110_fp_add_sub" 5
+ (and (eq_attr "tune" "tsv110")
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ (eq_attr "type" "fadds,faddd,fmuls,fmuld"))
+ "tsv110_fsu1|tsv110_fsu2")
+
+(define_insn_reservation "tsv110_fp_mac" 7
@ -778,3 +751,6 @@ diff -urpN a/gcc/config/aarch64/tsv110.md b/gcc/config/aarch64/tsv110.md
+;; help.
+(define_bypass 1 "tsv110_*"
+ "tsv110_call,tsv110_branch")
--
2.19.1

View File

@ -0,0 +1,39 @@
From 312e8086a6a1164e8f16aff68ca175f32b3185ee Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Sat, 25 Nov 2023 10:50:11 +0800
Subject: [PATCH] [Backport][AArch64] Fix longbranch test
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c7fd21762de653a19dabf837917a8ad6f9491bc0
Fix longbranch test so it still generates long tbz branches.
---
gcc/testsuite/gcc.target/aarch64/long_branch_1.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/gcc/testsuite/gcc.target/aarch64/long_branch_1.c b/gcc/testsuite/gcc.target/aarch64/long_branch_1.c
index 46f500d36..49d8b6a22 100644
--- a/gcc/testsuite/gcc.target/aarch64/long_branch_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/long_branch_1.c
@@ -54,10 +54,6 @@ test_and_branch (int selector, int addend, int cond)
{
start0:
return sum - 1;
-start1:
- return sum + 1;
-start2:
- return sum;
start3:
return sum - 2;
}
@@ -65,6 +61,8 @@ start3:
{
switch (selector)
{
+start1:
+start2:
CASE_ENTRY128 (1)
CASE_ENTRY64 (129)
CASE_ENTRY16 (193)
--
2.19.1

View File

@ -0,0 +1,205 @@
From d290efa0319b0327a6dc804a4b9ecad0e8b5a5bb Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Thu, 23 Nov 2023 11:01:45 +0800
Subject: [PATCH 4/4] [Backport] Learn GIMPLE pretty printer to produce nicer
dump output.
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5edb1c48f8e526a2b8f7f9d03fce9a7fdcb14b88
But we only port the part of the changes about attr-hotcold-2.c because
the rest is irrelevant.
[Backport] Recover GOTO predictor.
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7fef86d3486c9f4208a111a41a2cc66b7328b6d9
---
gcc/c/c-typeck.c | 1 +
gcc/cp/constexpr.c | 1 +
gcc/cp/pt.c | 2 ++
gcc/cp/semantics.c | 2 ++
gcc/gimplify.c | 4 +++-
gcc/predict.def | 5 ++---
gcc/testsuite/gcc.dg/predict-15.c | 17 +++++++++++++++++
gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c | 13 ++++++-------
gcc/testsuite/gcc.dg/tree-ssa/vrp24.c | 10 ++++------
9 files changed, 38 insertions(+), 17 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/predict-15.c
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index ee365313c..cf8463da7 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -9816,6 +9816,7 @@ c_finish_goto_label (location_t loc, tree label)
return NULL_TREE;
TREE_USED (decl) = 1;
{
+ add_stmt (build_predict_expr (PRED_GOTO, NOT_TAKEN));
tree t = build1 (GOTO_EXPR, void_type_node, decl);
SET_EXPR_LOCATION (t, loc);
return add_stmt (t);
diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 9082230b9..adae14b4f 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -5827,6 +5827,7 @@ potential_constant_expression_1 (tree t, bool want_rval, bool strict,
case CLEANUP_STMT:
case EMPTY_CLASS_EXPR:
+ case PREDICT_EXPR:
return false;
case GOTO_EXPR:
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 5687bb212..b536a54af 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -15161,6 +15161,8 @@ tsubst_copy (tree t, tree args, tsubst_flags_t complain, tree in_decl)
return tsubst_binary_left_fold (t, args, complain, in_decl);
case BINARY_RIGHT_FOLD_EXPR:
return tsubst_binary_right_fold (t, args, complain, in_decl);
+ case PREDICT_EXPR:
+ return t;
default:
/* We shouldn't get here, but keep going if !flag_checking. */
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index e06637646..840d193b9 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -41,6 +41,7 @@ along with GCC; see the file COPYING3. If not see
#include "omp-general.h"
#include "convert.h"
#include "gomp-constants.h"
+#include "predict.h"
/* There routines provide a modular interface to perform many parsing
operations. They may therefore be used during actual parsing, or
@@ -628,6 +629,7 @@ finish_goto_stmt (tree destination)
check_goto (destination);
+ add_stmt (build_predict_expr (PRED_GOTO, NOT_TAKEN));
return add_stmt (build_stmt (input_location, GOTO_EXPR, destination));
}
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index c6a06d014..dfc2fddd8 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -2023,7 +2023,9 @@ should_warn_for_implicit_fallthrough (gimple_stmt_iterator *gsi_p, tree label)
gsi = *gsi_p;
/* Skip all immediately following labels. */
- while (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
+ while (!gsi_end_p (gsi)
+ && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
+ || gimple_code (gsi_stmt (gsi)) == GIMPLE_PREDICT))
gsi_next (&gsi);
/* { ... something; default:; } */
diff --git a/gcc/predict.def b/gcc/predict.def
index e96be12be..d7048e433 100644
--- a/gcc/predict.def
+++ b/gcc/predict.def
@@ -141,9 +141,8 @@ DEF_PREDICTOR (PRED_RECURSIVE_CALL, "recursive call", HITRATE (75), 0)
this from FE or retire the predictor. */
DEF_PREDICTOR (PRED_TREE_EARLY_RETURN, "early return (on trees)", HITRATE (54), 0)
-/* Branch containing goto is probably not taken.
- FIXME: Currently not used. */
-DEF_PREDICTOR (PRED_GOTO, "goto", HITRATE (70), 0)
+/* Branch containing goto is probably not taken. */
+DEF_PREDICTOR (PRED_GOTO, "goto", HITRATE (66), 0)
/* Branch ending with return constant is probably not taken. */
DEF_PREDICTOR (PRED_CONST_RETURN, "const return", HITRATE (69), 0)
diff --git a/gcc/testsuite/gcc.dg/predict-15.c b/gcc/testsuite/gcc.dg/predict-15.c
new file mode 100644
index 000000000..2a8c3ea85
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/predict-15.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+int main(int argc, char **argv)
+{
+ if (argc == 123)
+ goto exit;
+ else
+ {
+ return 0;
+ }
+
+exit:
+ return 1;
+}
+
+/* { dg-final { scan-tree-dump "goto heuristics of edge" "profile_estimate"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c b/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c
index 184dd10dd..17526113d 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c
@@ -1,8 +1,7 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-profile_estimate-blocks-details" } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
-void g(void);
-void h(void);
+int v1, v2;
void f(int x, int y)
{
if (x) goto A;
@@ -10,19 +9,19 @@ void f(int x, int y)
return;
A: __attribute__((cold))
- g();
+ v1 = x;
return;
B: __attribute__((hot))
- h();
+ v2 = y;
return;
}
/* { dg-final { scan-tree-dump-times "hot label heuristics" 1 "profile_estimate" } } */
/* { dg-final { scan-tree-dump-times "cold label heuristics" 1 "profile_estimate" } } */
-/* { dg-final { scan-tree-dump "A \\\[0\\\..*\\\]" "profile_estimate" } } */
+/* { dg-final { scan-tree-dump-times "combined heuristics: 0\\\..*" 1 "profile_estimate" } } */
/* Note: we're attempting to match some number > 6000, i.e. > 60%.
The exact number ought to be tweekable without having to juggle
the testcase around too much. */
-/* { dg-final { scan-tree-dump "B \\\[\[6-9\]\[0-9\]\\\..*\\\]" "profile_estimate" } } */
+/* { dg-final { scan-tree-dump-times "combined heuristics: \[6-9\]\[0-9\]\\\..*" 1 "profile_estimate" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c
index 853ee21bb..ed49e25f8 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp24.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-vrp1-details" } */
+/* { dg-options "-O2 -fno-tree-forwprop -fdump-tree-vrp1-details -fdump-tree-optimized" } */
struct rtx_def;
@@ -86,10 +86,8 @@ L7:
/* The first n_sets > 0 test can be simplfiied into n_sets == 1 since
n_sets can only have the values [0, 1] as it's the result of a
- boolean operation.
+ boolean operation. */
- The second n_sets > 0 test can also be simplified into n_sets == 1
- as the only way to reach the tests is when n_sets <= 1 and the only
- value which satisfies both conditions is n_sets == 1. */
-/* { dg-final { scan-tree-dump-times "Simplified relational" 2 "vrp1" } } */
+/* { dg-final { scan-tree-dump-times "Simplified relational" 1 "vrp1" } } */
+/* { dg-final { scan-tree-dump-times "if " 4 "optimized" } } */
--
2.19.1

View File

@ -0,0 +1,90 @@
From 96b5d381d4a670fd3f2fed4ca73ded601ec779e9 Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Fri, 24 Nov 2023 15:47:33 +0800
Subject: [PATCH 3/4] [Backport] Rework cold and hot label attributes in
predict.c.
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=93c18375823fdd0e384f673f75e39136719135dd
---
gcc/gimplify.c | 10 +++++++-
gcc/predict.c | 23 -------------------
.../gcc.dg/tree-ssa/attr-hotcold-2.c | 4 ++--
3 files changed, 11 insertions(+), 26 deletions(-)
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 5264a4f3d..c6a06d014 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -2340,10 +2340,18 @@ gimplify_label_expr (tree *expr_p, gimple_seq *pre_p)
gcc_assert (decl_function_context (LABEL_EXPR_LABEL (*expr_p))
== current_function_decl);
- glabel *label_stmt = gimple_build_label (LABEL_EXPR_LABEL (*expr_p));
+ tree label = LABEL_EXPR_LABEL (*expr_p);
+ glabel *label_stmt = gimple_build_label (label);
gimple_set_location (label_stmt, EXPR_LOCATION (*expr_p));
gimplify_seq_add_stmt (pre_p, label_stmt);
+ if (lookup_attribute ("cold", DECL_ATTRIBUTES (label)))
+ gimple_seq_add_stmt (pre_p, gimple_build_predict (PRED_COLD_LABEL,
+ NOT_TAKEN));
+ else if (lookup_attribute ("hot", DECL_ATTRIBUTES (label)))
+ gimple_seq_add_stmt (pre_p, gimple_build_predict (PRED_HOT_LABEL,
+ TAKEN));
+
return GS_ALL_DONE;
}
diff --git a/gcc/predict.c b/gcc/predict.c
index fa4e626fa..d68fb17ba 100644
--- a/gcc/predict.c
+++ b/gcc/predict.c
@@ -2701,29 +2701,6 @@ tree_estimate_probability_bb (basic_block bb)
FOR_EACH_EDGE (e, ei, bb->succs)
{
- /* Predict edges to user labels with attributes. */
- if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
- {
- gimple_stmt_iterator gi;
- for (gi = gsi_start_bb (e->dest); !gsi_end_p (gi); gsi_next (&gi))
- {
- glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (gi));
- tree decl;
-
- if (!label_stmt)
- break;
- decl = gimple_label_label (label_stmt);
- if (DECL_ARTIFICIAL (decl))
- continue;
-
- /* Finally, we have a user-defined label. */
- if (lookup_attribute ("cold", DECL_ATTRIBUTES (decl)))
- predict_edge_def (e, PRED_COLD_LABEL, NOT_TAKEN);
- else if (lookup_attribute ("hot", DECL_ATTRIBUTES (decl)))
- predict_edge_def (e, PRED_HOT_LABEL, TAKEN);
- }
- }
-
/* Predict early returns to be probable, as we've already taken
care for error returns and other cases are often used for
fast paths through function.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c b/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c
index 13d2916c4..184dd10dd 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/attr-hotcold-2.c
@@ -20,9 +20,9 @@ void f(int x, int y)
/* { dg-final { scan-tree-dump-times "hot label heuristics" 1 "profile_estimate" } } */
/* { dg-final { scan-tree-dump-times "cold label heuristics" 1 "profile_estimate" } } */
-/* { dg-final { scan-tree-dump-times "block 4, loop depth 0, count 0, freq \[1-4\]\[^0-9\]" 3 "profile_estimate" } } */
+/* { dg-final { scan-tree-dump "A \\\[0\\\..*\\\]" "profile_estimate" } } */
/* Note: we're attempting to match some number > 6000, i.e. > 60%.
The exact number ought to be tweekable without having to juggle
the testcase around too much. */
-/* { dg-final { scan-tree-dump-times "block 5, loop depth 0, count 0, freq \[6-9\]\[0-9\]\[0-9\]\[0-9\]" 3 "profile_estimate" } } */
+/* { dg-final { scan-tree-dump "B \\\[\[6-9\]\[0-9\]\\\..*\\\]" "profile_estimate" } } */
--
2.19.1

View File

@ -0,0 +1,281 @@
From a9c1a43518391483789e3b036b1d7242b7576c4e Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Wed, 22 Nov 2023 16:46:58 +0800
Subject: [PATCH 1/4] [Backport][aarch64] Add HiSilicon tsv110 CPU support
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=910f72e78fe76993b8a9bcc9e385a788446f1f10
This patch adds an mcpu for HiSilicon's tsv110, which supports v8_4A.
It has been tested on aarch64 and no regressions from this patch.
[aarch64] Correct architecture for tsv110.
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5a8d95cc43f3ff425fa58bf4025a8527320fb46c
For HiSilicon's tsv110 cpu core, it supports some v8_4A features, but
some mandatory features are not implemented.
---
gcc/config/aarch64/aarch64-cores.def | 5 ++
gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++++++++++++++++++++
gcc/config/aarch64/aarch64-tune.md | 2 +-
gcc/config/aarch64/aarch64.c | 71 ++++++++++++++++
gcc/doc/invoke.texi | 2 +-
5 files changed, 182 insertions(+), 2 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 92b57cffb..ea7052388 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -85,4 +85,9 @@ AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH
AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
+/* V8.2 Architecture Processors. */
+
+/* HiSilicon ('H') cores. */
+AARCH64_CORE("tsv110", tsv110, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16, tsv110, 0x48, 0xd01, -1)
+
#undef AARCH64_CORE
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 070c083be..4595c5e71 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -334,4 +334,108 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
}
};
+const struct cpu_cost_table tsv110_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ 0, /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (2), /* simple. */
+ COSTS_N_INSNS (2), /* flag_setting. */
+ COSTS_N_INSNS (2), /* extend. */
+ COSTS_N_INSNS (2), /* add. */
+ COSTS_N_INSNS (2), /* extend_add. */
+ COSTS_N_INSNS (11) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (3), /* add. */
+ COSTS_N_INSNS (3), /* extend_add. */
+ COSTS_N_INSNS (19) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (3), /* load. */
+ COSTS_N_INSNS (4), /* load_sign_extend. */
+ COSTS_N_INSNS (3), /* ldrd. */
+ COSTS_N_INSNS (3), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. */
+ 2, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (4), /* loadf. */
+ COSTS_N_INSNS (4), /* loadd. */
+ COSTS_N_INSNS (4), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. */
+ 2, /* stm_regs_per_insn_subsequent. */
+ 0, /* storef. */
+ 0, /* stored. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (4), /* loadv. */
+ COSTS_N_INSNS (4) /* storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (10), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (4), /* fma. */
+ COSTS_N_INSNS (4), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (17), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (6), /* mult_addsub. */
+ COSTS_N_INSNS (6), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
#endif
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index c948846af..4231e56ec 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53"
+ "cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,exynosm1,falkor,qdf24xx,thunderx,thunderxt88p1,thunderxt88,thunderxt81,thunderxt83,thunderx2t99,xgene1,thunderx2t99p1,vulcan,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,tsv110"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 22e76e083..58e91ecce 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -254,6 +254,22 @@ static const struct cpu_addrcost_table xgene1_addrcost_table =
0, /* imm_offset */
};
+static const struct cpu_addrcost_table tsv110_addrcost_table =
+{
+ {
+ 1, /* hi */
+ 0, /* si */
+ 0, /* di */
+ 1, /* ti */
+ },
+ 0, /* pre_modify */
+ 0, /* post_modify */
+ 0, /* register_offset */
+ 1, /* register_sextend */
+ 1, /* register_zextend */
+ 0, /* imm_offset */
+};
+
static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
{
@@ -362,6 +378,16 @@ static const struct cpu_regmove_cost thunderx2t99_regmove_cost =
4 /* FP2FP */
};
+static const struct cpu_regmove_cost tsv110_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Avoid the use of slow int<->fp moves for spilling by setting
+ their cost higher than memmov_cost. */
+ 2, /* GP2FP */
+ 3, /* FP2GP */
+ 2 /* FP2FP */
+};
+
/* Generic costs for vector insn classes. */
static const struct cpu_vector_cost generic_vector_cost =
{
@@ -402,6 +428,25 @@ static const struct cpu_vector_cost thunderx_vector_cost =
3 /* cond_not_taken_branch_cost */
};
+static const struct cpu_vector_cost tsv110_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 5, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 2, /* vec_int_stmt_cost */
+ 2, /* vec_fp_stmt_cost */
+ 2, /* vec_permute_cost */
+ 3, /* vec_to_scalar_cost */
+ 2, /* scalar_to_vec_cost */
+ 5, /* vec_align_load_cost */
+ 5, /* vec_unalign_load_cost */
+ 1, /* vec_unalign_store_cost */
+ 1, /* vec_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1 /* cond_not_taken_branch_cost */
+};
+
/* Generic costs for vector insn classes. */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
@@ -731,6 +776,32 @@ static const struct tune_params thunderx_tunings =
(AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */
};
+static const struct tune_params tsv110_tunings =
+{
+ &tsv110_extra_costs,
+ &tsv110_addrcost_table,
+ &tsv110_regmove_cost,
+ &tsv110_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ 4, /* memmov_cost */
+ 4, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC
+ | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops */
+ 16, /* function_align. */
+ 4, /* jump_align. */
+ 8, /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ 0, /* cache_line_size. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
+};
+
static const struct tune_params xgene1_tunings =
{
&xgene1_extra_costs,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 808ebe57f..430c0d5db 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -14028,7 +14028,7 @@ performance of the code. Permissible values for this option are:
@samp{generic}, @samp{cortex-a35}, @samp{cortex-a53}, @samp{cortex-a57},
@samp{cortex-a72}, @samp{cortex-a73}, @samp{exynos-m1}, @samp{falkor},
@samp{qdf24xx}, @samp{xgene1}, @samp{vulcan}, @samp{thunderx},
-@samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81},
+@samp{thunderxt88}, @samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110},
@samp{thunderxt83}, @samp{thunderx2t99}, @samp{cortex-a57.cortex-a53},
@samp{cortex-a72.cortex-a53}, @samp{cortex-a73.cortex-a35},
@samp{cortex-a73.cortex-a53}, @samp{native}.
--
2.19.1

View File

@ -41,7 +41,7 @@ Version: 7.3.0
# number 2020033101 meaning the openEuler 20.03 release date plus 01 to
# replace DATE and will never change it in the future.
%global openEulerDATE 2020033101
Release: %{openEulerDATE}.55
Release: %{openEulerDATE}.56
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
Group: Development/Languages
#Source0: hcc-aarch64-linux-release.tar.bz2
@ -73,7 +73,6 @@ Patch2: gcc-adapt-to-isl.patch
Patch3: sanitizer-pr-85835.patch
Patch4: CVE-2018-12886.patch
Patch5: CVE-2019-15847.patch
Patch7: add-tsv110-pipeline-scheduling.patch
Patch12: aarch64-fix-tls-negative-offset.patch
Patch14: arm-fix-push-minipool.patch
Patch22: arm-bigendian-disable-interleaved-LS-vectorize.patch
@ -100,6 +99,11 @@ Patch49: aarch64-Rename-hard_fp_offset-to-bytes_above_hard_fp.patch
Patch50: aarch64-Tweak-frame_size-comment.patch
Patch51: Backport-check-function-bodies-support.patch
Patch52: aarch64-Make-stack-smash-canary-protect-saved-registers.patch
Patch53: Backport-aarch64-Add-HiSilicon-tsv110-CPU-support.patch
Patch54: Backport-AArch64-Add-tsv110-pipeline-scheduling.patch
Patch55: Backport-Rework-cold-and-hot-label-attributes-in-pre.patch
Patch56: Backport-Learn-GIMPLE-pretty-printer-to-produce-nice.patch
Patch57: Backport-AArch64-Fix-longbranch-test.patch
#AutoReqProv: off
AutoReq: true
@ -557,39 +561,7 @@ package or when debugging this package.
%setup -q -n gcc-%{version} -a 1 -a 2
/bin/pwd
%patch1 -p1
%patch2 -p1
%patch3 -p1
%patch4 -p1
%patch5 -p1
%patch7 -p1
%patch12 -p1
%patch14 -p1
%patch22 -p1
%patch23 -p1
%patch25 -p1
%patch29 -p1
%patch31 -p1
%patch34 -p1
%patch35 -p1
%patch36 -p1
%patch37 -p1
%patch38 -p1
%patch39 -p1
%patch40 -p1
%patch41 -p1
%patch42 -p1
%patch43 -p1
%patch44 -p1
%patch45 -p1
%patch46 -p1
%patch47 -p1
%patch48 -p1
%patch49 -p1
%patch50 -p1
%patch51 -p1
%patch52 -p1
%autopatch -p1
%if 0%{?_enable_debug_packages}
cat > split-debuginfo.sh <<\EOF
@ -3364,6 +3336,9 @@ fi
%changelog
* Mon Nov 27 2023 eastb233 <xiezhiheng@huawei.com> - 7.3.0-2020033101.56
- Fix incorrect port of tsv110 pipeline.
* Fri Nov 24 2023 zhaoshujian <zhaoshujian@huawei.com> - 7.3.0-2020033101.55
- Change isl version from isl-0.21 to isl-0.16.1 to fix the deja test ice.