Skip to content

Commit 73ca056

Browse files
artemiy-volkov and MichielDerhaeg
authored and committed
arcv: add scheduling implementation for RHX-100
This commit implements the scheduling model for the RHX-100 core. Among notable things are: (1) The arcv_macro_fusion_pair_p () hook has been modified to not create SCHED_GROUPs larger than 2 instructions; also, it gives priority to double load/store fusion, suppressing the other types until sched2. (2) riscv_issue_rate () is set to 4 and the system is modeled as 4 separate pipelines, giving access to as many instructions in ready_list as possible. (3) The rhx.md description puts some initial constraints in place (e.g. memory ops can only go into pipe B), saving some work in the reordering hook. (4) The riscv_sched_variable_issue () and riscv_sched_reorder2 () hooks work together to make sure (in order of descending priority) that: (a) the critical path and the instruction priorities are respected; (b) both pipes are filled (taking advantage of parallel dispatch within the microarchitectural constraints); (c) there is as much fusion going on as possible (and the existing fusion pairs are not broken up). There is probably some room for improvement, and some tweaks will probably have to be made in response to HLA changes as the HW development process goes on. Signed-off-by: Artemiy Volkov <artemiy@synopsys.com>
1 parent 925d7fa commit 73ca056

File tree

1 file changed

+259
-24
lines changed

1 file changed

+259
-24
lines changed

gcc/config/riscv/riscv.cc

Lines changed: 259 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -340,6 +340,12 @@ unsigned riscv_stack_boundary;
340340
/* Whether in riscv_output_mi_thunk. */
341341
static bool riscv_in_thunk_func = false;
342342

343+
static int alu_pipe_scheduled_p;
344+
static int pipeB_scheduled_p;
345+
346+
static rtx_insn *last_scheduled_insn;
347+
static short cached_can_issue_more;
348+
343349
/* If non-zero, this is an offset to be added to SP to redefine the CFA
344350
when restoring the FP register from the stack. Only valid when generating
345351
the epilogue. */
@@ -10299,6 +10305,21 @@ riscv_issue_rate (void)
1029910305
static int
1030010306
riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
1030110307
{
10308+
/* Beginning of cycle - reset variables. */
10309+
if (more == tune_param->issue_rate)
10310+
{
10311+
alu_pipe_scheduled_p = 0;
10312+
pipeB_scheduled_p = 0;
10313+
}
10314+
10315+
if (alu_pipe_scheduled_p && pipeB_scheduled_p)
10316+
{
10317+
cached_can_issue_more = 0;
10318+
return 0;
10319+
}
10320+
10321+
cached_can_issue_more = more;
10322+
1030210323
if (DEBUG_INSN_P (insn))
1030310324
return more;
1030410325

@@ -10319,6 +10340,28 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
1031910340
an assert so we can find and fix this problem. */
1032010341
gcc_assert (insn_has_dfa_reservation_p (insn));
1032110342

10343+
if (next_insn (insn) && INSN_P (next_insn (insn))
10344+
&& SCHED_GROUP_P (next_insn (insn)))
10345+
{
10346+
if (get_attr_type (insn) == TYPE_LOAD
10347+
|| get_attr_type (insn) == TYPE_STORE
10348+
|| get_attr_type (next_insn (insn)) == TYPE_LOAD
10349+
|| get_attr_type (next_insn (insn)) == TYPE_STORE)
10350+
pipeB_scheduled_p = 1;
10351+
else
10352+
alu_pipe_scheduled_p = 1;
10353+
}
10354+
10355+
if (get_attr_type (insn) == TYPE_ALU_FUSED
10356+
|| get_attr_type (insn) == TYPE_IMUL_FUSED)
10357+
{
10358+
alu_pipe_scheduled_p = 1;
10359+
more -= 1;
10360+
}
10361+
10362+
last_scheduled_insn = insn;
10363+
cached_can_issue_more = more - 1;
10364+
1032210365
return more - 1;
1032310366
}
1032410367

@@ -10557,22 +10600,49 @@ arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr)
1055710600
static bool
1055810601
arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1055910602
{
10603+
/* Never create sched groups with more than 2 members. */
10604+
if (SCHED_GROUP_P (prev))
10605+
return false;
10606+
1056010607
rtx prev_set = single_set (prev);
1056110608
rtx curr_set = single_set (curr);
10562-
/* prev and curr are simple SET insns i.e. no flag setting or branching. */
10563-
bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
1056410609

10565-
/* Fuse load-immediate with a dependent conditional branch. */
10566-
if (get_attr_type (prev) == TYPE_MOVE
10567-
&& get_attr_move_type (prev) == MOVE_TYPE_CONST
10568-
&& any_condjump_p (curr))
10610+
/* Fuse multiply-add pair. */
10611+
if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == MULT
10612+
&& GET_CODE (SET_SRC (curr_set)) == PLUS
10613+
&& (REG_P (XEXP (SET_SRC (curr_set), 0))
10614+
&& REGNO (SET_DEST (prev_set)) ==
10615+
REGNO (XEXP (SET_SRC (curr_set), 0))
10616+
|| (REG_P (XEXP (SET_SRC (curr_set), 1))
10617+
&& REGNO (SET_DEST (prev_set)) ==
10618+
REGNO (XEXP (SET_SRC (curr_set), 1)))))
10619+
return true;
10620+
10621+
/* Fuse logical shift left with logical shift right (bit-extract pattern). */
10622+
if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == ASHIFT
10623+
&& GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
10624+
&& REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
10625+
&& REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0)))
10626+
return true;
10627+
10628+
/* Fuse load-immediate with a dependent conditional branch. */
10629+
if (get_attr_type (prev) == TYPE_MOVE
10630+
&& get_attr_move_type (prev) == MOVE_TYPE_CONST
10631+
&& any_condjump_p (curr))
1056910632
{
1057010633
rtx comp = XEXP (SET_SRC (curr_set), 0);
1057110634

1057210635
return (REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == SET_DEST (prev_set))
1057310636
|| (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == SET_DEST (prev_set));
1057410637
}
1057510638

10639+
/* Do not fuse loads/stores before sched2. */
10640+
if (!reload_completed || sched_fusion)
10641+
return false;
10642+
10643+
/* prev and curr are simple SET insns i.e. no flag setting or branching. */
10644+
bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10645+
1057610646
/* Don't handle anything with a jump past this point. */
1057710647
if (!simple_sets_p)
1057810648
return false;
@@ -10598,6 +10668,30 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1059810668
return true;
1059910669
}
1060010670

10671+
/* Look ahead 1 insn to make sure double loads/stores are always
10672+
fused together, even in the presence of other opportunities. */
10673+
if (next_insn (curr) && single_set (next_insn (curr))
10674+
&& get_attr_type (curr) == TYPE_LOAD
10675+
&& get_attr_type (next_insn (curr)) == TYPE_LOAD)
10676+
{
10677+
rtx addr0 = XEXP (SET_SRC (curr_set), 0);
10678+
rtx addr1 = XEXP (SET_SRC (single_set (next_insn (curr))), 0);
10679+
10680+
if (arcv_fused_addr_p (addr0, addr1))
10681+
return false;
10682+
}
10683+
10684+
if (next_insn (curr) && single_set (next_insn (curr))
10685+
&& get_attr_type (curr) == TYPE_STORE
10686+
&& get_attr_type (next_insn (curr)) == TYPE_STORE)
10687+
{
10688+
rtx addr0 = XEXP (SET_DEST (curr_set), 0);
10689+
rtx addr1 = XEXP (SET_DEST (single_set (next_insn (curr))), 0);
10690+
10691+
if (arcv_fused_addr_p (addr0, addr1))
10692+
return false;
10693+
}
10694+
1060110695
/* Fuse a pre- or post-update memory operation. */
1060210696
if (arcv_memop_arith_pair_p (prev, curr)
1060310697
|| arcv_memop_arith_pair_p (curr, prev))
@@ -10618,20 +10712,6 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1061810712
&& SET_DEST (prev_set) == SUBREG_REG (SET_SRC (curr_set)))))
1061910713
return true;
1062010714

10621-
if (GET_CODE (SET_SRC (prev_set)) == MULT
10622-
&& GET_CODE (SET_SRC (curr_set)) == PLUS
10623-
&& REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
10624-
&& (REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))
10625-
|| REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 1))))
10626-
return true;
10627-
10628-
/* Fuse logical shift left with logical shift right (bit-extract pattern). */
10629-
if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
10630-
&& GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
10631-
&& REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
10632-
&& REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0)))
10633-
return true;
10634-
1063510715
return false;
1063610716
}
1063710717

@@ -11353,17 +11433,21 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1135311433
we currently only perform the adjustment when -madjust-lmul-cost is given.
1135411434
*/
1135511435
static int
11356-
riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
11357-
unsigned int)
11436+
riscv_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
11437+
int cost, unsigned int)
1135811438
{
11439+
if (riscv_is_micro_arch (arcv_rhx100) && dep_type == REG_DEP_ANTI
11440+
&& !SCHED_GROUP_P (insn))
11441+
return cost + 1;
11442+
1135911443
/* Only do adjustments for the generic out-of-order scheduling model. */
1136011444
if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
1136111445
return cost;
1136211446

11363-
if (recog_memoized (insn) < 0)
11447+
if (recog_memoized (dep_insn) < 0)
1136411448
return cost;
1136511449

11366-
enum attr_type type = get_attr_type (insn);
11450+
enum attr_type type = get_attr_type (dep_insn);
1136711451

1136811452
if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
1136911453
{
@@ -11414,6 +11498,7 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
1141411498
return new_cost;
1141511499
}
1141611500

11501+
1141711502
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if insn can
1141811503
can be scheduled for speculative execution. Reject vsetvl instructions to
1141911504
prevent the scheduler from hoisting them out of basic blocks without
@@ -11435,6 +11520,149 @@ riscv_sched_can_speculate_insn (rtx_insn *insn)
1143511520
}
1143611521
}
1143711522

11523+
static void
11524+
riscv_sched_init (FILE *file ATTRIBUTE_UNUSED,
11525+
int verbose ATTRIBUTE_UNUSED,
11526+
int max_ready ATTRIBUTE_UNUSED)
11527+
{
11528+
last_scheduled_insn = 0;
11529+
}
11530+
11531+
static int
11532+
riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED,
11533+
int verbose ATTRIBUTE_UNUSED,
11534+
rtx_insn **ready,
11535+
int *n_readyp,
11536+
int clock ATTRIBUTE_UNUSED)
11537+
{
11538+
if (sched_fusion)
11539+
return cached_can_issue_more;
11540+
11541+
if (!cached_can_issue_more)
11542+
return 0;
11543+
11544+
/* Fuse double load/store instances missed by sched_fusion. */
11545+
if (!pipeB_scheduled_p && last_scheduled_insn && ready && *n_readyp > 0
11546+
&& !SCHED_GROUP_P (last_scheduled_insn)
11547+
&& (get_attr_type (last_scheduled_insn) == TYPE_LOAD
11548+
|| get_attr_type (last_scheduled_insn) == TYPE_STORE))
11549+
{
11550+
for (int i = 1; i <= *n_readyp; i++)
11551+
{
11552+
if (NONDEBUG_INSN_P (ready[*n_readyp - i])
11553+
&& !SCHED_GROUP_P (ready[*n_readyp - i])
11554+
&& (!next_insn (ready[*n_readyp - i])
11555+
|| !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
11556+
|| !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))
11557+
&& arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i]))
11558+
{
11559+
std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
11560+
SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
11561+
pipeB_scheduled_p = 1;
11562+
return cached_can_issue_more;
11563+
}
11564+
}
11565+
pipeB_scheduled_p = 1;
11566+
}
11567+
11568+
/* Try to fuse a non-memory last_scheduled_insn. */
11569+
if ((!alu_pipe_scheduled_p || !pipeB_scheduled_p)
11570+
&& last_scheduled_insn && ready && *n_readyp > 0
11571+
&& !SCHED_GROUP_P (last_scheduled_insn)
11572+
&& (get_attr_type (last_scheduled_insn) != TYPE_LOAD
11573+
&& get_attr_type (last_scheduled_insn) != TYPE_STORE))
11574+
{
11575+
for (int i = 1; i <= *n_readyp; i++)
11576+
{
11577+
if (NONDEBUG_INSN_P (ready[*n_readyp - i])
11578+
&& !SCHED_GROUP_P (ready[*n_readyp - i])
11579+
&& (!next_insn (ready[*n_readyp - i])
11580+
|| !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
11581+
|| !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))
11582+
&& arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i]))
11583+
{
11584+
if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD
11585+
|| get_attr_type (ready[*n_readyp - i]) == TYPE_STORE)
11586+
if (pipeB_scheduled_p)
11587+
continue;
11588+
else
11589+
pipeB_scheduled_p = 1;
11590+
else if (!alu_pipe_scheduled_p)
11591+
alu_pipe_scheduled_p = 1;
11592+
else
11593+
pipeB_scheduled_p = 1;
11594+
11595+
std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
11596+
SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
11597+
return cached_can_issue_more;
11598+
}
11599+
}
11600+
alu_pipe_scheduled_p = 1;
11601+
}
11602+
11603+
/* When pipe B is scheduled, we can have no more memops this cycle. */
11604+
if (pipeB_scheduled_p && *n_readyp > 0
11605+
&& NONDEBUG_INSN_P (ready[*n_readyp - 1])
11606+
&& recog_memoized (ready[*n_readyp - 1]) >= 0
11607+
&& !SCHED_GROUP_P (ready[*n_readyp - 1])
11608+
&& (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
11609+
|| get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
11610+
{
11611+
if (alu_pipe_scheduled_p)
11612+
return 0;
11613+
11614+
for (int i = 2; i <= *n_readyp; i++)
11615+
{
11616+
if ((NONDEBUG_INSN_P (ready[*n_readyp - i])
11617+
&& recog_memoized (ready[*n_readyp - i]) >= 0
11618+
&& get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD
11619+
&& get_attr_type (ready[*n_readyp - i]) != TYPE_STORE
11620+
&& !SCHED_GROUP_P (ready[*n_readyp - i])
11621+
&& ((!next_insn (ready[*n_readyp - i])
11622+
|| !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
11623+
|| !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))))
11624+
|| ((next_insn (ready[*n_readyp - i])
11625+
&& NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
11626+
&& recog_memoized (next_insn (ready[*n_readyp - i])) >= 0
11627+
&& get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_LOAD
11628+
&& get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_STORE)))
11629+
{
11630+
std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
11631+
alu_pipe_scheduled_p = 1;
11632+
cached_can_issue_more = 1;
11633+
return 1;
11634+
}
11635+
}
11636+
return 0;
11637+
}
11638+
11639+
/* If all else fails, schedule a single instruction. */
11640+
if (ready && *n_readyp > 0
11641+
&& NONDEBUG_INSN_P (ready[*n_readyp - 1])
11642+
&& recog_memoized (ready[*n_readyp - 1]) >= 0
11643+
&& get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD
11644+
&& get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE)
11645+
{
11646+
if (!pipeB_scheduled_p
11647+
&& (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
11648+
|| get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
11649+
{
11650+
alu_pipe_scheduled_p = pipeB_scheduled_p = 1;
11651+
cached_can_issue_more = 1;
11652+
return 1;
11653+
}
11654+
else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD
11655+
|| get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE)
11656+
{
11657+
alu_pipe_scheduled_p = pipeB_scheduled_p = 1;
11658+
cached_can_issue_more = 1;
11659+
return 1;
11660+
}
11661+
}
11662+
11663+
return cached_can_issue_more;
11664+
}
11665+
1143811666
/* Auxiliary function to emit RISC-V ELF attribute. */
1143911667
static void
1144011668
riscv_emit_attribute ()
@@ -16020,9 +16248,16 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
1602016248
#undef TARGET_SCHED_ADJUST_COST
1602116249
#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
1602216250

16251+
1602316252
#undef TARGET_SCHED_CAN_SPECULATE_INSN
1602416253
#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
1602516254

16255+
#undef TARGET_SCHED_REORDER2
16256+
#define TARGET_SCHED_REORDER2 riscv_sched_reorder2
16257+
16258+
#undef TARGET_SCHED_INIT
16259+
#define TARGET_SCHED_INIT riscv_sched_init
16260+
1602616261
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1602716262
#define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
1602816263

0 commit comments

Comments (0)