Skip to content

Commit 19f2bee

Browse files
artemiy-volkov authored and MichielDerhaeg committed
arcv: add scheduling implementation for RHX-100
This commit implements the scheduling model for the RHX-100 core. Among notable things are: (1) The arcv_macro_fusion_pair_p () hook has been modified to not create SCHED_GROUP's larger than 2 instructions; also, it gives priority to double load/store fusion, suppressing the other types until sched2. (2) riscv_issue_rate () is set to 4 and the system is modeled as 4 separate pipelines, giving access to as many instructions in ready_list as possible. (3) The rhx.md description puts some initial constraints in place (e.g. memory ops can only go into pipe B), saving some work in the reordering hook. (4) The riscv_sched_variable_issue () and riscv_sched_reorder2 () hooks work together to make sure (in order of descending priority) that: (a) the critical path and the instruction priorities are respected; (b) both pipes are filled (taking advantage of parallel dispatch within the microarchitectural constraints); (c) there is as much fusion going on as possible (and the existing fusion pairs are not broken up). There is probably some room for improvement, and some tweaks will probably have to be made in response to HLA changes as the HW development process goes on. Signed-off-by: Artemiy Volkov <artemiy@synopsys.com>
1 parent 08b3411 commit 19f2bee

File tree

1 file changed

+259
-24
lines changed

1 file changed

+259
-24
lines changed

gcc/config/riscv/riscv.cc

Lines changed: 259 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,12 @@ unsigned riscv_stack_boundary;
340340
/* Whether in riscv_output_mi_thunk. */
341341
static bool riscv_in_thunk_func = false;
342342

343+
static int alu_pipe_scheduled_p;
344+
static int pipeB_scheduled_p;
345+
346+
static rtx_insn *last_scheduled_insn;
347+
static short cached_can_issue_more;
348+
343349
/* If non-zero, this is an offset to be added to SP to redefine the CFA
344350
when restoring the FP register from the stack. Only valid when generating
345351
the epilogue. */
@@ -10847,6 +10853,21 @@ riscv_sched_init (FILE *, int, int)
1084710853
static int
1084810854
riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
1084910855
{
10856+
/* Beginning of cycle - reset variables. */
10857+
if (more == tune_param->issue_rate)
10858+
{
10859+
alu_pipe_scheduled_p = 0;
10860+
pipeB_scheduled_p = 0;
10861+
}
10862+
10863+
if (alu_pipe_scheduled_p && pipeB_scheduled_p)
10864+
{
10865+
cached_can_issue_more = 0;
10866+
return 0;
10867+
}
10868+
10869+
cached_can_issue_more = more;
10870+
1085010871
if (DEBUG_INSN_P (insn))
1085110872
return more;
1085210873

@@ -10892,6 +10913,28 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
1089210913
}
1089310914
}
1089410915

10916+
if (next_insn (insn) && INSN_P (next_insn (insn))
10917+
&& SCHED_GROUP_P (next_insn (insn)))
10918+
{
10919+
if (get_attr_type (insn) == TYPE_LOAD
10920+
|| get_attr_type (insn) == TYPE_STORE
10921+
|| get_attr_type (next_insn (insn)) == TYPE_LOAD
10922+
|| get_attr_type (next_insn (insn)) == TYPE_STORE)
10923+
pipeB_scheduled_p = 1;
10924+
else
10925+
alu_pipe_scheduled_p = 1;
10926+
}
10927+
10928+
if (get_attr_type (insn) == TYPE_ALU_FUSED
10929+
|| get_attr_type (insn) == TYPE_IMUL_FUSED)
10930+
{
10931+
alu_pipe_scheduled_p = 1;
10932+
more -= 1;
10933+
}
10934+
10935+
last_scheduled_insn = insn;
10936+
cached_can_issue_more = more - 1;
10937+
1089510938
return more - 1;
1089610939
}
1089710940

@@ -11184,22 +11227,49 @@ arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr)
1118411227
static bool
1118511228
arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1118611229
{
11230+
/* Never create sched groups with more than 2 members. */
11231+
if (SCHED_GROUP_P (prev))
11232+
return false;
11233+
1118711234
rtx prev_set = single_set (prev);
1118811235
rtx curr_set = single_set (curr);
11189-
/* prev and curr are simple SET insns i.e. no flag setting or branching. */
11190-
bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
1119111236

11192-
/* Fuse load-immediate with a dependent conditional branch. */
11193-
if (get_attr_type (prev) == TYPE_MOVE
11194-
&& get_attr_move_type (prev) == MOVE_TYPE_CONST
11195-
&& any_condjump_p (curr))
11237+
/* Fuse multiply-add pair. */
11238+
if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == MULT
11239+
&& GET_CODE (SET_SRC (curr_set)) == PLUS
11240+
&& (REG_P (XEXP (SET_SRC (curr_set), 0))
11241+
&& REGNO (SET_DEST (prev_set)) ==
11242+
REGNO (XEXP (SET_SRC (curr_set), 0))
11243+
|| (REG_P (XEXP (SET_SRC (curr_set), 1))
11244+
&& REGNO (SET_DEST (prev_set)) ==
11245+
REGNO (XEXP (SET_SRC (curr_set), 1)))))
11246+
return true;
11247+
11248+
/* Fuse logical shift left with logical shift right (bit-extract pattern). */
11249+
if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == ASHIFT
11250+
&& GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
11251+
&& REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
11252+
&& REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0)))
11253+
return true;
11254+
11255+
/* Fuse load-immediate with a dependent conditional branch. */
11256+
if (get_attr_type (prev) == TYPE_MOVE
11257+
&& get_attr_move_type (prev) == MOVE_TYPE_CONST
11258+
&& any_condjump_p (curr))
1119611259
{
1119711260
rtx comp = XEXP (SET_SRC (curr_set), 0);
1119811261

1119911262
return (REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == SET_DEST (prev_set))
1120011263
|| (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == SET_DEST (prev_set));
1120111264
}
1120211265

11266+
/* Do not fuse loads/stores before sched2. */
11267+
if (!reload_completed || sched_fusion)
11268+
return false;
11269+
11270+
/* prev and curr are simple SET insns i.e. no flag setting or branching. */
11271+
bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
11272+
1120311273
/* Don't handle anything with a jump past this point. */
1120411274
if (!simple_sets_p)
1120511275
return false;
@@ -11225,6 +11295,30 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1122511295
return true;
1122611296
}
1122711297

11298+
/* Look ahead 1 insn to make sure double loads/stores are always
11299+
fused together, even in the presence of other opportunities. */
11300+
if (next_insn (curr) && single_set (next_insn (curr))
11301+
&& get_attr_type (curr) == TYPE_LOAD
11302+
&& get_attr_type (next_insn (curr)) == TYPE_LOAD)
11303+
{
11304+
rtx addr0 = XEXP (SET_SRC (curr_set), 0);
11305+
rtx addr1 = XEXP (SET_SRC (single_set (next_insn (curr))), 0);
11306+
11307+
if (arcv_fused_addr_p (addr0, addr1))
11308+
return false;
11309+
}
11310+
11311+
if (next_insn (curr) && single_set (next_insn (curr))
11312+
&& get_attr_type (curr) == TYPE_STORE
11313+
&& get_attr_type (next_insn (curr)) == TYPE_STORE)
11314+
{
11315+
rtx addr0 = XEXP (SET_DEST (curr_set), 0);
11316+
rtx addr1 = XEXP (SET_DEST (single_set (next_insn (curr))), 0);
11317+
11318+
if (arcv_fused_addr_p (addr0, addr1))
11319+
return false;
11320+
}
11321+
1122811322
/* Fuse a pre- or post-update memory operation. */
1122911323
if (arcv_memop_arith_pair_p (prev, curr)
1123011324
|| arcv_memop_arith_pair_p (curr, prev))
@@ -11245,20 +11339,6 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1124511339
&& SET_DEST (prev_set) == SUBREG_REG (SET_SRC (curr_set)))))
1124611340
return true;
1124711341

11248-
if (GET_CODE (SET_SRC (prev_set)) == MULT
11249-
&& GET_CODE (SET_SRC (curr_set)) == PLUS
11250-
&& REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
11251-
&& (REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))
11252-
|| REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 1))))
11253-
return true;
11254-
11255-
/* Fuse logical shift left with logical shift right (bit-extract pattern). */
11256-
if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
11257-
&& GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
11258-
&& REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
11259-
&& REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0)))
11260-
return true;
11261-
1126211342
return false;
1126311343
}
1126411344

@@ -11980,17 +12060,21 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1198012060
we currently only perform the adjustment when -madjust-lmul-cost is given.
1198112061
*/
1198212062
static int
11983-
riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
11984-
unsigned int)
12063+
riscv_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
12064+
int cost, unsigned int)
1198512065
{
12066+
if (riscv_is_micro_arch (arcv_rhx100) && dep_type == REG_DEP_ANTI
12067+
&& !SCHED_GROUP_P (insn))
12068+
return cost + 1;
12069+
1198612070
/* Only do adjustments for the generic out-of-order scheduling model. */
1198712071
if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
1198812072
return cost;
1198912073

11990-
if (recog_memoized (insn) < 0)
12074+
if (recog_memoized (dep_insn) < 0)
1199112075
return cost;
1199212076

11993-
enum attr_type type = get_attr_type (insn);
12077+
enum attr_type type = get_attr_type (dep_insn);
1199412078

1199512079
if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
1199612080
{
@@ -12041,6 +12125,7 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
1204112125
return new_cost;
1204212126
}
1204312127

12128+
1204412129
/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if insn can
1204512130
can be scheduled for speculative execution. Reject vsetvl instructions to
1204612131
prevent the scheduler from hoisting them out of basic blocks without
@@ -12062,6 +12147,149 @@ riscv_sched_can_speculate_insn (rtx_insn *insn)
1206212147
}
1206312148
}
1206412149

12150+
static void
12151+
riscv_sched_init (FILE *file ATTRIBUTE_UNUSED,
12152+
int verbose ATTRIBUTE_UNUSED,
12153+
int max_ready ATTRIBUTE_UNUSED)
12154+
{
12155+
last_scheduled_insn = 0;
12156+
}
12157+
12158+
static int
12159+
riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED,
12160+
int verbose ATTRIBUTE_UNUSED,
12161+
rtx_insn **ready,
12162+
int *n_readyp,
12163+
int clock ATTRIBUTE_UNUSED)
12164+
{
12165+
if (sched_fusion)
12166+
return cached_can_issue_more;
12167+
12168+
if (!cached_can_issue_more)
12169+
return 0;
12170+
12171+
/* Fuse double load/store instances missed by sched_fusion. */
12172+
if (!pipeB_scheduled_p && last_scheduled_insn && ready && *n_readyp > 0
12173+
&& !SCHED_GROUP_P (last_scheduled_insn)
12174+
&& (get_attr_type (last_scheduled_insn) == TYPE_LOAD
12175+
|| get_attr_type (last_scheduled_insn) == TYPE_STORE))
12176+
{
12177+
for (int i = 1; i <= *n_readyp; i++)
12178+
{
12179+
if (NONDEBUG_INSN_P (ready[*n_readyp - i])
12180+
&& !SCHED_GROUP_P (ready[*n_readyp - i])
12181+
&& (!next_insn (ready[*n_readyp - i])
12182+
|| !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
12183+
|| !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))
12184+
&& arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i]))
12185+
{
12186+
std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
12187+
SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
12188+
pipeB_scheduled_p = 1;
12189+
return cached_can_issue_more;
12190+
}
12191+
}
12192+
pipeB_scheduled_p = 1;
12193+
}
12194+
12195+
/* Try to fuse a non-memory last_scheduled_insn. */
12196+
if ((!alu_pipe_scheduled_p || !pipeB_scheduled_p)
12197+
&& last_scheduled_insn && ready && *n_readyp > 0
12198+
&& !SCHED_GROUP_P (last_scheduled_insn)
12199+
&& (get_attr_type (last_scheduled_insn) != TYPE_LOAD
12200+
&& get_attr_type (last_scheduled_insn) != TYPE_STORE))
12201+
{
12202+
for (int i = 1; i <= *n_readyp; i++)
12203+
{
12204+
if (NONDEBUG_INSN_P (ready[*n_readyp - i])
12205+
&& !SCHED_GROUP_P (ready[*n_readyp - i])
12206+
&& (!next_insn (ready[*n_readyp - i])
12207+
|| !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
12208+
|| !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))
12209+
&& arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i]))
12210+
{
12211+
if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD
12212+
|| get_attr_type (ready[*n_readyp - i]) == TYPE_STORE)
12213+
if (pipeB_scheduled_p)
12214+
continue;
12215+
else
12216+
pipeB_scheduled_p = 1;
12217+
else if (!alu_pipe_scheduled_p)
12218+
alu_pipe_scheduled_p = 1;
12219+
else
12220+
pipeB_scheduled_p = 1;
12221+
12222+
std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
12223+
SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
12224+
return cached_can_issue_more;
12225+
}
12226+
}
12227+
alu_pipe_scheduled_p = 1;
12228+
}
12229+
12230+
/* When pipe B is scheduled, we can have no more memops this cycle. */
12231+
if (pipeB_scheduled_p && *n_readyp > 0
12232+
&& NONDEBUG_INSN_P (ready[*n_readyp - 1])
12233+
&& recog_memoized (ready[*n_readyp - 1]) >= 0
12234+
&& !SCHED_GROUP_P (ready[*n_readyp - 1])
12235+
&& (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
12236+
|| get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
12237+
{
12238+
if (alu_pipe_scheduled_p)
12239+
return 0;
12240+
12241+
for (int i = 2; i <= *n_readyp; i++)
12242+
{
12243+
if ((NONDEBUG_INSN_P (ready[*n_readyp - i])
12244+
&& recog_memoized (ready[*n_readyp - i]) >= 0
12245+
&& get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD
12246+
&& get_attr_type (ready[*n_readyp - i]) != TYPE_STORE
12247+
&& !SCHED_GROUP_P (ready[*n_readyp - i])
12248+
&& ((!next_insn (ready[*n_readyp - i])
12249+
|| !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
12250+
|| !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))))
12251+
|| ((next_insn (ready[*n_readyp - i])
12252+
&& NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
12253+
&& recog_memoized (next_insn (ready[*n_readyp - i])) >= 0
12254+
&& get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_LOAD
12255+
&& get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_STORE)))
12256+
{
12257+
std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
12258+
alu_pipe_scheduled_p = 1;
12259+
cached_can_issue_more = 1;
12260+
return 1;
12261+
}
12262+
}
12263+
return 0;
12264+
}
12265+
12266+
/* If all else fails, schedule a single instruction. */
12267+
if (ready && *n_readyp > 0
12268+
&& NONDEBUG_INSN_P (ready[*n_readyp - 1])
12269+
&& recog_memoized (ready[*n_readyp - 1]) >= 0
12270+
&& get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD
12271+
&& get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE)
12272+
{
12273+
if (!pipeB_scheduled_p
12274+
&& (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
12275+
|| get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
12276+
{
12277+
alu_pipe_scheduled_p = pipeB_scheduled_p = 1;
12278+
cached_can_issue_more = 1;
12279+
return 1;
12280+
}
12281+
else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD
12282+
|| get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE)
12283+
{
12284+
alu_pipe_scheduled_p = pipeB_scheduled_p = 1;
12285+
cached_can_issue_more = 1;
12286+
return 1;
12287+
}
12288+
}
12289+
12290+
return cached_can_issue_more;
12291+
}
12292+
1206512293
/* Auxiliary function to emit RISC-V ELF attribute. */
1206612294
static void
1206712295
riscv_emit_attribute ()
@@ -16668,9 +16896,16 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
1666816896
#undef TARGET_SCHED_ADJUST_COST
1666916897
#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
1667016898

16899+
1667116900
#undef TARGET_SCHED_CAN_SPECULATE_INSN
1667216901
#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
1667316902

16903+
#undef TARGET_SCHED_REORDER2
16904+
#define TARGET_SCHED_REORDER2 riscv_sched_reorder2
16905+
16906+
#undef TARGET_SCHED_INIT
16907+
#define TARGET_SCHED_INIT riscv_sched_init
16908+
1667416909
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1667516910
#define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
1667616911

0 commit comments

Comments
 (0)