@@ -340,6 +340,12 @@ unsigned riscv_stack_boundary;
340340/* Whether in riscv_output_mi_thunk. */
341341static bool riscv_in_thunk_func = false;
342342
343+ static int alu_pipe_scheduled_p;
344+ static int pipeB_scheduled_p;
345+
346+ static rtx_insn *last_scheduled_insn;
347+ static short cached_can_issue_more;
348+
343349/* If non-zero, this is an offset to be added to SP to redefine the CFA
344350 when restoring the FP register from the stack. Only valid when generating
345351 the epilogue. */
@@ -10847,6 +10853,21 @@ riscv_sched_init (FILE *, int, int)
1084710853static int
1084810854riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
1084910855{
10856+ /* Beginning of cycle - reset variables. */
10857+ if (more == tune_param->issue_rate)
10858+ {
10859+ alu_pipe_scheduled_p = 0;
10860+ pipeB_scheduled_p = 0;
10861+ }
10862+
10863+ if (alu_pipe_scheduled_p && pipeB_scheduled_p)
10864+ {
10865+ cached_can_issue_more = 0;
10866+ return 0;
10867+ }
10868+
10869+ cached_can_issue_more = more;
10870+
1085010871 if (DEBUG_INSN_P (insn))
1085110872 return more;
1085210873
@@ -10892,6 +10913,28 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
1089210913 }
1089310914 }
1089410915
10916+ if (next_insn (insn) && INSN_P (next_insn (insn))
10917+ && SCHED_GROUP_P (next_insn (insn)))
10918+ {
10919+ if (get_attr_type (insn) == TYPE_LOAD
10920+ || get_attr_type (insn) == TYPE_STORE
10921+ || get_attr_type (next_insn (insn)) == TYPE_LOAD
10922+ || get_attr_type (next_insn (insn)) == TYPE_STORE)
10923+ pipeB_scheduled_p = 1;
10924+ else
10925+ alu_pipe_scheduled_p = 1;
10926+ }
10927+
10928+ if (get_attr_type (insn) == TYPE_ALU_FUSED
10929+ || get_attr_type (insn) == TYPE_IMUL_FUSED)
10930+ {
10931+ alu_pipe_scheduled_p = 1;
10932+ more -= 1;
10933+ }
10934+
10935+ last_scheduled_insn = insn;
10936+ cached_can_issue_more = more - 1;
10937+
1089510938 return more - 1;
1089610939}
1089710940
@@ -11184,22 +11227,49 @@ arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr)
1118411227static bool
1118511228arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1118611229{
11230+ /* Never create sched groups with more than 2 members. */
11231+ if (SCHED_GROUP_P (prev))
11232+ return false;
11233+
1118711234 rtx prev_set = single_set (prev);
1118811235 rtx curr_set = single_set (curr);
11189- /* prev and curr are simple SET insns i.e. no flag setting or branching. */
11190- bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
1119111236
11192- /* Fuse load-immediate with a dependent conditional branch. */
11193- if (get_attr_type (prev) == TYPE_MOVE
11194- && get_attr_move_type (prev) == MOVE_TYPE_CONST
11195- && any_condjump_p (curr))
11237+ /* Fuse multiply-add pair. */
11238+ if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == MULT
11239+ && GET_CODE (SET_SRC (curr_set)) == PLUS
11240+ && (REG_P (XEXP (SET_SRC (curr_set), 0))
11241+ && REGNO (SET_DEST (prev_set)) ==
11242+ REGNO (XEXP (SET_SRC (curr_set), 0))
11243+ || (REG_P (XEXP (SET_SRC (curr_set), 1))
11244+ && REGNO (SET_DEST (prev_set)) ==
11245+ REGNO (XEXP (SET_SRC (curr_set), 1)))))
11246+ return true;
11247+
11248+ /* Fuse logical shift left with logical shift right (bit-extract pattern). */
11249+ if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == ASHIFT
11250+ && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
11251+ && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
11252+ && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0)))
11253+ return true;
11254+
11255+ /* Fuse load-immediate with a dependent conditional branch. */
11256+ if (get_attr_type (prev) == TYPE_MOVE
11257+ && get_attr_move_type (prev) == MOVE_TYPE_CONST
11258+ && any_condjump_p (curr))
1119611259 {
1119711260 rtx comp = XEXP (SET_SRC (curr_set), 0);
1119811261
1119911262 return (REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == SET_DEST (prev_set))
1120011263 || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == SET_DEST (prev_set));
1120111264 }
1120211265
11266+ /* Do not fuse loads/stores before sched2. */
11267+ if (!reload_completed || sched_fusion)
11268+ return false;
11269+
11270+ /* prev and curr are simple SET insns i.e. no flag setting or branching. */
11271+ bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
11272+
1120311273 /* Don't handle anything with a jump past this point. */
1120411274 if (!simple_sets_p)
1120511275 return false;
@@ -11225,6 +11295,30 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1122511295 return true;
1122611296 }
1122711297
11298+ /* Look ahead 1 insn to make sure double loads/stores are always
11299+ fused together, even in the presence of other opportunities. */
11300+ if (next_insn (curr) && single_set (next_insn (curr))
11301+ && get_attr_type (curr) == TYPE_LOAD
11302+ && get_attr_type (next_insn (curr)) == TYPE_LOAD)
11303+ {
11304+ rtx addr0 = XEXP (SET_SRC (curr_set), 0);
11305+ rtx addr1 = XEXP (SET_SRC (single_set (next_insn (curr))), 0);
11306+
11307+ if (arcv_fused_addr_p (addr0, addr1))
11308+ return false;
11309+ }
11310+
11311+ if (next_insn (curr) && single_set (next_insn (curr))
11312+ && get_attr_type (curr) == TYPE_STORE
11313+ && get_attr_type (next_insn (curr)) == TYPE_STORE)
11314+ {
11315+ rtx addr0 = XEXP (SET_DEST (curr_set), 0);
11316+ rtx addr1 = XEXP (SET_DEST (single_set (next_insn (curr))), 0);
11317+
11318+ if (arcv_fused_addr_p (addr0, addr1))
11319+ return false;
11320+ }
11321+
1122811322 /* Fuse a pre- or post-update memory operation. */
1122911323 if (arcv_memop_arith_pair_p (prev, curr)
1123011324 || arcv_memop_arith_pair_p (curr, prev))
@@ -11245,20 +11339,6 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1124511339 && SET_DEST (prev_set) == SUBREG_REG (SET_SRC (curr_set)))))
1124611340 return true;
1124711341
11248- if (GET_CODE (SET_SRC (prev_set)) == MULT
11249- && GET_CODE (SET_SRC (curr_set)) == PLUS
11250- && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
11251- && (REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0))
11252- || REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 1))))
11253- return true;
11254-
11255- /* Fuse logical shift left with logical shift right (bit-extract pattern). */
11256- if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
11257- && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
11258- && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
11259- && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0)))
11260- return true;
11261-
1126211342 return false;
1126311343}
1126411344
@@ -11980,17 +12060,21 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1198012060 we currently only perform the adjustment when -madjust-lmul-cost is given.
1198112061 */
1198212062static int
11983- riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost ,
11984- unsigned int)
12063+ riscv_sched_adjust_cost (rtx_insn *insn , int dep_type , rtx_insn *dep_insn ,
12064+ int cost, unsigned int)
1198512065{
12066+ if (riscv_is_micro_arch (arcv_rhx100) && dep_type == REG_DEP_ANTI
12067+ && !SCHED_GROUP_P (insn))
12068+ return cost + 1;
12069+
1198612070 /* Only do adjustments for the generic out-of-order scheduling model. */
1198712071 if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
1198812072 return cost;
1198912073
11990- if (recog_memoized (insn ) < 0)
12074+ if (recog_memoized (dep_insn ) < 0)
1199112075 return cost;
1199212076
11993- enum attr_type type = get_attr_type (insn );
12077+ enum attr_type type = get_attr_type (dep_insn );
1199412078
1199512079 if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
1199612080 {
@@ -12041,6 +12125,7 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
1204112125 return new_cost;
1204212126}
1204312127
12128+
1204412129/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if insn can
1204512130 be scheduled for speculative execution. Reject vsetvl instructions to
1204612131 prevent the scheduler from hoisting them out of basic blocks without
@@ -12062,6 +12147,149 @@ riscv_sched_can_speculate_insn (rtx_insn *insn)
1206212147 }
1206312148}
1206412149
12150+ static void
12151+ riscv_sched_init (FILE *file ATTRIBUTE_UNUSED,
12152+ int verbose ATTRIBUTE_UNUSED,
12153+ int max_ready ATTRIBUTE_UNUSED)
12154+ {
12155+ last_scheduled_insn = 0;
12156+ }
12157+
12158+ static int
12159+ riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED,
12160+ int verbose ATTRIBUTE_UNUSED,
12161+ rtx_insn **ready,
12162+ int *n_readyp,
12163+ int clock ATTRIBUTE_UNUSED)
12164+ {
12165+ if (sched_fusion)
12166+ return cached_can_issue_more;
12167+
12168+ if (!cached_can_issue_more)
12169+ return 0;
12170+
12171+ /* Fuse double load/store instances missed by sched_fusion. */
12172+ if (!pipeB_scheduled_p && last_scheduled_insn && ready && *n_readyp > 0
12173+ && !SCHED_GROUP_P (last_scheduled_insn)
12174+ && (get_attr_type (last_scheduled_insn) == TYPE_LOAD
12175+ || get_attr_type (last_scheduled_insn) == TYPE_STORE))
12176+ {
12177+ for (int i = 1; i <= *n_readyp; i++)
12178+ {
12179+ if (NONDEBUG_INSN_P (ready[*n_readyp - i])
12180+ && !SCHED_GROUP_P (ready[*n_readyp - i])
12181+ && (!next_insn (ready[*n_readyp - i])
12182+ || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
12183+ || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))
12184+ && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i]))
12185+ {
12186+ std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
12187+ SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
12188+ pipeB_scheduled_p = 1;
12189+ return cached_can_issue_more;
12190+ }
12191+ }
12192+ pipeB_scheduled_p = 1;
12193+ }
12194+
12195+ /* Try to fuse a non-memory last_scheduled_insn. */
12196+ if ((!alu_pipe_scheduled_p || !pipeB_scheduled_p)
12197+ && last_scheduled_insn && ready && *n_readyp > 0
12198+ && !SCHED_GROUP_P (last_scheduled_insn)
12199+ && (get_attr_type (last_scheduled_insn) != TYPE_LOAD
12200+ && get_attr_type (last_scheduled_insn) != TYPE_STORE))
12201+ {
12202+ for (int i = 1; i <= *n_readyp; i++)
12203+ {
12204+ if (NONDEBUG_INSN_P (ready[*n_readyp - i])
12205+ && !SCHED_GROUP_P (ready[*n_readyp - i])
12206+ && (!next_insn (ready[*n_readyp - i])
12207+ || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
12208+ || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))
12209+ && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i]))
12210+ {
12211+ if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD
12212+ || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE)
12213+ if (pipeB_scheduled_p)
12214+ continue;
12215+ else
12216+ pipeB_scheduled_p = 1;
12217+ else if (!alu_pipe_scheduled_p)
12218+ alu_pipe_scheduled_p = 1;
12219+ else
12220+ pipeB_scheduled_p = 1;
12221+
12222+ std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
12223+ SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
12224+ return cached_can_issue_more;
12225+ }
12226+ }
12227+ alu_pipe_scheduled_p = 1;
12228+ }
12229+
12230+ /* When pipe B is scheduled, we can have no more memops this cycle. */
12231+ if (pipeB_scheduled_p && *n_readyp > 0
12232+ && NONDEBUG_INSN_P (ready[*n_readyp - 1])
12233+ && recog_memoized (ready[*n_readyp - 1]) >= 0
12234+ && !SCHED_GROUP_P (ready[*n_readyp - 1])
12235+ && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
12236+ || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
12237+ {
12238+ if (alu_pipe_scheduled_p)
12239+ return 0;
12240+
12241+ for (int i = 2; i <= *n_readyp; i++)
12242+ {
12243+ if ((NONDEBUG_INSN_P (ready[*n_readyp - i])
12244+ && recog_memoized (ready[*n_readyp - i]) >= 0
12245+ && get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD
12246+ && get_attr_type (ready[*n_readyp - i]) != TYPE_STORE
12247+ && !SCHED_GROUP_P (ready[*n_readyp - i])
12248+ && ((!next_insn (ready[*n_readyp - i])
12249+ || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
12250+ || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))))
12251+ || ((next_insn (ready[*n_readyp - i])
12252+ && NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
12253+ && recog_memoized (next_insn (ready[*n_readyp - i])) >= 0
12254+ && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_LOAD
12255+ && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_STORE)))
12256+ {
12257+ std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
12258+ alu_pipe_scheduled_p = 1;
12259+ cached_can_issue_more = 1;
12260+ return 1;
12261+ }
12262+ }
12263+ return 0;
12264+ }
12265+
12266+ /* If all else fails, schedule a single instruction. */
12267+ if (ready && *n_readyp > 0
12268+ && NONDEBUG_INSN_P (ready[*n_readyp - 1])
12269+ && recog_memoized (ready[*n_readyp - 1]) >= 0
12270+ && get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD
12271+ && get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE)
12272+ {
12273+ if (!pipeB_scheduled_p
12274+ && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
12275+ || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
12276+ {
12277+ alu_pipe_scheduled_p = pipeB_scheduled_p = 1;
12278+ cached_can_issue_more = 1;
12279+ return 1;
12280+ }
12281+ else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD
12282+ || get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE)
12283+ {
12284+ alu_pipe_scheduled_p = pipeB_scheduled_p = 1;
12285+ cached_can_issue_more = 1;
12286+ return 1;
12287+ }
12288+ }
12289+
12290+ return cached_can_issue_more;
12291+ }
12292+
1206512293/* Auxiliary function to emit RISC-V ELF attribute. */
1206612294static void
1206712295riscv_emit_attribute ()
@@ -16668,9 +16896,16 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
1666816896#undef TARGET_SCHED_ADJUST_COST
1666916897#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
1667016898
16899+
1667116900#undef TARGET_SCHED_CAN_SPECULATE_INSN
1667216901#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
1667316902
16903+ #undef TARGET_SCHED_REORDER2
16904+ #define TARGET_SCHED_REORDER2 riscv_sched_reorder2
16905+
16906+ #undef TARGET_SCHED_INIT
16907+ #define TARGET_SCHED_INIT riscv_sched_init
16908+
1667416909#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1667516910#define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
1667616911
0 commit comments