@@ -340,6 +340,12 @@ unsigned riscv_stack_boundary;
340340/* Whether in riscv_output_mi_thunk. */
341341static bool riscv_in_thunk_func = false;
342342
/* Per-cycle bookkeeping for the dual-issue scheduling model: whether the
   ALU pipe, and whether pipe B (the pipe that handles memory operations --
   see riscv_sched_reorder2), have already been claimed this cycle.  Both
   are reset at the beginning of a cycle in riscv_sched_variable_issue.  */
static int alu_pipe_scheduled_p;
static int pipeB_scheduled_p;

/* Last insn issued by the scheduler, and the issue budget remaining after
   it, as recorded by riscv_sched_variable_issue for use in the
   TARGET_SCHED_REORDER2 hook.  */
static rtx_insn *last_scheduled_insn;
static short cached_can_issue_more;
348+
343349/* If non-zero, this is an offset to be added to SP to redefine the CFA
344350 when restoring the FP register from the stack. Only valid when generating
345351 the epilogue. */
@@ -10299,6 +10305,21 @@ riscv_issue_rate (void)
1029910305static int
1030010306riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
1030110307{
10308+ /* Beginning of cycle - reset variables. */
10309+ if (more == tune_param->issue_rate)
10310+ {
10311+ alu_pipe_scheduled_p = 0;
10312+ pipeB_scheduled_p = 0;
10313+ }
10314+
10315+ if (alu_pipe_scheduled_p && pipeB_scheduled_p)
10316+ {
10317+ cached_can_issue_more = 0;
10318+ return 0;
10319+ }
10320+
10321+ cached_can_issue_more = more;
10322+
1030210323 if (DEBUG_INSN_P (insn))
1030310324 return more;
1030410325
@@ -10319,6 +10340,28 @@ riscv_sched_variable_issue (FILE *, int, rtx_insn *insn, int more)
1031910340 an assert so we can find and fix this problem. */
1032010341 gcc_assert (insn_has_dfa_reservation_p (insn));
1032110342
10343+ if (next_insn (insn) && INSN_P (next_insn (insn))
10344+ && SCHED_GROUP_P (next_insn (insn)))
10345+ {
10346+ if (get_attr_type (insn) == TYPE_LOAD
10347+ || get_attr_type (insn) == TYPE_STORE
10348+ || get_attr_type (next_insn (insn)) == TYPE_LOAD
10349+ || get_attr_type (next_insn (insn)) == TYPE_STORE)
10350+ pipeB_scheduled_p = 1;
10351+ else
10352+ alu_pipe_scheduled_p = 1;
10353+ }
10354+
10355+ if (get_attr_type (insn) == TYPE_ALU_FUSED
10356+ || get_attr_type (insn) == TYPE_IMUL_FUSED)
10357+ {
10358+ alu_pipe_scheduled_p = 1;
10359+ more -= 1;
10360+ }
10361+
10362+ last_scheduled_insn = insn;
10363+ cached_can_issue_more = more - 1;
10364+
1032210365 return more - 1;
1032310366}
1032410367
@@ -10557,22 +10600,49 @@ arcv_memop_lui_pair_p (rtx_insn *prev, rtx_insn *curr)
1055710600static bool
1055810601arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1055910602{
10603+ /* Never create sched groups with more than 2 members. */
10604+ if (SCHED_GROUP_P (prev))
10605+ return false;
10606+
1056010607 rtx prev_set = single_set (prev);
1056110608 rtx curr_set = single_set (curr);
10562- /* prev and curr are simple SET insns i.e. no flag setting or branching. */
10563- bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
1056410609
10565- /* Fuse load-immediate with a dependent conditional branch. */
10566- if (get_attr_type (prev) == TYPE_MOVE
10567- && get_attr_move_type (prev) == MOVE_TYPE_CONST
10568- && any_condjump_p (curr))
10610+ /* Fuse multiply-add pair. */
10611+ if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == MULT
10612+ && GET_CODE (SET_SRC (curr_set)) == PLUS
10613+ && (REG_P (XEXP (SET_SRC (curr_set), 0))
10614+ && REGNO (SET_DEST (prev_set)) ==
10615+ REGNO (XEXP (SET_SRC (curr_set), 0))
10616+ || (REG_P (XEXP (SET_SRC (curr_set), 1))
10617+ && REGNO (SET_DEST (prev_set)) ==
10618+ REGNO (XEXP (SET_SRC (curr_set), 1)))))
10619+ return true;
10620+
10621+ /* Fuse logical shift left with logical shift right (bit-extract pattern). */
10622+ if (prev_set && curr_set && GET_CODE (SET_SRC (prev_set)) == ASHIFT
10623+ && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
10624+ && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
10625+ && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0)))
10626+ return true;
10627+
10628+ /* Fuse load-immediate with a dependent conditional branch. */
10629+ if (get_attr_type (prev) == TYPE_MOVE
10630+ && get_attr_move_type (prev) == MOVE_TYPE_CONST
10631+ && any_condjump_p (curr))
1056910632 {
1057010633 rtx comp = XEXP (SET_SRC (curr_set), 0);
1057110634
1057210635 return (REG_P (XEXP (comp, 0)) && XEXP (comp, 0) == SET_DEST (prev_set))
1057310636 || (REG_P (XEXP (comp, 1)) && XEXP (comp, 1) == SET_DEST (prev_set));
1057410637 }
1057510638
10639+ /* Do not fuse loads/stores before sched2. */
10640+ if (!reload_completed || sched_fusion)
10641+ return false;
10642+
10643+ /* prev and curr are simple SET insns i.e. no flag setting or branching. */
10644+ bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10645+
1057610646 /* Don't handle anything with a jump past this point. */
1057710647 if (!simple_sets_p)
1057810648 return false;
@@ -10598,6 +10668,30 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1059810668 return true;
1059910669 }
1060010670
10671+ /* Look ahead 1 insn to make sure double loads/stores are always
10672+ fused together, even in the presence of other opportunities. */
10673+ if (next_insn (curr) && single_set (next_insn (curr))
10674+ && get_attr_type (curr) == TYPE_LOAD
10675+ && get_attr_type (next_insn (curr)) == TYPE_LOAD)
10676+ {
10677+ rtx addr0 = XEXP (SET_SRC (curr_set), 0);
10678+ rtx addr1 = XEXP (SET_SRC (single_set (next_insn (curr))), 0);
10679+
10680+ if (arcv_fused_addr_p (addr0, addr1))
10681+ return false;
10682+ }
10683+
10684+ if (next_insn (curr) && single_set (next_insn (curr))
10685+ && get_attr_type (curr) == TYPE_STORE
10686+ && get_attr_type (next_insn (curr)) == TYPE_STORE)
10687+ {
10688+ rtx addr0 = XEXP (SET_DEST (curr_set), 0);
10689+ rtx addr1 = XEXP (SET_DEST (single_set (next_insn (curr))), 0);
10690+
10691+ if (arcv_fused_addr_p (addr0, addr1))
10692+ return false;
10693+ }
10694+
1060110695 /* Fuse a pre- or post-update memory operation. */
1060210696 if (arcv_memop_arith_pair_p (prev, curr)
1060310697 || arcv_memop_arith_pair_p (curr, prev))
@@ -10618,20 +10712,6 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1061810712 && SET_DEST (prev_set) == SUBREG_REG (SET_SRC (curr_set)))))
1061910713 return true;
1062010714
10621- if (GET_CODE (SET_SRC (prev_set)) == MULT
10622- && GET_CODE (SET_SRC (curr_set)) == PLUS
10623- && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
10624- && (REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0 ))
10625- || REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 1 ))))
10626- return true ;
10627-
10628- /* Fuse logical shift left with logical shift right (bit-extract pattern). */
10629- if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
10630- && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
10631- && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
10632- && REGNO (SET_DEST (prev_set)) == REGNO (XEXP (SET_SRC (curr_set), 0 )))
10633- return true ;
10634-
1063510715 return false;
1063610716}
1063710717
@@ -11353,17 +11433,21 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1135311433 we currently only perform the adjustment when -madjust-lmul-cost is given.
1135411434 */
1135511435static int
11356- riscv_sched_adjust_cost (rtx_insn *, int , rtx_insn *insn, int cost ,
11357- unsigned int )
11436+ riscv_sched_adjust_cost (rtx_insn *insn , int dep_type , rtx_insn *dep_insn ,
11437+ int cost, unsigned int)
1135811438{
11439+ if (riscv_is_micro_arch (arcv_rhx100) && dep_type == REG_DEP_ANTI
11440+ && !SCHED_GROUP_P (insn))
11441+ return cost + 1;
11442+
1135911443 /* Only do adjustments for the generic out-of-order scheduling model. */
1136011444 if (!TARGET_VECTOR || riscv_microarchitecture != generic_ooo)
1136111445 return cost;
1136211446
11363- if (recog_memoized (insn ) < 0 )
11447+ if (recog_memoized (dep_insn ) < 0)
1136411448 return cost;
1136511449
11366- enum attr_type type = get_attr_type (insn );
11450+ enum attr_type type = get_attr_type (dep_insn );
1136711451
1136811452 if (type == TYPE_VFREDO || type == TYPE_VFWREDO)
1136911453 {
@@ -11414,6 +11498,7 @@ riscv_sched_adjust_cost (rtx_insn *, int, rtx_insn *insn, int cost,
1141411498 return new_cost;
1141511499}
1141611500
11501+
1141711502/* Implement TARGET_SCHED_CAN_SPECULATE_INSN hook. Return true if insn can
1141811503 can be scheduled for speculative execution. Reject vsetvl instructions to
1141911504 prevent the scheduler from hoisting them out of basic blocks without
@@ -11435,6 +11520,149 @@ riscv_sched_can_speculate_insn (rtx_insn *insn)
1143511520 }
1143611521}
1143711522
11523+ static void
11524+ riscv_sched_init (FILE *file ATTRIBUTE_UNUSED,
11525+ int verbose ATTRIBUTE_UNUSED,
11526+ int max_ready ATTRIBUTE_UNUSED)
11527+ {
11528+ last_scheduled_insn = 0;
11529+ }
11530+
11531+ static int
11532+ riscv_sched_reorder2 (FILE *file ATTRIBUTE_UNUSED,
11533+ int verbose ATTRIBUTE_UNUSED,
11534+ rtx_insn **ready,
11535+ int *n_readyp,
11536+ int clock ATTRIBUTE_UNUSED)
11537+ {
11538+ if (sched_fusion)
11539+ return cached_can_issue_more;
11540+
11541+ if (!cached_can_issue_more)
11542+ return 0;
11543+
11544+ /* Fuse double load/store instances missed by sched_fusion. */
11545+ if (!pipeB_scheduled_p && last_scheduled_insn && ready && *n_readyp > 0
11546+ && !SCHED_GROUP_P (last_scheduled_insn)
11547+ && (get_attr_type (last_scheduled_insn) == TYPE_LOAD
11548+ || get_attr_type (last_scheduled_insn) == TYPE_STORE))
11549+ {
11550+ for (int i = 1; i <= *n_readyp; i++)
11551+ {
11552+ if (NONDEBUG_INSN_P (ready[*n_readyp - i])
11553+ && !SCHED_GROUP_P (ready[*n_readyp - i])
11554+ && (!next_insn (ready[*n_readyp - i])
11555+ || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
11556+ || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))
11557+ && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i]))
11558+ {
11559+ std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
11560+ SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
11561+ pipeB_scheduled_p = 1;
11562+ return cached_can_issue_more;
11563+ }
11564+ }
11565+ pipeB_scheduled_p = 1;
11566+ }
11567+
11568+ /* Try to fuse a non-memory last_scheduled_insn. */
11569+ if ((!alu_pipe_scheduled_p || !pipeB_scheduled_p)
11570+ && last_scheduled_insn && ready && *n_readyp > 0
11571+ && !SCHED_GROUP_P (last_scheduled_insn)
11572+ && (get_attr_type (last_scheduled_insn) != TYPE_LOAD
11573+ && get_attr_type (last_scheduled_insn) != TYPE_STORE))
11574+ {
11575+ for (int i = 1; i <= *n_readyp; i++)
11576+ {
11577+ if (NONDEBUG_INSN_P (ready[*n_readyp - i])
11578+ && !SCHED_GROUP_P (ready[*n_readyp - i])
11579+ && (!next_insn (ready[*n_readyp - i])
11580+ || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
11581+ || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))
11582+ && arcv_macro_fusion_pair_p (last_scheduled_insn, ready[*n_readyp - i]))
11583+ {
11584+ if (get_attr_type (ready[*n_readyp - i]) == TYPE_LOAD
11585+ || get_attr_type (ready[*n_readyp - i]) == TYPE_STORE)
11586+ if (pipeB_scheduled_p)
11587+ continue;
11588+ else
11589+ pipeB_scheduled_p = 1;
11590+ else if (!alu_pipe_scheduled_p)
11591+ alu_pipe_scheduled_p = 1;
11592+ else
11593+ pipeB_scheduled_p = 1;
11594+
11595+ std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
11596+ SCHED_GROUP_P (ready[*n_readyp - 1]) = 1;
11597+ return cached_can_issue_more;
11598+ }
11599+ }
11600+ alu_pipe_scheduled_p = 1;
11601+ }
11602+
11603+ /* When pipe B is scheduled, we can have no more memops this cycle. */
11604+ if (pipeB_scheduled_p && *n_readyp > 0
11605+ && NONDEBUG_INSN_P (ready[*n_readyp - 1])
11606+ && recog_memoized (ready[*n_readyp - 1]) >= 0
11607+ && !SCHED_GROUP_P (ready[*n_readyp - 1])
11608+ && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
11609+ || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
11610+ {
11611+ if (alu_pipe_scheduled_p)
11612+ return 0;
11613+
11614+ for (int i = 2; i <= *n_readyp; i++)
11615+ {
11616+ if ((NONDEBUG_INSN_P (ready[*n_readyp - i])
11617+ && recog_memoized (ready[*n_readyp - i]) >= 0
11618+ && get_attr_type (ready[*n_readyp - i]) != TYPE_LOAD
11619+ && get_attr_type (ready[*n_readyp - i]) != TYPE_STORE
11620+ && !SCHED_GROUP_P (ready[*n_readyp - i])
11621+ && ((!next_insn (ready[*n_readyp - i])
11622+ || !NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
11623+ || !SCHED_GROUP_P (next_insn (ready[*n_readyp - i])))))
11624+ || ((next_insn (ready[*n_readyp - i])
11625+ && NONDEBUG_INSN_P (next_insn (ready[*n_readyp - i]))
11626+ && recog_memoized (next_insn (ready[*n_readyp - i])) >= 0
11627+ && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_LOAD
11628+ && get_attr_type (next_insn (ready[*n_readyp - i])) != TYPE_STORE)))
11629+ {
11630+ std::swap (ready[*n_readyp - 1], ready[*n_readyp - i]);
11631+ alu_pipe_scheduled_p = 1;
11632+ cached_can_issue_more = 1;
11633+ return 1;
11634+ }
11635+ }
11636+ return 0;
11637+ }
11638+
11639+ /* If all else fails, schedule a single instruction. */
11640+ if (ready && *n_readyp > 0
11641+ && NONDEBUG_INSN_P (ready[*n_readyp - 1])
11642+ && recog_memoized (ready[*n_readyp - 1]) >= 0
11643+ && get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD
11644+ && get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE)
11645+ {
11646+ if (!pipeB_scheduled_p
11647+ && (get_attr_type (ready[*n_readyp - 1]) == TYPE_LOAD
11648+ || get_attr_type (ready[*n_readyp - 1]) == TYPE_STORE))
11649+ {
11650+ alu_pipe_scheduled_p = pipeB_scheduled_p = 1;
11651+ cached_can_issue_more = 1;
11652+ return 1;
11653+ }
11654+ else if (get_attr_type (ready[*n_readyp - 1]) != TYPE_LOAD
11655+ || get_attr_type (ready[*n_readyp - 1]) != TYPE_STORE)
11656+ {
11657+ alu_pipe_scheduled_p = pipeB_scheduled_p = 1;
11658+ cached_can_issue_more = 1;
11659+ return 1;
11660+ }
11661+ }
11662+
11663+ return cached_can_issue_more;
11664+ }
11665+
1143811666/* Auxiliary function to emit RISC-V ELF attribute. */
1143911667static void
1144011668riscv_emit_attribute ()
@@ -16020,9 +16248,16 @@ riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
1602016248#undef TARGET_SCHED_ADJUST_COST
1602116249#define TARGET_SCHED_ADJUST_COST riscv_sched_adjust_cost
1602216250
16251+
1602316252#undef TARGET_SCHED_CAN_SPECULATE_INSN
1602416253#define TARGET_SCHED_CAN_SPECULATE_INSN riscv_sched_can_speculate_insn
1602516254
16255+ #undef TARGET_SCHED_REORDER2
16256+ #define TARGET_SCHED_REORDER2 riscv_sched_reorder2
16257+
16258+ #undef TARGET_SCHED_INIT
16259+ #define TARGET_SCHED_INIT riscv_sched_init
16260+
1602616261#undef TARGET_FUNCTION_OK_FOR_SIBCALL
1602716262#define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
1602816263
0 commit comments