Skip to content

Commit 285dc3d

Browse files
artemiy-volkov authored and MichielDerhaeg committed
arcv: fuse LH+LH and LB+LB instruction pairs
In addition to the LW+LW and SW+SW pairs that are already recognized as macro-op-fusable, add support for 8-bit and naturally aligned 16-bit loads operating on adjacent memory locations. To that end, introduce the new microarch-specific pair_fusion_mode_allowed_p () predicate, and call it from riscv_sched_fusion_priority () (on the mode reported by fusion_load_store ()) during sched_fusion, and from arcv_fused_addr_p () (used by arcv_macro_fusion_pair_p ()) during regular scheduling passes.

Signed-off-by: Artemiy Volkov <artemiy@synopsys.com>
1 parent 3f5ca7b commit 285dc3d

File tree

1 file changed

+80
-35
lines changed

1 file changed

+80
-35
lines changed

gcc/config/riscv/riscv.cc

Lines changed: 80 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10457,37 +10457,73 @@ riscv_set_is_shNadduw (rtx set)
1045710457
&& REG_P (SET_DEST (set)));
1045810458
}
1045910459

10460+
/* Return TRUE if the target microarchitecture supports macro-op
10461+
fusion for two memory operations of mode MODE (the direction
10462+
of transfer is determined by the IS_LOAD parameter). */
10463+
10464+
static bool
10465+
pair_fusion_mode_allowed_p (machine_mode mode, bool is_load)
10466+
{
10467+
if (!riscv_is_micro_arch (arcv_rhx100))
10468+
return true;
10469+
10470+
return ((is_load && (mode == SImode
10471+
|| mode == HImode
10472+
|| mode == QImode))
10473+
|| (!is_load && mode == SImode));
10474+
}
10475+
1046010476
/* Return TRUE if two addresses can be fused. */
1046110477

1046210478
static bool
10463-
arcv_fused_addr_p (rtx addr0, rtx addr1)
10479+
arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load)
1046410480
{
1046510481
rtx base0, base1, tmp;
1046610482
HOST_WIDE_INT off0 = 0, off1 = 0;
1046710483

10468-
if (GET_CODE (addr0) == PLUS)
10484+
if (GET_CODE (addr0) == SIGN_EXTEND || GET_CODE (addr0) == ZERO_EXTEND)
10485+
addr0 = XEXP (addr0, 0);
10486+
10487+
if (GET_CODE (addr1) == SIGN_EXTEND || GET_CODE (addr1) == ZERO_EXTEND)
10488+
addr1 = XEXP (addr1, 0);
10489+
10490+
if (!MEM_P (addr0) || !MEM_P (addr1))
10491+
return false;
10492+
10493+
/* Require the accesses to have the same mode. */
10494+
if (GET_MODE (addr0) != GET_MODE (addr1))
10495+
return false;
10496+
10497+
/* Check if the mode is allowed. */
10498+
if (!pair_fusion_mode_allowed_p (GET_MODE (addr0), is_load))
10499+
return false;
10500+
10501+
rtx reg0 = XEXP (addr0, 0);
10502+
rtx reg1 = XEXP (addr1, 0);
10503+
10504+
if (GET_CODE (reg0) == PLUS)
1046910505
{
10470-
base0 = XEXP (addr0, 0);
10471-
tmp = XEXP (addr0, 1);
10506+
base0 = XEXP (reg0, 0);
10507+
tmp = XEXP (reg0, 1);
1047210508
if (!CONST_INT_P (tmp))
1047310509
return false;
1047410510
off0 = INTVAL (tmp);
1047510511
}
10476-
else if (REG_P (addr0))
10477-
base0 = addr0;
10512+
else if (REG_P (reg0))
10513+
base0 = reg0;
1047810514
else
1047910515
return false;
1048010516

10481-
if (GET_CODE (addr1) == PLUS)
10517+
if (GET_CODE (reg1) == PLUS)
1048210518
{
10483-
base1 = XEXP (addr1, 0);
10484-
tmp = XEXP (addr1, 1);
10519+
base1 = XEXP (reg1, 0);
10520+
tmp = XEXP (reg1, 1);
1048510521
if (!CONST_INT_P (tmp))
1048610522
return false;
1048710523
off1 = INTVAL (tmp);
1048810524
}
10489-
else if (REG_P (addr1))
10490-
base1 = addr1;
10525+
else if (REG_P (reg1))
10526+
base1 = reg1;
1049110527
else
1049210528
return false;
1049310529

@@ -10496,9 +10532,9 @@ arcv_fused_addr_p (rtx addr0, rtx addr1)
1049610532
if (REGNO (base0) != REGNO (base1))
1049710533
return false;
1049810534

10499-
/* Offsets have to be aligned to word boundary and adjacent in memory,
10500-
but the memory operations can be narrower. */
10501-
if ((off0 % UNITS_PER_WORD == 0) && (abs (off1 - off0) == UNITS_PER_WORD))
10535+
/* Fuse adjacent aligned addresses. */
10536+
if ((off0 % GET_MODE_SIZE (GET_MODE (addr0)).to_constant () == 0)
10537+
&& (abs (off1 - off0) == GET_MODE_SIZE (GET_MODE (addr0)).to_constant ()))
1050210538
return true;
1050310539

1050410540
return false;
@@ -10651,20 +10687,14 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1065110687
if (get_attr_type (prev) == TYPE_LOAD
1065210688
&& get_attr_type (curr) == TYPE_LOAD)
1065310689
{
10654-
rtx addr0 = XEXP (SET_SRC (prev_set), 0);
10655-
rtx addr1 = XEXP (SET_SRC (curr_set), 0);
10656-
10657-
if (arcv_fused_addr_p (addr0, addr1))
10690+
if (arcv_fused_addr_p (SET_SRC (prev_set), SET_SRC (curr_set), true))
1065810691
return true;
1065910692
}
1066010693

1066110694
if (get_attr_type (prev) == TYPE_STORE
1066210695
&& get_attr_type (curr) == TYPE_STORE)
1066310696
{
10664-
rtx addr0 = XEXP (SET_DEST (prev_set), 0);
10665-
rtx addr1 = XEXP (SET_DEST (curr_set), 0);
10666-
10667-
if (arcv_fused_addr_p (addr0, addr1))
10697+
if (arcv_fused_addr_p (SET_DEST (prev_set), SET_DEST (curr_set), false))
1066810698
return true;
1066910699
}
1067010700

@@ -10674,21 +10704,19 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1067410704
&& get_attr_type (curr) == TYPE_LOAD
1067510705
&& get_attr_type (next_insn (curr)) == TYPE_LOAD)
1067610706
{
10677-
rtx addr0 = XEXP (SET_SRC (curr_set), 0);
10678-
rtx addr1 = XEXP (SET_SRC (single_set (next_insn (curr))), 0);
10679-
10680-
if (arcv_fused_addr_p (addr0, addr1))
10707+
if (arcv_fused_addr_p (SET_SRC (curr_set),
10708+
SET_SRC (single_set (next_insn (curr))),
10709+
true))
1068110710
return false;
1068210711
}
1068310712

1068410713
if (next_insn (curr) && single_set (next_insn (curr))
1068510714
&& get_attr_type (curr) == TYPE_STORE
1068610715
&& get_attr_type (next_insn (curr)) == TYPE_STORE)
1068710716
{
10688-
rtx addr0 = XEXP (SET_DEST (curr_set), 0);
10689-
rtx addr1 = XEXP (SET_DEST (single_set (next_insn (curr))), 0);
10690-
10691-
if (arcv_fused_addr_p (addr0, addr1))
10717+
if (arcv_fused_addr_p (SET_DEST (curr_set),
10718+
SET_DEST (single_set (next_insn (curr))),
10719+
false))
1069210720
return false;
1069310721
}
1069410722

@@ -11359,7 +11387,8 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1135911387
otherwise return FALSE. */
1136011388

1136111389
static bool
11362-
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
11390+
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode,
11391+
bool *is_load)
1136311392
{
1136411393
rtx x, dest, src;
1136511394

@@ -11370,15 +11399,22 @@ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
1137011399

1137111400
src = SET_SRC (x);
1137211401
dest = SET_DEST (x);
11402+
11403+
if ((GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
11404+
&& MEM_P (XEXP (src, 0)))
11405+
src = XEXP (src, 0);
11406+
1137311407
if (REG_P (src) && MEM_P (dest))
1137411408
{
1137511409
*is_load = false;
11376-
extract_base_offset_in_addr (dest, base, offset);
11410+
if (extract_base_offset_in_addr (dest, base, offset))
11411+
*mode = GET_MODE (dest);
1137711412
}
1137811413
else if (MEM_P (src) && REG_P (dest))
1137911414
{
1138011415
*is_load = true;
11381-
extract_base_offset_in_addr (src, base, offset);
11416+
if (extract_base_offset_in_addr (src, base, offset))
11417+
*mode = GET_MODE (src);
1138211418
}
1138311419
else
1138411420
return false;
@@ -11393,11 +11429,13 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1139311429
int tmp, off_val;
1139411430
bool is_load;
1139511431
rtx base, offset;
11432+
machine_mode mode = SImode;
1139611433

1139711434
gcc_assert (INSN_P (insn));
1139811435

1139911436
tmp = max_pri - 1;
11400-
if (!fusion_load_store (insn, &base, &offset, &is_load))
11437+
if (!fusion_load_store (insn, &base, &offset, &mode, &is_load)
11438+
|| !pair_fusion_mode_allowed_p (mode, is_load))
1140111439
{
1140211440
*pri = tmp;
1140311441
*fusion_pri = tmp;
@@ -11406,6 +11444,11 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1140611444

1140711445
tmp /= 2;
1140811446

11447+
if (mode == HImode)
11448+
tmp /= 2;
11449+
else if (mode == QImode)
11450+
tmp /= 4;
11451+
1140911452
/* INSN with smaller base register goes first. */
1141011453
tmp -= ((REGNO (base) & 0xff) << 20);
1141111454

@@ -11414,7 +11457,9 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1141411457

1141511458
/* Put loads/stores operating on adjacent words into the same
1141611459
* scheduling group. */
11417-
*fusion_pri = tmp - ((off_val / (UNITS_PER_WORD * 2)) << 1) + is_load;
11460+
*fusion_pri = tmp
11461+
- ((off_val / (GET_MODE_SIZE (mode).to_constant () * 2)) << 1)
11462+
+ is_load;
1141811463

1141911464
if (off_val >= 0)
1142011465
tmp -= (off_val & 0xfffff);

0 commit comments

Comments
 (0)