Skip to content

Commit 12c7874

Browse files
artemiy-volkov authored and MichielDerhaeg committed
arcv: fuse LH+LH and LB+LB instruction pairs
In addition to the LW+LW and SW+SW pairs that are already recognized as macro-op-fusable, add support for fusing pairs of 8-bit loads and pairs of naturally aligned 16-bit loads that operate on adjacent memory locations. To that end, introduce the new microarch-specific pair_fusion_mode_allowed_p () predicate, and call it from riscv_sched_fusion_priority () (right after fusion_load_store ()) during sched_fusion, and from arcv_fused_addr_p () (used by arcv_macro_fusion_pair_p ()) during regular scheduling passes.
Signed-off-by: Artemiy Volkov <artemiy@synopsys.com>
1 parent 9e7d3cc commit 12c7874

File tree

1 file changed

+80
-35
lines changed

1 file changed

+80
-35
lines changed

gcc/config/riscv/riscv.cc

Lines changed: 80 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -11084,37 +11084,73 @@ riscv_set_is_shNadduw (rtx set)
1108411084
&& REG_P (SET_DEST (set)));
1108511085
}
1108611086

11087+
/* Return TRUE if the target microarchitecture supports macro-op
11088+
fusion for two memory operations of mode MODE (the direction
11089+
of transfer is determined by the IS_LOAD parameter).
When the target is not arcv_rhx100 this predicate places no
restriction and returns TRUE for every MODE.  On arcv_rhx100,
loads (IS_LOAD true) are accepted in SImode, HImode and QImode,
while stores (IS_LOAD false) are accepted in SImode only.  */
11090+
11091+
static bool
11092+
pair_fusion_mode_allowed_p (machine_mode mode, bool is_load)
11093+
{
11094+
if (!riscv_is_micro_arch (arcv_rhx100))
11095+
return true;
11096+
11097+
return ((is_load && (mode == SImode
11098+
|| mode == HImode
11099+
|| mode == QImode))
11100+
|| (!is_load && mode == SImode));
11101+
}
11102+
1108711103
/* Return TRUE if two addresses can be fused. */
1108811104

1108911105
static bool
11090-
arcv_fused_addr_p (rtx addr0, rtx addr1)
11106+
arcv_fused_addr_p (rtx addr0, rtx addr1, bool is_load)
1109111107
{
1109211108
rtx base0, base1, tmp;
1109311109
HOST_WIDE_INT off0 = 0, off1 = 0;
1109411110

11095-
if (GET_CODE (addr0) == PLUS)
11111+
if (GET_CODE (addr0) == SIGN_EXTEND || GET_CODE (addr0) == ZERO_EXTEND)
11112+
addr0 = XEXP (addr0, 0);
11113+
11114+
if (GET_CODE (addr1) == SIGN_EXTEND || GET_CODE (addr1) == ZERO_EXTEND)
11115+
addr1 = XEXP (addr1, 0);
11116+
11117+
if (!MEM_P (addr0) || !MEM_P (addr1))
11118+
return false;
11119+
11120+
/* Require the accesses to have the same mode. */
11121+
if (GET_MODE (addr0) != GET_MODE (addr1))
11122+
return false;
11123+
11124+
/* Check if the mode is allowed. */
11125+
if (!pair_fusion_mode_allowed_p (GET_MODE (addr0), is_load))
11126+
return false;
11127+
11128+
rtx reg0 = XEXP (addr0, 0);
11129+
rtx reg1 = XEXP (addr1, 0);
11130+
11131+
if (GET_CODE (reg0) == PLUS)
1109611132
{
11097-
base0 = XEXP (addr0, 0);
11098-
tmp = XEXP (addr0, 1);
11133+
base0 = XEXP (reg0, 0);
11134+
tmp = XEXP (reg0, 1);
1109911135
if (!CONST_INT_P (tmp))
1110011136
return false;
1110111137
off0 = INTVAL (tmp);
1110211138
}
11103-
else if (REG_P (addr0))
11104-
base0 = addr0;
11139+
else if (REG_P (reg0))
11140+
base0 = reg0;
1110511141
else
1110611142
return false;
1110711143

11108-
if (GET_CODE (addr1) == PLUS)
11144+
if (GET_CODE (reg1) == PLUS)
1110911145
{
11110-
base1 = XEXP (addr1, 0);
11111-
tmp = XEXP (addr1, 1);
11146+
base1 = XEXP (reg1, 0);
11147+
tmp = XEXP (reg1, 1);
1111211148
if (!CONST_INT_P (tmp))
1111311149
return false;
1111411150
off1 = INTVAL (tmp);
1111511151
}
11116-
else if (REG_P (addr1))
11117-
base1 = addr1;
11152+
else if (REG_P (reg1))
11153+
base1 = reg1;
1111811154
else
1111911155
return false;
1112011156

@@ -11123,9 +11159,9 @@ arcv_fused_addr_p (rtx addr0, rtx addr1)
1112311159
if (REGNO (base0) != REGNO (base1))
1112411160
return false;
1112511161

11126-
/* Offsets have to be aligned to word boundary and adjacent in memory,
11127-
but the memory operations can be narrower. */
11128-
if ((off0 % UNITS_PER_WORD == 0) && (abs (off1 - off0) == UNITS_PER_WORD))
11162+
/* Fuse adjacent aligned addresses. */
11163+
if ((off0 % GET_MODE_SIZE (GET_MODE (addr0)).to_constant () == 0)
11164+
&& (abs (off1 - off0) == GET_MODE_SIZE (GET_MODE (addr0)).to_constant ()))
1112911165
return true;
1113011166

1113111167
return false;
@@ -11278,20 +11314,14 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1127811314
if (get_attr_type (prev) == TYPE_LOAD
1127911315
&& get_attr_type (curr) == TYPE_LOAD)
1128011316
{
11281-
rtx addr0 = XEXP (SET_SRC (prev_set), 0);
11282-
rtx addr1 = XEXP (SET_SRC (curr_set), 0);
11283-
11284-
if (arcv_fused_addr_p (addr0, addr1))
11317+
if (arcv_fused_addr_p (SET_SRC (prev_set), SET_SRC (curr_set), true))
1128511318
return true;
1128611319
}
1128711320

1128811321
if (get_attr_type (prev) == TYPE_STORE
1128911322
&& get_attr_type (curr) == TYPE_STORE)
1129011323
{
11291-
rtx addr0 = XEXP (SET_DEST (prev_set), 0);
11292-
rtx addr1 = XEXP (SET_DEST (curr_set), 0);
11293-
11294-
if (arcv_fused_addr_p (addr0, addr1))
11324+
if (arcv_fused_addr_p (SET_DEST (prev_set), SET_DEST (curr_set), false))
1129511325
return true;
1129611326
}
1129711327

@@ -11301,21 +11331,19 @@ arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1130111331
&& get_attr_type (curr) == TYPE_LOAD
1130211332
&& get_attr_type (next_insn (curr)) == TYPE_LOAD)
1130311333
{
11304-
rtx addr0 = XEXP (SET_SRC (curr_set), 0);
11305-
rtx addr1 = XEXP (SET_SRC (single_set (next_insn (curr))), 0);
11306-
11307-
if (arcv_fused_addr_p (addr0, addr1))
11334+
if (arcv_fused_addr_p (SET_SRC (curr_set),
11335+
SET_SRC (single_set (next_insn (curr))),
11336+
true))
1130811337
return false;
1130911338
}
1131011339

1131111340
if (next_insn (curr) && single_set (next_insn (curr))
1131211341
&& get_attr_type (curr) == TYPE_STORE
1131311342
&& get_attr_type (next_insn (curr)) == TYPE_STORE)
1131411343
{
11315-
rtx addr0 = XEXP (SET_DEST (curr_set), 0);
11316-
rtx addr1 = XEXP (SET_DEST (single_set (next_insn (curr))), 0);
11317-
11318-
if (arcv_fused_addr_p (addr0, addr1))
11344+
if (arcv_fused_addr_p (SET_DEST (curr_set),
11345+
SET_DEST (single_set (next_insn (curr))),
11346+
false))
1131911347
return false;
1132011348
}
1132111349

@@ -11986,7 +12014,8 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1198612014
otherwise return FALSE. */
1198712015

1198812016
static bool
11989-
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
12017+
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, machine_mode *mode,
12018+
bool *is_load)
1199012019
{
1199112020
rtx x, dest, src;
1199212021

@@ -11997,15 +12026,22 @@ fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
1199712026

1199812027
src = SET_SRC (x);
1199912028
dest = SET_DEST (x);
12029+
12030+
if ((GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
12031+
&& MEM_P (XEXP (src, 0)))
12032+
src = XEXP (src, 0);
12033+
1200012034
if (REG_P (src) && MEM_P (dest))
1200112035
{
1200212036
*is_load = false;
12003-
extract_base_offset_in_addr (dest, base, offset);
12037+
if (extract_base_offset_in_addr (dest, base, offset))
12038+
*mode = GET_MODE (dest);
1200412039
}
1200512040
else if (MEM_P (src) && REG_P (dest))
1200612041
{
1200712042
*is_load = true;
12008-
extract_base_offset_in_addr (src, base, offset);
12043+
if (extract_base_offset_in_addr (src, base, offset))
12044+
*mode = GET_MODE (src);
1200912045
}
1201012046
else
1201112047
return false;
@@ -12020,11 +12056,13 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1202012056
int tmp, off_val;
1202112057
bool is_load;
1202212058
rtx base, offset;
12059+
machine_mode mode = SImode;
1202312060

1202412061
gcc_assert (INSN_P (insn));
1202512062

1202612063
tmp = max_pri - 1;
12027-
if (!fusion_load_store (insn, &base, &offset, &is_load))
12064+
if (!fusion_load_store (insn, &base, &offset, &mode, &is_load)
12065+
|| !pair_fusion_mode_allowed_p (mode, is_load))
1202812066
{
1202912067
*pri = tmp;
1203012068
*fusion_pri = tmp;
@@ -12033,6 +12071,11 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1203312071

1203412072
tmp /= 2;
1203512073

12074+
if (mode == HImode)
12075+
tmp /= 2;
12076+
else if (mode == QImode)
12077+
tmp /= 4;
12078+
1203612079
/* INSN with smaller base register goes first. */
1203712080
tmp -= ((REGNO (base) & 0xff) << 20);
1203812081

@@ -12041,7 +12084,9 @@ riscv_sched_fusion_priority (rtx_insn *insn, int max_pri, int *fusion_pri,
1204112084

1204212085
/* Put loads/stores operating on adjacent MODE-sized locations into
1204312086
* the same scheduling group. */
12044-
*fusion_pri = tmp - ((off_val / (UNITS_PER_WORD * 2)) << 1) + is_load;
12087+
*fusion_pri = tmp
12088+
- ((off_val / (GET_MODE_SIZE (mode).to_constant () * 2)) << 1)
12089+
+ is_load;
1204512090

1204612091
if (off_val >= 0)
1204712092
tmp -= (off_val & 0xfffff);

0 commit comments

Comments
 (0)