Skip to content

Commit 9225a85

Browse files
claziss authored and MichielDerhaeg committed
arcv: Add initial scheduling scheme.
Signed-off-by: Claudiu Zissulescu <claziss@gmail.com>
1 parent 8b7268c commit 9225a85

File tree

5 files changed

+221
-1
lines changed

5 files changed

+221
-1
lines changed

gcc/config/riscv/arcv-rhx100.md

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
;; DFA scheduling description of the Synopsys RHX-100 CPU
2+
;; for the GNU C compiler.
3+
;; Copyright (C) 2023 Free Software Foundation, Inc.
4+
5+
;; This file is part of GCC.
6+
7+
;; GCC is free software; you can redistribute it and/or modify
8+
;; it under the terms of the GNU General Public License as published by
9+
;; the Free Software Foundation; either version 3, or (at your option)
10+
;; any later version.
11+
12+
;; GCC is distributed in the hope that it will be useful,
13+
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
;; GNU General Public License for more details.
16+
17+
;; You should have received a copy of the GNU General Public License
18+
;; along with GCC; see the file COPYING3. If not see
19+
;; <http://www.gnu.org/licenses/>.
20+
21+
;; All RHX-100 units live in a single automaton.
(define_automaton "arcv_rhx100")

;; ALU units.  Judging by the names there is one per pipe (A/B) and
;; per fusion slot (fuse0/fuse1) -- TODO confirm against the core
;; documentation.
(define_cpu_unit "arcv_rhx100_ALU_A_fuse0_early,arcv_rhx100_ALU_A_fuse1_early"
		 "arcv_rhx100")
(define_cpu_unit "arcv_rhx100_ALU_B_fuse0_early,arcv_rhx100_ALU_B_fuse1_early"
		 "arcv_rhx100")

;; Integer multiplier and divider.
(define_cpu_unit "arcv_rhx100_MPY32,arcv_rhx100_DIV" "arcv_rhx100")

;; DMP units, reserved by the load and store reservations.
(define_cpu_unit "arcv_rhx100_DMP_fuse0,arcv_rhx100_DMP_fuse1" "arcv_rhx100")

;; Unit reserved by the fdiv/fsqrt reservation.
(define_cpu_unit "arcv_rhx100_fdivsqrt" "arcv_rhx100")

;; Issue slots.
(define_cpu_unit "arcv_rhx100_issueA_fuse0,arcv_rhx100_issueA_fuse1"
		 "arcv_rhx100")
(define_cpu_unit "arcv_rhx100_issueB_fuse0,arcv_rhx100_issueB_fuse1"
		 "arcv_rhx100")
36+
37+
;; Instruction reservation for arithmetic instructions (pipe A, pipe B).
;; Latency 1; takes any one of the four issue-slot + ALU pairs.
(define_insn_reservation "arcv_rhx100_alu_early_arith" 1
  (and (eq_attr "tune" "arcv_rhx100")
       (eq_attr "type" "unknown,move,const,arith,shift,slt,multi,auipc,nop,logical,\
			bitmanip,min,max,minu,maxu,clz,ctz,atomic,\
			condmove,mvpair,zicond,cpop,clmul"))
  "((arcv_rhx100_issueA_fuse0 + arcv_rhx100_ALU_A_fuse0_early)\
    | (arcv_rhx100_issueA_fuse1 + arcv_rhx100_ALU_A_fuse1_early))\
   | ((arcv_rhx100_issueB_fuse0 + arcv_rhx100_ALU_B_fuse0_early)\
      | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_ALU_B_fuse1_early))")

;; Control transfers: latency 1, either issue slot of pipe A.
(define_insn_reservation "arcv_rhx100_jmp_insn" 1
  (and (eq_attr "tune" "arcv_rhx100")
       (eq_attr "type" "branch,jump,call,jalr,ret,trap"))
  "arcv_rhx100_issueA_fuse0 | arcv_rhx100_issueA_fuse1")

;; Integer division: latency 12.  The divider is reserved for the
;; issue cycle only.
(define_insn_reservation "arcv_rhx100_div_insn" 12
  (and (eq_attr "tune" "arcv_rhx100")
       (eq_attr "type" "idiv"))
  "arcv_rhx100_issueA_fuse0 + arcv_rhx100_DIV, nothing*11")

;; Integer multiplication: latency 4.  The multiplier is reserved for
;; the issue cycle only.
(define_insn_reservation "arcv_rhx100_mpy32_insn" 4
  (and (eq_attr "tune" "arcv_rhx100")
       (eq_attr "type" "imul"))
  "arcv_rhx100_issueA_fuse0 + arcv_rhx100_MPY32, nothing*3")

;; Loads: latency 3, one of the two pipe-B issue-slot + DMP pairs.
(define_insn_reservation "arcv_rhx100_load_insn" 3
  (and (eq_attr "tune" "arcv_rhx100")
       (eq_attr "type" "load,fpload"))
  "(arcv_rhx100_issueB_fuse0 + arcv_rhx100_DMP_fuse0)\
   | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_fuse1)")

;; Stores: latency 1, same units as loads.
(define_insn_reservation "arcv_rhx100_store_insn" 1
  (and (eq_attr "tune" "arcv_rhx100")
       (eq_attr "type" "store,fpstore"))
  "(arcv_rhx100_issueB_fuse0 + arcv_rhx100_DMP_fuse0)\
   | (arcv_rhx100_issueB_fuse1 + arcv_rhx100_DMP_fuse1)")

;; (soft) floating points
;; NOTE(review): unlike the reservations above, the three below do not
;; reserve an issue slot -- confirm that this is intended.

;; FP moves, transfers, conversions and compares: latency 3.
(define_insn_reservation "arcv_rhx100_xfer" 3
  (and (eq_attr "tune" "arcv_rhx100")
       (eq_attr "type" "mfc,mtc,fcvt,fcvt_i2f,fcvt_f2i,fmove,fcmp"))
  "(arcv_rhx100_ALU_A_fuse0_early | arcv_rhx100_ALU_B_fuse0_early), nothing*2")

;; FP add, multiply and fused multiply-add: latency 5.
(define_insn_reservation "arcv_rhx100_fmul" 5
  (and (eq_attr "tune" "arcv_rhx100")
       (eq_attr "type" "fadd,fmul,fmadd"))
  "(arcv_rhx100_ALU_A_fuse0_early | arcv_rhx100_ALU_B_fuse0_early)")

;; FP divide and square root: latency 20; the unit stays reserved for
;; all 20 cycles.
(define_insn_reservation "arcv_rhx100_fdiv" 20
  (and (eq_attr "tune" "arcv_rhx100")
       (eq_attr "type" "fdiv,fsqrt"))
  "arcv_rhx100_fdivsqrt*20")
85+
86+
;; Disabled: constraints that would require the fuse0 unit to be
;; reserved before the matching fuse1 unit can be.
;(final_presence_set "arcv_rhx100_issueA_fuse1" "arcv_rhx100_issueA_fuse0")
;(final_presence_set "arcv_rhx100_issueB_fuse1" "arcv_rhx100_issueB_fuse0")
;(final_presence_set "arcv_rhx100_ALU_A_fuse1_early" "arcv_rhx100_ALU_A_fuse0_early")
;(final_presence_set "arcv_rhx100_ALU_B_fuse1_early" "arcv_rhx100_ALU_B_fuse0_early")

;; Bypasses

;; ALU result used as store data.  The zero-latency variant is kept,
;; disabled, next to the active one.
;(define_bypass 0 "arcv_rhx100_alu_early_arith" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p")
(define_bypass 1 "arcv_rhx100_alu_early_arith" "arcv_rhx100_store_insn"
  "riscv_store_data_bypass_p")

;; Load result forwarded to the consumers listed below.  The
;; "arcv_rhx100_mpy*_insn" name is a pattern over reservation names; in
;; this file only arcv_rhx100_mpy32_insn matches it.
;(define_bypass 0 "arcv_rhx100_load_insn" "arcv_rhx100_store_insn" "riscv_store_data_bypass_p")
(define_bypass 1 "arcv_rhx100_load_insn" "arcv_rhx100_store_insn"
  "riscv_store_data_bypass_p")
(define_bypass 1 "arcv_rhx100_load_insn"
  "arcv_rhx100_alu_early_arith")
(define_bypass 1 "arcv_rhx100_load_insn"
  "arcv_rhx100_mpy*_insn")
(define_bypass 2 "arcv_rhx100_load_insn"
  "arcv_rhx100_load_insn")
(define_bypass 1 "arcv_rhx100_load_insn"
  "arcv_rhx100_div_insn")

;; Multiplier result forwarded to a following multiply or divide.
(define_bypass 3 "arcv_rhx100_mpy32_insn"
  "arcv_rhx100_mpy*_insn")
(define_bypass 3 "arcv_rhx100_mpy32_insn"
  "arcv_rhx100_div_insn")

gcc/config/riscv/riscv-cores.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ RISCV_TUNE("xt-c920v2", generic, generic_ooo_tune_info)
5151
RISCV_TUNE("xiangshan-nanhu", xiangshan, xiangshan_nanhu_tune_info)
5252
RISCV_TUNE("xiangshan-kunminghu", xiangshan, generic_ooo_tune_info)
5353
RISCV_TUNE("arc-v-rmx-100-series", arcv_rmx100, arcv_rmx100_tune_info)
54+
RISCV_TUNE("arc-v-rhx-100-series", arcv_rhx100, arcv_rhx100_tune_info)
5455
RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info)
5556
RISCV_TUNE("size", generic, optimize_size_tune_info)
5657
RISCV_TUNE("mips-p8700", mips_p8700, mips_p8700_tune_info)

gcc/config/riscv/riscv-opts.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ enum riscv_microarchitecture_type {
6262
mips_p8700,
6363
tt_ascalon_d8,
6464
arcv_rmx100,
65+
arcv_rhx100,
6566
};
6667
extern enum riscv_microarchitecture_type riscv_microarchitecture;
6768

gcc/config/riscv/riscv.cc

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ enum riscv_fusion_pairs
291291
RISCV_FUSE_BFEXT = (1 << 11),
292292
RISCV_FUSE_EXPANDED_LD = (1 << 12),
293293
RISCV_FUSE_B_ALUI = (1 << 13),
294+
RISCV_FUSE_ARCV = (1 << 14),
294295
};
295296

296297
/* Costs of various operations on the different architectures. */
@@ -709,6 +710,30 @@ static const struct riscv_tune_param arcv_rmx100_tune_info = {
709710
NULL, /* loop_align */
710711
};
711712

713+
/* Costs to use when optimizing for Synopsys RHX-100. */
714+
static const struct riscv_tune_param arcv_rhx100_tune_info = {
715+
{COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */
716+
{COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_mul */
717+
{COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */
718+
{COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */
719+
{COSTS_N_INSNS (27), COSTS_N_INSNS (43)}, /* int_div */
720+
4, /* issue_rate */
721+
9, /* branch_cost */
722+
2, /* memory_cost */
723+
8, /* fmv_cost */
724+
false, /* slow_unaligned_access */
725+
false, /* vector_unaligned_access */
726+
false, /* use_divmod_expansion */
727+
false, /* overlap_op_by_pieces */
728+
true, /* use_zero_stride_load */
729+
false, /* speculative_sched_vsetvl */
730+
RISCV_FUSE_ARCV, /* fusible_ops */
731+
NULL, /* vector cost */
732+
NULL, /* function_align */
733+
NULL, /* jump_align */
734+
NULL, /* loop_align */
735+
};
736+
712737
/* Costs to use when optimizing for size. */
713738
static const struct riscv_tune_param optimize_size_tune_info = {
714739
{COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* fp_add */
@@ -10382,6 +10407,91 @@ riscv_set_is_shNadduw (rtx set)
1038210407
&& REG_P (SET_DEST (set)));
1038310408
}
1038410409

10410+
/* Return TRUE if two addresses can be fused. */
10411+
10412+
static bool
10413+
arcv_fused_addr_p (rtx addr0, rtx addr1)
10414+
{
10415+
rtx base0, base1, tmp;
10416+
HOST_WIDE_INT off0 = 0, off1 = 0;
10417+
10418+
if (GET_CODE (addr0) == PLUS)
10419+
{
10420+
base0 = XEXP (addr0, 0);
10421+
tmp = XEXP (addr0, 1);
10422+
if (!CONST_INT_P (tmp))
10423+
return false;
10424+
off0 = INTVAL (tmp);
10425+
}
10426+
else if (REG_P (addr0))
10427+
base0 = addr0;
10428+
else
10429+
return false;
10430+
10431+
if (GET_CODE (addr1) == PLUS)
10432+
{
10433+
base1 = XEXP (addr1, 0);
10434+
tmp = XEXP (addr1, 1);
10435+
if (!CONST_INT_P (tmp))
10436+
return false;
10437+
off1 = INTVAL (tmp);
10438+
}
10439+
else if (REG_P (addr1))
10440+
base1 = addr1;
10441+
else
10442+
return false;
10443+
10444+
/* Check if we have the same base. */
10445+
gcc_assert (REG_P (base0) && REG_P (base1));
10446+
if (REGNO (base0) != REGNO (base1))
10447+
return false;
10448+
10449+
/* Offsets have to be aligned to word boundary and adjacent in memory,
10450+
but the memory operations can be narrower. */
10451+
if ((off0 % UNITS_PER_WORD == 0) && (abs (off1 - off0) == UNITS_PER_WORD))
10452+
return true;
10453+
10454+
return false;
10455+
}
10456+
10457+
/* Return true if PREV and CURR should be kept together during scheduling. */
10458+
10459+
static bool
10460+
arcv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
10461+
{
10462+
rtx prev_set = single_set (prev);
10463+
rtx curr_set = single_set (curr);
10464+
/* prev and curr are simple SET insns i.e. no flag setting or branching. */
10465+
bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
10466+
10467+
/* Don't handle anything with a jump. */
10468+
if (!simple_sets_p)
10469+
return false;
10470+
10471+
/* Fuse adjacent loads and stores. */
10472+
if (get_attr_type (prev) == TYPE_LOAD
10473+
&& get_attr_type (curr) == TYPE_LOAD)
10474+
{
10475+
rtx addr0 = XEXP (SET_SRC (prev_set), 0);
10476+
rtx addr1 = XEXP (SET_SRC (curr_set), 0);
10477+
10478+
if (arcv_fused_addr_p (addr0, addr1))
10479+
return true;
10480+
}
10481+
10482+
if (get_attr_type (prev) == TYPE_STORE
10483+
&& get_attr_type (curr) == TYPE_STORE)
10484+
{
10485+
rtx addr0 = XEXP (SET_DEST (prev_set), 0);
10486+
rtx addr1 = XEXP (SET_DEST (curr_set), 0);
10487+
10488+
if (arcv_fused_addr_p (addr0, addr1))
10489+
return true;
10490+
}
10491+
10492+
return false;
10493+
}
10494+
1038510495
/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P. Return true if PREV and CURR
1038610496
should be kept together during scheduling. */
1038710497

@@ -11014,6 +11124,9 @@ riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
1101411124
}
1101511125
}
1101611126

11127+
if (riscv_fusion_enabled_p (RISCV_FUSE_ARCV))
11128+
return arcv_macro_fusion_pair_p (prev, curr);
11129+
1101711130
return false;
1101811131
}
1101911132

gcc/config/riscv/riscv.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,8 @@
672672
;; Microarchitectures we know how to tune for.
673673
;; Keep this in sync with enum riscv_microarchitecture.
674674
;; The value list is positional: the attribute is computed by casting
;; riscv_microarchitecture, so the names must appear in the same order
;; as the enumerators of enum riscv_microarchitecture_type.  There must
;; be exactly one value string.
(define_attr "tune"
  "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo,mips_p8700,tt_ascalon_d8,arcv_rmx100,arcv_rhx100"
  (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
677678

678679
;; Describe a user's asm statement.
@@ -4991,3 +4992,4 @@
49914992
(include "generic-ooo.md")
49924993
(include "tt-ascalon-d8.md")
49934994
(include "arcv-rmx100.md")
4995+
(include "arcv-rhx100.md")

0 commit comments

Comments
 (0)