@@ -117,7 +117,9 @@ TARGET_BUILTIN(__builtin_ia32_uwrmsr, "vULLiULLi", "n", "usermsr")
 // AMX internal builtin
 TARGET_BUILTIN(__builtin_ia32_tile_loadconfig_internal, "vvC*", "n", "amx-tile")
 TARGET_BUILTIN(__builtin_ia32_tileloadd64_internal, "V256iUsUsvC*z", "n", "amx-tile")
+TARGET_BUILTIN(__builtin_ia32_tileloaddrs64_internal, "V256iUsUsvC*z", "n", "amx-movrs")
 TARGET_BUILTIN(__builtin_ia32_tileloaddt164_internal, "V256iUsUsvC*z", "n", "amx-tile")
+TARGET_BUILTIN(__builtin_ia32_tileloaddrst164_internal, "V256iUsUsvC*z", "n", "amx-movrs")
 TARGET_BUILTIN(__builtin_ia32_tdpbssd_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
 TARGET_BUILTIN(__builtin_ia32_tdpbsud_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
 TARGET_BUILTIN(__builtin_ia32_tdpbusd_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-int8")
@@ -129,10 +131,15 @@ TARGET_BUILTIN(__builtin_ia32_tdpfp16ps_internal, "V256iUsUsUsV256iV256iV256i",
 TARGET_BUILTIN(__builtin_ia32_tcmmimfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
 TARGET_BUILTIN(__builtin_ia32_tcmmrlfp16ps_internal, "V256iUsUsUsV256iV256iV256i", "n", "amx-complex")
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
+TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rs_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
+TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rst1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
+TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rs_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1t1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-transpose")
+TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rst1_internal, "vUsUsUsV256i*V256i*vC*z", "n", "amx-movrs,amx-transpose")
 TARGET_BUILTIN(__builtin_ia32_ttransposed_internal, "V256iUsUsV256i", "n", "amx-transpose")
+
 TARGET_BUILTIN(__builtin_ia32_tcvtrowd2ps_internal, "V16fUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
 TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16h_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
 TARGET_BUILTIN(__builtin_ia32_tcvtrowps2pbf16l_internal, "V32yUsUsV256iUi", "n", "amx-avx512,avx10.2-512")
@@ -147,6 +154,13 @@ TARGET_BUILTIN(__builtin_ia32_tile_loadconfig, "vvC*", "n", "amx-tile")
 TARGET_BUILTIN(__builtin_ia32_tile_storeconfig, "vvC*", "n", "amx-tile")
 TARGET_BUILTIN(__builtin_ia32_tilerelease, "v", "n", "amx-tile")
 TARGET_BUILTIN(__builtin_ia32_tilezero, "vUc", "n", "amx-tile")
+TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rs, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
+TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz0rst1, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
+TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rs, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
+TARGET_BUILTIN(__builtin_ia32_t2rpntlvwz1rst1, "vIUcvC*z", "n", "amx-movrs,amx-transpose")
+
+TARGET_BUILTIN(__builtin_ia32_tileloaddrs64, "vIUcvC*z", "n", "amx-movrs")
+TARGET_BUILTIN(__builtin_ia32_tileloaddrst164, "vIUcvC*z", "n", "amx-movrs")
 
 TARGET_BUILTIN(__builtin_ia32_tileloadd64, "vIUcvC*z", "n", "amx-tile")
 TARGET_BUILTIN(__builtin_ia32_tileloaddt164, "vIUcvC*z", "n", "amx-tile")
0 commit comments