@@ -1189,19 +1189,27 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
11891189 if dstReg == srcReg {
11901190 break
11911191 }
1192- tmpReg1 := int16 (arm64 .REG_R24 )
1193- tmpReg2 := int16 (arm64 .REG_R25 )
1192+ tmpReg1 := int16 (arm64 .REG_R25 )
1193+ tmpFReg1 := int16 (arm64 .REG_F16 )
1194+ tmpFReg2 := int16 (arm64 .REG_F17 )
11941195 n := v .AuxInt
11951196 if n < 16 {
11961197 v .Fatalf ("Move too small %d" , n )
11971198 }
11981199
11991200 // Generate copying instructions.
12001201 var off int64
1202+ for n >= 32 {
1203+ // FLDPQ off(srcReg), (tmpFReg1, tmpFReg2)
1204+ // FSTPQ (tmpFReg1, tmpFReg2), off(dstReg)
1205+ move32 (s , srcReg , dstReg , tmpFReg1 , tmpFReg2 , off , false )
1206+ off += 32
1207+ n -= 32
1208+ }
12011209 for n >= 16 {
1202- // LDP off(srcReg ), (tmpReg1, tmpReg2)
1203- // STP (tmpReg1, tmpReg2), off(dstReg )
1204- move16 (s , srcReg , dstReg , tmpReg1 , tmpReg2 , off , false )
1210+ // FMOVQ off(srcReg), tmpFReg1
1211+ // FMOVQ tmpFReg1, off(dstReg)
1212+ move16 (s , srcReg , dstReg , tmpFReg1 , off , false )
12051213 off += 16
12061214 n -= 16
12071215 }
@@ -1223,9 +1231,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
12231231 if dstReg == srcReg {
12241232 break
12251233 }
1226- countReg := int16 (arm64 .REG_R23 )
1227- tmpReg1 := int16 (arm64 .REG_R24 )
1228- tmpReg2 := int16 (arm64 .REG_R25 )
1234+ countReg := int16 (arm64 .REG_R24 )
1235+ tmpReg1 := int16 (arm64 .REG_R25 )
1236+ tmpFReg1 := int16 (arm64 .REG_F16 )
1237+ tmpFReg2 := int16 (arm64 .REG_F17 )
12291238 n := v .AuxInt
12301239 loopSize := int64 (64 )
12311240 if n < 3 * loopSize {
@@ -1251,10 +1260,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
12511260
12521261 // Move loopSize bytes starting at srcReg to dstReg.
12531262 // Increment srcReg and dstReg by loopSize as a side effect.
1254- for range loopSize / 16 {
1255- // LDP .P 16 (srcReg), (tmpReg1, tmpReg2 )
1256- // STP .P (tmpReg1, tmpReg2 ), 16 (dstReg)
1257- move16 (s , srcReg , dstReg , tmpReg1 , tmpReg2 , 0 , true )
1263+ for range loopSize / 32 {
1264+ // FLDPQ .P 32 (srcReg), (tmpFReg1, tmpFReg2 )
1265+ // FSTPQ .P (tmpFReg1, tmpFReg2 ), 32 (dstReg)
1266+ move32 (s , srcReg , dstReg , tmpFReg1 , tmpFReg2 , 0 , true )
12581267 }
12591268 // Decrement loop count.
12601269 // SUB $1, countReg
@@ -1276,10 +1285,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
12761285
12771286 // Copy any fractional portion.
12781287 var off int64
1288+ for n >= 32 {
1289+ // FLDPQ off(srcReg), (tmpFReg1, tmpFReg2)
1290+ // FSTPQ (tmpFReg1, tmpFReg2), off(dstReg)
1291+ move32 (s , srcReg , dstReg , tmpFReg1 , tmpFReg2 , off , false )
1292+ off += 32
1293+ n -= 32
1294+ }
12791295 for n >= 16 {
1280- // LDP off(srcReg ), (tmpReg1, tmpReg2)
1281- // STP (tmpReg1, tmpReg2), off(dstReg )
1282- move16 (s , srcReg , dstReg , tmpReg1 , tmpReg2 , off , false )
1296+ // FMOVQ off(srcReg), tmpFReg1
1297+ // FMOVQ tmpFReg1, off(dstReg)
1298+ move16 (s , srcReg , dstReg , tmpFReg1 , off , false )
12831299 off += 16
12841300 n -= 16
12851301 }
@@ -1699,26 +1715,55 @@ func zero8(s *ssagen.State, reg int16, off int64) {
16991715 p .To .Offset = off
17001716}
17011717
1702- // move16 copies 16 bytes at src+off to dst+off.
1718+ // move32 copies 32 bytes at src+off to dst+off using one FLDPQ/FSTPQ pair.
17031719// Uses registers tmp1 and tmp2 as scratch; the load overwrites both.
1704- // If postInc is true, increment src and dst by 16 .
1705- func move16 (s * ssagen.State , src , dst , tmp1 , tmp2 int16 , off int64 , postInc bool ) {
1706- // LDP off(src), (tmp1, tmp2)
1707- ld := s .Prog (arm64 .ALDP )
1720+ // If postInc is true, increment src and dst by 32; off must be 0 in that case.
1721+ func move32 (s * ssagen.State , src , dst , tmp1 , tmp2 int16 , off int64 , postInc bool ) {
1722+ // FLDPQ off(src), (tmp1, tmp2)
1723+ ld := s .Prog (arm64 .AFLDPQ )
17081724 ld .From .Type = obj .TYPE_MEM
17091725 ld .From .Reg = src
17101726 ld .From .Offset = off
17111727 ld .To .Type = obj .TYPE_REGREG
17121728 ld .To .Reg = tmp1
17131729 ld .To .Offset = int64 (tmp2 )
1714- // STP (tmp1, tmp2), off(dst)
1715- st := s .Prog (arm64 .ASTP )
1730+ // FSTPQ (tmp1, tmp2), off(dst)
1731+ st := s .Prog (arm64 .AFSTPQ )
17161732 st .From .Type = obj .TYPE_REGREG
17171733 st .From .Reg = tmp1
17181734 st .From .Offset = int64 (tmp2 )
17191735 st .To .Type = obj .TYPE_MEM
17201736 st .To .Reg = dst
17211737 st .To .Offset = off
1738+ if postInc {
1739+ if off != 0 {
1740+ panic ("can't postinc with non-zero offset" )
1741+ }
1742+ ld .Scond = arm64 .C_XPOST // mark both the load and the store with C_XPOST and a 32-byte offset
1743+ st .Scond = arm64 .C_XPOST
1744+ ld .From .Offset = 32
1745+ st .To .Offset = 32
1746+ }
1747+ }
1748+
1749+ // move16 copies 16 bytes at src+off to dst+off.
1750+ // Uses register tmp1.
1751+ // If postInc is true, increment src and dst by 16.
1752+ func move16 (s * ssagen.State , src , dst , tmp1 int16 , off int64 , postInc bool ) {
1753+ // FMOVQ off(src), tmp1
1754+ ld := s .Prog (arm64 .AFMOVQ )
1755+ ld .From .Type = obj .TYPE_MEM
1756+ ld .From .Reg = src
1757+ ld .From .Offset = off
1758+ ld .To .Type = obj .TYPE_REG
1759+ ld .To .Reg = tmp1
1760+ // FMOVQ tmp1, off(dst)
1761+ st := s .Prog (arm64 .AFMOVQ )
1762+ st .From .Type = obj .TYPE_REG
1763+ st .From .Reg = tmp1
1764+ st .To .Type = obj .TYPE_MEM
1765+ st .To .Reg = dst
1766+ st .To .Offset = off
17221767 if postInc {
17231768 if off != 0 {
17241769 panic ("can't postinc with non-zero offset" )
0 commit comments