1919// The differences between this and the original implementation are
2020// due to the calling conventions and initialization of constants.
2121
22- //go:build gc && !purego
22+ //go:build gc && !purego && (ppc64 || ppc64le)
2323
2424#include "textflag.h"
2525
3636// for VPERMXOR
3737#define MASK R18
3838
// consts<> is a read-only table of 0xc0 bytes (see the GLOBL line at the end).
// The "-" lines are the previous definition, written as 8-byte entries; the
// "+" lines define the same table as 4-byte entries. DATA stores each value in
// the target's byte order, so on a little-endian target both forms produce
// identical bytes, while the 4-byte form also keeps every 32-bit word value
// intact on a big-endian target.
//
// Layout of the 4-byte form, as 32-bit words:
//   0x00        0x61707865 0x3320646e 0x79622d32 0x6b206574
//               (the ASCII text "expand 32-byte k" when stored little-endian)
//   0x10        1, 0, 0, 0
//   0x20        4, 0, 0, 0
//   0x30, 0x40  byte-index patterns (presumably permute masks; confirm at the
//               load sites, which are outside this view)
//   0x50-0x8f   each of the four words from 0x00 repeated four times
//   0x90        0, 1, 2, 3
//   0xa0, 0xb0  nibble-pair patterns (presumably the VPERMXOR masks referred
//               to by the MASK define; confirm at the load sites)
39- DATA consts<>+0x00 (SB)/8 , $0x3320646e61707865
40- DATA consts<>+0x08 (SB)/8 , $0x6b20657479622d32
41- DATA consts<>+0x10 (SB)/8 , $0x0000000000000001
42- DATA consts<>+0x18 (SB)/8 , $0x0000000000000000
43- DATA consts<>+0x20 (SB)/8 , $0x0000000000000004
44- DATA consts<>+0x28 (SB)/8 , $0x0000000000000000
45- DATA consts<>+0x30 (SB)/8 , $0x0a0b08090e0f0c0d
46- DATA consts<>+0x38 (SB)/8 , $0x0203000106070405
47- DATA consts<>+0x40 (SB)/8 , $0x090a0b080d0e0f0c
48- DATA consts<>+0x48 (SB)/8 , $0x0102030005060704
49- DATA consts<>+0x50 (SB)/8 , $0x6170786561707865
50- DATA consts<>+0x58 (SB)/8 , $0x6170786561707865
51- DATA consts<>+0x60 (SB)/8 , $0x3320646e3320646e
52- DATA consts<>+0x68 (SB)/8 , $0x3320646e3320646e
53- DATA consts<>+0x70 (SB)/8 , $0x79622d3279622d32
54- DATA consts<>+0x78 (SB)/8 , $0x79622d3279622d32
55- DATA consts<>+0x80 (SB)/8 , $0x6b2065746b206574
56- DATA consts<>+0x88 (SB)/8 , $0x6b2065746b206574
57- DATA consts<>+0x90 (SB)/8 , $0x0000000100000000
58- DATA consts<>+0x98 (SB)/8 , $0x0000000300000002
59- DATA consts<>+0xa0 (SB)/8 , $0x5566774411223300
60- DATA consts<>+0xa8 (SB)/8 , $0xddeeffcc99aabb88
61- DATA consts<>+0xb0 (SB)/8 , $0x6677445522330011
62- DATA consts<>+0xb8 (SB)/8 , $0xeeffccddaabb8899
39+ DATA consts<>+0x00 (SB)/4 , $0x61707865
40+ DATA consts<>+0x04 (SB)/4 , $0x3320646e
41+ DATA consts<>+0x08 (SB)/4 , $0x79622d32
42+ DATA consts<>+0x0c (SB)/4 , $0x6b206574
43+ DATA consts<>+0x10 (SB)/4 , $0x00000001
44+ DATA consts<>+0x14 (SB)/4 , $0x00000000
45+ DATA consts<>+0x18 (SB)/4 , $0x00000000
46+ DATA consts<>+0x1c (SB)/4 , $0x00000000
47+ DATA consts<>+0x20 (SB)/4 , $0x00000004
48+ DATA consts<>+0x24 (SB)/4 , $0x00000000
49+ DATA consts<>+0x28 (SB)/4 , $0x00000000
50+ DATA consts<>+0x2c (SB)/4 , $0x00000000
51+ DATA consts<>+0x30 (SB)/4 , $0x0e0f0c0d
52+ DATA consts<>+0x34 (SB)/4 , $0x0a0b0809
53+ DATA consts<>+0x38 (SB)/4 , $0x06070405
54+ DATA consts<>+0x3c (SB)/4 , $0x02030001
55+ DATA consts<>+0x40 (SB)/4 , $0x0d0e0f0c
56+ DATA consts<>+0x44 (SB)/4 , $0x090a0b08
57+ DATA consts<>+0x48 (SB)/4 , $0x05060704
58+ DATA consts<>+0x4c (SB)/4 , $0x01020300
59+ DATA consts<>+0x50 (SB)/4 , $0x61707865
60+ DATA consts<>+0x54 (SB)/4 , $0x61707865
61+ DATA consts<>+0x58 (SB)/4 , $0x61707865
62+ DATA consts<>+0x5c (SB)/4 , $0x61707865
63+ DATA consts<>+0x60 (SB)/4 , $0x3320646e
64+ DATA consts<>+0x64 (SB)/4 , $0x3320646e
65+ DATA consts<>+0x68 (SB)/4 , $0x3320646e
66+ DATA consts<>+0x6c (SB)/4 , $0x3320646e
67+ DATA consts<>+0x70 (SB)/4 , $0x79622d32
68+ DATA consts<>+0x74 (SB)/4 , $0x79622d32
69+ DATA consts<>+0x78 (SB)/4 , $0x79622d32
70+ DATA consts<>+0x7c (SB)/4 , $0x79622d32
71+ DATA consts<>+0x80 (SB)/4 , $0x6b206574
72+ DATA consts<>+0x84 (SB)/4 , $0x6b206574
73+ DATA consts<>+0x88 (SB)/4 , $0x6b206574
74+ DATA consts<>+0x8c (SB)/4 , $0x6b206574
75+ DATA consts<>+0x90 (SB)/4 , $0x00000000
76+ DATA consts<>+0x94 (SB)/4 , $0x00000001
77+ DATA consts<>+0x98 (SB)/4 , $0x00000002
78+ DATA consts<>+0x9c (SB)/4 , $0x00000003
79+ DATA consts<>+0xa0 (SB)/4 , $0x11223300
80+ DATA consts<>+0xa4 (SB)/4 , $0x55667744
81+ DATA consts<>+0xa8 (SB)/4 , $0x99aabb88
82+ DATA consts<>+0xac (SB)/4 , $0xddeeffcc
83+ DATA consts<>+0xb0 (SB)/4 , $0x22330011
84+ DATA consts<>+0xb4 (SB)/4 , $0x66774455
85+ DATA consts<>+0xb8 (SB)/4 , $0xaabb8899
86+ DATA consts<>+0xbc (SB)/4 , $0xeeffccdd
6387GLOBL consts<>(SB), RODATA, $0xc0
6488
// Byte-reversal helpers for big-endian ppc64. When GOARCH_ppc64 is not
// defined, both macros expand to nothing.
//
// BE_XXBRW_INIT() builds a VPERM control vector in V24, using V25 as scratch:
//   LVSL (R0)(R0)  yields the byte sequence 0x00,0x01,...,0x0f when the
//                  effective address is 0 (assumes R0 holds zero; confirm
//                  against the Go ppc64 register conventions)
//   VSPLTISB $3    sets every byte of V25 to 3
//   VXOR           gives index^3 per byte: 03 02 01 00 07 06 05 04 ...
// Both V24 and V25 are overwritten, and V24 must stay live for as long as
// BE_XXBRW is used.
//
// BE_XXBRW(vr) permutes vr with itself through V24, which reverses the bytes
// inside each 32-bit word of vr in place.
89+ #ifdef GOARCH_ppc64
90+ #define BE_XXBRW_INIT() \
91+ LVSL (R0)(R0), V24 \
92+ VSPLTISB $3 , V25 \
93+ VXOR V24, V25, V24 \
94+
95+ #define BE_XXBRW(vr) VPERM vr, vr, V24, vr
96+ #else
97+ #define BE_XXBRW_INIT()
98+ #define BE_XXBRW(vr)
99+ #endif
100+
65101//func chaCha20_ctr32_vsx(out, inp *byte, len int, key *[8]uint32, counter *uint32)
66102TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64 -40
67103 MOVD out +0 (FP), OUT
@@ -94,6 +130,8 @@ TEXT ·chaCha20_ctr32_vsx(SB),NOSPLIT,$64-40
94130 // Clear V27
95131 VXOR V27, V27, V27
96132
133+ BE_XXBRW_INIT()
134+
97135 // V28
98136 LXVW4X (CONSTBASE)(R11), VS60
99137
@@ -299,6 +337,11 @@ loop_vsx:
299337 VADDUWM V8, V18, V8
300338 VADDUWM V12, V19, V12
301339
340+ BE_XXBRW(V0)
341+ BE_XXBRW(V4)
342+ BE_XXBRW(V8)
343+ BE_XXBRW(V12)
344+
302345 CMPU LEN, $64
303346 BLT tail_vsx
304347
@@ -327,15 +370,20 @@ loop_vsx:
327370 VADDUWM V9, V18, V8
328371 VADDUWM V13, V19, V12
329372
373+ BE_XXBRW(V0)
374+ BE_XXBRW(V4)
375+ BE_XXBRW(V8)
376+ BE_XXBRW(V12)
377+
330378 CMPU LEN, $64
331379 BLT tail_vsx
332380
333381 LXVW4X (INP)(R0), VS59
334382 LXVW4X (INP)(R8), VS60
335383 LXVW4X (INP)(R9), VS61
336384 LXVW4X (INP)(R10), VS62
337- VXOR V27, V0, V27
338385
386+ VXOR V27, V0, V27
339387 VXOR V28, V4, V28
340388 VXOR V29, V8, V29
341389 VXOR V30, V12, V30
@@ -354,6 +402,11 @@ loop_vsx:
354402 VADDUWM V10, V18, V8
355403 VADDUWM V14, V19, V12
356404
405+ BE_XXBRW(V0)
406+ BE_XXBRW(V4)
407+ BE_XXBRW(V8)
408+ BE_XXBRW(V12)
409+
357410 CMPU LEN, $64
358411 BLT tail_vsx
359412
@@ -381,6 +434,11 @@ loop_vsx:
381434 VADDUWM V11, V18, V8
382435 VADDUWM V15, V19, V12
383436
437+ BE_XXBRW(V0)
438+ BE_XXBRW(V4)
439+ BE_XXBRW(V8)
440+ BE_XXBRW(V12)
441+
384442 CMPU LEN, $64
385443 BLT tail_vsx
386444
@@ -408,9 +466,9 @@ loop_vsx:
408466
409467done_vsx:
410468 // Increment counter by number of 64 byte blocks
411- MOVD (CNT), R14
469+ MOVWZ (CNT), R14
412470 ADD BLOCKS, R14
413- MOVD R14, (CNT)
471+ MOVWZ R14, (CNT)
414472 RET
415473
416474tail_vsx:
0 commit comments