Skip to content

Commit d514605

Browse files
committed
AArch64: Integrate scaling into first invNTT layers
Previously, the initial scaling step in the inverseNTT was handled separately and without SLOTHY optimization. This commit integrates scaling into the initial layer of the invNTT and re-runs SLOTHY.

Signed-off-by: Hanno Becker <beckphan@amazon.co.uk>
1 parent 105bb61 commit d514605

File tree

5 files changed

+2566
-2501
lines changed

5 files changed

+2566
-2501
lines changed

dev/aarch64_clean/src/intt.S

Lines changed: 2 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -235,7 +235,8 @@ MLK_ASM_FN_SYMBOL(intt_asm)
235 235
mov inp, in
236 236
mov count, #8
237 237

238-
intt_scale_start:
238+
.p2align 2
239+
intt_layer4567_start:
239 240

240 241
ldr q_data0, [inp, #(16*0)]
241 242
ldr q_data1, [inp, #(16*1)]
@@ -245,25 +246,6 @@ intt_scale_start:
245 246
mul_ninv data0, data1, data2, data3, data0, data1, data2, data3
246 247
// Bounds: Absolute value < q
247 248

248-
str q_data0, [inp], #64
249-
str q_data1, [inp, #(-64 + 16*1)]
250-
str q_data2, [inp, #(-64 + 16*2)]
251-
str q_data3, [inp, #(-64 + 16*3)]
252-
253-
subs count, count, #1
254-
cbnz count, intt_scale_start
255-
256-
mov inp, in
257-
mov count, #8
258-
259-
.p2align 2
260-
intt_layer4567_start:
261-
262-
ldr q_data0, [inp, #(16*0)]
263-
ldr q_data1, [inp, #(16*1)]
264-
ldr q_data2, [inp, #(16*2)]
265-
ldr q_data3, [inp, #(16*3)]
266-
267 249
transpose4 data // manual ld4
268 250

269 251
load_next_roots_67

0 commit comments

Comments
 (0)