55!
66! Copyright (C) 2001 Megan Potter
77! Copyright (C) 2014 , 2016 , 2023 Ruslan Rostovtsev
8- ! Copyright (C) 2023 Andy Barajas
8+ ! Copyright (C) 2023 , 2024 Andy Barajas
9+ ! Copyright (C) 2024 Paul Cercueil
910!
1011! Optimized assembler code for managing the cache.
1112!
1920 .globl _dcache_purge_all
2021 .globl _dcache_purge_all_with_buffer
2122
22- ! Routine to flush parts of cache. Thanks to the Linux-SH guys
23- ! for the algorithm. The original version of this routine was
24- ! taken from sh-stub.c.
23+ ! This routine goes through and flushes/invalidates the icache
24+ ! for a given range.
2525!
2626! r4 is starting address
27- ! r5 is count
27+ ! r5 is size
2828 . align 2
2929_icache_flush_range:
30+ tst r5 , r5 ! Test if size is 0
3031 mov .l ifr_addr , r0
32+
33+ bt .iflush_exit ! Exit early if no blocks to flush
3134 mov .l p2_mask , r1
35+
3236 or r1 , r0
3337 jmp @r0
3438 nop
@@ -41,15 +45,15 @@ _icache_flush_range:
4145 or r1 , r0
4246 ldc r0 , sr
4347
44- ! Get ending address from count and align start address
48+ ! Get ending address from size and align start address
4549 add r4 , r5
4650 mov .l align_mask , r0
4751 and r0 , r4
4852 mov .l ica_addr , r1
4953 mov .l ic_entry_mask , r2
5054 mov .l ic_valid_mask , r3
5155
52- .flush_loop :
56+ .iflush_loop :
5357 ! Write back D cache
5458 ocbwb @r4
5559
@@ -62,8 +66,8 @@ _icache_flush_range:
6266 and r3 , r7
6367
6468 add # 32 , r4 ! Move on to next cache block
65- cmp /hs r4 , r5
66- bt /s .flush_loop
69+ cmp /hi r4 , r5
70+ bt /s .iflush_loop
6771 mov .l r7 , @r6 ! * addr = data
6872
6973 ! Restore old SR
@@ -78,6 +82,8 @@ _icache_flush_range:
7882 nop
7983 nop
8084 nop
85+
86+ .iflush_exit:
8187 rts
8288 nop
8389
@@ -87,21 +93,25 @@ _icache_flush_range:
8793! if you care about the contents.
8894!
8995! r4 is starting address
90- ! r5 is count
96+ ! r5 is size
9197 . align 2
9298_dcache_inval_range:
93- ! Get ending address from count and align start address
94- add r4 , r5
95- mov .l align_mask , r0
96- and r0 , r4
99+ tst r5 , r5 ! Test if size is 0
100+ mov .l align_mask , r0
101+
102+ bt .dinval_exit ! Exit early if no blocks to inval
103+ add r4 , r5 ! Get ending address from size
104+
105+ and r0 , r4 ! Align start address
97106
98107.dinval_loop:
99108 ! Invalidate the dcache
100109 ocbi @r4
101- cmp /hs r4 , r5
102- bt /s .dinval_loop
103110 add # 32 , r4 ! Move on to next cache block
111+ cmp /hi r4 , r5
112+ bt .dinval_loop
104113
114+ .dinval_exit:
105115 rts
106116 nop
107117
@@ -112,29 +122,30 @@ _dcache_inval_range:
112122! we flush the whole cache instead.
113123!
114124! r4 is starting address
115- ! r5 is count
125+ ! r5 is size
116126 . align 2
117127_dcache_flush_range:
118- ! Divide byte count by 32
119- mov # - 5 , r1
120- shad r1 , r5
121-
122- ! Compare with flush_check
123- mov .w flush_check , r2
124- cmp /hi r2 , r5
125- bt _dcache_flush_all ! If lines > flush_check , jump to _dcache_flush_all
126-
127- ! Align start address
128+ ! Check that 0 < size <= flush_check
129+ tst r5 , r5
130+ mov .l flush_check , r2
131+
132+ bt .dflush_exit ! Exit early if no blocks to flush
128133 mov .l align_mask , r0
129- and r0 , r4
134+
135+ cmp /hi r2 , r5 ! Compare with flush_check
136+ add r4 , r5 ! Get ending address from size
137+
138+ bt _dcache_flush_all ! If size > flush_check , jump to _dcache_flush_all
139+ and r0 , r4 ! Align start address
130140
131141.dflush_loop:
132142 ! Write back the dcache
133143 ocbwb @r4
134- dt r5
135- bf/s .dflush_loop
136144 add # 32 , r4 ! Move on to next cache block
145+ cmp /hi r4 , r5
146+ bt .dflush_loop
137147
148+ .dflush_exit:
138149 rts
139150 nop
140151
@@ -167,29 +178,30 @@ _dcache_flush_all:
167178! we purge the whole cache instead.
168179!
169180! r4 is starting address
170- ! r5 is count
181+ ! r5 is size
171182 . align 2
172183_dcache_purge_range:
173- ! Divide byte count by 32
174- mov # - 5 , r1
175- shad r1 , r5
184+ ! Check that 0 < size <= purge_check
185+ tst r5 , r5
186+ mov .l purge_check , r2
187+
188+ bt .dpurge_exit ! Exit early if no blocks to purge
189+ mov .l align_mask , r0
176190
177- ! Compare with purge_check
178- mov .w purge_check , r2
179- cmp /hi r2 , r5
180- bt _dcache_purge_all ! If lines > purge_check , jump to _dcache_purge_all
191+ cmp /hi r2 , r5 ! Compare with purge_check
192+ add r4 , r5 ! Get ending address from size
181193
182- ! Align start address
183- mov .l align_mask , r0
184- and r0 , r4
194+ bt _dcache_purge_all ! If size > purge_check , jump to _dcache_purge_all
195+ and r0 , r4 ! Align start address
185196
186197.dpurge_loop:
187198 ! Write back and invalidate the D cache
188199 ocbp @r4
189- dt r5
190- bf/s .dpurge_loop
191200 add # 32 , r4 ! Move on to next cache block
201+ cmp /hi r4 , r5
202+ bt .dpurge_loop
192203
204+ .dpurge_exit:
193205 rts
194206 nop
195207
@@ -227,9 +239,9 @@ _dcache_purge_all_with_buffer:
227239 ! Allocate and then invalidate the dcache line
228240 movca.l r0 , @r4
229241 ocbi @r4
230- cmp /hs r4 , r5
231- bt /s .dpurge_all_buffer_loop
232242 add # 32 , r4 ! Move on to next cache block
243+ cmp /hi r4 , r5
244+ bt .dpurge_all_buffer_loop
233245
234246 rts
235247 nop
@@ -254,6 +266,16 @@ dca_addr:
254266dc_ubit_mask:
255267 .long 0xfffffffd ! Mask to zero out U bit
256268
269+ ! _dcache_flush_range can have size param set up to 66560 bytes
270+ ! and still be faster than dcache_flush_all.
271+ flush_check:
272+ .long 66560
273+
274+ ! _dcache_purge_range can have size param set up to 39936 bytes
275+ ! and still be faster than dcache_purge_all.
276+ purge_check:
277+ .long 39936
278+
257279! Shared
258280p2_mask:
259281 .long 0xa0000000
@@ -263,18 +285,5 @@ align_mask:
263285 .long ~ 31 ! Align address to 32 - byte boundary
264286cache_lines:
265287 . word 512 ! Total number of cache lines in dcache
266-
267- ! _dcache_flush_range can execute up to this amount of loops and
268- ! be at execution time of _dcache_flush_all. This means that
269- ! dcache_flush_range can have count param set up to 66560 bytes
270- ! and still be faster than dcache_flush_all.
271- flush_check:
272- . word 2080
273-
274- ! _dcache_purge_range can execute up to this amount of loops and
275- ! be at execution time of _dcache_purge_all. This means that
276- ! dcache_purge_range can have count param set up to 39936 bytes
277- ! and still be faster than dcache_purge_all.
278- purge_check:
279- . word 1248
288+
280289
0 commit comments