Skip to content

Commit 1f7b7ff

Browse files
committed
[fw-isoldr] Update cache routines.
1 parent 4c2eb86 commit 1f7b7ff

File tree

3 files changed

+91
-65
lines changed

3 files changed

+91
-65
lines changed

firmware/isoldr/loader/cdda.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -691,15 +691,15 @@ static void aica_pcm_split_sq(uint32 data, uint32 aica_left, uint32 aica_right,
691691
}
692692

693693
/* Write-back SQ0 */
694-
dcache_pref_block(masked_left);
694+
dcache_wback_sq(masked_left);
695695

696696
/* Fill SQ1 */
697697
for(i = 16; i < 32; i += 2) {
698698
masked_left[i / 2] = (s[i * 2] << 16) | s[(i + 1) * 2];
699699
}
700700

701701
/* Write-back SQ1 */
702-
dcache_pref_block(masked_left + 8);
702+
dcache_wback_sq(masked_left + 8);
703703
masked_left += 16;
704704

705705
/* Fill SQ0 */
@@ -708,15 +708,15 @@ static void aica_pcm_split_sq(uint32 data, uint32 aica_left, uint32 aica_right,
708708
}
709709

710710
/* Write-back SQ0 */
711-
dcache_pref_block(masked_right);
711+
dcache_wback_sq(masked_right);
712712

713713
/* Fill SQ1 */
714714
for(i = 16; i < 32; i += 2) {
715715
masked_right[i / 2] = (s[(i * 2) + 1] << 16) | s[((i + 1) * 2) + 1];
716716
}
717717

718718
/* Write-back SQ1 */
719-
dcache_pref_block(masked_right + 8);
719+
dcache_wback_sq(masked_right + 8);
720720
masked_right += 16;
721721
s += 64;
722722
}

firmware/isoldr/loader/kos/arch/cache.h

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,9 @@
66
Copyright (C) 2023 Andy Barajas
77
*/
88

9-
/** \file arch/cache.h
10-
\brief Cache management functionality.
9+
/** \file arch/cache.h
10+
\brief Cache management functionality.
11+
\ingroup system_cache
1112
1213
This file contains definitions for functions that manage the cache in the
1314
Dreamcast, including functions to flush, invalidate, purge, prefetch and
@@ -27,6 +28,13 @@ __BEGIN_DECLS
2728
#include <stdint.h>
2829
#include <arch/types.h>
2930

31+
/** \defgroup system_cache Cache
32+
\brief Driver and API for managing the SH4's cache
33+
\ingroup system
34+
35+
@{
36+
*/
37+
3038
/** \brief SH4 cache block size.
3139
3240
The size of a cache block.
@@ -137,6 +145,14 @@ static __always_inline void dcache_pref_block(const void *src) {
137145
);
138146
}
139147

148+
/** \brief Write-back Store Queue buffer to external memory
149+
150+
This function initiates write-back for one Store Queue.
151+
152+
\param ptr         The SQ-mapped address to write back.
153+
*/
154+
#define dcache_wback_sq(ptr) dcache_pref_block(ptr)
155+
140156
/** \brief Allocate one block of the data/operand cache.
141157
142158
This function allocates a block of the data/operand cache.
@@ -152,6 +168,7 @@ static __always_inline void dcache_alloc_block(const void *src, uint32_t value)
152168
);
153169
}
154170

171+
/** @} */
155172

156173
__END_DECLS
157174

firmware/isoldr/loader/kos/src/cache.s

Lines changed: 68 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55
!
66
! Copyright (C) 2001 Megan Potter
77
! Copyright (C) 2014, 2016, 2023 Ruslan Rostovtsev
8-
! Copyright (C) 2023 Andy Barajas
8+
! Copyright (C) 2023, 2024 Andy Barajas
9+
! Copyright (C) 2024 Paul Cercueil
910
!
1011
! Optimized assembler code for managing the cache.
1112
!
@@ -19,16 +20,19 @@
1920
.globl _dcache_purge_all
2021
.globl _dcache_purge_all_with_buffer
2122

22-
! Routine to flush parts of cache.. Thanks to the Linux-SH guys
23-
! for the algorithm. The original version of this routine was
24-
! taken from sh-stub.c.
23+
! This routine goes through and flushes/invalidates the icache
24+
! for a given range.
2525
!
2626
! r4 is starting address
27-
! r5 is count
27+
! r5 is size
2828
.align 2
2929
_icache_flush_range:
30+
tst r5, r5 ! Test if size is 0
3031
mov.l ifr_addr, r0
32+
33+
bt .iflush_exit ! Exit early if no blocks to flush
3134
mov.l p2_mask, r1
35+
3236
or r1, r0
3337
jmp @r0
3438
nop
@@ -41,15 +45,15 @@ _icache_flush_range:
4145
or r1, r0
4246
ldc r0, sr
4347

44-
! Get ending address from count and align start address
48+
! Get ending address from size and align start address
4549
add r4, r5
4650
mov.l align_mask, r0
4751
and r0, r4
4852
mov.l ica_addr, r1
4953
mov.l ic_entry_mask, r2
5054
mov.l ic_valid_mask, r3
5155

52-
.flush_loop:
56+
.iflush_loop:
5357
! Write back D cache
5458
ocbwb @r4
5559

@@ -62,8 +66,8 @@ _icache_flush_range:
6266
and r3, r7
6367

6468
add #32, r4 ! Move on to next cache block
65-
cmp/hs r4, r5
66-
bt/s .flush_loop
69+
cmp/hi r4, r5
70+
bt/s .iflush_loop
6771
mov.l r7, @r6 ! *addr = data
6872

6973
! Restore old SR
@@ -78,6 +82,8 @@ _icache_flush_range:
7882
nop
7983
nop
8084
nop
85+
86+
.iflush_exit:
8187
rts
8288
nop
8389

@@ -87,21 +93,25 @@ _icache_flush_range:
8793
! if you care about the contents.
8894
!
8995
! r4 is starting address
90-
! r5 is count
96+
! r5 is size
9197
.align 2
9298
_dcache_inval_range:
93-
! Get ending address from count and align start address
94-
add r4, r5
95-
mov.l align_mask, r0
96-
and r0, r4
99+
tst r5, r5 ! Test if size is 0
100+
mov.l align_mask, r0
101+
102+
bt .dinval_exit ! Exit early if no blocks to inval
103+
add r4, r5 ! Get ending address from size
104+
105+
and r0, r4 ! Align start address
97106

98107
.dinval_loop:
99108
! Invalidate the dcache
100109
ocbi @r4
101-
cmp/hs r4, r5
102-
bt/s .dinval_loop
103110
add #32, r4 ! Move on to next cache block
111+
cmp/hi r4, r5
112+
bt .dinval_loop
104113

114+
.dinval_exit:
105115
rts
106116
nop
107117

@@ -112,29 +122,30 @@ _dcache_inval_range:
112122
! we flush the whole cache instead.
113123
!
114124
! r4 is starting address
115-
! r5 is count
125+
! r5 is size
116126
.align 2
117127
_dcache_flush_range:
118-
! Divide byte count by 32
119-
mov #-5, r1
120-
shad r1, r5
121-
122-
! Compare with flush_check
123-
mov.w flush_check, r2
124-
cmp/hi r2, r5
125-
bt _dcache_flush_all ! If lines > flush_check, jump to _dcache_flush_all
126-
127-
! Align start address
128+
! Check that 0 < size <= flush_check
129+
tst r5, r5
130+
mov.l flush_check, r2
131+
132+
bt .dflush_exit ! Exit early if no blocks to flush
128133
mov.l align_mask, r0
129-
and r0, r4
134+
135+
cmp/hi r2, r5 ! Compare with flush_check
136+
add r4, r5 ! Get ending address from size
137+
138+
bt _dcache_flush_all ! If size > flush_check, jump to _dcache_flush_all
139+
and r0, r4 ! Align start address
130140

131141
.dflush_loop:
132142
! Write back the dcache
133143
ocbwb @r4
134-
dt r5
135-
bf/s .dflush_loop
136144
add #32, r4 ! Move on to next cache block
145+
cmp/hi r4, r5
146+
bt .dflush_loop
137147

148+
.dflush_exit:
138149
rts
139150
nop
140151

@@ -167,29 +178,30 @@ _dcache_flush_all:
167178
! we purge the whole cache instead.
168179
!
169180
! r4 is starting address
170-
! r5 is count
181+
! r5 is size
171182
.align 2
172183
_dcache_purge_range:
173-
! Divide byte count by 32
174-
mov #-5, r1
175-
shad r1, r5
184+
! Check that 0 < size <= purge_check
185+
tst r5, r5
186+
mov.l purge_check, r2
187+
188+
bt .dpurge_exit ! Exit early if no blocks to purge
189+
mov.l align_mask, r0
176190

177-
! Compare with purge_check
178-
mov.w purge_check, r2
179-
cmp/hi r2, r5
180-
bt _dcache_purge_all ! If lines > purge_check, jump to _dcache_purge_all
191+
cmp/hi r2, r5 ! Compare with purge_check
192+
add r4, r5 ! Get ending address from size
181193

182-
! Align start address
183-
mov.l align_mask, r0
184-
and r0, r4
194+
bt _dcache_purge_all ! If size > purge_check, jump to _dcache_purge_all
195+
and r0, r4 ! Align start address
185196

186197
.dpurge_loop:
187198
! Write back and invalidate the D cache
188199
ocbp @r4
189-
dt r5
190-
bf/s .dpurge_loop
191200
add #32, r4 ! Move on to next cache block
201+
cmp/hi r4, r5
202+
bt .dpurge_loop
192203

204+
.dpurge_exit:
193205
rts
194206
nop
195207

@@ -227,9 +239,9 @@ _dcache_purge_all_with_buffer:
227239
! Allocate and then invalidate the dcache line
228240
movca.l r0, @r4
229241
ocbi @r4
230-
cmp/hs r4, r5
231-
bt/s .dpurge_all_buffer_loop
232242
add #32, r4 ! Move on to next cache block
243+
cmp/hi r4, r5
244+
bt .dpurge_all_buffer_loop
233245

234246
rts
235247
nop
@@ -254,6 +266,16 @@ dca_addr:
254266
dc_ubit_mask:
255267
.long 0xfffffffd ! Mask to zero out U bit
256268

269+
! _dcache_flush_range can have size param set up to 66560 bytes
270+
! and still be faster than dcache_flush_all.
271+
flush_check:
272+
.long 66560
273+
274+
! _dcache_purge_range can have size param set up to 39936 bytes
275+
! and still be faster than dcache_purge_all.
276+
purge_check:
277+
.long 39936
278+
257279
! Shared
258280
p2_mask:
259281
.long 0xa0000000
@@ -263,18 +285,5 @@ align_mask:
263285
.long ~31 ! Align address to 32-byte boundary
264286
cache_lines:
265287
.word 512 ! Total number of cache lines in dcache
266-
267-
! _dcache_flush_range can execute up to this amount of loops and
268-
! beat execution time of _dcache_flush_all. This means that
269-
! dcache_flush_range can have count param set up to 66560 bytes
270-
! and still be faster than dcache_flush_all.
271-
flush_check:
272-
.word 2080
273-
274-
! _dcache_purge_range can execute up to this amount of loops and
275-
! beat execution time of _dcache_purge_all. This means that
276-
! dcache_purge_range can have count param set up to 39936 bytes
277-
! and still be faster than dcache_purge_all.
278-
purge_check:
279-
.word 1248
288+
280289

0 commit comments

Comments
 (0)