 */

/*
 * This exception handler handles EXCCAUSE_LOAD_STORE_ERROR. It allows for a
 * byte or short access to iRAM or PROGMEM to succeed without causing a crash.
 * When reading, it is still preferred to use the xxx_P macros when possible
 * since they are probably 30x faster than this exception handler method.
 *
 * Code taken directly from @pvvx's public domain code in
 * https://github.com/pvvx/esp8266web/blob/master/app/sdklib/system/app_main.c
#include <Schedule.h>
#include <debug.h>
3939
// All of these optimizations were tried and now work
// These results were from irammem.ino using GCC 10.2
//   DRAM reference                uint16, 9 AVG cycles/transfer
// #pragma GCC optimize("O0")   // uint16, 289 AVG cycles/transfer, IRAM: +180
// #pragma GCC optimize("O1")   // uint16, 241 AVG cycles/transfer, IRAM: +16
#pragma GCC optimize("O2")      // uint16, 230 AVG cycles/transfer, IRAM: +4
// #pragma GCC optimize("O3")   // uint16, 230 AVG cycles/transfer, IRAM: +4
// #pragma GCC optimize("Ofast")// uint16, 230 AVG cycles/transfer, IRAM: +4
// #pragma GCC optimize("Os")   // uint16, 233 AVG cycles/transfer, IRAM: 27556 +0

extern "C" {
4151
4252#define LOAD_MASK 0x00f00fu
@@ -50,32 +60,55 @@ extern "C" {
5060
static fn_c_exception_handler_t old_c_handler = NULL;
5262
53- static IRAM_ATTR void non32xfer_exception_handler (struct __exception_frame *ef, int cause)
63+ static
64+ IRAM_ATTR void non32xfer_exception_handler (struct __exception_frame *ef, int cause)
5465{
5566 do {
    /*
      In adapting the public domain version, a crash would come or go away with
      the slightest unrelated changes elsewhere in the function. Observed that
      register a15 was used for epc1, then clobbered by `rsr.` I now believe a
      "&" on the output register would have resolved the problem.

      However, I have refactored the Extended ASM to reduce and consolidate
      register usage and corrected the issue.

      The positioning of the Extended ASM block (as early as possible in the
      compiled function) is in part controlled by the immediate need for
      output variable `insn`. This placement aids in getting excvaddr read as
      early as possible.
    */
67- uint32_t insn;
68- __asm (
69- " movi %0, ~3\n\t " /* prepare a mask for the EPC */
70- " and %0, %0, %1\n\t " /* apply mask for 32bit aligned base */
71- " ssa8l %1\n\t " /* set up shift register for src op */
72- " l32i %1, %0, 0\n\t " /* load part 1 */
73- " l32i %0, %0, 4\n\t " /* load part 2 */
74- " src %0, %0, %1\n\t " /* right shift to get faulting instruction */
75- :" =&r" (insn)
76- :" r" (ef->epc )
77- :
78- );
81+ uint32_t insn, excvaddr;
82+ #if 1
83+ {
84+ uint32_t tmp;
85+ __asm__ (
86+ " rsr.excvaddr %[vaddr]\n\t " /* Read faulting address as early as possible */
87+ " movi.n %[tmp], ~3\n\t " /* prepare a mask for the EPC */
88+ " and %[tmp], %[tmp], %[epc]\n\t " /* apply mask for 32-bit aligned base */
89+ " ssa8l %[epc]\n\t " /* set up shift register for src op */
90+ " l32i %[insn], %[tmp], 0\n\t " /* load part 1 */
91+ " l32i %[tmp], %[tmp], 4\n\t " /* load part 2 */
92+ " src %[insn], %[tmp], %[insn]\n\t " /* right shift to get faulting instruction */
93+ : [vaddr]" =&r" (excvaddr), [insn]" =&r" (insn), [tmp]" =&r" (tmp)
94+ : [epc]" r" (ef->epc ) :);
95+ }
96+
97+ #else
98+ {
99+ __asm__ __volatile__ ("rsr.excvaddr %0;" : "=r"(excvaddr):: "memory");
      /*
        "C" reference code for the ASM to document intent.
        May also prove useful when isolating possible issues with Extended ASM,
        optimizations, new compilers, etc.
      */
105+ uint32_t epc = ef->epc;
106+ uint32_t *pWord = (uint32_t *)(epc & ~3);
107+ uint64_t big_word = ((uint64_t)pWord[1] << 32) | pWord[0];
108+ uint32_t pos = (epc & 3) * 8;
109+ insn = (uint32_t)(big_word >>= pos);
110+ }
111+ #endif
79112
80113 uint32_t what = insn & LOAD_MASK;
81114 uint32_t valmask = 0 ;
@@ -102,10 +135,6 @@ static IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef,
102135 --regno; /* account for skipped a1 in exception_frame */
103136 }
104137
105- uint32_t excvaddr;
106- /* read out the faulting address */
107- __asm (" rsr %0, EXCVADDR;" :" =r" (excvaddr)::);
108-
109138#ifdef DEBUG_ESP_MMU
110139 /* debug option to validate address so we don't hide memory access bugs in APP */
111140 if (mmu_is_iram ((void *)excvaddr) || (is_read && mmu_is_icache ((void *)excvaddr))) {
@@ -114,31 +143,34 @@ static IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef,
114143 continue ; /* fail */
115144 }
116145#endif
117-
118- if (is_read) {
119- /* Load, shift and mask down to correct size */
120- uint32_t val = (*(uint32_t *)(excvaddr & ~0x3 ));
121- val >>= (excvaddr & 0x3 ) * 8 ;
122- val &= valmask;
123-
124- /* Sign-extend for L16SI, if applicable */
125- if (what == L16SI_MATCH && (val & 0x8000 )) {
126- val |= 0xffff0000 ;
146+ {
147+ uint32_t *pWord = (uint32_t *)(excvaddr & ~0x3 );
148+ uint32_t pos = (excvaddr & 0x3 ) * 8 ;
149+ uint32_t mem_val = *pWord;
150+
151+ if (is_read) {
152+ /* shift and mask down to correct size */
153+ mem_val >>= pos;
154+ mem_val &= valmask;
155+
156+ /* Sign-extend for L16SI, if applicable */
157+ if (what == L16SI_MATCH && (mem_val & 0x8000 )) {
158+ mem_val |= 0xffff0000 ;
159+ }
160+
161+ ef->a_reg [regno] = mem_val; /* carry out the load */
162+
163+ } else { /* is write */
164+ uint32_t val = ef->a_reg [regno]; /* get value to store from register */
165+ val <<= pos;
166+ valmask <<= pos;
167+ val &= valmask;
168+
169+ /* mask out field, and merge */
170+ mem_val &= (~valmask);
171+ mem_val |= val;
172+ *pWord = mem_val; /* carry out the store */
127173 }
128-
129- ef->a_reg [regno] = val; /* carry out the load */
130-
131- } else { /* is write */
132- uint32_t val = ef->a_reg [regno]; /* get value to store from register */
133- val <<= (excvaddr & 0x3 ) * 8 ;
134- valmask <<= (excvaddr & 0x3 ) * 8 ;
135- val &= valmask;
136-
137- /* Load, mask out field, and merge */
138- uint32_t dst_val = (*(uint32_t *)(excvaddr & ~0x3 ));
139- dst_val &= (~valmask);
140- dst_val |= val;
141- (*(uint32_t *)(excvaddr & ~0x3 )) = dst_val; /* carry out the store */
142174 }
143175
144176 ef->epc += 3 ; /* resume at following instruction */
@@ -201,6 +233,7 @@ static void _set_exception_handler_wrapper(int cause) {
201233 }
202234}
203235
void install_non32xfer_exception_handler(void) __attribute__((weak));
204237void install_non32xfer_exception_handler (void ) {
205238 if (NULL == old_c_handler) {
206239 // Set the "C" exception handler the wrapper will call
0 commit comments