11export function memcpy ( dest : usize , src : usize , n : usize ) : void { // see: musl/src/string/memcpy.c
2- if ( ASC_SHRINK_LEVEL > 1 ) {
3- while ( n ) {
4- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
5- -- n ;
6- }
7- } else {
8- let w : u32 , x : u32 ;
2+ var w : u32 , x : u32 ;
93
10- // copy 1 byte each until src is aligned to 4 bytes
11- while ( n && ( src & 3 ) ) {
4+ // copy 1 byte each until src is aligned to 4 bytes
5+ while ( n && ( src & 3 ) ) {
6+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
7+ n -- ;
8+ }
9+
10+ // if dest is aligned to 4 bytes as well, copy 4 bytes each
11+ if ( ( dest & 3 ) == 0 ) {
12+ while ( n >= 16 ) {
13+ store < u32 > ( dest , load < u32 > ( src ) ) ;
14+ store < u32 > ( dest + 4 , load < u32 > ( src + 4 ) ) ;
15+ store < u32 > ( dest + 8 , load < u32 > ( src + 8 ) ) ;
16+ store < u32 > ( dest + 12 , load < u32 > ( src + 12 ) ) ;
17+ src += 16 ; dest += 16 ; n -= 16 ;
18+ }
19+ if ( n & 8 ) {
20+ store < u32 > ( dest , load < u32 > ( src ) ) ;
21+ store < u32 > ( dest + 4 , load < u32 > ( src + 4 ) ) ;
22+ dest += 8 ; src += 8 ;
23+ }
24+ if ( n & 4 ) {
25+ store < u32 > ( dest , load < u32 > ( src ) ) ;
26+ dest += 4 ; src += 4 ;
27+ }
28+ if ( n & 2 ) { // drop to 2 bytes each
29+ store < u16 > ( dest , load < u16 > ( src ) ) ;
30+ dest += 2 ; src += 2 ;
31+ }
32+ if ( n & 1 ) { // drop to 1 byte
1233 store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
13- n -- ;
1434 }
35+ return ;
36+ }
1537
16- // if dst is aligned to 4 bytes as well, copy 4 bytes each
17- if ( ( dest & 3 ) == 0 ) {
18- while ( n >= 16 ) {
19- store < u32 > ( dest , load < u32 > ( src ) ) ;
20- store < u32 > ( dest + 4 , load < u32 > ( src + 4 ) ) ;
21- store < u32 > ( dest + 8 , load < u32 > ( src + 8 ) ) ;
22- store < u32 > ( dest + 12 , load < u32 > ( src + 12 ) ) ;
23- src += 16 ; dest += 16 ; n -= 16 ;
24- }
25- if ( n & 8 ) {
26- store < u32 > ( dest , load < u32 > ( src ) ) ;
27- store < u32 > ( dest + 4 , load < u32 > ( src + 4 ) ) ;
28- dest += 8 ; src += 8 ;
29- }
30- if ( n & 4 ) {
31- store < u32 > ( dest , load < u32 > ( src ) ) ;
32- dest += 4 ; src += 4 ;
33- }
34- if ( n & 2 ) { // drop to 2 bytes each
35- store < u16 > ( dest , load < u16 > ( src ) ) ;
36- dest += 2 ; src += 2 ;
37- }
38- if ( n & 1 ) { // drop to 1 byte
38+ // if dest is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
39+ // doing shifts is faster when copying enough bytes (here: 32 or more)
40+ if ( n >= 32 ) {
41+ switch ( dest & 3 ) {
42+ // known to be != 0
43+ case 1 : {
44+ w = load < u32 > ( src ) ;
3945 store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
40- }
41- return ;
42- }
43-
44- // if dst is not aligned to 4 bytes, use alternating shifts to copy 4 bytes each
45- // doing shifts if faster when copying enough bytes (here: 32 or more)
46- if ( n >= 32 ) {
47- switch ( dest & 3 ) {
48- // known to be != 0
49- case 1 : {
50- w = load < u32 > ( src ) ;
51- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
52- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
53- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
54- n -= 3 ;
55- while ( n >= 17 ) {
56- x = load < u32 > ( src + 1 ) ;
57- store < u32 > ( dest , w >> 24 | x << 8 ) ;
58- w = load < u32 > ( src + 5 ) ;
59- store < u32 > ( dest + 4 , x >> 24 | w << 8 ) ;
60- x = load < u32 > ( src + 9 ) ;
61- store < u32 > ( dest + 8 , w >> 24 | x << 8 ) ;
62- w = load < u32 > ( src + 13 ) ;
63- store < u32 > ( dest + 12 , x >> 24 | w << 8 ) ;
64- src += 16 ; dest += 16 ; n -= 16 ;
65- }
66- break ;
46+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
47+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
48+ n -= 3 ;
49+ while ( n >= 17 ) {
50+ x = load < u32 > ( src + 1 ) ;
51+ store < u32 > ( dest , w >> 24 | x << 8 ) ;
52+ w = load < u32 > ( src + 5 ) ;
53+ store < u32 > ( dest + 4 , x >> 24 | w << 8 ) ;
54+ x = load < u32 > ( src + 9 ) ;
55+ store < u32 > ( dest + 8 , w >> 24 | x << 8 ) ;
56+ w = load < u32 > ( src + 13 ) ;
57+ store < u32 > ( dest + 12 , x >> 24 | w << 8 ) ;
58+ src += 16 ; dest += 16 ; n -= 16 ;
6759 }
68- case 2 : {
69- w = load < u32 > ( src ) ;
70- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
71- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
72- n -= 2 ;
73- while ( n >= 18 ) {
74- x = load < u32 > ( src + 2 ) ;
75- store < u32 > ( dest , w >> 16 | x << 16 ) ;
76- w = load < u32 > ( src + 6 ) ;
77- store < u32 > ( dest + 4 , x >> 16 | w << 16 ) ;
78- x = load < u32 > ( src + 10 ) ;
79- store < u32 > ( dest + 8 , w >> 16 | x << 16 ) ;
80- w = load < u32 > ( src + 14 ) ;
81- store < u32 > ( dest + 12 , x >> 16 | w << 16 ) ;
82- src += 16 ; dest += 16 ; n -= 16 ;
83- }
84- break ;
60+ break ;
61+ }
62+ case 2 : {
63+ w = load < u32 > ( src ) ;
64+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
65+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
66+ n -= 2 ;
67+ while ( n >= 18 ) {
68+ x = load < u32 > ( src + 2 ) ;
69+ store < u32 > ( dest , w >> 16 | x << 16 ) ;
70+ w = load < u32 > ( src + 6 ) ;
71+ store < u32 > ( dest + 4 , x >> 16 | w << 16 ) ;
72+ x = load < u32 > ( src + 10 ) ;
73+ store < u32 > ( dest + 8 , w >> 16 | x << 16 ) ;
74+ w = load < u32 > ( src + 14 ) ;
75+ store < u32 > ( dest + 12 , x >> 16 | w << 16 ) ;
76+ src += 16 ; dest += 16 ; n -= 16 ;
8577 }
86- case 3 : {
87- w = load < u32 > ( src ) ;
88- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
89- n -= 1 ;
90- while ( n >= 19 ) {
91- x = load < u32 > ( src + 3 ) ;
92- store < u32 > ( dest , w >> 8 | x << 24 ) ;
93- w = load < u32 > ( src + 7 ) ;
94- store < u32 > ( dest + 4 , x >> 8 | w << 24 ) ;
95- x = load < u32 > ( src + 11 ) ;
96- store < u32 > ( dest + 8 , w >> 8 | x << 24 ) ;
97- w = load < u32 > ( src + 15 ) ;
98- store < u32 > ( dest + 12 , x >> 8 | w << 24 ) ;
99- src += 16 ; dest += 16 ; n -= 16 ;
100- }
101- break ;
78+ break ;
79+ }
80+ case 3 : {
81+ w = load < u32 > ( src ) ;
82+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
83+ n -= 1 ;
84+ while ( n >= 19 ) {
85+ x = load < u32 > ( src + 3 ) ;
86+ store < u32 > ( dest , w >> 8 | x << 24 ) ;
87+ w = load < u32 > ( src + 7 ) ;
88+ store < u32 > ( dest + 4 , x >> 8 | w << 24 ) ;
89+ x = load < u32 > ( src + 11 ) ;
90+ store < u32 > ( dest + 8 , w >> 8 | x << 24 ) ;
91+ w = load < u32 > ( src + 15 ) ;
92+ store < u32 > ( dest + 12 , x >> 8 | w << 24 ) ;
93+ src += 16 ; dest += 16 ; n -= 16 ;
10294 }
95+ break ;
10396 }
10497 }
98+ }
10599
106- // copy remaining bytes one by one
107- if ( n & 16 ) {
108- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
109- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
110- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
111- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
112- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
113- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
114- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
115- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
116- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
117- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
118- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
119- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
120- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
121- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
122- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
123- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
124- }
125- if ( n & 8 ) {
126- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
127- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
128- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
129- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
130- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
131- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
132- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
133- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
134- }
135- if ( n & 4 ) {
136- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
137- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
138- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
139- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
140- }
141- if ( n & 2 ) {
142- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
143- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
144- }
145- if ( n & 1 ) {
146- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
147- }
100+ // copy remaining bytes one by one
101+ if ( n & 16 ) {
102+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
103+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
104+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
105+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
106+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
107+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
108+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
109+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
110+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
111+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
112+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
113+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
114+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
115+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
116+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
117+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
118+ }
119+ if ( n & 8 ) {
120+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
121+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
122+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
123+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
124+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
125+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
126+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
127+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
128+ }
129+ if ( n & 4 ) {
130+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
131+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
132+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
133+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
134+ }
135+ if ( n & 2 ) {
136+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
137+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
138+ }
139+ if ( n & 1 ) {
140+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
148141 }
149142}
150143
@@ -159,32 +152,36 @@ export function memmove(dest: usize, src: usize, n: usize): void { // see: musl/
159152 }
160153 }
161154 if ( dest < src ) {
162- if ( ( src & 7 ) == ( dest & 7 ) ) {
163- while ( dest & 7 ) {
164- if ( ! n ) return ;
165- -- n ;
166- store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
167- }
168- while ( n >= 8 ) {
169- store < u64 > ( dest , load < u64 > ( src ) ) ;
170- n -= 8 ;
171- dest += 8 ;
172- src += 8 ;
155+ if ( ASC_SHRINK_LEVEL < 2 ) {
156+ if ( ( src & 7 ) == ( dest & 7 ) ) {
157+ while ( dest & 7 ) {
158+ if ( ! n ) return ;
159+ -- n ;
160+ store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
161+ }
162+ while ( n >= 8 ) {
163+ store < u64 > ( dest , load < u64 > ( src ) ) ;
164+ n -= 8 ;
165+ dest += 8 ;
166+ src += 8 ;
167+ }
173168 }
174169 }
175170 while ( n ) {
176171 store < u8 > ( dest ++ , load < u8 > ( src ++ ) ) ;
177172 -- n ;
178173 }
179174 } else {
180- if ( ( src & 7 ) == ( dest & 7 ) ) {
181- while ( ( dest + n ) & 7 ) {
182- if ( ! n ) return ;
183- store < u8 > ( dest + -- n , load < u8 > ( src + n ) ) ;
184- }
185- while ( n >= 8 ) {
186- n -= 8 ;
187- store < u64 > ( dest + n , load < u64 > ( src + n ) ) ;
175+ if ( ASC_SHRINK_LEVEL < 2 ) {
176+ if ( ( src & 7 ) == ( dest & 7 ) ) {
177+ while ( ( dest + n ) & 7 ) {
178+ if ( ! n ) return ;
179+ store < u8 > ( dest + -- n , load < u8 > ( src + n ) ) ;
180+ }
181+ while ( n >= 8 ) {
182+ n -= 8 ;
183+ store < u64 > ( dest + n , load < u64 > ( src + n ) ) ;
184+ }
188185 }
189186 }
190187 while ( n ) {
0 commit comments