1515// You should have received a copy of the GNU General Public License
1616// along with Aero. If not, see <https://www.gnu.org/licenses/>.
1717
18- #[ no_mangle]
18+ fn should_store_by_byte ( ) -> bool {
19+ let cpuid = raw_cpuid:: CpuId :: new ( ) ;
20+ if let Some ( features) = cpuid. get_extended_feature_info ( ) {
21+ // Check if "Enhanced" or "Fast Short" optimizations are available.
22+ features. has_rep_movsb_stosb ( )
23+ } else {
24+ false
25+ }
26+ }
27+
1928#[ naked]
2029unsafe extern "C" fn memcpy_movsq ( dest : * mut u8 , src : * const u8 , len : usize ) -> * mut u8 {
2130 // Registers used:
@@ -39,7 +48,33 @@ unsafe extern "C" fn memcpy_movsq(dest: *mut u8, src: *const u8, len: usize) ->
3948 ) ;
4049}
4150
42- #[ no_mangle]
51+ #[ naked]
52+ unsafe extern "C" fn memcpy_movsb ( dest : * mut u8 , src : * const u8 , len : usize ) -> * mut u8 {
53+ // Registers used:
54+ //
55+ // %rdi = argument 1, `dest`
56+ // %rsi = argument 2, `src`
57+ // %rdx = argument 3, `len`
58+ asm ! (
59+ // Save the return value.
60+ "mov rax, rdi" ,
61+ // Copy!
62+ "mov rcx, rdx" ,
63+ "rep movsb" ,
64+ "ret" ,
65+ options( noreturn)
66+ )
67+ }
68+
69+ #[ indirect]
70+ extern "C" fn memcpy ( ) -> fn ( * mut u8 , * const u8 , usize ) {
71+ if should_store_by_byte ( ) {
72+ memcpy_movsb
73+ } else {
74+ memcpy_movsq
75+ }
76+ }
77+
4378#[ naked]
4479unsafe extern "C" fn memset_stosq ( dest : * mut u8 , byte : i32 , len : usize ) -> * mut u8 {
4580 // Registers used:
@@ -70,6 +105,34 @@ unsafe extern "C" fn memset_stosq(dest: *mut u8, byte: i32, len: usize) -> *mut
70105 )
71106}
72107
108+ #[ naked]
109+ unsafe extern "C" fn memset_stosb ( dest : * mut u8 , byte : i32 , len : usize ) -> * mut u8 {
110+ // Registers used:
111+ //
112+ // %rdi = argument 1, `dest`
113+ // %rsi = argument 2, `byte`
114+ // %rdx = argument 3, `len`
115+ asm ! (
116+ // Save the return value.
117+ "mov r11, rdi" ,
118+ "mov al, sil" ,
119+ "mov rcx, rdx" ,
120+ "rep stosb" ,
121+ "mov rax, r11" ,
122+ "ret" ,
123+ options( noreturn)
124+ )
125+ }
126+
127+ #[ indirect]
128+ extern "C" fn memset ( ) -> fn ( * mut u8 , i32 , usize ) {
129+ if should_store_by_byte ( ) {
130+ memset_stosb
131+ } else {
132+ memset_stosq
133+ }
134+ }
135+
73136#[ no_mangle]
74137#[ naked]
75138unsafe extern "C" fn memmove_erms ( dest : * mut u8 , src : * const u8 , len : usize ) -> * mut u8 {
@@ -108,18 +171,6 @@ unsafe extern "C" fn memmove_erms(dest: *mut u8, src: *const u8, len: usize) ->
108171 )
109172}
110173
111- // FIXME(andypython): pick the best implementation for the current CPU using indirect functions.
112-
113- #[ no_mangle]
114- extern "C" fn memcpy ( dest : * mut u8 , src : * const u8 , len : usize ) -> * mut u8 {
115- unsafe { memcpy_movsq ( dest, src, len) }
116- }
117-
118- #[ no_mangle]
119- extern "C" fn memset ( dest : * mut u8 , byte : i32 , len : usize ) -> * mut u8 {
120- unsafe { memset_stosq ( dest, byte, len) }
121- }
122-
123174#[ no_mangle]
124175extern "C" fn memmove ( dest : * mut u8 , src : * const u8 , len : usize ) -> * mut u8 {
125176 unsafe { memmove_erms ( dest, src, len) }
0 commit comments