Skip to content

Commit 32eb146

Browse files
author
Herton R. Krzesinski
committed
Revert "x86: bring back rep movsq for user access on CPUs without ERMS"
JIRA: https://issues.redhat.com/browse/RHEL-79780 Upstream Status: RHEL Only This reverts commit 4c7f1f1. The change "x86: bring back rep movsq for user access", while aiming to fix the performance regression reported in RHEL-74389, unfortunately introduced another regression in the lnst mlx5 multistream TCP test. This is a "short blanket" problem: it looks like we fix one case to the detriment of another. It appears that some AMD Zen processors that do not advertise ERMS/FSRM are problematic with regard to memory copy and rep movs usage (and I guess they probably do not advertise/support it on some systems because they know it does not behave well with rep movs). Let's revert the change for now, until we can find a way to fix the original issue reported in RHEL-74389 without bringing another regression with it. Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
1 parent 5150d3a commit 32eb146

File tree

2 files changed

+45
-14
lines changed

2 files changed

+45
-14
lines changed

arch/x86/include/asm/uaccess_64.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ copy_user_generic(void *to, const void *from, unsigned long len)
133133
"2:\n"
134134
_ASM_EXTABLE_UA(1b, 2b)
135135
:"+c" (len), "+D" (to), "+S" (from), ASM_CALL_CONSTRAINT
136-
: : "memory", "rax");
136+
: : "memory", "rax", "r8", "r9", "r10", "r11");
137137
clac();
138138
return len;
139139
}

arch/x86/lib/copy_user_64.S

Lines changed: 44 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
* NOTE! The calling convention is very intentionally the same as
2828
* for 'rep movs', so that we can rewrite the function call with
2929
* just a plain 'rep movs' on machines that have FSRM. But to make
30-
* it simpler for us, we can clobber rsi/rdi and rax freely.
30+
* it simpler for us, we can clobber rsi/rdi and rax/r8-r11 freely.
3131
*/
3232
SYM_FUNC_START(rep_movs_alternative)
3333
cmpq $64,%rcx
@@ -68,24 +68,55 @@ SYM_FUNC_START(rep_movs_alternative)
6868
_ASM_EXTABLE_UA( 3b, .Lcopy_user_tail)
6969

7070
.Llarge:
71-
0: ALTERNATIVE "jmp .Llarge_movsq", "rep movsb", X86_FEATURE_ERMS
71+
0: ALTERNATIVE "jmp .Lunrolled", "rep movsb", X86_FEATURE_ERMS
7272
1: RET
7373

74-
_ASM_EXTABLE_UA( 0b, 1b)
74+
_ASM_EXTABLE_UA( 0b, 1b)
7575

76-
.Llarge_movsq:
77-
movq %rcx,%rax
78-
shrq $3,%rcx
79-
andl $7,%eax
80-
0: rep movsq
81-
movl %eax,%ecx
76+
.p2align 4
77+
.Lunrolled:
78+
10: movq (%rsi),%r8
79+
11: movq 8(%rsi),%r9
80+
12: movq 16(%rsi),%r10
81+
13: movq 24(%rsi),%r11
82+
14: movq %r8,(%rdi)
83+
15: movq %r9,8(%rdi)
84+
16: movq %r10,16(%rdi)
85+
17: movq %r11,24(%rdi)
86+
20: movq 32(%rsi),%r8
87+
21: movq 40(%rsi),%r9
88+
22: movq 48(%rsi),%r10
89+
23: movq 56(%rsi),%r11
90+
24: movq %r8,32(%rdi)
91+
25: movq %r9,40(%rdi)
92+
26: movq %r10,48(%rdi)
93+
27: movq %r11,56(%rdi)
94+
addq $64,%rsi
95+
addq $64,%rdi
96+
subq $64,%rcx
97+
cmpq $64,%rcx
98+
jae .Lunrolled
99+
cmpl $8,%ecx
100+
jae .Lword
82101
testl %ecx,%ecx
83102
jne .Lcopy_user_tail
84103
RET
85104

86-
1: leaq (%rax,%rcx,8),%rcx
87-
jmp .Lcopy_user_tail
88-
89-
_ASM_EXTABLE_UA( 0b, 1b)
105+
_ASM_EXTABLE_UA(10b, .Lcopy_user_tail)
106+
_ASM_EXTABLE_UA(11b, .Lcopy_user_tail)
107+
_ASM_EXTABLE_UA(12b, .Lcopy_user_tail)
108+
_ASM_EXTABLE_UA(13b, .Lcopy_user_tail)
109+
_ASM_EXTABLE_UA(14b, .Lcopy_user_tail)
110+
_ASM_EXTABLE_UA(15b, .Lcopy_user_tail)
111+
_ASM_EXTABLE_UA(16b, .Lcopy_user_tail)
112+
_ASM_EXTABLE_UA(17b, .Lcopy_user_tail)
113+
_ASM_EXTABLE_UA(20b, .Lcopy_user_tail)
114+
_ASM_EXTABLE_UA(21b, .Lcopy_user_tail)
115+
_ASM_EXTABLE_UA(22b, .Lcopy_user_tail)
116+
_ASM_EXTABLE_UA(23b, .Lcopy_user_tail)
117+
_ASM_EXTABLE_UA(24b, .Lcopy_user_tail)
118+
_ASM_EXTABLE_UA(25b, .Lcopy_user_tail)
119+
_ASM_EXTABLE_UA(26b, .Lcopy_user_tail)
120+
_ASM_EXTABLE_UA(27b, .Lcopy_user_tail)
90121
SYM_FUNC_END(rep_movs_alternative)
91122
EXPORT_SYMBOL(rep_movs_alternative)

0 commit comments

Comments
 (0)