|
27 | 27 | * NOTE! The calling convention is very intentionally the same as |
28 | 28 | * for 'rep movs', so that we can rewrite the function call with |
29 | 29 | * just a plain 'rep movs' on machines that have FSRM. But to make |
30 | | - * it simpler for us, we can clobber rsi/rdi and rax/r8-r11 freely. |
| 30 | + * it simpler for us, we can clobber rsi/rdi and rax freely. |
31 | 31 | */ |
32 | 32 | SYM_FUNC_START(rep_movs_alternative) |
33 | 33 | cmpq $64,%rcx |
@@ -68,55 +68,24 @@ SYM_FUNC_START(rep_movs_alternative) |
68 | 68 | _ASM_EXTABLE_UA( 3b, .Lcopy_user_tail) |
69 | 69 |
|
70 | 70 | .Llarge: |
71 | | -0: ALTERNATIVE "jmp .Lunrolled", "rep movsb", X86_FEATURE_ERMS |
| 71 | +0: ALTERNATIVE "jmp .Llarge_movsq", "rep movsb", X86_FEATURE_ERMS |
72 | 72 | 1: RET |
73 | 73 |
|
74 | | - _ASM_EXTABLE_UA( 0b, 1b) |
| 74 | + _ASM_EXTABLE_UA( 0b, 1b) |
75 | 75 |
|
76 | | - .p2align 4 |
77 | | -.Lunrolled: |
78 | | -10: movq (%rsi),%r8 |
79 | | -11: movq 8(%rsi),%r9 |
80 | | -12: movq 16(%rsi),%r10 |
81 | | -13: movq 24(%rsi),%r11 |
82 | | -14: movq %r8,(%rdi) |
83 | | -15: movq %r9,8(%rdi) |
84 | | -16: movq %r10,16(%rdi) |
85 | | -17: movq %r11,24(%rdi) |
86 | | -20: movq 32(%rsi),%r8 |
87 | | -21: movq 40(%rsi),%r9 |
88 | | -22: movq 48(%rsi),%r10 |
89 | | -23: movq 56(%rsi),%r11 |
90 | | -24: movq %r8,32(%rdi) |
91 | | -25: movq %r9,40(%rdi) |
92 | | -26: movq %r10,48(%rdi) |
93 | | -27: movq %r11,56(%rdi) |
94 | | - addq $64,%rsi |
95 | | - addq $64,%rdi |
96 | | - subq $64,%rcx |
97 | | - cmpq $64,%rcx |
98 | | - jae .Lunrolled |
99 | | - cmpl $8,%ecx |
100 | | - jae .Lword |
| 76 | +.Llarge_movsq: |
| 77 | + movq %rcx,%rax |
| 78 | + shrq $3,%rcx |
| 79 | + andl $7,%eax |
| 80 | +0: rep movsq |
| 81 | + movl %eax,%ecx |
101 | 82 | testl %ecx,%ecx |
102 | 83 | jne .Lcopy_user_tail |
103 | 84 | RET |
104 | 85 |
|
105 | | - _ASM_EXTABLE_UA(10b, .Lcopy_user_tail) |
106 | | - _ASM_EXTABLE_UA(11b, .Lcopy_user_tail) |
107 | | - _ASM_EXTABLE_UA(12b, .Lcopy_user_tail) |
108 | | - _ASM_EXTABLE_UA(13b, .Lcopy_user_tail) |
109 | | - _ASM_EXTABLE_UA(14b, .Lcopy_user_tail) |
110 | | - _ASM_EXTABLE_UA(15b, .Lcopy_user_tail) |
111 | | - _ASM_EXTABLE_UA(16b, .Lcopy_user_tail) |
112 | | - _ASM_EXTABLE_UA(17b, .Lcopy_user_tail) |
113 | | - _ASM_EXTABLE_UA(20b, .Lcopy_user_tail) |
114 | | - _ASM_EXTABLE_UA(21b, .Lcopy_user_tail) |
115 | | - _ASM_EXTABLE_UA(22b, .Lcopy_user_tail) |
116 | | - _ASM_EXTABLE_UA(23b, .Lcopy_user_tail) |
117 | | - _ASM_EXTABLE_UA(24b, .Lcopy_user_tail) |
118 | | - _ASM_EXTABLE_UA(25b, .Lcopy_user_tail) |
119 | | - _ASM_EXTABLE_UA(26b, .Lcopy_user_tail) |
120 | | - _ASM_EXTABLE_UA(27b, .Lcopy_user_tail) |
| 86 | +1: leaq (%rax,%rcx,8),%rcx |
| 87 | + jmp .Lcopy_user_tail |
| 88 | + |
| 89 | + _ASM_EXTABLE_UA( 0b, 1b) |
121 | 90 | SYM_FUNC_END(rep_movs_alternative) |
122 | 91 | EXPORT_SYMBOL(rep_movs_alternative) |
0 commit comments