@@ -8,8 +8,9 @@ L_dyn_call_begin:
88 # At this point , the following registers are bound :
99 #
1010 # rdi < - callee
11- # rsi < - argv
12- # rdx < - argc
11+ # rsi < - process
12+ # rdx < - argv
13+ # rcx < - argc
1314 #
1415 # Save the parent base pointer for when control returns to this call frame.
1516 # CFA directives will inform the unwinder to expect rbp at the bottom of the
@@ -20,14 +21,15 @@ L_dyn_call_begin:
2021 mov rbp , rsp
2122 .cfi_def_cfa_register rbp
2223
23- # Save our callee and argv pointers , and argc
24+ # Save our callee , process and argv pointers , and argc
2425 mov r10 , rdi # r10 = callee
2526 mov r11 , rsi # r11 = process
26- mov rax , rdx
27+ mov rdi , rdx # rdi = argv
28+ mov rax , rcx # rax = argc , used as the jump table index
2729
28- # Determine if spills are needed
30+ # Determine if spills are needed. argv values are passed starting at rsi ,
+ # which leaves five registers ( rsi , rdx , rcx , r8 , r9 ) , and the jump table
+ # only has entries for argc 0..5 , so any argc above 5 must take the spill path.
2931 # In the common case in which they are not , we perform a tail call
30- cmp rdx , 7
32+ cmp rcx , 5
3133 ja L_dyn_call_spill
3234
3335L_dyn_call_no_spill:
@@ -38,62 +40,52 @@ L_dyn_call_no_spill:
3840
3941 # Calculate offset in jump table to block which handles the specific
4042 # number of registers we have arguments for , then jump to that block
41- lea rcx , [ rip + L_dyn_call_jt ]
42- mov rax , [ rcx + rax * 4 ]
43- add rax , rcx
44- jmp [ rax ]
43+ lea rdx , [ rip + L_dyn_call_jt ] # rdx = address of the jump table
44+ movsxd rax , dword ptr [ rdx + 4 * rax ] # rax holds argc here ; load its sign-extended 32-bit table entry
45+ add rax , rdx # entries are offsets from L_dyn_call_jt , so add the base back
46+ jmp rax # rax is the block address itself , so jump to it directly
4547
4648 # All of these basic blocks perform a tail call . As such ,
4749 # the unwinder will skip over this frame should the callee
4850 # throw an exception
+ #
+ # Register state on entry to each block : r10 = callee , r11 = process ,
+ # rdi = argv. Each block loads its argv values into rsi , rdx , rcx , r8 , r9
+ # in order , then replaces argv in rdi with the process pointer so the callee
+ # receives the same first argument as it does on the spill path.
4951L_dyn_call_regs0:
+ mov rdi , r11 # rdi = process ( callee's first argument )
5052 pop rbp
51- jmp [ r10 ]
53+ jmp r10
5254
5355L_dyn_call_regs1:
54- mov rdi , [ r11 ]
56+ mov rsi , [ rdi ]
+ mov rdi , r11 # argv is fully consumed ; rdi = process
5557 pop rbp
56- jmp [ r10 ]
58+ jmp r10
5759
5860L_dyn_call_regs2:
59- mov rdi , [ r11 ]
60- mov rsi , [ r11 + 8 ]
61+ mov rsi , [ rdi ]
62+ mov rdx , [ rdi + 8 ]
+ mov rdi , r11 # argv is fully consumed ; rdi = process
6163 pop rbp
62- jmp [ r10 ]
64+ jmp r10
6365
6466L_dyn_call_regs3:
65- mov rdi , [ r11 ]
66- mov rsi , [ r11 + 8 ]
67- mov rdx , [ r11 + 16 ]
67+ mov rsi , [ rdi ]
68+ mov rdx , [ rdi + 8 ]
69+ mov rcx , [ rdi + 16 ]
+ mov rdi , r11 # argv is fully consumed ; rdi = process
6870 pop rbp
69- jmp [ r10 ]
71+ jmp r10
7072
7173L_dyn_call_regs4:
72- mov rdi , [ r11 ]
73- mov rsi , [ r11 + 8 ]
74- mov rdx , [ r11 + 16 ]
75- mov rcx , [ r11 + 24 ]
74+ mov rsi , [ rdi ]
75+ mov rdx , [ rdi + 8 ]
76+ mov rcx , [ rdi + 16 ]
77+ mov r8 , [ rdi + 24 ]
+ mov rdi , r11 # argv is fully consumed ; rdi = process
7678 pop rbp
77- jmp [ r10 ]
79+ jmp r10
7880
7981L_dyn_call_regs5:
80- mov rdi , [ r11 ]
81- mov rsi , [ r11 + 8 ]
82- mov rdx , [ r11 + 16 ]
83- mov rcx , [ r11 + 24 ]
84- mov r8 , [ r11 + 32 ]
82+ mov rsi , [ rdi ]
83+ mov rdx , [ rdi + 8 ]
84+ mov rcx , [ rdi + 16 ]
85+ mov r8 , [ rdi + 24 ]
86+ mov r9 , [ rdi + 32 ]
+ mov rdi , r11 # argv is fully consumed ; rdi = process
8587 pop rbp
86- jmp [ r10 ]
87-
88- L_dyn_call_regs6:
89- mov rdi , [ r11 ]
90- mov rsi , [ r11 + 8 ]
91- mov rdx , [ r11 + 16 ]
92- mov rcx , [ r11 + 24 ]
93- mov r8 , [ r11 + 32 ]
94- mov r9 , [ r11 + 40 ]
95- pop rbp
96- jmp [ r10 ]
88+ jmp r10
9789
9890L_dyn_call_spill:
9991 # If we hit this block , we have identified that there are
@@ -102,7 +94,7 @@ L_dyn_call_spill:
10294
10395 # Calculate spill count for later ( rep uses rcx for the iteration count ,
10496 # which in this case is the number of quadwords to copy)
105- mov rcx , rdx
106- sub rcx , 6
97+ mov r8 , rcx # r8 = argc , used below to locate the last argv item
98+ sub rcx , 5 # rcx = argc - 5 : argv[0..4] travel in rsi , rdx , rcx , r8 , r9 ; the rest spill
10799
108100 # Calculate spill space , and ensure it is rounded up to the nearest 16 bytes.
@@ -113,21 +105,24 @@ L_dyn_call_spill:
113105 sub rsp , rax
114106
115107 # load source pointer (last item of argv)
116- lea rsi , [ r11 + rdx * 8 - 8 ]
108+ lea rsi , [ rdi + r8 * 8 - 8 ]
+ # argc is no longer needed , so reuse r8 to keep argv alive : rdi is about to
+ # become the copy destination and rep movsq modifies it
+ mov r8 , rdi
117109 # load destination pointer (top of spill region)
118- lea rdi , [ rsp + rcx * 8 - 8 ]
110+ lea rdi , [ rsp + rcx * 8 - 8 ]
119111 # copy rcx quadwords from rsi to rdi , in reverse
120112 std
121113 rep movsq
122114 cld
123115
124116 # We've spilled arguments , so we have at least 6 args
+ # argv[5] and up now live on the stack ; load argv[0..4] into the five
+ # registers that follow the process pointer
125- mov rdi , [ r11 ]
126- mov rsi , [ r11 + 8 ]
127- mov rdx , [ r11 + 16 ]
128- mov rcx , [ r11 + 24 ]
129- mov r8 , [ r11 + 32 ]
130- mov r9 , [ r11 + 40 ]
117+ mov rdi , r11 # Move process pointer to rdi
118+ mov r11 , r8 # r11 = argv ( saved in r8 before the copy )
119+ mov rsi , [ r11 ]
120+ mov rdx , [ r11 + 8 ]
121+ mov rcx , [ r11 + 16 ]
122+ mov r8 , [ r11 + 24 ]
123+ mov r9 , [ r11 + 32 ]
131126
132127L_dyn_call_exec:
133128 # If we spill arguments to the stack , we can't perform
@@ -141,7 +136,7 @@ L_dyn_call_exec:
141136 # This instruction will push the return address and jump ,
142137 # and we can expect rbp to be the same as we left it upon
143138 # return.
144- call [ r10 ]
139+ call r10 # r10 holds the callee address copied from rdi on entry , so call it directly
145140
146141L_dyn_call_ret:
147142 # Non - tail call completed successfully
@@ -156,21 +151,19 @@ L_dyn_call_end:
156151 # a variable number of register - based arguments
157152 .p2align 2 # align the table to a 4-byte boundary , matching its .long entries
158153 .data_region jt32 # marks what follows as jump table data ( presumably 32-bit entries , per the jt32 name )
159- .set L_dyn_call_jt_entry0 , L_dyn_call_exec - L_dyn_call_jt
154+ .set L_dyn_call_jt_entry0 , L_dyn_call_regs0 - L_dyn_call_jt # each entry is a handler's offset from the table base
160155 .set L_dyn_call_jt_entry1 , L_dyn_call_regs1 - L_dyn_call_jt
161156 .set L_dyn_call_jt_entry2 , L_dyn_call_regs2 - L_dyn_call_jt
162157 .set L_dyn_call_jt_entry3 , L_dyn_call_regs3 - L_dyn_call_jt
163158 .set L_dyn_call_jt_entry4 , L_dyn_call_regs4 - L_dyn_call_jt
164159 .set L_dyn_call_jt_entry5 , L_dyn_call_regs5 - L_dyn_call_jt
165- .set L_dyn_call_jt_entry6 , L_dyn_call_regs6 - L_dyn_call_jt
166160L_dyn_call_jt:
167161 .long L_dyn_call_jt_entry0 # index 0 : no argv values to load
168162 .long L_dyn_call_jt_entry1
169163 .long L_dyn_call_jt_entry2
170164 .long L_dyn_call_jt_entry3
171165 .long L_dyn_call_jt_entry4
172166 .long L_dyn_call_jt_entry5 # index 5 : last valid index ; the dispatcher must never index past it
173- .long L_dyn_call_jt_entry6
174167 .end_data_region
175168
176169 # The following is the LSDA metadata for exception handling
0 commit comments