@@ -8,8 +8,9 @@ L_dyn_call_begin:
88 # At this point , the following registers are bound :
99 #
1010 # rdi < - callee
11- # rsi < - argv
12- # rdx < - argc
11+ # rsi < - process
12+ # rdx < - argv
13+ # rcx < - argc
1314 #
1415 # Save the parent base pointer for when control returns to this call frame.
1516 # CFA directives will inform the unwinder to expect rbp at the bottom of the
@@ -20,14 +21,16 @@ L_dyn_call_begin:
2021 mov rbp , rsp
2122 .cfi_def_cfa_register rbp
2223
23- # Save our callee and argv pointers , and argc
24+ # Pin callee pointer to r10
2425 mov r10 , rdi
25- mov r11 , rsi
26- mov rax , rdx
26+ # Pin the argv pointer to r11
27+ mov r11 , rdx
28+ # The process pointer needs to be in rdi
29+ mov rdi , rsi
2730
28- # Determine if spills are needed
31+ # Determine if spills are needed (process + argc must fit in 6 registers , i.e. argc <= 5)
2932 # In the common case in which they are not , we perform a tail call
30- cmp rdx , 7
33+ cmp rcx , 5 # rdi is taken by the process pointer ; the jump table only has entries for argc 0..5
3134 ja L_dyn_call_spill
3235
3336L_dyn_call_no_spill:
@@ -39,70 +42,70 @@ L_dyn_call_no_spill:
3942 # Calculate offset in jump table to block which handles the specific
4043 # number of registers we have arguments for , then jump to th at block
41- lea rcx , [ rip + L_dyn_call_jt ]
42- mov rax , [ rcx + rax * 4 ]
43- add rax , rcx
44- jmp [ rax ]
44+ lea rax , [ rip + L_dyn_call_jt ] # rax <- table base ; argc is still live in rcx and is the index
45+ movsxd rcx , dword ptr [ rax + 4 * rcx ] # rcx <- sign-extended 32-bit offset stored in entry argc
46+ add rax , rcx # rax <- table base + offset = address of the block for argc
47+ jmp rax
4548
4649 # All of these basic blocks perform a tail call . As such ,
4750 # the unwinder will skip over this frame should the callee
4851 # throw an exception
4952L_dyn_call_regs0:
5053 pop rbp
51- jmp [ r10 ]
54+ jmp r10 # tail call : r10 holds the callee address itself , so jump to the register
5255
5356L_dyn_call_regs1:
54- mov rdi , [ r11 ]
57+ mov rsi , [ r11 ] # rsi <- argv[0] ; rdi already carries the process pointer
5558 pop rbp
56- jmp [ r10 ]
59+ jmp r10
5760
5861L_dyn_call_regs2:
59- mov rdi , [ r11 ]
60- mov rsi , [ r11 + 8 ]
62+ mov rsi , [ r11 ] # rsi <- argv[0]
63+ mov rdx , [ r11 + 8 ] # rdx <- argv[1]
6164 pop rbp
62- jmp [ r10 ]
65+ jmp r10
6366
6467L_dyn_call_regs3:
65- mov rdi , [ r11 ]
66- mov rsi , [ r11 + 8 ]
67- mov rdx , [ r11 + 16 ]
68+ mov rsi , [ r11 ] # rsi <- argv[0]
69+ mov rdx , [ r11 + 8 ] # rdx <- argv[1]
70+ mov rcx , [ r11 + 16 ] # rcx <- argv[2]
6871 pop rbp
69- jmp [ r10 ]
72+ jmp r10
7073
7174L_dyn_call_regs4:
72- mov rdi , [ r11 ]
73- mov rsi , [ r11 + 8 ]
74- mov rdx , [ r11 + 16 ]
75- mov rcx , [ r11 + 24 ]
75+ mov rsi , [ r11 ] # rsi <- argv[0]
76+ mov rdx , [ r11 + 8 ] # rdx <- argv[1]
77+ mov rcx , [ r11 + 16 ] # rcx <- argv[2]
78+ mov r8 , [ r11 + 24 ] # r8 <- argv[3]
7679 pop rbp
77- jmp [ r10 ]
80+ jmp r10
7881
7982L_dyn_call_regs5:
80- mov rdi , [ r11 ]
81- mov rsi , [ r11 + 8 ]
82- mov rdx , [ r11 + 16 ]
83- mov rcx , [ r11 + 24 ]
84- mov r8 , [ r11 + 32 ]
83+ mov rsi , [ r11 ] # rsi <- argv[0]
84+ mov rdx , [ r11 + 8 ] # rdx <- argv[1]
85+ mov rcx , [ r11 + 16 ] # rcx <- argv[2]
86+ mov r8 , [ r11 + 24 ] # r8 <- argv[3]
87+ mov r9 , [ r11 + 32 ] # r9 <- argv[4] ; last register loaded by these blocks
8588 pop rbp
86- jmp [ r10 ]
87-
88- L_dyn_call_regs6:
89- mov rdi , [ r11 ]
90- mov rsi , [ r11 + 8 ]
91- mov rdx , [ r11 + 16 ]
92- mov rcx , [ r11 + 24 ]
93- mov r8 , [ r11 + 32 ]
94- mov r9 , [ r11 + 40 ]
95- pop rbp
96- jmp [ r10 ]
89+ jmp r10
9790
9891L_dyn_call_spill:
9992 # If we hit this block , we have identified th at there are
10093 # arguments to spill. We perform some setup for the actual
10194 # spilling , which is a loop built on ` rep movsq `
95+ #
96+ # At this point , the following registers are occupied/hold these values:
97+ #
98+ # r10 < - callee
99+ # rdi < - process
100+ # r11 < - argv
101+ # rcx < - argc
102+
103+ # rcx and rdi are overwritten for ` rep movsq ` below , so keep argc in r8 and the process pointer in r9
104+ mov r8 , rcx
105+ mov r9 , rdi
102106
103- # Calculate spill count for later ( rep uses rcx for the iteration count ,
107+ # Calculate spill count for later ( rep uses rcx for the iteration count `i` ,
104108 # which in this case is the number of quadwords to copy)
105- mov rcx , rdx
106- sub rcx , 6
109+ sub rcx , 5 # argv[0..4] are loaded into rsi/rdx/rcx/r8/r9 below , so argv[5..argc-1] must be spilled
108111 # Calculate spill space , and ensure it is rounded up to the nearest 16 bytes.
@@ -113,21 +116,22 @@ L_dyn_call_spill:
113116 sub rsp , rax
114117
115118 # load source pointer (last item of argv)
116- lea rsi , [ r11 + rdx * 8 - 8 ]
119+ lea rsi , [ r11 + r8 * 8 - 8 ]
117120 # load destination pointer (top of spill region)
118- lea rdi , [ rsp + rcx * 8 - 8 ]
119- # copy rcx quadwords from rsi to rdi , in reverse
121+ lea rdi , [ rsp + rcx * 8 - 8 ]
122+ # copy `i` quadwords from source to destination , in reverse
120123 std
121124 rep movsq
122125 cld
123126
124- # We've spilled arguments , so we have at least 6 args
125- mov rdi , [ r11 ]
126- mov rsi , [ r11 + 8 ]
127- mov rdx , [ r11 + 16 ]
128- mov rcx , [ r11 + 24 ]
129- mov r8 , [ r11 + 32 ]
130- mov r9 , [ r11 + 40 ]
127+ # We've spilled arguments , so we have at least 6 args , move them into their
128+ # final destination registers in preparation for the call
129+ mov rdi , r9
130+ mov rsi , [ r11 ]
131+ mov rdx , [ r11 + 8 ]
132+ mov rcx , [ r11 + 16 ]
133+ mov r8 , [ r11 + 24 ]
134+ mov r9 , [ r11 + 32 ]
131135
132136L_dyn_call_exec:
133137 # If we spill arguments to the stack , we can't perform
@@ -141,7 +145,7 @@ L_dyn_call_exec:
141145 # This instruction will push the return address and jump ,
142146 # and we can expect rbp to be the same as we left it upon
143147 # return.
144- call [ r10 ]
148+ call r10
145149
146150L_dyn_call_ret:
147151 # Non - tail call completed successfully
@@ -156,21 +160,19 @@ L_dyn_call_end:
156160 # a variable number of register - based arguments
157161 .p2align 2
158162 .data_region jt32
159- .set L_dyn_call_jt_entry0 , L_dyn_call_exec - L_dyn_call_jt
163+ .set L_dyn_call_jt_entry0 , L_dyn_call_regs0 - L_dyn_call_jt # each entry is its target block's offset from the table base ; entry 0 now targets L_dyn_call_regs0
160164 .set L_dyn_call_jt_entry1 , L_dyn_call_regs1 - L_dyn_call_jt
161165 .set L_dyn_call_jt_entry2 , L_dyn_call_regs2 - L_dyn_call_jt
162166 .set L_dyn_call_jt_entry3 , L_dyn_call_regs3 - L_dyn_call_jt
163167 .set L_dyn_call_jt_entry4 , L_dyn_call_regs4 - L_dyn_call_jt
164168 .set L_dyn_call_jt_entry5 , L_dyn_call_regs5 - L_dyn_call_jt
165- .set L_dyn_call_jt_entry6 , L_dyn_call_regs6 - L_dyn_call_jt
166169L_dyn_call_jt:
167170 .long L_dyn_call_jt_entry0
168171 .long L_dyn_call_jt_entry1
169172 .long L_dyn_call_jt_entry2
170173 .long L_dyn_call_jt_entry3
171174 .long L_dyn_call_jt_entry4
172175 .long L_dyn_call_jt_entry5
173- .long L_dyn_call_jt_entry6
174176 .end_data_region
175177
176178 # The following is the LSDA metadata for exception handling
0 commit comments