@@ -44,7 +44,7 @@ let stack_alignment () = 16
4444let _Plea (r , addr ) =
4545 if Archi. ptr64 then Pleaq (r, addr) else Pleal (r, addr)
4646
47- (* SP adjustment to allocate or free a stack frame *)
47+ (* SP adjustment to allocate or free a stack frame. *)
4848
4949let align n a =
5050 if n > = 0 then (n + a - 1 ) land (- a) else n land (- a)
@@ -56,7 +56,7 @@ let sp_adjustment_32 sz =
5656 (* The top 4 bytes have already been allocated by the "call" instruction. *)
5757 sz - 4
5858
59- let sp_adjustment_64 sz =
59+ let sp_adjustment_elf64 sz =
6060 let sz = Z. to_int sz in
6161 if is_current_function_variadic() then begin
6262 (* If variadic, add room for register save area, which must be 16-aligned *)
@@ -73,6 +73,13 @@ let sp_adjustment_64 sz =
7373 (sz - 8 , - 1 )
7474 end
7575
76+ let sp_adjustment_win64 sz =
77+ let sz = Z. to_int sz in
78+ (* Preserve proper alignment of the stack *)
79+ let sz = align sz 16 in
80+ (* The top 8 bytes have already been allocated by the "call" instruction. *)
81+ sz - 8
82+
7683(* Built-ins. They come in two flavors:
7784 - annotation statements: take their arguments in registers or stack
7885 locations; generate no code;
@@ -256,7 +263,7 @@ let expand_builtin_va_start_32 r =
256263 emit (Pleal (RAX , linear_addr RSP (Z. of_uint32 ofs)));
257264 emit (Pmovl_mr (linear_addr r _0z, RAX ))
258265
259- let expand_builtin_va_start_64 r =
266+ let expand_builtin_va_start_elf64 r =
260267 if not (is_current_function_variadic () ) then
261268 invalid_arg " Fatal error: va_start used in non-vararg function" ;
262269 let (ir, fr, ofs) =
@@ -287,6 +294,17 @@ let expand_builtin_va_start_64 r =
287294 emit (Pleaq (RAX , linear_addr RSP (Z. of_uint64 reg_save_area)));
288295 emit (Pmovq_mr (linear_addr r _16z, RAX ))
289296
297+ let expand_builtin_va_start_win64 r =
298+ if not (is_current_function_variadic () ) then
299+ invalid_arg " Fatal error: va_start used in non-vararg function" ;
300+ let num_args =
301+ List. length (get_current_function_args() ) in
302+ let ofs =
303+ Int64. (add ! current_function_stacksize
304+ (mul 8L (of_int num_args))) in
305+ emit (Pleaq (RAX , linear_addr RSP (Z. of_uint64 ofs)));
306+ emit (Pmovq_mr (linear_addr r _0z, RAX ))
307+
290308(* FMA operations *)
291309
292310(* vfmadd<i><j><k> r1, r2, r3 performs r1 := ri * rj + rk
@@ -463,8 +481,8 @@ let expand_builtin_inline name args res =
463481 (* Vararg stuff *)
464482 | "__builtin_va_start" , [BA (IR a)], _ ->
465483 assert (a = RDX );
466- if Archi. ptr64
467- then expand_builtin_va_start_64 a
484+ if Archi. win64 then expand_builtin_va_start_win64 a
485+ else if Archi. ptr64 then expand_builtin_va_start_elf64 a
468486 else expand_builtin_va_start_32 a
469487 (* Synchronization *)
470488 | "__builtin_membar" , [] , _ ->
@@ -476,24 +494,66 @@ let expand_builtin_inline name args res =
476494 | _ ->
477495 raise (Error (" unrecognized builtin " ^ name))
478496
479- (* Calls to variadic functions for x86-64: register AL must contain
497+ (* Calls to variadic functions for x86-64 ELF : register AL must contain
480498 the number of XMM registers used for parameter passing. To be on
481- the safe side. do the same if the called function is
499+ the safe side, do the same if the called function is
482500 unprototyped. *)
483501
484- let set_al sg =
485- if Archi. ptr64 && ( sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto) then begin
502+ let fixup_funcall_elf64 sg =
503+ if sg.sig_cc.cc_vararg || sg.sig_cc.cc_unproto then begin
486504 let (ir, fr, ofs) = next_arg_locations 0 0 0 sg.sig_args in
487505 emit (Pmovl_ri (RAX , coqint_of_camlint (Int32. of_int fr)))
488506 end
489507
508+ (* Calls to variadic functions for x86-64 Windows:
509+ FP arguments passed in FP registers must also be passed in integer
510+ registers.
511+ *)
512+
513+ let rec copy_fregs_to_iregs args fr ir =
514+ match (ir, fr, args) with
515+ | (i1 :: ir , f1 :: fr , (Tfloat | Tsingle ) :: args ) ->
516+ emit (Pmovq_rf (i1, f1));
517+ copy_fregs_to_iregs args fr ir
518+ | (i1 :: ir , f1 :: fr , _ :: args ) ->
519+ copy_fregs_to_iregs args fr ir
520+ | _ ->
521+ ()
522+
523+ let fixup_funcall_win64 sg =
524+ if sg.sig_cc.cc_vararg then
525+ copy_fregs_to_iregs sg.sig_args [XMM0 ; XMM1 ; XMM2 ; XMM3 ] [RCX ; RDX ; R8 ; R9 ]
526+
527+ let fixup_funcall sg =
528+ if Archi. ptr64
529+ then if Archi. win64
530+ then fixup_funcall_win64 sg
531+ else fixup_funcall_elf64 sg
532+ else ()
533+
490534(* Expansion of instructions *)
491535
492536let expand_instruction instr =
493537 match instr with
494538 | Pallocframe (sz , ofs_ra , ofs_link ) ->
495- if Archi. ptr64 then begin
496- let (sz, save_regs) = sp_adjustment_64 sz in
539+ if Archi. win64 then begin
540+ let sz = sp_adjustment_win64 sz in
541+ if is_current_function_variadic() then
542+ (* Save parameters passed in registers in reserved stack area *)
543+ emit (Pcall_s (intern_string " __compcert_va_saveregs" ,
544+ {sig_args = [] ; sig_res = Tvoid ; sig_cc = cc_default}));
545+ (* Allocate frame *)
546+ let sz' = Z. of_uint sz in
547+ emit (Psubl_ri (RSP , sz'));
548+ emit (Pcfi_adjust sz');
549+ (* Stack chaining *)
550+ let addr1 = linear_addr RSP (Z. of_uint (sz + 8 )) in
551+ let addr2 = linear_addr RSP ofs_link in
552+ emit (Pleaq (RAX ,addr1));
553+ emit (Pmovq_mr (addr2, RAX ));
554+ current_function_stacksize := Int64. of_int (sz + 8 )
555+ end else if Archi. ptr64 then begin
556+ let (sz, save_regs) = sp_adjustment_elf64 sz in
497557 (* Allocate frame *)
498558 let sz' = Z. of_uint sz in
499559 emit (Psubq_ri (RSP , sz'));
@@ -525,15 +585,18 @@ let expand_instruction instr =
525585 PrintAsmaux. current_function_stacksize := Int32. of_int sz
526586 end
527587 | Pfreeframe (sz , ofs_ra , ofs_link ) ->
528- if Archi. ptr64 then begin
529- let (sz, _) = sp_adjustment_64 sz in
588+ if Archi. win64 then begin
589+ let sz = sp_adjustment_win64 sz in
590+ emit (Paddq_ri (RSP , Z. of_uint sz))
591+ end else if Archi. ptr64 then begin
592+ let (sz, _) = sp_adjustment_elf64 sz in
530593 emit (Paddq_ri (RSP , Z. of_uint sz))
531594 end else begin
532595 let sz = sp_adjustment_32 sz in
533596 emit (Paddl_ri (RSP , Z. of_uint sz))
534597 end
535598 | Pjmp_s (_ , sg ) | Pjmp_r (_ , sg ) | Pcall_s (_ , sg ) | Pcall_r (_ , sg ) ->
536- set_al sg;
599+ fixup_funcall sg;
537600 emit instr
538601 | Pbuiltin (ef ,args , res ) ->
539602 begin
0 commit comments