@@ -773,7 +773,7 @@ static inline void emit_alu64_imm32(struct jit_state *state,
773773static inline void emit_cmp_imm32 (struct jit_state * state , int dst , int32_t imm )
774774{
775775#if defined(__x86_64__ )
776- emit_alu64_imm32 (state , 0x81 , 7 , dst , imm );
776+ emit_alu32_imm32 (state , 0x81 , 7 , dst , imm );
777777#elif defined(__aarch64__ )
778778 emit_load_imm (state , R10 , imm );
779779 emit_addsub_register (state , false, AS_SUBS , RZ , dst , R10 );
@@ -823,6 +823,10 @@ static inline void emit_jcc_offset(struct jit_state *state, int code)
823823#endif
824824}
825825
826+ static inline void emit_load_imm (struct jit_state * state ,
827+ int dst ,
828+ uint32_t imm );
829+
826830/* Load [src + offset] into dst.
827831 *
828832 * If the offset is non-zero, it restores the vm register to the host register
@@ -835,6 +839,18 @@ static inline void emit_load(struct jit_state *state,
835839 int dst ,
836840 int32_t offset )
837841{
842+ for (int i = 0 ; i < n_host_regs ; i ++ ) {
843+ if (register_map [i ].reg_idx != dst )
844+ continue ;
845+ if (register_map [i ].vm_reg_idx != 0 )
846+ continue ;
847+
848+ /* if dst is x0, load 0x0 into host register */
849+ emit_load_imm (state , dst , 0x0 );
850+ set_dirty (dst , true);
851+ return ;
852+ }
853+
838854#if defined(__x86_64__ )
839855 if (src & 8 || dst & 8 )
840856 emit_basic_rex (state , 0 , dst , src );
@@ -875,6 +891,18 @@ static inline void emit_load_sext(struct jit_state *state,
875891 int dst ,
876892 int32_t offset )
877893{
894+ for (int i = 0 ; i < n_host_regs ; i ++ ) {
895+ if (register_map [i ].reg_idx != dst )
896+ continue ;
897+ if (register_map [i ].vm_reg_idx != 0 )
898+ continue ;
899+
900+ /* if dst is x0, load 0x0 into host register */
901+ emit_load_imm (state , dst , 0x0 );
902+ set_dirty (dst , true);
903+ return ;
904+ }
905+
878906#if defined(__x86_64__ )
879907 if (size == S8 || size == S16 ) {
880908 if (src & 8 || dst & 8 )
@@ -908,8 +936,28 @@ static inline void emit_load_sext(struct jit_state *state,
908936 set_dirty (dst , !offset );
909937}
910938
/* Load a 32-bit immediate into a host register (zero-extended).
 *
 * @state: JIT state that receives the emitted instruction bytes
 * @dst:   host register index to load the value into
 * @imm:   32-bit immediate to materialize
 *
 * The destination register is marked dirty on every architecture, not only
 * on x86-64, so both back ends leave the register bookkeeping in the same
 * state.
 */
static inline void emit_load_imm(struct jit_state *state, int dst, uint32_t imm)
{
#if defined(__x86_64__)
    /* Host registers 8..15 need a prefix byte to be addressable.
     * NOTE(review): the first argument (0) is presumably REX.W, keeping this
     * a 32-bit move -- confirm against emit_basic_rex.
     */
    if (dst & 8)
        emit_basic_rex(state, 0, 0, dst);
    /* mov $imm32, dst (opcode B8+rd); a 32-bit register write on x86-64
     * clears the upper half of the 64-bit register.
     */
    emit1(state, 0xb8 | (dst & 7));
    emit4(state, imm);
#elif defined(__aarch64__)
    /* imm is uint32_t, so the former `(int32_t) imm == imm` test was always
     * true after the usual arithmetic conversions; its else arm was dead.
     * NOTE(review): `false` presumably selects the 32-bit move-wide form --
     * confirm against emit_movewide_imm.
     */
    emit_movewide_imm(state, false, dst, imm);
#endif

    set_dirty(dst, true);
}
956+
911957/* Load sign-extended immediate into register */
912- static inline void emit_load_imm (struct jit_state * state , int dst , int64_t imm )
958+ static inline void emit_load_imm_sext (struct jit_state * state ,
959+ int dst ,
960+ int64_t imm )
913961{
914962#if defined(__x86_64__ )
915963 if ((int32_t ) imm == imm )
@@ -942,6 +990,38 @@ static inline void emit_store(struct jit_state *state,
942990 int dst ,
943991 int32_t offset )
944992{
993+ for (int i = 0 ; i < n_host_regs ; i ++ ) {
994+ if (register_map [i ].reg_idx != src )
995+ continue ;
996+ if (register_map [i ].vm_reg_idx != 0 )
997+ continue ;
998+
999+ /* if src is x0, write 0x0 into destination */
1000+ if (size == S16 )
1001+ emit1 (state , 0x66 ); /* 16-bit override */
1002+ if (dst & 8 )
1003+ emit_rex (state , 0 , 0 , 0 , !!(dst & 8 ));
1004+ emit1 (state , size == S8 ? 0xc6 : 0xc7 );
1005+ emit1 (state , 0x80 | (dst & 0x7 ));
1006+ emit4 (state , offset );
1007+ switch (size ) {
1008+ case S8 :
1009+ emit1 (state , 0x0 );
1010+ break ;
1011+ case S16 :
1012+ emit1 (state , 0x0 );
1013+ emit1 (state , 0x0 );
1014+ break ;
1015+ case S32 :
1016+ emit4 (state , 0x0 );
1017+ break ;
1018+ default :
1019+ assert (NULL );
1020+ __UNREACHABLE ;
1021+ }
1022+ set_dirty (src , false);
1023+ return ;
1024+ }
9451025#if defined(__x86_64__ )
9461026 if (size == S16 )
9471027 emit1 (state , 0x66 ); /* 16-bit override */
@@ -990,7 +1070,7 @@ static inline void unmap_vm_reg(int);
9901070static inline void emit_call (struct jit_state * state , intptr_t target )
9911071{
9921072#if defined(__x86_64__ )
993- emit_load_imm (state , RAX , target );
1073+ emit_load_imm_sext (state , RAX , target );
9941074 /* callq *%rax */
9951075 emit1 (state , 0xff );
9961076 /* ModR/M byte: b11010000b = xd0, rax is register 0 */
@@ -1129,7 +1209,7 @@ static void muldivmod(struct jit_state *state,
11291209
11301210 if (div || mod ) {
11311211 if (sign ) {
1132- emit_load_imm (state , RDX , -1 );
1212+ emit_load_imm_sext (state , RDX , -1 );
11331213 /* compare divisor with -1 for overflow checking */
11341214 emit_cmp32 (state , RDX , RCX );
11351215 /* Save the result of the comparision */
@@ -1165,7 +1245,7 @@ static void muldivmod(struct jit_state *state,
11651245
11661246 if (div ) {
11671247 /* Set the dividend to zero if the divisor was zero. */
1168- emit_load_imm (state , RCX , -1 );
1248+ emit_load_imm_sext (state , RCX , -1 );
11691249
11701250 /* Store 0 in RAX if the divisor was zero. */
11711251 /* Use conditional move to avoid a branch. */
@@ -1795,7 +1875,8 @@ static void do_fuse3(struct jit_state *state, riscv_t *rv, rv_insn_t *ir)
17951875 opcode_fuse_t * fuse = ir -> fuse ;
17961876 for (int i = 0 ; i < ir -> imm2 ; i ++ ) {
17971877 vm_reg [0 ] = ra_load (state , fuse [i ].rs1 );
1798- emit_load_imm (state , temp_reg , (intptr_t ) (m -> mem_base + fuse [i ].imm ));
1878+ emit_load_imm_sext (state , temp_reg ,
1879+ (intptr_t ) (m -> mem_base + fuse [i ].imm ));
17991880 emit_alu64 (state , 0x01 , vm_reg [0 ], temp_reg );
18001881 vm_reg [1 ] = ra_load (state , fuse [i ].rs2 );
18011882 emit_store (state , S32 , vm_reg [1 ], temp_reg , 0 );
@@ -1808,7 +1889,8 @@ static void do_fuse4(struct jit_state *state, riscv_t *rv, rv_insn_t *ir)
18081889 opcode_fuse_t * fuse = ir -> fuse ;
18091890 for (int i = 0 ; i < ir -> imm2 ; i ++ ) {
18101891 vm_reg [0 ] = ra_load (state , fuse [i ].rs1 );
1811- emit_load_imm (state , temp_reg , (intptr_t ) (m -> mem_base + fuse [i ].imm ));
1892+ emit_load_imm_sext (state , temp_reg ,
1893+ (intptr_t ) (m -> mem_base + fuse [i ].imm ));
18121894 emit_alu64 (state , 0x01 , vm_reg [0 ], temp_reg );
18131895 vm_reg [1 ] = map_vm_reg (state , fuse [i ].rd );
18141896 emit_load (state , S32 , temp_reg , vm_reg [1 ], 0 );
0 commit comments