diff --git a/py/asmarm.c b/py/asmarm.c
index 0d55686920..8dbd9ad20a 100644
--- a/py/asmarm.c
+++ b/py/asmarm.c
@@ -170,6 +170,11 @@ STATIC uint asm_arm_op_sub_imm(uint rd, uint rn, uint imm) {
     return 0x2400000 | (rn << 16) | (rd << 12) | (imm & 0xFF);
 }
 
+STATIC uint asm_arm_op_sub_reg(uint rd, uint rn, uint rm) {
+    // sub rd, rn, rm
+    return 0x0400000 | (rn << 16) | (rd << 12) | rm;
+}
+
 void asm_arm_bkpt(asm_arm_t *as) {
     // bkpt #0
     emit_al(as, 0x1200070);
@@ -298,11 +303,16 @@ void asm_arm_less_op(asm_arm_t *as, uint rd, uint rn, uint rm) {
     emit(as, asm_arm_op_mov_imm(rd, 0) | ASM_ARM_CC_GE); // movge rd, #0
 }
 
-void asm_arm_add_reg(asm_arm_t *as, uint rd, uint rn, uint rm) {
+void asm_arm_add_reg_reg_reg(asm_arm_t *as, uint rd, uint rn, uint rm) {
     // add rd, rn, rm
     emit_al(as, asm_arm_op_add_reg(rd, rn, rm));
 }
 
+void asm_arm_sub_reg_reg_reg(asm_arm_t *as, uint rd, uint rn, uint rm) {
+    // sub rd, rn, rm
+    emit_al(as, asm_arm_op_sub_reg(rd, rn, rm));
+}
+
 void asm_arm_mov_reg_local_addr(asm_arm_t *as, uint rd, int local_num) {
     // add rd, sp, #local_num*4
     emit_al(as, asm_arm_op_add_imm(rd, ASM_ARM_REG_SP, local_num << 2));
diff --git a/py/asmarm.h b/py/asmarm.h
index e0c8efe1f7..d977102ba8 100644
--- a/py/asmarm.h
+++ b/py/asmarm.h
@@ -89,7 +89,8 @@ void asm_arm_mov_reg_local(asm_arm_t *as, uint rd, int local_num);
 void asm_arm_cmp_reg_i8(asm_arm_t *as, uint rd, int imm);
 void asm_arm_cmp_reg_reg(asm_arm_t *as, uint rd, uint rn);
 void asm_arm_less_op(asm_arm_t *as, uint rd, uint rn, uint rm);
-void asm_arm_add_reg(asm_arm_t *as, uint rd, uint rn, uint rm);
+void asm_arm_add_reg_reg_reg(asm_arm_t *as, uint rd, uint rn, uint rm);
+void asm_arm_sub_reg_reg_reg(asm_arm_t *as, uint rd, uint rn, uint rm);
 void asm_arm_mov_reg_local_addr(asm_arm_t *as, uint rd, int local_num);
 
 void asm_arm_bcc_label(asm_arm_t *as, int cond, uint label);
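For reference, the SUB encoding added above follows the standard ARM data-processing layout: opcode 0b0010 in bits [24:21], with Rn, Rd and Rm in bits [19:16], [15:12] and [3:0], and emit_al() supplying the AL (always) condition in the top nibble. A standalone sanity check of that layout (not part of the patch):

    // Host-side check of the ARM data-processing encoding used by
    // asm_arm_op_sub_reg; the expected word is the usual encoding
    // of "sub r0, r1, r2".
    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        uint32_t rd = 0, rn = 1, rm = 2;
        uint32_t sub = 0x0400000 | (rn << 16) | (rd << 12) | rm;
        assert(((sub >> 21) & 0xF) == 2);         // SUB is opcode 0b0010
        assert((0xE0000000 | sub) == 0xE0410002); // AL condition ORed in
        return 0;
    }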
diff --git a/py/asmx64.c b/py/asmx64.c
index 8d074dc402..3f111781f2 100644
--- a/py/asmx64.c
+++ b/py/asmx64.c
@@ -54,19 +54,21 @@
 #define OPCODE_MOV_RM64_TO_R64   (0x8b)
 #define OPCODE_LEA_MEM_TO_R64    (0x8d) /* /r */
 #define OPCODE_XOR_R64_TO_RM64   (0x31) /* /r */
-#define OPCODE_ADD_R64_TO_RM64   (0x01)
+#define OPCODE_ADD_R64_TO_RM64   (0x01) /* /r */
 #define OPCODE_ADD_I32_TO_RM32   (0x81) /* /0 */
 #define OPCODE_ADD_I8_TO_RM32    (0x83) /* /0 */
 #define OPCODE_SUB_R64_FROM_RM64 (0x29)
 #define OPCODE_SUB_I32_FROM_RM64 (0x81) /* /5 */
 #define OPCODE_SUB_I8_FROM_RM64  (0x83) /* /5 */
-#define OPCODE_SHL_RM32_BY_I8    (0xc1) /* /4 */
-#define OPCODE_SHR_RM32_BY_I8    (0xc1) /* /5 */
-#define OPCODE_SAR_RM32_BY_I8    (0xc1) /* /7 */
-#define OPCODE_CMP_I32_WITH_RM32 (0x81) /* /7 */
-#define OPCODE_CMP_I8_WITH_RM32  (0x83) /* /7 */
-#define OPCODE_CMP_R64_WITH_RM64 (0x39)
-#define OPCODE_CMP_RM32_WITH_R32 (0x3b)
+//#define OPCODE_SHL_RM32_BY_I8    (0xc1) /* /4 */
+//#define OPCODE_SHR_RM32_BY_I8    (0xc1) /* /5 */
+//#define OPCODE_SAR_RM32_BY_I8    (0xc1) /* /7 */
+#define OPCODE_SHL_RM64_CL       (0xd3) /* /4 */
+#define OPCODE_SAR_RM64_CL       (0xd3) /* /7 */
+//#define OPCODE_CMP_I32_WITH_RM32 (0x81) /* /7 */
+//#define OPCODE_CMP_I8_WITH_RM32  (0x83) /* /7 */
+#define OPCODE_CMP_R64_WITH_RM64 (0x39) /* /r */
+//#define OPCODE_CMP_RM32_WITH_R32 (0x3b)
 #define OPCODE_TEST_R8_WITH_RM8  (0x84) /* /r */
 #define OPCODE_JMP_REL8          (0xeb)
 #define OPCODE_JMP_REL32         (0xe9)
@@ -253,6 +255,10 @@ STATIC void asm_x64_write_r64_disp(asm_x64_t *as, int r64, int disp_r64, int dis
     }
 }
 
+STATIC void asm_x64_generic_r64_r64(asm_x64_t *as, int dest_r64, int src_r64, int op) {
+    asm_x64_write_byte_3(as, REX_PREFIX | REX_W | (src_r64 < 8 ? 0 : REX_R) | (dest_r64 < 8 ? 0 : REX_B), op, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64));
+}
+
 void asm_x64_nop(asm_x64_t *as) {
     asm_x64_write_byte_1(as, OPCODE_NOP);
 }
@@ -290,9 +296,8 @@ STATIC void asm_x64_ret(asm_x64_t *as) {
     asm_x64_write_byte_1(as, OPCODE_RET);
 }
 
-void asm_x64_mov_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
-    // use REX prefix for 64 bit operation
-    asm_x64_write_byte_3(as, REX_PREFIX | REX_W | (src_r64 < 8 ? 0 : REX_R) | (dest_r64 < 8 ? 0 : REX_B), OPCODE_MOV_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64));
+void asm_x64_mov_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
+    asm_x64_generic_r64_r64(as, dest_r64, src_r64, OPCODE_MOV_R64_TO_RM64);
 }
 
 void asm_x64_mov_r8_to_disp(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp) {
@@ -377,30 +382,24 @@ void asm_x64_mov_i64_to_r64_aligned(asm_x64_t *as, int64_t src_i64, int dest_r64
     asm_x64_mov_i64_to_r64(as, src_i64, dest_r64);
 }
 
-void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
-    assert(src_r64 < 8);
-    assert(dest_r64 < 8);
-    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_XOR_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64));
+void asm_x64_xor_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
+    asm_x64_generic_r64_r64(as, dest_r64, src_r64, OPCODE_XOR_R64_TO_RM64);
 }
 
-void asm_x64_add_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64) {
-    assert(src_r64 < 8);
-    assert(dest_r64 < 8);
-    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_ADD_R64_TO_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64));
+void asm_x64_shl_r64_cl(asm_x64_t* as, int dest_r64) {
+    asm_x64_generic_r64_r64(as, dest_r64, 4, OPCODE_SHL_RM64_CL);
 }
 
-/*
-void asm_x64_sub_r32_from_r32(asm_x64_t *as, int src_r32, int dest_r32) {
-    // defaults to 32 bit operation
-    asm_x64_write_byte_2(as, OPCODE_SUB_R64_FROM_RM64, MODRM_R64(src_r32) | MODRM_RM_REG | MODRM_RM_R64(dest_r32));
+void asm_x64_sar_r64_cl(asm_x64_t* as, int dest_r64) {
+    asm_x64_generic_r64_r64(as, dest_r64, 7, OPCODE_SAR_RM64_CL);
 }
-*/
 
-void asm_x64_sub_r64_from_r64(asm_x64_t *as, int src_r64, int dest_r64) {
-    // use REX prefix for 64 bit operation
-    assert(src_r64 < 8);
-    assert(dest_r64 < 8);
-    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_SUB_R64_FROM_RM64, MODRM_R64(src_r64) | MODRM_RM_REG | MODRM_RM_R64(dest_r64));
+void asm_x64_add_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
+    asm_x64_generic_r64_r64(as, dest_r64, src_r64, OPCODE_ADD_R64_TO_RM64);
+}
+
+void asm_x64_sub_r64_r64(asm_x64_t *as, int dest_r64, int src_r64) {
+    asm_x64_generic_r64_r64(as, dest_r64, src_r64, OPCODE_SUB_R64_FROM_RM64);
 }
 
 /*
@@ -417,7 +416,7 @@ void asm_x64_sub_i32_from_r32(asm_x64_t *as, int src_i32, int dest_r32) {
 }
 */
 
-void asm_x64_sub_i32_from_r64(asm_x64_t *as, int src_i32, int dest_r64) {
+STATIC void asm_x64_sub_r64_i32(asm_x64_t *as, int dest_r64, int src_i32) {
     assert(dest_r64 < 8);
     if (SIGNED_FIT8(src_i32)) {
         // use REX prefix for 64 bit operation
@@ -448,9 +447,7 @@ void asm_x64_sar_r32_by_imm(asm_x64_t *as, int r32, int imm) {
 */
 
 void asm_x64_cmp_r64_with_r64(asm_x64_t *as, int src_r64_a, int src_r64_b) {
-    assert(src_r64_a < 8);
-    assert(src_r64_b < 8);
-    asm_x64_write_byte_3(as, REX_PREFIX | REX_W, OPCODE_CMP_R64_WITH_RM64, MODRM_R64(src_r64_a) | MODRM_RM_REG | MODRM_RM_R64(src_r64_b));
+    asm_x64_generic_r64_r64(as, src_r64_b, src_r64_a, OPCODE_CMP_R64_WITH_RM64);
 }
 
 /*
@@ -541,12 +538,12 @@ void asm_x64_jcc_label(asm_x64_t *as, int jcc_type, int label) {
 
 void asm_x64_entry(asm_x64_t *as, int num_locals) {
     asm_x64_push_r64(as, ASM_X64_REG_RBP);
-    asm_x64_mov_r64_to_r64(as, ASM_X64_REG_RSP, ASM_X64_REG_RBP);
+    asm_x64_mov_r64_r64(as, ASM_X64_REG_RBP, ASM_X64_REG_RSP);
     if (num_locals < 0) {
         num_locals = 0;
     }
     num_locals |= 1; // make it odd so stack is aligned on 16 byte boundary
-    asm_x64_sub_i32_from_r64(as, num_locals * WORD_SIZE, ASM_X64_REG_RSP);
+    asm_x64_sub_r64_i32(as, ASM_X64_REG_RSP, num_locals * WORD_SIZE);
     asm_x64_push_r64(as, ASM_X64_REG_RBX);
     asm_x64_push_r64(as, ASM_X64_REG_R12);
     asm_x64_push_r64(as, ASM_X64_REG_R13);
@@ -587,7 +584,7 @@ void asm_x64_mov_r64_to_local(asm_x64_t *as, int src_r64, int dest_local_num) {
 void asm_x64_mov_local_addr_to_r64(asm_x64_t *as, int local_num, int dest_r64) {
     int offset = asm_x64_local_offset_from_ebp(as, local_num);
     if (offset == 0) {
-        asm_x64_mov_r64_to_r64(as, ASM_X64_REG_RBP, dest_r64);
+        asm_x64_mov_r64_r64(as, dest_r64, ASM_X64_REG_RBP);
     } else {
         asm_x64_lea_disp_to_r64(as, ASM_X64_REG_RBP, offset, dest_r64);
     }
@@ -600,7 +597,7 @@ void asm_x64_push_local(asm_x64_t *as, int local_num) {
 
 void asm_x64_push_local_addr(asm_x64_t *as, int local_num, int temp_r64)
 {
-    asm_x64_mov_r64_to_r64(as, ASM_X64_REG_RBP, temp_r64);
+    asm_x64_mov_r64_r64(as, temp_r64, ASM_X64_REG_RBP);
     asm_x64_add_i32_to_r32(as, asm_x64_local_offset_from_ebp(as, local_num), temp_r64);
     asm_x64_push_r64(as, temp_r64);
 }
@@ -614,7 +611,7 @@ void asm_x64_call(asm_x64_t *as, void* func)
     asm_x64_sub_i32_from_r32(as, 8, ASM_X64_REG_RSP);
     asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
     asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
-    asm_x64_mov_r64_to_r64(as, ASM_X64_REG_RBP, ASM_X64_REG_RSP);
+    asm_x64_mov_r64_r64(as, ASM_X64_REG_RSP, ASM_X64_REG_RBP);
 }
 
 void asm_x64_call_i1(asm_x64_t *as, void* func, int i1)
@@ -625,7 +622,7 @@ void asm_x64_call_i1(asm_x64_t *as, void* func, int i1)
     asm_x64_write_byte_1(as, OPCODE_CALL_REL32);
     asm_x64_write_word32(as, func - (void*)(as->code_cur + 4));
     asm_x64_add_i32_to_r32(as, 16, ASM_X64_REG_RSP);
-    asm_x64_mov_r64_to_r64(as, ASM_X64_REG_RBP, ASM_X64_REG_RSP);
+    asm_x64_mov_r64_r64(as, ASM_X64_REG_RSP, ASM_X64_REG_RBP);
 }
 */
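The new asm_x64_generic_r64_r64 factors the common REX.W + opcode + ModRM byte sequence out of the individual instructions, and because it derives REX.R and REX.B from the register numbers it also removes the old src_r64 < 8 / dest_r64 < 8 asserts, so R8-R15 are now usable operands. A minimal host-side sketch of that 3-byte form (the explicit masking is an assumption standing in for the MODRM_* macros):

    #include <assert.h>
    #include <stdint.h>

    enum { REX = 0x40, REX_W = 0x08, REX_R = 0x04, REX_B = 0x01 };

    // Same layout as asm_x64_generic_r64_r64: REX prefix, opcode, ModRM.
    static void encode(uint8_t out[3], int dest, int src, uint8_t op) {
        out[0] = REX | REX_W | (src < 8 ? 0 : REX_R) | (dest < 8 ? 0 : REX_B);
        out[1] = op;
        out[2] = 0xC0 | ((src & 7) << 3) | (dest & 7); // mod=11: register-direct
    }

    int main(void) {
        uint8_t b[3];
        encode(b, 0 /*rax*/, 13 /*r13*/, 0x01); // "add rax, r13"
        assert(b[0] == 0x4C && b[1] == 0x01 && b[2] == 0xE8);
        return 0;
    }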
diff --git a/py/asmx64.h b/py/asmx64.h
index 3b138a753e..0d3f58ecdd 100644
--- a/py/asmx64.h
+++ b/py/asmx64.h
@@ -31,6 +31,11 @@
 // - RAX, RCX, RDX, RSI, RDI, R08, R09, R10, R11 are caller-save
 // - RBX, RBP, R12, R13, R14, R15 are callee-save
 
+// In the functions below, argument order follows x86 docs and generally
+// the destination is the first argument.
+// NOTE: this is a change from the old convention used in this file and
+// some functions still use the old (reverse) convention.
+
 #define ASM_X64_PASS_COMPUTE (1)
 #define ASM_X64_PASS_EMIT    (2)
 
@@ -58,6 +63,8 @@
 #define ASM_X64_CC_JNZ (0x5)
 #define ASM_X64_CC_JNE (0x5)
 #define ASM_X64_CC_JL  (0xc) // less, signed
+#define ASM_X64_CC_JGE (0xd) // greater or equal, signed
+#define ASM_X64_CC_JLE (0xe) // less or equal, signed
 #define ASM_X64_CC_JG  (0xf) // greater, signed
 
 typedef struct _asm_x64_t asm_x64_t;
@@ -72,15 +79,18 @@ void* asm_x64_get_code(asm_x64_t* as);
 void asm_x64_nop(asm_x64_t* as);
 void asm_x64_push_r64(asm_x64_t* as, int src_r64);
 void asm_x64_pop_r64(asm_x64_t* as, int dest_r64);
-void asm_x64_mov_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
+void asm_x64_mov_r64_r64(asm_x64_t* as, int dest_r64, int src_r64);
 void asm_x64_mov_i64_to_r64(asm_x64_t* as, int64_t src_i64, int dest_r64);
 void asm_x64_mov_i64_to_r64_optimised(asm_x64_t *as, int64_t src_i64, int dest_r64);
 void asm_x64_mov_i64_to_r64_aligned(asm_x64_t *as, int64_t src_i64, int dest_r64);
 void asm_x64_mov_r8_to_disp(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp);
 void asm_x64_mov_r16_to_disp(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp);
 void asm_x64_mov_r64_to_disp(asm_x64_t *as, int src_r64, int dest_r64, int dest_disp);
-void asm_x64_xor_r64_to_r64(asm_x64_t *as, int src_r64, int dest_r64);
-void asm_x64_add_r64_to_r64(asm_x64_t* as, int src_r64, int dest_r64);
+void asm_x64_xor_r64_r64(asm_x64_t *as, int dest_r64, int src_r64);
+void asm_x64_shl_r64_cl(asm_x64_t* as, int dest_r64);
+void asm_x64_sar_r64_cl(asm_x64_t* as, int dest_r64);
+void asm_x64_add_r64_r64(asm_x64_t* as, int dest_r64, int src_r64);
+void asm_x64_sub_r64_r64(asm_x64_t* as, int dest_r64, int src_r64);
 void asm_x64_cmp_r64_with_r64(asm_x64_t* as, int src_r64_a, int src_r64_b);
 void asm_x64_test_r8_with_r8(asm_x64_t* as, int src_r64_a, int src_r64_b);
 void asm_x64_setcc_r8(asm_x64_t* as, int jcc_type, int dest_r8);
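The two condition codes added above slot into the usual x86 scheme where a 4-bit cc is added to an opcode base: 0x70 + cc gives the short Jcc forms, and the two-byte 0x0f, 0x90 + cc gives SETcc, which is what asm_x64_setcc_r8 uses with the comparison tables in emitnative.c further below. A quick check (not part of the patch):

    #include <assert.h>

    #define ASM_X64_CC_JL  (0xc)
    #define ASM_X64_CC_JGE (0xd)

    int main(void) {
        assert((0x70 | ASM_X64_CC_JL) == 0x7c);  // jl rel8
        assert((0x90 | ASM_X64_CC_JGE) == 0x9d); // 0f 9d: setge r/m8
        return 0;
    }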
diff --git a/py/asmx86.c b/py/asmx86.c
index 08299f8511..072998c673 100644
--- a/py/asmx86.c
+++ b/py/asmx86.c
@@ -57,12 +57,14 @@
 #define OPCODE_ADD_R32_TO_RM32   (0x01)
 #define OPCODE_ADD_I32_TO_RM32   (0x81) /* /0 */
 #define OPCODE_ADD_I8_TO_RM32    (0x83) /* /0 */
-//#define OPCODE_SUB_R32_FROM_RM32 (0x29)
+#define OPCODE_SUB_R32_FROM_RM32 (0x29)
 #define OPCODE_SUB_I32_FROM_RM32 (0x81) /* /5 */
 #define OPCODE_SUB_I8_FROM_RM32  (0x83) /* /5 */
 //#define OPCODE_SHL_RM32_BY_I8    (0xc1) /* /4 */
 //#define OPCODE_SHR_RM32_BY_I8    (0xc1) /* /5 */
 //#define OPCODE_SAR_RM32_BY_I8    (0xc1) /* /7 */
+#define OPCODE_SHL_RM32_CL       (0xd3) /* /4 */
+#define OPCODE_SAR_RM32_CL       (0xd3) /* /7 */
 //#define OPCODE_CMP_I32_WITH_RM32 (0x81) /* /7 */
 //#define OPCODE_CMP_I8_WITH_RM32  (0x83) /* /7 */
 #define OPCODE_CMP_R32_WITH_RM32 (0x39)
@@ -204,6 +206,10 @@ STATIC void asm_x86_write_r32_disp(asm_x86_t *as, int r32, int disp_r32, int dis
     }
 }
 
+STATIC void asm_x86_generic_r32_r32(asm_x86_t *as, int dest_r32, int src_r32, int op) {
+    asm_x86_write_byte_2(as, op, MODRM_R32(src_r32) | MODRM_RM_REG | MODRM_RM_R32(dest_r32));
+}
+
 STATIC void asm_x86_nop(asm_x86_t *as) {
     asm_x86_write_byte_1(as, OPCODE_NOP);
 }
@@ -232,8 +238,8 @@ STATIC void asm_x86_ret(asm_x86_t *as) {
     asm_x86_write_byte_1(as, OPCODE_RET);
 }
 
-void asm_x86_mov_r32_to_r32(asm_x86_t *as, int src_r32, int dest_r32) {
-    asm_x86_write_byte_2(as, OPCODE_MOV_R32_TO_RM32, MODRM_R32(src_r32) | MODRM_RM_REG | MODRM_RM_R32(dest_r32));
+void asm_x86_mov_r32_r32(asm_x86_t *as, int dest_r32, int src_r32) {
+    asm_x86_generic_r32_r32(as, dest_r32, src_r32, OPCODE_MOV_R32_TO_RM32);
 }
 
 void asm_x86_mov_r8_to_disp(asm_x86_t *as, int src_r32, int dest_r32, int dest_disp) {
@@ -281,12 +287,20 @@ void asm_x86_mov_i32_to_r32_aligned(asm_x86_t *as, int32_t src_i32, int dest_r32
     asm_x86_mov_i32_to_r32(as, src_i32, dest_r32);
 }
 
-void asm_x86_xor_r32_to_r32(asm_x86_t *as, int src_r32, int dest_r32) {
-    asm_x86_write_byte_2(as, OPCODE_XOR_R32_TO_RM32, MODRM_R32(src_r32) | MODRM_RM_REG | MODRM_RM_R32(dest_r32));
+void asm_x86_xor_r32_r32(asm_x86_t *as, int dest_r32, int src_r32) {
+    asm_x86_generic_r32_r32(as, dest_r32, src_r32, OPCODE_XOR_R32_TO_RM32);
 }
 
-void asm_x86_add_r32_to_r32(asm_x86_t *as, int src_r32, int dest_r32) {
-    asm_x86_write_byte_2(as, OPCODE_ADD_R32_TO_RM32, MODRM_R32(src_r32) | MODRM_RM_REG | MODRM_RM_R32(dest_r32));
+void asm_x86_shl_r32_cl(asm_x86_t* as, int dest_r32) {
+    asm_x86_generic_r32_r32(as, dest_r32, 4, OPCODE_SHL_RM32_CL);
+}
+
+void asm_x86_sar_r32_cl(asm_x86_t* as, int dest_r32) {
+    asm_x86_generic_r32_r32(as, dest_r32, 7, OPCODE_SAR_RM32_CL);
+}
+
+void asm_x86_add_r32_r32(asm_x86_t *as, int dest_r32, int src_r32) {
+    asm_x86_generic_r32_r32(as, dest_r32, src_r32, OPCODE_ADD_R32_TO_RM32);
 }
 
 void asm_x86_add_i32_to_r32(asm_x86_t *as, int src_i32, int dest_r32) {
@@ -299,13 +313,11 @@ void asm_x86_add_i32_to_r32(asm_x86_t *as, int src_i32, int dest_r32) {
     }
 }
 
-#if 0
-void asm_x86_sub_r32_from_r32(asm_x86_t *as, int src_r32, int dest_r32) {
-    asm_x86_write_byte_2(as, OPCODE_SUB_R32_FROM_RM32, MODRM_R32(src_r32) | MODRM_RM_REG | MODRM_RM_R32(dest_r32));
+void asm_x86_sub_r32_r32(asm_x86_t *as, int dest_r32, int src_r32) {
+    asm_x86_generic_r32_r32(as, dest_r32, src_r32, OPCODE_SUB_R32_FROM_RM32);
 }
-#endif
 
-void asm_x86_sub_i32_from_r32(asm_x86_t *as, int src_i32, int dest_r32) {
+STATIC void asm_x86_sub_r32_i32(asm_x86_t *as, int dest_r32, int src_i32) {
     if (SIGNED_FIT8(src_i32)) {
         // defaults to 32 bit operation
         asm_x86_write_byte_2(as, OPCODE_SUB_I8_FROM_RM32, MODRM_R32(5) | MODRM_RM_REG | MODRM_RM_R32(dest_r32));
@@ -426,9 +438,9 @@ void asm_x86_jcc_label(asm_x86_t *as, mp_uint_t jcc_type, mp_uint_t label) {
 
 void asm_x86_entry(asm_x86_t *as, mp_uint_t num_locals) {
     asm_x86_push_r32(as, ASM_X86_REG_EBP);
-    asm_x86_mov_r32_to_r32(as, ASM_X86_REG_ESP, ASM_X86_REG_EBP);
+    asm_x86_mov_r32_r32(as, ASM_X86_REG_EBP, ASM_X86_REG_ESP);
     if (num_locals > 0) {
-        asm_x86_sub_i32_from_r32(as, num_locals * WORD_SIZE, ASM_X86_REG_ESP);
+        asm_x86_sub_r32_i32(as, ASM_X86_REG_ESP, num_locals * WORD_SIZE);
     }
     asm_x86_push_r32(as, ASM_X86_REG_EBX);
     asm_x86_push_r32(as, ASM_X86_REG_ESI);
@@ -487,7 +499,7 @@ void asm_x86_mov_r32_to_local(asm_x86_t *as, int src_r32, int dest_local_num) {
 void asm_x86_mov_local_addr_to_r32(asm_x86_t *as, int local_num, int dest_r32) {
     int offset = asm_x86_local_offset_from_ebp(as, local_num);
     if (offset == 0) {
-        asm_x86_mov_r32_to_r32(as, ASM_X86_REG_EBP, dest_r32);
+        asm_x86_mov_r32_r32(as, dest_r32, ASM_X86_REG_EBP);
     } else {
         asm_x86_lea_disp_to_r32(as, ASM_X86_REG_EBP, offset, dest_r32);
     }
@@ -500,7 +512,7 @@ void asm_x86_push_local(asm_x86_t *as, int local_num) {
 
 void asm_x86_push_local_addr(asm_x86_t *as, int local_num, int temp_r32)
 {
-    asm_x86_mov_r32_to_r32(as, ASM_X86_REG_EBP, temp_r32);
+    asm_x86_mov_r32_r32(as, temp_r32, ASM_X86_REG_EBP);
     asm_x86_add_i32_to_r32(as, asm_x86_local_offset_from_ebp(as, local_num), temp_r32);
     asm_x86_push_r32(as, temp_r32);
 }
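A note on the literal 4 and 7 passed by asm_x86_shl_r32_cl and asm_x86_sar_r32_cl above: for the group-2 opcode 0xd3 the ModRM reg field is not a register but a /digit sub-opcode (/4 selects shl, /7 selects sar), and the shift count is taken implicitly from CL, so reusing the generic reg/reg encoder with the digit in the "source" slot produces exactly the right byte. A sketch (not from the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        int dest = 0;   // eax
        int digit = 4;  // /4 selects shl within opcode 0xd3
        uint8_t modrm = 0xC0 | ((digit & 7) << 3) | (dest & 7);
        assert(modrm == 0xE0); // d3 e0 disassembles as "shl eax, cl"
        return 0;
    }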
diff --git a/py/asmx86.h b/py/asmx86.h
index 0ee192378f..2d83f3a650 100644
--- a/py/asmx86.h
+++ b/py/asmx86.h
@@ -32,6 +32,11 @@
 // - EAX, ECX, EDX are caller-save
 // - EBX, ESI, EDI, EBP, ESP, EIP are callee-save
 
+// In the functions below, argument order follows x86 docs and generally
+// the destination is the first argument.
+// NOTE: this is a change from the old convention used in this file and
+// some functions still use the old (reverse) convention.
+
 #define ASM_X86_PASS_COMPUTE (1)
 #define ASM_X86_PASS_EMIT    (2)
 
@@ -59,6 +64,8 @@
 #define ASM_X86_CC_JNZ (0x5)
 #define ASM_X86_CC_JNE (0x5)
 #define ASM_X86_CC_JL  (0xc) // less, signed
+#define ASM_X86_CC_JGE (0xd) // greater or equal, signed
+#define ASM_X86_CC_JLE (0xe) // less or equal, signed
 #define ASM_X86_CC_JG  (0xf) // greater, signed
 
 typedef struct _asm_x86_t asm_x86_t;
@@ -70,14 +77,17 @@ void asm_x86_end_pass(asm_x86_t *as);
 mp_uint_t asm_x86_get_code_size(asm_x86_t* as);
 void* asm_x86_get_code(asm_x86_t* as);
 
-void asm_x86_mov_r32_to_r32(asm_x86_t* as, int src_r32, int dest_r32);
+void asm_x86_mov_r32_r32(asm_x86_t* as, int dest_r32, int src_r32);
 void asm_x86_mov_i32_to_r32(asm_x86_t *as, int32_t src_i32, int dest_r32);
 void asm_x86_mov_i32_to_r32_aligned(asm_x86_t *as, int32_t src_i32, int dest_r32);
 void asm_x86_mov_r8_to_disp(asm_x86_t *as, int src_r32, int dest_r32, int dest_disp);
 void asm_x86_mov_r16_to_disp(asm_x86_t *as, int src_r32, int dest_r32, int dest_disp);
 void asm_x86_mov_r32_to_disp(asm_x86_t *as, int src_r32, int dest_r32, int dest_disp);
-void asm_x86_xor_r32_to_r32(asm_x86_t *as, int src_r32, int dest_r32);
-void asm_x86_add_r32_to_r32(asm_x86_t* as, int src_r32, int dest_r32);
+void asm_x86_xor_r32_r32(asm_x86_t *as, int dest_r32, int src_r32);
+void asm_x86_shl_r32_cl(asm_x86_t* as, int dest_r32);
+void asm_x86_sar_r32_cl(asm_x86_t* as, int dest_r32);
+void asm_x86_add_r32_r32(asm_x86_t* as, int dest_r32, int src_r32);
+void asm_x86_sub_r32_r32(asm_x86_t* as, int dest_r32, int src_r32);
 void asm_x86_cmp_r32_with_r32(asm_x86_t* as, int src_r32_a, int src_r32_b);
 void asm_x86_test_r8_with_r8(asm_x86_t* as, int src_r32_a, int src_r32_b);
 void asm_x86_setcc_r8(asm_x86_t* as, mp_uint_t jcc_type, int dest_r8);
diff --git a/py/emitnative.c b/py/emitnative.c
index cfcba27a13..d58da7e788 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -140,9 +140,14 @@
         asm_x64_mov_r64_to_local(as, (reg_temp), (local_num)); \
     } while (false)
 #define ASM_MOV_LOCAL_TO_REG asm_x64_mov_local_to_r64
-#define ASM_MOV_REG_TO_REG asm_x64_mov_r64_to_r64
+#define ASM_MOV_REG_REG(as, reg_dest, reg_src) asm_x64_mov_r64_r64((as), (reg_dest), (reg_src))
 #define ASM_MOV_LOCAL_ADDR_TO_REG asm_x64_mov_local_addr_to_r64
+#define ASM_LSL_REG(as, reg) asm_x64_shl_r64_cl((as), (reg))
+#define ASM_ASR_REG(as, reg) asm_x64_sar_r64_cl((as), (reg))
+#define ASM_ADD_REG_REG(as, reg_dest, reg_src) asm_x64_add_r64_r64((as), (reg_dest), (reg_src))
+#define ASM_SUB_REG_REG(as, reg_dest, reg_src) asm_x64_sub_r64_r64((as), (reg_dest), (reg_src))
+
 #elif N_X86
 
 // x86 specific stuff
@@ -256,9 +261,14 @@ STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
         asm_x86_mov_r32_to_local(as, (reg_temp), (local_num)); \
     } while (false)
 #define ASM_MOV_LOCAL_TO_REG asm_x86_mov_local_to_r32
-#define ASM_MOV_REG_TO_REG asm_x86_mov_r32_to_r32
+#define ASM_MOV_REG_REG(as, reg_dest, reg_src) asm_x86_mov_r32_r32((as), (reg_dest), (reg_src))
 #define ASM_MOV_LOCAL_ADDR_TO_REG asm_x86_mov_local_addr_to_r32
+#define ASM_LSL_REG(as, reg) asm_x86_shl_r32_cl((as), (reg))
+#define ASM_ASR_REG(as, reg) asm_x86_sar_r32_cl((as), (reg))
+#define ASM_ADD_REG_REG(as, reg_dest, reg_src) asm_x86_add_r32_r32((as), (reg_dest), (reg_src))
+#define ASM_SUB_REG_REG(as, reg_dest, reg_src) asm_x86_sub_r32_r32((as), (reg_dest), (reg_src))
+
 #elif N_THUMB
 
 // thumb specific stuff
@@ -323,9 +333,14 @@ STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
         asm_thumb_mov_local_reg(as, (local_num), (reg_temp)); \
     } while (false)
 #define ASM_MOV_LOCAL_TO_REG(as, local_num, reg) asm_thumb_mov_reg_local(as, (reg), (local_num))
-#define ASM_MOV_REG_TO_REG(as, reg_src, reg_dest) asm_thumb_mov_reg_reg(as, (reg_dest), (reg_src))
+#define ASM_MOV_REG_REG(as, reg_dest, reg_src) asm_thumb_mov_reg_reg((as), (reg_dest), (reg_src))
 #define ASM_MOV_LOCAL_ADDR_TO_REG(as, local_num, reg) asm_thumb_mov_reg_local_addr(as, (reg), (local_num))
+#define ASM_LSL_REG_REG(as, reg_dest, reg_shift) asm_thumb_format_4((as), ASM_THUMB_FORMAT_4_LSL, (reg_dest), (reg_shift))
+#define ASM_ASR_REG_REG(as, reg_dest, reg_shift) asm_thumb_format_4((as), ASM_THUMB_FORMAT_4_ASR, (reg_dest), (reg_shift))
+#define ASM_ADD_REG_REG(as, reg_dest, reg_src) asm_thumb_add_rlo_rlo_rlo((as), (reg_dest), (reg_dest), (reg_src))
+#define ASM_SUB_REG_REG(as, reg_dest, reg_src) asm_thumb_sub_rlo_rlo_rlo((as), (reg_dest), (reg_dest), (reg_src))
+
 #elif N_ARM
 
 // ARM specific stuff
@@ -390,9 +405,15 @@ STATIC byte mp_f_n_args[MP_F_NUMBER_OF] = {
         asm_arm_mov_local_reg(as, (local_num), (reg_temp)); \
     } while (false)
 #define ASM_MOV_LOCAL_TO_REG(as, local_num, reg) asm_arm_mov_reg_local(as, (reg), (local_num))
-#define ASM_MOV_REG_TO_REG(as, reg_src, reg_dest) asm_arm_mov_reg_reg(as, (reg_dest), (reg_src))
+#define ASM_MOV_REG_REG(as, reg_dest, reg_src) asm_arm_mov_reg_reg((as), (reg_dest), (reg_src))
 #define ASM_MOV_LOCAL_ADDR_TO_REG(as, local_num, reg) asm_arm_mov_reg_local_addr(as, (reg), (local_num))
+// TODO someone please implement lsl and asr
+#define ASM_LSL_REG_REG(as, reg_dest, reg_shift) asm_arm_lsl_((as), (reg_dest), (reg_shift))
+#define ASM_ASR_REG_REG(as, reg_dest, reg_shift) asm_arm_asr_((as), (reg_dest), (reg_shift))
+#define ASM_ADD_REG_REG(as, reg_dest, reg_src) asm_arm_add_reg_reg_reg((as), (reg_dest), (reg_dest), (reg_src))
+#define ASM_SUB_REG_REG(as, reg_dest, reg_src) asm_arm_sub_reg_reg_reg((as), (reg_dest), (reg_dest), (reg_src))
+
 #else
 
 #error unknown native emitter
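With all four backends now exposing the same ASM_* names, emit_native_binary_op further below can be written once against this macro layer instead of carrying an inline #if ladder per operation. A cut-down model of the dispatch pattern (hypothetical, not from the patch):

    #include <stdio.h>

    #define N_X86 1

    #if N_X86
    // two-operand target: dest op= src
    #define ASM_ADD_REG_REG(dest, src) printf("add r%d, r%d\n", (dest), (src))
    #elif N_THUMB
    // three-operand target, folded to two by repeating dest, as the
    // real ASM_ADD_REG_REG does with asm_thumb_add_rlo_rlo_rlo
    #define ASM_ADD_REG_REG(dest, src) printf("adds r%d, r%d, r%d\n", (dest), (dest), (src))
    #endif

    int main(void) {
        ASM_ADD_REG_REG(2, 3); // emitter code is written once against this name
        return 0;
    }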
@@ -544,11 +565,11 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
 #if N_X64
     for (int i = 0; i < scope->num_pos_args; i++) {
         if (i == 0) {
-            asm_x64_mov_r64_to_r64(emit->as, REG_ARG_1, REG_LOCAL_1);
+            ASM_MOV_REG_REG(emit->as, REG_LOCAL_1, REG_ARG_1);
         } else if (i == 1) {
-            asm_x64_mov_r64_to_r64(emit->as, REG_ARG_2, REG_LOCAL_2);
+            ASM_MOV_REG_REG(emit->as, REG_LOCAL_2, REG_ARG_2);
         } else if (i == 2) {
-            asm_x64_mov_r64_to_r64(emit->as, REG_ARG_3, REG_LOCAL_3);
+            ASM_MOV_REG_REG(emit->as, REG_LOCAL_3, REG_ARG_3);
         } else if (i == 3) {
             asm_x64_mov_r64_to_local(emit->as, REG_ARG_4, i - REG_LOCAL_NUM);
         } else {
@@ -572,11 +593,11 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
 #elif N_THUMB
     for (int i = 0; i < scope->num_pos_args; i++) {
         if (i == 0) {
-            asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_1, REG_ARG_1);
+            ASM_MOV_REG_REG(emit->as, REG_LOCAL_1, REG_ARG_1);
         } else if (i == 1) {
-            asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_2, REG_ARG_2);
+            ASM_MOV_REG_REG(emit->as, REG_LOCAL_2, REG_ARG_2);
         } else if (i == 2) {
-            asm_thumb_mov_reg_reg(emit->as, REG_LOCAL_3, REG_ARG_3);
+            ASM_MOV_REG_REG(emit->as, REG_LOCAL_3, REG_ARG_3);
         } else if (i == 3) {
             asm_thumb_mov_local_reg(emit->as, i - REG_LOCAL_NUM, REG_ARG_4);
         } else {
@@ -589,11 +610,11 @@ STATIC void emit_native_start_pass(emit_t *emit, pass_kind_t pass, scope_t *scop
 #elif N_ARM
     for (int i = 0; i < scope->num_pos_args; i++) {
         if (i == 0) {
-            asm_arm_mov_reg_reg(emit->as, REG_LOCAL_1, REG_ARG_1);
+            ASM_MOV_REG_REG(emit->as, REG_LOCAL_1, REG_ARG_1);
         } else if (i == 1) {
-            asm_arm_mov_reg_reg(emit->as, REG_LOCAL_2, REG_ARG_2);
+            ASM_MOV_REG_REG(emit->as, REG_LOCAL_2, REG_ARG_2);
         } else if (i == 2) {
-            asm_arm_mov_reg_reg(emit->as, REG_LOCAL_3, REG_ARG_3);
+            ASM_MOV_REG_REG(emit->as, REG_LOCAL_3, REG_ARG_3);
         } else if (i == 3) {
             asm_arm_mov_local_reg(emit->as, i - REG_LOCAL_NUM, REG_ARG_4);
         } else {
@@ -698,8 +719,14 @@ STATIC void emit_native_pre(emit_t *emit) {
     */
 }
 
-STATIC vtype_kind_t peek_vtype(emit_t *emit) {
-    return emit->stack_info[emit->stack_size - 1].vtype;
+// depth==0 is top, depth==1 is before top, etc
+STATIC stack_info_t *peek_stack(emit_t *emit, mp_uint_t depth) {
+    return &emit->stack_info[emit->stack_size - 1 - depth];
+}
+
+// depth==0 is top, depth==1 is before top, etc
+STATIC vtype_kind_t peek_vtype(emit_t *emit, mp_uint_t depth) {
+    return peek_stack(emit, depth)->vtype;
 }
 
 // pos=1 is TOS, pos=2 is next, etc
@@ -759,7 +786,7 @@ STATIC void emit_access_stack(emit_t *emit, int pos, vtype_kind_t *vtype, int re
 
         case STACK_REG:
             if (si->u_reg != reg_dest) {
-                ASM_MOV_REG_TO_REG(emit->as, si->u_reg, reg_dest);
+                ASM_MOV_REG_REG(emit->as, reg_dest, si->u_reg);
             }
             break;
 
@@ -769,6 +796,21 @@ STATIC void emit_access_stack(emit_t *emit, int pos, vtype_kind_t *vtype, int re
     }
 }
 
+// If stacked value is in a register, then *reg_dest is set to that register.
+// Otherwise, the value is put in *reg_dest.
+STATIC void emit_pre_pop_reg_flexible(emit_t *emit, vtype_kind_t *vtype, int *reg_dest) {
+    emit->last_emit_was_return_value = false;
+    stack_info_t *si = peek_stack(emit, 0);
+    if (si->kind == STACK_REG) {
+        *vtype = si->vtype;
+        *reg_dest = si->u_reg;
+        need_reg_single(emit, *reg_dest, 1);
+    } else {
+        emit_access_stack(emit, 1, vtype, *reg_dest);
+    }
+    adjust_stack(emit, -1);
+}
+
 STATIC void emit_pre_pop_discard(emit_t *emit) {
     emit->last_emit_was_return_value = false;
     adjust_stack(emit, -1);
@@ -1250,13 +1292,13 @@ STATIC void emit_native_store_name(emit_t *emit, qstr qst) {
 }
 
 STATIC void emit_native_store_global(emit_t *emit, qstr qst) {
-    vtype_kind_t vtype = peek_vtype(emit);
+    vtype_kind_t vtype = peek_vtype(emit, 0);
     if (vtype == VTYPE_PYOBJ) {
         emit_pre_pop_reg(emit, &vtype, REG_ARG_2);
     } else {
         emit_pre_pop_reg(emit, &vtype, REG_ARG_1);
         emit_call_with_imm_arg(emit, MP_F_CONVERT_NATIVE_TO_OBJ, vtype, REG_ARG_2); // arg2 = type
-        ASM_MOV_REG_TO_REG(emit->as, REG_RET, REG_ARG_2);
+        ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_RET);
     }
     emit_call_with_imm_arg(emit, MP_F_STORE_GLOBAL, qst, REG_ARG_1); // arg1 = name
     emit_post(emit);
@@ -1364,7 +1406,7 @@ STATIC void emit_native_jump(emit_t *emit, mp_uint_t label) {
 }
 
 STATIC void emit_native_jump_helper(emit_t *emit, mp_uint_t label, bool pop) {
-    vtype_kind_t vtype = peek_vtype(emit);
+    vtype_kind_t vtype = peek_vtype(emit, 0);
     switch (vtype) {
         case VTYPE_PYOBJ:
             emit_pre_pop_reg(emit, &vtype, REG_ARG_1);
@@ -1507,7 +1549,7 @@ STATIC void emit_native_unary_op(emit_t *emit, mp_unary_op_t op) {
     if (op == MP_UNARY_OP_NOT) {
         // we need to synthesise this operation by converting to bool first
        emit_call_with_imm_arg(emit, MP_F_UNARY_OP, MP_UNARY_OP_BOOL, REG_ARG_1);
-        ASM_MOV_REG_TO_REG(emit->as, REG_RET, REG_ARG_2);
+        ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_RET);
     }
     emit_call_with_imm_arg(emit, MP_F_UNARY_OP, op, REG_ARG_1);
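emit_native_binary_op below leans on both additions: peek_vtype now inspects the types of both operands before anything is popped, and emit_pre_pop_reg_flexible avoids a register-to-register move when the top of stack already lives in a register. A reduced model of what the flexible pop buys (hypothetical types and names, not the real emitter structures):

    #include <stdio.h>

    typedef struct { int in_reg; int reg; } slot_t;

    static void pop_flexible(slot_t *tos, int *reg_dest) {
        if (tos->in_reg) {
            *reg_dest = tos->reg; // reuse the register as-is, no mov emitted
        } else {
            printf("mov r%d, [stack]\n", *reg_dest); // fall back to a load
        }
    }

    int main(void) {
        slot_t tos = { .in_reg = 1, .reg = 5 };
        int reg = 3; // caller's suggested register
        pop_flexible(&tos, &reg);
        printf("use r%d\n", reg); // prints "use r5": the mov was elided
        return 0;
    }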
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
@@ -1515,47 +1557,108 @@ STATIC void emit_native_unary_op(emit_t *emit, mp_unary_op_t op) {
 }
 
 STATIC void emit_native_binary_op(emit_t *emit, mp_binary_op_t op) {
     DEBUG_printf("binary_op(" UINT_FMT ")\n", op);
-    vtype_kind_t vtype_lhs, vtype_rhs;
-    emit_pre_pop_reg_reg(emit, &vtype_rhs, REG_ARG_3, &vtype_lhs, REG_ARG_2);
+    vtype_kind_t vtype_lhs = peek_vtype(emit, 1);
+    vtype_kind_t vtype_rhs = peek_vtype(emit, 0);
     if (vtype_lhs == VTYPE_INT && vtype_rhs == VTYPE_INT) {
-        if (op == MP_BINARY_OP_ADD || op == MP_BINARY_OP_INPLACE_ADD) {
-#if N_X64
-            asm_x64_add_r64_to_r64(emit->as, REG_ARG_3, REG_ARG_2);
-#elif N_X86
-            asm_x86_add_r32_to_r32(emit->as, REG_ARG_3, REG_ARG_2);
-#elif N_THUMB
-            asm_thumb_add_rlo_rlo_rlo(emit->as, REG_ARG_2, REG_ARG_2, REG_ARG_3);
-#elif N_ARM
-            asm_arm_add_reg(emit->as, REG_ARG_2, REG_ARG_2, REG_ARG_3);
-#else
-            #error not implemented
-#endif
+        #if N_X64 || N_X86
+        // special cases for x86 and shifting
+        if (op == MP_BINARY_OP_LSHIFT
+            || op == MP_BINARY_OP_INPLACE_LSHIFT
+            || op == MP_BINARY_OP_RSHIFT
+            || op == MP_BINARY_OP_INPLACE_RSHIFT) {
+            #if N_X64
+            emit_pre_pop_reg_reg(emit, &vtype_rhs, ASM_X64_REG_RCX, &vtype_lhs, REG_RET);
+            #else
+            emit_pre_pop_reg_reg(emit, &vtype_rhs, ASM_X86_REG_ECX, &vtype_lhs, REG_RET);
+            #endif
+            if (op == MP_BINARY_OP_LSHIFT || op == MP_BINARY_OP_INPLACE_LSHIFT) {
+                ASM_LSL_REG(emit->as, REG_RET);
+            } else {
+                ASM_ASR_REG(emit->as, REG_RET);
+            }
+            emit_post_push_reg(emit, VTYPE_INT, REG_RET);
+            return;
+        }
+        #endif
+        int reg_rhs = REG_ARG_3;
+        emit_pre_pop_reg_flexible(emit, &vtype_rhs, &reg_rhs);
+        emit_pre_pop_reg(emit, &vtype_lhs, REG_ARG_2);
+        if (0) {
+            // dummy
+        #if !(N_X64 || N_X86)
+        } else if (op == MP_BINARY_OP_LSHIFT || op == MP_BINARY_OP_INPLACE_LSHIFT) {
+            ASM_LSL_REG_REG(emit->as, REG_ARG_2, reg_rhs);
             emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
-        } else if (op == MP_BINARY_OP_LESS) {
-#if N_X64
-            asm_x64_xor_r64_to_r64(emit->as, REG_RET, REG_RET);
-            asm_x64_cmp_r64_with_r64(emit->as, REG_ARG_3, REG_ARG_2);
-            asm_x64_setcc_r8(emit->as, ASM_X64_CC_JL, REG_RET);
-#elif N_X86
-            asm_x86_xor_r32_to_r32(emit->as, REG_RET, REG_RET);
-            asm_x86_cmp_r32_with_r32(emit->as, REG_ARG_3, REG_ARG_2);
-            asm_x86_setcc_r8(emit->as, ASM_X86_CC_JL, REG_RET);
-#elif N_THUMB
-            asm_thumb_cmp_rlo_rlo(emit->as, REG_ARG_2, REG_ARG_3);
-            asm_thumb_op16(emit->as, ASM_THUMB_OP_ITE_GE);
-            asm_thumb_mov_rlo_i8(emit->as, REG_RET, 0); // if r0 >= r1
-            asm_thumb_mov_rlo_i8(emit->as, REG_RET, 1); // if r0 < r1
-#elif N_ARM
-            asm_arm_less_op(emit->as, REG_RET, REG_ARG_2, REG_ARG_3);
-#else
-            #error not implemented
-#endif
+        } else if (op == MP_BINARY_OP_RSHIFT || op == MP_BINARY_OP_INPLACE_RSHIFT) {
+            ASM_ASR_REG_REG(emit->as, REG_ARG_2, reg_rhs);
+            emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
+        #endif
+        } else if (op == MP_BINARY_OP_ADD || op == MP_BINARY_OP_INPLACE_ADD) {
+            ASM_ADD_REG_REG(emit->as, REG_ARG_2, reg_rhs);
+            emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
+        } else if (op == MP_BINARY_OP_SUBTRACT || op == MP_BINARY_OP_INPLACE_SUBTRACT) {
+            ASM_SUB_REG_REG(emit->as, REG_ARG_2, reg_rhs);
+            emit_post_push_reg(emit, VTYPE_INT, REG_ARG_2);
+        } else if (MP_BINARY_OP_LESS <= op && op <= MP_BINARY_OP_NOT_EQUAL) {
+            // comparison ops are (in enum order):
+            //  MP_BINARY_OP_LESS
+            //  MP_BINARY_OP_MORE
+            //  MP_BINARY_OP_EQUAL
+            //  MP_BINARY_OP_LESS_EQUAL
+            //  MP_BINARY_OP_MORE_EQUAL
+            //  MP_BINARY_OP_NOT_EQUAL
+            #if N_X64
+            asm_x64_xor_r64_r64(emit->as, REG_RET, REG_RET);
+            asm_x64_cmp_r64_with_r64(emit->as, reg_rhs, REG_ARG_2);
+            static byte ops[6] = {
+                ASM_X64_CC_JL,
+                ASM_X64_CC_JG,
+                ASM_X64_CC_JE,
+                ASM_X64_CC_JLE,
+                ASM_X64_CC_JGE,
+                ASM_X64_CC_JNE,
+            };
+            asm_x64_setcc_r8(emit->as, ops[op - MP_BINARY_OP_LESS], REG_RET);
+            #elif N_X86
+            asm_x86_xor_r32_r32(emit->as, REG_RET, REG_RET);
+            asm_x86_cmp_r32_with_r32(emit->as, reg_rhs, REG_ARG_2);
+            static byte ops[6] = {
+                ASM_X86_CC_JL,
+                ASM_X86_CC_JG,
+                ASM_X86_CC_JE,
+                ASM_X86_CC_JLE,
+                ASM_X86_CC_JGE,
+                ASM_X86_CC_JNE,
+            };
+            asm_x86_setcc_r8(emit->as, ops[op - MP_BINARY_OP_LESS], REG_RET);
+            #elif N_THUMB
+            asm_thumb_cmp_rlo_rlo(emit->as, REG_ARG_2, reg_rhs);
+            static uint16_t ops[6] = {
+                ASM_THUMB_OP_ITE_GE,
+                ASM_THUMB_OP_ITE_GT,
+                ASM_THUMB_OP_ITE_EQ,
+                ASM_THUMB_OP_ITE_GT,
+                ASM_THUMB_OP_ITE_GE,
+                ASM_THUMB_OP_ITE_EQ,
+            };
+            static byte ret[6] = { 0, 1, 1, 0, 1, 0, };
+            asm_thumb_op16(emit->as, ops[op - MP_BINARY_OP_LESS]);
+            asm_thumb_mov_rlo_i8(emit->as, REG_RET, ret[op - MP_BINARY_OP_LESS]);
+            asm_thumb_mov_rlo_i8(emit->as, REG_RET, ret[op - MP_BINARY_OP_LESS] ^ 1);
+            #elif N_ARM
+            #error generic comparisons for ARM need implementing
+            //asm_arm_less_op(emit->as, REG_RET, REG_ARG_2, reg_rhs);
+            //asm_arm_more_op(emit->as, REG_RET, REG_ARG_2, reg_rhs);
+            #else
+            #error not implemented
+            #endif
             emit_post_push_reg(emit, VTYPE_BOOL, REG_RET);
         } else {
             // TODO other ops not yet implemented
             assert(0);
         }
     } else if (vtype_lhs == VTYPE_PYOBJ && vtype_rhs == VTYPE_PYOBJ) {
+        emit_pre_pop_reg_reg(emit, &vtype_rhs, REG_ARG_3, &vtype_lhs, REG_ARG_2);
         bool invert = false;
         if (op == MP_BINARY_OP_NOT_IN) {
             invert = true;
@@ -1566,7 +1669,7 @@ STATIC void emit_native_binary_op(emit_t *emit, mp_binary_op_t op) {
     }
     emit_call_with_imm_arg(emit, MP_F_BINARY_OP, op, REG_ARG_1);
     if (invert) {
-        ASM_MOV_REG_TO_REG(emit->as, REG_RET, REG_ARG_2);
+        ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_RET);
         emit_call_with_imm_arg(emit, MP_F_UNARY_OP, MP_UNARY_OP_NOT, REG_ARG_1);
     }
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
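One remark on the Thumb tables above: the IT blocks only use the GE, GT and EQ conditions, so LESS, LESS_EQUAL and NOT_EQUAL are synthesised by testing the complementary condition and swapping the two result values via ret[] and ret[] ^ 1 (the first mov after the IT executes when the condition holds, the second when it does not). A host-side check (not part of the patch) that the ops[]/ret[] pairing computes all six comparisons:

    #include <assert.h>

    enum { GE, GT, EQ }; // the three IT conditions used
    static const int ops[6] = { GE, GT, EQ, GT, GE, EQ };
    static const int ret[6] = { 0, 1, 1, 0, 1, 0 };

    static int cond(int c, int a, int b) {
        return c == GE ? a >= b : c == GT ? a > b : a == b;
    }

    // op: 0=LESS 1=MORE 2=EQUAL 3=LESS_EQUAL 4=MORE_EQUAL 5=NOT_EQUAL
    static int nat_cmp(int op, int a, int b) {
        return cond(ops[op], a, b) ? ret[op] : ret[op] ^ 1;
    }

    int main(void) {
        for (int a = -2; a <= 2; a++) {
            for (int b = -2; b <= 2; b++) {
                assert(nat_cmp(0, a, b) == (a < b));
                assert(nat_cmp(1, a, b) == (a > b));
                assert(nat_cmp(2, a, b) == (a == b));
                assert(nat_cmp(3, a, b) == (a <= b));
                assert(nat_cmp(4, a, b) == (a >= b));
                assert(nat_cmp(5, a, b) == (a != b));
            }
        }
        return 0;
    }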