diff --git a/ext/opcache/jit/ir/ir.h b/ext/opcache/jit/ir/ir.h index a96650597055a..a274ceb5b1673 100644 --- a/ext/opcache/jit/ir/ir.h +++ b/ext/opcache/jit/ir/ir.h @@ -539,38 +539,38 @@ void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func); void ir_strtab_free(ir_strtab *strtab); /* IR Context Flags */ -#define IR_FUNCTION (1<<0) /* Generate a function. */ -#define IR_FASTCALL_FUNC (1<<1) /* Generate a function with fastcall calling convention, x86 32-bit only. */ -#define IR_VARARG_FUNC (1<<2) -#define IR_BUILTIN_FUNC (1<<3) -#define IR_STATIC (1<<4) -#define IR_EXTERN (1<<5) -#define IR_CONST (1<<6) - -#define IR_CONST_FUNC (1<<6) -#define IR_PURE_FUNC (1<<7) - -#define IR_INITIALIZED (1<<7) /* sym data flag: constant or an initialized variable */ -#define IR_CONST_STRING (1<<8) /* sym data flag: constant string */ - -#define IR_SKIP_PROLOGUE (1<<8) /* Don't generate function prologue. */ -#define IR_USE_FRAME_POINTER (1<<9) -#define IR_PREALLOCATED_STACK (1<<10) -#define IR_NO_STACK_COMBINE (1<<11) -#define IR_START_BR_TARGET (1<<12) -#define IR_ENTRY_BR_TARGET (1<<13) -#define IR_GEN_ENDBR (1<<14) -#define IR_MERGE_EMPTY_ENTRIES (1<<15) - -#define IR_OPT_INLINE (1<<16) -#define IR_OPT_FOLDING (1<<17) -#define IR_OPT_CFG (1<<18) /* merge BBs, by remove END->BEGIN nodes during CFG construction */ -#define IR_OPT_MEM2SSA (1<<19) -#define IR_OPT_CODEGEN (1<<20) -#define IR_GEN_NATIVE (1<<21) -#define IR_GEN_CODE (1<<22) /* C or LLVM */ - -#define IR_GEN_CACHE_DEMOTE (1<<23) /* Demote the generated code from closest CPU caches */ +#define IR_PROTO_MASK 0xff +#define IR_CALL_CONV_MASK 0x0f + +#define IR_VARARG_FUNC (1<<4) +#define IR_CONST_FUNC (1<<5) +#define IR_PURE_FUNC (1<<6) + +#define IR_CONST (1<<5) +#define IR_INITIALIZED (1<<6) /* sym data flag: constant or an initialized variable */ +#define IR_CONST_STRING (1<<7) /* sym data flag: constant string */ + +#define IR_FUNCTION (1<<8) /* Generate a function. */ +#define IR_STATIC (1<<9) +#define IR_EXTERN (1<<10) + +#define IR_USE_FRAME_POINTER (1<<11) +#define IR_NO_STACK_COMBINE (1<<12) +#define IR_GEN_ENDBR (1<<13) +#define IR_GEN_CACHE_DEMOTE (1<<14) /* Demote the generated code from closest CPU caches */ + +#define IR_SKIP_PROLOGUE (1<<15) /* Don't generate function prologue. 
*/ +#define IR_START_BR_TARGET (1<<16) +#define IR_ENTRY_BR_TARGET (1<<17) +#define IR_MERGE_EMPTY_ENTRIES (1<<18) + +#define IR_OPT_INLINE (1<<19) +#define IR_OPT_FOLDING (1<<20) +#define IR_OPT_CFG (1<<21) /* merge BBs, by remove END->BEGIN nodes during CFG construction */ +#define IR_OPT_MEM2SSA (1<<22) +#define IR_OPT_CODEGEN (1<<23) +#define IR_GEN_NATIVE (1<<24) +#define IR_GEN_CODE (1<<25) /* debug related */ #ifdef IR_DEBUG @@ -582,6 +582,24 @@ void ir_strtab_free(ir_strtab *strtab); # define IR_DEBUG_BB_SCHEDULE (1U<<31) #endif +/* Calling Conventions */ +#define IR_CC_DEFAULT 0x00 +#define IR_CC_BUILTIN 0x01 +#define IR_CC_FASTCALL 0x02 +#define IR_CC_PRESERVE_NONE 0x03 + +#if defined(IR_TARGET_X64) +# define IR_CC_X86_64_SYSV 0x08 +# define IR_CC_X86_64_MS 0x09 +#elif defined(IR_TARGET_AARCH64) +# define IR_CC_AARCH64_SYSV 0x08 +# define IR_CC_AARCH64_DARWIN 0x09 +#endif + +/* Deprecated constants */ +#define IR_BUILTIN_FUNC IR_CC_BUILTIN +#define IR_FASTCALL_FUNC IR_CC_FASTCALL + typedef struct _ir_ctx ir_ctx; typedef struct _ir_use_list ir_use_list; typedef struct _ir_block ir_block; @@ -728,7 +746,7 @@ const char *ir_get_strl(const ir_ctx *ctx, ir_ref idx, size_t *len); #define IR_MAX_PROTO_PARAMS 255 typedef struct _ir_proto_t { - uint8_t flags; + uint8_t flags; /* first 8 bits of ir_ctx.flags */ uint8_t ret_type; uint8_t params_count; uint8_t param_types[5]; diff --git a/ext/opcache/jit/ir/ir_aarch64.dasc b/ext/opcache/jit/ir/ir_aarch64.dasc index b553243309f54..88996cb6f98e1 100644 --- a/ext/opcache/jit/ir/ir_aarch64.dasc +++ b/ext/opcache/jit/ir/ir_aarch64.dasc @@ -213,14 +213,21 @@ static bool aarch64_may_encode_addr_offset(int64_t offset, uint32_t type_size) |.endmacro typedef struct _ir_backend_data { - ir_reg_alloc_data ra_data; - uint32_t dessa_from_block; + ir_reg_alloc_data ra_data; dasm_State *dasm_state; ir_bitset emit_constants; int rodata_label, jmp_table_label; bool resolved_label_syms; } ir_backend_data; +typedef struct _ir_aarch64_sysv_va_list { + void *stack; + void *gr_top; + void *vr_top; + int32_t gr_offset; + int32_t vr_offset; +} ir_aarch64_sysv_va_list; + #define IR_GP_REG_NAME(code, name64, name32) \ #name64, #define IR_GP_REG_NAME32(code, name64, name32) \ @@ -230,9 +237,11 @@ typedef struct _ir_backend_data { #define IR_FP_REG_NAME32(code, name64, name32, name16, name8) \ #name32, -static const char *_ir_reg_name[IR_REG_NUM] = { +static const char *_ir_reg_name[] = { IR_GP_REGS(IR_GP_REG_NAME) IR_FP_REGS(IR_FP_REG_NAME) + "ALL", + "SCRATCH", }; static const char *_ir_reg_name32[IR_REG_NUM] = { @@ -240,38 +249,11 @@ static const char *_ir_reg_name32[IR_REG_NUM] = { IR_FP_REGS(IR_FP_REG_NAME32) }; -/* Calling Convention */ -static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { - IR_REG_INT_ARG1, - IR_REG_INT_ARG2, - IR_REG_INT_ARG3, - IR_REG_INT_ARG4, - IR_REG_INT_ARG5, - IR_REG_INT_ARG6, - IR_REG_INT_ARG7, - IR_REG_INT_ARG8, -}; - -static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { - IR_REG_FP_ARG1, - IR_REG_FP_ARG2, - IR_REG_FP_ARG3, - IR_REG_FP_ARG4, - IR_REG_FP_ARG5, - IR_REG_FP_ARG6, - IR_REG_FP_ARG7, - IR_REG_FP_ARG8, -}; - const char *ir_reg_name(int8_t reg, ir_type type) { if (reg >= IR_REG_NUM) { - if (reg == IR_REG_SCRATCH) { - return "SCRATCH"; - } else { - IR_ASSERT(reg == IR_REG_ALL); - return "ALL"; - } + IR_ASSERT((uint8_t)reg < sizeof(_ir_reg_name) / sizeof(_ir_reg_name[0])); + return _ir_reg_name[reg]; } IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); if (type == IR_VOID) { @@ -284,6 +266,82 @@ const char *ir_reg_name(int8_t 
reg, ir_type type) } } +/* Calling Conventions */ +#define IR_REG_SCRATCH_AARCH64 IR_REG_SET_1 + +#define IR_REGSET_SCRATCH_AARCH64 \ + (IR_REGSET_INTERVAL(IR_REG_X0, IR_REG_X18) | \ + IR_REGSET_INTERVAL(IR_REG_V0, IR_REG_V7) | \ + IR_REGSET_INTERVAL(IR_REG_V16, IR_REG_V31)) + +const ir_regset ir_scratch_regset[] = { + IR_REGSET_GP | IR_REGSET_FP, + IR_REGSET_SCRATCH_AARCH64, +}; + +const ir_call_conv_dsc ir_call_conv_aarch64_sysv = { + 0, /* cleanup_stack_by_callee */ + 0, /* pass_struct_by_val */ + 1, /* sysv_varargs */ + 0, /* shadow_param_regs */ + 0, /* shadow_store_size */ + 8, /* int_param_regs_count */ + 8, /* fp_param_regs_count */ + IR_REG_X0 , /* int_ret_reg */ + IR_REG_V0, /* fp_ret_reg */ + IR_REG_NONE, /* fp_varargs_reg */ + IR_REG_SCRATCH_AARCH64, + (const int8_t[8]){IR_REG_X0, IR_REG_X1, IR_REG_X2, IR_REG_X3, IR_REG_X4, IR_REG_X5, IR_REG_X6, IR_REG_X7}, + (const int8_t[8]){IR_REG_V0, IR_REG_V1, IR_REG_V2, IR_REG_V3, IR_REG_V4, IR_REG_V5, IR_REG_V6, IR_REG_V7}, + IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) | IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15), + +}; + +const ir_call_conv_dsc ir_call_conv_aarch64_darwin = { + 0, /* cleanup_stack_by_callee */ + 0, /* pass_struct_by_val */ + 0, /* sysv_varargs */ + 0, /* shadow_param_regs */ + 0, /* shadow_store_size */ + 8, /* int_param_regs_count */ + 8, /* fp_param_regs_count */ + IR_REG_X0 , /* int_ret_reg */ + IR_REG_V0, /* fp_ret_reg */ + IR_REG_NONE, /* fp_varargs_reg */ + IR_REG_SCRATCH_AARCH64, + (const int8_t[8]){IR_REG_X0, IR_REG_X1, IR_REG_X2, IR_REG_X3, IR_REG_X4, IR_REG_X5, IR_REG_X6, IR_REG_X7}, + (const int8_t[8]){IR_REG_V0, IR_REG_V1, IR_REG_V2, IR_REG_V3, IR_REG_V4, IR_REG_V5, IR_REG_V6, IR_REG_V7}, + IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) | IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15), + +}; + +const ir_call_conv_dsc ir_call_conv_aarch64_preserve_none = { + 0, /* cleanup_stack_by_callee */ + 0, /* pass_struct_by_val */ + 1, /* sysv_varargs */ + 0, /* shadow_param_regs */ + 0, /* shadow_store_size */ + 23, /* int_param_regs_count */ + 8, /* fp_param_regs_count */ + IR_REG_X0 , /* int_ret_reg */ + IR_REG_V0, /* fp_ret_reg */ + IR_REG_NONE, /* fp_varargs_reg */ + IR_REG_ALL, + (const int8_t[23]){IR_REG_X20, IR_REG_X21, IR_REG_X22, IR_REG_X23, IR_REG_X24, IR_REG_X25, IR_REG_X26, IR_REG_X27, + IR_REG_X28, + IR_REG_X0, IR_REG_X1, IR_REG_X2, IR_REG_X3, IR_REG_X4, IR_REG_X5, IR_REG_X6, IR_REG_X7, + IR_REG_X10, IR_REG_X11, IR_REG_X12, IR_REG_X13, IR_REG_X14, IR_REG_X9}, + (const int8_t[8]){IR_REG_V0, IR_REG_V1, IR_REG_V2, IR_REG_V3, IR_REG_V4, IR_REG_V5, IR_REG_V6, IR_REG_V7}, + IR_REGSET_EMPTY, + +}; + +#ifdef __APPLE__ +# define ir_call_conv_default ir_call_conv_aarch64_darwin +#else +# define ir_call_conv_default ir_call_conv_aarch64_sysv +#endif + #define IR_RULES(_) \ _(CMP_INT) \ _(CMP_FP) \ @@ -342,6 +400,8 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co const ir_insn *insn; int n = 0; int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + const ir_proto_t *proto; + const ir_call_conv_dsc *cc; constraints->def_reg = IR_REG_NONE; constraints->hints_count = 0; @@ -584,20 +644,33 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co n++; break; case IR_ARGVAL: - constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_DEF_SUB_REF - IR_SUB_REFS_COUNT, IR_USE_SUB_REF); + /* memcpy() clobbers all scratch registers */ + constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH_AARCH64, IR_DEF_SUB_REF - 
IR_SUB_REFS_COUNT, IR_USE_SUB_REF); n = 1; break; case IR_CALL: insn = &ctx->ir_base[ref]; - constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1; - constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF); + proto = ir_call_proto(ctx, insn); + cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT); + if (insn->type != IR_VOID) { + constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? + cc->int_ret_reg : cc->fp_ret_reg; + } + constraints->tmp_regs[0] = IR_SCRATCH_REG(cc->scratch_reg, IR_USE_SUB_REF, IR_DEF_SUB_REF); n = 1; - IR_FALLTHROUGH; + if (insn->inputs_count > 2) { + goto get_arg_hints; + } + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; + break; case IR_TAILCALL: insn = &ctx->ir_base[ref]; if (insn->inputs_count > 2) { + proto = ir_call_proto(ctx, insn); + cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT); +get_arg_hints: constraints->hints[2] = IR_REG_NONE; - constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints); + constraints->hints_count = ir_get_args_regs(ctx, insn, cc, constraints->hints); if (!IR_IS_CONST_REF(insn->op2)) { constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF); n++; @@ -658,19 +731,22 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co flags = IR_USE_SHOULD_BE_IN_REG; break; case IR_EXITCALL: - constraints->def_reg = IR_REG_INT_RET1; + cc = ir_get_call_conv_dsc(ctx->flags); + constraints->def_reg = cc->int_ret_reg; break; case IR_RSTORE: flags = IR_OP3_SHOULD_BE_IN_REG; break; case IR_RETURN_INT: + cc = ir_get_call_conv_dsc(ctx->flags); flags = IR_OP2_SHOULD_BE_IN_REG; - constraints->hints[2] = IR_REG_INT_RET1; + constraints->hints[2] = cc->int_ret_reg; constraints->hints_count = 3; break; case IR_RETURN_FP: + cc = ir_get_call_conv_dsc(ctx->flags); flags = IR_OP2_SHOULD_BE_IN_REG; - constraints->hints[2] = IR_REG_FP_RET1; + constraints->hints[2] = cc->fp_ret_reg; constraints->hints_count = 3; break; case IR_SNAPSHOT: @@ -1798,72 +1874,73 @@ static void ir_emit_prologue(ir_ctx *ctx) } } } + if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { -#ifndef __APPLE__ - const int8_t *int_reg_params = _ir_int_reg_params; - const int8_t *fp_reg_params = _ir_fp_reg_params; - ir_reg fp; - int offset; - int i; + const ir_call_conv_dsc *cc = data->ra_data.cc; - if (ctx->flags & IR_USE_FRAME_POINTER) { - fp = IR_REG_FRAME_POINTER; + if (cc->sysv_varargs) { + ir_reg fp; + int offset; + int i; - offset = ctx->locals_area_size + sizeof(void*) * 2; - } else { - fp = IR_REG_STACK_POINTER; - offset = ctx->locals_area_size + ctx->call_stack_size; - } + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; - if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { - ir_reg prev = IR_REG_NONE; + offset = ctx->locals_area_size + sizeof(void*) * 2; + } else { + fp = IR_REG_STACK_POINTER; + offset = ctx->locals_area_size + ctx->call_stack_size; + } + + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) { + ir_reg prev = IR_REG_NONE; - /* skip named args */ - offset += sizeof(void*) * ctx->gp_reg_params; - for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) { + /* skip named args */ + offset += sizeof(void*) * ctx->gp_reg_params; + for (i = ctx->gp_reg_params; i < cc->int_param_regs_count; i++) { + if (prev != IR_REG_NONE) { + if 
(aarch64_may_encode_imm7_addr_offset(offset, 8)) { + | stp Rx(prev), Rx(cc->int_param_regs[i]), [Rx(fp), #offset] + } else if (aarch64_may_encode_addr_offset(offset + 8, 8)) { + | str Rx(prev), [Rx(fp), #offset] + | str Rx(cc->int_param_regs[i]), [Rx(fp), #(offset+8)] + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | str Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)] + | add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #8 + | str Rx(cc->int_param_regs[i]), [Rx(fp), Rx(IR_REG_INT_TMP)] + } + prev = IR_REG_NONE; + offset += sizeof(void*) * 2; + } else { + prev = cc->int_param_regs[i]; + } + } if (prev != IR_REG_NONE) { - if (aarch64_may_encode_imm7_addr_offset(offset, 8)) { - | stp Rx(prev), Rx(int_reg_params[i]), [Rx(fp), #offset] - } else if (aarch64_may_encode_addr_offset(offset + 8, 8)) { + if (aarch64_may_encode_addr_offset(offset + 8, 8)) { | str Rx(prev), [Rx(fp), #offset] - | str Rx(int_reg_params[i]), [Rx(fp), #(offset+8)] } else { ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); | str Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)] - | add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #8 - | str Rx(int_reg_params[i]), [Rx(fp), Rx(IR_REG_INT_TMP)] } - prev = IR_REG_NONE; - offset += sizeof(void*) * 2; - } else { - prev = int_reg_params[i]; + offset += sizeof(void*); } } - if (prev != IR_REG_NONE) { - if (aarch64_may_encode_addr_offset(offset + 8, 8)) { - | str Rx(prev), [Rx(fp), #offset] - } else { - ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); - | str Rx(prev), [Rx(fp), Rx(IR_REG_INT_TMP)] - } - offset += sizeof(void*); - } - } - if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { - /* skip named args */ - offset += 16 * ctx->fp_reg_params; - for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) { - // TODO: Rd->Rq stur->str ??? - if (aarch64_may_encode_addr_offset(offset, 8)) { - | str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset] - } else { - ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); - | str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)] + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) { + /* skip named args */ + offset += 16 * ctx->fp_reg_params; + for (i = ctx->fp_reg_params; i < cc->fp_param_regs_count; i++) { + // TODO: Rd->Rq stur->str ??? 
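
Aside on the layout this save loop fills in: the prologue spills the still-unnamed parameter registers into a register save area, and ir_emit_va_start (further down) points the new ir_aarch64_sysv_va_list at it. Per AAPCS64, gr_top/vr_top point one past the end of the GP and FP/SIMD save areas, and gr_offset/vr_offset start out negative (minus the bytes still unread) and climb toward zero. A minimal C model of the va_arg decision this drives; the names are illustrative, and it mirrors the simplified sign test the backend emits (cmp/bge) rather than the full AAPCS64 rules:

	#include <stdint.h>
	#include <string.h>

	typedef struct {        /* same layout as ir_aarch64_sysv_va_list */
		void    *stack;     /* next stacked (overflow) argument */
		void    *gr_top;    /* one past the end of the GP save area */
		void    *vr_top;    /* one past the end of the FP/SIMD save area */
		int32_t  gr_offset; /* < 0: GP registers left, 8 bytes each */
		int32_t  vr_offset; /* < 0: FP registers left, 16 bytes each */
	} va_list_model;

	/* Hypothetical helper: fetch the next 8-byte integer argument. */
	static int64_t next_int_arg(va_list_model *ap)
	{
		int64_t v;
		if (ap->gr_offset < 0) {                 /* the emitted "cmp ... bge" test */
			memcpy(&v, (char*)ap->gr_top + ap->gr_offset, sizeof v);
			ap->gr_offset += sizeof(void*);      /* matches "add Rw(tmp_reg), #sizeof(void*)" */
		} else {                                 /* registers exhausted: read the stack */
			memcpy(&v, ap->stack, sizeof v);
			ap->stack = (char*)ap->stack + sizeof(void*);
		}
		return v;
	}

va_start seeds gr_offset via movn with -(sizeof(void*) * (int_param_regs_count - gp_reg_params)), so the model's sign test matches the generated code.
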
+ if (aarch64_may_encode_addr_offset(offset, 8)) { + | str Rd(cc->fp_param_regs[i]-IR_REG_FP_FIRST), [Rx(fp), #offset] + } else { + ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); + | str Rd(cc->fp_param_regs[i]-IR_REG_FP_FIRST), [Rx(fp), Rx(IR_REG_INT_TMP)] + } + offset += 16; } - offset += 16; } } -#endif } } @@ -3257,10 +3334,6 @@ static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint break; case IR_UNORDERED: | bvs =>true_block -// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; -// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; -// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; -// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; } } if (false_block) { @@ -3434,15 +3507,17 @@ static void ir_emit_return_void(ir_ctx *ctx) static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { + ir_backend_data *data = ctx->data; + ir_reg ret_reg = data->ra_data.cc->int_ret_reg; ir_reg op2_reg = ctx->regs[ref][2]; - if (op2_reg != IR_REG_INT_RET1) { + if (op2_reg != ret_reg) { ir_type type = ctx->ir_base[insn->op2].type; if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { - ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg); + ir_emit_mov(ctx, type, ret_reg, op2_reg); } else { - ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2); + ir_emit_load(ctx, type, ret_reg, insn->op2); } } ir_emit_return_void(ctx); @@ -3450,14 +3525,16 @@ static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { + ir_backend_data *data = ctx->data; + ir_reg ret_reg = data->ra_data.cc->fp_ret_reg; ir_reg op2_reg = ctx->regs[ref][2]; ir_type type = ctx->ir_base[insn->op2].type; - if (op2_reg != IR_REG_FP_RET1) { + if (op2_reg != ret_reg) { if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { - ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg); + ir_emit_fp_mov(ctx, type, ret_reg, op2_reg); } else { - ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2); + ir_emit_load(ctx, type, ret_reg, insn->op2); } } ir_emit_return_void(ctx); @@ -4461,281 +4538,281 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) { -#ifdef __APPLE__ ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; dasm_State **Dst = &data->dasm_state; - ir_reg fp; - int arg_area_offset; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - int32_t offset; - IR_ASSERT(tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + if (!cc->sysv_varargs) { + ir_reg fp; + int arg_area_offset; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + int32_t offset; + + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - if (ctx->flags & IR_USE_FRAME_POINTER) { - fp = IR_REG_FRAME_POINTER; - arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; + } else { + fp = IR_REG_STACK_POINTER; + arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; + } + | add Rx(tmp_reg), Rx(fp), #arg_area_offset + | str Rx(tmp_reg), [Rx(op2_reg), #offset] } else { - fp = IR_REG_STACK_POINTER; - arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; - } - | add Rx(tmp_reg), Rx(fp), #arg_area_offset - | str Rx(tmp_reg), [Rx(op2_reg), #offset] -#else - ir_backend_data *data = ctx->data; - dasm_State **Dst = &data->dasm_state; - ir_reg fp; - int reg_save_area_offset; - int overflow_arg_area_offset; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - int32_t offset; + ir_reg fp; + int reg_save_area_offset; + int overflow_arg_area_offset; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + int32_t offset; - IR_ASSERT(tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - if (ctx->flags & IR_USE_FRAME_POINTER) { - fp = IR_REG_FRAME_POINTER; - reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2; - overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; - } else { - fp = IR_REG_STACK_POINTER; - reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; - overflow_arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; - } - - /* Set va_list.stack */ - | add Rx(tmp_reg), Rx(fp), #overflow_arg_area_offset - | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] - if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { - reg_save_area_offset += sizeof(void*) * IR_REG_INT_ARGS; - /* Set va_list.gr_top */ - if (overflow_arg_area_offset != reg_save_area_offset) { - | add Rx(tmp_reg), Rx(fp), #reg_save_area_offset - } - | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_top))] - /* Set va_list.gr_offset */ - | movn Rw(tmp_reg), #~(0 - (sizeof(void*) * (IR_REG_INT_ARGS - ctx->gp_reg_params))) - | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))] - } else { - /* Set va_list.gr_offset */ - | str wzr, [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))] - } - if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { - reg_save_area_offset += 16 * IR_REG_FP_ARGS; - /* Set va_list.vr_top */ - if (overflow_arg_area_offset != reg_save_area_offset || ctx->gp_reg_params < IR_REG_INT_ARGS) { - | add Rx(tmp_reg), Rx(fp), #reg_save_area_offset - } - | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))] - /* Set va_list.vr_offset */ - | movn Rw(tmp_reg), #~(0 - (16 * (IR_REG_FP_ARGS - ctx->fp_reg_params))) - | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))] - } else { - /* Set va_list.vr_offset */ - | str wzr, [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))] + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2; + overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; + } else { + fp = IR_REG_STACK_POINTER; + reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; + overflow_arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; + } + + /* Set va_list.stack */ + | add Rx(tmp_reg), Rx(fp), #overflow_arg_area_offset + | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, stack))] + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) { + reg_save_area_offset += sizeof(void*) * cc->int_param_regs_count; + /* Set va_list.gr_top */ + if (overflow_arg_area_offset != reg_save_area_offset) { + | add Rx(tmp_reg), Rx(fp), #reg_save_area_offset + } + | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_top))] + /* Set va_list.gr_offset */ + | movn Rw(tmp_reg), #~(0 - (sizeof(void*) * (cc->int_param_regs_count - ctx->gp_reg_params))) + | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_offset))] + } else { + /* Set va_list.gr_offset */ + | str wzr, [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_offset))] + } + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < 
cc->fp_param_regs_count) { + reg_save_area_offset += 16 * cc->fp_param_regs_count; + /* Set va_list.vr_top */ + if (overflow_arg_area_offset != reg_save_area_offset || ctx->gp_reg_params < cc->int_param_regs_count) { + | add Rx(tmp_reg), Rx(fp), #reg_save_area_offset + } + | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_top))] + /* Set va_list.vr_offset */ + | movn Rw(tmp_reg), #~(0 - (16 * (cc->fp_param_regs_count - ctx->fp_reg_params))) + | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_offset))] + } else { + /* Set va_list.vr_offset */ + | str wzr, [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_offset))] + } } -#endif } static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) { -#ifdef __APPLE__ ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; dasm_State **Dst = &data->dasm_state; - ir_reg tmp_reg = ctx->regs[def][1]; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg op3_reg = ctx->regs[def][3]; - int32_t op2_offset, op3_offset; - IR_ASSERT(tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + if (!cc->sysv_varargs) { + ir_reg tmp_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg op3_reg = ctx->regs[def][3]; + int32_t op2_offset, op3_offset; + + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + op2_offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - op2_offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - if (op3_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op3_reg)) { - op3_reg = IR_REG_NUM(op3_reg); - ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); + if (op3_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op3_reg)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); + } + op3_offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); + op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); } - op3_offset = 0; + | ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset] + | str Rx(tmp_reg), [Rx(op2_reg), #op2_offset] } else { - IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); - op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); - } - | ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset] - | str Rx(tmp_reg), [Rx(op2_reg), #op2_offset] -#else - ir_backend_data *data = ctx->data; - dasm_State **Dst = &data->dasm_state; - ir_reg tmp_reg = ctx->regs[def][1]; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg op3_reg = ctx->regs[def][3]; - int32_t op2_offset, op3_offset; + ir_reg tmp_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg op3_reg = ctx->regs[def][3]; + int32_t op2_offset, op3_offset; - IR_ASSERT(tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + op2_offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - op2_offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); + if (op3_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op3_reg)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); + } + op3_offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); + op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); + } + | ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset] + | str Rx(tmp_reg), [Rx(op2_reg), #op2_offset] + | ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+8)] + | str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+8)] + | ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+16)] + | str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+16)] + | ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+24)] + | str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+24)] } - if (op3_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op3_reg)) { - op3_reg = IR_REG_NUM(op3_reg); - ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); - } - op3_offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); - op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); - } - | ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset] - | str Rx(tmp_reg), [Rx(op2_reg), #op2_offset] - | ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+8)] - | str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+8)] - | ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+16)] - | str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+16)] - | ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+24)] - | str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+24)] -#endif } static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) { -#ifdef __APPLE__ ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; dasm_State **Dst = &data->dasm_state; - ir_type type = insn->type; - ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - int32_t offset; - IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); - } - offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - | ldr Rx(tmp_reg), [Rx(op2_reg), #offset] - if (def_reg != IR_REG_NONE) { - ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); - } - | add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*)) - | str Rx(tmp_reg), [Rx(op2_reg), #offset] - if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { - ir_emit_store(ctx, type, def, def_reg); - } -#else - ir_backend_data *data = ctx->data; - dasm_State **Dst = &data->dasm_state; - ir_type type = insn->type; - ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - int32_t offset; + if (!cc->sysv_varargs) { + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + int32_t offset; - IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - if (IR_IS_TYPE_INT(type)) { - | ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))] - | cmp Rw(tmp_reg), wzr - | bge >1 - | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_top))] - | sxtw Rx(tmp_reg), Rw(tmp_reg) - | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) + | ldr Rx(tmp_reg), [Rx(op2_reg), #offset] if (def_reg != IR_REG_NONE) { - | ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)] + ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); } - | add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*) - | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))] - | b >2 - |1: - | ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] - if (def_reg != IR_REG_NONE) { - | ldr Rx(def_reg), [Rx(tmp_reg)] + | add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*)) + | str Rx(tmp_reg), [Rx(op2_reg), #offset] + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); } - | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) - | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] - |2: } else { - | ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))] - | cmp Rw(tmp_reg), wzr - | bge >1 - | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))] - | sxtw Rx(tmp_reg), Rw(tmp_reg) - | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) - if (def_reg != IR_REG_NONE) { - | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)] + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + int32_t offset; + + IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - | add Rw(tmp_reg), Rw(tmp_reg), #16 - | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))] - | b >2 - |1: - | ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] - if (def_reg != IR_REG_NONE) { - | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)] + if (IR_IS_TYPE_INT(type)) { + | ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_offset))] + | cmp Rw(tmp_reg), wzr + | bge >1 + | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_top))] + | sxtw Rx(tmp_reg), Rw(tmp_reg) + | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) + if (def_reg != IR_REG_NONE) { + | ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)] + } + | add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*) + | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, gr_offset))] + | b >2 + |1: + | ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, stack))] + if (def_reg != IR_REG_NONE) { + | ldr Rx(def_reg), [Rx(tmp_reg)] + } + | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) + | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, stack))] + |2: + } else { + | ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_offset))] + | cmp Rw(tmp_reg), wzr + | bge >1 + | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_top))] + | sxtw Rx(tmp_reg), Rw(tmp_reg) + | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) + if (def_reg != IR_REG_NONE) { + | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)] + } + | add Rw(tmp_reg), Rw(tmp_reg), #16 + | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, vr_offset))] + | b >2 + |1: + | ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, stack))] + if (def_reg != IR_REG_NONE) { + | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)] + } + | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) + | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_aarch64_sysv_va_list, stack))] + |2: + } + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); } - | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) - | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] - |2: } - if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { - ir_emit_store(ctx, type, def, def_reg); - } -#endif } static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) @@ -4958,19 +5035,23 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) } } -static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int32_t *copy_stack_ptr) +static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, const ir_call_conv_dsc *cc, int32_t *copy_stack_ptr) { int j, n; ir_type type; int int_param = 0; int fp_param = 0; - int int_reg_params_count = IR_REG_INT_ARGS; - int fp_reg_params_count = IR_REG_FP_ARGS; int32_t used_stack = 0, copy_stack = 0; -#ifdef __APPLE__ - const ir_proto_t *proto = ir_call_proto(ctx, insn); - int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count; -#endif + + /* On APPLE "unnamed" arguments always passed through stack */ + int last_named_input; + + if (!cc->sysv_varargs) { + const ir_proto_t *proto = ir_call_proto(ctx, insn); + last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? 
proto->params_count + 2 : insn->inputs_count; + } else { + last_named_input = insn->inputs_count; + } n = insn->inputs_count; for (j = 3; j <= n; j++) { @@ -4984,19 +5065,16 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int32_t *copy_stac copy_stack = IR_ALIGNED_SIZE(copy_stack, align); type = IR_ADDR; } -#ifdef __APPLE__ if (j > last_named_input) { used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); - } else -#endif - if (IR_IS_TYPE_INT(type)) { - if (int_param >= int_reg_params_count) { + } else if (IR_IS_TYPE_INT(type)) { + if (int_param >= cc->int_param_regs_count) { used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); } int_param++; } else { IR_ASSERT(IR_IS_TYPE_FP(type)); - if (fp_param >= fp_reg_params_count) { + if (fp_param >= cc->fp_param_regs_count) { used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); } fp_param++; @@ -5008,7 +5086,7 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int32_t *copy_stac return used_stack + copy_stack; } -static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) +static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, const ir_call_conv_dsc *cc, ir_reg tmp_reg) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; @@ -5020,10 +5098,6 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg int int_param = 0; int fp_param = 0; int count = 0; - int int_reg_params_count = IR_REG_INT_ARGS; - int fp_reg_params_count = IR_REG_FP_ARGS; - const int8_t *int_reg_params = _ir_int_reg_params; - const int8_t *fp_reg_params = _ir_fp_reg_params; int32_t used_stack, copy_stack = 0, stack_offset = 0, copy_stack_offset = 0; ir_copy *copies; bool do_pass3 = 0; @@ -5043,7 +5117,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg // TODO: support for preallocated stack used_stack = 0; } else { - used_stack = ir_call_used_stack(ctx, insn, &copy_stack); + used_stack = ir_call_used_stack(ctx, insn, cc, &copy_stack); /* Stack must be 16 byte aligned */ used_stack = IR_ALIGNED_SIZE(used_stack, 16); if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) { @@ -5061,10 +5135,15 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg } } -#ifdef __APPLE__ - const ir_proto_t *proto = ir_call_proto(ctx, insn); - int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count; -#endif + /* On APPLE "unnamed" arguments always passed through stack */ + int last_named_input; + + if (!cc->sysv_varargs) { + const ir_proto_t *proto = ir_call_proto(ctx, insn); + last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ?
proto->params_count + 2 : insn->inputs_count; + } else { + last_named_input = insn->inputs_count; + } if (copy_stack) { /* Copy struct arguments */ @@ -5085,17 +5164,17 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align); src_reg = ctx->regs[arg][1]; - | add Rx(IR_REG_INT_ARG1), sp, #(used_stack - copy_stack_offset) + | add Rx(ir_call_conv_default.int_param_regs[0]), sp, #(used_stack - copy_stack_offset) if (src_reg != IR_REG_NONE) { if (IR_REG_SPILLED(src_reg)) { src_reg = IR_REG_NUM(src_reg); ir_emit_load(ctx, IR_ADDR, src_reg, arg_insn->op1); } - | mov Rx(IR_REG_INT_ARG2), Rx(src_reg) + | mov Rx(ir_call_conv_default.int_param_regs[1]), Rx(src_reg) } else { - ir_emit_load(ctx, IR_ADDR, IR_REG_INT_ARG2, arg_insn->op1); + ir_emit_load(ctx, IR_ADDR, ir_call_conv_default.int_param_regs[1], arg_insn->op1); } - ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_ARG3, size); + ir_emit_load_imm_int(ctx, IR_ADDR, ir_call_conv_default.int_param_regs[2], size); if (aarch64_may_use_b(ctx->code_buffer, addr)) { | bl &addr @@ -5117,18 +5196,15 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg arg_insn = &ctx->ir_base[arg]; type = arg_insn->type; -#ifdef __APPLE__ if (j > last_named_input) { if (arg_insn->op == IR_ARGVAL) { do_pass3 = 1; continue; } dst_reg = IR_REG_NONE; /* pass argument through stack */ - } else -#endif - if (IR_IS_TYPE_INT(type)) { - if (int_param < int_reg_params_count) { - dst_reg = int_reg_params[int_param]; + } else if (IR_IS_TYPE_INT(type)) { + if (int_param < cc->int_param_regs_count) { + dst_reg = cc->int_param_regs[int_param]; } else { dst_reg = IR_REG_NONE; /* pass argument through stack */ } @@ -5139,8 +5215,8 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg } } else { IR_ASSERT(IR_IS_TYPE_FP(type)); - if (fp_param < fp_reg_params_count) { - dst_reg = fp_reg_params[fp_param]; + if (fp_param < cc->fp_param_regs_count) { + dst_reg = cc->fp_param_regs[fp_param]; } else { dst_reg = IR_REG_NONE; /* pass argument through stack */ } @@ -5149,7 +5225,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg if (dst_reg != IR_REG_NONE) { if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE || - (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(IR_REGSET_PRESERVED, IR_REG_NUM(src_reg)))) { + (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(cc->preserved_regs, IR_REG_NUM(src_reg)))) { /* delay CONST->REG and MEM->REG moves to third pass */ do_pass3 = 1; } else { @@ -5201,14 +5277,11 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg copy_stack_offset += size; align = IR_MAX((int)sizeof(void*), align); copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align); -#ifdef __APPLE__ if (j > last_named_input) { | add Rx(tmp_reg), sp, #(used_stack - copy_stack_offset) ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg); - } else -#endif - if (int_param < int_reg_params_count) { - dst_reg = int_reg_params[int_param]; + } else if (int_param < cc->int_param_regs_count) { + dst_reg = cc->int_param_regs[int_param]; | add Rx(dst_reg), sp, #(used_stack - copy_stack_offset) } else { | add Rx(tmp_reg), sp, #(used_stack - copy_stack_offset) @@ -5218,22 +5291,19 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg int_param++; continue; } -#ifdef __APPLE__ if (j > last_named_input) { dst_reg = IR_REG_NONE; /* pass argument through stack 
*/ - } else -#endif - if (IR_IS_TYPE_INT(type)) { - if (int_param < int_reg_params_count) { - dst_reg = int_reg_params[int_param]; + } else if (IR_IS_TYPE_INT(type)) { + if (int_param < cc->int_param_regs_count) { + dst_reg = cc->int_param_regs[int_param]; } else { dst_reg = IR_REG_NONE; /* argument already passed through stack */ } int_param++; } else { IR_ASSERT(IR_IS_TYPE_FP(type)); - if (fp_param < fp_reg_params_count) { - dst_reg = fp_reg_params[fp_param]; + if (fp_param < cc->fp_param_regs_count) { + dst_reg = cc->fp_param_regs[fp_param]; } else { dst_reg = IR_REG_NONE; /* argument already passed through stack */ } @@ -5242,7 +5312,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg if (dst_reg != IR_REG_NONE) { if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE || - (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(IR_REGSET_PRESERVED, IR_REG_NUM(src_reg)))) { + (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(cc->preserved_regs, IR_REG_NUM(src_reg)))) { if (IR_IS_CONST_REF(arg) && IR_IS_TYPE_INT(type)) { if (ir_type_size[type] == 1) { type = IR_ADDR; @@ -5282,7 +5352,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg return used_stack; } -static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack) +static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, const ir_call_conv_dsc *cc, int32_t used_stack) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; @@ -5317,27 +5387,27 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used if (IR_IS_TYPE_INT(insn->type)) { def_reg = IR_REG_NUM(ctx->regs[def][0]); if (def_reg != IR_REG_NONE) { - if (def_reg != IR_REG_INT_RET1) { - ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + if (def_reg != cc->int_ret_reg) { + ir_emit_mov(ctx, insn->type, def_reg, cc->int_ret_reg); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } else if (ctx->use_lists[def].count > 1) { - ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); + ir_emit_store(ctx, insn->type, def, cc->int_ret_reg); } } else { IR_ASSERT(IR_IS_TYPE_FP(insn->type)); def_reg = IR_REG_NUM(ctx->regs[def][0]); if (def_reg != IR_REG_NONE) { - if (def_reg != IR_REG_FP_RET1) { - ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); + if (def_reg != cc->fp_ret_reg) { + ir_emit_fp_mov(ctx, insn->type, def_reg, cc->fp_ret_reg); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } else if (ctx->use_lists[def].count > 1) { - ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); + ir_emit_store(ctx, insn->type, def, cc->fp_ret_reg); } } } @@ -5345,18 +5415,22 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) { - int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); - ir_emit_call_ex(ctx, def, insn, used_stack); + const ir_proto_t *proto = ir_call_proto(ctx, insn); + const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? 
proto->flags : IR_CC_DEFAULT); + int32_t used_stack = ir_emit_arguments(ctx, def, insn, cc, ctx->regs[def][1]); + ir_emit_call_ex(ctx, def, insn, cc, used_stack); } static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; - int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + const ir_proto_t *proto = ir_call_proto(ctx, insn); + const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT); + int32_t used_stack = ir_emit_arguments(ctx, def, insn, cc, ctx->regs[def][1]); if (used_stack != 0) { - ir_emit_call_ex(ctx, def, insn, used_stack); + ir_emit_call_ex(ctx, def, insn, cc, used_stack); ir_emit_return_void(ctx); return; } @@ -5578,15 +5652,23 @@ static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp) case IR_GT: | bgt &addr break; + case IR_ULT: + | blt &addr + break; + case IR_UGE: + | bhs &addr + break; + case IR_ULE: + | ble &addr + break; + case IR_UGT: + | bhi &addr + break; case IR_ORDERED: | bvc &addr break; case IR_UNORDERED: | bvs &addr -// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; -// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; -// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; -// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; } } } @@ -5660,7 +5742,11 @@ static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *i void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); if (insn->op == IR_GUARD) { - op ^= 1; // reverse + if (op == IR_EQ || op == IR_NE || op == IR_ORDERED || op == IR_UNORDERED) { + op ^= 1; // reverse + } else { + op ^= 5; // reverse + } } ir_emit_guard_jcc(ctx, op, addr, 0); } @@ -5746,6 +5832,7 @@ static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = &ir_call_conv_default; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); @@ -5785,10 +5872,10 @@ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) | stp x2, x3, [sp, #-16]! | stp x0, x1, [sp, #-16]! 
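
A note on the op ^= 5 reversal a few hunks up: reversing a guard on an ordered FP comparison has to produce the unordered complement, because both a < b and its ordered opposite a >= b are false when either operand is NaN. The single XOR works only because IR's comparison opcodes are laid out with each unordered op four slots past its ordered counterpart, so ^5 swaps LT with UGE, GE with ULT, LE with UGT, and GT with ULE in one step, while EQ/NE and ORDERED/UNORDERED are adjacent pairs that reverse with ^1. A self-contained check of the underlying identity (plain C, independent of the opcode encoding):

	#include <assert.h>
	#include <math.h>

	/* "Unordered or greater-or-equal" -- what IR_UGE tests after an FP compare. */
	static int uge(double a, double b) { return a >= b || isnan(a) || isnan(b); }

	int main(void)
	{
		double v[] = { 0.0, 1.0, NAN };
		for (int i = 0; i < 3; i++) {
			for (int j = 0; j < 3; j++) {
				/* The true complement of the ordered "a < b" is UGE, not ">=". */
				assert(!(v[i] < v[j]) == uge(v[i], v[j]));
			}
		}
		return 0;
	}
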
- | mov Rx(IR_REG_INT_ARG2), sp - | add Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_ARG2), #(32*8+32*8) - | str Rx(IR_REG_INT_ARG1), [sp, #(31*8)] - | mov Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_TMP) + | mov Rx(cc->int_param_regs[1]), sp + | add Rx(cc->int_param_regs[0]), Rx(cc->int_param_regs[1]), #(32*8+32*8) + | str Rx(cc->int_param_regs[0]), [sp, #(31*8)] + | mov Rx(cc->int_param_regs[0]), Rx(IR_REG_INT_TMP) if (IR_IS_CONST_REF(insn->op2)) { void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); @@ -5805,8 +5892,8 @@ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) | add sp, sp, #(32*8+32*8) - if (def_reg != IR_REG_INT_RET1) { - ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + if (def_reg != cc->int_ret_reg) { + ir_emit_mov(ctx, insn->type, def_reg, cc->int_ret_reg); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); @@ -5852,11 +5939,8 @@ static void ir_emit_load_params(ir_ctx *ctx) int fp_param_num = 0; ir_reg src_reg; ir_reg dst_reg; - // TODO: Calling convention specific - int int_reg_params_count = IR_REG_INT_ARGS; - int fp_reg_params_count = IR_REG_FP_ARGS; - const int8_t *int_reg_params = _ir_int_reg_params; - const int8_t *fp_reg_params = _ir_fp_reg_params; + ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; int32_t stack_offset = 0; int32_t stack_start = ctx->stack_frame_size; @@ -5866,15 +5950,15 @@ static void ir_emit_load_params(ir_ctx *ctx) insn = &ctx->ir_base[use]; if (insn->op == IR_PARAM) { if (IR_IS_TYPE_INT(insn->type)) { - if (int_param_num < int_reg_params_count) { - src_reg = int_reg_params[int_param_num]; + if (int_param_num < cc->int_param_regs_count) { + src_reg = cc->int_param_regs[int_param_num]; } else { src_reg = IR_REG_NONE; } int_param_num++; } else { - if (fp_param_num < fp_reg_params_count) { - src_reg = fp_reg_params[fp_param_num]; + if (fp_param_num < cc->fp_param_regs_count) { + src_reg = cc->fp_param_regs[fp_param_num]; } else { src_reg = IR_REG_NONE; } @@ -5914,10 +5998,9 @@ static ir_reg ir_get_free_reg(ir_type type, ir_regset available) return IR_REGSET_FIRST(available); } -static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to, void *dessa_from_block) { - ir_backend_data *data = ctx->data; - ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end; + ir_ref ref = ctx->cfg_blocks[(intptr_t)dessa_from_block].end; if (to == 0) { if (IR_IS_TYPE_INT(type)) { @@ -5953,11 +6036,8 @@ static void ir_fix_param_spills(ir_ctx *ctx) int int_param_num = 0; int fp_param_num = 0; ir_reg src_reg; - // TODO: Calling convention specific - int int_reg_params_count = IR_REG_INT_ARGS; - int fp_reg_params_count = IR_REG_FP_ARGS; - const int8_t *int_reg_params = _ir_int_reg_params; - const int8_t *fp_reg_params = _ir_fp_reg_params; + ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; int32_t stack_offset = 0; int32_t stack_start = ctx->stack_frame_size; @@ -5967,15 +6047,15 @@ static void ir_fix_param_spills(ir_ctx *ctx) insn = &ctx->ir_base[use]; if (insn->op == IR_PARAM) { if (IR_IS_TYPE_INT(insn->type)) { - if (int_param_num < int_reg_params_count) { - src_reg = int_reg_params[int_param_num]; + if (int_param_num < cc->int_param_regs_count) { + src_reg = cc->int_param_regs[int_param_num]; } else { src_reg = IR_REG_NONE; } int_param_num++; } else { - if (fp_param_num < fp_reg_params_count) { - src_reg = fp_reg_params[fp_param_num]; + if 
(fp_param_num < cc->fp_param_regs_count) { + src_reg = cc->fp_param_regs[fp_param_num]; } else { src_reg = IR_REG_NONE; } @@ -5999,8 +6079,8 @@ static void ir_fix_param_spills(ir_ctx *ctx) } } - ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count); - ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count); + ctx->gp_reg_params = IR_MIN(int_param_num, cc->int_param_regs_count); + ctx->fp_reg_params = IR_MIN(fp_param_num, cc->fp_param_regs_count); ctx->param_stack_size = stack_offset; } @@ -6011,11 +6091,13 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ir_insn *insn; ir_ref i, n, j, *p; uint32_t *rule, insn_flags; - ir_backend_data *data = ctx->data; ir_regset available = 0; ir_target_constraints constraints; uint32_t def_flags; ir_reg reg; + ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; + ir_regset scratch = ir_scratch_regset[cc->scratch_reg - IR_REG_NUM]; ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); @@ -6051,7 +6133,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) && *rule != IR_CMP_AND_BRANCH_FP && *rule != IR_GUARD_CMP_INT && *rule != IR_GUARD_CMP_FP) { - available = IR_REGSET_SCRATCH; + available = scratch; } if (ctx->vregs[i]) { reg = constraints.def_reg; @@ -6081,7 +6163,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) if (insn->op == IR_PARAM && reg == IR_REG_NONE) { ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM; } else { - ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); + ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type); } } else if (insn->op == IR_PARAM) { IR_ASSERT(0 && "unexpected PARAM"); @@ -6092,7 +6174,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ir_ref n = use_list->count; if (n > 0) { - int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); + int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type); ir_ref i, *p, use; ir_insn *use_insn; @@ -6147,10 +6229,13 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) } } ctx->regs[i][constraints.tmp_regs[n].num] = reg; - } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { - available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); } else { - IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); + reg = constraints.tmp_regs[n].reg; + if (reg >= IR_REG_NUM) { + available = IR_REGSET_DIFFERENCE(available, ir_scratch_regset[reg - IR_REG_NUM]); + } else { + IR_REGSET_EXCL(available, reg); + } } } while (n); } @@ -6186,8 +6271,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) rule += n; } if (bb->flags & IR_BB_DESSA_MOVES) { - data->dessa_from_block = b; - ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); + ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps, (void*)(intptr_t)b); } } @@ -6204,8 +6288,11 @@ static void ir_preallocate_call_stack(ir_ctx *ctx) for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { if (insn->op == IR_CALL) { + const ir_proto_t *proto = ir_call_proto(ctx, insn); + const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? 
proto->flags : IR_CC_DEFAULT); int32_t copy_stack; - call_stack_size = ir_call_used_stack(ctx, insn, &copy_stack); + + call_stack_size = ir_call_used_stack(ctx, insn, cc, &copy_stack); if (call_stack_size > peak_call_stack_size) { peak_call_stack_size = call_stack_size; } @@ -6237,11 +6324,14 @@ void ir_fix_stack_frame(ir_ctx *ctx) } if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { - if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { - additional_size += sizeof(void*) * IR_REG_INT_ARGS; + ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; + + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) { + additional_size += sizeof(void*) * cc->int_param_regs_count; } - if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { - additional_size += 16 * IR_REG_FP_ARGS; + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) { + additional_size += 16 * cc->fp_param_regs_count; } } @@ -6308,6 +6398,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) ir_ref igoto_dup_ref = IR_UNUSED; uint32_t igoto_dup_block = 0; + data.ra_data.cc = ir_get_call_conv_dsc(ctx->flags); data.ra_data.unused_slot_4 = 0; data.ra_data.unused_slot_2 = 0; data.ra_data.unused_slot_1 = 0; diff --git a/ext/opcache/jit/ir/ir_aarch64.h b/ext/opcache/jit/ir/ir_aarch64.h index 9da64b9249f72..e0817f9b3303f 100644 --- a/ext/opcache/jit/ir/ir_aarch64.h +++ b/ext/opcache/jit/ir/ir_aarch64.h @@ -87,14 +87,15 @@ enum _ir_reg { IR_GP_REGS(IR_GP_REG_ENUM) IR_FP_REGS(IR_FP_REG_ENUM) IR_REG_NUM, + IR_REG_ALL = IR_REG_NUM, /* special name for regset */ + IR_REG_SET_1, /* special name for regset */ + IR_REG_SET_NUM, }; #define IR_REG_GP_FIRST IR_REG_X0 #define IR_REG_FP_FIRST IR_REG_V0 #define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1) #define IR_REG_FP_LAST (IR_REG_NUM - 1) -#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */ -#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */ #define IR_REGSET_64BIT 1 @@ -125,65 +126,4 @@ enum _ir_reg { #define IR_REG_LR IR_REG_X30 #define IR_REG_ZR IR_REG_X31 -/* Calling Convention */ -#define IR_REG_INT_RET1 IR_REG_X0 -#define IR_REG_FP_RET1 IR_REG_V0 -#define IR_REG_INT_ARGS 8 -#define IR_REG_FP_ARGS 8 -#define IR_REG_INT_ARG1 IR_REG_X0 -#define IR_REG_INT_ARG2 IR_REG_X1 -#define IR_REG_INT_ARG3 IR_REG_X2 -#define IR_REG_INT_ARG4 IR_REG_X3 -#define IR_REG_INT_ARG5 IR_REG_X4 -#define IR_REG_INT_ARG6 IR_REG_X5 -#define IR_REG_INT_ARG7 IR_REG_X6 -#define IR_REG_INT_ARG8 IR_REG_X7 -#define IR_REG_FP_ARG1 IR_REG_V0 -#define IR_REG_FP_ARG2 IR_REG_V1 -#define IR_REG_FP_ARG3 IR_REG_V2 -#define IR_REG_FP_ARG4 IR_REG_V3 -#define IR_REG_FP_ARG5 IR_REG_V4 -#define IR_REG_FP_ARG6 IR_REG_V5 -#define IR_REG_FP_ARG7 IR_REG_V6 -#define IR_REG_FP_ARG8 IR_REG_V7 -#define IR_MAX_REG_ARGS 16 -#define IR_SHADOW_ARGS 0 - -# define IR_REGSET_SCRATCH \ - (IR_REGSET_INTERVAL(IR_REG_X0, IR_REG_X18) \ - | IR_REGSET_INTERVAL(IR_REG_V0, IR_REG_V7) \ - | IR_REGSET_INTERVAL(IR_REG_V16, IR_REG_V31)) - -# define IR_REGSET_PRESERVED \ - (IR_REGSET_INTERVAL(IR_REG_X19, IR_REG_X30) \ - | IR_REGSET_INTERVAL(IR_REG_V8, IR_REG_V15)) - -#ifndef __APPLE__ -typedef struct _ir_va_list { - void *stack; - void *gr_top; - void *vr_top; - int32_t gr_offset; - int32_t vr_offset; -} ir_va_list; -#endif - -typedef struct _ir_tmp_reg { - union { - uint8_t num; - int8_t reg; - }; - uint8_t type; - int8_t
start; - int8_t end; -} ir_tmp_reg; - -struct _ir_target_constraints { - int8_t def_reg; - uint8_t tmps_count; - uint8_t hints_count; - ir_tmp_reg tmp_regs[3]; - int8_t hints[IR_MAX_REG_ARGS + 3]; -}; - #endif /* IR_AARCH64_H */ diff --git a/ext/opcache/jit/ir/ir_dump.c b/ext/opcache/jit/ir/ir_dump.c index 5cc732927d412..92962313d9992 100644 --- a/ext/opcache/jit/ir/ir_dump.c +++ b/ext/opcache/jit/ir/ir_dump.c @@ -8,6 +8,14 @@ #include "ir.h" #include "ir_private.h" +#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64) +# include "ir_x86.h" +#elif defined(IR_TARGET_AARCH64) +# include "ir_aarch64.h" +#else +# error "Unknown IR target" +#endif + void ir_dump(const ir_ctx *ctx, FILE *f) { ir_ref i, j, n, ref, *p; @@ -456,8 +464,8 @@ void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f) } } #if 1 - n = ctx->vregs_count + ir_regs_number() + 2; - for (i = ctx->vregs_count + 1; i <= n; i++) { + n = ctx->vregs_count + 1 + IR_REG_SET_NUM; + for (i = ctx->vregs_count + 1; i < n; i++) { ir_live_interval *ival = ctx->live_intervals[i]; if (ival) { diff --git a/ext/opcache/jit/ir/ir_emit.c b/ext/opcache/jit/ir/ir_emit.c index 847ca375b5bd0..a6dfde77f576c 100644 --- a/ext/opcache/jit/ir/ir_emit.c +++ b/ext/opcache/jit/ir/ir_emit.c @@ -63,18 +63,7 @@ typedef struct _ir_dessa_copy { int32_t to; /* [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */ } ir_dessa_copy; -#if IR_REG_INT_ARGS -static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS]; -#else -static const int8_t *_ir_int_reg_params; -#endif -#if IR_REG_FP_ARGS -static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS]; -#else -static const int8_t *_ir_fp_reg_params; -#endif - -static const ir_proto_t *ir_call_proto(const ir_ctx *ctx, ir_insn *insn) +const ir_proto_t *ir_call_proto(const ir_ctx *ctx, const ir_insn *insn) { if (IR_IS_CONST_REF(insn->op2)) { const ir_insn *func = &ctx->ir_base[insn->op2]; @@ -90,49 +79,6 @@ static const ir_proto_t *ir_call_proto(const ir_ctx *ctx, ir_insn *insn) return NULL; } -#ifdef IR_HAVE_FASTCALL -static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS]; -static const int8_t *_ir_fp_fc_reg_params; - -bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn) -{ - if (sizeof(void*) == 4) { - if (IR_IS_CONST_REF(insn->op2)) { - const ir_insn *func = &ctx->ir_base[insn->op2]; - - if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) { - if (func->proto) { - const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto); - - return (proto->flags & IR_FASTCALL_FUNC) != 0; - } - } - } else if (ctx->ir_base[insn->op2].op == IR_PROTO) { - const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2); - - return (proto->flags & IR_FASTCALL_FUNC) != 0; - } - return 0; - } - return 0; -} -#else -bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn) -{ - return 0; -} -#endif - -bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn) -{ - const ir_proto_t *proto = ir_call_proto(ctx, insn); - - if (proto) { - return (proto->flags & IR_VARARG_FUNC) != 0; - } - return 0; -} - IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref) { IR_ASSERT(!IR_IS_CONST_REF(ref)); @@ -153,19 +99,7 @@ static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref) ir_insn *insn; int int_param = 0; int fp_param = 0; - int int_reg_params_count = IR_REG_INT_ARGS; - int fp_reg_params_count = IR_REG_FP_ARGS; - const int8_t *int_reg_params = _ir_int_reg_params; - const int8_t *fp_reg_params = _ir_fp_reg_params; - -#ifdef IR_HAVE_FASTCALL - if (sizeof(void*) == 4 && (ctx->flags & 
IR_FASTCALL_FUNC)) { - int_reg_params_count = IR_REG_INT_FCARGS; - fp_reg_params_count = IR_REG_FP_FCARGS; - int_reg_params = _ir_int_fc_reg_params; - fp_reg_params = _ir_fp_fc_reg_params; - } -#endif + const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(ctx->flags); for (i = use_list->count, p = &ctx->use_edges[use_list->refs]; i > 0; p++, i--) { use = *p; @@ -173,70 +107,48 @@ static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref) if (insn->op == IR_PARAM) { if (IR_IS_TYPE_INT(insn->type)) { if (use == ref) { -#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86) - if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) { + if (ctx->value_params && ctx->value_params[insn->op3 - 1].align && cc->pass_struct_by_val) { /* struct passed by value on stack */ return IR_REG_NONE; - } else -#endif - if (int_param < int_reg_params_count) { - return int_reg_params[int_param]; + } else if (int_param < cc->int_param_regs_count) { + return cc->int_param_regs[int_param]; } else { return IR_REG_NONE; } -#if defined(IR_TARGET_X64) || defined(IR_TARGET_X86) - } else { - if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) { - /* struct passed by value on stack */ - continue; - } -#endif + } else if (ctx->value_params && ctx->value_params[insn->op3 - 1].align && cc->pass_struct_by_val) { + /* struct passed by value on stack */ + continue; } int_param++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - fp_param++; -#endif + if (cc->shadow_param_regs) { + fp_param++; + } } else { IR_ASSERT(IR_IS_TYPE_FP(insn->type)); if (use == ref) { - if (fp_param < fp_reg_params_count) { - return fp_reg_params[fp_param]; + if (fp_param < cc->fp_param_regs_count) { + return cc->fp_param_regs[fp_param]; } else { return IR_REG_NONE; } } fp_param++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - int_param++; -#endif + if (cc->shadow_param_regs) { + int_param++; + } } } } return IR_REG_NONE; } -static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs) +static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, const ir_call_conv_dsc *cc, int8_t *regs) { int j, n; ir_type type; int int_param = 0; int fp_param = 0; int count = 0; - int int_reg_params_count = IR_REG_INT_ARGS; - int fp_reg_params_count = IR_REG_FP_ARGS; - const int8_t *int_reg_params = _ir_int_reg_params; - const int8_t *fp_reg_params = _ir_fp_reg_params; - -#ifdef IR_HAVE_FASTCALL - if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { - int_reg_params_count = IR_REG_INT_FCARGS; - fp_reg_params_count = IR_REG_FP_FCARGS; - int_reg_params = _ir_int_fc_reg_params; - fp_reg_params = _ir_fp_fc_reg_params; - } -#endif n = insn->inputs_count; n = IR_MIN(n, IR_MAX_REG_ARGS + 2); @@ -244,27 +156,25 @@ static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs ir_insn *arg = &ctx->ir_base[ir_insn_op(insn, j)]; type = arg->type; if (IR_IS_TYPE_INT(type)) { - if (int_param < int_reg_params_count && arg->op != IR_ARGVAL) { - regs[j] = int_reg_params[int_param]; + if (int_param < cc->int_param_regs_count && arg->op != IR_ARGVAL) { + regs[j] = cc->int_param_regs[int_param]; count = j + 1; int_param++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - fp_param++; -#endif + if (cc->shadow_param_regs) { + fp_param++; + } } else { regs[j] = IR_REG_NONE; } } else { IR_ASSERT(IR_IS_TYPE_FP(type)); - if (fp_param < fp_reg_params_count) { - regs[j] = fp_reg_params[fp_param]; 
+ if (fp_param < cc->fp_param_regs_count) { + regs[j] = cc->fp_param_regs[fp_param]; count = j + 1; fp_param++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - int_param++; -#endif + if (cc->shadow_param_regs) { + int_param++; + } } else { regs[j] = IR_REG_NONE; } @@ -419,7 +329,6 @@ static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb); typedef struct _ir_common_backend_data { ir_reg_alloc_data ra_data; - uint32_t dessa_from_block; dasm_State *dasm_state; ir_bitset emit_constants; } ir_common_backend_data; @@ -1071,3 +980,32 @@ int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref) IR_ASSERT(offset != -1); return IR_SPILL_POS_TO_OFFSET(offset); } + +const ir_call_conv_dsc *ir_get_call_conv_dsc(uint32_t flags) +{ +#ifdef IR_TARGET_X86 + if ((flags & IR_CALL_CONV_MASK) == IR_CC_FASTCALL) { + return &ir_call_conv_x86_fastcall; + } +#elif defined(IR_TARGET_X64) + switch (flags & IR_CALL_CONV_MASK) { + case IR_CC_DEFAULT: return &ir_call_conv_default; + case IR_CC_FASTCALL: return &ir_call_conv_default; + case IR_CC_PRESERVE_NONE: return &ir_call_conv_x86_64_preserve_none; + case IR_CC_X86_64_SYSV: return &ir_call_conv_x86_64_sysv; + case IR_CC_X86_64_MS: return &ir_call_conv_x86_64_ms; + default: break; + } +#elif defined(IR_TARGET_AARCH64) + switch (flags & IR_CALL_CONV_MASK) { + case IR_CC_DEFAULT: return &ir_call_conv_default; + case IR_CC_FASTCALL: return &ir_call_conv_default; + case IR_CC_PRESERVE_NONE: return &ir_call_conv_aarch64_preserve_none; + case IR_CC_AARCH64_SYSV: return &ir_call_conv_aarch64_sysv; + case IR_CC_AARCH64_DARWIN: return &ir_call_conv_aarch64_darwin; + default: break; + } +#endif + IR_ASSERT((flags & IR_CALL_CONV_MASK) == IR_CC_DEFAULT || (flags & IR_CALL_CONV_MASK) == IR_CC_BUILTIN); + return &ir_call_conv_default; +} diff --git a/ext/opcache/jit/ir/ir_gcm.c b/ext/opcache/jit/ir/ir_gcm.c index e6486ba64a1c5..67c97611eaac4 100644 --- a/ext/opcache/jit/ir/ir_gcm.c +++ b/ext/opcache/jit/ir/ir_gcm.c @@ -361,20 +361,20 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) { j = ctx->cfg_blocks[j].idom; } - clone = ir_hashtab_find(&hash, j); - if (clone == IR_INVALID_VAL) { - clone = clones_count++; - ir_hashtab_add(&hash, j, clone); - clones[clone].block = j; - clones[clone].use_count = 0; - clones[clone].use = -1; - } - uses[uses_count].ref = use; - uses[uses_count].block = i; - uses[uses_count].next = clones[clone].use; - clones[clone].use_count++; - clones[clone].use = uses_count++; } + clone = ir_hashtab_find(&hash, j); + if (clone == IR_INVALID_VAL) { + clone = clones_count++; + ir_hashtab_add(&hash, j, clone); + clones[clone].block = j; + clones[clone].use_count = 0; + clones[clone].use = -1; + } + uses[uses_count].ref = use; + uses[uses_count].block = i; + uses[uses_count].next = clones[clone].use; + clones[clone].use_count++; + clones[clone].use = uses_count++; } } @@ -413,7 +413,8 @@ static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) n = ctx->use_lists[ref].refs; for (i = 0; i < clones_count; i++) { clone = clones[i].ref; - if (clones[i].use_count == 1 + if (clones[i].block + && clones[i].use_count == 1 && ctx->cfg_blocks[clones[i].block].loop_depth >= ctx->cfg_blocks[uses[clones[i].use].block].loop_depth) { /* TOTALLY_USEFUL block may be a head of a diamond above the real usage. * Sink it down to the real usage block. 
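The shape of the rework is easiest to see at this point: instead of per-target IR_REG_INT_ARG*/IR_REG_FP_ARG* macros and #ifdef blocks, every consumer now asks ir_get_call_conv_dsc() (added to ir_emit.c above) for an ir_call_conv_dsc descriptor, selected by the IR_CALL_CONV_MASK bits of the flags (IR_CC_DEFAULT, IR_CC_FASTCALL, IR_CC_PRESERVE_NONE, ...), and reads parameter and return registers from its tables. A minimal consumer-side sketch, mirroring the reworked ir_get_args_regs() loop; pick_arg_regs, arg_is_int and n are illustrative stand-ins for the real IR argument data, and the IR_ARGVAL special case is omitted:

	/* Illustrative sketch, not part of the patch: walking a calling-convention
	 * descriptor to assign argument registers, as ir_get_args_regs() now does. */
	static int pick_arg_regs(const ir_call_conv_dsc *cc,
	                         const bool *arg_is_int, int n, int8_t *regs)
	{
		int int_param = 0, fp_param = 0, count = 0;
		int j;

		for (j = 0; j < n; j++) {
			if (arg_is_int[j]) {
				if (int_param < cc->int_param_regs_count) {
					regs[j] = cc->int_param_regs[int_param++];
					count = j + 1;
					if (cc->shadow_param_regs) {
						fp_param++; /* WIN64: INT and FP args share one counter */
					}
				} else {
					regs[j] = IR_REG_NONE; /* argument goes on the stack */
				}
			} else {
				if (fp_param < cc->fp_param_regs_count) {
					regs[j] = cc->fp_param_regs[fp_param++];
					count = j + 1;
					if (cc->shadow_param_regs) {
						int_param++;
					}
				} else {
					regs[j] = IR_REG_NONE;
				}
			}
		}
		return count;
	}

For indirect calls the descriptor is resolved per call site from the prototype, falling back to the native ABI: ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT). The old IR_REG_SCRATCH/IR_REG_ALL special cases become pseudo-registers from IR_REG_NUM upwards that index the new ir_scratch_regset[] table, so a call clobbers exactly the scratch set of its own convention. The ir_gen_dessa_moves()/emit_copy_t change in ir_private.h below is independent plumbing: per-block state is now threaded through an explicit void *data argument instead of being stashed in ctx->data or the backend data.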
diff --git a/ext/opcache/jit/ir/ir_private.h b/ext/opcache/jit/ir/ir_private.h index dbacc3967d0f7..acd7e41a3e9a5 100644 --- a/ext/opcache/jit/ir/ir_private.h +++ b/ext/opcache/jit/ir/ir_private.h @@ -1015,6 +1015,8 @@ IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn) #define IR_HAS_FP_RET_SLOT (1<<10) #define IR_16B_FRAME_ALIGNMENT (1<<11) #define IR_HAS_BLOCK_ADDR (1<<12) +#define IR_PREALLOCATED_STACK (1<<13) + /* Temporary: MEM2SSA -> SCCP */ #define IR_MEM2SSA_VARS (1<<25) @@ -1275,9 +1277,9 @@ struct _ir_live_interval { ir_live_interval *list_next; /* linked list of active, inactive or unhandled intervals */ }; -typedef int (*emit_copy_t)(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to); +typedef int (*emit_copy_t)(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to, void *data); -int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy); +int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy, void *data); @@ -1363,16 +1365,44 @@ IR_ALWAYS_INLINE ir_reg ir_regset_pop_first(ir_regset *set) #endif /* defined(IR_REGSET_64BIT) */ +/*** Calling Conventions ***/ +#if defined(IR_REGSET_64BIT) +struct _ir_call_conv_dsc { + bool cleanup_stack_by_callee: 1; /* use "retn $size" to return */ + bool pass_struct_by_val: 1; /* pass aggregates by value, otherwise their copies are passed by reference */ + bool sysv_varargs: 1; /* Use SysV varargs ABI */ + bool shadow_param_regs: 1; /* registers for INT and FP parameters shadow each other */ + /* (WIN64: 1st arg is passed in %rcx/%xmm0, 2nd in %rdx/%xmm1) */ + uint8_t shadow_store_size; /* reserved stack space to keep arguments passed in registers (WIN64) */ + uint8_t int_param_regs_count; /* number of registers for INT parameters */ + uint8_t fp_param_regs_count; /* number of registers for FP parameters */ + int8_t int_ret_reg; /* register to return INT value */ + int8_t fp_ret_reg; /* register to return FP value */ + int8_t fp_varargs_reg; /* register to pass the number of fp register arguments into a vararg func */ + int8_t scratch_reg; /* pseudo register referring to the scratch regset (clobbered by call) */ + const int8_t *int_param_regs; /* registers for INT parameters */ + const int8_t *fp_param_regs; /* registers for FP parameters */ + ir_regset preserved_regs; /* preserved or callee-saved registers */ +}; + +extern const ir_regset ir_scratch_regset[]; +#endif + +typedef struct _ir_call_conv_dsc ir_call_conv_dsc; + +const ir_call_conv_dsc *ir_get_call_conv_dsc(uint32_t flags); + /*** IR Register Allocation ***/ /* Flags for ctx->regs[][] (low bits are used for register number itself) */ typedef struct _ir_reg_alloc_data { + const ir_call_conv_dsc *cc; int32_t unused_slot_4; int32_t unused_slot_2; int32_t unused_slot_1; ir_live_interval **handled; } ir_reg_alloc_data; -int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data); +int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type); IR_ALWAYS_INLINE void ir_set_alocated_reg(ir_ctx *ctx, ir_ref ref, int op_num, int8_t reg) { @@ -1406,9 +1436,27 @@ IR_ALWAYS_INLINE int8_t ir_get_alocated_reg(const ir_ctx *ctx, ir_ref ref, int o #define IR_RULE_MASK 0xff +#define IR_MAX_REG_ARGS 64 + extern const char *ir_rule_name[]; -typedef struct _ir_target_constraints ir_target_constraints; +typedef struct _ir_tmp_reg { + union { + uint8_t num; + int8_t reg; + }; + uint8_t type; + int8_t start; + int8_t end; +} ir_tmp_reg; + +typedef struct { + int8_t def_reg; + uint8_t tmps_count; + uint8_t hints_count; + ir_tmp_reg tmp_regs[3]; +
int8_t hints[IR_MAX_REG_ARGS + 3]; +} ir_target_constraints; #define IR_TMP_REG(_num, _type, _start, _end) \ (ir_tmp_reg){.num=(_num), .type=(_type), .start=(_start), .end=(_end)} @@ -1421,8 +1469,8 @@ void ir_fix_stack_frame(ir_ctx *ctx); /* Utility */ ir_type ir_get_return_type(ir_ctx *ctx); -bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn); -bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn); +const ir_proto_t *ir_call_proto(const ir_ctx *ctx, const ir_insn *insn); +void ir_print_call_conv(uint32_t flags, FILE *f); //#define IR_BITSET_LIVENESS diff --git a/ext/opcache/jit/ir/ir_ra.c b/ext/opcache/jit/ir/ir_ra.c index 2e8a8e3f34f3f..23f44482cb8b5 100644 --- a/ext/opcache/jit/ir/ir_ra.c +++ b/ext/opcache/jit/ir/ir_ra.c @@ -610,8 +610,8 @@ int ir_compute_live_ranges(ir_ctx *ctx) len = ir_bitset_len(ctx->vregs_count + 1); bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t)); - /* vregs + tmp + fixed + SRATCH + ALL */ - ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); + /* vregs + tmp + fixed + ALL + SCRATCH_N */ + ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_SET_NUM, sizeof(ir_live_interval*)); #ifdef IR_DEBUG visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1); @@ -1265,8 +1265,8 @@ int ir_compute_live_ranges(ir_ctx *ctx) /* Compute Live Ranges */ ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES; - /* vregs + tmp + fixed + SRATCH + ALL */ - ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); + /* vregs + tmp + fixed + ALL + SCRATCH_N */ + ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_SET_NUM, sizeof(ir_live_interval*)); if (!ctx->arena) { ctx->arena = ir_arena_create(16 * 1024); @@ -2037,8 +2037,8 @@ int ir_coalesce(ir_ctx *ctx) n--; if (n != ctx->vregs_count) { j = ctx->vregs_count - n; - /* vregs + tmp + fixed + SRATCH + ALL */ - for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) { + /* vregs + tmp + fixed + ALL + SCRATCH_N */ + for (i = n + 1; i <= n + IR_REG_SET_NUM; i++) { ctx->live_intervals[i] = ctx->live_intervals[i + j]; if (ctx->live_intervals[i]) { ctx->live_intervals[i]->vreg = i; @@ -2105,7 +2105,7 @@ int ir_compute_dessa_moves(ir_ctx *ctx) * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009, * pp. 114-125, doi: 10.1109/CGO.2009.19. 
*/ -int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy) +int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy, void *data) { uint32_t succ, k, n = 0; ir_block *bb, *succ_bb; @@ -2180,7 +2180,7 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy) while ((b = ir_bitset_pop_first(ready, len)) >= 0) { a = pred[b]; c = loc[a]; - emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b]); + emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b], data); ir_bitset_excl(todo, b); loc[a] = b; src[b] = dst[b]; @@ -2193,7 +2193,7 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy) break; } IR_ASSERT(b != loc[pred[b]]); - emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0); + emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0, data); loc[b] = 0; ir_bitset_incl(ready, b); } @@ -2211,7 +2211,7 @@ int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy) if (insn->op == IR_PHI) { input = ir_insn_op(insn, k); if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) { - emit_copy(ctx, insn->type, input, ref); + emit_copy(ctx, insn->type, input, ref, data); } } } @@ -2501,8 +2501,9 @@ static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *iva return child; } -static int32_t ir_allocate_small_spill_slot(ir_ctx *ctx, size_t size, ir_reg_alloc_data *data) +static int32_t ir_allocate_small_spill_slot(ir_ctx *ctx, size_t size) { + ir_reg_alloc_data *data = ctx->data; int32_t ret; IR_ASSERT(size == 0 || size == 1 || size == 2 || size == 4 || size == 8); @@ -2601,12 +2602,12 @@ static int32_t ir_allocate_small_spill_slot(ir_ctx *ctx, size_t size, ir_reg_all return ret; } -int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data) +int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type) { - return ir_allocate_small_spill_slot(ctx, ir_type_size[type], data); + return ir_allocate_small_spill_slot(ctx, ir_type_size[type]); } -static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size, ir_reg_alloc_data *data) +static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size) { int32_t ret; @@ -2616,7 +2617,7 @@ static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size, ir_reg_allo } else if (size > 4 && size < 8) { size = 8; } - return ir_allocate_small_spill_slot(ctx, size, data); + return ir_allocate_small_spill_slot(ctx, size); } /* Align stack allocated data to 16 byte */ @@ -2836,13 +2837,8 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l /* freeUntilPos[it.reg] = 0 */ reg = other->reg; IR_ASSERT(reg >= 0); - if (reg >= IR_REG_SCRATCH) { - if (reg == IR_REG_SCRATCH) { - available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); - } else { - IR_ASSERT(reg == IR_REG_ALL); - available = IR_REGSET_EMPTY; - } + if (reg >= IR_REG_NUM) { + available = IR_REGSET_DIFFERENCE(available, ir_scratch_regset[reg - IR_REG_NUM]); } else { IR_REGSET_EXCL(available, reg); } @@ -2864,15 +2860,8 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l if (next) { reg = other->reg; IR_ASSERT(reg >= 0); - if (reg >= IR_REG_SCRATCH) { - ir_regset regset; - - if (reg == IR_REG_SCRATCH) { - regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); - } else { - IR_ASSERT(reg == IR_REG_ALL); - regset = available; - } + if (reg >= IR_REG_NUM) { + ir_regset regset = IR_REGSET_INTERSECTION(available, ir_scratch_regset[reg - IR_REG_NUM]); overlapped = IR_REGSET_UNION(overlapped, regset); IR_REGSET_FOREACH(regset, reg) { 
if (next < freeUntilPos[reg]) { @@ -2922,7 +2911,8 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l } /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */ - scratch = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); + scratch = IR_REGSET_INTERSECTION(available, + ir_scratch_regset[((ir_reg_alloc_data*)(ctx->data))->cc->scratch_reg - IR_REG_NUM]); if (scratch != IR_REGSET_EMPTY) { /* prefer registers that don't conflict with the hints for the following unhandled intervals */ if (1) { @@ -2970,8 +2960,8 @@ static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_l pos = freeUntilPos[i]; reg = i; } else if (freeUntilPos[i] == pos - && !IR_REGSET_IN(IR_REGSET_SCRATCH, reg) - && IR_REGSET_IN(IR_REGSET_SCRATCH, i)) { + && !IR_REGSET_IN(ir_scratch_regset[((ir_reg_alloc_data*)(ctx->data))->cc->scratch_reg - IR_REG_NUM], reg) + && IR_REGSET_IN(ir_scratch_regset[((ir_reg_alloc_data*)(ctx->data))->cc->scratch_reg - IR_REG_NUM], i)) { /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */ pos = freeUntilPos[i]; reg = i; @@ -3077,15 +3067,8 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li /* nextUsePos[it.reg] = next use of it after start of current */ reg = other->reg; IR_ASSERT(reg >= 0); - if (reg >= IR_REG_SCRATCH) { - ir_regset regset; - - if (reg == IR_REG_SCRATCH) { - regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); - } else { - IR_ASSERT(reg == IR_REG_ALL); - regset = available; - } + if (reg >= IR_REG_NUM) { + ir_regset regset = IR_REGSET_INTERSECTION(available, ir_scratch_regset[reg - IR_REG_NUM]); IR_REGSET_FOREACH(regset, reg) { blockPos[reg] = nextUsePos[reg] = 0; } IR_REGSET_FOREACH_END(); @@ -3109,18 +3092,11 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li /* freeUntilPos[it.reg] = next intersection of it with current */ reg = other->reg; IR_ASSERT(reg >= 0); - if (reg >= IR_REG_SCRATCH) { + if (reg >= IR_REG_NUM) { ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range); if (overlap) { - ir_regset regset; - - if (reg == IR_REG_SCRATCH) { - regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH); - } else { - IR_ASSERT(reg == IR_REG_ALL); - regset = available; - } + ir_regset regset = IR_REGSET_INTERSECTION(available, ir_scratch_regset[reg - IR_REG_NUM]); IR_REGSET_FOREACH(regset, reg) { if (overlap < nextUsePos[reg]) { nextUsePos[reg] = overlap; @@ -3325,9 +3301,9 @@ static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_li return reg; } -static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to, void *data) { - ir_block *bb = ctx->data; + ir_block *bb = data; ir_tmp_reg tmp_reg; if (to == 0) { @@ -3365,7 +3341,7 @@ static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) return 1; } -static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_reg_alloc_data *data) +static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival) { ir_use_pos *use_pos = ival->use_pos; @@ -3417,7 +3393,7 @@ static void ir_assign_bound_spill_slots(ir_ctx *ctx) } } -static int ir_linear_scan(ir_ctx *ctx) +static int ir_linear_scan(ir_ctx *ctx, ir_ref vars) { uint32_t b; ir_block *bb; @@ -3428,8 +3404,6 @@ static int ir_linear_scan(ir_ctx *ctx) int j; ir_live_pos position; ir_reg reg; - ir_reg_alloc_data data; - 
ir_ref vars = ctx->vars; if (!ctx->live_intervals) { return 0; @@ -3440,19 +3414,11 @@ static int ir_linear_scan(ir_ctx *ctx) for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) { IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); if (bb->flags & IR_BB_DESSA_MOVES) { - ctx->data = bb; - ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); + ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps, bb); } } } - ctx->data = &data; - ctx->stack_frame_size = 0; - data.unused_slot_4 = 0; - data.unused_slot_2 = 0; - data.unused_slot_1 = 0; - data.handled = NULL; - while (vars) { ir_ref var = vars; ir_insn *insn = &ctx->ir_base[var]; @@ -3461,7 +3427,7 @@ static int ir_linear_scan(ir_ctx *ctx) vars = insn->op3; /* list next */ if (insn->op == IR_VAR) { - ir_ref slot = ir_allocate_spill_slot(ctx, insn->type, &data);; + ir_ref slot = ir_allocate_spill_slot(ctx, insn->type); ir_use_list *use_list; ir_ref n, *p; @@ -3484,7 +3450,7 @@ static int ir_linear_scan(ir_ctx *ctx) IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0); IR_ASSERT(val->val.i64 < 0x7fffffff); - insn->op3 = ir_allocate_big_spill_slot(ctx, val->val.i32, &data); + insn->op3 = ir_allocate_big_spill_slot(ctx, val->val.i32); } } @@ -3492,7 +3458,7 @@ static int ir_linear_scan(ir_ctx *ctx) ival = ctx->live_intervals[j]; if (ival) { if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) - || !ir_ival_spill_for_fuse_load(ctx, ival, &data)) { + || !ir_ival_spill_for_fuse_load(ctx, ival)) { ir_add_to_unhandled(&unhandled, ival); } } @@ -3503,8 +3469,8 @@ static int ir_linear_scan(ir_ctx *ctx) ir_merge_to_unhandled(&unhandled, ival); } - /* vregs + tmp + fixed + SRATCH + ALL */ - for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) { + /* vregs + tmp + fixed + ALL + SCRATCH_N */ + for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_SET_NUM; j++) { ival = ctx->live_intervals[j]; if (ival) { ival->current_range = &ival->range; @@ -3663,7 +3629,7 @@ static int ir_linear_scan(ir_ctx *ctx) ir_live_interval *handled[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL}; ir_live_interval *old; - data.handled = handled; + ((ir_reg_alloc_data*)(ctx->data))->handled = handled; active = NULL; while (unhandled) { ival = unhandled; @@ -3701,7 +3667,7 @@ static int ir_linear_scan(ir_ctx *ctx) other = prev ? 
prev->list_next : active; } - ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data); + ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type); if (unhandled && ival->end > unhandled->range.start) { ival->list_next = active; active = ival; @@ -3721,15 +3687,16 @@ static int ir_linear_scan(ir_ctx *ctx) } } } - data.handled = NULL; + ((ir_reg_alloc_data*)(ctx->data))->handled = NULL; } } #ifdef IR_TARGET_X86 if (ctx->flags2 & IR_HAS_FP_RET_SLOT) { - ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data); - } else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { - ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data); + ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE); + } else if ((ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) + && ((ir_reg_alloc_data*)(ctx->data))->cc->fp_ret_reg == IR_REG_NONE) { + ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type); } else { ctx->ret_slot = -1; } @@ -4033,17 +4000,18 @@ static void assign_regs(ir_ctx *ctx) } while (ival); } + const ir_call_conv_dsc *cc = ((ir_reg_alloc_data*)(ctx->data))->cc; if (ctx->fixed_stack_frame_size != -1) { ctx->used_preserved_regs = (ir_regset)ctx->fixed_save_regset; - if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED), + if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, cc->preserved_regs), ctx->used_preserved_regs)) { // TODO: Preserved reg and fixed frame conflict ??? // IR_ASSERT(0 && "Preserved reg and fixed frame conflict"); } } else { ctx->used_preserved_regs = IR_REGSET_UNION((ir_regset)ctx->fixed_save_regset, - IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED), - (ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : IR_REGSET_PRESERVED)); + IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, cc->preserved_regs), + (ctx->flags & IR_FUNCTION) ? 
(ir_regset)ctx->fixed_regset : cc->preserved_regs)); } ir_fix_stack_frame(ctx); @@ -4051,9 +4019,24 @@ static void assign_regs(ir_ctx *ctx) int ir_reg_alloc(ir_ctx *ctx) { - if (ir_linear_scan(ctx)) { + ir_reg_alloc_data data; + ir_ref vars = ctx->vars; + + data.cc = ir_get_call_conv_dsc(ctx->flags); + data.unused_slot_4 = 0; + data.unused_slot_2 = 0; + data.unused_slot_1 = 0; + data.handled = NULL; + + ctx->data = &data; + ctx->stack_frame_size = 0; + + if (ir_linear_scan(ctx, vars)) { assign_regs(ctx); + ctx->data = NULL; return 1; } + + ctx->data = NULL; return 0; } diff --git a/ext/opcache/jit/ir/ir_save.c b/ext/opcache/jit/ir/ir_save.c index dd955172950c8..51d7f96e518fa 100644 --- a/ext/opcache/jit/ir/ir_save.c +++ b/ext/opcache/jit/ir/ir_save.c @@ -18,6 +18,38 @@ void ir_print_proto(const ir_ctx *ctx, ir_ref func_proto, FILE *f) } } +void ir_print_call_conv(uint32_t flags, FILE *f) +{ + switch (flags & IR_CALL_CONV_MASK) { + case IR_CC_BUILTIN: + fprintf(f, " __builtin"); + break; + case IR_CC_FASTCALL: + fprintf(f, " __fastcall"); + break; + case IR_CC_PRESERVE_NONE: + fprintf(f, " __preserve_none"); + break; +#if defined(IR_TARGET_X64) + case IR_CC_X86_64_SYSV: + fprintf(f, " __sysv"); + break; + case IR_CC_X86_64_MS: + fprintf(f, " __win64"); + break; +#elif defined(IR_TARGET_AARCH64) + case IR_CC_AARCH64_SYSV: + fprintf(f, " __sysv"); + break; + case IR_CC_AARCH64_DARWIN: + fprintf(f, " __darwin"); + break; +#endif + default: + IR_ASSERT((flags & IR_CALL_CONV_MASK) == IR_CC_DEFAULT); + } +} + void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f) { uint32_t j; @@ -35,11 +67,7 @@ void ir_print_proto_ex(uint8_t flags, ir_type ret_type, uint32_t params_count, c fprintf(f, "..."); } fprintf(f, "): %s", ir_type_cname[ret_type]); - if (flags & IR_FASTCALL_FUNC) { - fprintf(f, " __fastcall"); - } else if (flags & IR_BUILTIN_FUNC) { - fprintf(f, " __builtin"); - } + ir_print_call_conv(flags, f); if (flags & IR_CONST_FUNC) { fprintf(f, " __const"); } else if (flags & IR_PURE_FUNC) { diff --git a/ext/opcache/jit/ir/ir_x86.dasc b/ext/opcache/jit/ir/ir_x86.dasc index 7f714dd11d27c..9072b0dd59147 100644 --- a/ext/opcache/jit/ir/ir_x86.dasc +++ b/ext/opcache/jit/ir/ir_x86.dasc @@ -882,8 +882,7 @@ IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_ |.endmacro typedef struct _ir_backend_data { - ir_reg_alloc_data ra_data; - uint32_t dessa_from_block; + ir_reg_alloc_data ra_data; dasm_State *dasm_state; ir_bitset emit_constants; int rodata_label, jmp_table_label; @@ -897,6 +896,13 @@ typedef struct _ir_backend_data { bool resolved_label_syms; } ir_backend_data; +typedef struct _ir_x86_64_sysv_va_list { + uint32_t gp_offset; + uint32_t fp_offset; + void *overflow_arg_area; + void *reg_save_area; +} ir_x86_64_sysv_va_list; + #define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \ #name64, #define IR_GP_REG_NAME32(code, name64, name32, name16, name8, name8h) \ @@ -908,9 +914,19 @@ typedef struct _ir_backend_data { #define IR_FP_REG_NAME(code, name) \ #name, -static const char *_ir_reg_name[IR_REG_NUM] = { +static const char *_ir_reg_name[] = { IR_GP_REGS(IR_GP_REG_NAME) IR_FP_REGS(IR_FP_REG_NAME) + "ALL", + "SCRATCH", +#ifdef IR_TARGET_X64 +# ifdef _WIN64 + "SCRATCH_SYSV", +# else + "SCRATCH_MS", +# endif + "SCRATCH_PN", /* preserve none */ +#endif }; static const char *_ir_reg_name32[IR_REG_NUM] = { @@ -925,66 +941,11 @@ static const char *_ir_reg_name8[IR_REG_NUM] = { IR_GP_REGS(IR_GP_REG_NAME8) 
}; -/* Calling Convention */ -#ifdef _WIN64 - -static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { - IR_REG_INT_ARG1, - IR_REG_INT_ARG2, - IR_REG_INT_ARG3, - IR_REG_INT_ARG4, -}; - -static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { - IR_REG_FP_ARG1, - IR_REG_FP_ARG2, - IR_REG_FP_ARG3, - IR_REG_FP_ARG4, -}; - -#elif defined(IR_TARGET_X64) - -static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { - IR_REG_INT_ARG1, - IR_REG_INT_ARG2, - IR_REG_INT_ARG3, - IR_REG_INT_ARG4, - IR_REG_INT_ARG5, - IR_REG_INT_ARG6, -}; - -static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { - IR_REG_FP_ARG1, - IR_REG_FP_ARG2, - IR_REG_FP_ARG3, - IR_REG_FP_ARG4, - IR_REG_FP_ARG5, - IR_REG_FP_ARG6, - IR_REG_FP_ARG7, - IR_REG_FP_ARG8, -}; - -#else - -static const int8_t *_ir_int_reg_params = NULL; -static const int8_t *_ir_fp_reg_params = NULL; -static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS] = { - IR_REG_INT_FCARG1, - IR_REG_INT_FCARG2, -}; -static const int8_t *_ir_fp_fc_reg_params = NULL; - -#endif - const char *ir_reg_name(int8_t reg, ir_type type) { if (reg >= IR_REG_NUM) { - if (reg == IR_REG_SCRATCH) { - return "SCRATCH"; - } else { - IR_ASSERT(reg == IR_REG_ALL); - return "ALL"; - } + IR_ASSERT((uint8_t)reg < sizeof(_ir_reg_name) / sizeof(_ir_reg_name[0])); + return _ir_reg_name[reg]; } IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); if (type == IR_VOID) { @@ -1002,6 +963,159 @@ const char *ir_reg_name(int8_t reg, ir_type type) } } +/* Calling Conventions */ +#ifdef IR_TARGET_X64 + +# ifdef _WIN64 +# define IR_REG_SCRATH_X86_64_MS IR_REG_SET_1 +# define IR_REG_SCRATH_X86_64_SYSV IR_REG_SET_2 +# define IR_REG_SCRATH_X86_64_PN IR_REG_SET_3 +# else +# define IR_REG_SCRATH_X86_64_SYSV IR_REG_SET_1 +# define IR_REG_SCRATH_X86_64_MS IR_REG_SET_2 +# define IR_REG_SCRATH_X86_64_PN IR_REG_SET_3 +# endif + +# define IR_REGSET_SCRATCH_X86_64_SYSV \ + (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | \ + IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI) | \ + IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) | \ + IR_REGSET_FP) + +# define IR_REGSET_SCRATCH_X86_64_WIN \ + (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | \ + IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) | \ + IR_REGSET_INTERVAL(IR_REG_XMM0, IR_REG_XMM5)) + +# define IR_REGSET_SCRATCH_X86_64_PN \ + (IR_REGSET_DIFFERENCE(IR_REGSET_GP, IR_REGSET(IR_REG_RBP)) | IR_REGSET_FP) + +const ir_regset ir_scratch_regset[] = { + IR_REGSET_GP | IR_REGSET_FP, +# ifdef _WIN64 + IR_REGSET_SCRATCH_X86_64_WIN, + IR_REGSET_SCRATCH_X86_64_SYSV, +# else + IR_REGSET_SCRATCH_X86_64_SYSV, + IR_REGSET_SCRATCH_X86_64_WIN, +# endif + IR_REGSET_SCRATCH_X86_64_PN, +}; + +const ir_call_conv_dsc ir_call_conv_x86_64_ms = { + 0, /* cleanup_stack_by_callee */ + 0, /* pass_struct_by_val */ + 0, /* sysv_varargs */ + 1, /* shadow_param_regs */ + 32, /* shadow_store_size */ + 4, /* int_param_regs_count */ + 4, /* fp_param_regs_count */ + IR_REG_RAX, /* int_ret_reg */ + IR_REG_XMM0, /* fp_ret_reg */ + IR_REG_NONE, /* fp_varargs_reg */ + IR_REG_SCRATH_X86_64_MS, + (const int8_t[4]){IR_REG_RCX, IR_REG_RDX, IR_REG_R8, IR_REG_R9}, + (const int8_t[4]){IR_REG_XMM0, IR_REG_XMM1, IR_REG_XMM2, IR_REG_XMM3}, + IR_REGSET(IR_REG_RBX) | IR_REGSET(IR_REG_RBP) | IR_REGSET(IR_REG_RSI) | IR_REGSET(IR_REG_RDI) | + IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15) | IR_REGSET_INTERVAL(IR_REG_XMM6, IR_REG_XMM15), +}; + +const ir_call_conv_dsc ir_call_conv_x86_64_sysv = { + 0, /* cleanup_stack_by_callee */ + 1, /* pass_struct_by_val */ + 1, /* sysv_varargs */ + 0, /* shadow_param_regs */ + 0, /* shadow_store_size */ + 
6, /* int_param_regs_count */ + 8, /* fp_param_regs_count */ + IR_REG_RAX, /* int_ret_reg */ + IR_REG_XMM0, /* fp_ret_reg */ + IR_REG_RAX, /* fp_varargs_reg */ + IR_REG_SCRATH_X86_64_SYSV, + (const int8_t[6]){IR_REG_RDI, IR_REG_RSI, IR_REG_RDX, IR_REG_RCX, IR_REG_R8, IR_REG_R9}, + (const int8_t[8]){IR_REG_XMM0, IR_REG_XMM1, IR_REG_XMM2, IR_REG_XMM3, + IR_REG_XMM4, IR_REG_XMM5, IR_REG_XMM6, IR_REG_XMM7}, + IR_REGSET(IR_REG_RBX) | IR_REGSET(IR_REG_RBP) | IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15), + +}; + +const ir_call_conv_dsc ir_call_conv_x86_64_preserve_none = { + 0, /* cleanup_stack_by_callee */ + 1, /* pass_struct_by_val */ + 1, /* sysv_varargs */ + 0, /* shadow_param_regs */ + 0, /* shadow_store_size */ + 12, /* int_param_regs_count */ + 8, /* fp_param_regs_count */ + IR_REG_RAX, /* int_ret_reg */ + IR_REG_XMM0, /* fp_ret_reg */ + IR_REG_RAX, /* fp_varargs_reg */ + IR_REG_SCRATH_X86_64_PN, + (const int8_t[12]){IR_REG_R12, IR_REG_R13, IR_REG_R14, IR_REG_R15, + IR_REG_RDI, IR_REG_RSI, IR_REG_RDX, IR_REG_RCX, IR_REG_R8, IR_REG_R9, + IR_REG_R11, IR_REG_RAX}, + (const int8_t[8]){IR_REG_XMM0, IR_REG_XMM1, IR_REG_XMM2, IR_REG_XMM3, + IR_REG_XMM4, IR_REG_XMM5, IR_REG_XMM6, IR_REG_XMM7}, + IR_REGSET(IR_REG_RBP), + +}; + +# ifdef _WIN64 +# define ir_call_conv_default ir_call_conv_x86_64_ms +# else +# define ir_call_conv_default ir_call_conv_x86_64_sysv +# endif + +#else + +# define IR_REG_SCRATCH_X86 IR_REG_SET_1 + +# define IR_REGSET_SCRATCH_X86 \ + (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | IR_REGSET_FP) + +const ir_regset ir_scratch_regset[] = { + IR_REGSET_INTERVAL(IR_REG_GP_FIRST, IR_REG_FP_LAST), + IR_REGSET_SCRATCH_X86, +}; + +const ir_call_conv_dsc ir_call_conv_x86_cdecl = { + 0, /* cleanup_stack_by_callee */ + 1, /* pass_struct_by_val */ + 0, /* sysv_varargs */ + 0, /* shadow_param_regs */ + 0, /* shadow_store_size */ + 0, /* int_param_regs_count */ + 0, /* fp_param_regs_count */ + IR_REG_RAX, /* int_ret_reg */ + IR_REG_NONE, /* fp_ret_reg */ + IR_REG_NONE, /* fp_varargs_reg */ + IR_REG_SCRATCH_X86, + NULL, + NULL, + IR_REGSET(IR_REG_RBX) | IR_REGSET(IR_REG_RBP) | IR_REGSET(IR_REG_RSI) | IR_REGSET(IR_REG_RDI), +}; + +const ir_call_conv_dsc ir_call_conv_x86_fastcall = { + 1, /* cleanup_stack_by_callee */ + 1, /* pass_struct_by_val */ + 0, /* sysv_varargs */ + 0, /* shadow_param_regs */ + 0, /* shadow_store_size */ + 2, /* int_param_regs_count */ + 0, /* fp_param_regs_count */ + IR_REG_RAX, /* int_ret_reg */ + IR_REG_NONE, /* fp_ret_reg */ + IR_REG_NONE, /* fp_varargs_reg */ + IR_REG_SCRATCH_X86, + (const int8_t[4]){IR_REG_RCX, IR_REG_RDX}, + NULL, + IR_REGSET(IR_REG_RBX) | IR_REGSET(IR_REG_RBP) | IR_REGSET(IR_REG_RSI) | IR_REGSET(IR_REG_RDI), +}; + +# define ir_call_conv_default ir_call_conv_x86_cdecl + +#endif + #define IR_RULES(_) \ _(CMP_INT) \ _(CMP_FP) \ @@ -1156,6 +1270,8 @@ int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *co const ir_insn *insn; int n = 0; int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; + const ir_proto_t *proto; + const ir_call_conv_dsc *cc; constraints->def_reg = IR_REG_NONE; constraints->hints_count = 0; @@ -1391,21 +1507,48 @@ op2_const: break; case IR_CALL: insn = &ctx->ir_base[ref]; - if (IR_IS_TYPE_INT(insn->type)) { - constraints->def_reg = IR_REG_INT_RET1; -#ifdef IR_REG_FP_RET1 - } else { - constraints->def_reg = IR_REG_FP_RET1; + proto = ir_call_proto(ctx, insn); + cc = ir_get_call_conv_dsc(proto ? 
proto->flags : IR_CC_DEFAULT); + if (insn->type != IR_VOID) { + if (IR_IS_TYPE_INT(insn->type)) { + constraints->def_reg = cc->int_ret_reg; + } else { + IR_ASSERT(IR_IS_TYPE_FP(insn->type)); +#ifdef IR_TARGET_X86 + if (cc->fp_ret_reg == IR_REG_NONE) { + ctx->flags2 |= IR_HAS_FP_RET_SLOT; + } else #endif + { + constraints->def_reg = cc->fp_ret_reg; + } + } } - constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF); + constraints->tmp_regs[0] = IR_SCRATCH_REG(cc->scratch_reg, IR_USE_SUB_REF, IR_DEF_SUB_REF); n = 1; - IR_FALLTHROUGH; + if (!IR_IS_CONST_REF(insn->op2) + && proto && (proto->flags & IR_VARARG_FUNC) && cc->fp_varargs_reg != IR_REG_NONE) { + constraints->tmp_regs[n] = IR_SCRATCH_REG(cc->fp_varargs_reg, IR_LOAD_SUB_REF, IR_USE_SUB_REF); + n++; + } + if (insn->inputs_count > 2) { + goto get_arg_hints; + } + flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; + break; case IR_TAILCALL: insn = &ctx->ir_base[ref]; + proto = ir_call_proto(ctx, insn); + cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT); + if (!IR_IS_CONST_REF(insn->op2) + && proto && (proto->flags & IR_VARARG_FUNC) && cc->fp_varargs_reg != IR_REG_NONE) { + constraints->tmp_regs[n] = IR_SCRATCH_REG(cc->fp_varargs_reg, IR_LOAD_SUB_REF, IR_USE_SUB_REF); + n++; + } if (insn->inputs_count > 2) { +get_arg_hints: constraints->hints[2] = IR_REG_NONE; - constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints); + constraints->hints_count = ir_get_args_regs(ctx, insn, cc, constraints->hints); if (!IR_IS_CONST_REF(insn->op2)) { constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF); n++; @@ -1533,7 +1676,8 @@ op2_const: break; case IR_EXITCALL: flags = IR_USE_MUST_BE_IN_REG; - constraints->def_reg = IR_REG_INT_RET1; + cc = ir_get_call_conv_dsc(ctx->flags); + constraints->def_reg = cc->int_ret_reg; break; case IR_IF_INT: case IR_GUARD: @@ -1548,16 +1692,21 @@ op2_const: flags = IR_OP3_SHOULD_BE_IN_REG; break; case IR_RETURN_INT: + cc = ir_get_call_conv_dsc(ctx->flags); flags = IR_OP2_SHOULD_BE_IN_REG; - constraints->hints[2] = IR_REG_INT_RET1; + constraints->hints[2] = cc->int_ret_reg; constraints->hints_count = 3; break; case IR_RETURN_FP: -#ifdef IR_REG_FP_RET1 - flags = IR_OP2_SHOULD_BE_IN_REG; - constraints->hints[2] = IR_REG_FP_RET1; - constraints->hints_count = 3; + cc = ir_get_call_conv_dsc(ctx->flags); +#ifdef IR_TARGET_X86 + if (cc->fp_ret_reg != IR_REG_NONE) #endif + { + flags = IR_OP2_SHOULD_BE_IN_REG; + constraints->hints[2] = cc->fp_ret_reg; + constraints->hints_count = 3; + } break; case IR_SNAPSHOT: flags = 0; @@ -1888,20 +2037,12 @@ static void ir_match_fuse_load_cmp_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root) } } -static void ir_match_fuse_load_cmp_fp_br(ir_ctx *ctx, ir_insn *insn, ir_ref root, bool direct) +static void ir_match_fuse_load_cmp_fp_br(ir_ctx *ctx, ir_insn *insn, ir_ref root) { - if (direct) { - if (insn->op == IR_LT || insn->op == IR_LE) { - /* swap operands to avoid P flag check */ - ir_swap_ops(insn); - insn->op ^= 3; - } - } else { - if (insn->op == IR_GT || insn->op == IR_GE) { - /* swap operands to avoid P flag check */ - ir_swap_ops(insn); - insn->op ^= 3; - } + if (insn->op == IR_LT || insn->op == IR_LE || insn->op == IR_UGT || insn->op == IR_UGE) { + /* swap operands to avoid P flag check */ + ir_swap_ops(insn); + insn->op ^= 3; } if (IR_IS_CONST_REF(insn->op2) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op2])) { /* pass */ @@ -1926,7 +2067,7 @@ static uint32_t 
ir_match_builtin_call(ir_ctx *ctx, const ir_insn *func) { const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto); - if (proto->flags & IR_BUILTIN_FUNC) { + if ((proto->flags & IR_CALL_CONV_MASK) == IR_CC_BUILTIN) { size_t name_len; const char *name = ir_get_strl(ctx, func->val.name, &name_len); @@ -2452,15 +2593,23 @@ binop_fp: } } ctx->flags2 |= IR_HAS_CALLS | IR_16B_FRAME_ALIGNMENT; -#ifndef IR_REG_FP_RET1 - if (IR_IS_TYPE_FP(insn->type)) { - ctx->flags2 |= IR_HAS_FP_RET_SLOT; - } -#endif IR_FALLTHROUGH; case IR_TAILCALL: case IR_IJMP: - ir_match_fuse_load(ctx, insn->op2, ref); + if (!IR_IS_CONST_REF(insn->op2)) { + if (ctx->ir_base[insn->op2].op == IR_PROTO) { + if (IR_IS_CONST_REF(ctx->ir_base[insn->op2].op1)) { + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_PROTO; + } else { + ir_match_fuse_load(ctx, ctx->ir_base[insn->op2].op1, ref); + if (ctx->rules[ctx->ir_base[insn->op2].op1] & IR_FUSED) { + ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_PROTO; + } + } + } else { + ir_match_fuse_load(ctx, insn->op2, ref); + } + } return insn->op; case IR_IGOTO: if (ctx->ir_base[insn->op1].op == IR_MERGE || ctx->ir_base[insn->op1].op == IR_LOOP_BEGIN) { @@ -2478,11 +2627,12 @@ binop_fp: case IR_VAR: return IR_STATIC_ALLOCA; case IR_PARAM: -#ifndef _WIN64 if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) { - return IR_STATIC_ALLOCA; + const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(ctx->flags); + if (cc->pass_struct_by_val) { + return IR_STATIC_ALLOCA; + } } -#endif return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM; case IR_ALLOCA: /* alloca() may be used only in functions */ @@ -2767,7 +2917,7 @@ store_int: return IR_CMP_AND_BRANCH_INT; } else { /* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */ - ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, 1); + ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref); ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; return IR_CMP_AND_BRANCH_FP; } @@ -2864,7 +3014,7 @@ store_int: ctx->rules[insn->op1] = IR_FUSED | IR_CMP_INT; return IR_COND_CMP_INT; } else { - ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref, 1); + ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref); ctx->rules[insn->op1] = IR_FUSED | IR_CMP_FP; return IR_COND_CMP_FP; } @@ -2956,7 +3106,7 @@ store_int: return IR_GUARD_CMP_INT; } else { /* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... 
GUARD_CMP */ - ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, insn->op == IR_GUARD_NOT); + ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref); ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; return IR_GUARD_CMP_FP; } @@ -3907,59 +4057,68 @@ static void ir_emit_prologue(ir_ctx *ctx) } } if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { -#if defined(_WIN64) - ir_reg fp; - int offset; + const ir_call_conv_dsc *cc = data->ra_data.cc; - if (ctx->flags & IR_USE_FRAME_POINTER) { - fp = IR_REG_FRAME_POINTER; - offset = sizeof(void*) * 2; - } else { - fp = IR_REG_STACK_POINTER; - offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*); + if (cc->shadow_store_size) { + ir_reg fp; + int shadow_store; + int offset = 0; + int n = 0; + + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + shadow_store = sizeof(void*) * 2; + } else { + fp = IR_REG_STACK_POINTER; + shadow_store = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*); + } + + while (offset < cc->shadow_store_size && n < cc->int_param_regs_count) { + | mov [Ra(fp)+shadow_store+offset], Ra(cc->int_param_regs[n]) + n++; + offset += sizeof(void*); + } } - | mov [Ra(fp)+offset], Ra(IR_REG_INT_ARG1) - | mov [Ra(fp)+offset+8], Ra(IR_REG_INT_ARG2) - | mov [Ra(fp)+offset+16], Ra(IR_REG_INT_ARG3) - | mov [Ra(fp)+offset+24], Ra(IR_REG_INT_ARG4) -#elif defined(IR_TARGET_X64) + + if (cc->sysv_varargs) { + IR_ASSERT(sizeof(void*) == 8); +#ifdef IR_TARGET_X64 |.if X64 - const int8_t *int_reg_params = _ir_int_reg_params; - const int8_t *fp_reg_params = _ir_fp_reg_params; - uint32_t i; - ir_reg fp; - int offset; + int32_t i; + ir_reg fp; + int offset; - if (ctx->flags & IR_USE_FRAME_POINTER) { - fp = IR_REG_FRAME_POINTER; + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; - offset = -(ctx->stack_frame_size - ctx->locals_area_size); - } else { - fp = IR_REG_STACK_POINTER; - offset = ctx->locals_area_size + ctx->call_stack_size; - } + offset = -(ctx->stack_frame_size - ctx->locals_area_size); + } else { + fp = IR_REG_STACK_POINTER; + offset = ctx->locals_area_size + ctx->call_stack_size; + } - if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { - /* skip named args */ - offset += sizeof(void*) * ctx->gp_reg_params; - for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) { - | mov qword [Ra(fp)+offset], Rq(int_reg_params[i]) - offset += sizeof(void*); + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) { + /* skip named args */ + offset += sizeof(void*) * ctx->gp_reg_params; + for (i = ctx->gp_reg_params; i < cc->int_param_regs_count; i++) { + | mov qword [Ra(fp)+offset], Rq(cc->int_param_regs[i]) + offset += sizeof(void*); + } } - } - if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { - | test al, al - | je >1 - /* skip named args */ - offset += 16 * ctx->fp_reg_params; - for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) { - | movaps [Ra(fp)+offset], xmm(fp_reg_params[i]-IR_REG_FP_FIRST) - offset += 16; + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) { + | test al, al + | je >1 + /* skip named args */ + offset += 16 * ctx->fp_reg_params; + for (i = ctx->fp_reg_params; i < cc->fp_param_regs_count; i++) { + | movaps [Ra(fp)+offset], xmm(cc->fp_param_regs[i]-IR_REG_FP_FIRST) + offset += 16; + } + |1: } - |1: - } |.endif #endif + } } } @@ -6995,27 +7154,26 @@ static void 
ir_emit_return_void(ir_ctx *ctx) ir_emit_epilogue(ctx); -#ifdef IR_TARGET_X86 - if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC) && ctx->param_stack_size) { + if (data->ra_data.cc->cleanup_stack_by_callee && ctx->param_stack_size) { | ret ctx->param_stack_size - return; + } else { + | ret } -#endif - - | ret } static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { + ir_backend_data *data = ctx->data; + ir_reg ret_reg = data->ra_data.cc->int_ret_reg; ir_reg op2_reg = ctx->regs[ref][2]; - if (op2_reg != IR_REG_INT_RET1) { + if (op2_reg != ret_reg) { ir_type type = ctx->ir_base[insn->op2].type; if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { - ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg); + ir_emit_mov(ctx, type, ret_reg, op2_reg); } else { - ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2); + ir_emit_load(ctx, type, ret_reg, insn->op2); } } ir_emit_return_void(ctx); @@ -7023,64 +7181,68 @@ static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { + ir_backend_data *data = ctx->data; ir_reg op2_reg = ctx->regs[ref][2]; ir_type type = ctx->ir_base[insn->op2].type; + ir_reg ret_reg = data->ra_data.cc->fp_ret_reg; -#ifdef IR_REG_FP_RET1 - if (op2_reg != IR_REG_FP_RET1) { + if (op2_reg != ret_reg && ret_reg != IR_REG_NONE) { if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { - ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg); + ir_emit_fp_mov(ctx, type, ret_reg, op2_reg); } else { - ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2); + ir_emit_load(ctx, type, ret_reg, insn->op2); } } -#else - ir_backend_data *data = ctx->data; - dasm_State **Dst = &data->dasm_state; - if (IR_IS_CONST_REF(insn->op2)) { - ir_insn *value = &ctx->ir_base[insn->op2]; +#ifdef IR_TARGET_X86 + if (ret_reg == IR_REG_NONE) { + dasm_State **Dst = &data->dasm_state; - if ((type == IR_FLOAT && value->val.f == 0.0) || (type == IR_DOUBLE && value->val.d == 0.0)) { - | fldz - } else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) { - | fld1 - } else { - int label = ir_get_const_label(ctx, insn->op2); + if (IR_IS_CONST_REF(insn->op2)) { + ir_insn *value = &ctx->ir_base[insn->op2]; + + if ((type == IR_FLOAT && value->val.f == 0.0) || (type == IR_DOUBLE && value->val.d == 0.0)) { + | fldz + } else if ((type == IR_FLOAT && value->val.f == 1.0) || (type == IR_DOUBLE && value->val.d == 1.0)) { + | fld1 + } else { + int label = ir_get_const_label(ctx, insn->op2); + + if (type == IR_DOUBLE) { + | fld qword [=>label] + } else { + IR_ASSERT(type == IR_FLOAT); + | fld dword [=>label] + } + } + } else if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) { + ir_reg fp; + int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp); if (type == IR_DOUBLE) { - | fld qword [=>label] + | fld qword [Ra(fp)+offset] } else { IR_ASSERT(type == IR_FLOAT); - | fld dword [=>label] + | fld dword [Ra(fp)+offset] } - } - } else if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) { - ir_reg fp; - int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp); - - if (type == IR_DOUBLE) { - | fld qword [Ra(fp)+offset] } else { - IR_ASSERT(type == IR_FLOAT); - | fld dword [Ra(fp)+offset] - } - } else { - int32_t offset = ctx->ret_slot; - ir_reg fp; + int32_t offset = ctx->ret_slot; + ir_reg fp; - IR_ASSERT(offset != -1); - offset = IR_SPILL_POS_TO_OFFSET(offset); - fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(fp, offset), op2_reg); - if (type == IR_DOUBLE) { - | fld qword [Ra(fp)+offset] - } else { - IR_ASSERT(type == IR_FLOAT); - | fld dword [Ra(fp)+offset] + IR_ASSERT(offset != -1); + offset = IR_SPILL_POS_TO_OFFSET(offset); + fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(fp, offset), op2_reg); + if (type == IR_DOUBLE) { + | fld qword [Ra(fp)+offset] + } else { + IR_ASSERT(type == IR_FLOAT); + | fld dword [Ra(fp)+offset] + } } } #endif + ir_emit_return_void(ctx); } @@ -8555,327 +8717,323 @@ static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) { -#if defined(_WIN64) || defined(IR_TARGET_X86) ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; dasm_State **Dst = &data->dasm_state; - ir_reg fp; - int arg_area_offset; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - int32_t offset; - IR_ASSERT(tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + if (!cc->sysv_varargs) { + ir_reg fp; + int arg_area_offset; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + int32_t offset; + + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - if (ctx->flags & IR_USE_FRAME_POINTER) { - fp = IR_REG_FRAME_POINTER; - arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; + } else { + fp = IR_REG_STACK_POINTER; + arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; + } + | lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset] + | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) } else { - fp = IR_REG_STACK_POINTER; - arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; - } - | lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset] - | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) -#elif defined(IR_TARGET_X64) + IR_ASSERT(sizeof(void*) == 8); +#ifdef IR_TARGET_X64 |.if X64 - ir_backend_data *data = ctx->data; - dasm_State **Dst = &data->dasm_state; - ir_reg fp; - int reg_save_area_offset; - int overflow_arg_area_offset; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - bool have_reg_save_area = 0; - int32_t offset; + ir_reg fp; + int reg_save_area_offset; + int overflow_arg_area_offset; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + bool have_reg_save_area = 0; + int32_t offset; - IR_ASSERT(tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - if (ctx->flags & IR_USE_FRAME_POINTER) { - fp = IR_REG_FRAME_POINTER; - reg_save_area_offset = -(ctx->stack_frame_size - ctx->locals_area_size); - overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; - } else { - fp = IR_REG_STACK_POINTER; - reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; - overflow_arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; - } + if (ctx->flags & IR_USE_FRAME_POINTER) { + fp = IR_REG_FRAME_POINTER; + reg_save_area_offset = -(ctx->stack_frame_size - ctx->locals_area_size); + overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; + } else { + fp = IR_REG_STACK_POINTER; + reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; + overflow_arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; + } - if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { - | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] - have_reg_save_area = 1; - /* Set va_list.gp_offset */ - | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], sizeof(void*) * ctx->gp_reg_params - } else { - reg_save_area_offset -= sizeof(void*) * IR_REG_INT_ARGS; - /* Set va_list.gp_offset */ - | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], sizeof(void*) * IR_REG_INT_ARGS - } - if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { - if (!have_reg_save_area) { + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) { | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] have_reg_save_area = 1; + /* Set va_list.gp_offset */ + | mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))], sizeof(void*) * ctx->gp_reg_params + } else { + reg_save_area_offset -= sizeof(void*) * cc->int_param_regs_count; + /* Set va_list.gp_offset */ + | mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))], sizeof(void*) * cc->int_param_regs_count } - /* Set va_list.fp_offset */ - | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], sizeof(void*) * IR_REG_INT_ARGS + 16 * ctx->fp_reg_params - } else { - /* Set va_list.fp_offset */ - | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS - } - if (have_reg_save_area) { - /* Set va_list.reg_save_area */ - | mov qword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))], Ra(tmp_reg) - } - | lea Ra(tmp_reg), aword [Ra(fp)+overflow_arg_area_offset] - /* Set va_list.overflow_arg_area */ - | mov qword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) { + if (!have_reg_save_area) { + | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] + have_reg_save_area = 1; + } + /* Set va_list.fp_offset */ + | mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))], sizeof(void*) * cc->int_param_regs_count + 16 * ctx->fp_reg_params + } else { + /* Set va_list.fp_offset */ + | mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))], sizeof(void*) * cc->int_param_regs_count + 16 * cc->fp_param_regs_count + } + if (have_reg_save_area) { + /* Set va_list.reg_save_area */ 
+ | mov qword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, reg_save_area))], Ra(tmp_reg) + } + | lea Ra(tmp_reg), aword [Ra(fp)+overflow_arg_area_offset] + /* Set va_list.overflow_arg_area */ + | mov qword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))], Ra(tmp_reg) |.endif -#else - IR_ASSERT(0 && "NIY va_start"); #endif + } } static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) { -#if defined(_WIN64) || defined(IR_TARGET_X86) ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; dasm_State **Dst = &data->dasm_state; - ir_reg tmp_reg = ctx->regs[def][1]; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg op3_reg = ctx->regs[def][3]; - int32_t op2_offset, op3_offset; - IR_ASSERT(tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + if (!cc->sysv_varargs) { + ir_reg tmp_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg op3_reg = ctx->regs[def][3]; + int32_t op2_offset, op3_offset; + + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + op2_offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - op2_offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - if (op3_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op3_reg)) { - op3_reg = IR_REG_NUM(op3_reg); - ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); + if (op3_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op3_reg)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); + } + op3_offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); + op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); } - op3_offset = 0; + | mov Ra(tmp_reg), aword [Ra(op3_reg)+op3_offset] + | mov aword [Ra(op2_reg)+op2_offset], Ra(tmp_reg) } else { - IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); - op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); - } - | mov Ra(tmp_reg), aword [Ra(op3_reg)+op3_offset] - | mov aword [Ra(op2_reg)+op2_offset], Ra(tmp_reg) -#elif defined(IR_TARGET_X64) + IR_ASSERT(sizeof(void*) == 8); +#ifdef IR_TARGET_X64 |.if X64 - ir_backend_data *data = ctx->data; - dasm_State **Dst = &data->dasm_state; - ir_reg tmp_reg = ctx->regs[def][1]; - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg op3_reg = ctx->regs[def][3]; - int32_t op2_offset, op3_offset; + ir_reg tmp_reg = ctx->regs[def][1]; + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg op3_reg = ctx->regs[def][3]; + int32_t op2_offset, op3_offset; - IR_ASSERT(tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); - } - op2_offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - if (op3_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op3_reg)) { - op3_reg = IR_REG_NUM(op3_reg); - ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); + IR_ASSERT(tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + op2_offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op2_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - op3_offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); - op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); - } - | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, gp_offset))] - | mov dword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg) - | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, fp_offset))] - | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, fp_offset))], Ra(tmp_reg) - | mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, overflow_arg_area))] - | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) - | mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, reg_save_area))] - | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, reg_save_area))], Ra(tmp_reg) + if (op3_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op3_reg)) { + op3_reg = IR_REG_NUM(op3_reg); + ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); + } + op3_offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); + op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + op3_offset = ir_local_offset(ctx, &ctx->ir_base[insn->op3]); + } + | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))] + | mov dword [Ra(op2_reg)+(op2_offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))], Rd(tmp_reg) + | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))] + | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))], Ra(tmp_reg) + | mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))] + | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))], Ra(tmp_reg) + | mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_x86_64_sysv_va_list, reg_save_area))] + | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_x86_64_sysv_va_list, reg_save_area))], Ra(tmp_reg) |.endif -#else - IR_ASSERT(0 && "NIY va_copy"); #endif + } } static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) { -#if defined(_WIN64) || defined(IR_TARGET_X86) ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; dasm_State **Dst = &data->dasm_state; - ir_type type = insn->type; - ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); - ir_reg op2_reg = ctx->regs[def][2]; - ir_reg tmp_reg = ctx->regs[def][3]; - int32_t offset; - IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); - if (op2_reg != IR_REG_NONE) { - if (IR_REG_SPILLED(op2_reg)) { - op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + if (!cc->sysv_varargs) { + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + int32_t offset; + + IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); } - offset = 0; - } else { - IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); - op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - | mov Ra(tmp_reg), aword [Ra(op2_reg)+offset] -#ifdef _WIN64 - if (def_reg != IR_REG_NONE) { - ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); - } - | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) -#else - if (!insn->op3) { - if (def_reg != IR_REG_NONE) { - ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); + | mov Ra(tmp_reg), aword [Ra(op2_reg)+offset] + if (!cc->pass_struct_by_val || !insn->op3) { + if (def_reg != IR_REG_NONE) { + ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); + } + | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) + } else { + int size = IR_VA_ARG_SIZE(insn->op3); + + if (def_reg != IR_REG_NONE) { + IR_ASSERT(type == IR_ADDR); + int align = IR_VA_ARG_ALIGN(insn->op3); + + if (align > (int)sizeof(void*)) { + | add Ra(tmp_reg), (align-1) + | and Ra(tmp_reg), ~(align-1) + } + | mov Ra(def_reg), Ra(tmp_reg) + } + | add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*)) + } + | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); } - | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) } else { - int size = IR_VA_ARG_SIZE(insn->op3); + IR_ASSERT(sizeof(void*) == 8); +#ifdef IR_TARGET_X64 +|.if X64 + ir_type type = insn->type; + ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); + ir_reg op2_reg = ctx->regs[def][2]; + ir_reg tmp_reg = ctx->regs[def][3]; + int32_t offset; - if (def_reg != IR_REG_NONE) { + IR_ASSERT((def_reg != IR_REG_NONE || ctx->use_lists[def].count == 1) && tmp_reg != IR_REG_NONE); + if (op2_reg != IR_REG_NONE) { + if (IR_REG_SPILLED(op2_reg)) { + op2_reg = IR_REG_NUM(op2_reg); + ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + } + offset = 0; + } else { + IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); + op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; + offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); + } + if (insn->op3) { + /* long struct argument */ IR_ASSERT(type == IR_ADDR); int align = IR_VA_ARG_ALIGN(insn->op3); int size = IR_VA_ARG_SIZE(insn->op3); + | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))] if (align > (int)sizeof(void*)) { | add Ra(tmp_reg), (align-1) | and Ra(tmp_reg), ~(align-1) }
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; - offset = ir_local_offset(ctx, &ctx->ir_base[insn->op2]); - } - if (insn->op3) { - /* long struct arguemnt */ - IR_ASSERT(type == IR_ADDR); - int align = IR_VA_ARG_ALIGN(insn->op3); - int size = IR_VA_ARG_SIZE(insn->op3); - - | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] - if (align > (int)sizeof(void*)) { - | add Ra(tmp_reg), (align-1) - | and Ra(tmp_reg), ~(align-1) - } - if (def_reg != IR_REG_NONE) { - | mov Ra(def_reg), Ra(tmp_reg) - } - | add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*)) - | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) - } else if (IR_IS_TYPE_INT(type)) { - | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))] - | cmp Rd(tmp_reg), sizeof(void*)*IR_REG_INT_ARGS - | jge >1 - | add Rd(tmp_reg), sizeof(void*) - | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg) - | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))] - | jmp >2 - |1: - | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] - | add Ra(tmp_reg), sizeof(void*) - | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) - |2: - if (def_reg != IR_REG_NONE) { - if (ir_type_size[type] == 8) { - | mov Rq(def_reg), qword [Ra(tmp_reg)-sizeof(void*)] - } else { - | mov Rd(def_reg), dword [Ra(tmp_reg)-sizeof(void*)] + if (def_reg != IR_REG_NONE) { + | mov Ra(def_reg), Ra(tmp_reg) + } + | add Ra(tmp_reg), IR_ALIGNED_SIZE(size, sizeof(void*)) + | mov aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))], Ra(tmp_reg) + } else if (IR_IS_TYPE_INT(type)) { + | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))] + | cmp Rd(tmp_reg), sizeof(void*) * cc->int_param_regs_count + | jge >1 + | add Rd(tmp_reg), sizeof(void*) + | mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, gp_offset))], Rd(tmp_reg) + | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, reg_save_area))] + | jmp >2 + |1: + | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))] + | add Ra(tmp_reg), sizeof(void*) + | mov aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))], Ra(tmp_reg) + |2: + if (def_reg != IR_REG_NONE) { + if (ir_type_size[type] == 8) { + | mov Rq(def_reg), qword [Ra(tmp_reg)-sizeof(void*)] + } else { + | mov Rd(def_reg), dword [Ra(tmp_reg)-sizeof(void*)] + } } + } else { + | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))] + | cmp Rd(tmp_reg), sizeof(void*) * cc->int_param_regs_count + 16 * cc->fp_param_regs_count + | jge >1 + | add Rd(tmp_reg), 16 + | mov dword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, fp_offset))], Rd(tmp_reg) + | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, reg_save_area))] + if (def_reg != IR_REG_NONE) { + ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, -16)); + } + | jmp >2 + |1: + | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))] + if (def_reg != IR_REG_NONE) { + ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); + } + | add Ra(tmp_reg), 8 + | mov aword [Ra(op2_reg)+(offset+offsetof(ir_x86_64_sysv_va_list, overflow_arg_area))], Ra(tmp_reg) + |2: } - } else { - | mov Rd(tmp_reg), dword 
[Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))] - | cmp Rd(tmp_reg), sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS - | jge >1 - | add Rd(tmp_reg), 16 - | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], Rd(tmp_reg) - | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))] - if (def_reg != IR_REG_NONE) { - ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, -16)); - } - | jmp >2 - |1: - | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] - if (def_reg != IR_REG_NONE) { - ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); + if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, type, def, def_reg); } - | add Ra(tmp_reg), 8 - | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) - |2: - } - if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { - ir_emit_store(ctx, type, def, def_reg); - } |.endif -#else - IR_ASSERT(0 && "NIY va_arg"); #endif + } } static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) @@ -9104,7 +9262,9 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) val = &ctx->ir_base[use_insn->op2]; IR_ASSERT(!IR_IS_SYM_CONST(val->op)); label = ir_skip_empty_target_blocks(ctx, use_block); - if (IR_IS_32BIT(type, val->val)) { + if (val->val.u64 == 0) { + | ASM_REG_REG_OP test, type, op2_reg, op2_reg + } else if (IR_IS_32BIT(type, val->val)) { | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32 } else { IR_ASSERT(sizeof(void*) == 8); @@ -9158,25 +9318,14 @@ static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) } } -static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int *copy_stack_ptr) +static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, const ir_call_conv_dsc *cc, int *copy_stack_ptr) { int j, n; ir_type type; int int_param = 0; int fp_param = 0; - int int_reg_params_count = IR_REG_INT_ARGS; - int fp_reg_params_count = IR_REG_FP_ARGS; int32_t used_stack = 0; -#ifdef _WIN64 int32_t copy_stack = 0; -#endif - -#ifdef IR_HAVE_FASTCALL - if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { - int_reg_params_count = IR_REG_INT_FCARGS; - fp_reg_params_count = IR_REG_FP_FCARGS; - } -#endif n = insn->inputs_count; for (j = 3; j <= n; j++) { @@ -9187,55 +9336,49 @@ static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn, int *copy_stack_pt int size = arg->op2; int align = arg->op3; -#ifdef _WIN64 - copy_stack += size; - align = IR_MAX((int)sizeof(void*), align); - copy_stack = IR_ALIGNED_SIZE(copy_stack, align); - type = IR_ADDR; -#else - align = IR_MAX((int)sizeof(void*), align); - used_stack = IR_ALIGNED_SIZE(used_stack, align); - used_stack += size; - used_stack = IR_ALIGNED_SIZE(used_stack, sizeof(void*)); - continue; -#endif + if (!cc->pass_struct_by_val) { + copy_stack += size; + align = IR_MAX((int)sizeof(void*), align); + copy_stack = IR_ALIGNED_SIZE(copy_stack, align); + type = IR_ADDR; + } else { + align = IR_MAX((int)sizeof(void*), align); + used_stack = IR_ALIGNED_SIZE(used_stack, align); + used_stack += size; + used_stack = IR_ALIGNED_SIZE(used_stack, sizeof(void*)); + continue; + } } - if (int_param >= int_reg_params_count) { + if (int_param >= cc->int_param_regs_count) { used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); } int_param++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - fp_param++; -#endif + if (cc->shadow_param_regs) { + 
fp_param++; + } } else { IR_ASSERT(IR_IS_TYPE_FP(type)); - if (fp_param >= fp_reg_params_count) { + if (fp_param >= cc->fp_param_regs_count) { used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); } fp_param++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - int_param++; -#endif + if (cc->shadow_param_regs) { + int_param++; + } } } /* Reserved "home space" or "shadow store" for register arguments (used in Windows64 ABI) */ - used_stack += IR_SHADOW_ARGS; + used_stack += cc->shadow_store_size; -#ifdef _WIN64 copy_stack = IR_ALIGNED_SIZE(copy_stack, 16); used_stack += copy_stack; *copy_stack_ptr = copy_stack; -#else - *copy_stack_ptr = 0; -#endif return used_stack; } -static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) +static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, const ir_proto_t *proto, const ir_call_conv_dsc *cc, ir_reg tmp_reg) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; @@ -9247,11 +9390,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg int int_param = 0; int fp_param = 0; int count = 0; - int int_reg_params_count = IR_REG_INT_ARGS; - int fp_reg_params_count = IR_REG_FP_ARGS; - const int8_t *int_reg_params = _ir_int_reg_params; - const int8_t *fp_reg_params = _ir_fp_reg_params; - int32_t used_stack, copy_stack = 0, stack_offset = IR_SHADOW_ARGS; + int32_t used_stack, copy_stack = 0, stack_offset = cc->shadow_store_size; ir_copy *copies; bool do_pass3 = 0; /* For temporaries we may use any scratch registers except for registers used for parameters */ @@ -9266,40 +9405,24 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg = IR_REG_RAX; } -#ifdef IR_HAVE_FASTCALL - if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { - int_reg_params_count = IR_REG_INT_FCARGS; - fp_reg_params_count = IR_REG_FP_FCARGS; - int_reg_params = _ir_int_fc_reg_params; - fp_reg_params = _ir_fp_fc_reg_params; - } -#endif - if (insn->op == IR_CALL - && (ctx->flags & IR_PREALLOCATED_STACK) -#ifdef IR_HAVE_FASTCALL - && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ -#endif - ) { - // TODO: support for preallocated stack -#ifdef _WIN64 - used_stack = ir_call_used_stack(ctx, insn, ©_stack); -#else - used_stack = 0; -#endif + && (ctx->flags2 & IR_PREALLOCATED_STACK) + && !cc->cleanup_stack_by_callee) { + if (!cc->pass_struct_by_val) { + used_stack = ir_call_used_stack(ctx, insn, cc, ©_stack); + } else { + used_stack = 0; + } } else { - used_stack = ir_call_used_stack(ctx, insn, ©_stack); - if (IR_SHADOW_ARGS + used_stack = ir_call_used_stack(ctx, insn, cc, ©_stack); + if (cc->shadow_store_size && insn->op == IR_TAILCALL - && used_stack == IR_SHADOW_ARGS) { + && used_stack == cc->shadow_store_size) { used_stack = 0; } if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size -#ifdef IR_HAVE_FASTCALL - && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ -#endif - ) { + && !cc->cleanup_stack_by_callee) { used_stack = 0; } else { /* Stack must be 16 byte aligned */ @@ -9311,10 +9434,10 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg } } -#ifdef _WIN64 -|.if X64 if (copy_stack) { /* Copy struct arguments */ + IR_ASSERT(sizeof(void*) == 8); +|.if X64 int copy_stack_offset = 0; for (j = 3; j <= n; j++) { @@ -9347,9 +9470,8 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg | 
rep; movsb } } - } |.endif -#endif + } /* 1. move all register arguments that should be passed through stack * and collect arguments that should be passed through registers */ @@ -9360,8 +9482,7 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg arg_insn = &ctx->ir_base[arg]; type = arg_insn->type; if (IR_IS_TYPE_INT(type)) { -#ifndef _WIN64 - if (arg_insn->op == IR_ARGVAL) { + if (arg_insn->op == IR_ARGVAL && cc->pass_struct_by_val) { int size = arg_insn->op2; int align = arg_insn->op3; align = IR_MAX((int)sizeof(void*), align); @@ -9408,38 +9529,35 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*)); continue; } -#endif - if (int_param < int_reg_params_count) { - dst_reg = int_reg_params[int_param]; + if (int_param < cc->int_param_regs_count) { + dst_reg = cc->int_param_regs[int_param]; } else { dst_reg = IR_REG_NONE; /* pass argument through stack */ } int_param++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - fp_param++; - if (arg_insn->op == IR_ARGVAL) { + if (cc->shadow_param_regs) { + fp_param++; + } + if (arg_insn->op == IR_ARGVAL && !cc->pass_struct_by_val) { do_pass3 = 3; continue; } -#endif } else { IR_ASSERT(IR_IS_TYPE_FP(type)); - if (fp_param < fp_reg_params_count) { - dst_reg = fp_reg_params[fp_param]; + if (fp_param < cc->fp_param_regs_count) { + dst_reg = cc->fp_param_regs[fp_param]; } else { dst_reg = IR_REG_NONE; /* pass argument through stack */ } fp_param++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - int_param++; -#endif + if (cc->shadow_param_regs) { + int_param++; + } } if (dst_reg != IR_REG_NONE) { if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE || - (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(IR_REGSET_PRESERVED, IR_REG_NUM(src_reg)))) { + (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(cc->preserved_regs, IR_REG_NUM(src_reg)))) { /* delay CONST->REG and MEM->REG moves to third pass */ do_pass3 = 1; } else { @@ -9474,11 +9592,9 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg /* 3. 
move the remaining memory and immediate values */ if (do_pass3) { -#ifdef _WIN64 int copy_stack_offset = 0; -#endif - stack_offset = IR_SHADOW_ARGS; + stack_offset = cc->shadow_store_size; int_param = 0; fp_param = 0; for (j = 3; j <= n; j++) { @@ -9491,60 +9607,57 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg int size = arg_insn->op2; int align = arg_insn->op3; -#ifndef _WIN64 - align = IR_MAX((int)sizeof(void*), align); - stack_offset = IR_ALIGNED_SIZE(stack_offset, align); - stack_offset += size; - stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*)); - continue; -#else -|.if X64 - /* pass pointer to the copy on stack */ - copy_stack_offset += size; - align = IR_MAX((int)sizeof(void*), align); - copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align); - if (int_param < int_reg_params_count) { - dst_reg = int_reg_params[int_param]; - | lea Ra(dst_reg), [rsp + (used_stack - copy_stack_offset)] + if (cc->pass_struct_by_val) { + align = IR_MAX((int)sizeof(void*), align); + stack_offset = IR_ALIGNED_SIZE(stack_offset, align); + stack_offset += size; + stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*)); + continue; } else { - | lea Ra(tmp_reg), [rsp + (used_stack - copy_stack_offset)] - ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg); - stack_offset += sizeof(void*); + /* pass pointer to the copy on stack */ + copy_stack_offset += size; + align = IR_MAX((int)sizeof(void*), align); + copy_stack_offset = IR_ALIGNED_SIZE(copy_stack_offset, align); + if (int_param < cc->int_param_regs_count) { + dst_reg = cc->int_param_regs[int_param]; + | lea Ra(dst_reg), [r4 + (used_stack - copy_stack_offset)] + } else { + | lea Ra(tmp_reg), [r4 + (used_stack - copy_stack_offset)] + ir_emit_store_mem_int(ctx, IR_ADDR, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg); + stack_offset += sizeof(void*); + } + int_param++; + if (cc->shadow_param_regs) { + fp_param++; + } + continue; } - int_param++; - /* WIN64 calling convention use common couter for int and fp registers */ - fp_param++; - continue; -|.endif -#endif } - if (int_param < int_reg_params_count) { - dst_reg = int_reg_params[int_param]; + if (int_param < cc->int_param_regs_count) { + dst_reg = cc->int_param_regs[int_param]; } else { dst_reg = IR_REG_NONE; /* argument already passed through stack */ } int_param++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - fp_param++; -#endif + if (cc->shadow_param_regs) { + fp_param++; + } } else { IR_ASSERT(IR_IS_TYPE_FP(type)); - if (fp_param < fp_reg_params_count) { - dst_reg = fp_reg_params[fp_param]; + if (fp_param < cc->fp_param_regs_count) { + dst_reg = cc->fp_param_regs[fp_param]; } else { dst_reg = IR_REG_NONE; /* argument already passed through stack */ } fp_param++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - int_param++; -#endif + if (cc->shadow_param_regs) { + int_param++; + } } if (dst_reg != IR_REG_NONE) { if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE || - (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(IR_REGSET_PRESERVED, IR_REG_NUM(src_reg)))) { + (IR_REG_SPILLED(src_reg) && !IR_REGSET_IN(cc->preserved_regs, IR_REG_NUM(src_reg)))) { if (IR_IS_TYPE_INT(type)) { if (IR_IS_CONST_REF(arg)) { if (type == IR_I8 || type == IR_I16) { @@ -9612,17 +9725,16 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg } } -#ifdef _WIN64 /* WIN64 calling convention requires duplcation of 
parameters passed in FP register into GP ones */ - if (ir_is_vararg(ctx, insn)) { - n = IR_MIN(n, IR_MAX_REG_ARGS + 2); + if (proto && (proto->flags & IR_VARARG_FUNC) && cc->shadow_param_regs) { + n = IR_MIN(n, IR_MIN(cc->int_param_regs_count, cc->fp_param_regs_count) + 2); for (j = 3; j <= n; j++) { arg = ir_insn_op(insn, j); arg_insn = &ctx->ir_base[arg]; type = arg_insn->type; if (IR_IS_TYPE_FP(type)) { - src_reg = fp_reg_params[j-3]; - dst_reg = int_reg_params[j-3]; + src_reg = cc->fp_param_regs[j-3]; + dst_reg = cc->int_param_regs[j-3]; |.if X64 if (ctx->mflags & IR_X86_AVX) { | vmovd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) @@ -9633,41 +9745,46 @@ static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg } } } - if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) { + + if (insn->op == IR_CALL && (ctx->flags2 & IR_PREALLOCATED_STACK)) { used_stack = 0; } -#endif -#ifdef IR_REG_VARARG_FP_REGS - /* set hidden argument to specify the number of vector registers used */ - if (ir_is_vararg(ctx, insn)) { - fp_param = IR_MIN(fp_param, fp_reg_params_count); - | mov Rd(IR_REG_VARARG_FP_REGS), fp_param + + if (proto && (proto->flags & IR_VARARG_FUNC) && cc->fp_varargs_reg != IR_REG_NONE) { + /* set hidden argument to specify the number of vector registers used */ + fp_param = IR_MIN(fp_param, cc->fp_param_regs_count); + if (fp_param) { + | mov Rd(cc->fp_varargs_reg), fp_param + } else { + | xor Rd(cc->fp_varargs_reg), Rd(cc->fp_varargs_reg) + } } -#endif return used_stack; } -static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack) +static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, const ir_proto_t *proto, const ir_call_conv_dsc *cc, int32_t used_stack) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg; + ir_ref func = insn->op2; - if (IR_IS_CONST_REF(insn->op2)) { - void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); + if (!IR_IS_CONST_REF(func) && ctx->rules[func] == (IR_FUSED | IR_SIMPLE | IR_PROTO)) { + func = ctx->ir_base[func].op1; + } + if (IR_IS_CONST_REF(func)) { + void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[func]); if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { | call aword &addr } else { |.if X64 -|| ir_reg tmp_reg = IR_REG_RAX; - -#ifdef IR_REG_VARARG_FP_REGS -|| if (ir_is_vararg(ctx, insn)) { -|| tmp_reg = IR_REG_R11; +|| ir_reg tmp_reg = cc->int_ret_reg; +|| +|| if (proto && (proto->flags & IR_VARARG_FUNC) && tmp_reg == cc->fp_varargs_reg) { +|| tmp_reg = IR_REG_R11; // TODO: avoid usage of hardcoded temporary register ??? 
|| } -#endif || if (IR_IS_SIGNED_32BIT(addr)) { | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 || } else { @@ -9682,16 +9799,16 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + ir_emit_load(ctx, IR_ADDR, op2_reg, func); } | call Ra(op2_reg) } else { ir_mem mem; - if (ir_rule(ctx, insn->op2) & IR_FUSED) { - mem = ir_fuse_load(ctx, def, insn->op2); + if (ir_rule(ctx, func) & IR_FUSED) { + mem = ir_fuse_load(ctx, def, func); } else { - mem = ir_ref_spill_slot(ctx, insn->op2); + mem = ir_ref_spill_slot(ctx, func); } | ASM_TMEM_OP call, aword, mem @@ -9702,7 +9819,7 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); ctx->call_stack_size -= aligned_stack; - if (ir_is_fastcall(ctx, insn)) { + if (cc->cleanup_stack_by_callee) { aligned_stack -= used_stack; if (aligned_stack) { | add Ra(IR_REG_RSP), aligned_stack @@ -9716,31 +9833,32 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used if (IR_IS_TYPE_INT(insn->type)) { def_reg = IR_REG_NUM(ctx->regs[def][0]); if (def_reg != IR_REG_NONE) { - if (def_reg != IR_REG_INT_RET1) { - ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + if (def_reg != cc->int_ret_reg) { + ir_emit_mov(ctx, insn->type, def_reg, cc->int_ret_reg); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } else if (ctx->use_lists[def].count > 1) { - ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); + ir_emit_store(ctx, insn->type, def, cc->int_ret_reg); } } else { IR_ASSERT(IR_IS_TYPE_FP(insn->type)); def_reg = IR_REG_NUM(ctx->regs[def][0]); -#ifdef IR_REG_FP_RET1 - if (def_reg != IR_REG_NONE) { - if (def_reg != IR_REG_FP_RET1) { - ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); - } - if (IR_REG_SPILLED(ctx->regs[def][0])) { - ir_emit_store(ctx, insn->type, def, def_reg); + if (cc->fp_ret_reg != IR_REG_NONE) { + if (def_reg != IR_REG_NONE) { + if (def_reg != cc->fp_ret_reg) { + ir_emit_fp_mov(ctx, insn->type, def_reg, cc->fp_ret_reg); + } + if (IR_REG_SPILLED(ctx->regs[def][0])) { + ir_emit_store(ctx, insn->type, def, def_reg); + } + } else if (ctx->use_lists[def].count > 1) { + ir_emit_store(ctx, insn->type, def, cc->fp_ret_reg); } - } else if (ctx->use_lists[def].count > 1) { - ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); } -#else - if (ctx->use_lists[def].count > 1) { +#ifdef IR_TARGET_X86 + if (ctx->use_lists[def].count > 1 && cc->fp_ret_reg == IR_REG_NONE) { int32_t offset; ir_reg fp; @@ -9776,18 +9894,23 @@ static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) { - int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); - ir_emit_call_ex(ctx, def, insn, used_stack); + const ir_proto_t *proto = ir_call_proto(ctx, insn); + const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? 
proto->flags : IR_CC_DEFAULT); + int32_t used_stack = ir_emit_arguments(ctx, def, insn, proto, cc, ctx->regs[def][1]); + ir_emit_call_ex(ctx, def, insn, proto, cc, used_stack); } static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; - int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); + const ir_proto_t *proto = ir_call_proto(ctx, insn); + const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT); + int32_t used_stack = ir_emit_arguments(ctx, def, insn, proto, cc, ctx->regs[def][1]); + ir_ref func = insn->op2; if (used_stack != 0) { - ir_emit_call_ex(ctx, def, insn, used_stack); + ir_emit_call_ex(ctx, def, insn, proto, cc, used_stack); ir_emit_return_void(ctx); return; } @@ -9797,7 +9920,10 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_reg op2_reg = IR_REG_NONE; ir_mem mem = IR_MEM_B(IR_REG_NONE); - if (!IR_IS_CONST_REF(insn->op2)) { + if (!IR_IS_CONST_REF(func) && ctx->rules[func] == (IR_FUSED | IR_SIMPLE | IR_PROTO)) { + func = ctx->ir_base[func].op1; + } + if (!IR_IS_CONST_REF(func)) { op2_reg = ctx->regs[def][2]; ir_regset preserved_regs = (ir_regset)ctx->used_preserved_regs | IR_REGSET(IR_REG_STACK_POINTER); @@ -9807,7 +9933,7 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) bool is_spill_slot = op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg) - && ctx->vregs[insn->op2]; + && ctx->vregs[func]; if (op2_reg != IR_REG_NONE && !is_spill_slot) { if (IR_REGSET_IN(preserved_regs, IR_REG_NUM(op2_reg))) { @@ -9815,20 +9941,20 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) op2_reg = IR_REG_RAX; if (IR_REG_SPILLED(orig_op2_reg)) { - ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); + ir_emit_load(ctx, IR_ADDR, op2_reg, func); } else { - ir_type type = ctx->ir_base[insn->op2].type; + ir_type type = ctx->ir_base[func].type; | ASM_REG_REG_OP mov, type, op2_reg, IR_REG_NUM(orig_op2_reg) } } else { op2_reg = IR_REG_NUM(op2_reg); } } else { - if (ir_rule(ctx, insn->op2) & IR_FUSED) { + if (ir_rule(ctx, func) & IR_FUSED) { IR_ASSERT(op2_reg == IR_REG_NONE); - mem = ir_fuse_load(ctx, def, insn->op2); + mem = ir_fuse_load(ctx, def, func); } else { - mem = ir_ref_spill_slot(ctx, insn->op2); + mem = ir_ref_spill_slot(ctx, func); } ir_reg base = IR_MEM_BASE(mem); ir_reg index = IR_MEM_INDEX(mem); @@ -9836,7 +9962,7 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) (index != IR_REG_NONE && IR_REGSET_IN(preserved_regs, index))) { op2_reg = IR_REG_RAX; - ir_type type = ctx->ir_base[insn->op2].type; + ir_type type = ctx->ir_base[func].type; ir_emit_load_mem_int(ctx, type, op2_reg, mem); } else { op2_reg = IR_REG_NONE; @@ -9846,20 +9972,18 @@ static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) ir_emit_epilogue(ctx); - if (IR_IS_CONST_REF(insn->op2)) { - void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); + if (IR_IS_CONST_REF(func)) { + void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[func]); if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { | jmp aword &addr } else { |.if X64 -|| ir_reg tmp_reg = IR_REG_RAX; - -#ifdef IR_REG_VARARG_FP_REGS -|| if (ir_is_vararg(ctx, insn)) { -|| tmp_reg = IR_REG_R11; +|| ir_reg tmp_reg = cc->int_ret_reg; +|| +|| if (proto && (proto->flags & IR_VARARG_FUNC) && tmp_reg == cc->fp_varargs_reg) { +|| tmp_reg = IR_REG_R11; // TODO: avoid usage of hardcoded temporary register ??? 
|| } -#endif || if (IR_IS_SIGNED_32BIT(addr)) { | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 || } else { @@ -10020,6 +10144,20 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next | jp &addr | jbe =>target break; + case IR_ULT: + | jp =>target + | jae =>target + break; + case IR_UGE: + | jb =>target + break; + case IR_ULE: + | jp =>target + | ja =>target + break; + case IR_UGT: + | jbe =>target + break; case IR_ORDERED: | jnp =>target break; @@ -10105,6 +10243,20 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next | jp &addr | jbe &target_addr break; + case IR_ULT: + | jp &target_addr + | jae &target_addr + break; + case IR_UGE: + | jb &target_addr + break; + case IR_ULE: + | jp &target_addr + | ja &target_addr + break; + case IR_UGT: + | jbe &target_addr + break; case IR_ORDERED: | jnp &target_addr break; @@ -10190,16 +10342,26 @@ static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next case IR_GT: | ja &addr break; + case IR_ULT: + | jb &addr + break; + case IR_UGE: + | jp &addr + | jae &addr + break; + case IR_ULE: + | jbe &addr + break; + case IR_UGT: + | jp &addr + | ja &addr + break; case IR_ORDERED: | jp &addr break; case IR_UNORDERED: | jnp &addr break; -// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; -// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; -// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; -// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; } } return 0; @@ -10348,7 +10510,11 @@ static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *i void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); if (insn->op == IR_GUARD) { - op ^= 1; // reverse + if (op == IR_EQ || op == IR_NE || op == IR_ORDERED || op == IR_UNORDERED) { + op ^= 1; // reverse + } else { + op ^= 5; // reverse + } } return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0, 0); } @@ -10565,6 +10731,11 @@ static void ir_emit_sse_round(ir_ctx *ctx, ir_ref def, ir_insn *insn, int round_ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; +#ifdef IR_TARGET_X86 + const ir_call_conv_dsc *cc = &ir_call_conv_x86_fastcall; +#else + const ir_call_conv_dsc *cc = &ir_call_conv_default; +#endif dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); @@ -10604,13 +10775,13 @@ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) | movsd qword [rsp+16*8+14*8], xmm14 | movsd qword [rsp+16*8+15*8], xmm15 | - | mov Ra(IR_REG_INT_ARG2), rsp - | lea Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+16] - | mov aword [rsp+4*8], Ra(IR_REG_INT_ARG1) - | mov Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+8] - |.if X64WIN - | sub rsp, 32 /* shadow space */ - |.endif + | mov Ra(cc->int_param_regs[1]), rsp + | lea Ra(cc->int_param_regs[0]), [rsp+16*8+16*8+16] + | mov aword [rsp+4*8], Ra(cc->int_param_regs[0]) + | mov Ra(cc->int_param_regs[0]), [rsp+16*8+16*8+8] + || if (cc->shadow_store_size) { + | sub rsp, cc->shadow_store_size /* shadow space */ + || } |.else | sub esp, 8*4+8*8+12 /* CPU regs + SSE regs */ | mov aword [esp+0*4], eax @@ -10629,10 +10800,10 @@ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) | movsd qword [esp+8*4+6*8], xmm6 | movsd qword [esp+8*4+7*8], xmm7 | - | mov Ra(IR_REG_INT_FCARG2), esp - | lea Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+16] - | mov aword [esp+4*4], Ra(IR_REG_INT_FCARG1) - | mov Ra(IR_REG_INT_FCARG1), 
[esp+8*4+8*8+12] + | mov Ra(cc->int_param_regs[1]), esp + | lea Ra(cc->int_param_regs[0]), [esp+8*4+8*8+16] + | mov aword [esp+4*4], Ra(cc->int_param_regs[0]) + | mov Ra(cc->int_param_regs[0]), [esp+8*4+8*8+12] |.endif if (IR_IS_CONST_REF(insn->op2)) { @@ -10655,16 +10826,14 @@ static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) } // restore SP - |.if X64WIN - | add rsp, 32+16*8+16*8+16 /* shadow space + CPU regs + SSE regs */ - |.elif X64 - | add rsp, 16*8+16*8+16 /* CPU regs + SSE regs */ + |.if X64 + | add rsp, cc->shadow_store_size+16*8+16*8+16 /* shadow space + CPU regs + SSE regs */ |.else | add esp, 8*4+8*8+16 /* CPU regs + SSE regs */ |.endif - if (def_reg != IR_REG_INT_RET1) { - ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); + if (def_reg != cc->int_ret_reg) { + ir_emit_mov(ctx, insn->type, def_reg, cc->int_ret_reg); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); @@ -10710,23 +10879,11 @@ static void ir_emit_load_params(ir_ctx *ctx) int fp_param_num = 0; ir_reg src_reg; ir_reg dst_reg; - // TODO: Calling convention specific - int int_reg_params_count = IR_REG_INT_ARGS; - int fp_reg_params_count = IR_REG_FP_ARGS; - const int8_t *int_reg_params = _ir_int_reg_params; - const int8_t *fp_reg_params = _ir_fp_reg_params; + ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; int32_t stack_offset = 0; int32_t stack_start = 0; -#ifdef IR_TARGET_X86 - if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { - int_reg_params_count = IR_REG_INT_FCARGS; - fp_reg_params_count = IR_REG_FP_FCARGS; - int_reg_params = _ir_int_fc_reg_params; - fp_reg_params = _ir_fp_fc_reg_params; - } -#endif - if (ctx->flags & IR_USE_FRAME_POINTER) { /* skip old frame pointer and return address */ stack_start = sizeof(void*) * 2 + ctx->stack_frame_size; @@ -10749,27 +10906,25 @@ static void ir_emit_load_params(ir_ctx *ctx) stack_offset += ctx->value_params[insn->op3 - 1].size; stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*)); continue; - } else if (int_param_num < int_reg_params_count) { - src_reg = int_reg_params[int_param_num]; + } else if (int_param_num < cc->int_param_regs_count) { + src_reg = cc->int_param_regs[int_param_num]; } else { src_reg = IR_REG_NONE; } int_param_num++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - fp_param_num++; -#endif + if (cc->shadow_param_regs) { + fp_param_num++; + } } else { - if (fp_param_num < fp_reg_params_count) { - src_reg = fp_reg_params[fp_param_num]; + if (fp_param_num < cc->fp_param_regs_count) { + src_reg = cc->fp_param_regs[fp_param_num]; } else { src_reg = IR_REG_NONE; } fp_param_num++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - int_param_num++; -#endif + if (cc->shadow_param_regs) { + int_param_num++; + } } if (ctx->vregs[use]) { dst_reg = IR_REG_NUM(ctx->regs[use][0]); @@ -10805,10 +10960,9 @@ static ir_reg ir_get_free_reg(ir_type type, ir_regset available) return IR_REGSET_FIRST(available); } -static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) +static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to, void *dessa_from_block) { - ir_backend_data *data = ctx->data; - ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end; + ir_ref ref = ctx->cfg_blocks[(intptr_t)dessa_from_block].end; if (to == 0) { if (IR_IS_TYPE_INT(type)) { @@ -10844,23 +10998,11 @@ static void ir_fix_param_spills(ir_ctx *ctx) int 
int_param_num = 0; int fp_param_num = 0; ir_reg src_reg; - // TODO: Calling convention specific - int int_reg_params_count = IR_REG_INT_ARGS; - int fp_reg_params_count = IR_REG_FP_ARGS; - const int8_t *int_reg_params = _ir_int_reg_params; - const int8_t *fp_reg_params = _ir_fp_reg_params; + ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; int32_t stack_start = 0; int32_t stack_offset = 0; -#ifdef IR_TARGET_X86 - if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { - int_reg_params_count = IR_REG_INT_FCARGS; - fp_reg_params_count = IR_REG_FP_FCARGS; - int_reg_params = _ir_int_fc_reg_params; - fp_reg_params = _ir_fp_fc_reg_params; - } -#endif - if (ctx->flags & IR_USE_FRAME_POINTER) { /* skip old frame pointer and return address */ stack_start = sizeof(void*) * 2 + ctx->stack_frame_size; @@ -10874,8 +11016,7 @@ static void ir_fix_param_spills(ir_ctx *ctx) insn = &ctx->ir_base[use]; if (insn->op == IR_PARAM) { if (IR_IS_TYPE_INT(insn->type)) { -#ifndef _WIN64 - if (ctx->value_params && ctx->value_params[insn->op3 - 1].align) { + if (ctx->value_params && ctx->value_params[insn->op3 - 1].align && cc->pass_struct_by_val) { /* struct passed by value on stack */ size_t align = ctx->value_params[insn->op3 - 1].align; @@ -10886,28 +11027,25 @@ static void ir_fix_param_spills(ir_ctx *ctx) stack_offset = IR_ALIGNED_SIZE(stack_offset, sizeof(void*)); continue; } -#endif - if (int_param_num < int_reg_params_count) { - src_reg = int_reg_params[int_param_num]; + if (int_param_num < cc->int_param_regs_count) { + src_reg = cc->int_param_regs[int_param_num]; } else { src_reg = IR_REG_NONE; } int_param_num++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - fp_param_num++; -#endif + if (cc->shadow_param_regs) { + fp_param_num++; + } } else { - if (fp_param_num < fp_reg_params_count) { - src_reg = fp_reg_params[fp_param_num]; + if (fp_param_num < cc->fp_param_regs_count) { + src_reg = cc->fp_param_regs[fp_param_num]; } else { src_reg = IR_REG_NONE; } fp_param_num++; -#ifdef _WIN64 - /* WIN64 calling convention use common couter for int and fp registers */ - int_param_num++; -#endif + if (cc->shadow_param_regs) { + int_param_num++; + } } if (src_reg == IR_REG_NONE) { if (ctx->vregs[use]) { @@ -10927,12 +11065,13 @@ static void ir_fix_param_spills(ir_ctx *ctx) } } -#ifdef _WIN64 - /* WIN64 uses shsow area for registers */ - stack_offset += IR_MIN(int_param_num, int_reg_params_count) * sizeof(void*); -#endif - ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count); - ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count); + if (cc->shadow_store_size) { + /* WIN64 uses shadow area for registers */ + stack_offset += IR_MIN(int_param_num, cc->int_param_regs_count) * sizeof(void*); + } + + ctx->gp_reg_params = IR_MIN(int_param_num, cc->int_param_regs_count); + ctx->fp_reg_params = IR_MIN(fp_param_num, cc->fp_param_regs_count); ctx->param_stack_size = stack_offset; } @@ -10943,17 +11082,20 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ir_insn *insn; ir_ref i, n, j, *p; uint32_t *rule, insn_flags; - ir_backend_data *data = ctx->data; ir_regset available = 0; ir_target_constraints constraints; uint32_t def_flags; ir_reg reg; + ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; + ir_regset scratch = ir_scratch_regset[cc->scratch_reg - IR_REG_NUM]; -#ifndef IR_REG_FP_RET1 +#ifdef IR_TARGET_X86 if (ctx->flags2 & IR_HAS_FP_RET_SLOT) { - ctx->ret_slot = 
ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data); - } else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { - ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data->ra_data); + ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE); + } else if ((ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) + && cc->fp_ret_reg == IR_REG_NONE) { + ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type); } else { ctx->ret_slot = -1; } @@ -10986,10 +11128,16 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) case IR_LOOP_END: case IR_IGOTO_DUP: break; -#ifndef IR_REG_FP_RET1 +#ifdef IR_TARGET_X86 case IR_CALL: - if (ctx->ret_slot == -1 && (insn->type == IR_FLOAT || insn->type == IR_DOUBLE)) { - ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data); + if (ctx->ret_slot == -1 + && (insn->type == IR_FLOAT || insn->type == IR_DOUBLE)) { + const ir_proto_t *proto = ir_call_proto(ctx, insn); + const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? proto->flags : IR_CC_DEFAULT); + + if (cc->fp_ret_reg == IR_REG_NONE) { + ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE); + } } #endif IR_FALLTHROUGH; @@ -11001,7 +11149,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) && *rule != IR_TEST_AND_BRANCH_INT && *rule != IR_GUARD_CMP_INT && *rule != IR_GUARD_CMP_FP) { - available = IR_REGSET_SCRATCH; + available = scratch; } if (ctx->vregs[i]) { reg = constraints.def_reg; @@ -11031,7 +11179,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) if (insn->op == IR_PARAM && reg == IR_REG_NONE) { ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM; } else { - ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); + ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type); } } else if (insn->op == IR_PARAM) { IR_ASSERT(0 && "unexpected PARAM"); @@ -11042,7 +11190,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) ir_ref n = use_list->count; if (n > 0) { - int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); + int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type); ir_ref i, *p, use; ir_insn *use_insn; @@ -11097,10 +11245,14 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) } } ctx->regs[i][constraints.tmp_regs[n].num] = reg; - } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { - available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); } else { - IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); + ir_reg reg = constraints.tmp_regs[n].reg; + + if (reg > IR_REG_NUM) { + available = IR_REGSET_DIFFERENCE(available, ir_scratch_regset[reg - IR_REG_NUM]); + } else { + IR_REGSET_EXCL(available, reg); + } } } while (n); } @@ -11136,8 +11288,7 @@ static void ir_allocate_unique_spill_slots(ir_ctx *ctx) rule += n; } if (bb->flags & IR_BB_DESSA_MOVES) { - data->dessa_from_block = b; - ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); + ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps, (void*)(intptr_t)b); } } @@ -11154,12 +11305,12 @@ static void ir_preallocate_call_stack(ir_ctx *ctx) for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { if (insn->op == IR_CALL) { - call_stack_size = ir_call_used_stack(ctx, insn, ©_stack); + const ir_proto_t *proto = ir_call_proto(ctx, insn); + const ir_call_conv_dsc *cc = ir_get_call_conv_dsc(proto ? 
proto->flags : IR_CC_DEFAULT); + + call_stack_size = ir_call_used_stack(ctx, insn, cc, ©_stack); if (call_stack_size > peak_call_stack_size -#ifdef IR_HAVE_FASTCALL - && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ -#endif - ) { + && !cc->cleanup_stack_by_callee) { peak_call_stack_size = call_stack_size; } } @@ -11169,7 +11320,7 @@ static void ir_preallocate_call_stack(ir_ctx *ctx) } if (peak_call_stack_size) { ctx->call_stack_size = peak_call_stack_size; - ctx->flags |= IR_PREALLOCATED_STACK; + ctx->flags2 |= IR_PREALLOCATED_STACK; } } @@ -11179,19 +11330,22 @@ void ir_fix_stack_frame(ir_ctx *ctx) ctx->locals_area_size = ctx->stack_frame_size; -#if defined(IR_TARGET_X64) && !defined(_WIN64) if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { - ctx->flags2 |= IR_16B_FRAME_ALIGNMENT; - ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16); - ctx->locals_area_size = ctx->stack_frame_size; - if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { - additional_size += sizeof(void*) * IR_REG_INT_ARGS; - } - if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { - additional_size += 16 * IR_REG_FP_ARGS; + ir_backend_data *data = ctx->data; + const ir_call_conv_dsc *cc = data->ra_data.cc; + + if (cc->sysv_varargs) { + ctx->flags2 |= IR_16B_FRAME_ALIGNMENT; + ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16); + ctx->locals_area_size = ctx->stack_frame_size; + if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < cc->int_param_regs_count) { + additional_size += sizeof(void*) * cc->int_param_regs_count; + } + if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < cc->fp_param_regs_count) { + additional_size += 16 * cc->fp_param_regs_count; + } } } -#endif if (ctx->used_preserved_regs) { ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; @@ -11259,6 +11413,7 @@ void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) ir_ref igoto_dup_ref = IR_UNUSED; uint32_t igoto_dup_block = 0; + data.ra_data.cc = ir_get_call_conv_dsc(ctx->flags); data.ra_data.unused_slot_4 = 0; data.ra_data.unused_slot_2 = 0; data.ra_data.unused_slot_1 = 0; diff --git a/ext/opcache/jit/ir/ir_x86.h b/ext/opcache/jit/ir/ir_x86.h index 06bfa951cf21d..6399ca107fddc 100644 --- a/ext/opcache/jit/ir/ir_x86.h +++ b/ext/opcache/jit/ir/ir_x86.h @@ -82,14 +82,17 @@ enum _ir_reg { IR_GP_REGS(IR_GP_REG_ENUM) IR_FP_REGS(IR_FP_REG_ENUM) IR_REG_NUM, + IR_REG_ALL = IR_REG_NUM, /* special name for regset */ + IR_REG_SET_1, /* special name for regset */ + IR_REG_SET_2, /* special name for regset */ + IR_REG_SET_3, /* special name for regset */ + IR_REG_SET_NUM, }; #define IR_REG_GP_FIRST IR_REG_R0 #define IR_REG_FP_FIRST IR_REG_XMM0 #define IR_REG_GP_LAST (IR_REG_FP_FIRST - 1) #define IR_REG_FP_LAST (IR_REG_NUM - 1) -#define IR_REG_SCRATCH (IR_REG_NUM) /* special name for regset */ -#define IR_REG_ALL (IR_REG_NUM + 1) /* special name for regset */ #define IR_REGSET_64BIT 0 @@ -113,121 +116,4 @@ enum _ir_reg { #define IR_REG_RSI IR_REG_R6 #define IR_REG_RDI IR_REG_R7 -/* Calling Convention */ -#ifdef _WIN64 - -# define IR_REG_INT_RET1 IR_REG_RAX -# define IR_REG_FP_RET1 IR_REG_XMM0 -# define IR_REG_INT_ARGS 4 -# define IR_REG_FP_ARGS 4 -# define IR_REG_INT_ARG1 IR_REG_RCX -# define IR_REG_INT_ARG2 IR_REG_RDX -# define IR_REG_INT_ARG3 IR_REG_R8 -# define IR_REG_INT_ARG4 IR_REG_R9 -# define IR_REG_FP_ARG1 IR_REG_XMM0 -# define 
IR_REG_FP_ARG2 IR_REG_XMM1 -# define IR_REG_FP_ARG3 IR_REG_XMM2 -# define IR_REG_FP_ARG4 IR_REG_XMM3 -# define IR_MAX_REG_ARGS 4 -# define IR_SHADOW_ARGS 32 /* Reserved space in bytes - "home space" or "shadow store" for register arguments */ - -# define IR_REGSET_SCRATCH \ - (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) \ - | IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) \ - | IR_REGSET_INTERVAL(IR_REG_XMM0, IR_REG_XMM5)) - -# define IR_REGSET_PRESERVED \ - (IR_REGSET(IR_REG_RBX) \ - | IR_REGSET_INTERVAL(IR_REG_RBP, IR_REG_RDI) \ - | IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15) \ - | IR_REGSET_INTERVAL(IR_REG_XMM6, IR_REG_XMM15)) - -#elif defined(IR_TARGET_X64) - -# define IR_REG_INT_RET1 IR_REG_RAX -# define IR_REG_FP_RET1 IR_REG_XMM0 -# define IR_REG_INT_ARGS 6 -# define IR_REG_FP_ARGS 8 -# define IR_REG_INT_ARG1 IR_REG_RDI -# define IR_REG_INT_ARG2 IR_REG_RSI -# define IR_REG_INT_ARG3 IR_REG_RDX -# define IR_REG_INT_ARG4 IR_REG_RCX -# define IR_REG_INT_ARG5 IR_REG_R8 -# define IR_REG_INT_ARG6 IR_REG_R9 -# define IR_REG_FP_ARG1 IR_REG_XMM0 -# define IR_REG_FP_ARG2 IR_REG_XMM1 -# define IR_REG_FP_ARG3 IR_REG_XMM2 -# define IR_REG_FP_ARG4 IR_REG_XMM3 -# define IR_REG_FP_ARG5 IR_REG_XMM4 -# define IR_REG_FP_ARG6 IR_REG_XMM5 -# define IR_REG_FP_ARG7 IR_REG_XMM6 -# define IR_REG_FP_ARG8 IR_REG_XMM7 -# define IR_MAX_REG_ARGS 14 -# define IR_SHADOW_ARGS 0 - -# define IR_REG_VARARG_FP_REGS IR_REG_RAX /* hidden argument to specify the number of vector registers used */ - -# define IR_REGSET_SCRATCH \ - (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) \ - | IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI) \ - | IR_REGSET_INTERVAL(IR_REG_R8, IR_REG_R11) \ - | IR_REGSET_FP) - -# define IR_REGSET_PRESERVED \ - (IR_REGSET(IR_REG_RBX) \ - | IR_REGSET(IR_REG_RBP) \ - | IR_REGSET_INTERVAL(IR_REG_R12, IR_REG_R15)) - -typedef struct _ir_va_list { - uint32_t gp_offset; - uint32_t fp_offset; - void *overflow_arg_area; - void *reg_save_area; -} ir_va_list; - -#elif defined(IR_TARGET_X86) - -# define IR_REG_INT_RET1 IR_REG_RAX -# define IR_REG_INT_RET2 IR_REG_RDX -# define IR_REG_INT_ARGS 0 -# define IR_REG_FP_ARGS 0 - -# define IR_HAVE_FASTCALL 1 -# define IR_REG_INT_FCARGS 2 -# define IR_REG_FP_FCARGS 0 -# define IR_REG_INT_FCARG1 IR_REG_RCX -# define IR_REG_INT_FCARG2 IR_REG_RDX -# define IR_MAX_REG_ARGS 2 -# define IR_SHADOW_ARGS 0 - -# define IR_REGSET_SCRATCH \ - (IR_REGSET_INTERVAL(IR_REG_RAX, IR_REG_RDX) | IR_REGSET_FP) - -# define IR_REGSET_PRESERVED \ - (IR_REGSET(IR_REG_RBX) \ - | IR_REGSET(IR_REG_RBP) \ - | IR_REGSET_INTERVAL(IR_REG_RSI, IR_REG_RDI)) - -#else -# error "Unsupported target architecture" -#endif - -typedef struct _ir_tmp_reg { - union { - uint8_t num; - int8_t reg; - }; - uint8_t type; - int8_t start; - int8_t end; -} ir_tmp_reg; - -struct _ir_target_constraints { - int8_t def_reg; - uint8_t tmps_count; - uint8_t hints_count; - ir_tmp_reg tmp_regs[3]; - int8_t hints[IR_MAX_REG_ARGS + 3]; -}; - #endif /* IR_X86_H */
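
Reviewer note on the new ABI plumbing: every hunk above replaces compile-time `#ifdef` dispatch with reads through `data->ra_data.cc`, a `const ir_call_conv_dsc *` obtained from `ir_get_call_conv_dsc()`. The descriptor's definition is not part of the hunks shown here, so the following is only a sketch reconstructed from the field accesses in this patch; the names match the usage sites, but the types, widths, and ordering are assumptions, and `ir_reg`/`ir_regset` stand in for the real typedefs.

/* Hypothetical reconstruction of the calling-convention descriptor used above.
 * Inferred purely from accesses such as cc->int_param_regs_count,
 * cc->shadow_param_regs, cc->preserved_regs; NOT the actual declaration. */
#include <stdbool.h>
#include <stdint.h>

typedef int8_t   ir_reg;    /* assumption: matches the ir_reg enum width */
typedef uint64_t ir_regset; /* assumption: one bit per register */

typedef struct _ir_call_conv_dsc {
	const int8_t *int_param_regs;      /* GP argument registers, in ABI order */
	const int8_t *fp_param_regs;       /* FP/vector argument registers, in ABI order */
	uint8_t   int_param_regs_count;    /* 6 for SysV x86-64, 4 for Win64, 0 or 2 for x86 */
	uint8_t   fp_param_regs_count;     /* 8 for SysV x86-64, 4 for Win64 */
	ir_reg    int_ret_reg;             /* e.g. RAX on x86/x86-64 */
	ir_reg    fp_ret_reg;              /* IR_REG_NONE selects the x86 x87 ST(0) return path */
	ir_reg    fp_varargs_reg;          /* SysV hidden vector-arg count (EAX), or IR_REG_NONE */
	ir_reg    scratch_reg;             /* IR_REG_SET_* index into ir_scratch_regset[] */
	ir_regset preserved_regs;          /* callee-saved registers */
	int32_t   shadow_store_size;       /* Win64 "home space": 32, otherwise 0 */
	bool      shadow_param_regs;       /* Win64: GP and FP args share one position counter */
	bool      pass_struct_by_val;      /* SysV: aggregates copied onto the stack; Win64 passes a pointer */
	bool      cleanup_stack_by_callee; /* x86 fastcall-style: callee pops its arguments (ret imm16) */
	bool      sysv_varargs;            /* register-save-area va_list (SysV) instead of pointer bump */
} ir_call_conv_dsc;

On this model, `ir_call_conv_default` and `ir_call_conv_x86_fastcall` (referenced in `ir_emit_exitcall()`) would be concrete instances selected by the IR_CC_* bits stored in the low byte of `ir_ctx.flags`.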
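
The `cc->sysv_varargs` branches encode the System V x86-64 two-area va_list scheme; `ir_x86_64_sysv_va_list` is simply the old `ir_va_list` (removed from ir_x86.h above) under a target-specific name. As a reading aid, here is a plain-C model of the integer `IR_VA_ARG` fast path that the `cmp`/`jge >1`/`2:` sequence in `ir_emit_va_arg()` implements; it mirrors the emitted logic but is not code from the patch.

/* C model (for review only) of the integer va_arg sequence emitted above.
 * The first 6*8 bytes of reg_save_area hold the GP argument registers,
 * followed by the 16-byte XMM slots addressed via fp_offset. */
#include <stdint.h>

typedef struct {
	uint32_t gp_offset;         /* bytes consumed in the GP part of the save area */
	uint32_t fp_offset;         /* 6*8 plus bytes consumed in the XMM part */
	void    *overflow_arg_area; /* next stack-passed argument */
	void    *reg_save_area;
} ir_x86_64_sysv_va_list;

static int64_t model_va_arg_i64(ir_x86_64_sysv_va_list *ap, int int_param_regs_count)
{
	char *p;

	if (ap->gp_offset < (uint32_t)(8 * int_param_regs_count)) { /* cmp ...; jge >1 */
		ap->gp_offset += 8;                                     /* add; store gp_offset */
		p = (char*)ap->reg_save_area + ap->gp_offset - 8;
	} else {                                                    /* label 1: stack slot */
		p = (char*)ap->overflow_arg_area;
		ap->overflow_arg_area = p + 8;
	}
	return *(int64_t*)p;                                        /* label 2: load value */
}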
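
Finally, the repeated `if (cc->shadow_param_regs) { fp_param++; }` idiom that replaces the old `#ifdef _WIN64` counter bumps can be puzzling out of context: on Win64, GP and FP arguments draw from one shared position sequence, so the fifth argument spills to the stack regardless of its class, while SysV counts the two classes independently. A toy model (not from the patch) of the scalar-argument accounting done by `ir_call_used_stack()`, under the simplifying assumption of 8-byte stack slots and no IR_ARGVAL aggregates:

/* Toy model of argument-register accounting. is_fp[i] is nonzero for
 * FP arguments; returns the stack bytes needed for the overflow args. */
#include <stdbool.h>
#include <stdint.h>

static int32_t stack_bytes_for_scalars(const uint8_t *is_fp, int n,
                                       int gp_regs, int fp_regs,
                                       bool shadow_param_regs)
{
	int gp = 0, fp = 0;
	int32_t used = 0;

	for (int i = 0; i < n; i++) {
		if (!is_fp[i]) {
			if (gp >= gp_regs) used += 8; /* no GP register left */
			gp++;
			if (shadow_param_regs) fp++;  /* Win64: classes share positions */
		} else {
			if (fp >= fp_regs) used += 8; /* no FP register left */
			fp++;
			if (shadow_param_regs) gp++;
		}
	}
	return used;
}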