use super::registers::*;
use crate::bitset::BitSet;
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::types::*;
use crate::ir::{self, Function, Inst, InstBuilder, MemFlags};
use crate::isa::constraints::*;
use crate::isa::enc_tables::*;
use crate::isa::encoding::base_size;
use crate::isa::encoding::{Encoding, RecipeSizing};
use crate::isa::RegUnit;
use crate::isa::{self, TargetIsa};
use crate::legalizer::expand_as_libcall;
use crate::predicates;
use crate::regalloc::RegDiversions;
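// The encoding and legalization tables included below are generated into OUT_DIR at build
// time; a number of the imports above are present solely for use by that generated code.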
include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));
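/// Returns `true` for the extended registers (`r8`-`r15`, `xmm8`-`xmm15`): their register
/// unit has bit 3 set, and encoding them requires a REX prefix.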
pub fn is_extended_reg(reg: RegUnit) -> bool {
reg as u8 & 0b1000 != 0
}
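/// Addressing modes that use `rsp` or `r12` as a base register require a SIB byte.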
pub fn needs_sib_byte(reg: RegUnit) -> bool {
reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit
}
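/// Addressing modes that use `rbp` or `r13` as a base register have no displacement-free
/// encoding, so a zero displacement byte must be emitted.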
pub fn needs_offset(reg: RegUnit) -> bool {
reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit
}
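/// True when the base register requires either a SIB byte or an explicit zero displacement.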
pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool {
needs_sib_byte(reg) || needs_offset(reg)
}
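/// Applies `condition_func` to the register holding input operand `op_index` of `inst`,
/// taking active register diversions into account.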
fn test_input(
op_index: usize,
inst: Inst,
divert: &RegDiversions,
func: &Function,
condition_func: fn(RegUnit) -> bool,
) -> bool {
let in_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations);
condition_func(in_reg)
}
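/// Applies `condition_func` to the register holding result `result_index` of `inst`,
/// taking active register diversions into account.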
fn test_result(
result_index: usize,
inst: Inst,
divert: &RegDiversions,
func: &Function,
condition_func: fn(RegUnit) -> bool,
) -> bool {
let out_reg = divert.reg(func.dfg.inst_results(inst)[result_index], &func.locations);
condition_func(out_reg)
}
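// Recipe sizing callbacks referenced by the generated encoding tables. Each one starts from
// the recipe's base size and adds a byte for a SIB byte, an explicit zero displacement,
// and/or an inferred REX prefix, depending on which registers the named operands occupy.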
fn size_plus_maybe_offset_for_inreg_0(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_offset = test_input(0, inst, divert, func, needs_offset);
sizing.base_size + if needs_offset { 1 } else { 0 }
}
fn size_plus_maybe_offset_for_inreg_1(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_offset = test_input(1, inst, divert, func, needs_offset);
sizing.base_size + if needs_offset { 1 } else { 0 }
}
fn size_plus_maybe_sib_for_inreg_0(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_sib = test_input(0, inst, divert, func, needs_sib_byte);
sizing.base_size + if needs_sib { 1 } else { 0 }
}
fn size_plus_maybe_sib_for_inreg_1(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_sib = test_input(1, inst, divert, func, needs_sib_byte);
sizing.base_size + if needs_sib { 1 } else { 0 }
}
fn size_plus_maybe_sib_or_offset_for_inreg_0(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_sib_or_offset = test_input(0, inst, divert, func, needs_sib_byte_or_offset);
sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
}
fn size_plus_maybe_sib_or_offset_for_inreg_1(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_sib_or_offset = test_input(1, inst, divert, func, needs_sib_byte_or_offset);
sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
}
fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1(
sizing: &RecipeSizing,
enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
|| test_input(1, inst, divert, func, is_extended_reg);
size_plus_maybe_sib_or_offset_for_inreg_1(sizing, enc, inst, divert, func)
+ if needs_rex { 1 } else { 0 }
}
fn size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1(
sizing: &RecipeSizing,
enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
|| test_input(1, inst, divert, func, is_extended_reg);
size_plus_maybe_sib_for_inreg_1(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 }
}
fn size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0(
sizing: &RecipeSizing,
enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
|| test_result(0, inst, divert, func, is_extended_reg);
size_plus_maybe_sib_or_offset_for_inreg_0(sizing, enc, inst, divert, func)
+ if needs_rex { 1 } else { 0 }
}
fn size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0(
sizing: &RecipeSizing,
enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
|| test_result(0, inst, divert, func, is_extended_reg);
size_plus_maybe_sib_for_inreg_0(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 }
}
fn size_with_inferred_rex_for_inreg0(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(0, inst, divert, func, is_extended_reg);
sizing.base_size + if needs_rex { 1 } else { 0 }
}
fn size_with_inferred_rex_for_inreg1(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(1, inst, divert, func, is_extended_reg);
sizing.base_size + if needs_rex { 1 } else { 0 }
}
fn size_with_inferred_rex_for_inreg2(
sizing: &RecipeSizing,
    _enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(2, inst, divert, func, is_extended_reg);
sizing.base_size + if needs_rex { 1 } else { 0 }
}
fn size_with_inferred_rex_for_inreg0_inreg1(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
|| test_input(1, inst, divert, func, is_extended_reg);
sizing.base_size + if needs_rex { 1 } else { 0 }
}
fn size_with_inferred_rex_for_inreg1_inreg2(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
|| test_input(2, inst, divert, func, is_extended_reg);
sizing.base_size + if needs_rex { 1 } else { 0 }
}
fn size_with_inferred_rex_for_inreg0_outreg0(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
|| test_result(0, inst, divert, func, is_extended_reg);
sizing.base_size + if needs_rex { 1 } else { 0 }
}
fn size_with_inferred_rex_for_outreg0(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_result(0, inst, divert, func, is_extended_reg);
sizing.base_size + if needs_rex { 1 } else { 0 }
}
fn size_with_inferred_rex_for_cmov(
sizing: &RecipeSizing,
_enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> u8 {
let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
|| test_input(2, inst, divert, func, is_extended_reg);
sizing.base_size + if needs_rex { 1 } else { 0 }
}
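/// If `value` is defined by an `iconst`, returns its immediate; used to skip divisor checks
/// for known constants.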
fn maybe_iconst_imm(pos: &FuncCursor, value: ir::Value) -> Option<i64> {
if let ir::ValueDef::Result(inst, _) = &pos.func.dfg.value_def(value) {
if let ir::InstructionData::UnaryImm {
opcode: ir::Opcode::Iconst,
imm,
} = &pos.func.dfg[*inst]
{
let value: i64 = (*imm).into();
Some(value)
} else {
None
}
} else {
None
}
}
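/// Expands `sdiv`/`srem` into `x86_sdivmodx`. When `avoid_div_traps` is off and this is a
/// division, the hardware trap already covers division by zero and `INT_MIN / -1`, so the
/// instruction is emitted directly. Otherwise explicit checks are inserted: a `trapz` for a
/// possibly-zero divisor, plus a separate block for a `-1` divisor in which `srem` yields 0
/// and `sdiv` traps on `INT_MIN` before negating.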
fn expand_sdivrem(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
) {
let (x, y, is_srem) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Sdiv,
args,
} => (args[0], args[1], false),
ir::InstructionData::Binary {
opcode: ir::Opcode::Srem,
args,
} => (args[0], args[1], true),
_ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
};
let old_block = func.layout.pp_block(inst);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
pos.func.dfg.clear_results(inst);
let avoid_div_traps = isa.flags().avoid_div_traps();
if !avoid_div_traps && !is_srem {
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
pos.remove_inst();
return;
}
let (could_be_zero, could_be_minus_one) = if let Some(imm) = maybe_iconst_imm(&pos, y) {
(imm == 0, imm == -1)
} else {
(true, true)
};
if avoid_div_traps && could_be_zero {
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
}
if !could_be_minus_one {
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
let reuse = if is_srem {
[None, Some(result)]
} else {
[Some(result), None]
};
pos.ins().with_results(reuse).x86_sdivmodx(x, xhi, y);
pos.remove_inst();
return;
}
let nominal = pos.func.dfg.make_block();
let minus_one = pos.func.dfg.make_block();
let done = pos.func.dfg.make_block();
pos.func.dfg.attach_block_param(done, result);
let is_m1 = pos.ins().ifcmp_imm(y, -1);
pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
pos.ins().jump(nominal, &[]);
pos.insert_block(nominal);
let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
let divres = if is_srem { rem } else { quot };
pos.ins().jump(done, &[divres]);
pos.insert_block(minus_one);
let m1_result = if is_srem {
pos.ins().iconst(ty, 0)
} else {
debug_assert!(avoid_div_traps, "Native trapping divide handled above");
let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
pos.ins()
.trapif(IntCC::Equal, f, ir::TrapCode::IntegerOverflow);
pos.ins().irsub_imm(x, 0)
};
pos.func.dfg.replace(inst).jump(done, &[m1_result]);
pos.next_inst();
pos.insert_block(done);
cfg.recompute_block(pos.func, old_block);
cfg.recompute_block(pos.func, nominal);
cfg.recompute_block(pos.func, minus_one);
cfg.recompute_block(pos.func, done);
}
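/// Expands `udiv`/`urem` into `x86_udivmodx` with a zeroed high half, inserting an explicit
/// division-by-zero trap when `avoid_div_traps` is set.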
fn expand_udivrem(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
) {
let (x, y, is_urem) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Udiv,
args,
} => (args[0], args[1], false),
ir::InstructionData::Binary {
opcode: ir::Opcode::Urem,
args,
} => (args[0], args[1], true),
_ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
};
let avoid_div_traps = isa.flags().avoid_div_traps();
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
pos.func.dfg.clear_results(inst);
if avoid_div_traps {
let zero_check = if let Some(imm) = maybe_iconst_imm(&pos, y) {
imm == 0
} else {
true
};
if zero_check {
pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
}
}
let xhi = pos.ins().iconst(ty, 0);
let reuse = if is_urem {
[None, Some(result)]
} else {
[Some(result), None]
};
pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y);
pos.remove_inst();
}
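/// Expands scalar `fmin`/`fmax`. The x86 `minss`/`maxss` instructions simply return the
/// second operand when the inputs are unordered or equal, so the expansion branches: the
/// common case uses `x86_fmin`/`x86_fmax`, equal inputs are combined with `bor`/`band` to
/// produce the correctly signed zero, and unordered inputs produce a NaN via `fadd`.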
fn expand_minmax(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmin,
args,
} => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor),
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmax,
args,
} => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band),
_ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
};
let old_block = func.layout.pp_block(inst);
let one_block = func.dfg.make_block();
let uno_block = func.dfg.make_block();
let ueq_block = func.dfg.make_block();
let eq_block = func.dfg.make_block();
let done = func.dfg.make_block();
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
func.dfg.clear_results(inst);
func.dfg.attach_block_param(done, result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
pos.ins().brnz(cmp_ueq, ueq_block, &[]);
pos.ins().jump(one_block, &[]);
pos.insert_block(one_block);
let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
let one_result = pos.func.dfg.first_result(one_inst);
pos.ins().jump(done, &[one_result]);
pos.insert_block(uno_block);
let uno_result = pos.ins().fadd(x, y);
pos.ins().jump(done, &[uno_result]);
pos.insert_block(ueq_block);
let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
pos.ins().brnz(cmp_uno, uno_block, &[]);
pos.ins().jump(eq_block, &[]);
pos.insert_block(eq_block);
let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
let bw_result = pos.func.dfg.first_result(bw_inst);
pos.func.dfg.replace(inst).jump(done, &[bw_result]);
pos.next_inst();
pos.insert_block(done);
cfg.recompute_block(pos.func, old_block);
cfg.recompute_block(pos.func, one_block);
cfg.recompute_block(pos.func, uno_block);
cfg.recompute_block(pos.func, ueq_block);
cfg.recompute_block(pos.func, eq_block);
cfg.recompute_block(pos.func, done);
}
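/// Expands vector `fmin`/`fmax` without branches: the operation is computed in both operand
/// orders and the results are combined so NaNs and the correct signed zero propagate; an
/// Unordered compare then builds a mask that is shifted into the mantissa bits and cleared
/// with `band_not`, turning NaN lanes into a canonical quiet NaN.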
fn expand_minmax_vector(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
let ty = func.dfg.ctrl_typevar(inst);
debug_assert!(ty.is_vector());
let (x, y, x86_opcode, is_max) = match func.dfg[inst] {
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmin,
args,
} => (args[0], args[1], ir::Opcode::X86Fmin, false),
ir::InstructionData::Binary {
opcode: ir::Opcode::Fmax,
args,
} => (args[0], args[1], ir::Opcode::X86Fmax, true),
_ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
};
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let (forward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, x, y);
let forward = dfg.first_result(forward_inst);
let (backward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, y, x);
let backward = dfg.first_result(backward_inst);
let (value, mask) = if is_max {
let difference = pos.ins().bxor(forward, backward);
let propagate_nans_and_plus_zero = pos.ins().bor(backward, difference);
let value = pos.ins().fsub(propagate_nans_and_plus_zero, difference);
let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, difference, value);
(value, find_nan_lanes_mask)
} else {
let propagate_nans = pos.ins().bor(backward, forward);
let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, forward, propagate_nans);
let bitcast_find_nan_lanes_mask = pos.ins().raw_bitcast(ty, find_nan_lanes_mask);
let tmp = pos.ins().bor(propagate_nans, bitcast_find_nan_lanes_mask);
(tmp, bitcast_find_nan_lanes_mask)
};
let (shift_by, ty_as_int) = match ty {
F32X4 => (10, I32X4),
F64X2 => (13, I64X2),
_ => unimplemented!("this legalization only understands 128-bit floating point types"),
};
let mask_as_int = pos.ins().raw_bitcast(ty_as_int, mask);
let shift_mask = pos.ins().ushr_imm(mask_as_int, shift_by);
let shift_mask_as_float = pos.ins().raw_bitcast(ty, shift_mask);
pos.func
.dfg
.replace(inst)
.band_not(value, shift_mask_as_float);
}
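/// Expands `fcvt_from_uint`. Inputs narrower than 64 bits are zero-extended and converted as
/// signed. A 64-bit input with the sign bit clear converts directly; otherwise it is halved
/// (keeping the low bit so rounding is unaffected), converted, and doubled.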
fn expand_fcvt_from_uint(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
    let x = match func.dfg[inst] {
        ir::InstructionData::Unary {
            opcode: ir::Opcode::FcvtFromUint,
            arg,
        } => arg,
        _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)),
    };
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
match xty {
ir::types::I8 | ir::types::I16 | ir::types::I32 => {
let wide = pos.ins().uextend(ir::types::I64, x);
pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide);
return;
}
ir::types::I64 => {}
_ => unimplemented!(),
}
let old_block = pos.func.layout.pp_block(inst);
let poszero_block = pos.func.dfg.make_block();
let neg_block = pos.func.dfg.make_block();
let done = pos.func.dfg.make_block();
pos.func.dfg.clear_results(inst);
pos.func.dfg.attach_block_param(done, result);
let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0);
pos.ins().brnz(is_neg, neg_block, &[]);
pos.ins().jump(poszero_block, &[]);
pos.insert_block(poszero_block);
let posres = pos.ins().fcvt_from_sint(ty, x);
pos.ins().jump(done, &[posres]);
pos.insert_block(neg_block);
let ihalf = pos.ins().ushr_imm(x, 1);
let lsb = pos.ins().band_imm(x, 1);
let ifinal = pos.ins().bor(ihalf, lsb);
let fhalf = pos.ins().fcvt_from_sint(ty, ifinal);
let negres = pos.ins().fadd(fhalf, fhalf);
pos.func.dfg.replace(inst).jump(done, &[negres]);
pos.next_inst();
pos.insert_block(done);
cfg.recompute_block(pos.func, old_block);
cfg.recompute_block(pos.func, poszero_block);
cfg.recompute_block(pos.func, neg_block);
cfg.recompute_block(pos.func, done);
}
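/// Expands `fcvt_from_uint.f32x4`. With AVX-512VL/F support this becomes a single
/// `x86_vcvtudq2ps`; otherwise the lanes are split with `x86_pblendw` into low and high
/// 16-bit halves, each half is converted as signed (the high half pre-halved and then
/// doubled), and the two conversions are added.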
fn expand_fcvt_from_uint_vector(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtFromUint,
arg,
} = pos.func.dfg[inst]
{
let controlling_type = pos.func.dfg.ctrl_typevar(inst);
if controlling_type == F32X4 {
debug_assert_eq!(pos.func.dfg.value_type(arg), I32X4);
let x86_isa = isa
.as_any()
.downcast_ref::<isa::x86::Isa>()
.expect("the target ISA must be x86 at this point");
if x86_isa.isa_flags.use_avx512vl_simd() || x86_isa.isa_flags.use_avx512f_simd() {
pos.func.dfg.replace(inst).x86_vcvtudq2ps(arg);
} else {
let bitcast_arg = pos.ins().raw_bitcast(I16X8, arg);
let zero_constant = pos.func.dfg.constants.insert(vec![0; 16].into());
let zero = pos.ins().vconst(I16X8, zero_constant);
let low = pos.ins().x86_pblendw(zero, bitcast_arg, 0x55);
let bitcast_low = pos.ins().raw_bitcast(I32X4, low);
let high = pos.ins().isub(arg, bitcast_low);
let convert_low = pos.ins().fcvt_from_sint(F32X4, bitcast_low);
let shift_high = pos.ins().ushr_imm(high, 1);
let convert_high = pos.ins().fcvt_from_sint(F32X4, shift_high);
let double_high = pos.ins().fadd(convert_high, convert_high);
pos.func.dfg.replace(inst).fadd(double_high, convert_low);
}
} else {
unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None))
}
}
}
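/// Expands `fcvt_to_sint`. `x86_cvtt2si` returns `INT_MIN` for NaN and out-of-range inputs,
/// so the result is compared against that sentinel; when it matches, follow-up code traps
/// with `BadConversionToInteger` for NaN and `IntegerOverflow` for out-of-range inputs,
/// letting a genuine `INT_MIN` fall through.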
fn expand_fcvt_to_sint(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToSint,
arg,
} => arg,
_ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)),
};
let old_block = func.layout.pp_block(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let done = func.dfg.make_block();
let maybe_trap_block = func.dfg.make_block();
func.dfg.replace(inst).x86_cvtt2si(ty, x);
let mut pos = FuncCursor::new(func).after_inst(inst);
pos.use_srcloc(inst);
let is_done = pos
.ins()
.icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1));
pos.ins().brnz(is_done, done, &[]);
pos.ins().jump(maybe_trap_block, &[]);
pos.insert_block(maybe_trap_block);
let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
pos.ins()
.trapnz(is_nan, ir::TrapCode::BadConversionToInteger);
let mut overflow_cc = FloatCC::LessThan;
let output_bits = ty.lane_bits();
let flimit = match xty {
ir::types::F32 =>
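        // For outputs narrower than 32 bits, -2^(N-1) - 1 is exactly representable in f32 and
        // anything at or below it overflows (values just above it still truncate to INT_MIN),
        // hence `<=`. For a full 32-bit output that boundary is not representable, so compare
        // `<` against -2^(N-1) itself.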
{
pos.ins().f32const(if output_bits < 32 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee32::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee32::pow2(output_bits - 1).neg()
})
}
ir::types::F64 =>
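        // Same reasoning as the f32 arm, with 64-bit boundaries.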
{
pos.ins().f64const(if output_bits < 64 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee64::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee64::pow2(output_bits - 1).neg()
})
}
_ => panic!("Can't convert {}", xty),
};
let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
let fzero = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
_ => panic!("Can't convert {}", xty),
};
let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
pos.ins().jump(done, &[]);
pos.insert_block(done);
cfg.recompute_block(pos.func, old_block);
cfg.recompute_block(pos.func, maybe_trap_block);
cfg.recompute_block(pos.func, done);
}
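/// Expands `fcvt_to_sint_sat`: the same structure as `expand_fcvt_to_sint`, except that NaN
/// saturates to 0 and out-of-range inputs saturate to `INT_MIN`/`INT_MAX` instead of trapping.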
fn expand_fcvt_to_sint_sat(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToSintSat,
arg,
} => arg,
_ => panic!(
"Need fcvt_to_sint_sat: {}",
func.dfg.display_inst(inst, None)
),
};
let old_block = func.layout.pp_block(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let done_block = func.dfg.make_block();
let intmin_block = func.dfg.make_block();
let minsat_block = func.dfg.make_block();
let maxsat_block = func.dfg.make_block();
func.dfg.clear_results(inst);
func.dfg.attach_block_param(done_block, result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let cvtt2si = pos.ins().x86_cvtt2si(ty, x);
let is_done = pos
.ins()
.icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1));
pos.ins().brnz(is_done, done_block, &[cvtt2si]);
pos.ins().jump(intmin_block, &[]);
pos.insert_block(intmin_block);
let zero = pos.ins().iconst(ty, 0);
let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
pos.ins().brnz(is_nan, done_block, &[zero]);
pos.ins().jump(minsat_block, &[]);
pos.insert_block(minsat_block);
let mut overflow_cc = FloatCC::LessThan;
let output_bits = ty.lane_bits();
let flimit = match xty {
ir::types::F32 =>
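        // Same overflow boundary selection as in `expand_fcvt_to_sint` above.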
{
pos.ins().f32const(if output_bits < 32 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee32::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee32::pow2(output_bits - 1).neg()
})
}
ir::types::F64 =>
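        // Same reasoning as the f32 arm, with 64-bit boundaries.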
{
pos.ins().f64const(if output_bits < 64 {
overflow_cc = FloatCC::LessThanOrEqual;
Ieee64::fcvt_to_sint_negative_overflow(output_bits)
} else {
Ieee64::pow2(output_bits - 1).neg()
})
}
_ => panic!("Can't convert {}", xty),
};
let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
let min_imm = match ty {
ir::types::I32 => i32::min_value() as i64,
ir::types::I64 => i64::min_value(),
_ => panic!("Don't know the min value for {}", ty),
};
let min_value = pos.ins().iconst(ty, min_imm);
pos.ins().brnz(overflow, done_block, &[min_value]);
pos.ins().jump(maxsat_block, &[]);
pos.insert_block(maxsat_block);
let fzero = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
_ => panic!("Can't convert {}", xty),
};
let max_imm = match ty {
ir::types::I32 => i32::max_value() as i64,
ir::types::I64 => i64::max_value(),
_ => panic!("Don't know the max value for {}", ty),
};
let max_value = pos.ins().iconst(ty, max_imm);
let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
pos.ins().brnz(overflow, done_block, &[max_value]);
pos.func.dfg.replace(inst).jump(done_block, &[cvtt2si]);
pos.next_inst();
pos.insert_block(done_block);
cfg.recompute_block(pos.func, old_block);
cfg.recompute_block(pos.func, intmin_block);
cfg.recompute_block(pos.func, minsat_block);
cfg.recompute_block(pos.func, maxsat_block);
cfg.recompute_block(pos.func, done_block);
}
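/// Expands `fcvt_to_sint_sat.i32x4` without branches: NaN lanes are zeroed before the
/// `x86_cvtt2si`, and a computed sign mask flips lanes that overflowed positively (where the
/// conversion produced `INT_MIN` for a positive input) to `INT_MAX`.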
fn expand_fcvt_to_sint_sat_vector(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToSintSat,
arg,
} = pos.func.dfg[inst]
{
let controlling_type = pos.func.dfg.ctrl_typevar(inst);
if controlling_type == I32X4 {
debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4);
let zeroed_nans = pos.ins().fcmp(FloatCC::Equal, arg, arg);
let zeroed_nans_bitcast = pos.ins().raw_bitcast(F32X4, zeroed_nans);
let zeroed_nans_copy = pos.ins().band(arg, zeroed_nans_bitcast);
let differences = pos.ins().bxor(zeroed_nans_bitcast, arg);
let differences_bitcast = pos.ins().raw_bitcast(I32X4, differences);
let converted = pos.ins().x86_cvtt2si(I32X4, zeroed_nans_copy);
let tmp = pos.ins().band(differences_bitcast, converted);
let mask = pos.ins().sshr_imm(tmp, 31);
pos.func.dfg.replace(inst).bxor(converted, mask);
} else {
unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None))
}
}
}
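/// Expands `fcvt_to_uint` in terms of signed conversions. Inputs below 2^(N-1) are converted
/// directly (trapping on NaN or a negative result); larger inputs have 2^(N-1) subtracted
/// first, are converted and checked for overflow, and then have 2^(N-1) added back.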
fn expand_fcvt_to_uint(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToUint,
arg,
} => arg,
_ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
};
let old_block = func.layout.pp_block(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let below_uint_max_block = func.dfg.make_block();
let below_zero_block = func.dfg.make_block();
let large = func.dfg.make_block();
let done = func.dfg.make_block();
func.dfg.clear_results(inst);
func.dfg.attach_block_param(done, result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let pow2nm1 = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
_ => panic!("Can't convert {}", xty),
};
let is_large = pos.ins().ffcmp(x, pow2nm1);
pos.ins()
.brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
pos.ins().jump(below_uint_max_block, &[]);
pos.insert_block(below_uint_max_block);
pos.ins().trapff(
FloatCC::Unordered,
is_large,
ir::TrapCode::BadConversionToInteger,
);
let sres = pos.ins().x86_cvtt2si(ty, x);
let is_neg = pos.ins().ifcmp_imm(sres, 0);
pos.ins()
.brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
pos.ins().jump(below_zero_block, &[]);
pos.insert_block(below_zero_block);
pos.ins().trap(ir::TrapCode::IntegerOverflow);
pos.insert_block(large);
let adjx = pos.ins().fsub(x, pow2nm1);
let lres = pos.ins().x86_cvtt2si(ty, adjx);
let is_neg = pos.ins().ifcmp_imm(lres, 0);
pos.ins()
.trapif(IntCC::SignedLessThan, is_neg, ir::TrapCode::IntegerOverflow);
let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
pos.func.dfg.replace(inst).jump(done, &[lfinal]);
pos.next_inst();
pos.insert_block(done);
cfg.recompute_block(pos.func, old_block);
cfg.recompute_block(pos.func, below_uint_max_block);
cfg.recompute_block(pos.func, below_zero_block);
cfg.recompute_block(pos.func, large);
cfg.recompute_block(pos.func, done);
}
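/// Expands `fcvt_to_uint_sat` with the same split around 2^(N-1) as `expand_fcvt_to_uint`,
/// but saturating: NaN and negative inputs produce 0 and overflow produces `UINT_MAX`.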
fn expand_fcvt_to_uint_sat(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
use crate::ir::immediates::{Ieee32, Ieee64};
let x = match func.dfg[inst] {
ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToUintSat,
arg,
} => arg,
_ => panic!(
"Need fcvt_to_uint_sat: {}",
func.dfg.display_inst(inst, None)
),
};
let old_block = func.layout.pp_block(inst);
let xty = func.dfg.value_type(x);
let result = func.dfg.first_result(inst);
let ty = func.dfg.value_type(result);
let below_pow2nm1_or_nan_block = func.dfg.make_block();
let below_pow2nm1_block = func.dfg.make_block();
let large = func.dfg.make_block();
let uint_large_block = func.dfg.make_block();
let done = func.dfg.make_block();
func.dfg.clear_results(inst);
func.dfg.attach_block_param(done, result);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let pow2nm1 = match xty {
ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
_ => panic!("Can't convert {}", xty),
};
let zero = pos.ins().iconst(ty, 0);
let is_large = pos.ins().ffcmp(x, pow2nm1);
pos.ins()
.brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
pos.ins().jump(below_pow2nm1_or_nan_block, &[]);
pos.insert_block(below_pow2nm1_or_nan_block);
pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]);
pos.ins().jump(below_pow2nm1_block, &[]);
pos.insert_block(below_pow2nm1_block);
let sres = pos.ins().x86_cvtt2si(ty, x);
let is_neg = pos.ins().ifcmp_imm(sres, 0);
pos.ins()
.brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
pos.ins().jump(done, &[zero]);
pos.insert_block(large);
let adjx = pos.ins().fsub(x, pow2nm1);
let lres = pos.ins().x86_cvtt2si(ty, adjx);
let max_value = pos.ins().iconst(
ty,
match ty {
ir::types::I32 => u32::max_value() as i64,
ir::types::I64 => u64::max_value() as i64,
_ => panic!("Can't convert {}", ty),
},
);
let is_neg = pos.ins().ifcmp_imm(lres, 0);
pos.ins()
.brif(IntCC::SignedLessThan, is_neg, done, &[max_value]);
pos.ins().jump(uint_large_block, &[]);
pos.insert_block(uint_large_block);
let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
pos.func.dfg.replace(inst).jump(done, &[lfinal]);
pos.next_inst();
pos.insert_block(done);
cfg.recompute_block(pos.func, old_block);
cfg.recompute_block(pos.func, below_pow2nm1_or_nan_block);
cfg.recompute_block(pos.func, below_pow2nm1_block);
cfg.recompute_block(pos.func, large);
cfg.recompute_block(pos.func, uint_large_block);
cfg.recompute_block(pos.func, done);
}
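/// Four f32 lanes holding 2^31 (0x4f00_0000 in little-endian bytes), the boundary used by
/// `expand_fcvt_to_uint_sat_vector`.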
static MAX_SIGNED_I32X4S_AS_F32X4S: [u8; 16] = [
0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f,
];
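/// Expands `fcvt_to_uint_sat.i32x4` without branches: NaN and negative lanes are clamped to
/// zero with `x86_fmax`, the value is split around 2^31 so each part fits a signed
/// conversion, and the converted parts are recombined, with lanes at or above 2^32 forced to
/// `UINT_MAX` via the comparison mask.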
fn expand_fcvt_to_uint_sat_vector(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::Unary {
opcode: ir::Opcode::FcvtToUintSat,
arg,
} = pos.func.dfg[inst]
{
let controlling_type = pos.func.dfg.ctrl_typevar(inst);
if controlling_type == I32X4 {
debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4);
let zeroes_constant = pos.func.dfg.constants.insert(vec![0x00; 16].into());
let max_signed_constant = pos
.func
.dfg
.constants
.insert(MAX_SIGNED_I32X4S_AS_F32X4S.as_ref().into());
let zeroes = pos.ins().vconst(F32X4, zeroes_constant);
let max_signed = pos.ins().vconst(F32X4, max_signed_constant);
let ge_zero = pos.ins().x86_fmax(arg, zeroes);
let minus_max_signed = pos.ins().fsub(ge_zero, max_signed);
let le_max_signed =
pos.ins()
.fcmp(FloatCC::LessThanOrEqual, max_signed, minus_max_signed);
let minus_max_signed_as_int = pos.ins().x86_cvtt2si(I32X4, minus_max_signed);
let le_max_signed_as_int = pos.ins().raw_bitcast(I32X4, le_max_signed);
let difference = pos
.ins()
.bxor(minus_max_signed_as_int, le_max_signed_as_int);
let zeroes_as_int = pos.ins().raw_bitcast(I32X4, zeroes);
let addend = pos.ins().x86_pmaxs(difference, zeroes_as_int);
let converted = pos.ins().x86_cvtt2si(I32X4, ge_zero);
pos.func.dfg.replace(inst).iadd(converted, addend);
} else {
unreachable!(
"{} should not be legalized in expand_fcvt_to_uint_sat_vector",
pos.func.dfg.display_inst(inst, None)
)
}
}
}
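/// Converts `shuffle` into one or two `x86_pshufb` operations. Mask bytes that index the
/// second vector are rebased to 0-15, and out-of-range bytes get their top bit set so
/// `pshufb` zeroes that lane; when the inputs differ, each input is shuffled separately and
/// the results are OR'd together.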
fn convert_shuffle(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] {
let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b };
let a = pos.func.dfg.resolve_aliases(args[0]);
let b = pos.func.dfg.resolve_aliases(args[1]);
let mask = pos
.func
.dfg
.immediates
.get(mask)
.expect("The shuffle immediate should have been recorded before this point")
.clone();
if a == b {
let constructed_mask = mask
.iter()
.map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
.map(zero_unknown_lane_index)
.collect();
let handle = pos.func.dfg.constants.insert(constructed_mask);
let a_type = pos.func.dfg.value_type(a);
let mask_value = pos.ins().vconst(a_type, handle);
pos.func.dfg.replace(inst).x86_pshufb(a, mask_value);
} else {
let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
let handle = pos.func.dfg.constants.insert(constructed_mask);
let a_type = pos.func.dfg.value_type(a);
let mask_value = pos.ins().vconst(a_type, handle);
let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value);
let constructed_mask = mask
.iter()
.map(|b| b.wrapping_sub(16))
.map(zero_unknown_lane_index)
.collect();
let handle = pos.func.dfg.constants.insert(constructed_mask);
let b_type = pos.func.dfg.value_type(b);
let mask_value = pos.ins().vconst(b_type, handle);
let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value);
pos.func
.dfg
.replace(inst)
.bor(shuffled_first_arg, shuffled_second_arg);
};
}
}
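/// Converts `extractlane` into `x86_pextr` for integer lanes, or into an `x86_pshufd` plus
/// `raw_bitcast` sequence that moves the requested float lane into position 0.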
fn convert_extractlane(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::BinaryImm8 {
opcode: ir::Opcode::Extractlane,
arg,
imm: lane,
} = pos.func.dfg[inst]
{
let value_type = pos.func.dfg.value_type(arg);
if value_type.lane_type().is_float() {
let shuffled = if lane != 0 {
match value_type {
F32X4 => {
let shuffle_mask: u8 = 0b00_00_00_00 | lane;
pos.ins().x86_pshufd(arg, shuffle_mask)
}
F64X2 => {
assert_eq!(lane, 1);
let shuffle_mask = 0b11_10_11_10;
let bitcast = pos.ins().raw_bitcast(F32X4, arg);
pos.ins().x86_pshufd(bitcast, shuffle_mask)
}
_ => unreachable!(),
}
} else {
arg
};
pos.func
.dfg
.replace(inst)
.raw_bitcast(value_type.lane_type(), shuffled);
} else {
pos.func.dfg.replace(inst).x86_pextr(arg, lane);
}
}
}
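/// Converts `insertlane` into `x86_pinsr` for integer lanes, and into
/// `x86_insertps`/`x86_movsd`/`x86_movlhps` for float lanes.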
fn convert_insertlane(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::TernaryImm8 {
opcode: ir::Opcode::Insertlane,
args: [vector, replacement],
imm: lane,
} = pos.func.dfg[inst]
{
let value_type = pos.func.dfg.value_type(vector);
if value_type.lane_type().is_float() {
match value_type {
F32X4 => {
assert!(lane <= 3);
let immediate = 0b00_00_00_00 | lane << 4;
pos.func
.dfg
.replace(inst)
.x86_insertps(vector, replacement, immediate)
}
F64X2 => {
let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement);
if lane == 0 {
pos.func
.dfg
.replace(inst)
.x86_movsd(vector, replacement_as_vector)
} else {
assert_eq!(lane, 1);
pos.func
.dfg
.replace(inst)
.x86_movlhps(vector, replacement_as_vector)
}
}
_ => unreachable!(),
};
} else {
pos.func
.dfg
.replace(inst)
.x86_pinsr(vector, replacement, lane);
}
}
}
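/// Converts `ineg` (vector or scalar integer) into an `isub` from a zero constant.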
fn convert_ineg(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
_isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::Unary {
opcode: ir::Opcode::Ineg,
arg,
} = pos.func.dfg[inst]
{
let value_type = pos.func.dfg.value_type(arg);
let zero_value = if value_type.is_vector() && value_type.lane_type().is_int() {
let zero_immediate = pos.func.dfg.constants.insert(vec![0; 16].into());
pos.ins().vconst(value_type, zero_immediate)
} else if value_type.is_int() {
pos.ins().iconst(value_type, 0)
} else {
panic!("Can't convert ineg of type {}", value_type)
};
pos.func.dfg.replace(inst).isub(zero_value, arg);
} else {
unreachable!()
}
}
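/// Moves a scalar i32/i64 value into the low lane of an XMM register; an i64 is split into
/// halves and reassembled with `insertlane`, while a narrower value goes through `bitcast`.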
fn expand_dword_to_xmm<'f>(
    pos: &mut FuncCursor<'f>,
arg: ir::Value,
arg_type: ir::Type,
) -> ir::Value {
if arg_type == I64 {
let (arg_lo, arg_hi) = pos.ins().isplit(arg);
let arg = pos.ins().scalar_to_vector(I32X4, arg_lo);
let arg = pos.ins().insertlane(arg, arg_hi, 1);
let arg = pos.ins().raw_bitcast(I64X2, arg);
arg
} else {
pos.ins().bitcast(I64X2, arg)
}
}
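/// Moves the low lane of an XMM result back into a scalar i32/i64, replacing `inst` with the
/// final `iconcat` or `ireduce`.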
fn contract_dword_from_xmm<'f>(
pos: &mut FuncCursor<'f>,
inst: ir::Inst,
ret: ir::Value,
ret_type: ir::Type,
) {
if ret_type == I64 {
let ret = pos.ins().raw_bitcast(I32X4, ret);
let ret_lo = pos.ins().extractlane(ret, 0);
let ret_hi = pos.ins().extractlane(ret, 1);
pos.func.dfg.replace(inst).iconcat(ret_lo, ret_hi);
} else {
let ret = pos.ins().extractlane(ret, 0);
pos.func.dfg.replace(inst).ireduce(ret_type, ret);
}
}
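/// Rows of 16 byte-masks indexed by shift amount (0-7). After shifting an `i8x16` as
/// `i16x8`, row `n` (0xff >> n) clears the bits that leaked in from the neighbouring byte.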
static USHR_MASKS: [u8; 128] = [
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
];
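/// Converts `ushr`. Vector shifts become `x86_psrl`, with a mask fixup for `i8x16` since
/// there is no byte-wise shift; a scalar i64 shift is performed in an XMM register when
/// SSE4.1 is available and otherwise expands to a libcall.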
fn convert_ushr(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::Binary {
opcode: ir::Opcode::Ushr,
args: [arg0, arg1],
} = pos.func.dfg[inst]
{
let arg0_type = pos.func.dfg.value_type(arg0);
let arg1_type = pos.func.dfg.value_type(arg1);
assert!(!arg1_type.is_vector() && arg1_type.is_int());
let shift_index = pos.ins().bitcast(I64X2, arg1);
if arg0_type == I8X16 {
let bitcasted = pos.ins().raw_bitcast(I16X8, arg0);
let shifted = pos.ins().x86_psrl(bitcasted, shift_index);
let shifted = pos.ins().raw_bitcast(I8X16, shifted);
let masks = pos.func.dfg.constants.insert(USHR_MASKS.as_ref().into());
let mask_address = pos.ins().const_addr(isa.pointer_type(), masks);
let mask_offset = pos.ins().ishl_imm(arg1, 4);
let mask =
pos.ins()
.load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0);
pos.func.dfg.replace(inst).band(shifted, mask);
} else if arg0_type.is_vector() {
pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index);
} else if arg0_type == I64 {
let x86_isa = isa
.as_any()
.downcast_ref::<isa::x86::Isa>()
.expect("the target ISA must be x86 at this point");
if x86_isa.isa_flags.has_sse41() {
let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
let shifted = pos.ins().x86_psrl(value, amount);
contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
} else {
expand_as_libcall(inst, func, isa);
}
} else {
unreachable!()
}
}
}
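/// Rows of 16 byte-masks indexed by shift amount (0-7). After shifting an `i8x16` as
/// `i16x8`, row `n` (0xff << n) clears the bits that leaked in from the neighbouring byte.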
static SHL_MASKS: [u8; 128] = [
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
];
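/// Converts `ishl`, mirroring `convert_ushr`: vector shifts become `x86_psll` (with the mask
/// fixup for `i8x16`), and a scalar i64 shift uses an XMM register with SSE4.1 or falls back
/// to a libcall.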
fn convert_ishl(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::Binary {
opcode: ir::Opcode::Ishl,
args: [arg0, arg1],
} = pos.func.dfg[inst]
{
let arg0_type = pos.func.dfg.value_type(arg0);
let arg1_type = pos.func.dfg.value_type(arg1);
assert!(!arg1_type.is_vector() && arg1_type.is_int());
let shift_index = pos.ins().bitcast(I64X2, arg1);
if arg0_type == I8X16 {
let bitcasted = pos.ins().raw_bitcast(I16X8, arg0);
let shifted = pos.ins().x86_psll(bitcasted, shift_index);
let shifted = pos.ins().raw_bitcast(I8X16, shifted);
let masks = pos.func.dfg.constants.insert(SHL_MASKS.as_ref().into());
let mask_address = pos.ins().const_addr(isa.pointer_type(), masks);
let mask_offset = pos.ins().ishl_imm(arg1, 4);
let mask =
pos.ins()
.load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0);
pos.func.dfg.replace(inst).band(shifted, mask);
} else if arg0_type.is_vector() {
pos.func.dfg.replace(inst).x86_psll(arg0, shift_index);
} else if arg0_type == I64 {
let x86_isa = isa
.as_any()
.downcast_ref::<isa::x86::Isa>()
.expect("the target ISA must be x86 at this point");
if x86_isa.isa_flags.has_sse41() {
let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
let shifted = pos.ins().x86_psll(value, amount);
contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
} else {
expand_as_libcall(inst, func, isa);
}
} else {
unreachable!()
}
}
}
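/// Converts an `i64x2` `imul`. With AVX-512DQ/VL support this is a single `x86_pmullq`;
/// otherwise the product is assembled from `x86_pmuludq` 32x32->64 partial products, with
/// the cross terms shifted into the high halves.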
fn convert_i64x2_imul(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
) {
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
if let ir::InstructionData::Binary {
opcode: ir::Opcode::Imul,
args: [arg0, arg1],
} = pos.func.dfg[inst]
{
let ty = pos.func.dfg.ctrl_typevar(inst);
if ty == I64X2 {
let x86_isa = isa
.as_any()
.downcast_ref::<isa::x86::Isa>()
.expect("the target ISA must be x86 at this point");
if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() {
pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1);
} else {
let high0 = pos.ins().ushr_imm(arg0, 32);
let mul0 = pos.ins().x86_pmuludq(high0, arg1);
let high1 = pos.ins().ushr_imm(arg1, 32);
let mul1 = pos.ins().x86_pmuludq(high1, arg0);
let addhigh = pos.ins().iadd(mul0, mul1);
let high = pos.ins().ishl_imm(addhigh, 32);
let low = pos.ins().x86_pmuludq(arg0, arg1);
pos.func.dfg.replace(inst).iadd(low, high);
}
} else {
unreachable!(
"{} should be encodable; it cannot be legalized by convert_i64x2_imul",
pos.func.dfg.display_inst(inst, None)
);
}
}
}
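/// Expands `tls_value` into the x86-64 TLS access sequence selected by the `tls_model` flag
/// (ELF general-dynamic or Mach-O).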
fn expand_tls_value(
inst: ir::Inst,
func: &mut ir::Function,
_cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
) {
use crate::settings::TlsModel;
assert!(
isa.triple().architecture == target_lexicon::Architecture::X86_64,
"Not yet implemented for {:?}",
isa.triple(),
);
if let ir::InstructionData::UnaryGlobalValue {
opcode: ir::Opcode::TlsValue,
global_value,
} = func.dfg[inst]
{
let ctrl_typevar = func.dfg.ctrl_typevar(inst);
assert_eq!(ctrl_typevar, ir::types::I64);
match isa.flags().tls_model() {
TlsModel::None => panic!("tls_model flag is not set."),
TlsModel::ElfGd => {
func.dfg.replace(inst).x86_elf_tls_get_addr(global_value);
}
TlsModel::Macho => {
func.dfg.replace(inst).x86_macho_tls_get_addr(global_value);
}
model => unimplemented!("tls_value for tls model {:?}", model),
}
} else {
unreachable!();
}
}