Skip to content

Commit 4748c53

Browse files
authored
Add llvm.x86.vcvtps2ph.128 (#1613)
* Add `llvm.x86.vcvtps2ph.128`
* `cargo fmt`
* Test `_mm_cvtps_ph`
1 parent 812320a commit 4748c53

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed

example/std_example.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,9 @@ unsafe fn test_simd() {
259259
test_mm_cvttps_epi32();
260260
test_mm_cvtsi128_si64();
261261

262+
#[cfg(not(jit))]
263+
test_mm_cvtps_ph();
264+
262265
test_mm_extract_epi8();
263266
test_mm_insert_epi16();
264267
test_mm_shuffle_epi8();
@@ -558,6 +561,21 @@ unsafe fn test_mm_cvttps_epi32() {
558561
}
559562
}
560563

564+
#[cfg(target_arch = "x86_64")]
565+
#[target_feature(enable = "f16c")]
566+
#[cfg(not(jit))]
567+
unsafe fn test_mm_cvtps_ph() {
568+
const F16_ONE: i16 = 0x3c00;
569+
const F16_TWO: i16 = 0x4000;
570+
const F16_THREE: i16 = 0x4200;
571+
const F16_FOUR: i16 = 0x4400;
572+
573+
let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
574+
let r = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a);
575+
let e = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
576+
assert_eq_m128i(r, e);
577+
}
578+
561579
fn test_checked_mul() {
562580
let u: Option<u8> = u8::from_str_radix("1000", 10).ok();
563581
assert_eq!(u, None);

src/intrinsics/llvm_x86.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1313,6 +1313,35 @@ pub(super) fn codegen_x86_llvm_intrinsic_call<'tcx>(
13131313
ret.write_cvalue_transmute(fx, res);
13141314
}
13151315

1316+
"llvm.x86.vcvtps2ph.128" => {
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_ph
    //
    // Lowered to a single `vcvtps2ph` instruction through inline asm, with the
    // source vector and the result both pinned to xmm0.
    intrinsic_args!(fx, args => (a, _imm8); intrinsic);
    let a = a.load_scalar(fx);

    // The second operand is the rounding-control immediate of `_mm_cvtps_ph`.
    // It is spliced into the asm template text below, so its value must be
    // known while compiling; anything else is reported as a fatal error.
    let imm8 =
        if let Some(imm8) = crate::constant::mir_operand_get_const_val(fx, &args[1].node) {
            imm8
        } else {
            fx.tcx
                .dcx()
                .span_fatal(span, "Rounding argument for `_mm_cvtps_ph` is not a constant");
        };

    let imm8 = imm8.to_u32();

    codegen_inline_asm_inner(
        fx,
        &[InlineAsmTemplatePiece::String(format!("vcvtps2ph xmm0, xmm0, {imm8}").into())],
        &[CInlineAsmOperand::InOut {
            reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::xmm0)),
            _late: true,
            in_value: a,
            out_place: Some(ret),
        }],
        InlineAsmOptions::NOSTACK | InlineAsmOptions::PURE | InlineAsmOptions::NOMEM,
    );
}
1344+
13161345
_ => {
13171346
fx.tcx
13181347
.dcx()

0 commit comments

Comments
 (0)