Skip to content

Commit f920b86

Browse files
committed
Add SSE2 trivial aliases and conversions.
`_mm_cvtsd_f64`, `_mm_cvtsd_si64x` and `_mm_cvttsd_si64x`. See #40.
1 parent 46d64f0 commit f920b86

File tree

1 file changed

+42
-4
lines changed

1 file changed

+42
-4
lines changed

src/x86/sse2.rs

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1795,6 +1795,13 @@ pub unsafe fn _mm_cvtsd_si64(a: f64x2) -> i64 {
17951795
cvtsd2si64(a)
17961796
}
17971797

1798+
/// Alias for [`_mm_cvtsd_si64`](fn._mm_cvtsd_si64.html).
1799+
#[cfg(target_arch = "x86_64")]
1800+
#[inline(always)]
1801+
#[target_feature = "+sse2"]
1802+
#[cfg_attr(test, assert_instr(cvtsd2si))]
1803+
pub unsafe fn _mm_cvtsd_si64x(a: f64x2) -> i64 { _mm_cvtsd_si64(a) }
1804+
17981805
/// Convert the lower double-precision (64-bit) floating-point element in `b`
17991806
/// to a single-precision (32-bit) floating-point element, store the result in
18001807
/// the lower element of the return value, and copy the upper element from `a`
@@ -1806,6 +1813,14 @@ pub unsafe fn _mm_cvtsd_ss(a: f32x4, b: f64x2) -> f32x4 {
18061813
cvtsd2ss(a, b)
18071814
}
18081815

1816+
/// Return the lower double-precision (64-bit) floating-point element of `a`.
1817+
#[inline(always)]
1818+
#[target_feature = "+sse2"]
1819+
// no particular instruction to test
1820+
pub unsafe fn _mm_cvtsd_f64(a: f64x2) -> f64 {
1821+
a.extract(0)
1822+
}
1823+
18091824
/// Convert the lower single-precision (32-bit) floating-point element in `b`
18101825
/// to a double-precision (64-bit) floating-point element, store the result in
18111826
/// the lower element of the return value, and copy the upper element from `a`
@@ -1845,6 +1860,13 @@ pub unsafe fn _mm_cvttsd_si64(a: f64x2) -> i64 {
18451860
cvttsd2si64(a)
18461861
}
18471862

1863+
/// Alias for [`_mm_cvttsd_si64`](fn._mm_cvttsd_si64.html).
1864+
#[cfg(target_arch = "x86_64")]
1865+
#[inline(always)]
1866+
#[target_feature = "+sse2"]
1867+
#[cfg_attr(test, assert_instr(cvttsd2si))]
1868+
pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 { _mm_cvttsd_si64(a) }
1869+
18481870
/// Convert packed single-precision (32-bit) floating-point elements in `a` to
18491871
/// packed 32-bit integers with truncation.
18501872
#[inline(always)]
@@ -3989,8 +4011,14 @@ mod tests {
39894011

39904012
let r = sse2::_mm_cvtsd_si64(f64x2::new(f64::MAX, f64::MIN));
39914013
assert_eq!(r, i64::MIN);
4014+
}
4015+
4016+
#[cfg(target_arch = "x86_64")]
4017+
#[simd_test = "sse2"]
4018+
unsafe fn _mm_cvtsd_si64x() {
4019+
use std::{f64, i64};
39924020

3993-
let r = sse2::_mm_cvtsd_si64(f64x2::new(f64::NAN, f64::NAN));
4021+
let r = sse2::_mm_cvtsd_si64x(f64x2::new(f64::NAN, f64::NAN));
39944022
assert_eq!(r, i64::MIN);
39954023
}
39964024

@@ -4022,6 +4050,12 @@ mod tests {
40224050
);
40234051
}
40244052

4053+
#[simd_test = "sse2"]
4054+
unsafe fn _mm_cvtsd_f64() {
4055+
let r = sse2::_mm_cvtsd_f64(f64x2::new(-1.1, 2.2));
4056+
assert_eq!(r, -1.1);
4057+
}
4058+
40254059
#[simd_test = "sse2"]
40264060
unsafe fn _mm_cvtss_sd() {
40274061
use std::{f32, f64};
@@ -4068,14 +4102,18 @@ mod tests {
40684102
#[cfg(target_arch = "x86_64")]
40694103
#[simd_test = "sse2"]
40704104
unsafe fn _mm_cvttsd_si64() {
4071-
use std::{f64, i64};
4072-
40734105
let a = f64x2::new(-1.1, 2.2);
40744106
let r = sse2::_mm_cvttsd_si64(a);
40754107
assert_eq!(r, -1_i64);
4108+
}
4109+
4110+
#[cfg(target_arch = "x86_64")]
4111+
#[simd_test = "sse2"]
4112+
unsafe fn _mm_cvttsd_si64x() {
4113+
use std::{f64, i64};
40764114

40774115
let a = f64x2::new(f64::NEG_INFINITY, f64::NAN);
4078-
let r = sse2::_mm_cvttsd_si64(a);
4116+
let r = sse2::_mm_cvttsd_si64x(a);
40794117
assert_eq!(r, i64::MIN);
40804118
}
40814119

0 commit comments

Comments
 (0)