Skip to content

Commit 8ffc9ed

Browse files
committed
use marker traits to reduce compile times a LOT
1 parent 886a11c commit 8ffc9ed

File tree

3 files changed

+84
-99
lines changed

3 files changed

+84
-99
lines changed

src/lib.rs

Lines changed: 53 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -140,111 +140,66 @@ pub mod scalar;
140140
pub mod sse2;
141141
pub mod sse41;
142142

143+
/// Grouping all the constraints shared by associated types in
144+
/// the Simd trait into this marker trait drastically reduces
145+
/// compile time.
146+
pub trait SimdBase<T,U> :
147+
Copy
148+
+ Debug
149+
+ IndexMut<usize>
150+
+ Add<T, Output = T>
151+
+ Sub<T, Output = T>
152+
+ AddAssign<T>
153+
+ SubAssign<T>
154+
+ BitAnd<T, Output = T>
155+
+ BitOr<T, Output = T>
156+
+ BitXor<T, Output = T>
157+
+ BitAndAssign<T>
158+
+ BitOrAssign<T>
159+
+ BitXorAssign<T>
160+
+ Index<usize, Output = U> {}
161+
162+
pub trait SimdSmallInt<T,U> : SimdBase<T,U>
163+
+ Mul<T, Output = T>
164+
+ MulAssign<T>
165+
+ Not<Output = T>
166+
+ Shl<i32, Output = T>
167+
+ ShlAssign<i32>
168+
+ Shr<i32, Output = T>
169+
+ ShrAssign<i32> {}
170+
171+
pub trait SimdFloat<T,U> : SimdBase<T,U>
172+
+ Mul<T, Output = T>
173+
+ Div<T, Output = T>
174+
+ MulAssign<T>
175+
+ DivAssign<T> {}
176+
143177
pub trait Simd {
144-
type Vi16: Copy
145-
+ Debug
146-
+ Add<Self::Vi16, Output = Self::Vi16>
147-
+ Sub<Self::Vi16, Output = Self::Vi16>
148-
+ Mul<Self::Vi16, Output = Self::Vi16>
149-
+ AddAssign<Self::Vi16>
150-
+ SubAssign<Self::Vi16>
151-
+ MulAssign<Self::Vi16>
152-
+ BitAnd<Self::Vi16, Output = Self::Vi16>
153-
+ BitOr<Self::Vi16, Output = Self::Vi16>
154-
+ BitXor<Self::Vi16, Output = Self::Vi16>
155-
+ BitAndAssign<Self::Vi16>
156-
+ BitOrAssign<Self::Vi16>
157-
+ BitXorAssign<Self::Vi16>
158-
+ Index<usize, Output = i16>
159-
+ IndexMut<usize>
160-
+ Not<Output = Self::Vi16>
161-
+ Shl<i32, Output = Self::Vi16>
162-
+ ShlAssign<i32>
163-
+ Shr<i32, Output = Self::Vi16>
164-
+ ShrAssign<i32>;
165-
/// Vi32 stands for Vector of i32s. Corresponds to __m128i when used
178+
179+
type Vi16: SimdSmallInt<Self::Vi16,i16>;
180+
181+
/// Vector of i32s. Corresponds to __m128i when used
166182
/// with the Sse impl, __m256i when used with Avx2, or a single i32
167183
/// when used with Scalar.
168-
type Vi32: Copy
169-
+ Debug
170-
+ Add<Self::Vi32, Output = Self::Vi32>
171-
+ Sub<Self::Vi32, Output = Self::Vi32>
172-
+ Mul<Self::Vi32, Output = Self::Vi32>
173-
+ AddAssign<Self::Vi32>
174-
+ SubAssign<Self::Vi32>
175-
+ MulAssign<Self::Vi32>
176-
+ BitAnd<Self::Vi32, Output = Self::Vi32>
177-
+ BitOr<Self::Vi32, Output = Self::Vi32>
178-
+ BitXor<Self::Vi32, Output = Self::Vi32>
179-
+ BitAndAssign<Self::Vi32>
180-
+ BitOrAssign<Self::Vi32>
181-
+ BitXorAssign<Self::Vi32>
182-
+ Index<usize, Output = i32>
183-
+ IndexMut<usize>
184-
+ Not<Output = Self::Vi32>
185-
+ Shl<i32, Output = Self::Vi32>
186-
+ ShlAssign<i32>
187-
+ Shr<i32, Output = Self::Vi32>
188-
+ ShrAssign<i32>;
189-
/// Vf32 stands for Vector of f32s. Corresponds to __m128 when used
184+
type Vi32: SimdSmallInt<Self::Vi32,i32>;
185+
186+
/// Vector of i64s. Corresponds to __m128i when used
187+
/// with the Sse impl, __m256i when used with Avx2, or a single i64
188+
/// when used with Scalar.
189+
type Vi64: SimdBase<Self::Vi64,i64>
190+
+ Not<Output = Self::Vi64>;
191+
192+
/// Vector of f32s. Corresponds to __m128 when used
190193
/// with the Sse impl, __m256 when used with Avx2, or a single f32
191194
/// when used with Scalar.
192-
type Vf32: Copy
193-
+ Debug
194-
+ Add<Self::Vf32, Output = Self::Vf32>
195-
+ Sub<Self::Vf32, Output = Self::Vf32>
196-
+ Mul<Self::Vf32, Output = Self::Vf32>
197-
+ Div<Self::Vf32, Output = Self::Vf32>
198-
+ AddAssign<Self::Vf32>
199-
+ SubAssign<Self::Vf32>
200-
+ MulAssign<Self::Vf32>
201-
+ DivAssign<Self::Vf32>
202-
+ BitAnd<Self::Vf32, Output = Self::Vf32>
203-
+ BitOr<Self::Vf32, Output = Self::Vf32>
204-
+ BitXor<Self::Vf32, Output = Self::Vf32>
205-
+ BitAndAssign<Self::Vf32>
206-
+ BitOrAssign<Self::Vf32>
207-
+ BitXorAssign<Self::Vf32>
208-
+ Index<usize, Output = f32>
209-
+ IndexMut<usize>;
195+
type Vf32: SimdFloat<Self::Vf32,f32>;
210196

211-
/// Vi64 stands for Vector of f64s. Corresponds to __m128 when used
212-
/// with the Sse impl, __m256 when used with Avx2, or a single f64
197+
/// Vector of f64s. Corresponds to __m128d when used
198+
/// with the Sse impl, __m256d when used with Avx2, or a single f64
213199
/// when used with Scalar.
214-
type Vf64: Copy
215-
+ Debug
216-
+ Index<usize, Output = f64>
217-
+ IndexMut<usize>
218-
+ Add<Self::Vf64, Output = Self::Vf64>
219-
+ Sub<Self::Vf64, Output = Self::Vf64>
220-
+ Mul<Self::Vf64, Output = Self::Vf64>
221-
+ Div<Self::Vf64, Output = Self::Vf64>
222-
+ AddAssign<Self::Vf64>
223-
+ SubAssign<Self::Vf64>
224-
+ MulAssign<Self::Vf64>
225-
+ DivAssign<Self::Vf64>
226-
+ BitAnd<Self::Vf64, Output = Self::Vf64>
227-
+ BitOr<Self::Vf64, Output = Self::Vf64>
228-
+ BitXor<Self::Vf64, Output = Self::Vf64>
229-
+ BitAndAssign<Self::Vf64>
230-
+ BitOrAssign<Self::Vf64>
231-
+ BitXorAssign<Self::Vf64>;
232-
233-
type Vi64: Copy
234-
+ Debug
235-
+ Index<usize, Output = i64>
236-
+ IndexMut<usize>
237-
+ Add<Self::Vi64, Output = Self::Vi64>
238-
+ Sub<Self::Vi64, Output = Self::Vi64>
239-
+ AddAssign<Self::Vi64>
240-
+ SubAssign<Self::Vi64>
241-
+ BitAnd<Self::Vi64, Output = Self::Vi64>
242-
+ BitOr<Self::Vi64, Output = Self::Vi64>
243-
+ BitXor<Self::Vi64, Output = Self::Vi64>
244-
+ BitAndAssign<Self::Vi64>
245-
+ BitOrAssign<Self::Vi64>
246-
+ BitXorAssign<Self::Vi64>
247-
+ Not<Output = Self::Vi64>;
200+
type Vf64: SimdFloat<Self::Vf64,f64>;
201+
202+
248203
/// The width of the vector lane. Necessary for creating
249204
/// lane-width-agnostic code.
250205
const VF32_WIDTH: usize;

src/overloads/mod.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,58 +4,89 @@ use core::arch::x86::*;
44
use core::arch::x86_64::*;
55
use core::mem;
66
use core::ops::*;
7+
use super::*;
78

89
// Newtypes for i16 vectors
910
// We have to do this to allow for overloading of
1011
// __m128i etc
1112
#[derive(Copy, Debug, Clone)]
1213
pub struct I16x1(pub i16);
14+
impl SimdBase<I16x1,i16> for I16x1 {}
15+
impl SimdSmallInt<I16x1,i16> for I16x1 {}
1316
#[derive(Copy, Debug, Clone)]
1417
pub struct I16x8(pub __m128i);
18+
impl SimdBase<I16x8,i16> for I16x8 {}
19+
impl SimdSmallInt<I16x8,i16> for I16x8 {}
1520
#[derive(Copy, Debug, Clone)]
1621
pub struct I16x16(pub __m256i);
22+
impl SimdBase<I16x16,i16> for I16x16 {}
23+
impl SimdSmallInt<I16x16,i16> for I16x16 {}
1724

1825
// Newtypes for i32 vectors
1926
// We have to do this to allow for overloading of
2027
// __m128i etc
2128
#[derive(Copy, Debug, Clone)]
2229
pub struct I32x1(pub i32);
30+
impl SimdBase<I32x1,i32> for I32x1 {}
31+
impl SimdSmallInt<I32x1,i32> for I32x1 {}
2332
#[derive(Copy, Debug, Clone)]
2433
pub struct I32x4(pub __m128i);
34+
impl SimdBase<I32x4,i32> for I32x4 {}
35+
impl SimdSmallInt<I32x4,i32> for I32x4 {}
2536
#[derive(Copy, Debug, Clone)]
2637
pub struct I32x4_41(pub __m128i);
38+
impl SimdBase<I32x4_41,i32> for I32x4_41 {}
39+
impl SimdSmallInt<I32x4_41,i32> for I32x4_41 {}
2740
#[derive(Copy, Debug, Clone)]
2841
pub struct I32x8(pub __m256i);
42+
impl SimdBase<I32x8,i32> for I32x8 {}
43+
impl SimdSmallInt<I32x8,i32> for I32x8 {}
2944

3045
// Newtypes for i64 vectors
3146
// We have to do this to allow for overloading of
3247
// __m128i etc
3348
#[derive(Copy, Debug, Clone)]
3449
pub struct I64x1(pub i64);
50+
impl SimdBase<I64x1,i64> for I64x1 {}
3551
#[derive(Copy, Debug, Clone)]
3652
pub struct I64x2(pub __m128i);
53+
impl SimdBase<I64x2,i64> for I64x2 {}
3754
#[derive(Copy, Debug, Clone)]
3855
pub struct I64x2_41(pub __m128i);
56+
impl SimdBase<I64x2_41,i64> for I64x2_41 {}
3957
#[derive(Copy, Debug, Clone)]
4058
pub struct I64x4(pub __m256i);
59+
impl SimdBase<I64x4,i64> for I64x4 {}
4160

4261
// Newtypes for f32 vectors
4362
// We have to do this to allow for overloading of
4463
// __m128 etc
4564
#[derive(Copy, Debug, Clone)]
4665
pub struct F32x1(pub f32);
66+
impl SimdBase<F32x1,f32> for F32x1 {}
67+
impl SimdFloat<F32x1,f32> for F32x1 {}
4768
#[derive(Copy, Debug, Clone)]
4869
pub struct F32x4(pub __m128);
70+
impl SimdBase<F32x4,f32> for F32x4 {}
71+
impl SimdFloat<F32x4,f32> for F32x4 {}
4972
#[derive(Copy, Debug, Clone)]
5073
pub struct F32x8(pub __m256);
74+
impl SimdBase<F32x8,f32> for F32x8 {}
75+
impl SimdFloat<F32x8,f32> for F32x8 {}
5176

5277
// Newtypes for f64 vectors
5378
#[derive(Copy, Debug, Clone)]
5479
pub struct F64x1(pub f64);
80+
impl SimdBase<F64x1,f64> for F64x1 {}
81+
impl SimdFloat<F64x1,f64> for F64x1 {}
5582
#[derive(Copy, Debug, Clone)]
5683
pub struct F64x2(pub __m128d);
84+
impl SimdBase<F64x2,f64> for F64x2 {}
85+
impl SimdFloat<F64x2,f64> for F64x2 {}
5786
#[derive(Copy, Debug, Clone)]
5887
pub struct F64x4(pub __m256d);
88+
impl SimdBase<F64x4,f64> for F64x4 {}
89+
impl SimdFloat<F64x4,f64> for F64x4 {}
5990

6091
mod index;
6192
pub use self::index::*;

src/scalar.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
use super::*;
2-
use crate::libm::*;
32
use crate::overloads::*;
43
use core::mem;
54

0 commit comments

Comments
 (0)