Skip to content

Commit 3036e79

Browse files
beetrees and fitzgen authored
Add I128 atomic support to the x64 backend (#9459)
* Add I128 atomic support to the `x64` backend * fix typo in cranelift/codegen/src/isa/x64/inst/emit.rs --------- Co-authored-by: Nick Fitzgerald <[email protected]>
1 parent ba8ed6c commit 3036e79

File tree

16 files changed

+1540
-36
lines changed

16 files changed

+1540
-36
lines changed

cranelift/codegen/meta/src/isa/x86.rs

Lines changed: 47 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ pub(crate) fn define() -> TargetIsa {
1717
"SSSE3: CPUID.01H:ECX.SSSE3[bit 9]",
1818
false,
1919
);
20+
let has_cmpxchg16b = settings.add_bool(
21+
"has_cmpxchg16b",
22+
"Has support for CMPXCHG16b.",
23+
"CMPXCHG16b: CPUID.01H:ECX.CMPXCHG16B[bit 13]",
24+
false,
25+
);
2026
let has_sse41 = settings.add_bool(
2127
"has_sse41",
2228
"Has support for SSE4.1.",
@@ -106,6 +112,7 @@ pub(crate) fn define() -> TargetIsa {
106112
false,
107113
);
108114

115+
settings.add_predicate("use_cmpxchg16b", predicate!(has_cmpxchg16b));
109116
settings.add_predicate("use_ssse3", predicate!(has_ssse3));
110117
settings.add_predicate("use_sse41", predicate!(has_sse41));
111118
settings.add_predicate("use_sse42", predicate!(has_sse41 && has_sse42));
@@ -141,14 +148,30 @@ pub(crate) fn define() -> TargetIsa {
141148
// Intel CPUs
142149

143150
// Netburst
144-
settings.add_preset("nocona", "Nocona microarchitecture.", preset!(sse3));
151+
settings.add_preset(
152+
"nocona",
153+
"Nocona microarchitecture.",
154+
preset!(sse3 && has_cmpxchg16b),
155+
);
145156

146157
// Intel Core 2 Solo/Duo
147-
settings.add_preset("core2", "Core 2 microarchitecture.", preset!(sse3));
148-
settings.add_preset("penryn", "Penryn microarchitecture.", preset!(sse41));
158+
settings.add_preset(
159+
"core2",
160+
"Core 2 microarchitecture.",
161+
preset!(sse3 && has_cmpxchg16b),
162+
);
163+
settings.add_preset(
164+
"penryn",
165+
"Penryn microarchitecture.",
166+
preset!(sse41 && has_cmpxchg16b),
167+
);
149168

150169
// Intel Atom CPUs
151-
let atom = settings.add_preset("atom", "Atom microarchitecture.", preset!(ssse3));
170+
let atom = settings.add_preset(
171+
"atom",
172+
"Atom microarchitecture.",
173+
preset!(ssse3 && has_cmpxchg16b),
174+
);
152175
settings.add_preset("bonnell", "Bonnell microarchitecture.", preset!(atom));
153176
let silvermont = settings.add_preset(
154177
"silvermont",
@@ -186,7 +209,7 @@ pub(crate) fn define() -> TargetIsa {
186209
let nehalem = settings.add_preset(
187210
"nehalem",
188211
"Nehalem microarchitecture.",
189-
preset!(sse42 && has_popcnt),
212+
preset!(sse42 && has_popcnt && has_cmpxchg16b),
190213
);
191214
settings.add_preset("corei7", "Core i7 microarchitecture.", preset!(nehalem));
192215
let westmere = settings.add_preset("westmere", "Westmere microarchitecture.", preset!(nehalem));
@@ -229,7 +252,15 @@ pub(crate) fn define() -> TargetIsa {
229252
let knights_landing = settings.add_preset(
230253
"knl",
231254
"Knights Landing microarchitecture.",
232-
preset!(has_popcnt && has_avx512f && has_fma && has_bmi1 && has_bmi2 && has_lzcnt),
255+
preset!(
256+
has_popcnt
257+
&& has_avx512f
258+
&& has_fma
259+
&& has_bmi1
260+
&& has_bmi2
261+
&& has_lzcnt
262+
&& has_cmpxchg16b
263+
),
233264
);
234265
settings.add_preset(
235266
"knm",
@@ -312,22 +343,22 @@ pub(crate) fn define() -> TargetIsa {
312343
settings.add_preset(
313344
"opteron-sse3",
314345
"Opteron microarchitecture with support for SSE3 instructions.",
315-
preset!(sse3),
346+
preset!(sse3 && has_cmpxchg16b),
316347
);
317348
settings.add_preset(
318349
"k8-sse3",
319350
"K8 Hammer microarchitecture with support for SSE3 instructions.",
320-
preset!(sse3),
351+
preset!(sse3 && has_cmpxchg16b),
321352
);
322353
settings.add_preset(
323354
"athlon64-sse3",
324355
"Athlon 64 microarchitecture with support for SSE3 instructions.",
325-
preset!(sse3),
356+
preset!(sse3 && has_cmpxchg16b),
326357
);
327358
let barcelona = settings.add_preset(
328359
"barcelona",
329360
"Barcelona microarchitecture.",
330-
preset!(has_popcnt && has_lzcnt),
361+
preset!(has_popcnt && has_lzcnt && has_cmpxchg16b),
331362
);
332363
settings.add_preset(
333364
"amdfam10",
@@ -338,7 +369,7 @@ pub(crate) fn define() -> TargetIsa {
338369
let btver1 = settings.add_preset(
339370
"btver1",
340371
"Bobcat microarchitecture.",
341-
preset!(ssse3 && has_lzcnt && has_popcnt),
372+
preset!(ssse3 && has_lzcnt && has_popcnt && has_cmpxchg16b),
342373
);
343374
settings.add_preset(
344375
"btver2",
@@ -349,7 +380,7 @@ pub(crate) fn define() -> TargetIsa {
349380
let bdver1 = settings.add_preset(
350381
"bdver1",
351382
"Bulldozer microarchitecture",
352-
preset!(has_lzcnt && has_popcnt && ssse3),
383+
preset!(has_lzcnt && has_popcnt && ssse3 && has_cmpxchg16b),
353384
);
354385
let bdver2 = settings.add_preset(
355386
"bdver2",
@@ -366,7 +397,9 @@ pub(crate) fn define() -> TargetIsa {
366397
let znver1 = settings.add_preset(
367398
"znver1",
368399
"Zen (first generation) microarchitecture.",
369-
preset!(sse42 && has_popcnt && has_bmi1 && has_bmi2 && has_lzcnt && has_fma),
400+
preset!(
401+
sse42 && has_popcnt && has_bmi1 && has_bmi2 && has_lzcnt && has_fma && has_cmpxchg16b
402+
),
370403
);
371404
let znver2 = settings.add_preset(
372405
"znver2",
@@ -397,7 +430,7 @@ pub(crate) fn define() -> TargetIsa {
397430
let x86_64_v2 = settings.add_preset(
398431
"x86-64-v2",
399432
"Generic x86-64 (V2) microarchitecture.",
400-
preset!(sse42 && has_popcnt),
433+
preset!(sse42 && has_popcnt && has_cmpxchg16b),
401434
);
402435
let x86_64_v3 = settings.add_preset(
403436
"x84_64_v3",

cranelift/codegen/meta/src/shared/instructions.rs

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3637,18 +3637,19 @@ pub(crate) fn define(
36373637
let AtomicMem = &TypeVar::new(
36383638
"AtomicMem",
36393639
"Any type that can be stored in memory, which can be used in an atomic operation",
3640-
TypeSetBuilder::new().ints(8..64).build(),
3640+
TypeSetBuilder::new().ints(8..128).build(),
36413641
);
36423642

36433643
ig.push(
36443644
Inst::new(
36453645
"atomic_rmw",
36463646
r#"
36473647
Atomically read-modify-write memory at `p`, with second operand `x`. The old value is
3648-
returned. `p` has the type of the target word size, and `x` may be an integer type of
3649-
8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned value is the
3650-
same as the type of `x`. This operation is sequentially consistent and creates
3651-
happens-before edges that order normal (non-atomic) loads and stores.
3648+
returned. `p` has the type of the target word size, and `x` may be any integer type; note
3649+
that some targets require specific target features to be enabled in order to support 128-bit
3650+
integer atomics. The type of the returned value is the same as the type of `x`. This
3651+
operation is sequentially consistent and creates happens-before edges that order normal
3652+
(non-atomic) loads and stores.
36523653
"#,
36533654
&formats.atomic_rmw,
36543655
)
@@ -3673,11 +3674,11 @@ pub(crate) fn define(
36733674
Perform an atomic compare-and-swap operation on memory at `p`, with expected value `e`,
36743675
storing `x` if the value at `p` equals `e`. The old value at `p` is returned,
36753676
regardless of whether the operation succeeds or fails. `p` has the type of the target
3676-
word size, and `x` and `e` must have the same type and the same size, which may be an
3677-
integer type of 8, 16, 32 or 64 bits, even on a 32-bit target. The type of the returned
3678-
value is the same as the type of `x` and `e`. This operation is sequentially
3679-
consistent and creates happens-before edges that order normal (non-atomic) loads and
3680-
stores.
3677+
word size, and `x` and `e` must have the same type and the same size, which may be any
3678+
integer type; note that some targets require specific target features to be enabled in order
3679+
to support 128-bit integer atomics. The type of the returned value is the same as the type
3680+
of `x` and `e`. This operation is sequentially consistent and creates happens-before edges
3681+
that order normal (non-atomic) loads and stores.
36813682
"#,
36823683
&formats.atomic_cas,
36833684
)
@@ -3702,9 +3703,10 @@ pub(crate) fn define(
37023703
Atomically load from memory at `p`.
37033704
37043705
This is a polymorphic instruction that can load any value type which has a memory
3705-
representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
3706-
This operation is sequentially consistent and creates happens-before edges that order
3707-
normal (non-atomic) loads and stores.
3706+
representation. It can only be used for integer types; note that some targets require
3707+
specific target features to be enabled in order to support 128-bit integer atomics. This
3708+
operation is sequentially consistent and creates happens-before edges that order normal
3709+
(non-atomic) loads and stores.
37083710
"#,
37093711
&formats.load_no_offset,
37103712
)
@@ -3726,9 +3728,10 @@ pub(crate) fn define(
37263728
Atomically store `x` to memory at `p`.
37273729
37283730
This is a polymorphic instruction that can store any value type with a memory
3729-
representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
3730-
This operation is sequentially consistent and creates happens-before edges that order
3731-
normal (non-atomic) loads and stores.
3731+
representation. It can only be used for integer types; note that some targets require
3732+
specific target features to be enabled in order to support 128-bit integer atomics. This
3733+
operation is sequentially consistent and creates happens-before edges that order normal
3734+
(non-atomic) loads and stores.
37323735
"#,
37333736
&formats.store_no_offset,
37343737
)

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,6 +664,24 @@
664664
(mem SyntheticAmode)
665665
(dst_old WritableReg))
666666

667+
;; A standard (native) `lock cmpxchg16b (amode)`, with register
668+
;; conventions:
669+
;;
670+
;; `mem` (read) address
671+
;; %rbx (low), %rcx (high) (read) replacement value
672+
;; %rax (low), %rdx (high) (modified) in: expected value, out: value that was actually at `mem`
673+
;; %rflags is written. Do not assume anything about it after the instruction.
674+
;;
675+
;; The instruction "succeeded" iff the bits of %rax and %rdx
676+
;; afterwards are the same as they were before.
677+
(LockCmpxchg16b (replacement_low Reg)
678+
(replacement_high Reg)
679+
(expected_low Reg)
680+
(expected_high Reg)
681+
(mem BoxSyntheticAmode)
682+
(dst_old_low WritableReg)
683+
(dst_old_high WritableReg))
684+
667685
;; A synthetic instruction, based on a loop around a native `lock
668686
;; cmpxchg` instruction.
669687
;;
@@ -696,6 +714,46 @@
696714
(temp WritableReg)
697715
(dst_old WritableReg))
698716

717+
;; A synthetic instruction, based on a loop around a native `lock
718+
;; cmpxchg16b` instruction.
719+
;;
720+
;; This is the same as `AtomicRmwSeq`, but for 128-bit integers.
721+
;;
722+
;; For `MachAtomicRmwOp::Xchg`, use `Atomic128XchgSeq` instead.
723+
;;
724+
;; This instruction sequence has fixed register uses as follows:
725+
;; - %rax (low), %rdx (high) (written) the old value at `mem`
726+
;; - %rbx (low), %rcx (high) (written) used as temp registers to hold
727+
;; the replacement value
728+
;; - %rflags is written. Do not assume anything about it after the
729+
;; instruction.
730+
(Atomic128RmwSeq (op MachAtomicRmwOp)
731+
(mem BoxSyntheticAmode)
732+
(operand_low Reg)
733+
(operand_high Reg)
734+
(temp_low WritableReg)
735+
(temp_high WritableReg)
736+
(dst_old_low WritableReg)
737+
(dst_old_high WritableReg))
738+
739+
;; A synthetic instruction, based on a loop around a native `lock
740+
;; cmpxchg16b` instruction.
741+
;;
742+
;; This is `Atomic128RmwSeq` but only for `MachAtomicRmwOp::Xchg`. As
743+
;; the replacement value is the same every time, this instruction doesn't
744+
;; require any temporary registers.
745+
;;
746+
;; This instruction sequence has fixed register uses as follows:
747+
;; - %rax (low), %rdx (high) (written) the old value at `mem`
748+
;; - %rbx (low), %rcx (high) (read) the replacement value
749+
;; - %rflags is written. Do not assume anything about it after the
750+
;; instruction.
751+
(Atomic128XchgSeq (mem SyntheticAmode)
752+
(operand_low Reg)
753+
(operand_high Reg)
754+
(dst_old_low WritableReg)
755+
(dst_old_high WritableReg))
756+
699757
;; A memory fence (mfence, lfence or sfence).
700758
(Fence (kind FenceKind))
701759

@@ -762,6 +820,11 @@
762820
(type BoxCallIndInfo extern (enum))
763821
(type BoxReturnCallInfo extern (enum))
764822
(type BoxReturnCallIndInfo extern (enum))
823+
(type BoxSyntheticAmode extern (enum))
824+
825+
(decl pure box_synthetic_amode (SyntheticAmode) BoxSyntheticAmode)
826+
(extern constructor box_synthetic_amode box_synthetic_amode)
827+
(convert SyntheticAmode BoxSyntheticAmode box_synthetic_amode)
765828

766829
;; Get the `OperandSize` for a given `Type`, rounding smaller types up to 32 bits.
767830
(decl operand_size_of_type_32_64 (Type) OperandSize)
@@ -1862,6 +1925,9 @@
18621925
(decl pure use_avx2 () bool)
18631926
(extern constructor use_avx2 use_avx2)
18641927

1928+
(decl pure use_cmpxchg16b () bool)
1929+
(extern constructor use_cmpxchg16b use_cmpxchg16b)
1930+
18651931
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
18661932

18671933
;; Extract a constant `Imm8Reg.Imm8` from a value operand.
@@ -5214,13 +5280,54 @@
52145280
(_ Unit (emit (MInst.LockCmpxchg ty replacement expected addr dst))))
52155281
dst))
52165282

5283+
(decl x64_cmpxchg16b (ValueRegs ValueRegs SyntheticAmode) ValueRegs)
5284+
(rule (x64_cmpxchg16b expected replacement addr)
5285+
(let ((expected_low Gpr (value_regs_get_gpr expected 0))
5286+
(expected_high Gpr (value_regs_get_gpr expected 1))
5287+
(replacement_low Gpr (value_regs_get_gpr replacement 0))
5288+
(replacement_high Gpr (value_regs_get_gpr replacement 1))
5289+
(dst_low WritableGpr (temp_writable_gpr))
5290+
(dst_high WritableGpr (temp_writable_gpr))
5291+
(_ Unit (emit (MInst.LockCmpxchg16b replacement_low replacement_high expected_low expected_high addr dst_low dst_high))))
5292+
(value_regs dst_low dst_high)))
5293+
52175294
(decl x64_atomic_rmw_seq (Type MachAtomicRmwOp SyntheticAmode Gpr) Gpr)
52185295
(rule (x64_atomic_rmw_seq ty op mem input)
52195296
(let ((dst WritableGpr (temp_writable_gpr))
52205297
(tmp WritableGpr (temp_writable_gpr))
52215298
(_ Unit (emit (MInst.AtomicRmwSeq ty op mem input tmp dst))))
52225299
dst))
52235300

5301+
(decl x64_atomic_128_rmw_seq (MachAtomicRmwOp SyntheticAmode ValueRegs) ValueRegs)
5302+
(rule (x64_atomic_128_rmw_seq op mem input)
5303+
(let ((dst_low WritableGpr (temp_writable_gpr))
5304+
(dst_high WritableGpr (temp_writable_gpr))
5305+
(tmp_low WritableGpr (temp_writable_gpr))
5306+
(tmp_high WritableGpr (temp_writable_gpr))
5307+
(input_low Gpr (value_regs_get_gpr input 0))
5308+
(input_high Gpr (value_regs_get_gpr input 1))
5309+
(_ Unit (emit (MInst.Atomic128RmwSeq op mem input_low input_high tmp_low tmp_high dst_low dst_high))))
5310+
(value_regs dst_low dst_high)))
5311+
5312+
(rule 1 (x64_atomic_128_rmw_seq (mach_atomic_rmw_op_xchg) mem input)
5313+
(let ((dst_low WritableGpr (temp_writable_gpr))
5314+
(dst_high WritableGpr (temp_writable_gpr))
5315+
(input_low Gpr (value_regs_get_gpr input 0))
5316+
(input_high Gpr (value_regs_get_gpr input 1))
5317+
(_ Unit (emit (MInst.Atomic128XchgSeq mem input_low input_high dst_low dst_high))))
5318+
(value_regs dst_low dst_high)))
5319+
5320+
(decl x64_atomic_128_store_seq (SyntheticAmode ValueRegs) SideEffectNoResult)
5321+
(rule (x64_atomic_128_store_seq mem input)
5322+
(let ((dst_low WritableGpr (temp_writable_gpr))
5323+
(dst_high WritableGpr (temp_writable_gpr))
5324+
(input_low Gpr (value_regs_get_gpr input 0))
5325+
(input_high Gpr (value_regs_get_gpr input 1)))
5326+
(SideEffectNoResult.Inst (MInst.Atomic128XchgSeq mem input_low input_high dst_low dst_high))))
5327+
5328+
(decl mach_atomic_rmw_op_xchg () MachAtomicRmwOp)
5329+
(extern extractor mach_atomic_rmw_op_xchg mach_atomic_rmw_op_is_xchg)
5330+
52245331
;; CLIF IR has one enumeration for atomic operations (`AtomicRmwOp`) while the
52255332
;; mach backend has another (`MachAtomicRmwOp`)--this converts one to the other.
52265333
(type MachAtomicRmwOp extern (enum))

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,7 @@ pub enum CmpOpcode {
959959
pub(crate) enum InstructionSet {
960960
SSE,
961961
SSE2,
962+
CMPXCHG16b,
962963
SSSE3,
963964
SSE41,
964965
SSE42,

0 commit comments

Comments
 (0)