Skip to content

Commit 5fdd094

Browse files
authored
[clang][CodeGen] Emit atomic IR in place of optimized libcalls. (#73176)
Originally, Clang only emitted atomic IR for operations that it knew the underlying microarchitecture had instructions for, which meant Clang needed significant knowledge of the target. Later, the backend acquired the ability to lower atomic IR to libcalls. To avoid duplicating that logic and to improve its locality, we'd like to move as much of the decision as possible to the backend. There are many ways to describe this change; for example, it reduces the variables Clang uses to decide whether to emit libcalls or IR down to only the atomic's size.
1 parent 20948df commit 5fdd094

13 files changed

+497
-569
lines changed

clang/lib/CodeGen/CGAtomic.cpp

Lines changed: 46 additions & 275 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/LoongArch/atomics.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
1212
// LA32: load atomic i8, ptr %a seq_cst, align 1
1313
// LA32: store atomic i8 %b, ptr %a seq_cst, align 1
14-
// LA32: atomicrmw add ptr %a, i8 %b seq_cst
14+
// LA32: atomicrmw add ptr %a, i8 %b seq_cst, align 1
1515
// LA64: load atomic i8, ptr %a seq_cst, align 1
1616
// LA64: store atomic i8 %b, ptr %a seq_cst, align 1
17-
// LA64: atomicrmw add ptr %a, i8 %b seq_cst
17+
// LA64: atomicrmw add ptr %a, i8 %b seq_cst, align 1
1818
__c11_atomic_load(a, memory_order_seq_cst);
1919
__c11_atomic_store(a, b, memory_order_seq_cst);
2020
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
@@ -23,22 +23,22 @@ void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
2323
void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
2424
// LA32: load atomic i32, ptr %a seq_cst, align 4
2525
// LA32: store atomic i32 %b, ptr %a seq_cst, align 4
26-
// LA32: atomicrmw add ptr %a, i32 %b seq_cst
26+
// LA32: atomicrmw add ptr %a, i32 %b seq_cst, align 4
2727
// LA64: load atomic i32, ptr %a seq_cst, align 4
2828
// LA64: store atomic i32 %b, ptr %a seq_cst, align 4
29-
// LA64: atomicrmw add ptr %a, i32 %b seq_cst
29+
// LA64: atomicrmw add ptr %a, i32 %b seq_cst, align 4
3030
__c11_atomic_load(a, memory_order_seq_cst);
3131
__c11_atomic_store(a, b, memory_order_seq_cst);
3232
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
3333
}
3434

3535
void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
36-
// LA32: call i64 @__atomic_load_8
37-
// LA32: call void @__atomic_store_8
38-
// LA32: call i64 @__atomic_fetch_add_8
36+
// LA32: load atomic i64, ptr %a seq_cst, align 8
37+
// LA32: store atomic i64 %b, ptr %a seq_cst, align 8
38+
// LA32: atomicrmw add ptr %a, i64 %b seq_cst, align 8
3939
// LA64: load atomic i64, ptr %a seq_cst, align 8
4040
// LA64: store atomic i64 %b, ptr %a seq_cst, align 8
41-
// LA64: atomicrmw add ptr %a, i64 %b seq_cst
41+
// LA64: atomicrmw add ptr %a, i64 %b seq_cst, align 8
4242
__c11_atomic_load(a, memory_order_seq_cst);
4343
__c11_atomic_store(a, b, memory_order_seq_cst);
4444
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);

clang/test/CodeGen/PowerPC/quadword-atomics.c

Lines changed: 18 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,18 @@
11
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
2-
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64-QUADWORD-ATOMICS
2+
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
3+
// RUN: --check-prefixes=PPC64,PPC64-QUADWORD-ATOMICS
34
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \
4-
// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
5+
// RUN: -emit-llvm -o - %s | FileCheck %s \
6+
// RUN: --check-prefixes=PPC64,PPC64-NO-QUADWORD-ATOMICS
57
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
6-
// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
8+
// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s \
9+
// RUN: --check-prefixes=PPC64,PPC64-NO-QUADWORD-ATOMICS
710
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
8-
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64
11+
// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
12+
// RUN: --check-prefixes=PPC64,PPC64-NO-QUADWORD-ATOMICS
913
// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \
10-
// RUN: -mabi=quadword-atomics -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s \
11-
// RUN: --check-prefix=PPC64-QUADWORD-ATOMICS
14+
// RUN: -mabi=quadword-atomics -target-cpu pwr8 -emit-llvm -o - %s | \
15+
// RUN: FileCheck %s --check-prefixes=PPC64,PPC64-QUADWORD-ATOMICS
1216

1317

1418
typedef struct {
@@ -19,66 +23,48 @@ typedef _Atomic(Q) AtomicQ;
1923

2024
typedef __int128_t int128_t;
2125

22-
// PPC64-QUADWORD-ATOMICS-LABEL: @test_load(
23-
// PPC64-QUADWORD-ATOMICS: [[TMP3:%.*]] = load atomic i128, ptr [[TMP1:%.*]] acquire, align 16
24-
//
2526
// PPC64-LABEL: @test_load(
26-
// PPC64: call void @__atomic_load(i64 noundef 16, ptr noundef [[TMP3:%.*]], ptr noundef [[TMP4:%.*]], i32 noundef signext 2)
27+
// PPC64: [[TMP3:%.*]] = load atomic i128, ptr [[TMP1:%.*]] acquire, align 16
2728
//
2829
Q test_load(AtomicQ *ptr) {
2930
// expected-no-diagnostics
3031
return __c11_atomic_load(ptr, __ATOMIC_ACQUIRE);
3132
}
3233

33-
// PPC64-QUADWORD-ATOMICS-LABEL: @test_store(
34-
// PPC64-QUADWORD-ATOMICS: store atomic i128 [[TMP6:%.*]], ptr [[TMP4:%.*]] release, align 16
35-
//
3634
// PPC64-LABEL: @test_store(
37-
// PPC64: call void @__atomic_store(i64 noundef 16, ptr noundef [[TMP6:%.*]], ptr noundef [[TMP7:%.*]], i32 noundef signext 3)
35+
// PPC64: store atomic i128 [[TMP6:%.*]], ptr [[TMP4:%.*]] release, align 16
3836
//
3937
void test_store(Q val, AtomicQ *ptr) {
4038
// expected-no-diagnostics
4139
__c11_atomic_store(ptr, val, __ATOMIC_RELEASE);
4240
}
4341

44-
// PPC64-QUADWORD-ATOMICS-LABEL: @test_add(
45-
// PPC64-QUADWORD-ATOMICS: [[TMP3:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
46-
//
4742
// PPC64-LABEL: @test_add(
48-
// PPC64: [[CALL:%.*]] = call i128 @__atomic_fetch_add_16(ptr noundef [[TMP2:%.*]], i128 noundef [[TMP3:%.*]], i32 noundef signext 0)
43+
// PPC64: [[ATOMICRMW:%.*]] = atomicrmw add ptr [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16
4944
//
5045
void test_add(_Atomic(int128_t) *ptr, int128_t x) {
5146
// expected-no-diagnostics
5247
__c11_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED);
5348
}
5449

55-
// PPC64-QUADWORD-ATOMICS-LABEL: @test_xchg(
56-
// PPC64-QUADWORD-ATOMICS: [[TMP8:%.*]] = atomicrmw xchg ptr [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
57-
//
5850
// PPC64-LABEL: @test_xchg(
59-
// PPC64: call void @__atomic_exchange(i64 noundef 16, ptr noundef [[TMP7:%.*]], ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], i32 noundef signext 5)
51+
// PPC64: [[TMP8:%.*]] = atomicrmw xchg ptr [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16
6052
//
6153
Q test_xchg(AtomicQ *ptr, Q new) {
6254
// expected-no-diagnostics
6355
return __c11_atomic_exchange(ptr, new, __ATOMIC_SEQ_CST);
6456
}
6557

66-
// PPC64-QUADWORD-ATOMICS-LABEL: @test_cmpxchg(
67-
// PPC64-QUADWORD-ATOMICS: [[TMP10:%.*]] = cmpxchg ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
68-
//
6958
// PPC64-LABEL: @test_cmpxchg(
70-
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], ptr noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
59+
// PPC64: [[TMP10:%.*]] = cmpxchg ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
7160
//
7261
int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) {
7362
// expected-no-diagnostics
7463
return __c11_atomic_compare_exchange_strong(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
7564
}
7665

77-
// PPC64-QUADWORD-ATOMICS-LABEL: @test_cmpxchg_weak(
78-
// PPC64-QUADWORD-ATOMICS: [[TMP10:%.*]] = cmpxchg weak ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
79-
//
8066
// PPC64-LABEL: @test_cmpxchg_weak(
81-
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[TMP8:%.*]], ptr noundef [[TMP9:%.*]], ptr noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0)
67+
// PPC64: [[TMP10:%.*]] = cmpxchg weak ptr [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16
8268
//
8369
int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
8470
// expected-no-diagnostics
@@ -88,8 +74,8 @@ int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) {
8874
// PPC64-QUADWORD-ATOMICS-LABEL: @is_lock_free(
8975
// PPC64-QUADWORD-ATOMICS: ret i32 1
9076
//
91-
// PPC64-LABEL: @is_lock_free(
92-
// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, ptr noundef null)
77+
// PPC64-NO-QUADWORD-ATOMICS-LABEL: @is_lock_free(
78+
// PPC64-NO-QUADWORD-ATOMICS: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, ptr noundef null)
9379
//
9480
int is_lock_free() {
9581
AtomicQ q;
Lines changed: 17 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,68 +1,34 @@
11
// RUN: %clang_cc1 -triple riscv32 -O1 -emit-llvm %s -o - \
2-
// RUN: | FileCheck %s -check-prefix=RV32I
2+
// RUN: -verify=no-atomics
33
// RUN: %clang_cc1 -triple riscv32 -target-feature +a -O1 -emit-llvm %s -o - \
4-
// RUN: | FileCheck %s -check-prefix=RV32IA
4+
// RUN: -verify=small-atomics
55
// RUN: %clang_cc1 -triple riscv64 -O1 -emit-llvm %s -o - \
6-
// RUN: | FileCheck %s -check-prefix=RV64I
6+
// RUN: -verify=no-atomics
77
// RUN: %clang_cc1 -triple riscv64 -target-feature +a -O1 -emit-llvm %s -o - \
8-
// RUN: | FileCheck %s -check-prefix=RV64IA
8+
// RUN: -verify=all-atomics
99

10-
// This test demonstrates that MaxAtomicInlineWidth is set appropriately when
11-
// the atomics instruction set extension is enabled.
10+
// all-atomics-no-diagnostics
1211

1312
#include <stdatomic.h>
1413
#include <stdint.h>
1514

1615
void test_i8_atomics(_Atomic(int8_t) * a, int8_t b) {
17-
// RV32I: call zeroext i8 @__atomic_load_1
18-
// RV32I: call void @__atomic_store_1
19-
// RV32I: call zeroext i8 @__atomic_fetch_add_1
20-
// RV32IA: load atomic i8, ptr %a seq_cst, align 1
21-
// RV32IA: store atomic i8 %b, ptr %a seq_cst, align 1
22-
// RV32IA: atomicrmw add ptr %a, i8 %b seq_cst, align 1
23-
// RV64I: call zeroext i8 @__atomic_load_1
24-
// RV64I: call void @__atomic_store_1
25-
// RV64I: call zeroext i8 @__atomic_fetch_add_1
26-
// RV64IA: load atomic i8, ptr %a seq_cst, align 1
27-
// RV64IA: store atomic i8 %b, ptr %a seq_cst, align 1
28-
// RV64IA: atomicrmw add ptr %a, i8 %b seq_cst, align 1
29-
__c11_atomic_load(a, memory_order_seq_cst);
30-
__c11_atomic_store(a, b, memory_order_seq_cst);
31-
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
16+
__c11_atomic_load(a, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (1 bytes) exceeds the max lock-free size (0 bytes)}}
17+
__c11_atomic_store(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (1 bytes) exceeds the max lock-free size (0 bytes)}}
18+
__c11_atomic_fetch_add(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (1 bytes) exceeds the max lock-free size (0 bytes)}}
3219
}
3320

3421
void test_i32_atomics(_Atomic(int32_t) * a, int32_t b) {
35-
// RV32I: call i32 @__atomic_load_4
36-
// RV32I: call void @__atomic_store_4
37-
// RV32I: call i32 @__atomic_fetch_add_4
38-
// RV32IA: load atomic i32, ptr %a seq_cst, align 4
39-
// RV32IA: store atomic i32 %b, ptr %a seq_cst, align 4
40-
// RV32IA: atomicrmw add ptr %a, i32 %b seq_cst, align 4
41-
// RV64I: call signext i32 @__atomic_load_4
42-
// RV64I: call void @__atomic_store_4
43-
// RV64I: call signext i32 @__atomic_fetch_add_4
44-
// RV64IA: load atomic i32, ptr %a seq_cst, align 4
45-
// RV64IA: store atomic i32 %b, ptr %a seq_cst, align 4
46-
// RV64IA: atomicrmw add ptr %a, i32 %b seq_cst, align 4
47-
__c11_atomic_load(a, memory_order_seq_cst);
48-
__c11_atomic_store(a, b, memory_order_seq_cst);
49-
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
22+
__c11_atomic_load(a, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (4 bytes) exceeds the max lock-free size (0 bytes)}}
23+
__c11_atomic_store(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (4 bytes) exceeds the max lock-free size (0 bytes)}}
24+
__c11_atomic_fetch_add(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (4 bytes) exceeds the max lock-free size (0 bytes)}}
5025
}
5126

5227
void test_i64_atomics(_Atomic(int64_t) * a, int64_t b) {
53-
// RV32I: call i64 @__atomic_load_8
54-
// RV32I: call void @__atomic_store_8
55-
// RV32I: call i64 @__atomic_fetch_add_8
56-
// RV32IA: call i64 @__atomic_load_8
57-
// RV32IA: call void @__atomic_store_8
58-
// RV32IA: call i64 @__atomic_fetch_add_8
59-
// RV64I: call i64 @__atomic_load_8
60-
// RV64I: call void @__atomic_store_8
61-
// RV64I: call i64 @__atomic_fetch_add_8
62-
// RV64IA: load atomic i64, ptr %a seq_cst, align 8
63-
// RV64IA: store atomic i64 %b, ptr %a seq_cst, align 8
64-
// RV64IA: atomicrmw add ptr %a, i64 %b seq_cst, align 8
65-
__c11_atomic_load(a, memory_order_seq_cst);
66-
__c11_atomic_store(a, b, memory_order_seq_cst);
67-
__c11_atomic_fetch_add(a, b, memory_order_seq_cst);
28+
__c11_atomic_load(a, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (0 bytes)}}
29+
// small-atomics-warning@28 {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (4 bytes)}}
30+
__c11_atomic_store(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (0 bytes)}}
31+
// small-atomics-warning@30 {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (4 bytes)}}
32+
__c11_atomic_fetch_add(a, b, memory_order_seq_cst); // no-atomics-warning {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (0 bytes)}}
33+
// small-atomics-warning@32 {{large atomic operation may incur significant performance penalty; the access size (8 bytes) exceeds the max lock-free size (4 bytes)}}
6834
}

0 commit comments

Comments
 (0)