Skip to content

Commit 0022382

Browse files
committed
Improve code generation for thread_local variables:
Summary:
* Don't bother using a thread wrapper when the variable is known to have constant initialization.
* Emit the thread wrapper as discardable-if-unused in TUs that don't contain a definition of the thread_local variable.
* Don't emit the thread wrapper at all if the thread_local variable is unused and discardable; it will be emitted by all TUs that need it.

Reviewers: rjmccall, jdoerfert
Subscribers: cfe-commits
Tags: #clang
Differential Revision: https://reviews.llvm.org/D67429
llvm-svn: 371767
1 parent 5806022 commit 0022382

10 files changed

+165
-62
lines changed

clang/include/clang/Basic/Linkage.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ inline bool isDiscardableGVALinkage(GVALinkage L) {
8282
return L <= GVA_DiscardableODR;
8383
}
8484

85+
/// Do we know that this will be the only definition of this symbol (excluding
86+
/// inlining-only definitions)?
/// Answers yes only for GVA_Internal and GVA_StrongExternal; any other
/// GVALinkage value yields false.
87+
inline bool isUniqueGVALinkage(GVALinkage L) {
88+
return L == GVA_Internal || L == GVA_StrongExternal;
89+
}
90+
8591
/// Report whether linkage \p L is externally visible, i.e. whether it orders at
/// or above VisibleNoLinkage among the Linkage enumerators.
inline bool isExternallyVisible(Linkage L) {
8692
return L >= VisibleNoLinkage;
8793
}

clang/lib/CodeGen/CGCXXABI.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -577,7 +577,7 @@ class CGCXXABI {
577577

578578
// Determine if references to thread_local global variables can be made
579579
// directly or require access through a thread wrapper function.
580-
virtual bool usesThreadWrapperFunction() const = 0;
580+
virtual bool usesThreadWrapperFunction(const VarDecl *VD) const = 0;
581581

582582
/// Emit a reference to a non-local thread_local variable (including
583583
/// triggering the initialization of all thread_local variables in its

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2361,7 +2361,7 @@ static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF,
23612361

23622362
// If it's thread_local, emit a call to its wrapper function instead.
23632363
if (VD->getTLSKind() == VarDecl::TLS_Dynamic &&
2364-
CGF.CGM.getCXXABI().usesThreadWrapperFunction())
2364+
CGF.CGM.getCXXABI().usesThreadWrapperFunction(VD))
23652365
return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T);
23662366
// Check if the variable is marked as declare target with link clause in
23672367
// device codegen.

clang/lib/CodeGen/ItaniumCXXABI.cpp

Lines changed: 74 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
4343
/// VTables - All the vtables which have been defined.
4444
llvm::DenseMap<const CXXRecordDecl *, llvm::GlobalVariable *> VTables;
4545

46+
/// All the thread wrapper functions that have been used.
47+
llvm::SmallVector<std::pair<const VarDecl *, llvm::Function *>, 8>
48+
ThreadWrappers;
49+
4650
protected:
4751
bool UseARMMethodPtrABI;
4852
bool UseARMGuardVarABI;
@@ -322,7 +326,42 @@ class ItaniumCXXABI : public CodeGen::CGCXXABI {
322326
ArrayRef<llvm::Function *> CXXThreadLocalInits,
323327
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
324328

325-
bool usesThreadWrapperFunction() const override { return true; }
329+
/// Determine whether we will definitely emit this variable with a constant
330+
/// initializer, either because the language semantics demand it or because
331+
/// we know that the initializer is a constant.
/// Returns false whenever that cannot be established from the declaration;
/// usesThreadWrapperFunction() is the negation of this result.
332+
bool isEmittedWithConstantInitializer(const VarDecl *VD) const {
333+
// Work from the most recent redeclaration for the attribute, weakness and
// linkage queries below.
VD = VD->getMostRecentDecl();
334+
if (VD->hasAttr<ConstInitAttr>())
335+
return true;
336+
337+
// All later checks examine the initializer specified on the variable. If
338+
// the variable is weak, such examination would not be correct.
339+
if (VD->isWeak() || VD->hasAttr<SelectAnyAttr>())
340+
return false;
341+
342+
const VarDecl *InitDecl = VD->getInitializingDeclaration();
343+
// Without an initializing declaration there is nothing to examine, so the
// answer is "not known to be constant".
if (!InitDecl)
344+
return false;
345+
346+
// If there's no initializer to run, this is constant initialization.
347+
if (!InitDecl->hasInit())
348+
return true;
349+
350+
// If we have the only definition, we don't need a thread wrapper if we
351+
// will emit the value as a constant.
// NOTE(review): evaluateValue() presumably yields null when the initializer
// cannot be constant-evaluated -- confirm against the VarDecl documentation.
352+
if (isUniqueGVALinkage(getContext().GetGVALinkageForVariable(VD)))
353+
return !VD->getType().isDestructedType() && InitDecl->evaluateValue();
354+
355+
// Otherwise, we need a thread wrapper unless we know that every
356+
// translation unit will emit the value as a constant. We rely on
357+
// ICE-ness not varying between translation units, which isn't actually
358+
// guaranteed by the standard but is necessary for sanity.
359+
return InitDecl->isInitKnownICE() && InitDecl->isInitICE();
360+
}
361+
362+
/// A thread wrapper is used for \p VD exactly when the variable is not known
/// to be emitted with a constant initializer.
bool usesThreadWrapperFunction(const VarDecl *VD) const override {
363+
return !isEmittedWithConstantInitializer(VD);
364+
}
326365
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
327366
QualType LValType) override;
328367

@@ -2456,9 +2495,6 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD,
24562495

24572496
CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Wrapper);
24582497

2459-
if (VD->hasDefinition())
2460-
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
2461-
24622498
// Always resolve references to the wrapper at link time.
24632499
if (!Wrapper->hasLocalLinkage())
24642500
if (!isThreadWrapperReplaceable(VD, CGM) ||
@@ -2471,6 +2507,8 @@ ItaniumCXXABI::getOrCreateThreadLocalWrapper(const VarDecl *VD,
24712507
Wrapper->setCallingConv(llvm::CallingConv::CXX_FAST_TLS);
24722508
Wrapper->addFnAttr(llvm::Attribute::NoUnwind);
24732509
}
2510+
2511+
ThreadWrappers.push_back({VD, Wrapper});
24742512
return Wrapper;
24752513
}
24762514

@@ -2519,20 +2557,40 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
25192557
}
25202558
}
25212559

2522-
// Emit thread wrappers.
2560+
// Create declarations for thread wrappers for all thread-local variables
2561+
// with non-discardable definitions in this translation unit.
25232562
for (const VarDecl *VD : CXXThreadLocals) {
2563+
if (VD->hasDefinition() &&
2564+
!isDiscardableGVALinkage(getContext().GetGVALinkageForVariable(VD))) {
2565+
llvm::GlobalValue *GV = CGM.GetGlobalValue(CGM.getMangledName(VD));
2566+
getOrCreateThreadLocalWrapper(VD, GV);
2567+
}
2568+
}
2569+
2570+
// Emit all referenced thread wrappers.
2571+
for (auto VDAndWrapper : ThreadWrappers) {
2572+
const VarDecl *VD = VDAndWrapper.first;
25242573
llvm::GlobalVariable *Var =
25252574
cast<llvm::GlobalVariable>(CGM.GetGlobalValue(CGM.getMangledName(VD)));
2526-
llvm::Function *Wrapper = getOrCreateThreadLocalWrapper(VD, Var);
2575+
llvm::Function *Wrapper = VDAndWrapper.second;
25272576

25282577
// Some targets require that all access to thread local variables go through
25292578
// the thread wrapper. This means that we cannot attempt to create a thread
25302579
// wrapper or a thread helper.
2531-
if (isThreadWrapperReplaceable(VD, CGM) && !VD->hasDefinition()) {
2532-
Wrapper->setLinkage(llvm::Function::ExternalLinkage);
2533-
continue;
2580+
if (!VD->hasDefinition()) {
2581+
if (isThreadWrapperReplaceable(VD, CGM)) {
2582+
Wrapper->setLinkage(llvm::Function::ExternalLinkage);
2583+
continue;
2584+
}
2585+
2586+
// If this isn't a TU in which this variable is defined, the thread
2587+
// wrapper is discardable.
2588+
if (Wrapper->getLinkage() == llvm::Function::WeakODRLinkage)
2589+
Wrapper->setLinkage(llvm::Function::LinkOnceODRLinkage);
25342590
}
25352591

2592+
CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Wrapper);
2593+
25362594
// Mangle the name for the thread_local initialization function.
25372595
SmallString<256> InitFnName;
25382596
{
@@ -2547,7 +2605,10 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
25472605
// produce a declaration of the initialization function.
25482606
llvm::GlobalValue *Init = nullptr;
25492607
bool InitIsInitFunc = false;
2550-
if (VD->hasDefinition()) {
2608+
bool HasConstantInitialization = false;
2609+
if (isEmittedWithConstantInitializer(VD)) {
2610+
HasConstantInitialization = true;
2611+
} else if (VD->hasDefinition()) {
25512612
InitIsInitFunc = true;
25522613
llvm::Function *InitFuncToUse = InitFunc;
25532614
if (isTemplateInstantiation(VD->getTemplateSpecializationKind()))
@@ -2576,7 +2637,9 @@ void ItaniumCXXABI::EmitThreadLocalInitFuncs(
25762637
llvm::LLVMContext &Context = CGM.getModule().getContext();
25772638
llvm::BasicBlock *Entry = llvm::BasicBlock::Create(Context, "", Wrapper);
25782639
CGBuilderTy Builder(CGM, Entry);
2579-
if (InitIsInitFunc) {
2640+
if (HasConstantInitialization) {
2641+
// No dynamic initialization to invoke.
2642+
} else if (InitIsInitFunc) {
25802643
if (Init) {
25812644
llvm::CallInst *CallVal = Builder.CreateCall(InitFnTy, Init);
25822645
if (isThreadWrapperReplaceable(VD, CGM)) {

clang/lib/CodeGen/MicrosoftCXXABI.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,9 @@ class MicrosoftCXXABI : public CGCXXABI {
386386
ArrayRef<llvm::Function *> CXXThreadLocalInits,
387387
ArrayRef<const VarDecl *> CXXThreadLocalInitVars) override;
388388

389-
bool usesThreadWrapperFunction() const override { return false; }
389+
/// Always answers false for this ABI; \p VD is not consulted.
bool usesThreadWrapperFunction(const VarDecl *VD) const override {
390+
return false;
391+
}
390392
LValue EmitThreadLocalVarDeclLValue(CodeGenFunction &CGF, const VarDecl *VD,
391393
QualType LValType) override;
392394

clang/test/CodeGenCXX/cxx11-thread-local.cpp

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -91,15 +91,16 @@ void *e2 = V<char>::m + W<char>::m + &X<char>::m;
9191

9292
// LINUX-DAG: @_ZTH1a = alias void (), void ()* @__tls_init
9393
// DARWIN-DAG: @_ZTH1a = internal alias void (), void ()* @__tls_init
94-
// CHECK-DAG: @_ZTHL1d = internal alias void (), void ()* @__tls_init
9594
// LINUX-DAG: @_ZTHN1U1mE = alias void (), void ()* @__tls_init
9695
// DARWIN-DAG: @_ZTHN1U1mE = internal alias void (), void ()* @__tls_init
9796
// CHECK-DAG: @_ZTHN1VIiE1mE = linkonce_odr alias void (), void ()* @[[V_M_INIT:[^, ]*]]
98-
// CHECK-NOT: @_ZTHN1WIiE1mE =
9997
// CHECK-DAG: @_ZTHN1XIiE1mE = linkonce_odr alias void (), void ()* @[[X_M_INIT:[^, ]*]]
10098
// CHECK-DAG: @_ZTHN1VIfE1mE = weak_odr alias void (), void ()* @[[VF_M_INIT:[^, ]*]]
101-
// CHECK-NOT: @_ZTHN1WIfE1mE =
10299
// CHECK-DAG: @_ZTHN1XIfE1mE = weak_odr alias void (), void ()* @[[XF_M_INIT:[^, ]*]]
100+
// FIXME: We really want a CHECK-DAG-NOT for these.
101+
// CHECK-NOT: @_ZTHN1WIiE1mE =
102+
// CHECK-NOT: @_ZTHN1WIfE1mE =
103+
// CHECK-NOT: @_ZTHL1d =
103104

104105

105106
// Individual variable initialization functions:
@@ -130,7 +131,7 @@ int f() {
130131
// CHECK-NEXT: load i32, i32* %{{.*}}, align 4
131132
// CHECK-NEXT: store i32 %{{.*}}, i32* @c, align 4
132133

133-
// LINUX-LABEL: define weak_odr hidden i32* @_ZTW1b()
134+
// LINUX-LABEL: define linkonce_odr hidden i32* @_ZTW1b()
134135
// LINUX: br i1 icmp ne (void ()* @_ZTH1b, void ()* null),
135136
// not null:
136137
// LINUX: call void @_ZTH1b()
@@ -203,21 +204,21 @@ int f() {
203204
// DARWIN: declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
204205

205206
// DARWIN: declare cxx_fast_tlscc i32* @_ZTWN1VIcE1mE()
206-
// LINUX: define weak_odr hidden i32* @_ZTWN1VIcE1mE()
207+
// LINUX: define linkonce_odr hidden i32* @_ZTWN1VIcE1mE()
207208
// LINUX-NOT: comdat
208209
// LINUX: br i1 icmp ne (void ()* @_ZTHN1VIcE1mE,
209210
// LINUX: call void @_ZTHN1VIcE1mE()
210211
// LINUX: ret i32* @_ZN1VIcE1mE
211212

212213
// DARWIN: declare cxx_fast_tlscc i32* @_ZTWN1WIcE1mE()
213-
// LINUX: define weak_odr hidden i32* @_ZTWN1WIcE1mE()
214+
// LINUX: define linkonce_odr hidden i32* @_ZTWN1WIcE1mE()
214215
// LINUX-NOT: comdat
215216
// LINUX: br i1 icmp ne (void ()* @_ZTHN1WIcE1mE,
216217
// LINUX: call void @_ZTHN1WIcE1mE()
217218
// LINUX: ret i32* @_ZN1WIcE1mE
218219

219220
// DARWIN: declare cxx_fast_tlscc {{.*}}* @_ZTWN1XIcE1mE()
220-
// LINUX: define weak_odr hidden {{.*}}* @_ZTWN1XIcE1mE()
221+
// LINUX: define linkonce_odr hidden {{.*}}* @_ZTWN1XIcE1mE()
221222
// LINUX-NOT: comdat
222223
// LINUX: br i1 icmp ne (void ()* @_ZTHN1XIcE1mE,
223224
// LINUX: call void @_ZTHN1XIcE1mE()
@@ -269,7 +270,7 @@ int PR19254::f() {
269270
}
270271

271272
namespace {
272-
thread_local int anon_i{1};
273+
thread_local int anon_i{f()};
273274
}
274275
void set_anon_i() {
275276
anon_i = 2;
@@ -332,19 +333,17 @@ void set_anon_i() {
332333
// CHECK: }
333334

334335

335-
// LINUX: declare extern_weak void @_ZTH1b() [[ATTR:#[0-9]+]]
336-
337-
338-
// LINUX-LABEL: define internal i32* @_ZTWL1d()
339-
// DARWIN-LABEL: define internal cxx_fast_tlscc i32* @_ZTWL1d()
340-
// LINUX: call void @_ZTHL1d()
341-
// DARWIN: call cxx_fast_tlscc void @_ZTHL1d()
342-
// CHECK: ret i32* @_ZL1d
336+
// Should not emit a thread wrapper for internal-linkage unused variable 'd'.
337+
// We separately check that 'd' does in fact get initialized with the other
338+
// thread-local variables in this TU.
339+
// CHECK-NOT: define {{.*}} @_ZTWL1d()
343340

344341
// LINUX-LABEL: define weak_odr hidden i32* @_ZTWN1U1mE()
345342
// DARWIN-LABEL: define cxx_fast_tlscc i32* @_ZTWN1U1mE()
346343
// LINUX: call void @_ZTHN1U1mE()
347344
// DARWIN: call cxx_fast_tlscc void @_ZTHN1U1mE()
348345
// CHECK: ret i32* @_ZN1U1mE
349346

347+
// LINUX: declare extern_weak void @_ZTH1b() [[ATTR:#[0-9]+]]
348+
350349
// LINUX: attributes [[ATTR]] = { {{.+}} }
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// RUN: %clang_cc1 -triple x86_64-linux-gnu -std=c++2a %s -emit-llvm -o - | FileCheck %s
2+
3+
// CHECK-DAG: @a = external thread_local global i32
4+
extern thread_local int a;
5+
6+
// CHECK-DAG: @b = external thread_local global i32
7+
extern thread_local constinit int b;
8+
9+
// CHECK-LABEL: define i32 @_Z1fv()
10+
// CHECK: call i32* @_ZTW1a()
11+
// CHECK: }
12+
int f() { return a; }
13+
14+
// CHECK-LABEL: define linkonce_odr {{.*}} @_ZTW1a()
15+
// CHECK: br i1
16+
// CHECK: call void @_ZTH1a()
17+
// CHECK: }
18+
19+
// CHECK-LABEL: define i32 @_Z1gv()
20+
// CHECK-NOT: call
21+
// CHECK: load i32, i32* @b
22+
// CHECK-NOT: call
23+
// CHECK: }
24+
int g() { return b; }
25+
26+
// CHECK-NOT: define {{.*}} @_ZTW1b()
27+
28+
extern thread_local int c;
29+
30+
// CHECK-LABEL: define i32 @_Z1hv()
31+
// CHECK: call i32* @_ZTW1c()
32+
// CHECK: load i32, i32* %
33+
// CHECK: }
34+
int h() { return c; }
35+
36+
thread_local int c = 0;
37+
38+
int d_init();
39+
thread_local int d = d_init();
40+
41+
// Note: use of 'c' does not trigger initialization of 'd', because 'c' has a
42+
// constant initializer.
43+
// CHECK-LABEL: define weak_odr {{.*}} @_ZTW1c()
44+
// CHECK-NOT: br i1
45+
// CHECK-NOT: call
46+
// CHECK: ret i32* @c
47+
// CHECK: }

clang/test/CodeGenCXX/tls-init-funcs.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
// CHECK: @_tlv_atexit({{.*}}@_ZN1AD1Ev
77
// CHECK: call cxx_fast_tlscc i32* @_ZTW3ext()
88
// CHECK: declare cxx_fast_tlscc i32* @_ZTW3ext()
9-
// CHECK: define weak_odr hidden cxx_fast_tlscc i32* @_ZTW2vtIiE()
10-
// CHECK: define weak_odr hidden cxx_fast_tlscc i32* @_ZTW2vtIvE()
11-
// CHECK: define {{.*}} @_ZTW1a
9+
// CHECK-DAG: define weak_odr hidden cxx_fast_tlscc i32* @_ZTW2vtIiE()
10+
// CHECK-DAG: define weak_odr hidden cxx_fast_tlscc i32* @_ZTW2vtIvE()
11+
// CHECK-DAG: define {{.*}} @_ZTW1a
1212

1313
struct A {
1414
~A();
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
// RUN: %clang_cc1 -triple thumbv7--windows-itanium -fdeclspec -fms-compatibility -fms-compatibility-version=19.0 -S -emit-llvm -o - %s | FileCheck %s
1+
// RUN: %clang_cc1 -triple thumbv7--windows-itanium -fdeclspec -fms-compatibility -fms-compatibility-version=19.0 -emit-llvm -o - %s | FileCheck %s
22

3-
__declspec(thread) static void *c;
3+
void *g();
4+
thread_local static void *c = g();
45
void f(void *p) {
56
c = p;
67
}
78

8-
// CHECK-LABEL: @f(i8* %p)
9+
// CHECK-LABEL: @_Z1fPv(i8* %p)
910
// CHECK-NOT: call i8** @_ZTWL1c()
1011
// CHECK: call arm_aapcs_vfpcc i8** @_ZTWL1c()
1112

0 commit comments

Comments
 (0)