Skip to content

Commit f32f6d1

Browse files
authored
[flang][cuda] Implicitly load cudadevice module in device/global subprogram (#91668)
Some functions and subroutines are only available in device context (device/global subprograms). Their interfaces are declared in the `cudadevice` module. This patch declares each interface as `__cuda_device_builtins_<fctname>` in a builtin module; the `cudadevice` module then USE-associates them and renames them to their user-facing names. The `cudadevice` module is implicitly used in device/global subprograms. For now, the builtin module only contains the procedures from section 3.6.4.
1 parent a7eff59 commit f32f6d1

File tree

9 files changed

+181
-1
lines changed

9 files changed

+181
-1
lines changed

flang/include/flang/Semantics/semantics.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,8 +215,10 @@ class SemanticsContext {
215215
void UseFortranBuiltinsModule();
216216
const Scope *GetBuiltinsScope() const { return builtinsScope_; }
217217

218-
void UsePPCBuiltinTypesModule();
219218
const Scope &GetCUDABuiltinsScope();
219+
const Scope &GetCUDADeviceScope();
220+
221+
void UsePPCBuiltinTypesModule();
220222
void UsePPCBuiltinsModule();
221223
Scope *GetPPCBuiltinTypesScope() { return ppcBuiltinTypesScope_; }
222224
const Scope *GetPPCBuiltinsScope() const { return ppcBuiltinsScope_; }
@@ -292,6 +294,7 @@ class SemanticsContext {
292294
const Scope *builtinsScope_{nullptr}; // module __Fortran_builtins
293295
Scope *ppcBuiltinTypesScope_{nullptr}; // module __Fortran_PPC_types
294296
std::optional<const Scope *> cudaBuiltinsScope_; // module __CUDA_builtins
297+
std::optional<const Scope *> cudaDeviceScope_; // module cudadevice
295298
const Scope *ppcBuiltinsScope_{nullptr}; // module __ppc_intrinsics
296299
std::list<parser::Program> modFileParseTrees_;
297300
std::unique_ptr<CommonBlockMap> commonBlockMap_;

flang/lib/Semantics/check-cuda.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,11 @@ struct DeviceExprChecker
8282
}
8383
}
8484
}
85+
if (sym->owner().IsModule() &&
86+
sym->owner().parent().IsIntrinsicModules() &&
87+
DEREF(sym->owner().symbol()).name() == "__cuda_device_builtins") {
88+
return {};
89+
}
8590
} else if (x.GetSpecificIntrinsic()) {
8691
// TODO(CUDA): Check for unsupported intrinsics here
8792
return {};

flang/lib/Semantics/resolve-names.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3797,6 +3797,19 @@ bool SubprogramVisitor::Pre(const parser::PrefixSpec::Attributes &attrs) {
37973797
subp->set_cudaSubprogramAttrs(attr);
37983798
}
37993799
}
3800+
if (auto attrs{subp->cudaSubprogramAttrs()}) {
3801+
if (*attrs == common::CUDASubprogramAttrs::Global ||
3802+
*attrs == common::CUDASubprogramAttrs::Device) {
3803+
// Implicitly USE the cudadevice module by copying its symbols in the
3804+
// current scope.
3805+
const Scope &scope{context().GetCUDADeviceScope()};
3806+
for (auto sym : scope.GetSymbols()) {
3807+
if (!currScope().FindSymbol(sym->name())) {
3808+
currScope().CopySymbol(sym);
3809+
}
3810+
}
3811+
}
3812+
}
38003813
}
38013814
return false;
38023815
}

flang/lib/Semantics/semantics.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,14 @@ const Scope &SemanticsContext::GetCUDABuiltinsScope() {
543543
return **cudaBuiltinsScope_;
544544
}
545545

546+
// Returns the scope of the builtin module `cudadevice`.
// The module is looked up through GetBuiltinModule() the first time this is
// called and the resulting scope pointer is cached in cudaDeviceScope_; later
// calls return the cached scope. A null lookup result trips the CHECK.
const Scope &SemanticsContext::GetCUDADeviceScope() {
  if (cudaDeviceScope_.has_value()) {
    return **cudaDeviceScope_;
  }
  cudaDeviceScope_ = GetBuiltinModule("cudadevice");
  const Scope *cachedScope{*cudaDeviceScope_};
  CHECK(cachedScope != nullptr);
  return *cachedScope;
}
553+
546554
void SemanticsContext::UsePPCBuiltinsModule() {
547555
if (ppcBuiltinsScope_ == nullptr) {
548556
ppcBuiltinsScope_ = GetBuiltinModule("__ppc_intrinsics");
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
!===-- module/__cuda_device_builtins.f90 -----------------------------------===!
2+
!
3+
! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
! See https://llvm.org/LICENSE.txt for license information.
5+
! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
!
7+
!===------------------------------------------------------------------------===!
8+
9+
! CUDA Fortran procedures available in device subprogram
10+
11+
module __CUDA_device_builtins

  implicit none

  ! Set PRIVATE by default to explicitly only export what is meant
  ! to be exported by this MODULE.
  private

  ! Every procedure is declared here as __cuda_device_builtins_<name>; the
  ! cudadevice module USE-associates each one and renames it to <name>.
  !
  ! An interface body does not inherit the IMPLICIT NONE above, so each dummy
  ! argument is declared explicitly rather than left to default implicit
  ! typing.

  ! Synchronization Functions

  interface
    subroutine __cuda_device_builtins_syncthreads()
    end subroutine
  end interface
  public :: __cuda_device_builtins_syncthreads

  interface
    integer function __cuda_device_builtins_syncthreads_and(value)
      integer :: value
    end function
  end interface
  public :: __cuda_device_builtins_syncthreads_and

  interface
    integer function __cuda_device_builtins_syncthreads_count(value)
      integer :: value
    end function
  end interface
  public :: __cuda_device_builtins_syncthreads_count

  interface
    integer function __cuda_device_builtins_syncthreads_or(int_value)
      ! Previously undeclared (implicitly INTEGER because of the leading 'i');
      ! now declared explicitly like the sibling interfaces. The dummy name is
      ! kept so keyword calls remain valid.
      integer :: int_value
    end function
  end interface
  public :: __cuda_device_builtins_syncthreads_or

  interface
    subroutine __cuda_device_builtins_syncwarp(mask)
      integer :: mask
    end subroutine
  end interface
  public :: __cuda_device_builtins_syncwarp

  ! Memory Fences

  interface
    subroutine __cuda_device_builtins_threadfence()
    end subroutine
  end interface
  public :: __cuda_device_builtins_threadfence

  interface
    subroutine __cuda_device_builtins_threadfence_block()
    end subroutine
  end interface
  public :: __cuda_device_builtins_threadfence_block

  interface
    subroutine __cuda_device_builtins_threadfence_system()
    end subroutine
  end interface
  public :: __cuda_device_builtins_threadfence_system

end module

flang/module/cudadevice.f90

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
!===-- module/cudadevice.f90 -----------------------------------------------===!
2+
!
3+
! Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
! See https://llvm.org/LICENSE.txt for license information.
5+
! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
!
7+
!===------------------------------------------------------------------------===!
8+
9+
! CUDA Fortran procedures available in device subprogram
10+
11+
module cudadevice
! Exposes the procedures declared in __cuda_device_builtins under their
! user-facing names. Each entry in the ONLY list below is a rename of the form
! <name> => __cuda_device_builtins_<name>, so only the listed procedures are
! visible through this module. The module has no PRIVATE statement, so the
! renamed entities keep the default (public) accessibility.
12+
use __cuda_device_builtins, only: &
13+
syncthreads => __cuda_device_builtins_syncthreads, &
14+
syncthreads_and => __cuda_device_builtins_syncthreads_and, &
15+
syncthreads_count => __cuda_device_builtins_syncthreads_count, &
16+
syncthreads_or => __cuda_device_builtins_syncthreads_or, &
17+
syncwarp => __cuda_device_builtins_syncwarp, &
18+
threadfence => __cuda_device_builtins_threadfence, &
19+
threadfence_block => __cuda_device_builtins_threadfence_block, &
20+
threadfence_system => __cuda_device_builtins_threadfence_system
21+
end module
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
! RUN: %flang_fc1 -fdebug-dump-symbols %s | FileCheck %s
2+
3+
! Test that calls to CUDA Fortran device intrinsics pass semantic analysis
4+
5+
attributes(global) subroutine devsub()
6+
implicit none
7+
integer :: ret
8+
9+
! 3.6.4. Synchronization Functions
10+
call syncthreads()
11+
call syncwarp(1)
12+
call threadfence()
13+
call threadfence_block()
14+
call threadfence_system()
15+
ret = syncthreads_and(1)
16+
ret = syncthreads_count(1)
17+
ret = syncthreads_or(1)
18+
end
19+
20+
! CHECK-LABEL: Subprogram scope: devsub
21+
! CHECK: syncthreads, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncthreads in __cuda_device_builtins
22+
! CHECK: syncthreads_and, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_and in __cuda_device_builtins
23+
! CHECK: syncthreads_count, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_count in __cuda_device_builtins
24+
! CHECK: syncthreads_or, EXTERNAL, PUBLIC (Function): Use from __cuda_device_builtins_syncthreads_or in __cuda_device_builtins
25+
! CHECK: syncwarp, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_syncwarp in __cuda_device_builtins
26+
! CHECK: threadfence, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence in __cuda_device_builtins
27+
! CHECK: threadfence_block, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_block in __cuda_device_builtins
28+
! CHECK: threadfence_system, EXTERNAL, PUBLIC (Subroutine): Use from __cuda_device_builtins_threadfence_system in __cuda_device_builtins
29+
30+
subroutine host()
31+
call syncthreads()
32+
end subroutine
33+
34+
! CHECK-LABEL: Subprogram scope: host
35+
! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
! RUN: %python %S/test_errors.py %s %flang_fc1
2+
3+
module dev
4+
integer, device :: syncthreads
5+
6+
contains
7+
8+
attributes(device) subroutine sub1()
9+
syncthreads = 1 ! syncthreads not overwritten by cudadevice
10+
end subroutine
11+
12+
attributes(global) subroutine sub2()
13+
!ERROR: 'threadfence' is use-associated from module '__cuda_device_builtins' and cannot be re-declared
14+
integer :: threadfence
15+
end subroutine
16+
end module
17+

flang/tools/f18/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ set(MODULES
1212
"__ppc_intrinsics"
1313
"mma"
1414
"__cuda_builtins"
15+
"__cuda_device_builtins"
16+
"cudadevice"
1517
"ieee_arithmetic"
1618
"ieee_exceptions"
1719
"ieee_features"
@@ -31,6 +33,8 @@ if (NOT CMAKE_CROSSCOMPILING)
3133
elseif(${filename} STREQUAL "__ppc_intrinsics" OR
3234
${filename} STREQUAL "mma")
3335
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__ppc_types.mod)
36+
elseif(${filename} STREQUAL "cudadevice")
37+
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__cuda_device_builtins.mod)
3438
else()
3539
set(depends ${FLANG_INTRINSIC_MODULES_DIR}/__fortran_builtins.mod)
3640
if(NOT ${filename} STREQUAL "__fortran_type_info")

0 commit comments

Comments
 (0)