Skip to content

Commit bc5be46

Browse files
authored
[SYCL-PTX] Add _Float16 as native type (#1848)
The NVPTX backend supports half and handles the insertion of conversion operations when required. This reverts commit a4f4fa9 - "[SYCL] Disable FP16 support check for SYCL CUDA BE" Signed-off-by: Victor Lomuller <victor@codeplay.com>
1 parent 73fb605 commit bc5be46

File tree

4 files changed

+29
-11
lines changed

4 files changed

+29
-11
lines changed

clang/lib/Basic/Targets/NVPTX.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
6464
VLASupported = false;
6565
AddrSpaceMap = &NVPTXAddrSpaceMap;
6666
UseAddrSpaceMapMangling = true;
67+
HasLegalHalfType = true;
68+
HasFloat16 = true;
6769

6870
// Define available target features
6971
// These must be defined in sorted order!

clang/lib/Sema/Sema.cpp

+1-6
Original file line numberDiff line numberDiff line change
@@ -1753,12 +1753,7 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
17531753
if (Ty->isDependentType())
17541754
return;
17551755

1756-
auto IsSYCLDeviceCuda = getLangOpts().SYCLIsDevice &&
1757-
Context.getTargetInfo().getTriple().isNVPTX();
1758-
if ((Ty->isFloat16Type() && !Context.getTargetInfo().hasFloat16Type() &&
1759-
// Disable check for SYCL CUDA BE until FP16 support is properly
1760-
// reported there (issue#1799)
1761-
!IsSYCLDeviceCuda) ||
1756+
if ((Ty->isFloat16Type() && !Context.getTargetInfo().hasFloat16Type()) ||
17621757
((Ty->isFloat128Type() ||
17631758
(Ty->isRealFloatingType() && Context.getTypeSize(Ty) == 128)) &&
17641759
!Context.getTargetInfo().hasFloat128Type()) ||

clang/lib/Sema/SemaType.cpp

+4-5
Original file line numberDiff line numberDiff line change
@@ -1521,12 +1521,11 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
15211521
// CUDA host and device may have different _Float16 support, therefore
15221522
// do not diagnose _Float16 usage to avoid false alarm.
15231523
// ToDo: more precise diagnostics for CUDA.
1524-
auto IsSYCLDeviceCuda =
1525-
S.getLangOpts().SYCLIsDevice && S.Context.getTargetInfo().getTriple().isNVPTX();
1526-
if (!S.Context.getTargetInfo().hasFloat16Type() && !S.getLangOpts().CUDA &&
1527-
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice) && !IsSYCLDeviceCuda)
1524+
if (!S.Context.getTargetInfo().hasFloat16Type() &&
1525+
!S.getLangOpts().CUDA &&
1526+
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))
15281527
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
1529-
<< "_Float16";
1528+
<< "_Float16";
15301529
}
15311530
Result = Context.Float16Ty;
15321531
break;
+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// REQUIRES: nvptx-registered-target
2+
// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -target-cpu sm_20 -S -o - %s | FileCheck %s -check-prefix=NOF16
3+
// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -target-cpu sm_60 -S -o - %s | FileCheck %s
4+
5+
// CHECK: .target sm_60
6+
// NOF16: .target sm_20
7+
8+
void f() {
9+
_Float16 x, y, z;
10+
// CHECK: add.rn.f16
11+
// NOF16: add.rn.f32
12+
z = x + y;
13+
// CHECK: sub.rn.f16
14+
// NOF16: sub.rn.f32
15+
z = x - y;
16+
// CHECK: mul.rn.f16
17+
// NOF16: mul.rn.f32
18+
z = x * y;
19+
// CHECK: div.rn.f32
20+
// NOF16: div.rn.f32
21+
z = x / y;
22+
}

0 commit comments

Comments
 (0)