Skip to content

Commit bc5be46

Browse files
authored
[SYCL-PTX] Add _Float16 as native type (#1848)
The NVPTX backend supports half and handles the insertion of conversion operations when required. This reverts commit a4f4fa9 - "[SYCL] Disable FP16 support check for SYCL CUDA BE" Signed-off-by: Victor Lomuller <victor@codeplay.com>
1 parent 73fb605 commit bc5be46

File tree

4 files changed

+29
-11
lines changed

4 files changed

+29
-11
lines changed

clang/lib/Basic/Targets/NVPTX.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
6464
VLASupported = false;
6565
AddrSpaceMap = &NVPTXAddrSpaceMap;
6666
UseAddrSpaceMapMangling = true;
67+
HasLegalHalfType = true;
68+
HasFloat16 = true;
6769

6870
// Define available target features
6971
// These must be defined in sorted order!

clang/lib/Sema/Sema.cpp

+1-6
Original file line numberDiff line numberDiff line change
@@ -1753,12 +1753,7 @@ void Sema::checkDeviceDecl(const ValueDecl *D, SourceLocation Loc) {
17531753
if (Ty->isDependentType())
17541754
return;
17551755

1756-
auto IsSYCLDeviceCuda = getLangOpts().SYCLIsDevice &&
1757-
Context.getTargetInfo().getTriple().isNVPTX();
1758-
if ((Ty->isFloat16Type() && !Context.getTargetInfo().hasFloat16Type() &&
1759-
// Disable check for SYCL CUDA BE until FP16 support is properly
1760-
// reported there (issue#1799)
1761-
!IsSYCLDeviceCuda) ||
1756+
if ((Ty->isFloat16Type() && !Context.getTargetInfo().hasFloat16Type()) ||
17621757
((Ty->isFloat128Type() ||
17631758
(Ty->isRealFloatingType() && Context.getTypeSize(Ty) == 128)) &&
17641759
!Context.getTargetInfo().hasFloat128Type()) ||

clang/lib/Sema/SemaType.cpp

+4-5
Original file line numberDiff line numberDiff line change
@@ -1521,12 +1521,11 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) {
15211521
// CUDA host and device may have different _Float16 support, therefore
15221522
// do not diagnose _Float16 usage to avoid false alarm.
15231523
// ToDo: more precise diagnostics for CUDA.
1524-
auto IsSYCLDeviceCuda =
1525-
S.getLangOpts().SYCLIsDevice && S.Context.getTargetInfo().getTriple().isNVPTX();
1526-
if (!S.Context.getTargetInfo().hasFloat16Type() && !S.getLangOpts().CUDA &&
1527-
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice) && !IsSYCLDeviceCuda)
1524+
if (!S.Context.getTargetInfo().hasFloat16Type() &&
1525+
!S.getLangOpts().CUDA &&
1526+
!(S.getLangOpts().OpenMP && S.getLangOpts().OpenMPIsDevice))
15281527
S.Diag(DS.getTypeSpecTypeLoc(), diag::err_type_unsupported)
1529-
<< "_Float16";
1528+
<< "_Float16";
15301529
}
15311530
Result = Context.Float16Ty;
15321531
break;
+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// REQUIRES: nvptx-registered-target
2+
// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -target-cpu sm_20 -S -o - %s | FileCheck %s -check-prefix=NOF16
3+
// RUN: %clang_cc1 -triple nvptx-nvidia-cuda -target-cpu sm_60 -S -o - %s | FileCheck %s
4+
5+
// CHECK: .target sm_60
6+
// NOF16: .target sm_20
7+
8+
void f() {
9+
_Float16 x, y, z;
10+
// CHECK: add.rn.f16
11+
// NOF16: add.rn.f32
12+
z = x + y;
13+
// CHECK: sub.rn.f16
14+
// NOF16: sub.rn.f32
15+
z = x - y;
16+
// CHECK: mul.rn.f16
17+
// NOF16: mul.rn.f32
18+
z = x * y;
19+
// CHECK: div.rn.f32
20+
// NOF16: div.rn.f32
21+
z = x / y;
22+
}

0 commit comments

Comments
 (0)