Commit 48c8948f49
2020-12-04 23:39:16
1 parent
f385419
Changed files (3)
lib
include
lib/include/openmp_wrappers/complex
@@ -18,7 +18,9 @@
#include <cmath>
#define __CUDA__
+#define __OPENMP_NVPTX__
#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_NVPTX__
#endif
// Grab the host header too.
lib/include/openmp_wrappers/complex.h
@@ -18,7 +18,9 @@
#include <math.h>
#define __CUDA__
+#define __OPENMP_NVPTX__
#include <__clang_cuda_complex_builtins.h>
+#undef __OPENMP_NVPTX__
#endif
// Grab the host header too.
lib/include/__clang_cuda_complex_builtins.h
@@ -16,7 +16,7 @@
// to work with CUDA and OpenMP target offloading [in C and C++ mode].)
#pragma push_macro("__DEVICE__")
-#ifdef _OPENMP
+#ifdef __OPENMP_NVPTX__
#pragma omp declare target
#define __DEVICE__ __attribute__((noinline, nothrow, cold, weak))
#else
@@ -26,7 +26,7 @@
// To make the algorithms available for C and C++ in CUDA and OpenMP we select
// different but equivalent function versions. TODO: For OpenMP we currently
// select the native builtins as the overload support for templates is lacking.
-#if !defined(_OPENMP)
+#if !defined(__OPENMP_NVPTX__)
#define _ISNANd std::isnan
#define _ISNANf std::isnan
#define _ISINFd std::isinf
@@ -250,7 +250,7 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) {
#undef _LOGBd
#undef _LOGBf
-#ifdef _OPENMP
+#ifdef __OPENMP_NVPTX__
#pragma omp end declare target
#endif