/*===---- complex - CUDA wrapper for <complex> ------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __CLANG_CUDA_WRAPPERS_COMPLEX
#define __CLANG_CUDA_WRAPPERS_COMPLEX

// Wrapper around <complex> that forces its functions to be __host__
// __device__.

// First, include host-only headers we think are likely to be included by
// <complex>, so that the pragma below only applies to <complex> itself.
// (Their include guards turn any later inclusion from inside <complex> into a
// no-op, so their declarations are never seen under the pragma.)
#if __cplusplus >= 201103L
#include <type_traits>
#endif
#include <stdexcept>
#include <cmath>
#include <sstream>

// Next, include our <algorithm> wrapper, to ensure that device overloads of
// std::min/max are available.
#include <algorithm>

#pragma clang force_cuda_host_device begin

// When compiling for device, ask libstdc++ to use its own implementations of
// complex functions, rather than calling builtins (which resolve to library
// functions that don't exist when compiling CUDA device code).
//
// This is a little dicey, because it causes libstdc++ to define a different
// set of overloads on host and device.
//
//   // Present only when compiling for host.
//   __host__ __device__ complex<float> sin(const complex<float>& x) {
//     return __builtin_csinf(x);
//   }
//
//   // Present when compiling for host and for device.
//   template <typename T>
//   __host__ __device__ complex<T> sin(const complex<T>& x) {
//     return complex<T>(sin(x.real()) * cosh(x.imag()),
//                       cos(x.real()) * sinh(x.imag()));
//   }
//
// This is safe because when compiling for device, all function calls in
// __host__ code to sin() will still resolve to *something*, even if they don't
// resolve to the same function as they resolve to when compiling for host. We
// don't care that they don't resolve to the right function because we won't
// codegen this host code when compiling for device.

// Save whatever definitions the macros currently have, then force them to 0
// for the duration of <complex> only. The #undef keeps the redefinition from
// being diagnosed in case the library's configuration header (pulled in by
// the includes above) already defined them with a different value.
#pragma push_macro("_GLIBCXX_USE_C99_COMPLEX")
#pragma push_macro("_GLIBCXX_USE_C99_COMPLEX_TR1")
#undef _GLIBCXX_USE_C99_COMPLEX
#undef _GLIBCXX_USE_C99_COMPLEX_TR1
#define _GLIBCXX_USE_C99_COMPLEX 0
#define _GLIBCXX_USE_C99_COMPLEX_TR1 0

// #include_next skips this wrapper and picks up the next <complex> on the
// include path, i.e. the real standard library header.
#include_next <complex>

// Restore the saved definitions in reverse order of the pushes.
#pragma pop_macro("_GLIBCXX_USE_C99_COMPLEX_TR1")
#pragma pop_macro("_GLIBCXX_USE_C99_COMPLEX")

#pragma clang force_cuda_host_device end

#endif // include guard