# Configuration options and helper macros for the experimental offload builds
# of the Fortran runtime: compilation as CUDA sources and compilation as
# OpenMP target offload sources.

option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
  "Compile Fortran runtime as CUDA sources (experimental)" OFF
  )

option(FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS
  "Do not compile global variables' definitions when producing PTX library" OFF
  )

set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation")

set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
  "Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'")
# Let cmake-gui/ccmake offer the documented choices as a drop-down list.
set_property(CACHE FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD PROPERTY STRINGS
  off host_device nohost
  )

set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING
  "List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')")

# enable_cuda_compilation(<name> <files>)
#
# Does nothing unless FLANG_EXPERIMENTAL_CUDA_RUNTIME is enabled. Otherwise:
#   * rejects BUILD_SHARED_LIBS with a fatal error;
#   * enables the CUDA language and marks <files> as CUDA sources with
#     compiler-specific COMPILE_OPTIONS;
#   * if FLANG_LIBCUDACXX_PATH has an 'include' subdirectory, appends it to the
#     directory-scope include path and defines RT_USE_LIBCUDACXX=1;
#   * adds an OBJECT library named <name>PTX built from <files> with
#     CUDA_PTX_COMPILATION turned on.
#
# Arguments:
#   name  - base name; the PTX object library is called <name>PTX.
#   files - list of source files to be compiled as CUDA.
#
# This is a macro, so the variables it sets (CMAKE_CUDA_SEPARABLE_COMPILATION,
# CUDA_COMPILE_OPTIONS) are set in the caller's scope.
macro(enable_cuda_compilation name files)
  if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
    if (BUILD_SHARED_LIBS)
      message(FATAL_ERROR
        "BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime"
        )
    endif()

    enable_language(CUDA)

    # TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION
    # work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION.
    set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)

    # Treat all supported sources as CUDA files.
    set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA)
    set(CUDA_COMPILE_OPTIONS)
    if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang")
      # Allow varargs.
      set(CUDA_COMPILE_OPTIONS
        -Xclang -fcuda-allow-variadic-functions
        )
    endif()
    if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
      set(CUDA_COMPILE_OPTIONS
        --expt-relaxed-constexpr
        # Disable these warnings:
        #   'long double' is treated as 'double' in device code
        -Xcudafe --diag_suppress=20208
        -Xcudafe --display_error_number
        )
    endif()
    set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
      "${CUDA_COMPILE_OPTIONS}"
      )

    # Only probe the file system when a path was actually provided: with the
    # default empty value the check would otherwise test '/include'.
    if (NOT "${FLANG_LIBCUDACXX_PATH}" STREQUAL "")
      if (EXISTS "${FLANG_LIBCUDACXX_PATH}/include")
        # When using libcudacxx headers files, we have to use them
        # for all files of F18 runtime.
        include_directories(AFTER "${FLANG_LIBCUDACXX_PATH}/include")
        add_compile_definitions(RT_USE_LIBCUDACXX=1)
      else()
        message(WARNING
          "FLANG_LIBCUDACXX_PATH is set to '${FLANG_LIBCUDACXX_PATH}', "
          "but it has no 'include' subdirectory; libcu++ headers will not be used"
          )
      endif()
    endif()

    # Add an OBJECT library consisting of CUDA PTX.
    llvm_add_library(${name}PTX OBJECT PARTIAL_SOURCES_INTENDED ${files})
    # The properties below are applied to the target named obj.<name>PTX
    # (presumably the object target created by llvm_add_library for OBJECT
    # libraries - confirm against its definition).
    set_property(TARGET obj.${name}PTX PROPERTY CUDA_PTX_COMPILATION ON)
    if (FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS)
      target_compile_definitions(obj.${name}PTX
        PRIVATE FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS
        )
    endif()
  endif()
endmacro()

# enable_omp_offload_compilation(<files>)
#
# Does nothing when FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD is 'off'. Otherwise
# only the 'host_device' mode is accepted (anything else is a fatal error),
# BUILD_SHARED_LIBS is rejected, and both the C and C++ compilers must be
# Clang. On success <files> get the OpenMP offload COMPILE_OPTIONS for the
# architectures in FLANG_OMP_DEVICE_ARCHITECTURES ('all' expands to the
# built-in AMDGPU and NVPTX lists) and the OMP_OFFLOAD_BUILD definition.
#
# Arguments:
#   files - list of source files to be compiled for OpenMP offload.
#
# This is a macro, so the variables it sets (all_amdgpu_architectures,
# all_nvptx_architectures, all_gpu_architectures, compile_for_architectures,
# OMP_COMPILE_OPTIONS and, for 'all', FLANG_OMP_DEVICE_ARCHITECTURES) are set
# in the caller's scope.
macro(enable_omp_offload_compilation files)
  if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off")
    # 'host_device' build only works with Clang compiler currently.
    # The build is done with the CMAKE_C/CXX_COMPILER, i.e. it does not use
    # the in-tree built Clang. We may have a mode that would use the in-tree
    # built Clang.
    #
    # 'nohost' is supposed to produce an LLVM Bitcode library,
    # and it has to be done with a C/C++ compiler producing LLVM Bitcode
    # compatible with the LLVM toolchain version distributed with the Flang
    # compiler.
    # In general, the in-tree built Clang should be used for 'nohost' build.
    # Note that 'nohost' build does not produce the host version of Flang
    # runtime library, so there will be two separate distributable objects.
    # 'nohost' build is a TODO.

    if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device")
      message(FATAL_ERROR
        "Unsupported OpenMP offload build of Flang runtime: "
        "'${FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD}' "
        "(only 'host_device' is currently supported)")
    endif()
    if (BUILD_SHARED_LIBS)
      message(FATAL_ERROR
        "BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime"
        )
    endif()

    if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND
        "${CMAKE_C_COMPILER_ID}" MATCHES "Clang")

      set(all_amdgpu_architectures
        "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
        "gfx908;gfx90a;gfx90c;gfx940;gfx1010;gfx1030"
        "gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
        "gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
        "gfx1152;gfx1153"
        )
      set(all_nvptx_architectures
        "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
        "sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90"
        )
      set(all_gpu_architectures
        "${all_amdgpu_architectures};${all_nvptx_architectures}"
        )
      # TODO: support auto detection on the build system.
      if (FLANG_OMP_DEVICE_ARCHITECTURES STREQUAL "all")
        set(FLANG_OMP_DEVICE_ARCHITECTURES ${all_gpu_architectures})
      endif()
      list(REMOVE_DUPLICATES FLANG_OMP_DEVICE_ARCHITECTURES)

      # --offload-arch takes a comma-separated list, while CMake lists are
      # semicolon-separated.
      string(REPLACE ";" "," compile_for_architectures
        "${FLANG_OMP_DEVICE_ARCHITECTURES}"
        )

      set(OMP_COMPILE_OPTIONS
        -fopenmp
        -fvisibility=hidden
        -fopenmp-cuda-mode
        --offload-arch=${compile_for_architectures}
        # Force LTO for the device part.
        -foffload-lto
        )
      set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
        "${OMP_COMPILE_OPTIONS}"
        )

      # Enable "declare target" in the source code.
      set_source_files_properties(${files}
        PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD
        )
    else()
      message(FATAL_ERROR
        "OpenMP offload build of Flang runtime is not supported for these compilers:\n"
        "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n"
        "CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
    endif()
  endif()
endmacro()