// Tests CUDA compilation pipeline construction in Driver.
//
// Every RUN line below pipes the driver output into FileCheck and selects the
// check prefixes that apply to that scenario; the shared check patterns are
// collected at the bottom of this file.

// Simple compilation case. Compile device-side to PTX assembly and make sure
// we use it on the host side.
// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN:    -check-prefix NOLINK %s

// Typical compilation + link case.
// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN:    -check-prefix LINK %s

// Verify that --cuda-host-only disables device-side compilation, but doesn't
// disable host-side compilation/linking.
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
// RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s

// Verify that --cuda-device-only disables host-side compilation and linking.
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:    -check-prefix NOHOST -check-prefix NOLINK %s

// Check that the last of --cuda-compile-host-device, --cuda-host-only, and
// --cuda-device-only wins.

// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
// RUN:    --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
// RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s

// RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
// RUN:    --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
// RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s

// RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
// RUN:    -nogpulib -nogpuinc --cuda-device-only %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:    -check-prefix NOHOST -check-prefix NOLINK %s

// RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
// RUN:    -nogpulib -nogpuinc --cuda-device-only %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:    -check-prefix NOHOST -check-prefix NOLINK %s

// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-host-only \
// RUN:    -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN:    -check-prefix LINK %s

// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-device-only \
// RUN:    -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN:    -check-prefix LINK %s

// Verify that --cuda-gpu-arch option passes the correct GPU architecture to
// device compilation.
// RUN: %clang -### -nogpulib -nogpuinc --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:    -check-prefix DEVICE-SM52 -check-prefix HOST \
// RUN:    -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s

// Verify that there is one device-side compilation per --cuda-gpu-arch args
// and that all results are included on the host side.
// RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu \
// RUN:    -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
// RUN:    -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
// RUN:    -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \
// RUN:    -check-prefixes HOST,HOST-NOSAVE,NOLINK %s

// Verify that device-side results are passed to the correct tool when
// -save-temps is used.
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -save-temps -c %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-SAVE \
// RUN:    -check-prefix HOST -check-prefix HOST-SAVE -check-prefix NOLINK %s

// Verify that device-side results are passed to the correct tool when
// -fno-integrated-as is used.
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -fno-integrated-as -c %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN:    -check-prefix HOST -check-prefix HOST-NOSAVE \
// RUN:    -check-prefix HOST-AS -check-prefix NOLINK %s

// Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
// a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
// RUN:    -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_52 \
// RUN:    --no-cuda-gpu-arch=sm_70 \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s

// b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
// RUN:    -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_52 \
// RUN:    --no-cuda-gpu-arch=sm_70 \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s

// c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
//    we default to sm_52 -- same as if no --cuda-gpu-arch were passed.
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
// RUN:    -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_60 \
// RUN:    --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_60 \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s

// d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
// RUN:    -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
// RUN:    --no-cuda-gpu-arch=sm_70 \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes ARCH-SM52,ARCH-SM60,NOARCH-SM70 %s

// e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
// RUN:    -nogpulib -nogpuinc --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_52 \
// RUN:    --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_52 \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,ARCH-SM70 %s

// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
// RUN:    -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
// RUN:    --no-cuda-version-check --no-cuda-gpu-arch=all \
// RUN:    --cuda-gpu-arch=sm_70 \
// RUN:    -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s

// g) There's no --cuda-gpu-arch=all
// RUN: not %clang -### --target=x86_64-linux-gnu --cuda-device-only \
// RUN:    -nogpulib -nogpuinc --cuda-gpu-arch=all \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefix ARCHALLERROR %s


// Verify that --[no-]cuda-include-ptx arguments are handled correctly.
// a) by default we're not including PTX for all GPUs, so request it
//    explicitly with --cuda-include-ptx=all and expect PTX for every GPU.
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
// RUN:    --cuda-include-ptx=all --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s

// b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
// RUN:    --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
// RUN:    --no-cuda-include-ptx=all \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM60,NOPTX-SM52 %s

// c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
// RUN:    --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
// RUN:    --no-cuda-include-ptx=sm_60 --cuda-include-ptx=sm_52 \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM60,PTX-SM52 %s
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
// RUN:    --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
// RUN:    --no-cuda-include-ptx=sm_52 --cuda-include-ptx=sm_60 \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,NOPTX-SM52 %s

// d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
// RUN:    --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
// RUN:    --no-cuda-include-ptx=all --cuda-include-ptx=all \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s

// e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
// RUN:    --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
// RUN:    --no-cuda-include-ptx=sm_52 --cuda-include-ptx=all \
// RUN:    -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s

// Verify -flto=thin -fwhole-program-vtables handling. This should result in
// both options being passed to the host compilation, with neither passed to
// the device compilation.
// RUN: %clang -### --cuda-include-ptx=sm_60 --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,NOLINK,THINLTOWPD %s
// THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'

// ARCH-SM52: "-cc1"{{.*}}"-target-cpu" "sm_52"
// NOARCH-SM52-NOT: "-cc1"{{.*}}"-target-cpu" "sm_52"
// ARCH-SM60: "-cc1"{{.*}}"-target-cpu" "sm_60"
// NOARCH-SM60-NOT: "-cc1"{{.*}}"-target-cpu" "sm_60"
// ARCH-SM70: "-cc1"{{.*}}"-target-cpu" "sm_70"
// NOARCH-SM70-NOT: "-cc1"{{.*}}"-target-cpu" "sm_70"
// ARCHALLERROR: error: unsupported CUDA gpu architecture: all

// Match device-side preprocessor and compiler phases with -save-temps.
// DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// DEVICE-SAVE-SAME: "-fcuda-is-device"
// DEVICE-SAVE-SAME: "-x" "cuda"

// DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// DEVICE-SAVE-SAME: "-fcuda-is-device"
// DEVICE-SAVE-SAME: "-x" "cuda-cpp-output"

// Match the job that produces PTX assembly.
// DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// THINLTOWPD-NOT: "-flto=thin"
// DEVICE-SAME: "-fcuda-is-device"
// DEVICE-SM52-SAME: "-target-cpu" "sm_52"
// THINLTOWPD-NOT: "-fwhole-program-vtables"
// DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
// DEVICE-NOSAVE-SAME: "-x" "cuda"
// DEVICE-SAVE-SAME: "-x" "ir"

// Match the call to ptxas (which assembles PTX to SASS).
// DEVICE:ptxas
// DEVICE-SM52-DAG: "--gpu-name" "sm_52"
// DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]"
// DEVICE-DAG: "[[PTXFILE]]"

// Match another device-side compilation.
// DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// DEVICE2-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
// DEVICE2-SAME: "-fcuda-is-device"
// DEVICE2-SM60-SAME: "-target-cpu" "sm_60"
// DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]"
// DEVICE2-SAME: "-x" "cuda"

// Match another call to ptxas.
// DEVICE2: ptxas
// DEVICE2-SM60-DAG: "--gpu-name" "sm_60"
// DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]"
// DEVICE2-DAG: "[[PTXFILE2]]"

// Match no device-side compilation.
// NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
// NODEVICE-NOT: "-fcuda-is-device"

// INCLUDES-DEVICE:fatbinary
// INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
// INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
// INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"

// Match host-side preprocessor job with -save-temps.
// HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
// HOST-SAVE-NOT: "-fcuda-is-device"
// HOST-SAVE-SAME: "-x" "cuda"

// Match host-side compilation.
// HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
// HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
// THINLTOWPD-SAME: "-flto=thin"
// HOST-NOT: "-fcuda-is-device"
// There is only one GPU binary after combining it with fatbinary!
// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
// INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
// There is only one GPU binary after combining it with fatbinary.
// INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
// THINLTOWPD-SAME: "-fwhole-program-vtables"
// HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
// HOST-NOSAVE-SAME: "-x" "cuda"
// HOST-SAVE-SAME: "-x" "cuda-cpp-output"

// Match external assembler that uses compilation output.
// HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]"

// Match no GPU code inclusion.
// NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary"

// Match no host compilation.
// NOHOST-NOT: "-cc1" "-triple"
// NOHOST-NOT: "-x" "cuda"

// Match linker.
// LINK: "{{.*}}{{ld|link}}{{(.exe)?}}"
// LINK-SAME: "[[HOSTOUTPUT]]"

// Match no linker.
// NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}"

// FATBIN-COMMON:fatbinary
// FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
// FATBIN-COMMON: "--image=profile=sm_52,file=
// PTX-SM52: "--image=profile=compute_52,file=
// NOPTX-SM52-NOT: "--image=profile=compute_52,file=
// FATBIN-COMMON: "--image=profile=sm_60,file=
// PTX-SM60: "--image=profile=compute_60,file=
// NOPTX-SM60-NOT: "--image=profile=compute_60,file=