xref: /llvm-project/clang/test/Driver/cuda-options.cu (revision 953beb9fe969bf8ab1857924ea0d3dd6ea506ab1)
1 // Tests CUDA compilation pipeline construction in Driver.
2 
3 // Simple compilation case. Compile device-side to PTX assembly and make sure
4 // we use it on the host side.
5 // RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
6 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
7 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
8 // RUN:    -check-prefix NOLINK %s
9 
10 // Typical compilation + link case.
11 // RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
12 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
13 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
14 // RUN:    -check-prefix LINK %s
15 
16 // Verify that --cuda-host-only disables device-side compilation, but doesn't
17 // disable host-side compilation/linking.
18 // RUN: %clang -### -target x86_64-linux-gnu --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
19 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
20 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
21 
22 // Verify that --cuda-device-only disables host-side compilation and linking.
23 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only -nogpulib -nogpuinc %s 2>&1 \
24 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
25 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
26 
27 // Check that the last of --cuda-compile-host-device, --cuda-host-only, and
28 // --cuda-device-only wins.
29 
30 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
31 // RUN:    --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
32 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
33 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
34 
35 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
36 // RUN:    --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
37 // RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
38 // RUN:    -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
39 
40 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
41 // RUN:    -nogpulib -nogpuinc --cuda-device-only %s 2>&1 \
42 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
43 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
44 
45 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
46 // RUN:    -nogpulib -nogpuinc --cuda-device-only %s 2>&1 \
47 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
48 // RUN:    -check-prefix NOHOST -check-prefix NOLINK %s
49 
50 // RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-host-only \
51 // RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
52 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
53 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
54 // RUN:    -check-prefix LINK %s
55 
56 // RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-device-only \
57 // RUN:   -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
58 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
59 // RUN:    -check-prefix HOST -check-prefix INCLUDES-DEVICE \
60 // RUN:    -check-prefix LINK %s
61 
62 // Verify that --cuda-gpu-arch option passes the correct GPU architecture to
63 // device compilation.
64 // RUN: %clang -### -nogpulib -nogpuinc --cuda-include-ptx=all --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
65 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
66 // RUN:    -check-prefix DEVICE-SM52 -check-prefix HOST \
67 // RUN:    -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
68 
69 // Verify that there is one device-side compilation per --cuda-gpu-arch argument
70 // and that all results are included on the host side.
71 // RUN: %clang -### --cuda-include-ptx=all --target=x86_64-linux-gnu \
72 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
73 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
74 // RUN:             -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
75 // RUN:             -check-prefixes INCLUDES-DEVICE,INCLUDES-DEVICE2 \
76 // RUN:             -check-prefixes HOST,HOST-NOSAVE,NOLINK %s
77 
78 // Verify that device-side results are passed to the correct tool when
79 // -save-temps is used.
80 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -save-temps -c %s 2>&1 \
81 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-SAVE \
82 // RUN:    -check-prefix HOST -check-prefix HOST-SAVE -check-prefix NOLINK %s
83 
84 // Verify that device-side results are passed to the correct tool when
85 // -fno-integrated-as is used.
86 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc -fno-integrated-as -c %s 2>&1 \
87 // RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
88 // RUN:    -check-prefix HOST -check-prefix HOST-NOSAVE \
89 // RUN:    -check-prefix HOST-AS -check-prefix NOLINK %s
90 
91 // Verify that --[no-]cuda-gpu-arch arguments are handled correctly.
92 // a) --no-cuda-gpu-arch=X negates preceding --cuda-gpu-arch=X
93 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
94 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_52 \
95 // RUN:   --no-cuda-gpu-arch=sm_70 \
96 // RUN:   -c %s 2>&1 \
97 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
98 
99 // b) --no-cuda-gpu-arch=X negates more than one preceding --cuda-gpu-arch=X
100 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
101 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_52 \
102 // RUN:   --no-cuda-gpu-arch=sm_70 \
103 // RUN:   -c %s 2>&1 \
104 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
105 
106 // c) if --no-cuda-gpu-arch=X negates all preceding --cuda-gpu-arch=X
107 //    we default to sm_52 -- same as if no --cuda-gpu-arch were passed.
108 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
109 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_60 \
110 // RUN:   --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_60 \
111 // RUN:   -c %s 2>&1 \
112 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,NOARCH-SM70 %s
113 
114 // d) --no-cuda-gpu-arch=X is a no-op if there's no preceding --cuda-gpu-arch=X
115 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
116 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52\
117 // RUN:   --no-cuda-gpu-arch=sm_70 \
118 // RUN:   -c %s 2>&1 \
119 // RUN: | FileCheck -check-prefixes ARCH-SM52,ARCH-SM60,NOARCH-SM70 %s
120 
121 // e) --no-cuda-gpu-arch=X does not affect following --cuda-gpu-arch=X
122 // RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
123 // RUN:   -nogpulib -nogpuinc --no-cuda-gpu-arch=sm_70 --no-cuda-gpu-arch=sm_52 \
124 // RUN:   --cuda-gpu-arch=sm_70 --cuda-gpu-arch=sm_52 \
125 // RUN:   -c %s 2>&1 \
126 // RUN: | FileCheck -check-prefixes ARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
127 
128 // f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
129 // RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
130 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
131 // RUN:   --no-cuda-version-check --no-cuda-gpu-arch=all \
132 // RUN:   --cuda-gpu-arch=sm_70 \
133 // RUN:   -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
134 // RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
135 
136 // g) There's no --cuda-gpu-arch=all; the driver rejects it with an error.
137 // RUN: not %clang -### --target=x86_64-linux-gnu --cuda-device-only \
138 // RUN:   -nogpulib -nogpuinc --cuda-gpu-arch=all \
139 // RUN:   -c %s 2>&1 \
140 // RUN: | FileCheck -check-prefix ARCHALLERROR %s
141 
142 
143 // Verify that --[no-]cuda-include-ptx arguments are handled correctly.
144 // a) PTX is not included for all GPUs by default; --cuda-include-ptx=all enables it.
145 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
146 // RUN:   --cuda-include-ptx=all --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
147 // RUN:   -c %s 2>&1 \
148 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s
149 
150 // b) --no-cuda-include-ptx=all disables PTX inclusion for all GPUs
151 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
152 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
153 // RUN:   --no-cuda-include-ptx=all \
154 // RUN:   -c %s 2>&1 \
155 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM60,NOPTX-SM52 %s
156 
157 // c) --no-cuda-include-ptx=sm_XX disables PTX inclusion for that GPU only.
158 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
159 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
160 // RUN:   --no-cuda-include-ptx=sm_60 --cuda-include-ptx=sm_52 \
161 // RUN:   -c %s 2>&1 \
162 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,NOPTX-SM60,PTX-SM52 %s
163 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
164 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
165 // RUN:   --no-cuda-include-ptx=sm_52 --cuda-include-ptx=sm_60 \
166 // RUN:   -c %s 2>&1 \
167 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,NOPTX-SM52 %s
168 
169 // d) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=all
170 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
171 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
172 // RUN:   --no-cuda-include-ptx=all --cuda-include-ptx=all \
173 // RUN:   -c %s 2>&1 \
174 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s
175 
176 // e) --cuda-include-ptx=all overrides preceding --no-cuda-include-ptx=sm_XX
177 // RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
178 // RUN:   --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
179 // RUN:   --no-cuda-include-ptx=sm_52 --cuda-include-ptx=all \
180 // RUN:   -c %s 2>&1 \
181 // RUN: | FileCheck -check-prefixes FATBIN-COMMON,PTX-SM60,PTX-SM52 %s
182 
183 // Verify -flto=thin -fwhole-program-vtables handling. This should result in
184 // both options being passed to the host compilation, with neither passed to
185 // the device compilation.
186 // RUN: %clang -### --cuda-include-ptx=sm_60 --target=x86_64-linux-gnu -nogpulib -nogpuinc -c -flto=thin -fwhole-program-vtables %s 2>&1 \
187 // RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,HOST,NOLINK,THINLTOWPD %s
188 // THINLTOWPD-NOT: error: invalid argument '-fwhole-program-vtables' only allowed with '-flto'
189 
190 // ARCH-SM52: "-cc1"{{.*}}"-target-cpu" "sm_52"
191 // NOARCH-SM52-NOT: "-cc1"{{.*}}"-target-cpu" "sm_52"
192 // ARCH-SM60: "-cc1"{{.*}}"-target-cpu" "sm_60"
193 // NOARCH-SM60-NOT: "-cc1"{{.*}}"-target-cpu" "sm_60"
194 // ARCH-SM70: "-cc1"{{.*}}"-target-cpu" "sm_70"
195 // NOARCH-SM70-NOT: "-cc1"{{.*}}"-target-cpu" "sm_70"
196 // ARCHALLERROR: error: unsupported CUDA gpu architecture: all
197 
198 // Match device-side preprocessor and compiler phases with -save-temps.
199 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
200 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
201 // DEVICE-SAVE-SAME: "-fcuda-is-device"
202 // DEVICE-SAVE-SAME: "-x" "cuda"
203 
204 // DEVICE-SAVE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
205 // DEVICE-SAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
206 // DEVICE-SAVE-SAME: "-fcuda-is-device"
207 // DEVICE-SAVE-SAME: "-x" "cuda-cpp-output"
208 
209 // Match the job that produces PTX assembly.
210 // DEVICE: "-cc1" "-triple" "nvptx64-nvidia-cuda"
211 // DEVICE-NOSAVE-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
212 // THINLTOWPD-NOT: "-flto=thin"
213 // DEVICE-SAME: "-fcuda-is-device"
214 // DEVICE-SM52-SAME: "-target-cpu" "sm_52"
215 // THINLTOWPD-NOT: "-fwhole-program-vtables"
216 // DEVICE-SAME: "-o" "[[PTXFILE:[^"]*]]"
217 // DEVICE-NOSAVE-SAME: "-x" "cuda"
218 // DEVICE-SAVE-SAME: "-x" "ir"
219 
220 // Match the call to ptxas (which assembles PTX to SASS).
221 // DEVICE:ptxas
222 // DEVICE-SM52-DAG: "--gpu-name" "sm_52"
223 // DEVICE-DAG: "--output-file" "[[CUBINFILE:[^"]*]]"
224 // DEVICE-DAG: "[[PTXFILE]]"
225 
226 // Match another device-side compilation.
227 // DEVICE2: "-cc1" "-triple" "nvptx64-nvidia-cuda"
228 // DEVICE2-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
229 // DEVICE2-SAME: "-fcuda-is-device"
230 // DEVICE2-SM60-SAME: "-target-cpu" "sm_60"
231 // DEVICE2-SAME: "-o" "[[PTXFILE2:[^"]*]]"
232 // DEVICE2-SAME: "-x" "cuda"
233 
234 // Match another call to ptxas.
235 // DEVICE2: ptxas
236 // DEVICE2-SM60-DAG: "--gpu-name" "sm_60"
237 // DEVICE2-DAG: "--output-file" "[[CUBINFILE2:[^"]*]]"
238 // DEVICE2-DAG: "[[PTXFILE2]]"
239 
240 // Match no device-side compilation.
241 // NODEVICE-NOT: "-cc1" "-triple" "nvptx64-nvidia-cuda"
242 // NODEVICE-NOT: "-fcuda-is-device"
243 
244 // INCLUDES-DEVICE:fatbinary
245 // INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
246 // INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
247 // INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
248 // INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
249 // INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"
250 
251 // Match host-side preprocessor job with -save-temps.
252 // HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
253 // HOST-SAVE-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
254 // HOST-SAVE-NOT: "-fcuda-is-device"
255 // HOST-SAVE-SAME: "-x" "cuda"
256 
257 // Match host-side compilation.
258 // HOST: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
259 // HOST-SAME: "-aux-triple" "nvptx64-nvidia-cuda"
260 // THINLTOWPD-SAME: "-flto=thin"
261 // HOST-NOT: "-fcuda-is-device"
262 // fatbinary combines everything into one GPU binary, so no other inclusion may precede it.
263 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
264 // INCLUDES-DEVICE-SAME: "-fcuda-include-gpubinary" "[[FATBINARY]]"
265 // fatbinary combines everything into one GPU binary, so no other inclusion may follow it.
266 // INCLUDES-DEVICE2-NOT: "-fcuda-include-gpubinary"
267 // THINLTOWPD-SAME: "-fwhole-program-vtables"
268 // HOST-SAME: "-o" "[[HOSTOUTPUT:[^"]*]]"
269 // HOST-NOSAVE-SAME: "-x" "cuda"
270 // HOST-SAVE-SAME: "-x" "cuda-cpp-output"
271 
272 // Match external assembler that uses compilation output.
273 // HOST-AS: "-o" "{{.*}}.o" "[[HOSTOUTPUT]]"
274 
275 // Match no GPU code inclusion.
276 // NOINCLUDES-DEVICE-NOT: "-fcuda-include-gpubinary"
277 
278 // Match no host compilation.
279 // NOHOST-NOT: "-cc1" "-triple"
280 // NOHOST-NOT: "-x" "cuda"
281 
282 // Match linker.
283 // LINK: "{{.*}}{{ld|link}}{{(.exe)?}}"
284 // LINK-SAME: "[[HOSTOUTPUT]]"
285 
286 // Match no linker.
287 // NOLINK-NOT: "{{.*}}{{ld|link}}{{(.exe)?}}"
288 
289 // FATBIN-COMMON:fatbinary
290 // FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
291 // FATBIN-COMMON: "--image=profile=sm_52,file=
292 // PTX-SM52: "--image=profile=compute_52,file=
293 // NOPTX-SM52-NOT: "--image=profile=compute_52,file=
294 // FATBIN-COMMON: "--image=profile=sm_60,file=
295 // PTX-SM60: "--image=profile=compute_60,file=
296 // NOPTX-SM60-NOT: "--image=profile=compute_60,file=
297