; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST

; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST
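; The --implicit-check-not patterns above make every RUN line reject any
; indirect 'jmp' or 'call' through a register or memory operand that is not
; matched by an explicit CHECK, so only the sequences checked below may
; appear in the output.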

declare void @bar(i32)

; Test a simple indirect call and tail call.
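; Only the calls through %fp should be routed through the per-register
; retpoline thunk (__llvm_retpoline_r11 on x86-64, __llvm_retpoline_eax on
; i686); the calls to @bar must stay direct.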
define void @icall_reg(ptr %fp, i32 %x) #0 {
entry:
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  ret void
}

; X64-LABEL: icall_reg:
; X64-DAG:   movq %rdi, %[[fp:[^ ]*]]
; X64-DAG:   movl %esi, %[[x:[^ ]*]]
; X64:       movl %esi, %edi
; X64:       callq bar
; X64-DAG:   movl %[[x]], %edi
; X64-DAG:   movq %[[fp]], %r11
; X64:       callq __llvm_retpoline_r11
; X64:       movl %[[x]], %edi
; X64:       callq bar
; X64-DAG:   movl %[[x]], %edi
; X64-DAG:   movq %[[fp]], %r11
; X64:       jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_reg:
; X64FAST:       callq bar
; X64FAST:       callq __llvm_retpoline_r11
; X64FAST:       callq bar
; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: icall_reg:
; X86-DAG:   movl 12(%esp), %[[fp:[^ ]*]]
; X86-DAG:   movl 16(%esp), %[[x:[^ ]*]]
; X86:       pushl %[[x]]
; X86:       calll bar
; X86:       movl %[[fp]], %eax
; X86:       pushl %[[x]]
; X86:       calll __llvm_retpoline_eax
; X86:       pushl %[[x]]
; X86:       calll bar
; X86:       movl %[[fp]], %eax
; X86:       pushl %[[x]]
; X86:       calll __llvm_retpoline_eax
; X86-NOT:   # TAILCALL

; X86FAST-LABEL: icall_reg:
; X86FAST:       calll bar
; X86FAST:       calll __llvm_retpoline_eax
; X86FAST:       calll bar
; X86FAST:       calll __llvm_retpoline_eax


@global_fp = external dso_local global ptr

; Test an indirect call through a global variable.
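; The function pointer is reloaded from @global_fp for each call; the second
; call is a tail call and should become a 'jmp' to the thunk rather than a
; 'call'.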
define void @icall_global_fp(i32 %x, ptr %fpp) #0 {
  %fp1 = load ptr, ptr @global_fp
  call void %fp1(i32 %x)
  %fp2 = load ptr, ptr @global_fp
  tail call void %fp2(i32 %x)
  ret void
}

; X64-LABEL: icall_global_fp:
; X64-DAG:   movl %edi, %[[x:[^ ]*]]
; X64-DAG:   movq global_fp(%rip), %r11
; X64:       callq __llvm_retpoline_r11
; X64-DAG:   movl %[[x]], %edi
; X64-DAG:   movq global_fp(%rip), %r11
; X64:       jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_global_fp:
; X64FAST:       movq global_fp(%rip), %r11
; X64FAST:       callq __llvm_retpoline_r11
; X64FAST:       movq global_fp(%rip), %r11
; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: icall_global_fp:
; X86:       movl global_fp, %eax
; X86:       pushl 4(%esp)
; X86:       calll __llvm_retpoline_eax
; X86:       addl $4, %esp
; X86:       movl global_fp, %eax
; X86:       jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: icall_global_fp:
; X86FAST:       calll __llvm_retpoline_eax
; X86FAST:       jmp __llvm_retpoline_eax # TAILCALL


%struct.Foo = type { ptr }

; Test an indirect call through a vtable.
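; The pointer is loaded from the second vtable slot (offset 8 on x86-64, 4 on
; i686); the second of the two identical calls should be emitted as a
; retpoline tail call.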
define void @vcall(ptr %obj) #0 {
  %vptr = load ptr, ptr %obj
  %vslot = getelementptr ptr, ptr %vptr, i32 1
  %fp = load ptr, ptr %vslot
  tail call void %fp(ptr %obj)
  tail call void %fp(ptr %obj)
  ret void
}

; X64-LABEL: vcall:
; X64:       movq %rdi, %[[obj:[^ ]*]]
; X64:       movq (%rdi), %[[vptr:[^ ]*]]
; X64:       movq 8(%[[vptr]]), %[[fp:[^ ]*]]
; X64:       movq %[[fp]], %r11
; X64:       callq __llvm_retpoline_r11
; X64-DAG:   movq %[[obj]], %rdi
; X64-DAG:   movq %[[fp]], %r11
; X64:       jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: vcall:
; X64FAST:       callq __llvm_retpoline_r11
; X64FAST:       jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: vcall:
; X86:       movl 8(%esp), %[[obj:[^ ]*]]
; X86:       movl (%[[obj]]), %[[vptr:[^ ]*]]
; X86:       movl 4(%[[vptr]]), %[[fp:[^ ]*]]
; X86:       movl %[[fp]], %eax
; X86:       pushl %[[obj]]
; X86:       calll __llvm_retpoline_eax
; X86:       addl $4, %esp
; X86:       movl %[[fp]], %eax
; X86:       jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: vcall:
; X86FAST:       calll __llvm_retpoline_eax
; X86FAST:       jmp __llvm_retpoline_eax # TAILCALL


declare void @direct_callee()

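; A direct tail call needs no thunk and should remain a direct 'jmp' (through
; the PLT here).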
define void @direct_tail() #0 {
  tail call void @direct_callee()
  ret void
}

; X64-LABEL: direct_tail:
; X64:       jmp direct_callee@PLT # TAILCALL
; X64FAST-LABEL: direct_tail:
; X64FAST:   jmp direct_callee@PLT # TAILCALL
; X86-LABEL: direct_tail:
; X86:       jmp direct_callee@PLT # TAILCALL
; X86FAST-LABEL: direct_tail:
; X86FAST:   jmp direct_callee@PLT # TAILCALL


declare void @nonlazybind_callee() #2

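; On x86-64 a nonlazybind callee is reached through its GOT entry, so the call
; is indirect and must also go through the retpoline thunk; on i686 the call
; still goes through the PLT and needs no thunk.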
define void @nonlazybind_caller() #0 {
  call void @nonlazybind_callee()
  tail call void @nonlazybind_callee()
  ret void
}

; X64-LABEL: nonlazybind_caller:
; X64:       movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
; X64:       movq %[[REG]], %r11
; X64:       callq __llvm_retpoline_r11
; X64:       movq %[[REG]], %r11
; X64:       jmp __llvm_retpoline_r11 # TAILCALL
; X64FAST-LABEL: nonlazybind_caller:
; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST:   callq __llvm_retpoline_r11
; X64FAST:   movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST:   jmp __llvm_retpoline_r11 # TAILCALL
; X86-LABEL: nonlazybind_caller:
; X86:       calll nonlazybind_callee@PLT
; X86:       jmp nonlazybind_callee@PLT # TAILCALL
; X86FAST-LABEL: nonlazybind_caller:
; X86FAST:   calll nonlazybind_callee@PLT
; X86FAST:   jmp nonlazybind_callee@PLT # TAILCALL


; Check that a switch gets lowered using a jump table when retpolines are only
; enabled for calls.
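; The 'jmpq *'/'jmpl *' jump-table dispatch is matched explicitly here, so it
; is exempt from the --implicit-check-not patterns on the RUN lines.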
define void @switch_jumptable(ptr %ptr, ptr %sink) #0 {
; X64-LABEL: switch_jumptable:
; X64:         jmpq *
; X86-LABEL: switch_jumptable:
; X86:         jmpl *
entry:
  br label %header

header:
  %i = load volatile i32, ptr %ptr
  switch i32 %i, label %bb0 [
    i32 1, label %bb1
    i32 2, label %bb2
    i32 3, label %bb3
    i32 4, label %bb4
    i32 5, label %bb5
    i32 6, label %bb6
    i32 7, label %bb7
    i32 8, label %bb8
    i32 9, label %bb9
  ]

bb0:
  store volatile i64 0, ptr %sink
  br label %header

bb1:
  store volatile i64 1, ptr %sink
  br label %header

bb2:
  store volatile i64 2, ptr %sink
  br label %header

bb3:
  store volatile i64 3, ptr %sink
  br label %header

bb4:
  store volatile i64 4, ptr %sink
  br label %header

bb5:
  store volatile i64 5, ptr %sink
  br label %header

bb6:
  store volatile i64 6, ptr %sink
  br label %header

bb7:
  store volatile i64 7, ptr %sink
  br label %header

bb8:
  store volatile i64 8, ptr %sink
  br label %header

bb9:
  store volatile i64 9, ptr %sink
  br label %header
}


@indirectbr_preserved.targets = constant [10 x ptr] [ptr blockaddress(@indirectbr_preserved, %bb0),
                                                     ptr blockaddress(@indirectbr_preserved, %bb1),
                                                     ptr blockaddress(@indirectbr_preserved, %bb2),
                                                     ptr blockaddress(@indirectbr_preserved, %bb3),
                                                     ptr blockaddress(@indirectbr_preserved, %bb4),
                                                     ptr blockaddress(@indirectbr_preserved, %bb5),
                                                     ptr blockaddress(@indirectbr_preserved, %bb6),
                                                     ptr blockaddress(@indirectbr_preserved, %bb7),
                                                     ptr blockaddress(@indirectbr_preserved, %bb8),
                                                     ptr blockaddress(@indirectbr_preserved, %bb9)]

; Check that we preserve indirectbr when only calls are retpolined.
define void @indirectbr_preserved(ptr readonly %p, ptr %sink) #0 {
; X64-LABEL: indirectbr_preserved:
; X64:         jmpq *
; X86-LABEL: indirectbr_preserved:
; X86:         jmpl *
entry:
  %i0 = load i64, ptr %p
  %target.i0 = getelementptr [10 x ptr], ptr @indirectbr_preserved.targets, i64 0, i64 %i0
  %target0 = load ptr, ptr %target.i0
  indirectbr ptr %target0, [label %bb1, label %bb3]

bb0:
  store volatile i64 0, ptr %sink
  br label %latch

bb1:
  store volatile i64 1, ptr %sink
  br label %latch

bb2:
  store volatile i64 2, ptr %sink
  br label %latch

bb3:
  store volatile i64 3, ptr %sink
  br label %latch

bb4:
  store volatile i64 4, ptr %sink
  br label %latch

bb5:
  store volatile i64 5, ptr %sink
  br label %latch

bb6:
  store volatile i64 6, ptr %sink
  br label %latch

bb7:
  store volatile i64 7, ptr %sink
  br label %latch

bb8:
  store volatile i64 8, ptr %sink
  br label %latch

bb9:
  store volatile i64 9, ptr %sink
  br label %latch

latch:
  %i.next = load i64, ptr %p
  %target.i.next = getelementptr [10 x ptr], ptr @indirectbr_preserved.targets, i64 0, i64 %i.next
  %target.next = load ptr, ptr %target.i.next
  ; Potentially hit a full 10 successors here so that even if we rewrite as
  ; a switch it will try to be lowered with a jump table.
  indirectbr ptr %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}

@indirectbr_rewrite.targets = constant [10 x ptr] [ptr blockaddress(@indirectbr_rewrite, %bb0),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb1),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb2),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb3),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb4),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb5),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb6),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb7),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb8),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb9)]

; Check that when retpolines are enabled for indirect branches the indirectbr
; instruction gets rewritten to use switch, and that in turn doesn't get lowered
; as a jump table.
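; Attribute #1 below additionally enables +retpoline-indirect-branches, which
; is what forces the indirectbr rewrite that is checked here.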
define void @indirectbr_rewrite(ptr readonly %p, ptr %sink) #1 {
; X64-LABEL: indirectbr_rewrite:
; X64-NOT:     jmpq
; X86-LABEL: indirectbr_rewrite:
; X86-NOT:     jmpl
entry:
  %i0 = load i64, ptr %p
  %target.i0 = getelementptr [10 x ptr], ptr @indirectbr_rewrite.targets, i64 0, i64 %i0
  %target0 = load ptr, ptr %target.i0
  indirectbr ptr %target0, [label %bb1, label %bb3]

bb0:
  store volatile i64 0, ptr %sink
  br label %latch

bb1:
  store volatile i64 1, ptr %sink
  br label %latch

bb2:
  store volatile i64 2, ptr %sink
  br label %latch

bb3:
  store volatile i64 3, ptr %sink
  br label %latch

bb4:
  store volatile i64 4, ptr %sink
  br label %latch

bb5:
  store volatile i64 5, ptr %sink
  br label %latch

bb6:
  store volatile i64 6, ptr %sink
  br label %latch

bb7:
  store volatile i64 7, ptr %sink
  br label %latch

bb8:
  store volatile i64 8, ptr %sink
  br label %latch

bb9:
  store volatile i64 9, ptr %sink
  br label %latch

latch:
  %i.next = load i64, ptr %p
  %target.i.next = getelementptr [10 x ptr], ptr @indirectbr_rewrite.targets, i64 0, i64 %i.next
  %target.next = load ptr, ptr %target.i.next
  ; Potentially hit a full 10 successors here so that even if we rewrite as
  ; a switch it will try to be lowered with a jump table.
  indirectbr ptr %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}

; Lastly check that the necessary thunks were emitted.
;
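; Each thunk calls forward to [[CALL_TARGET]]; speculative execution of the
; return is captured in the pause/lfence loop at [[CAPTURE_SPEC]], while the
; architectural path reaches [[CALL_TARGET]], overwrites the return address on
; the stack with the target register, and returns, transferring control to the
; target without an indirect branch.
;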
; X64-LABEL:         .section        .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat
; X64-NEXT:          .hidden __llvm_retpoline_r11
; X64-NEXT:          .weak   __llvm_retpoline_r11
; X64:       __llvm_retpoline_r11:
; X64-NEXT:  # {{.*}}                                # %entry
; X64-NEXT:          callq   [[CALL_TARGET:.*]]
; X64-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
; X64-NEXT:                                          # %entry
; X64-NEXT:                                          # =>This Inner Loop Header: Depth=1
; X64-NEXT:          pause
; X64-NEXT:          lfence
; X64-NEXT:          jmp     [[CAPTURE_SPEC]]
; X64-NEXT:          .p2align        4
; X64-NEXT:  {{.*}}                                  # Block address taken
; X64-NEXT:                                          # %entry
; X64-NEXT:  [[CALL_TARGET]]:
; X64-NEXT:          movq    %r11, (%rsp)
; X64-NEXT:          retq
;
; X86-LABEL:         .section        .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat
; X86-NEXT:          .hidden __llvm_retpoline_eax
; X86-NEXT:          .weak   __llvm_retpoline_eax
; X86:       __llvm_retpoline_eax:
; X86-NEXT:  # {{.*}}                                # %entry
; X86-NEXT:          calll   [[CALL_TARGET:.*]]
; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
; X86-NEXT:          pause
; X86-NEXT:          lfence
; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
; X86-NEXT:          .p2align        4
; X86-NEXT:  {{.*}}                                  # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:  [[CALL_TARGET]]:
; X86-NEXT:          movl    %eax, (%esp)
; X86-NEXT:          retl
;
; X86-LABEL:         .section        .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat
; X86-NEXT:          .hidden __llvm_retpoline_ecx
; X86-NEXT:          .weak   __llvm_retpoline_ecx
; X86:       __llvm_retpoline_ecx:
; X86-NEXT:  # {{.*}}                                # %entry
; X86-NEXT:          calll   [[CALL_TARGET:.*]]
; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
; X86-NEXT:          pause
; X86-NEXT:          lfence
; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
; X86-NEXT:          .p2align        4
; X86-NEXT:  {{.*}}                                  # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:  [[CALL_TARGET]]:
; X86-NEXT:          movl    %ecx, (%esp)
; X86-NEXT:          retl
;
; X86-LABEL:         .section        .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat
; X86-NEXT:          .hidden __llvm_retpoline_edx
; X86-NEXT:          .weak   __llvm_retpoline_edx
; X86:       __llvm_retpoline_edx:
; X86-NEXT:  # {{.*}}                                # %entry
; X86-NEXT:          calll   [[CALL_TARGET:.*]]
; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
; X86-NEXT:          pause
; X86-NEXT:          lfence
; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
; X86-NEXT:          .p2align        4
; X86-NEXT:  {{.*}}                                  # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:  [[CALL_TARGET]]:
; X86-NEXT:          movl    %edx, (%esp)
; X86-NEXT:          retl
;
; X86-LABEL:         .section        .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat
; X86-NEXT:          .hidden __llvm_retpoline_edi
; X86-NEXT:          .weak   __llvm_retpoline_edi
; X86:       __llvm_retpoline_edi:
; X86-NEXT:  # {{.*}}                                # %entry
; X86-NEXT:          calll   [[CALL_TARGET:.*]]
; X86-NEXT:  [[CAPTURE_SPEC:.*]]:                    # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:                                          # =>This Inner Loop Header: Depth=1
; X86-NEXT:          pause
; X86-NEXT:          lfence
; X86-NEXT:          jmp     [[CAPTURE_SPEC]]
; X86-NEXT:          .p2align        4
; X86-NEXT:  {{.*}}                                  # Block address taken
; X86-NEXT:                                          # %entry
; X86-NEXT:  [[CALL_TARGET]]:
; X86-NEXT:          movl    %edi, (%esp)
; X86-NEXT:          retl

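; #0 enables retpolines for indirect calls only, #1 also enables them for
; indirect branches, and #2 marks nonlazybind_callee so that it is called
; through its GOT entry on x86-64.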
attributes #0 = { "target-features"="+retpoline-indirect-calls" }
attributes #1 = { "target-features"="+retpoline-indirect-calls,+retpoline-indirect-branches" }
attributes #2 = { nonlazybind }