; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64
; RUN: llc -verify-machineinstrs -mtriple=x86_64-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X64FAST

; RUN: llc -verify-machineinstrs -mtriple=i686-unknown < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86
; RUN: llc -verify-machineinstrs -mtriple=i686-unknown -O0 < %s | FileCheck %s --implicit-check-not="jmp.*\*" --implicit-check-not="call.*\*" --check-prefix=X86FAST

declare void @bar(i32)

; Test a simple indirect call and tail call.
define void @icall_reg(ptr %fp, i32 %x) #0 {
entry:
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  tail call void @bar(i32 %x)
  tail call void %fp(i32 %x)
  ret void
}

; X64-LABEL: icall_reg:
; X64-DAG: movq %rdi, %[[fp:[^ ]*]]
; X64-DAG: movl %esi, %[[x:[^ ]*]]
; X64: movl %esi, %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: callq __llvm_retpoline_r11
; X64: movl %[[x]], %edi
; X64: callq bar
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_reg:
; X64FAST: callq bar
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: callq bar
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: icall_reg:
; X86-DAG: movl 12(%esp), %[[fp:[^ ]*]]
; X86-DAG: movl 16(%esp), %[[x:[^ ]*]]
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_retpoline_eax
; X86: pushl %[[x]]
; X86: calll bar
; X86: movl %[[fp]], %eax
; X86: pushl %[[x]]
; X86: calll __llvm_retpoline_eax
; X86-NOT: # TAILCALL

; X86FAST-LABEL: icall_reg:
; X86FAST: calll bar
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: calll bar
; X86FAST: calll __llvm_retpoline_eax


@global_fp = external dso_local global ptr

; Test an indirect call through a global variable.
define void @icall_global_fp(i32 %x, ptr %fpp) #0 {
  %fp1 = load ptr, ptr @global_fp
  call void %fp1(i32 %x)
  %fp2 = load ptr, ptr @global_fp
  tail call void %fp2(i32 %x)
  ret void
}

; X64-LABEL: icall_global_fp:
; X64-DAG: movl %edi, %[[x:[^ ]*]]
; X64-DAG: movq global_fp(%rip), %r11
; X64: callq __llvm_retpoline_r11
; X64-DAG: movl %[[x]], %edi
; X64-DAG: movq global_fp(%rip), %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: icall_global_fp:
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: movq global_fp(%rip), %r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: icall_global_fp:
; X86: movl global_fp, %eax
; X86: pushl 4(%esp)
; X86: calll __llvm_retpoline_eax
; X86: addl $4, %esp
; X86: movl global_fp, %eax
; X86: jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: icall_global_fp:
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL


%struct.Foo = type { ptr }

; Test an indirect call through a vtable.
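; The first load fetches the vtable pointer from the object; the second loads
; the function pointer from vtable slot 1 (offset 8 on x86-64, 4 on i386).
; Both calls through %fp must be routed through the retpoline thunk.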
define void @vcall(ptr %obj) #0 {
  %vptr = load ptr, ptr %obj
  %vslot = getelementptr ptr, ptr %vptr, i32 1
  %fp = load ptr, ptr %vslot
  tail call void %fp(ptr %obj)
  tail call void %fp(ptr %obj)
  ret void
}

; X64-LABEL: vcall:
; X64: movq %rdi, %[[obj:[^ ]*]]
; X64: movq (%rdi), %[[vptr:[^ ]*]]
; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]]
; X64: movq %[[fp]], %r11
; X64: callq __llvm_retpoline_r11
; X64-DAG: movq %[[obj]], %rdi
; X64-DAG: movq %[[fp]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL

; X64FAST-LABEL: vcall:
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL

; X86-LABEL: vcall:
; X86: movl 8(%esp), %[[obj:[^ ]*]]
; X86: movl (%[[obj]]), %[[vptr:[^ ]*]]
; X86: movl 4(%[[vptr]]), %[[fp:[^ ]*]]
; X86: movl %[[fp]], %eax
; X86: pushl %[[obj]]
; X86: calll __llvm_retpoline_eax
; X86: addl $4, %esp
; X86: movl %[[fp]], %eax
; X86: jmp __llvm_retpoline_eax # TAILCALL

; X86FAST-LABEL: vcall:
; X86FAST: calll __llvm_retpoline_eax
; X86FAST: jmp __llvm_retpoline_eax # TAILCALL


declare void @direct_callee()

define void @direct_tail() #0 {
  tail call void @direct_callee()
  ret void
}

; X64-LABEL: direct_tail:
; X64: jmp direct_callee@PLT # TAILCALL
; X64FAST-LABEL: direct_tail:
; X64FAST: jmp direct_callee@PLT # TAILCALL
; X86-LABEL: direct_tail:
; X86: jmp direct_callee@PLT # TAILCALL
; X86FAST-LABEL: direct_tail:
; X86FAST: jmp direct_callee@PLT # TAILCALL


declare void @nonlazybind_callee() #2

define void @nonlazybind_caller() #0 {
  call void @nonlazybind_callee()
  tail call void @nonlazybind_callee()
  ret void
}

; X64-LABEL: nonlazybind_caller:
; X64: movq nonlazybind_callee@GOTPCREL(%rip), %[[REG:.*]]
; X64: movq %[[REG]], %r11
; X64: callq __llvm_retpoline_r11
; X64: movq %[[REG]], %r11
; X64: jmp __llvm_retpoline_r11 # TAILCALL
; X64FAST-LABEL: nonlazybind_caller:
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST: callq __llvm_retpoline_r11
; X64FAST: movq nonlazybind_callee@GOTPCREL(%rip), %r11
; X64FAST: jmp __llvm_retpoline_r11 # TAILCALL
; X86-LABEL: nonlazybind_caller:
; X86: calll nonlazybind_callee@PLT
; X86: jmp nonlazybind_callee@PLT # TAILCALL
; X86FAST-LABEL: nonlazybind_caller:
; X86FAST: calll nonlazybind_callee@PLT
; X86FAST: jmp nonlazybind_callee@PLT # TAILCALL


; Check that a switch gets lowered using a jump table when retpolines are only
; enabled for calls.
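; The jump through the table shows up as "jmpq *" / "jmpl *" and is matched
; explicitly below; that explicit match also exempts it from the RUN lines'
; --implicit-check-not patterns, which otherwise reject any indirect jmp.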
define void @switch_jumptable(ptr %ptr, ptr %sink) #0 {
; X64-LABEL: switch_jumptable:
; X64: jmpq *
; X86-LABEL: switch_jumptable:
; X86: jmpl *
entry:
  br label %header

header:
  %i = load volatile i32, ptr %ptr
  switch i32 %i, label %bb0 [
    i32 1, label %bb1
    i32 2, label %bb2
    i32 3, label %bb3
    i32 4, label %bb4
    i32 5, label %bb5
    i32 6, label %bb6
    i32 7, label %bb7
    i32 8, label %bb8
    i32 9, label %bb9
  ]

bb0:
  store volatile i64 0, ptr %sink
  br label %header

bb1:
  store volatile i64 1, ptr %sink
  br label %header

bb2:
  store volatile i64 2, ptr %sink
  br label %header

bb3:
  store volatile i64 3, ptr %sink
  br label %header

bb4:
  store volatile i64 4, ptr %sink
  br label %header

bb5:
  store volatile i64 5, ptr %sink
  br label %header

bb6:
  store volatile i64 6, ptr %sink
  br label %header

bb7:
  store volatile i64 7, ptr %sink
  br label %header

bb8:
  store volatile i64 8, ptr %sink
  br label %header

bb9:
  store volatile i64 9, ptr %sink
  br label %header
}


@indirectbr_preserved.targets = constant [10 x ptr] [ptr blockaddress(@indirectbr_preserved, %bb0),
                                                     ptr blockaddress(@indirectbr_preserved, %bb1),
                                                     ptr blockaddress(@indirectbr_preserved, %bb2),
                                                     ptr blockaddress(@indirectbr_preserved, %bb3),
                                                     ptr blockaddress(@indirectbr_preserved, %bb4),
                                                     ptr blockaddress(@indirectbr_preserved, %bb5),
                                                     ptr blockaddress(@indirectbr_preserved, %bb6),
                                                     ptr blockaddress(@indirectbr_preserved, %bb7),
                                                     ptr blockaddress(@indirectbr_preserved, %bb8),
                                                     ptr blockaddress(@indirectbr_preserved, %bb9)]

; Check that we preserve indirectbr when only calls are retpolined.
define void @indirectbr_preserved(ptr readonly %p, ptr %sink) #0 {
; X64-LABEL: indirectbr_preserved:
; X64: jmpq *
; X86-LABEL: indirectbr_preserved:
; X86: jmpl *
entry:
  %i0 = load i64, ptr %p
  %target.i0 = getelementptr [10 x ptr], ptr @indirectbr_preserved.targets, i64 0, i64 %i0
  %target0 = load ptr, ptr %target.i0
  indirectbr ptr %target0, [label %bb1, label %bb3]

bb0:
  store volatile i64 0, ptr %sink
  br label %latch

bb1:
  store volatile i64 1, ptr %sink
  br label %latch

bb2:
  store volatile i64 2, ptr %sink
  br label %latch

bb3:
  store volatile i64 3, ptr %sink
  br label %latch

bb4:
  store volatile i64 4, ptr %sink
  br label %latch

bb5:
  store volatile i64 5, ptr %sink
  br label %latch

bb6:
  store volatile i64 6, ptr %sink
  br label %latch

bb7:
  store volatile i64 7, ptr %sink
  br label %latch

bb8:
  store volatile i64 8, ptr %sink
  br label %latch

bb9:
  store volatile i64 9, ptr %sink
  br label %latch

latch:
  %i.next = load i64, ptr %p
  %target.i.next = getelementptr [10 x ptr], ptr @indirectbr_preserved.targets, i64 0, i64 %i.next
  %target.next = load ptr, ptr %target.i.next
  ; Potentially hit a full 10 successors here so that even if we rewrite as
  ; a switch it will try to be lowered with a jump table.
  indirectbr ptr %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}

@indirectbr_rewrite.targets = constant [10 x ptr] [ptr blockaddress(@indirectbr_rewrite, %bb0),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb1),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb2),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb3),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb4),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb5),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb6),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb7),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb8),
                                                   ptr blockaddress(@indirectbr_rewrite, %bb9)]

; Check that when retpolines are enabled for indirect branches the indirectbr
; instruction gets rewritten to use switch, and that in turn doesn't get lowered
; as a jump table.
define void @indirectbr_rewrite(ptr readonly %p, ptr %sink) #1 {
; X64-LABEL: indirectbr_rewrite:
; X64-NOT: jmpq
; X86-LABEL: indirectbr_rewrite:
; X86-NOT: jmpl
entry:
  %i0 = load i64, ptr %p
  %target.i0 = getelementptr [10 x ptr], ptr @indirectbr_rewrite.targets, i64 0, i64 %i0
  %target0 = load ptr, ptr %target.i0
  indirectbr ptr %target0, [label %bb1, label %bb3]

bb0:
  store volatile i64 0, ptr %sink
  br label %latch

bb1:
  store volatile i64 1, ptr %sink
  br label %latch

bb2:
  store volatile i64 2, ptr %sink
  br label %latch

bb3:
  store volatile i64 3, ptr %sink
  br label %latch

bb4:
  store volatile i64 4, ptr %sink
  br label %latch

bb5:
  store volatile i64 5, ptr %sink
  br label %latch

bb6:
  store volatile i64 6, ptr %sink
  br label %latch

bb7:
  store volatile i64 7, ptr %sink
  br label %latch

bb8:
  store volatile i64 8, ptr %sink
  br label %latch

bb9:
  store volatile i64 9, ptr %sink
  br label %latch

latch:
  %i.next = load i64, ptr %p
  %target.i.next = getelementptr [10 x ptr], ptr @indirectbr_rewrite.targets, i64 0, i64 %i.next
  %target.next = load ptr, ptr %target.i.next
  ; Potentially hit a full 10 successors here so that even if we rewrite as
  ; a switch it will try to be lowered with a jump table.
  indirectbr ptr %target.next, [label %bb0,
                                label %bb1,
                                label %bb2,
                                label %bb3,
                                label %bb4,
                                label %bb5,
                                label %bb6,
                                label %bb7,
                                label %bb8,
                                label %bb9]
}

; Lastly check that the necessary thunks were emitted.
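; Every thunk has the same shape: a direct call over a speculation trap (an
; infinite pause/lfence loop that pins any mispredicted speculation), then
; code that overwrites the pushed return address with the call target held
; in the thunk's register, so the final ret transfers control to that target.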
416; 417; X64-LABEL: .section .text.__llvm_retpoline_r11,{{.*}},__llvm_retpoline_r11,comdat 418; X64-NEXT: .hidden __llvm_retpoline_r11 419; X64-NEXT: .weak __llvm_retpoline_r11 420; X64: __llvm_retpoline_r11: 421; X64-NEXT: # {{.*}} # %entry 422; X64-NEXT: callq [[CALL_TARGET:.*]] 423; X64-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken 424; X64-NEXT: # %entry 425; X64-NEXT: # =>This Inner Loop Header: Depth=1 426; X64-NEXT: pause 427; X64-NEXT: lfence 428; X64-NEXT: jmp [[CAPTURE_SPEC]] 429; X64-NEXT: .p2align 4 430; X64-NEXT: {{.*}} # Block address taken 431; X64-NEXT: # %entry 432; X64-NEXT: [[CALL_TARGET]]: 433; X64-NEXT: movq %r11, (%rsp) 434; X64-NEXT: retq 435; 436; X86-LABEL: .section .text.__llvm_retpoline_eax,{{.*}},__llvm_retpoline_eax,comdat 437; X86-NEXT: .hidden __llvm_retpoline_eax 438; X86-NEXT: .weak __llvm_retpoline_eax 439; X86: __llvm_retpoline_eax: 440; X86-NEXT: # {{.*}} # %entry 441; X86-NEXT: calll [[CALL_TARGET:.*]] 442; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken 443; X86-NEXT: # %entry 444; X86-NEXT: # =>This Inner Loop Header: Depth=1 445; X86-NEXT: pause 446; X86-NEXT: lfence 447; X86-NEXT: jmp [[CAPTURE_SPEC]] 448; X86-NEXT: .p2align 4 449; X86-NEXT: {{.*}} # Block address taken 450; X86-NEXT: # %entry 451; X86-NEXT: [[CALL_TARGET]]: 452; X86-NEXT: movl %eax, (%esp) 453; X86-NEXT: retl 454; 455; X86-LABEL: .section .text.__llvm_retpoline_ecx,{{.*}},__llvm_retpoline_ecx,comdat 456; X86-NEXT: .hidden __llvm_retpoline_ecx 457; X86-NEXT: .weak __llvm_retpoline_ecx 458; X86: __llvm_retpoline_ecx: 459; X86-NEXT: # {{.*}} # %entry 460; X86-NEXT: calll [[CALL_TARGET:.*]] 461; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken 462; X86-NEXT: # %entry 463; X86-NEXT: # =>This Inner Loop Header: Depth=1 464; X86-NEXT: pause 465; X86-NEXT: lfence 466; X86-NEXT: jmp [[CAPTURE_SPEC]] 467; X86-NEXT: .p2align 4 468; X86-NEXT: {{.*}} # Block address taken 469; X86-NEXT: # %entry 470; X86-NEXT: [[CALL_TARGET]]: 471; X86-NEXT: movl %ecx, (%esp) 472; X86-NEXT: retl 473; 474; X86-LABEL: .section .text.__llvm_retpoline_edx,{{.*}},__llvm_retpoline_edx,comdat 475; X86-NEXT: .hidden __llvm_retpoline_edx 476; X86-NEXT: .weak __llvm_retpoline_edx 477; X86: __llvm_retpoline_edx: 478; X86-NEXT: # {{.*}} # %entry 479; X86-NEXT: calll [[CALL_TARGET:.*]] 480; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken 481; X86-NEXT: # %entry 482; X86-NEXT: # =>This Inner Loop Header: Depth=1 483; X86-NEXT: pause 484; X86-NEXT: lfence 485; X86-NEXT: jmp [[CAPTURE_SPEC]] 486; X86-NEXT: .p2align 4 487; X86-NEXT: {{.*}} # Block address taken 488; X86-NEXT: # %entry 489; X86-NEXT: [[CALL_TARGET]]: 490; X86-NEXT: movl %edx, (%esp) 491; X86-NEXT: retl 492; 493; X86-LABEL: .section .text.__llvm_retpoline_edi,{{.*}},__llvm_retpoline_edi,comdat 494; X86-NEXT: .hidden __llvm_retpoline_edi 495; X86-NEXT: .weak __llvm_retpoline_edi 496; X86: __llvm_retpoline_edi: 497; X86-NEXT: # {{.*}} # %entry 498; X86-NEXT: calll [[CALL_TARGET:.*]] 499; X86-NEXT: [[CAPTURE_SPEC:.*]]: # Block address taken 500; X86-NEXT: # %entry 501; X86-NEXT: # =>This Inner Loop Header: Depth=1 502; X86-NEXT: pause 503; X86-NEXT: lfence 504; X86-NEXT: jmp [[CAPTURE_SPEC]] 505; X86-NEXT: .p2align 4 506; X86-NEXT: {{.*}} # Block address taken 507; X86-NEXT: # %entry 508; X86-NEXT: [[CALL_TARGET]]: 509; X86-NEXT: movl %edi, (%esp) 510; X86-NEXT: retl 511 512 513attributes #0 = { "target-features"="+retpoline-indirect-calls" } 514attributes #1 = { "target-features"="+retpoline-indirect-calls,+retpoline-indirect-branches" } 
attributes #2 = { nonlazybind }