; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=skylake < %s | FileCheck %s

; S128 in the datalayout gives the stack a natural alignment of 128 bits
; (16 bytes), which is smaller than the 32 byte vectors spilled below.
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; Shows a case where we spill a 32 byte value onto a stack which is only
; 16 byte aligned. With stack realignment, we can use an aligned spill slot
; (if we think it's profitable), but without realignment, using a stack
; slot which is 32 byte aligned or a store which expects 32 byte alignment
; is incorrect.

declare void @foo()

; Loads a <8 x i32> from %p and passes it in the "deopt" operand bundle of the
; call to @foo. The function carries no "no-realign-stack" attribute, and the
; expected assembly sets up a frame pointer, realigns the stack pointer with
; `andq $-32, %rsp`, and stores %ymm0 to (%rsp) with the aligned vmovaps
; before the call.
define void @can_realign(ptr %p) {
; CHECK-LABEL: can_realign:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq foo@PLT
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
  %val = load <8 x i32>, ptr %p, align 32
  call void @foo() ["deopt" (<8 x i32> %val)]
  ret void
}

; Same body as @can_realign, but marked "no-realign-stack". The expected
; assembly has no frame pointer setup and no `andq` on the stack pointer, and
; the store of %ymm0 to (%rsp) uses the unaligned vmovups instead of vmovaps.
define void @no_realign(ptr %p) "no-realign-stack" {
; CHECK-LABEL: no_realign:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: vmovaps (%rdi), %ymm0
; CHECK-NEXT: vmovups %ymm0, (%rsp)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq foo@PLT
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
  %val = load <8 x i32>, ptr %p, align 32
  call void @foo() ["deopt" (<8 x i32> %val)]
  ret void
}

;; Next batch are similar to the above, but require a reload of the
;; spilled value as well.
; Takes a <4 x ptr addrspace(1)> argument, lists it in the "gc-live" bundle of
; a statepoint that calls @do_safepoint, and returns the gc.relocate result.
; The function carries no "no-realign-stack" attribute, and the expected
; assembly realigns the stack pointer with `andq $-32, %rsp`, stores %ymm0 to
; (%rsp) before the call and reloads it afterwards, both with the aligned
; vmovaps.
define <4 x ptr addrspace(1)> @spillfill_can_realign(<4 x ptr addrspace(1)> %obj) gc "statepoint-example" {
; CHECK-LABEL: spillfill_can_realign:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $64, %rsp
; CHECK-NEXT: vmovaps %ymm0, (%rsp)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq do_safepoint@PLT
; CHECK-NEXT: .Ltmp2:
; CHECK-NEXT: vmovaps (%rsp), %ymm0
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
entry:
  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<4 x ptr addrspace(1)> %obj)]
  %obj.relocated = call coldcc <4 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v4p1(token %safepoint_token, i32 0, i32 0) ; (%obj, %obj)
  ret <4 x ptr addrspace(1)> %obj.relocated
}

; Same body as @spillfill_can_realign, but marked "no-realign-stack". The
; expected assembly leaves the stack pointer unaligned (no `andq`), and both
; the store of %ymm0 before the call and the reload after it use the unaligned
; vmovups.
define <4 x ptr addrspace(1)> @spillfill_no_realign(<4 x ptr addrspace(1)> %obj) "no-realign-stack" gc "statepoint-example" {
; CHECK-LABEL: spillfill_no_realign:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: vmovups %ymm0, (%rsp)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq do_safepoint@PLT
; CHECK-NEXT: .Ltmp3:
; CHECK-NEXT: vmovups (%rsp), %ymm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<4 x ptr addrspace(1)> %obj)]
  %obj.relocated = call coldcc <4 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v4p1(token %safepoint_token, i32 0, i32 0) ; (%obj, %obj)
  ret <4 x ptr addrspace(1)> %obj.relocated
}

; Callee invoked through the statepoints above.
declare void @do_safepoint()

; Intrinsic declarations. The scalar gc.relocate.p1 variant is not referenced
; by any function visible in this file.
declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...)
declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32, i32)
declare <4 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v4p1(token, i32, i32)