; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
; REQUIRES: asserts

; Exercises x86-64 lowering of gc.statepoint / gc.relocate when the values
; handed to the statepoint are <2 x ptr addrspace(1)> vectors (plus one case
; with i128 deopt constants). The FileCheck directives verify both the
; generated assembly of each function and, at the end of the file, the
; location records emitted into the __LLVM_StackMaps section.

target triple = "x86_64-pc-linux-gnu"

; Can we lower a single vector?
; %obj is the only "gc-live" operand and is relocated with base index 0 and
; derived index 0. The expected code spills %xmm0 to (%rsp) before the call
; and reloads it from the same slot afterwards.
define <2 x ptr addrspace(1)> @test(<2 x ptr addrspace(1)> %obj) gc "statepoint-example" {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movaps %xmm0, (%rsp)
; CHECK-NEXT: callq do_safepoint@PLT
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: movaps (%rsp), %xmm0
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x ptr addrspace(1)> %obj)]
  %obj.relocated = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token %safepoint_token, i32 0, i32 0) ; (%obj, %obj)
  ret <2 x ptr addrspace(1)> %obj.relocated
}

; Can we lower the base, derived pairs if both are vectors?
; %derived is %obj with the scalar %offset added to each lane. Both vectors are
; listed in "gc-live" (%obj at index 0, %derived at index 1) and only %derived
; is relocated. The expected code spills %obj to (%rsp) and %derived to a
; second slot, then reloads the return value from a non-zero offset.
define <2 x ptr addrspace(1)> @test2(<2 x ptr addrspace(1)> %obj, i64 %offset) gc "statepoint-example" {
; CHECK-LABEL: test2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movq %rdi, %xmm1
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; CHECK-NEXT: paddq %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm0, (%rsp)
; CHECK-NEXT: movdqa %xmm1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: callq do_safepoint@PLT
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  %derived = getelementptr i8, <2 x ptr addrspace(1)> %obj, i64 %offset
  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x ptr addrspace(1)> %obj, <2 x ptr addrspace(1)> %derived)]
  %derived.relocated = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token %safepoint_token, i32 0, i32 1) ; (%obj, %derived)
  ret <2 x ptr addrspace(1)> %derived.relocated
}

; Originally, this was just a variant of @test2 above, but it ends up
; covering a bunch of interesting missed optimizations. Specifically:
; - We waste a stack slot for a value that a backend transform pass
;   CSEd to another spilled one.
; - We don't remove the testb even though it serves no purpose.
; - We could in principle reuse the argument memory (%rsi) and do away
;   with stack slots entirely.
; Both branches load the same <2 x ptr addrspace(1)> value from %ptr, and the
; two phis in %merge have identical incoming values, so %obj and %obj.base
; always hold the same vector. The expected code performs a single load,
; stores it to two stack slots and reloads the result from (%rsp).
define <2 x ptr addrspace(1)> @test3(i1 %cnd, ptr %ptr) gc "statepoint-example" {
; CHECK-LABEL: test3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: movaps (%rsi), %xmm0
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movaps %xmm0, (%rsp)
; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: callq do_safepoint@PLT
; CHECK-NEXT: .Ltmp2:
; CHECK-NEXT: movaps (%rsp), %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  br i1 %cnd, label %taken, label %untaken

taken: ; preds = %entry
  %obja = load <2 x ptr addrspace(1)>, ptr %ptr
  br label %merge

untaken: ; preds = %entry
  %objb = load <2 x ptr addrspace(1)>, ptr %ptr
  br label %merge

merge: ; preds = %untaken, %taken
  %obj.base = phi <2 x ptr addrspace(1)> [ %obja, %taken ], [ %objb, %untaken ]
  %obj = phi <2 x ptr addrspace(1)> [ %obja, %taken ], [ %objb, %untaken ]
  ; "gc-live" order is (%obj, %obj.base), i.e. %obj is index 0 and %obj.base
  ; is index 1; the relocates below use index 1 as the base.
  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x ptr addrspace(1)> %obj, <2 x ptr addrspace(1)> %obj.base)]
  %obj.relocated = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token %safepoint_token, i32 1, i32 0) ; (%obj.base, %obj)
  ; The bitcasts below convert a type to itself, so they are no-ops.
  %obj.relocated.casted = bitcast <2 x ptr addrspace(1)> %obj.relocated to <2 x ptr addrspace(1)>
  ; The relocated base (and its cast) is not used by the return below.
  %obj.base.relocated = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token %safepoint_token, i32 1, i32 1) ; (%obj.base, %obj.base)
  %obj.base.relocated.casted = bitcast <2 x ptr addrspace(1)> %obj.base.relocated to <2 x ptr addrspace(1)>
  ret <2 x ptr addrspace(1)> %obj.relocated.casted
}

; Can we handle vector constants? At the moment, we don't appear to actually
; get selection dag nodes for these.
; The only "gc-live" operand is a zeroinitializer vector. The expected code
; zeroes %xmm0, spills it to (%rsp) across the call and reloads it afterwards.
define <2 x ptr addrspace(1)> @test4() gc "statepoint-example" {
; CHECK-LABEL: test4:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsp)
; CHECK-NEXT: callq do_safepoint@PLT
; CHECK-NEXT: .Ltmp3:
; CHECK-NEXT: movaps (%rsp), %xmm0
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x ptr addrspace(1)> zeroinitializer)]
  %obj.relocated = call coldcc <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token %safepoint_token, i32 0, i32 0)
  ret <2 x ptr addrspace(1)> %obj.relocated
}

; Check that we can lower a constant typed as i128 correctly. We don't have
; a representation of larger than 64 bit constant in the StackMap format. At
; the moment, this simply means spilling them, but there's a potential
; optimization for values representable as sext(Con64).
; The expected code writes one 16-byte zero store and two 8-byte stores of -1
; to the stack before the call; nothing is reloaded afterwards.
define void @test5() gc "statepoint-example" {
; CHECK-LABEL: test5:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: xorps %xmm0, %xmm0
; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $-1, {{[0-9]+}}(%rsp)
; CHECK-NEXT: movq $-1, (%rsp)
; CHECK-NEXT: callq do_safepoint@PLT
; CHECK-NEXT: .Ltmp4:
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
entry:
  %safepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["deopt" (i128 0, i128 -1)]
  ret void
}

; The remaining directives inspect the stack map section. Each location record
; is matched as the six-field sequence .byte/.byte/.short/.short/.short/.long
; shown in the "encoding" part of the quoted debug dumps; the final .long is
; the offset field.
; NOTE(review): only the records for @test, @test2 and @test3 are verified;
; nothing below matches .Ltmp3 (@test4) or .Ltmp4 (@test5).

; CHECK: __LLVM_StackMaps:

; CHECK: .Ltmp0-test
; Check for the two spill slots
; Stack Maps: Loc 3: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 0
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 0

; CHECK: .Ltmp1-test2
; Check for the two spill slots
; NOTE(review): the dump quoted below lists offsets 16 then 0, whereas the
; directives that follow expect offset 0 first and 16 second. The quoted dump
; looks stale -- confirm against current llc -debug-only=stackmaps output.
; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 0
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 16

; CHECK: .Ltmp2-test3
; Check for the four spill slots
; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
; Stack Maps: Loc 4: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
; Stack Maps: Loc 5: Indirect 7+16 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
; Stack Maps: Loc 6: Indirect 7+0 [encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 16
; CHECK: .byte 3
; CHECK: .byte 0
; CHECK: .short 16
; CHECK: .short 7
; CHECK: .short 0
; CHECK: .long 0

; Callee invoked through every statepoint in this file.
declare void @do_safepoint()

declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...)
; NOTE(review): the scalar .p1 relocate is declared but not called by any
; function visible in this file.
declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32, i32)
declare <2 x ptr addrspace(1)> @llvm.experimental.gc.relocate.v2p1(token, i32, i32)