; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -O3 -mattr=+mve %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -O3 -mattr=+mve -early-live-intervals -verify-machineinstrs %s -o - | FileCheck %s

; Both llc invocations above feed the same default check prefix, so the
; expected assembly below has to match with and without
; -early-live-intervals.

; Opaque callee: only its declaration is visible in this module.
declare void @external_function()

; Loads five two-vector results with the MVE vld2q intrinsic, calls
; @external_function, and only afterwards stores the ten vectors back through
; %p, so every loaded pair is live across the call.
;
; In the expected output, the first three pairs are written to the stack with
; 32-byte spills and reloaded after the call, while the last two pairs are
; loaded into q4-q7 (d8-d15 are saved/restored by the vpush/vpop).
define arm_aapcs_vfpcc void @spill_multivector(ptr %p) {
; CHECK-LABEL: spill_multivector:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    .pad #96
; CHECK-NEXT:    sub sp, #96
; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    add.w lr, sp, #64
; CHECK-NEXT:    mov r4, r0
; CHECK-NEXT:    vld21.32 {q0, q1}, [r5]!
; CHECK-NEXT:    adds r0, #64
; CHECK-NEXT:    vstmia lr, {d0, d1, d2, d3} @ 32-byte Spill
; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
; CHECK-NEXT:    add.w lr, sp, #32
; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]
; CHECK-NEXT:    add.w r0, r4, #128
; CHECK-NEXT:    vstmia lr, {d0, d1, d2, d3} @ 32-byte Spill
; CHECK-NEXT:    vld20.32 {q0, q1}, [r0]
; CHECK-NEXT:    vld21.32 {q0, q1}, [r0]
; CHECK-NEXT:    add.w r0, r4, #192
; CHECK-NEXT:    vld20.32 {q6, q7}, [r0]
; CHECK-NEXT:    vstmia sp, {d0, d1, d2, d3} @ 32-byte Spill
; CHECK-NEXT:    vld21.32 {q6, q7}, [r0]
; CHECK-NEXT:    add.w r0, r4, #256
; CHECK-NEXT:    vld20.32 {q4, q5}, [r0]
; CHECK-NEXT:    vld21.32 {q4, q5}, [r0]
; CHECK-NEXT:    bl external_function
; CHECK-NEXT:    vldmia sp, {d2, d3, d4, d5} @ 32-byte Reload
; CHECK-NEXT:    add.w lr, sp, #32
; CHECK-NEXT:    vstrw.32 q2, [r4, #80]
; CHECK-NEXT:    vstrw.32 q5, [r4, #144]
; CHECK-NEXT:    vstrw.32 q4, [r4, #128]
; CHECK-NEXT:    vstrw.32 q7, [r4, #112]
; CHECK-NEXT:    vstrw.32 q1, [r4, #64]
; CHECK-NEXT:    vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
; CHECK-NEXT:    add.w lr, sp, #64
; CHECK-NEXT:    vstrw.32 q2, [r4, #48]
; CHECK-NEXT:    vstrw.32 q6, [r4, #96]
; CHECK-NEXT:    vstrw.32 q1, [r5]
; CHECK-NEXT:    vldmia lr, {d2, d3, d4, d5} @ 32-byte Reload
; CHECK-NEXT:    vstrw.32 q2, [r4, #16]
; CHECK-NEXT:    vstrw.32 q1, [r4]
; CHECK-NEXT:    add sp, #96
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  ; Five vld2q loads; each source pointer is 16 x i32 past the previous one.
  %pair0 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr %p)
  %src1 = getelementptr i32, ptr %p, i32 16
  %pair1 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr %src1)
  %src2 = getelementptr i32, ptr %src1, i32 16
  %pair2 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr %src2)
  %src3 = getelementptr i32, ptr %src2, i32 16
  %pair3 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr %src3)
  %src4 = getelementptr i32, ptr %src3, i32 16
  %pair4 = call { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr %src4)

  ; The call sits between the loads and every use of their results.
  call void @external_function()

  ; Unpack each pair and store its two halves to consecutive <4 x i32> slots
  ; of %p (slots 0 through 9).
  %vec0 = extractvalue { <4 x i32>, <4 x i32> } %pair0, 0
  %vec1 = extractvalue { <4 x i32>, <4 x i32> } %pair0, 1
  store <4 x i32> %vec0, ptr %p, align 4
  %dst1 = getelementptr <4 x i32>, ptr %p, i32 1
  store <4 x i32> %vec1, ptr %dst1, align 4

  %vec2 = extractvalue { <4 x i32>, <4 x i32> } %pair1, 0
  %vec3 = extractvalue { <4 x i32>, <4 x i32> } %pair1, 1
  %dst2 = getelementptr <4 x i32>, ptr %p, i32 2
  store <4 x i32> %vec2, ptr %dst2, align 4
  %dst3 = getelementptr <4 x i32>, ptr %p, i32 3
  store <4 x i32> %vec3, ptr %dst3, align 4

  %vec4 = extractvalue { <4 x i32>, <4 x i32> } %pair2, 0
  %vec5 = extractvalue { <4 x i32>, <4 x i32> } %pair2, 1
  %dst4 = getelementptr <4 x i32>, ptr %p, i32 4
  store <4 x i32> %vec4, ptr %dst4, align 4
  %dst5 = getelementptr <4 x i32>, ptr %p, i32 5
  store <4 x i32> %vec5, ptr %dst5, align 4

  %vec6 = extractvalue { <4 x i32>, <4 x i32> } %pair3, 0
  %vec7 = extractvalue { <4 x i32>, <4 x i32> } %pair3, 1
  %dst6 = getelementptr <4 x i32>, ptr %p, i32 6
  store <4 x i32> %vec6, ptr %dst6, align 4
  %dst7 = getelementptr <4 x i32>, ptr %p, i32 7
  store <4 x i32> %vec7, ptr %dst7, align 4

  %vec8 = extractvalue { <4 x i32>, <4 x i32> } %pair4, 0
  %vec9 = extractvalue { <4 x i32>, <4 x i32> } %pair4, 1
  %dst8 = getelementptr <4 x i32>, ptr %p, i32 8
  store <4 x i32> %vec8, ptr %dst8, align 4
  %dst9 = getelementptr <4 x i32>, ptr %p, i32 9
  store <4 x i32> %vec9, ptr %dst9, align 4
  ret void
}

; MVE two-vector load intrinsic used by the test; returns both result vectors
; as a struct.
declare { <4 x i32>, <4 x i32> } @llvm.arm.mve.vld2q.v4i32.v4i32.p0(ptr)