; RUN: llc -mtriple=arm-eabi -mattr=+neon -fast-isel=0 -O0 %s -o - | FileCheck %s

; Codegen tests for the llvm.arm.neon.vst3 intrinsic on ARM with NEON.
; Every test loads one vector from %B and passes it as all three source
; operands of the store; the trailing i32 operand is the alignment hint.

; 64-bit vectors of i8 with an alignment operand of 32.
define void @vst3i8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst3i8:
;Check the alignment value.  Max for this instruction is 64 bits:
;This test runs at -O0 so do not check for specific register numbers.
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
	%tmp1 = load <8 x i8>, ptr %B
	call void @llvm.arm.neon.vst3.p0.v8i8(ptr %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
	ret void
}

; 64-bit vectors of i16; only the mnemonic is verified.
define void @vst3i16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst3i16:
;CHECK: vst3.16
	%tmp1 = load <4 x i16>, ptr %B
	call void @llvm.arm.neon.vst3.p0.v4i16(ptr %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
	ret void
}

; 64-bit vectors of i32; only the mnemonic is verified.
define void @vst3i32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst3i32:
;CHECK: vst3.32
	%tmp1 = load <2 x i32>, ptr %B
	call void @llvm.arm.neon.vst3.p0.v2i32(ptr %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
	ret void
}

;Check for a post-increment updating store.
; The pointer is advanced by 6 x i32 (24 bytes), the same amount of data the
; three <2 x i32> operands occupy, and written back to %ptr; the expected
; instruction uses the "!" writeback form.
define void @vst3i32_update(ptr %ptr, ptr %B) nounwind {
;CHECK-LABEL: vst3i32_update:
;CHECK: vst3.32 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
	%A = load ptr, ptr %ptr
	%tmp1 = load <2 x i32>, ptr %B
	call void @llvm.arm.neon.vst3.p0.v2i32(ptr %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
	%tmp2 = getelementptr i32, ptr %A, i32 6
	store ptr %tmp2, ptr %ptr
	ret void
}

; 64-bit vectors of float; expected to use the 32-bit element form.
define void @vst3f(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst3f:
;CHECK: vst3.32
	%tmp1 = load <2 x float>, ptr %B
	call void @llvm.arm.neon.vst3.p0.v2f32(ptr %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
	ret void
}

; <1 x i64> operands with an alignment operand of 16: the expected output is
; a three-register vst1.64 rather than a vst3.
define void @vst3i64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst3i64:
;Check the alignment value.  Max for this instruction is 64 bits:
;This test runs at -O0 so do not check for specific register numbers.
;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
	%tmp1 = load <1 x i64>, ptr %B
	call void @llvm.arm.neon.vst3.p0.v1i64(ptr %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
	ret void
}

; <1 x i64> store followed by a pointer bump of 3 x i64 (24 bytes, the size
; of the stored data); the writeback ("!") form is expected.
; The label pattern ends in ':' like every other test in this file so that it
; anchors on the function label itself.
define void @vst3i64_update(ptr %ptr, ptr %B) nounwind {
;CHECK-LABEL: vst3i64_update:
;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
	%A = load ptr, ptr %ptr
	%tmp1 = load <1 x i64>, ptr %B
	call void @llvm.arm.neon.vst3.p0.v1i64(ptr %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
	%tmp2 = getelementptr i64, ptr %A, i32 3
	store ptr %tmp2, ptr %ptr
	ret void
}

; <1 x i64> store followed by a pointer bump of only 1 x i64 (8 bytes), which
; differs from the 24 bytes stored; the register post-increment form
; ("[rN], rM") is expected.
; The label pattern ends in ':' like every other test in this file so that it
; anchors on the function label itself.
define void @vst3i64_reg_update(ptr %ptr, ptr %B) nounwind {
;CHECK-LABEL: vst3i64_reg_update:
;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}], r{{.*}}
	%A = load ptr, ptr %ptr
	%tmp1 = load <1 x i64>, ptr %B
	call void @llvm.arm.neon.vst3.p0.v1i64(ptr %A, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 1)
	%tmp2 = getelementptr i64, ptr %A, i32 1
	store ptr %tmp2, ptr %ptr
	ret void
}

; 128-bit vectors of i8 with an alignment operand of 32: two vst3.8
; instructions are expected, the first using writeback, both with :64.
define void @vst3Qi8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst3Qi8:
;Check the alignment value.  Max for this instruction is 64 bits:
;This test runs at -O0 so do not check for specific register numbers.
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]!
;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}:64]
	%tmp1 = load <16 x i8>, ptr %B
	call void @llvm.arm.neon.vst3.p0.v16i8(ptr %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
	ret void
}

; 128-bit vectors of i16: two vst3.16 instructions are expected.
define void @vst3Qi16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst3Qi16:
;CHECK: vst3.16
;CHECK: vst3.16
	%tmp1 = load <8 x i16>, ptr %B
	call void @llvm.arm.neon.vst3.p0.v8i16(ptr %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
	ret void
}

;Check for a post-increment updating store.
; 128-bit i16 store whose base pointer is then advanced by 24 x i16
; (48 bytes, the size of the three <8 x i16> operands) and written back to
; %ptr. Both emitted vst3.16 instructions are expected in writeback form.
define void @vst3Qi16_update(ptr %ptr, ptr %B) nounwind {
;CHECK-LABEL: vst3Qi16_update:
;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
  %base = load ptr, ptr %ptr
  %vec = load <8 x i16>, ptr %B
  call void @llvm.arm.neon.vst3.p0.v8i16(ptr %base, <8 x i16> %vec, <8 x i16> %vec, <8 x i16> %vec, i32 1)
  %next = getelementptr i16, ptr %base, i32 24
  store ptr %next, ptr %ptr
  ret void
}

; 128-bit vectors of i32: a pair of vst3.32 instructions is expected.
define void @vst3Qi32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst3Qi32:
;CHECK: vst3.32
;CHECK: vst3.32
  %vec = load <4 x i32>, ptr %B
  call void @llvm.arm.neon.vst3.p0.v4i32(ptr %A, <4 x i32> %vec, <4 x i32> %vec, <4 x i32> %vec, i32 1)
  ret void
}

; 128-bit vectors of float: a pair of vst3.32 instructions is expected.
define void @vst3Qf(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vst3Qf:
;CHECK: vst3.32
;CHECK: vst3.32
  %vec = load <4 x float>, ptr %B
  call void @llvm.arm.neon.vst3.p0.v4f32(ptr %A, <4 x float> %vec, <4 x float> %vec, <4 x float> %vec, i32 1)
  ret void
}

; Intrinsic declarations used by this file, 64-bit vector variants.
declare void @llvm.arm.neon.vst3.p0.v8i8(ptr, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst3.p0.v4i16(ptr, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst3.p0.v2i32(ptr, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
declare void @llvm.arm.neon.vst3.p0.v2f32(ptr, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
declare void @llvm.arm.neon.vst3.p0.v1i64(ptr, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind

; Intrinsic declarations used by this file, 128-bit vector variants.
declare void @llvm.arm.neon.vst3.p0.v16i8(ptr, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind
declare void @llvm.arm.neon.vst3.p0.v8i16(ptr, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
declare void @llvm.arm.neon.vst3.p0.v4i32(ptr, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
declare void @llvm.arm.neon.vst3.p0.v4f32(ptr, <4 x float>, <4 x float>, <4 x float>, i32) nounwind