; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
; RUN: llc -mtriple=arm-eabi -mattr=+neon -regalloc=basic %s -o - | FileCheck %s

; Code-generation checks for the llvm.arm.neon.vld3 intrinsics. Each test
; function loads a three-vector aggregate, combines fields 0 and 2 of it, and
; returns the result. The expected assembly is verified under both llc
; invocations above (default register allocator and -regalloc=basic).

; Aggregates of three 64-bit vectors.
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x3_t = type { <2 x i32>, <2 x i32>, <2 x i32> }
%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> }
%struct.__neon_int64x1x3_t = type { <1 x i64>, <1 x i64>, <1 x i64> }

; Aggregates of three 128-bit vectors.
%struct.__neon_int8x16x3_t = type { <16 x i8>, <16 x i8>, <16 x i8> }
%struct.__neon_int16x8x3_t = type { <8 x i16>, <8 x i16>, <8 x i16> }
%struct.__neon_int32x4x3_t = type { <4 x i32>, <4 x i32>, <4 x i32> }
%struct.__neon_float32x4x3_t = type { <4 x float>, <4 x float>, <4 x float> }

; vld3 of three <8 x i8> vectors. The trailing i32 operand (the alignment under
; test) is 32, while the expected instruction carries a :64 alignment.
define <8 x i8> @vld3i8(ptr %A) nounwind {
;CHECK-LABEL: vld3i8:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld3.8 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]
  %vecs = call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0(ptr %A, i32 32)
  %first = extractvalue %struct.__neon_int8x8x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int8x8x3_t %vecs, 2
  ; Use two of the three results so the load is not dead.
  %sum = add <8 x i8> %first, %third
  ret <8 x i8> %sum
}

; vld3 of three <4 x i16> vectors with a trailing i32 operand of 1. Only the
; instruction mnemonic is matched here.
define <4 x i16> @vld3i16(ptr %A) nounwind {
;CHECK-LABEL: vld3i16:
;CHECK: vld3.16
  %vecs = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0(ptr %A, i32 1)
  %first = extractvalue %struct.__neon_int16x4x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int16x4x3_t %vecs, 2
  %sum = add <4 x i16> %first, %third
  ret <4 x i16> %sum
}

;Check for a post-increment updating load with register increment.
define <4 x i16> @vld3i16_update(ptr %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld3i16_update:
;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+|lr}}], {{r[0-9]+|lr}}
  ; The base address is read from %ptr, advanced by %inc i16 elements after the
  ; load, and written back to %ptr; the expected instruction takes the
  ; increment from a register.
  %base = load ptr, ptr %ptr
  %vecs = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0(ptr %base, i32 1)
  %first = extractvalue %struct.__neon_int16x4x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int16x4x3_t %vecs, 2
  %sum = add <4 x i16> %first, %third
  %next = getelementptr i16, ptr %base, i32 %inc
  store ptr %next, ptr %ptr
  ret <4 x i16> %sum
}

; vld3 of three <2 x i32> vectors; only the mnemonic is matched.
define <2 x i32> @vld3i32(ptr %A) nounwind {
;CHECK-LABEL: vld3i32:
;CHECK: vld3.32
  %vecs = call %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32.p0(ptr %A, i32 1)
  %first = extractvalue %struct.__neon_int32x2x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int32x2x3_t %vecs, 2
  %sum = add <2 x i32> %first, %third
  ret <2 x i32> %sum
}

; vld3 of three <2 x float> vectors; fields 0 and 2 are combined with fadd and
; only the mnemonic is matched.
define <2 x float> @vld3f(ptr %A) nounwind {
;CHECK-LABEL: vld3f:
;CHECK: vld3.32
  %vecs = call %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32.p0(ptr %A, i32 1)
  %first = extractvalue %struct.__neon_float32x2x3_t %vecs, 0
  %third = extractvalue %struct.__neon_float32x2x3_t %vecs, 2
  %sum = fadd <2 x float> %first, %third
  ret <2 x float> %sum
}

; vld3 of three <1 x i64> vectors. The expected instruction is a vld1.64 over
; three d registers rather than a vld3 (presumably because single-element
; vectors leave nothing to de-interleave -- confirm against the ARM backend).
define <1 x i64> @vld3i64(ptr %A) nounwind {
;CHECK-LABEL: vld3i64:
;The alignment operand is 16, but the expected output carries :64, the maximum
;for this instruction:
;CHECK: vld1.64 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]
  %vecs = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0(ptr %A, i32 16)
  %first = extractvalue %struct.__neon_int64x1x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int64x1x3_t %vecs, 2
  %sum = add <1 x i64> %first, %third
  ret <1 x i64> %sum
}

; Same load as @vld3i64, followed by storing %A advanced by 3 x i64 (the size
; of the loaded aggregate) to %ptr. The expected instruction uses the
; writeback ('!') form.
define <1 x i64> @vld3i64_update(ptr %ptr, ptr %A) nounwind {
;CHECK-LABEL: vld3i64_update:
;CHECK: vld1.64 {d16, d17, d18}, [{{r[0-9]+|lr}}:64]!
  %vecs = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0(ptr %A, i32 16)
  %next = getelementptr i64, ptr %A, i32 3
  store ptr %next, ptr %ptr
  %first = extractvalue %struct.__neon_int64x1x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int64x1x3_t %vecs, 2
  %sum = add <1 x i64> %first, %third
  ret <1 x i64> %sum
}

; Same load as @vld3i64, but the pointer stored to %ptr is advanced by only
; 1 x i64. The expected instruction takes its increment from a register
; instead of using the '!' form.
define <1 x i64> @vld3i64_reg_update(ptr %ptr, ptr %A) nounwind {
;CHECK-LABEL: vld3i64_reg_update:
;CHECK: vld1.64 {d16, d17, d18}, [{{r[0-9]+|lr}}:64], {{r[0-9]+|lr}}
  %vecs = call %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0(ptr %A, i32 16)
  %next = getelementptr i64, ptr %A, i32 1
  store ptr %next, ptr %ptr
  %first = extractvalue %struct.__neon_int64x1x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int64x1x3_t %vecs, 2
  %sum = add <1 x i64> %first, %third
  ret <1 x i64> %sum
}

; vld3 of three <16 x i8> vectors. Two vld3.8 instructions are expected: the
; first fills the even-numbered d registers and writes back the address, the
; second fills the odd-numbered ones.
define <16 x i8> @vld3Qi8(ptr %A) nounwind {
;CHECK-LABEL: vld3Qi8:
;The alignment operand is 32, but the expected output carries :64, the maximum
;for this instruction:
;CHECK: vld3.8 {d16, d18, d20}, [{{r[0-9]+|lr}}:64]!
;CHECK: vld3.8 {d17, d19, d21}, [{{r[0-9]+|lr}}:64]
  %vecs = call %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8.p0(ptr %A, i32 32)
  %first = extractvalue %struct.__neon_int8x16x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int8x16x3_t %vecs, 2
  %sum = add <16 x i8> %first, %third
  ret <16 x i8> %sum
}

; vld3 of three <8 x i16> vectors; two vld3.16 instructions are expected.
define <8 x i16> @vld3Qi16(ptr %A) nounwind {
;CHECK-LABEL: vld3Qi16:
;CHECK: vld3.16
;CHECK: vld3.16
  %vecs = call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16.p0(ptr %A, i32 1)
  %first = extractvalue %struct.__neon_int16x8x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int16x8x3_t %vecs, 2
  %sum = add <8 x i16> %first, %third
  ret <8 x i16> %sum
}

; vld3 of three <4 x i32> vectors; two vld3.32 instructions are expected.
define <4 x i32> @vld3Qi32(ptr %A) nounwind {
;CHECK-LABEL: vld3Qi32:
;CHECK: vld3.32
;CHECK: vld3.32
  %vecs = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0(ptr %A, i32 1)
  %first = extractvalue %struct.__neon_int32x4x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int32x4x3_t %vecs, 2
  %sum = add <4 x i32> %first, %third
  ret <4 x i32> %sum
}

;Post-increment updating load for the 128-bit form: the base address is read
;from %ptr, advanced by 12 x i32 (the size of the loaded aggregate), and stored
;back. Both expected vld3.32 instructions write back to the same base register.
define <4 x i32> @vld3Qi32_update(ptr %ptr) nounwind {
;CHECK-LABEL: vld3Qi32_update:
;CHECK: vld3.32 {d16, d18, d20}, [[[R:r[0-9]+|lr]]]!
;CHECK: vld3.32 {d17, d19, d21}, [[[R]]]!
  %base = load ptr, ptr %ptr
  %vecs = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0(ptr %base, i32 1)
  %first = extractvalue %struct.__neon_int32x4x3_t %vecs, 0
  %third = extractvalue %struct.__neon_int32x4x3_t %vecs, 2
  %sum = add <4 x i32> %first, %third
  %next = getelementptr i32, ptr %base, i32 12
  store ptr %next, ptr %ptr
  ret <4 x i32> %sum
}

; vld3 of three <4 x float> vectors; fields 0 and 2 are combined with fadd and
; two vld3.32 instructions are expected.
define <4 x float> @vld3Qf(ptr %A) nounwind {
;CHECK-LABEL: vld3Qf:
;CHECK: vld3.32
;CHECK: vld3.32
  %vecs = call %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32.p0(ptr %A, i32 1)
  %first = extractvalue %struct.__neon_float32x4x3_t %vecs, 0
  %third = extractvalue %struct.__neon_float32x4x3_t %vecs, 2
  %sum = fadd <4 x float> %first, %third
  ret <4 x float> %sum
}

; Intrinsic declarations for the 64-bit vector forms.
declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3.v8i8.p0(ptr, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16.p0(ptr, i32) nounwind readonly
declare %struct.__neon_int32x2x3_t @llvm.arm.neon.vld3.v2i32.p0(ptr, i32) nounwind readonly
declare %struct.__neon_float32x2x3_t @llvm.arm.neon.vld3.v2f32.p0(ptr, i32) nounwind readonly
declare %struct.__neon_int64x1x3_t @llvm.arm.neon.vld3.v1i64.p0(ptr, i32) nounwind readonly

; Intrinsic declarations for the 128-bit vector forms.
declare %struct.__neon_int8x16x3_t @llvm.arm.neon.vld3.v16i8.p0(ptr, i32) nounwind readonly
declare %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3.v8i16.p0(ptr, i32) nounwind readonly
declare %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32.p0(ptr, i32) nounwind readonly
declare %struct.__neon_float32x4x3_t @llvm.arm.neon.vld3.v4f32.p0(ptr, i32) nounwind readonly