; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s

; Tests instruction selection for the NEON "load one element and duplicate to
; all lanes" forms (VLD1DUP/VLD2DUP/VLD3DUP/VLD4DUP): the alignment operand
; that gets printed on the address register, and folding of pre/post-increment
; pointer updates into the load's writeback forms ("[rN]!" for a fixed
; element-size increment, "[rN], rM" for a register increment).

define <8 x i8> @vld1dupi8(i8* %A) nounwind {
;CHECK-LABEL: vld1dupi8:
;Check the (default) alignment value.
;CHECK: vld1.8 {d16[]}, [r0]
  %tmp1 = load i8, i8* %A, align 8
  %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0
  %tmp3 = shufflevector <8 x i8> %tmp2, <8 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %tmp3
}

; Pre-increment: the address computation feeding the load is also stored back,
; so the updated pointer (base + %b) is used directly as the load address.
define <8 x i8> @vld1dupi8_preinc(i8** noalias nocapture %a, i32 %b) nounwind {
entry:
;CHECK-LABEL: vld1dupi8_preinc:
;CHECK: vld1.8 {d16[]}, [r1]
  %0 = load i8*, i8** %a, align 4
  %add.ptr = getelementptr inbounds i8, i8* %0, i32 %b
  %1 = load i8, i8* %add.ptr, align 1
  %2 = insertelement <8 x i8> undef, i8 %1, i32 0
  %lane = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
  store i8* %add.ptr, i8** %a, align 4
  ret <8 x i8> %lane
}

; Post-increment by the element size (1 byte) folds into the "!" writeback form.
define <8 x i8> @vld1dupi8_postinc_fixed(i8** noalias nocapture %a) nounwind {
entry:
;CHECK-LABEL: vld1dupi8_postinc_fixed:
;CHECK: vld1.8 {d16[]}, [r1]!
  %0 = load i8*, i8** %a, align 4
  %1 = load i8, i8* %0, align 1
  %2 = insertelement <8 x i8> undef, i8 %1, i32 0
  %lane = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
  %add.ptr = getelementptr inbounds i8, i8* %0, i32 1
  store i8* %add.ptr, i8** %a, align 4
  ret <8 x i8> %lane
}

; Post-increment by a variable amount folds into the register-writeback form.
define <8 x i8> @vld1dupi8_postinc_register(i8** noalias nocapture %a, i32 %n) nounwind {
entry:
;CHECK-LABEL: vld1dupi8_postinc_register:
;CHECK: vld1.8 {d16[]}, [r2], r1
  %0 = load i8*, i8** %a, align 4
  %1 = load i8, i8* %0, align 1
  %2 = insertelement <8 x i8> undef, i8 %1, i32 0
  %lane = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
  %add.ptr = getelementptr inbounds i8, i8* %0, i32 %n
  store i8* %add.ptr, i8** %a, align 4
  ret <8 x i8> %lane
}

; Q-register (128-bit) variants of the same pre/post-increment patterns; the
; duplicate covers both halves: {d16[], d17[]}.
define <16 x i8> @vld1dupqi8_preinc(i8** noalias nocapture %a, i32 %b) nounwind {
entry:
;CHECK-LABEL: vld1dupqi8_preinc:
;CHECK: vld1.8 {d16[], d17[]}, [r1]
  %0 = load i8*, i8** %a, align 4
  %add.ptr = getelementptr inbounds i8, i8* %0, i32 %b
  %1 = load i8, i8* %add.ptr, align 1
  %2 = insertelement <16 x i8> undef, i8 %1, i32 0
  %lane = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
  store i8* %add.ptr, i8** %a, align 4
  ret <16 x i8> %lane
}

define <16 x i8> @vld1dupqi8_postinc_fixed(i8** noalias nocapture %a) nounwind {
entry:
;CHECK-LABEL: vld1dupqi8_postinc_fixed:
;CHECK: vld1.8 {d16[], d17[]}, [r1]!
  %0 = load i8*, i8** %a, align 4
  %1 = load i8, i8* %0, align 1
  %2 = insertelement <16 x i8> undef, i8 %1, i32 0
  %lane = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
  %add.ptr = getelementptr inbounds i8, i8* %0, i32 1
  store i8* %add.ptr, i8** %a, align 4
  ret <16 x i8> %lane
}

define <16 x i8> @vld1dupqi8_postinc_register(i8** noalias nocapture %a, i32 %n) nounwind {
entry:
;CHECK-LABEL: vld1dupqi8_postinc_register:
;CHECK: vld1.8 {d16[], d17[]}, [r2], r1
  %0 = load i8*, i8** %a, align 4
  %1 = load i8, i8* %0, align 1
  %2 = insertelement <16 x i8> undef, i8 %1, i32 0
  %lane = shufflevector <16 x i8> %2, <16 x i8> undef, <16 x i32> zeroinitializer
  %add.ptr = getelementptr inbounds i8, i8* %0, i32 %n
  store i8* %add.ptr, i8** %a, align 4
  ret <16 x i8> %lane
}

define <4 x i16> @vld1dupi16(i16* %A) nounwind {
;CHECK-LABEL: vld1dupi16:
;Check the alignment value. Max for this instruction is 16 bits:
;CHECK: vld1.16 {d16[]}, [r0:16]
  %tmp1 = load i16, i16* %A, align 8
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
  %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %tmp3
}

; An under-aligned (align 1) load must not get an alignment annotation.
define <4 x i16> @vld1dupi16_misaligned(i16* %A) nounwind {
;CHECK-LABEL: vld1dupi16_misaligned:
;CHECK: vld1.16 {d16[]}, [r0]
  %tmp1 = load i16, i16* %A, align 1
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
  %tmp3 = shufflevector <4 x i16> %tmp2, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %tmp3
}

define <2 x i32> @vld1dupi32(i32* %A) nounwind {
;CHECK-LABEL: vld1dupi32:
;Check the alignment value. Max for this instruction is 32 bits:
;CHECK: vld1.32 {d16[]}, [r0:32]
  %tmp1 = load i32, i32* %A, align 8
  %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
  %tmp3 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %tmp3
}

define <2 x float> @vld1dupf(float* %A) nounwind {
;CHECK-LABEL: vld1dupf:
;CHECK: vld1.32 {d16[]}, [r0:32]
  %tmp0 = load float, float* %A
  %tmp1 = insertelement <2 x float> undef, float %tmp0, i32 0
  %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %tmp2
}

define <16 x i8> @vld1dupQi8(i8* %A) nounwind {
;CHECK-LABEL: vld1dupQi8:
;Check the (default) alignment value.
;CHECK: vld1.8 {d16[], d17[]}, [r0]
  %tmp1 = load i8, i8* %A, align 8
  %tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0
  %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %tmp3
}

define <4 x float> @vld1dupQf(float* %A) nounwind {
;CHECK-LABEL: vld1dupQf:
;CHECK: vld1.32 {d16[], d17[]}, [r0:32]
  %tmp0 = load float, float* %A
  %tmp1 = insertelement <4 x float> undef, float %tmp0, i32 0
  %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %tmp2
}

; Two-register result structs for the vld2lane intrinsics below.
; NOTE(review): "__neon_int4x16x2_t" is inconsistently named relative to
; "__neon_int16x4x3_t"/"__neon_int16x4x4_t" further down (element-count and
; element-width fields swapped); harmless since IR struct names are cosmetic.
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int4x16x2_t = type { <4 x i16>, <4 x i16> }
%struct.__neon_int2x32x2_t = type { <2 x i32>, <2 x i32> }

; A vld2lane of lane 0 whose results are both splatted selects as vld2 dup.
define <8 x i8> @vld2dupi8(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi8:
;Check the (default) alignment value.
;CHECK: vld2.8 {d16[], d17[]}, [r0]
  %tmp0 = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
  %tmp1 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 0
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
  %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp0, 1
  %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer
  %tmp5 = add <8 x i8> %tmp2, %tmp4
  ret <8 x i8> %tmp5
}

; Pre/post-increment variants of vld2 dup; the pair is returned through the
; sret pointer so the writeback on %a is the interesting part.
define void @vld2dupi8_preinc(%struct.__neon_int8x8x2_t* noalias nocapture sret %agg.result, i8** noalias nocapture %a, i32 %b) nounwind {
;CHECK-LABEL: vld2dupi8_preinc:
;CHECK: vld2.8 {d16[], d17[]}, [r2]
entry:
  %0 = load i8*, i8** %a, align 4
  %add.ptr = getelementptr inbounds i8, i8* %0, i32 %b
  %vld_dup = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %add.ptr, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
  %1 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
  %2 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 1
  %lane1 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
  store i8* %add.ptr, i8** %a, align 4
  %r8 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 0
  store <8 x i8> %lane, <8 x i8>* %r8, align 8
  %r11 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 1
  store <8 x i8> %lane1, <8 x i8>* %r11, align 8
  ret void
}

; Fixed post-increment by the transfer size (2 x i8 = 2 bytes) -> "!".
define void @vld2dupi8_postinc_fixed(%struct.__neon_int8x8x2_t* noalias nocapture sret %agg.result, i8** noalias nocapture %a) nounwind {
entry:
;CHECK-LABEL: vld2dupi8_postinc_fixed:
;CHECK: vld2.8 {d16[], d17[]}, [r2]!
  %0 = load i8*, i8** %a, align 4
  %vld_dup = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %0, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
  %1 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
  %2 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 1
  %lane1 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
  %add.ptr = getelementptr inbounds i8, i8* %0, i32 2
  store i8* %add.ptr, i8** %a, align 4
  %r7 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 0
  store <8 x i8> %lane, <8 x i8>* %r7, align 8
  %r10 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 1
  store <8 x i8> %lane1, <8 x i8>* %r10, align 8
  ret void
}

; Variable post-increment -> register writeback form.
define void @vld2dupi8_postinc_variable(%struct.__neon_int8x8x2_t* noalias nocapture sret %agg.result, i8** noalias nocapture %a, i32 %n) nounwind {
entry:
;CHECK-LABEL: vld2dupi8_postinc_variable:
;CHECK: vld2.8 {d16[], d17[]}, [r3], r2
  %0 = load i8*, i8** %a, align 4
  %vld_dup = tail call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8* %0, <8 x i8> undef, <8 x i8> undef, i32 0, i32 1)
  %1 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
  %2 = extractvalue %struct.__neon_int8x8x2_t %vld_dup, 1
  %lane1 = shufflevector <8 x i8> %2, <8 x i8> undef, <8 x i32> zeroinitializer
  %add.ptr = getelementptr inbounds i8, i8* %0, i32 %n
  store i8* %add.ptr, i8** %a, align 4
  %r7 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 0
  store <8 x i8> %lane, <8 x i8>* %r7, align 8
  %r10 = getelementptr inbounds %struct.__neon_int8x8x2_t, %struct.__neon_int8x8x2_t* %agg.result, i32 0, i32 1
  store <8 x i8> %lane1, <8 x i8>* %r10, align 8
  ret void
}

define <4 x i16> @vld2dupi16(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi16:
;Check that a power-of-two alignment smaller than the total size of the memory
;being loaded is ignored.
;CHECK: vld2.16 {d16[], d17[]}, [r0]
  %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
  %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
  %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp5 = add <4 x i16> %tmp2, %tmp4
  ret <4 x i16> %tmp5
}

;Check for a post-increment updating load.
define <4 x i16> @vld2dupi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld2dupi16_update:
;CHECK: vld2.16 {d16[], d17[]}, [r1]!
  %A = load i16*, i16** %ptr
  %A2 = bitcast i16* %A to i8*
  %tmp0 = tail call %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, i32 0, i32 2)
  %tmp1 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 0
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp3 = extractvalue %struct.__neon_int4x16x2_t %tmp0, 1
  %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp5 = add <4 x i16> %tmp2, %tmp4
  ; Advance by 2 x i16 = 4 bytes, the vld2.16 transfer size, so the update
  ; folds into the "!" writeback form checked above.
  %tmp6 = getelementptr i16, i16* %A, i32 2
  store i16* %tmp6, i16** %ptr
  ret <4 x i16> %tmp5
}

define <2 x i32> @vld2dupi32(i8* %A) nounwind {
;CHECK-LABEL: vld2dupi32:
;Check the alignment value. Max for this instruction is 64 bits:
;CHECK: vld2.32 {d16[], d17[]}, [r0:64]
  %tmp0 = tail call %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8* %A, <2 x i32> undef, <2 x i32> undef, i32 0, i32 16)
  %tmp1 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 0
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp3 = extractvalue %struct.__neon_int2x32x2_t %tmp0, 1
  %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp5 = add <2 x i32> %tmp2, %tmp4
  ret <2 x i32> %tmp5
}

declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int4x16x2_t @llvm.arm.neon.vld2lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int2x32x2_t @llvm.arm.neon.vld2lane.v2i32.p0i8(i8*, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly

%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
%struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> }

;Check for a post-increment updating load with register increment.
define <8 x i8> @vld3dupi8_update(i8** %ptr, i32 %inc) nounwind {
;CHECK-LABEL: vld3dupi8_update:
;CHECK: vld3.8 {d16[], d17[], d18[]}, [r2], r1
  %A = load i8*, i8** %ptr
  %tmp0 = tail call %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8* %A, <8 x i8> undef, <8 x i8> undef, <8 x i8> undef, i32 0, i32 8)
  %tmp1 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 0
  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> zeroinitializer
  %tmp3 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 1
  %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> undef, <8 x i32> zeroinitializer
  %tmp5 = extractvalue %struct.__neon_int8x8x3_t %tmp0, 2
  %tmp6 = shufflevector <8 x i8> %tmp5, <8 x i8> undef, <8 x i32> zeroinitializer
  %tmp7 = add <8 x i8> %tmp2, %tmp4
  %tmp8 = add <8 x i8> %tmp7, %tmp6
  %tmp9 = getelementptr i8, i8* %A, i32 %inc
  store i8* %tmp9, i8** %ptr
  ret <8 x i8> %tmp8
}

define <4 x i16> @vld3dupi16(i8* %A) nounwind {
;CHECK-LABEL: vld3dupi16:
;Check the (default) alignment value. VLD3 does not support alignment.
;CHECK: vld3.16 {d16[], d17[], d18[]}, [r0]
  %tmp0 = tail call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8* %A, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 8)
  %tmp1 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 0
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 1
  %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp5 = extractvalue %struct.__neon_int16x4x3_t %tmp0, 2
  %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp7 = add <4 x i16> %tmp2, %tmp4
  %tmp8 = add <4 x i16> %tmp7, %tmp6
  ret <4 x i16> %tmp8
}

declare %struct.__neon_int8x8x3_t @llvm.arm.neon.vld3lane.v8i8.p0i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind readonly
declare %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly

%struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> }
%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }

;Check for a post-increment updating load.
define <4 x i16> @vld4dupi16_update(i16** %ptr) nounwind {
;CHECK-LABEL: vld4dupi16_update:
;CHECK: vld4.16 {d16[], d17[], d18[], d19[]}, [r1]!
  %A = load i16*, i16** %ptr
  %A2 = bitcast i16* %A to i8*
  %tmp0 = tail call %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8* %A2, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, <4 x i16> undef, i32 0, i32 1)
  %tmp1 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 0
  %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp3 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 1
  %tmp4 = shufflevector <4 x i16> %tmp3, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp5 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 2
  %tmp6 = shufflevector <4 x i16> %tmp5, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp7 = extractvalue %struct.__neon_int16x4x4_t %tmp0, 3
  %tmp8 = shufflevector <4 x i16> %tmp7, <4 x i16> undef, <4 x i32> zeroinitializer
  %tmp9 = add <4 x i16> %tmp2, %tmp4
  %tmp10 = add <4 x i16> %tmp6, %tmp8
  %tmp11 = add <4 x i16> %tmp9, %tmp10
  ; Advance by 4 x i16 = 8 bytes, the vld4.16 transfer size, enabling the
  ; "!" writeback form checked above.
  %tmp12 = getelementptr i16, i16* %A, i32 4
  store i16* %tmp12, i16** %ptr
  ret <4 x i16> %tmp11
}

define <2 x i32> @vld4dupi32(i8* %A) nounwind {
;CHECK-LABEL: vld4dupi32:
;Check the alignment value. An 8-byte alignment is allowed here even though
;it is smaller than the total size of the memory being loaded.
;CHECK: vld4.32 {d16[], d17[], d18[], d19[]}, [r0:64]
  %tmp0 = tail call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8* %A, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, <2 x i32> undef, i32 0, i32 8)
  %tmp1 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 0
  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 1
  %tmp4 = shufflevector <2 x i32> %tmp3, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 2
  %tmp6 = shufflevector <2 x i32> %tmp5, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp7 = extractvalue %struct.__neon_int32x2x4_t %tmp0, 3
  %tmp8 = shufflevector <2 x i32> %tmp7, <2 x i32> undef, <2 x i32> zeroinitializer
  %tmp9 = add <2 x i32> %tmp2, %tmp4
  %tmp10 = add <2 x i32> %tmp6, %tmp8
  %tmp11 = add <2 x i32> %tmp9, %tmp10
  ret <2 x i32> %tmp11
}

declare %struct.__neon_int16x4x4_t @llvm.arm.neon.vld4lane.v4i16.p0i8(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind readonly
declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4lane.v2i32.p0i8(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind readonly