; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm -mattr=+neon | FileCheck %s
;
; Codegen test for single-lane vector stores on ARM with NEON enabled.
;
; Two IR shapes are exercised:
;   * extractelement followed by a scalar store (the @vst1lane* functions);
;   * calls to the llvm.arm.neon.vst{2,3,4}lane intrinsics.
; In the intrinsic calls the second-to-last i32 operand selects the lane and
; the last i32 operand is the alignment hint (presumably in bytes - confirm
; against the intrinsic definition). The expected output shows that hint
; either dropped or turned into an ":<bits>" qualifier on the address.
;
; Naming: "Q" functions operate on 128-bit vectors, the others on 64-bit
; vectors; "_update" functions also store an advanced pointer back to memory.

;Check the (default) alignment.
; Lane 3 of an <8 x i8>; the store is "align 8" but the expected vst1.8 has no
; alignment qualifier.
define void @vst1lanei8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst1lanei8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vst1.8 {d16[3]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, ptr %B
  %tmp2 = extractelement <8 x i8> %tmp1, i32 3
  store i8 %tmp2, ptr %A, align 8
  ret void
}

;Check for a post-increment updating store.
; The destination pointer is loaded from %ptr, advanced by one i8 and stored
; back; the expected output uses the writeback form "[r2]!".
define void @vst1lanei8_update(ptr %ptr, ptr %B) nounwind {
; CHECK-LABEL: vst1lanei8_update:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r2, [r0]
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vst1.8 {d16[3]}, [r2]!
; CHECK-NEXT: str r2, [r0]
; CHECK-NEXT: mov pc, lr
  %A = load ptr, ptr %ptr
  %tmp1 = load <8 x i8>, ptr %B
  %tmp2 = extractelement <8 x i8> %tmp1, i32 3
  store i8 %tmp2, ptr %A, align 8
  %tmp3 = getelementptr i8, ptr %A, i32 1
  store ptr %tmp3, ptr %ptr
  ret void
}

;Check the alignment value. Max for this instruction is 16 bits:
; The store is "align 8"; the expected qualifier is ":16".
define void @vst1lanei16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst1lanei16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vst1.16 {d16[2]}, [r0:16]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i16>, ptr %B
  %tmp2 = extractelement <4 x i16> %tmp1, i32 2
  store i16 %tmp2, ptr %A, align 8
  ret void
}

;Check the alignment value. Max for this instruction is 32 bits:
; The store is "align 8"; the expected qualifier is ":32".
define void @vst1lanei32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst1lanei32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vst1.32 {d16[1]}, [r0:32]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x i32>, ptr %B
  %tmp2 = extractelement <2 x i32> %tmp1, i32 1
  store i32 %tmp2, ptr %A, align 8
  ret void
}

; Float variant of the 32-bit lane store. The store carries no explicit
; alignment; the expected output still uses the ":32" qualifier.
define void @vst1lanef(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst1lanef:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vst1.32 {d16[1]}, [r0:32]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x float>, ptr %B
  %tmp2 = extractelement <2 x float> %tmp1, i32 1
  store float %tmp2, ptr %A
  ret void
}

; // Can use scalar load. No need to use vectors.
; // CHE-CK: vst1.8 {d17[1]}, [r0]
; NOTE(review): the autogenerated expectations below still show a vector load
; followed by vst1.8 for this case, so the remark above does not describe the
; current output - confirm whether it is a TODO or stale.
; Lane 9 of a <16 x i8> is expected to be addressed as d17[1].
define void @vst1laneQi8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst1laneQi8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vst1.8 {d17[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <16 x i8>, ptr %B
  %tmp2 = extractelement <16 x i8> %tmp1, i32 9
  store i8 %tmp2, ptr %A, align 8
  ret void
}

; Lane 5 of an <8 x i16> is expected to be addressed as d17[1], with the
; "align 8" store producing a ":16" qualifier.
define void @vst1laneQi16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst1laneQi16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vst1.16 {d17[1]}, [r0:16]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, ptr %B
  %tmp2 = extractelement <8 x i16> %tmp1, i32 5
  store i16 %tmp2, ptr %A, align 8
  ret void
}

; // Can use scalar load. No need to use vectors.
; // CHE-CK: vst1.32 {d17[1]}, [r0:32]
; Lane 3 of a <4 x i32>: the expected output is a scalar ldr from offset #12
; followed by a scalar str, with no NEON instructions.
define void @vst1laneQi32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst1laneQi32:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r1, [r1, #12]
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i32>, ptr %B
  %tmp2 = extractelement <4 x i32> %tmp1, i32 3
  store i32 %tmp2, ptr %A, align 8
  ret void
}

;Check for a post-increment updating store.
; // Can use scalar load. No need to use vectors.
; // CHE-CK: vst1.32 {d17[1]}, [r1:32]!
; Same scalar lowering as @vst1laneQi32; the pointer advance by one i32 is
; expected to fold into a post-indexed "str r1, [r2], #4".
define void @vst1laneQi32_update(ptr %ptr, ptr %B) nounwind {
; CHECK-LABEL: vst1laneQi32_update:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r2, [r0]
; CHECK-NEXT: ldr r1, [r1, #12]
; CHECK-NEXT: str r1, [r2], #4
; CHECK-NEXT: str r2, [r0]
; CHECK-NEXT: mov pc, lr
  %A = load ptr, ptr %ptr
  %tmp1 = load <4 x i32>, ptr %B
  %tmp2 = extractelement <4 x i32> %tmp1, i32 3
  store i32 %tmp2, ptr %A, align 8
  %tmp3 = getelementptr i32, ptr %A, i32 1
  store ptr %tmp3, ptr %ptr
  ret void
}

; // Can use scalar load. No need to use vectors.
; // CHE-CK: vst1.32 {d17[1]}, [r0]
; Float variant of @vst1laneQi32; the expected output is the same scalar
; ldr/str pair.
define void @vst1laneQf(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst1laneQf:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r1, [r1, #12]
; CHECK-NEXT: str r1, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x float>, ptr %B
  %tmp2 = extractelement <4 x float> %tmp1, i32 3
  store float %tmp2, ptr %A
  ret void
}

;Check the alignment value. Max for this instruction is 16 bits:
; Alignment operand 4; the expected qualifier is ":16". The same vector is
; passed for both operands, hence the vorr copy in the expected output.
define void @vst2lanei8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst2lanei8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vst2.8 {d16[1], d17[1]}, [r0:16]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, ptr %B
  call void @llvm.arm.neon.vst2lane.p0.v8i8(ptr %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 4)
  ret void
}

;Check the alignment value. Max for this instruction is 32 bits:
; Alignment operand 8; the expected qualifier is ":32".
define void @vst2lanei16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst2lanei16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vst2.16 {d16[1], d17[1]}, [r0:32]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i16>, ptr %B
  call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  ret void
}

;Check for a post-increment updating store with register increment.
; The pointer is advanced by %inc i16 elements; the expected output scales
; %inc with "lsl #1" and uses it as the post-increment register. Alignment
; operand 2 yields no qualifier here.
define void @vst2lanei16_update(ptr %ptr, ptr %B, i32 %inc) nounwind {
; CHECK-LABEL: vst2lanei16_update:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: lsl r1, r2, #1
; CHECK-NEXT: ldr r3, [r0]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vst2.16 {d16[1], d17[1]}, [r3], r1
; CHECK-NEXT: str r3, [r0]
; CHECK-NEXT: mov pc, lr
  %A = load ptr, ptr %ptr
  %tmp1 = load <4 x i16>, ptr %B
  call void @llvm.arm.neon.vst2lane.p0.v4i16(ptr %A, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 2)
  %tmp2 = getelementptr i16, ptr %A, i32 %inc
  store ptr %tmp2, ptr %ptr
  ret void
}

; Alignment operand 1; no alignment qualifier is expected.
define void @vst2lanei32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst2lanei32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vst2.32 {d16[1], d17[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x i32>, ptr %B
  call void @llvm.arm.neon.vst2lane.p0.v2i32(ptr %A, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  ret void
}

; Float variant of @vst2lanei32; the expected output is the same vst2.32 form.
define void @vst2lanef(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst2lanef:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vst2.32 {d16[1], d17[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x float>, ptr %B
  call void @llvm.arm.neon.vst2lane.p0.v2f32(ptr %A, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

;Check the (default) alignment.
; Lane 5 of each <8 x i16> operand is expected to be addressed as lane 1 of
; the odd D registers (d17, d19).
define void @vst2laneQi16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst2laneQi16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vorr q9, q8, q8
; CHECK-NEXT: vst2.16 {d17[1], d19[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, ptr %B
  call void @llvm.arm.neon.vst2lane.p0.v8i16(ptr %A, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 5, i32 1)
  ret void
}

;Check the alignment value. Max for this instruction is 64 bits:
; Alignment operand 16; the expected qualifier is ":64". Lane 2 is expected
; to be addressed as d17[0] / d19[0].
define void @vst2laneQi32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst2laneQi32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vorr q9, q8, q8
; CHECK-NEXT: vst2.32 {d17[0], d19[0]}, [r0:64]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i32>, ptr %B
  call void @llvm.arm.neon.vst2lane.p0.v4i32(ptr %A, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 16)
  ret void
}

; Float variant on 128-bit vectors; lane 3 is expected to be addressed as
; d17[1] / d19[1].
define void @vst2laneQf(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst2laneQf:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vorr q9, q8, q8
; CHECK-NEXT: vst2.32 {d17[1], d19[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x float>, ptr %B
  call void @llvm.arm.neon.vst2lane.p0.v4f32(ptr %A, <4 x float> %tmp1, <4 x float> %tmp1, i32 3, i32 1)
  ret void
}

; vst2lane intrinsics for 64-bit vector operands.
declare void @llvm.arm.neon.vst2lane.p0.v8i8(ptr, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.p0.v4i16(ptr, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.p0.v2i32(ptr, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.p0.v2f32(ptr, <2 x float>, <2 x float>, i32, i32) nounwind

; vst2lane intrinsics for 128-bit vector operands.
declare void @llvm.arm.neon.vst2lane.p0.v8i16(ptr, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.p0.v4i32(ptr, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst2lane.p0.v4f32(ptr, <4 x float>, <4 x float>, i32, i32) nounwind

; Three-register lane store of i8 elements, lane 1, alignment operand 1.
define void @vst3lanei8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst3lanei8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vorr d18, d16, d16
; CHECK-NEXT: vst3.8 {d16[1], d17[1], d18[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, ptr %B
  call void @llvm.arm.neon.vst3lane.p0.v8i8(ptr %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1)
  ret void
}

;Check the (default) alignment value. VST3 does not support alignment.
; Alignment operand 8 is passed, yet no qualifier is expected.
define void @vst3lanei16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst3lanei16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vorr d18, d16, d16
; CHECK-NEXT: vst3.16 {d16[1], d17[1], d18[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i16>, ptr %B
  call void @llvm.arm.neon.vst3lane.p0.v4i16(ptr %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 8)
  ret void
}

; Three-register lane store of i32 elements, lane 1, alignment operand 1.
define void @vst3lanei32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst3lanei32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vorr d18, d16, d16
; CHECK-NEXT: vst3.32 {d16[1], d17[1], d18[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x i32>, ptr %B
  call void @llvm.arm.neon.vst3lane.p0.v2i32(ptr %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 1)
  ret void
}

; Float variant of @vst3lanei32; the expected output is the same vst3.32 form.
define void @vst3lanef(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst3lanef:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vorr d18, d16, d16
; CHECK-NEXT: vst3.32 {d16[1], d17[1], d18[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x float>, ptr %B
  call void @llvm.arm.neon.vst3lane.p0.v2f32(ptr %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

;Check the (default) alignment value. VST3 does not support alignment.
; Alignment operand 8 is passed, yet no qualifier is expected. Lane 6 of each
; <8 x i16> operand is expected to be addressed as lane 2 of d17/d19/d21.
define void @vst3laneQi16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst3laneQi16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vorr q9, q8, q8
; CHECK-NEXT: vorr q10, q8, q8
; CHECK-NEXT: vst3.16 {d17[2], d19[2], d21[2]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, ptr %B
  call void @llvm.arm.neon.vst3lane.p0.v8i16(ptr %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 6, i32 8)
  ret void
}

; Lane 0 of each <4 x i32> operand is expected to be addressed as lane 0 of
; the even D registers (d16, d18, d20).
define void @vst3laneQi32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst3laneQi32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vorr q9, q8, q8
; CHECK-NEXT: vorr q10, q8, q8
; CHECK-NEXT: vst3.32 {d16[0], d18[0], d20[0]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i32>, ptr %B
  call void @llvm.arm.neon.vst3lane.p0.v4i32(ptr %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
  ret void
}

;Check for a post-increment updating store.
; The pointer is advanced by three i32 elements, matching the three lanes
; written; the expected output uses the writeback form "[r2]!".
define void @vst3laneQi32_update(ptr %ptr, ptr %B) nounwind {
; CHECK-LABEL: vst3laneQi32_update:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vorr q9, q8, q8
; CHECK-NEXT: ldr r2, [r0]
; CHECK-NEXT: vorr q10, q8, q8
; CHECK-NEXT: vst3.32 {d16[0], d18[0], d20[0]}, [r2]!
; CHECK-NEXT: str r2, [r0]
; CHECK-NEXT: mov pc, lr
  %A = load ptr, ptr %ptr
  %tmp1 = load <4 x i32>, ptr %B
  call void @llvm.arm.neon.vst3lane.p0.v4i32(ptr %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 0, i32 1)
  %tmp2 = getelementptr i32, ptr %A, i32 3
  store ptr %tmp2, ptr %ptr
  ret void
}

; Float variant on 128-bit vectors, lane 1, alignment operand 1.
define void @vst3laneQf(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst3laneQf:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vorr q9, q8, q8
; CHECK-NEXT: vorr q10, q8, q8
; CHECK-NEXT: vst3.32 {d16[1], d18[1], d20[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x float>, ptr %B
  call void @llvm.arm.neon.vst3lane.p0.v4f32(ptr %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  ret void
}

; vst3lane intrinsics for 64-bit vector operands.
declare void @llvm.arm.neon.vst3lane.p0.v8i8(ptr, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.p0.v4i16(ptr, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.p0.v2i32(ptr, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.p0.v2f32(ptr, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind

; vst3lane intrinsics for 128-bit vector operands.
declare void @llvm.arm.neon.vst3lane.p0.v8i16(ptr, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.p0.v4i32(ptr, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst3lane.p0.v4f32(ptr, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind


;Check the alignment value. Max for this instruction is 32 bits:
; Alignment operand 8; the expected qualifier is ":32".
define void @vst4lanei8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst4lanei8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vorr d18, d16, d16
; CHECK-NEXT: vorr d19, d16, d16
; CHECK-NEXT: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r0:32]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i8>, ptr %B
  call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  ret void
}

;Check for a post-increment updating store.
; The pointer is advanced by four i8 elements, matching the four lanes
; written; the expected output keeps the ":32" qualifier and adds writeback.
define void @vst4lanei8_update(ptr %ptr, ptr %B) nounwind {
; CHECK-LABEL: vst4lanei8_update:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: ldr r2, [r0]
; CHECK-NEXT: vorr d18, d16, d16
; CHECK-NEXT: vorr d19, d16, d16
; CHECK-NEXT: vst4.8 {d16[1], d17[1], d18[1], d19[1]}, [r2:32]!
; CHECK-NEXT: str r2, [r0]
; CHECK-NEXT: mov pc, lr
  %A = load ptr, ptr %ptr
  %tmp1 = load <8 x i8>, ptr %B
  call void @llvm.arm.neon.vst4lane.p0.v8i8(ptr %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 8)
  %tmp2 = getelementptr i8, ptr %A, i32 4
  store ptr %tmp2, ptr %ptr
  ret void
}

; Four-register lane store of i16 elements, lane 1, alignment operand 1; no
; alignment qualifier is expected.
define void @vst4lanei16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst4lanei16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vorr d18, d16, d16
; CHECK-NEXT: vorr d19, d16, d16
; CHECK-NEXT: vst4.16 {d16[1], d17[1], d18[1], d19[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i16>, ptr %B
  call void @llvm.arm.neon.vst4lane.p0.v4i16(ptr %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1, i32 1)
  ret void
}

;Check the alignment value. Max for this instruction is 128 bits:
; Alignment operand 16; the expected qualifier is ":128".
define void @vst4lanei32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst4lanei32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vorr d18, d16, d16
; CHECK-NEXT: vorr d19, d16, d16
; CHECK-NEXT: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0:128]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x i32>, ptr %B
  call void @llvm.arm.neon.vst4lane.p0.v2i32(ptr %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1, i32 16)
  ret void
}

; Float variant with alignment operand 1; no alignment qualifier is expected.
define void @vst4lanef(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst4lanef:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr d16, [r1]
; CHECK-NEXT: vorr d17, d16, d16
; CHECK-NEXT: vorr d18, d16, d16
; CHECK-NEXT: vorr d19, d16, d16
; CHECK-NEXT: vst4.32 {d16[1], d17[1], d18[1], d19[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <2 x float>, ptr %B
  call void @llvm.arm.neon.vst4lane.p0.v2f32(ptr %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1, i32 1)
  ret void
}

;Check the alignment value. Max for this instruction is 64 bits:
; Alignment operand 16; the expected qualifier is ":64". Lane 7 of each
; <8 x i16> operand is expected to be addressed as lane 3 of the odd D
; registers.
define void @vst4laneQi16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst4laneQi16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vorr q9, q8, q8
; CHECK-NEXT: vorr q10, q8, q8
; CHECK-NEXT: vorr q11, q8, q8
; CHECK-NEXT: vst4.16 {d17[3], d19[3], d21[3], d23[3]}, [r0:64]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <8 x i16>, ptr %B
  call void @llvm.arm.neon.vst4lane.p0.v8i16(ptr %A, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 7, i32 16)
  ret void
}

;Check the (default) alignment.
; Lane 2 of each <4 x i32> operand is expected to be addressed as lane 0 of
; the odd D registers.
define void @vst4laneQi32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst4laneQi32:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vorr q9, q8, q8
; CHECK-NEXT: vorr q10, q8, q8
; CHECK-NEXT: vorr q11, q8, q8
; CHECK-NEXT: vst4.32 {d17[0], d19[0], d21[0], d23[0]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x i32>, ptr %B
  call void @llvm.arm.neon.vst4lane.p0.v4i32(ptr %A, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 2, i32 1)
  ret void
}

; Float variant on 128-bit vectors, lane 1, alignment operand 1.
define void @vst4laneQf(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vst4laneQf:
; CHECK: @ %bb.0:
; CHECK-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-NEXT: vorr q9, q8, q8
; CHECK-NEXT: vorr q10, q8, q8
; CHECK-NEXT: vorr q11, q8, q8
; CHECK-NEXT: vst4.32 {d16[1], d18[1], d20[1], d22[1]}, [r0]
; CHECK-NEXT: mov pc, lr
  %tmp1 = load <4 x float>, ptr %B
  call void @llvm.arm.neon.vst4lane.p0.v4f32(ptr %A, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1, i32 1)
  ret void
}

; Make sure this doesn't crash; PR10258
; insertelement with a run-time index (%c). The expected output spills the
; vector to an aligned stack slot, overwrites one halfword at the masked and
; scaled index, then reloads the whole vector.
define <8 x i16> @variable_insertelement(<8 x i16> %a, i16 %b, i32 %c) nounwind readnone {
; CHECK-LABEL: variable_insertelement:
; CHECK: @ %bb.0:
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: mov r11, sp
; CHECK-NEXT: sub sp, sp, #24
; CHECK-NEXT: bic sp, sp, #15
; CHECK-NEXT: ldr lr, [r11, #12]
; CHECK-NEXT: vmov d17, r2, r3
; CHECK-NEXT: vmov d16, r0, r1
; CHECK-NEXT: mov r1, sp
; CHECK-NEXT: and r0, lr, #7
; CHECK-NEXT: mov r2, r1
; CHECK-NEXT: ldrh r12, [r11, #8]
; CHECK-NEXT: lsl r0, r0, #1
; CHECK-NEXT: vst1.64 {d16, d17}, [r2:128], r0
; CHECK-NEXT: strh r12, [r2]
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov sp, r11
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
  %r = insertelement <8 x i16> %a, i16 %b, i32 %c
  ret <8 x i16> %r
}

; vst4lane intrinsics for 64-bit vector operands.
declare void @llvm.arm.neon.vst4lane.p0.v8i8(ptr, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.p0.v4i16(ptr, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.p0.v2i32(ptr, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.p0.v2f32(ptr, <2 x float>, <2 x float>, <2 x float>, <2 x float>, i32, i32) nounwind

; vst4lane intrinsics for 128-bit vector operands.
declare void @llvm.arm.neon.vst4lane.p0.v8i16(ptr, <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.p0.v4i32(ptr, <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32, i32) nounwind
declare void @llvm.arm.neon.vst4lane.p0.v4f32(ptr, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32) nounwind