; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst %s -o - | FileCheck %s


; Scatter of <4 x i32> to %dst indexed by (%offs + 4), under the constant mask
; <true, false, true, false>. The assertions expect the add to remain a
; separate vadd.i32 and the store to be a single predicated vstrwt.32 whose
; offsets are scaled by 4 (uxtw #2).
define arm_aapcs_vfpcc void @scatter_inc_minipred_4i32(<4 x i32> %data, ptr %dst, <4 x i32> %offs) {
; CHECK-LABEL: scatter_inc_minipred_4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    movs r1, #4
; CHECK-NEXT:    movw r2, #3855
; CHECK-NEXT:    vadd.i32 q1, q1, r1
; CHECK-NEXT:    vmsr p0, r2
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0, q1, uxtw #2]
; CHECK-NEXT:    bx lr
  %1 = add <4 x i32> %offs, <i32 4, i32 4, i32 4, i32 4>
  %2 = getelementptr inbounds i32, ptr %dst, <4 x i32> %1
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data, <4 x ptr> %2, i32 4, <4 x i1> <i1 true, i1 false, i1 true, i1 false>)
  ret void
}

; All-true scatter of <8 x i16> to %dst indexed by (<8 x i32> %offs + 8). The
; assertions expect the addresses to be built in two q-register halves (shift
; left by 1, add the base, add 16) and every lane to be stored with a scalar
; strh.
define arm_aapcs_vfpcc void @scatter_inc_mini_8i16(<8 x i16> %data, ptr %dst, <8 x i32> %offs) {
; CHECK-LABEL: scatter_inc_mini_8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vshl.i32 q1, q1, #1
; CHECK-NEXT:    mov.w r12, #16
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov.u16 r6, q0[0]
; CHECK-NEXT:    vadd.i32 q1, q1, r12
; CHECK-NEXT:    vmov r2, r3, d2
; CHECK-NEXT:    vmov r1, lr, d3
; CHECK-NEXT:    vshl.i32 q1, q2, #1
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r12
; CHECK-NEXT:    vmov r0, r12, d2
; CHECK-NEXT:    vmov r4, r5, d3
; CHECK-NEXT:    strh r6, [r2]
; CHECK-NEXT:    vmov.u16 r2, q0[1]
; CHECK-NEXT:    strh r2, [r3]
; CHECK-NEXT:    vmov.u16 r2, q0[2]
; CHECK-NEXT:    strh r2, [r1]
; CHECK-NEXT:    vmov.u16 r1, q0[3]
; CHECK-NEXT:    strh.w r1, [lr]
; CHECK-NEXT:    vmov.u16 r1, q0[4]
; CHECK-NEXT:    strh r1, [r0]
; CHECK-NEXT:    vmov.u16 r0, q0[5]
; CHECK-NEXT:    strh.w r0, [r12]
; CHECK-NEXT:    vmov.u16 r0, q0[6]
; CHECK-NEXT:    strh r0, [r4]
; CHECK-NEXT:    vmov.u16 r0, q0[7]
; CHECK-NEXT:    strh r0, [r5]
; CHECK-NEXT:    pop {r4, r5, r6, pc}
  %1 = add <8 x i32> %offs, <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
  %2 = getelementptr inbounds i16, ptr %dst, <8 x i32> %1
  call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> %data, <8 x ptr> %2, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; All-true scatter of <16 x i8> to %dst indexed by (<16 x i32> %offs + 16). The
; assertions expect four address vectors (offset + base + 16), with the fourth
; offset vector loaded from the stack, and one scalar strb per lane.
define arm_aapcs_vfpcc void @scatter_inc_mini_16i8(<16 x i8> %data, ptr %dst, <16 x i32> %offs) {
; CHECK-LABEL: scatter_inc_mini_16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT:    push.w {r4, r5, r6, r7, r8, r9, lr}
; CHECK-NEXT:    .pad #4
; CHECK-NEXT:    sub sp, #4
; CHECK-NEXT:    movs r1, #16
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vadd.i32 q1, q1, r1
; CHECK-NEXT:    add.w r12, sp, #32
; CHECK-NEXT:    vmov r2, r3, d2
; CHECK-NEXT:    vadd.i32 q3, q3, r0
; CHECK-NEXT:    vmov lr, r5, d3
; CHECK-NEXT:    vadd.i32 q1, q2, r0
; CHECK-NEXT:    vadd.i32 q2, q1, r1
; CHECK-NEXT:    vldrw.u32 q1, [r12]
; CHECK-NEXT:    vmov r4, r12, d4
; CHECK-NEXT:    vmov.u8 r6, q0[0]
; CHECK-NEXT:    vadd.i32 q1, q1, r0
; CHECK-NEXT:    vmov r0, r8, d5
; CHECK-NEXT:    vadd.i32 q3, q3, r1
; CHECK-NEXT:    vadd.i32 q1, q1, r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.u8 r7, q0[6]
; CHECK-NEXT:    strb r6, [r2]
; CHECK-NEXT:    vmov.u8 r2, q0[1]
; CHECK-NEXT:    strb r2, [r3]
; CHECK-NEXT:    vmov.u8 r6, q0[2]
; CHECK-NEXT:    vmov r2, r9, d6
; CHECK-NEXT:    strb.w r6, [lr]
; CHECK-NEXT:    vmov.u8 r6, q0[3]
; CHECK-NEXT:    vmov.u8 r3, q0[8]
; CHECK-NEXT:    strb r6, [r5]
; CHECK-NEXT:    vmov r6, r5, d7
; CHECK-NEXT:    strb r1, [r4]
; CHECK-NEXT:    vmov.u8 r1, q0[5]
; CHECK-NEXT:    strb.w r1, [r12]
; CHECK-NEXT:    vmov r1, r4, d2
; CHECK-NEXT:    strb r7, [r0]
; CHECK-NEXT:    vmov.u8 r0, q0[7]
; CHECK-NEXT:    strb.w r0, [r8]
; CHECK-NEXT:    vmov r0, r7, d3
; CHECK-NEXT:    strb r3, [r2]
; CHECK-NEXT:    vmov.u8 r2, q0[9]
; CHECK-NEXT:    strb.w r2, [r9]
; CHECK-NEXT:    vmov.u8 r2, q0[10]
; CHECK-NEXT:    strb r2, [r6]
; CHECK-NEXT:    vmov.u8 r2, q0[11]
; CHECK-NEXT:    strb r2, [r5]
; CHECK-NEXT:    vmov.u8 r2, q0[12]
; CHECK-NEXT:    strb r2, [r1]
; CHECK-NEXT:    vmov.u8 r1, q0[13]
; CHECK-NEXT:    strb r1, [r4]
; CHECK-NEXT:    vmov.u8 r1, q0[14]
; CHECK-NEXT:    strb r1, [r0]
; CHECK-NEXT:    vmov.u8 r0, q0[15]
; CHECK-NEXT:    strb r0, [r7]
; CHECK-NEXT:    add sp, #4
; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, pc}
  %1 = add <16 x i32> %offs, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %2 = getelementptr inbounds i8, ptr %dst, <16 x i32> %1
  call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> %data, <16 x ptr> %2, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret void
}

; Nested loop that stores %data1, %data2 and %data3 to i32 indices 3*i, 3*i+1
; and 3*i+2, where i is the induction vector starting at <0, 1, 2, 3> and
; stepping by 4. middle.block branches back to vector.ph when %n is not a
; multiple of 4, which forms the outer loop. The assertions expect each scatter
; to become a pre-incrementing vstrw.32 [qN, #48]! inside a dls/le loop, with
; the start addresses taken from constant pools.
define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i32> %data2, <4 x i32> %data3, ptr %dst, i32 %n) {
; CHECK-LABEL: scatter_inc_v4i32_complex:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    cmp r1, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    bxlt lr
; CHECK-NEXT:  .LBB3_1: @ %vector.ph.preheader
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    adr r4, .LCPI3_2
; CHECK-NEXT:    bic r2, r1, #3
; CHECK-NEXT:    vldrw.u32 q3, [r4]
; CHECK-NEXT:    sub.w r12, r2, #4
; CHECK-NEXT:    adr.w lr, .LCPI3_1
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    vadd.i32 q3, q3, r0
; CHECK-NEXT:    add.w r3, r3, r12, lsr #2
; CHECK-NEXT:    vstrw.32 q3, [sp] @ 16-byte Spill
; CHECK-NEXT:    vldrw.u32 q3, [lr]
; CHECK-NEXT:    adr.w r12, .LCPI3_0
; CHECK-NEXT:    vadd.i32 q4, q3, r0
; CHECK-NEXT:    vldrw.u32 q3, [r12]
; CHECK-NEXT:    vadd.i32 q3, q3, r0
; CHECK-NEXT:  .LBB3_2: @ %vector.ph
; CHECK-NEXT:    @ =>This Loop Header: Depth=1
; CHECK-NEXT:    @ Child Loop BB3_3 Depth 2
; CHECK-NEXT:    dls lr, r3
; CHECK-NEXT:    vmov q6, q4
; CHECK-NEXT:    vldrw.u32 q7, [sp] @ 16-byte Reload
; CHECK-NEXT:    vmov q5, q3
; CHECK-NEXT:  .LBB3_3: @ %vector.body
; CHECK-NEXT:    @ Parent Loop BB3_2 Depth=1
; CHECK-NEXT:    @ => This Inner Loop Header: Depth=2
; CHECK-NEXT:    vstrw.32 q0, [q5, #48]!
; CHECK-NEXT:    vstrw.32 q1, [q6, #48]!
; CHECK-NEXT:    vstrw.32 q2, [q7, #48]!
; CHECK-NEXT:    le lr, .LBB3_3
; CHECK-NEXT:  @ %bb.4: @ %middle.block
; CHECK-NEXT:    @ in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT:    cmp r2, r1
; CHECK-NEXT:    bne .LBB3_2
; CHECK-NEXT:  @ %bb.5:
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT:    pop.w {r4, lr}
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.6:
; CHECK-NEXT:  .LCPI3_0:
; CHECK-NEXT:    .long 4294967248 @ 0xffffffd0
; CHECK-NEXT:    .long 4294967260 @ 0xffffffdc
; CHECK-NEXT:    .long 4294967272 @ 0xffffffe8
; CHECK-NEXT:    .long 4294967284 @ 0xfffffff4
; CHECK-NEXT:  .LCPI3_1:
; CHECK-NEXT:    .long 4294967252 @ 0xffffffd4
; CHECK-NEXT:    .long 4294967264 @ 0xffffffe0
; CHECK-NEXT:    .long 4294967276 @ 0xffffffec
; CHECK-NEXT:    .long 4294967288 @ 0xfffffff8
; CHECK-NEXT:  .LCPI3_2:
; CHECK-NEXT:    .long 4294967256 @ 0xffffffd8
; CHECK-NEXT:    .long 4294967268 @ 0xffffffe4
; CHECK-NEXT:    .long 4294967280 @ 0xfffffff0
; CHECK-NEXT:    .long 4294967292 @ 0xfffffffc
entry:
  %cmp22 = icmp sgt i32 %n, 0
  br i1 %cmp22, label %vector.ph, label %for.cond.cleanup

vector.ph:                                        ; preds = %middle.block, %entry
  %n.vec = and i32 %n, -4
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %0 = mul nuw nsw <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
  %1 = getelementptr inbounds i32, ptr %dst, <4 x i32> %0
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data1, <4 x ptr> %1, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  %2 = add nuw nsw <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
  %3 = getelementptr inbounds i32, ptr %dst, <4 x i32> %2
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data2, <4 x ptr> %3, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  %4 = add nuw nsw <4 x i32> %0, <i32 2, i32 2, i32 2, i32 2>
  %5 = getelementptr inbounds i32, ptr %dst, <4 x i32> %4
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %data3, <4 x ptr> %5, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  %index.next = add i32 %index, 4
  %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
  %6 = icmp eq i32 %index.next, %n.vec
  br i1 %6, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i32 %n.vec, %n
  br i1 %cmp.n, label %for.cond.cleanup, label %vector.ph

for.cond.cleanup:                                 ; preds = %middle.block, %entry
  ret void
}

; Tail-predicated loop: loads <4 x i32> from %x and scatters it to %y at i32
; index (vec.ind << 2), both under llvm.get.active.lane.mask. The assertions
; expect a dlstp/letp loop containing a pre-incrementing vstrw.32 [q0, #64]!.
define void @shl(ptr nocapture readonly %x, ptr noalias nocapture %y, i32 %n) {
; CHECK-LABEL: shl:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    it lt
; CHECK-NEXT:    poplt {r7, pc}
; CHECK-NEXT:  .LBB4_1: @ %vector.ph
; CHECK-NEXT:    adr r3, .LCPI4_0
; CHECK-NEXT:    vldrw.u32 q0, [r3]
; CHECK-NEXT:    vadd.i32 q0, q0, r1
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB4_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q1, [r0], #16
; CHECK-NEXT:    vstrw.32 q1, [q0, #64]!
; CHECK-NEXT:    letp lr, .LBB4_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.4:
; CHECK-NEXT:  .LCPI4_0:
; CHECK-NEXT:    .long 4294967232 @ 0xffffffc0
; CHECK-NEXT:    .long 4294967248 @ 0xffffffd0
; CHECK-NEXT:    .long 4294967264 @ 0xffffffe0
; CHECK-NEXT:    .long 4294967280 @ 0xfffffff0
entry:
  %cmp6 = icmp sgt i32 %n, 0
  br i1 %cmp6, label %vector.ph, label %for.cond.cleanup

vector.ph:                                        ; preds = %entry
  %n.rnd.up = add i32 %n, 3
  %n.vec = and i32 %n.rnd.up, -4
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
  %0 = getelementptr inbounds i32, ptr %x, i32 %index
  %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %0, i32 4, <4 x i1> %active.lane.mask, <4 x i32> poison)
  %1 = shl nsw <4 x i32> %vec.ind, <i32 2, i32 2, i32 2, i32 2>
  %2 = getelementptr inbounds i32, ptr %y, <4 x i32> %1
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %wide.masked.load, <4 x ptr> %2, i32 4, <4 x i1> %active.lane.mask)
  %index.next = add i32 %index, 4
  %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
  %3 = icmp eq i32 %index.next, %n.vec
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Same loop shape as @shl, but with four scatters per iteration. The loaded
; value plus 1, 2, 3 and 4 is stored to i32 indices (vec.ind << 3),
; (vec.ind << 3) | 2, (vec.ind << 3) | 4 and (vec.ind << 3) | 6 respectively.
; The assertions expect four pre-incrementing vstrw.32 [qN, #128]! stores, with
; each `or` folded into the constant-pool start address.
define void @shlor(ptr nocapture readonly %x, ptr noalias nocapture %y, i32 %n) {
; CHECK-LABEL: shlor:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    cmp r2, #1
; CHECK-NEXT:    blt .LBB5_3
; CHECK-NEXT:  @ %bb.1: @ %vector.ph
; CHECK-NEXT:    adr.w lr, .LCPI5_0
; CHECK-NEXT:    adr r4, .LCPI5_1
; CHECK-NEXT:    adr r5, .LCPI5_2
; CHECK-NEXT:    adr r6, .LCPI5_3
; CHECK-NEXT:    vldrw.u32 q2, [r4]
; CHECK-NEXT:    vldrw.u32 q0, [r6]
; CHECK-NEXT:    vldrw.u32 q1, [r5]
; CHECK-NEXT:    vldrw.u32 q3, [lr]
; CHECK-NEXT:    vadd.i32 q0, q0, r1
; CHECK-NEXT:    vadd.i32 q1, q1, r1
; CHECK-NEXT:    vadd.i32 q2, q2, r1
; CHECK-NEXT:    vadd.i32 q3, q3, r1
; CHECK-NEXT:    mov.w r12, #1
; CHECK-NEXT:    movs r4, #3
; CHECK-NEXT:    movs r3, #2
; CHECK-NEXT:    movs r1, #4
; CHECK-NEXT:    dlstp.32 lr, r2
; CHECK-NEXT:  .LBB5_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q4, [r0], #16
; CHECK-NEXT:    vadd.i32 q6, q4, r12
; CHECK-NEXT:    vadd.i32 q5, q4, r1
; CHECK-NEXT:    vstrw.32 q6, [q3, #128]!
; CHECK-NEXT:    vadd.i32 q6, q4, r3
; CHECK-NEXT:    vadd.i32 q4, q4, r4
; CHECK-NEXT:    vstrw.32 q6, [q2, #128]!
; CHECK-NEXT:    vstrw.32 q4, [q1, #128]!
; CHECK-NEXT:    vstrw.32 q5, [q0, #128]!
; CHECK-NEXT:    letp lr, .LBB5_2
; CHECK-NEXT:  .LBB5_3: @ %for.cond.cleanup
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    pop {r4, r5, r6, pc}
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:  @ %bb.4:
; CHECK-NEXT:  .LCPI5_0:
; CHECK-NEXT:    .long 4294967168 @ 0xffffff80
; CHECK-NEXT:    .long 4294967200 @ 0xffffffa0
; CHECK-NEXT:    .long 4294967232 @ 0xffffffc0
; CHECK-NEXT:    .long 4294967264 @ 0xffffffe0
; CHECK-NEXT:  .LCPI5_1:
; CHECK-NEXT:    .long 4294967176 @ 0xffffff88
; CHECK-NEXT:    .long 4294967208 @ 0xffffffa8
; CHECK-NEXT:    .long 4294967240 @ 0xffffffc8
; CHECK-NEXT:    .long 4294967272 @ 0xffffffe8
; CHECK-NEXT:  .LCPI5_2:
; CHECK-NEXT:    .long 4294967184 @ 0xffffff90
; CHECK-NEXT:    .long 4294967216 @ 0xffffffb0
; CHECK-NEXT:    .long 4294967248 @ 0xffffffd0
; CHECK-NEXT:    .long 4294967280 @ 0xfffffff0
; CHECK-NEXT:  .LCPI5_3:
; CHECK-NEXT:    .long 4294967192 @ 0xffffff98
; CHECK-NEXT:    .long 4294967224 @ 0xffffffb8
; CHECK-NEXT:    .long 4294967256 @ 0xffffffd8
; CHECK-NEXT:    .long 4294967288 @ 0xfffffff8
entry:
  %cmp33 = icmp sgt i32 %n, 0
  br i1 %cmp33, label %vector.ph, label %for.cond.cleanup

vector.ph:                                        ; preds = %entry
  %n.rnd.up = add i32 %n, 3
  %n.vec = and i32 %n.rnd.up, -4
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %n)
  %0 = getelementptr inbounds i32, ptr %x, i32 %index
  %wide.masked.load = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %0, i32 4, <4 x i1> %active.lane.mask, <4 x i32> poison)
  %1 = add nsw <4 x i32> %wide.masked.load, <i32 1, i32 1, i32 1, i32 1>
  %2 = shl nsw <4 x i32> %vec.ind, <i32 3, i32 3, i32 3, i32 3>
  %3 = getelementptr inbounds i32, ptr %y, <4 x i32> %2
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %1, <4 x ptr> %3, i32 4, <4 x i1> %active.lane.mask)
  %4 = add nsw <4 x i32> %wide.masked.load, <i32 2, i32 2, i32 2, i32 2>
  %5 = or <4 x i32> %2, <i32 2, i32 2, i32 2, i32 2>
  %6 = getelementptr inbounds i32, ptr %y, <4 x i32> %5
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %4, <4 x ptr> %6, i32 4, <4 x i1> %active.lane.mask)
  %7 = add nsw <4 x i32> %wide.masked.load, <i32 3, i32 3, i32 3, i32 3>
  %8 = or <4 x i32> %2, <i32 4, i32 4, i32 4, i32 4>
  %9 = getelementptr inbounds i32, ptr %y, <4 x i32> %8
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %7, <4 x ptr> %9, i32 4, <4 x i1> %active.lane.mask)
  %10 = add nsw <4 x i32> %wide.masked.load, <i32 4, i32 4, i32 4, i32 4>
  %11 = or <4 x i32> %2, <i32 6, i32 6, i32 6, i32 6>
  %12 = getelementptr inbounds i32, ptr %y, <4 x i32> %11
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %10, <4 x ptr> %12, i32 4, <4 x i1> %active.lane.mask)
  %index.next = add i32 %index, 4
  %vec.ind.next = add <4 x i32> %vec.ind, <i32 4, i32 4, i32 4, i32 4>
  %13 = icmp eq i32 %index.next, %n.vec
  br i1 %13, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Intrinsic declarations. Only the v4i32, v8i16 and v16i8 scatters, the
; v4i32 masked load and get.active.lane.mask are called by the functions in
; this file; the remaining scatter variants are declared but unused here.
declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>)
declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>)
declare void @llvm.masked.scatter.v8f16.v8p0(<8 x half>, <8 x ptr>, i32, <8 x i1>)
declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>)
declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4f16.v4p0(<4 x half>, <4 x ptr>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>)