; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve --verify-machineinstrs %s -o - | FileCheck %s

; Codegen test: the same in-place blend loop over i16 elements is compiled
; twice for Thumb v8.1-M with MVE. The two functions have the same IR body;
; the second one additionally carries "target-cpu"="cortex-m55".
;
; What the generated assertions pin down:
;  * first function: the inner loop is emitted as dls/le, with an explicit
;    vctp.16 and a vpst in front of the predicated load and store;
;  * second function: the inner loop is emitted as dlstp.16/letp, with
;    unpredicated vldrh/vstrh.
; In both expected bodies the inner loop contains q-register reloads from the
; stack.
;
; The assertion lines are generated output: regenerate them with the script
; named on the first line rather than editing them by hand.

; Two i16 fields. Field 1 is loaded as %iHeight; field 0 is loaded directly
; through the struct pointer.
%struct.arm_2d_size_t = type { i16, i16 }

; Variant without a "target-cpu" attribute: only the -mattr=+mve given on the
; llc command line applies.
define void @__arm_2d_impl_rgb16_colour_filling_with_alpha(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) {
; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsh.w r12, [r2, #2]
; CHECK-NEXT: cmp.w r12, #1
; CHECK-NEXT: it lt
; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB0_1: @ %for.cond3.preheader.lr.ph
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: sub sp, #64
; CHECK-NEXT: ldrsh.w r7, [r2]
; CHECK-NEXT: cmp r7, #1
; CHECK-NEXT: blt.w .LBB0_6
; CHECK-NEXT: @ %bb.2: @ %for.cond3.preheader.us.preheader
; CHECK-NEXT: movs r2, #252
; CHECK-NEXT: ldr r4, [sp, #152]
; CHECK-NEXT: and.w r6, r2, r3, lsr #3
; CHECK-NEXT: movs r2, #120
; CHECK-NEXT: and.w r5, r2, r3, lsr #9
; CHECK-NEXT: lsls r3, r3, #3
; CHECK-NEXT: uxtb r3, r3
; CHECK-NEXT: muls r6, r4, r6
; CHECK-NEXT: rsb.w r2, r4, #256
; CHECK-NEXT: vmov.i16 q2, #0xfc
; CHECK-NEXT: mul lr, r5, r4
; CHECK-NEXT: vdup.16 q4, r6
; CHECK-NEXT: mov.w r6, #2016
; CHECK-NEXT: vmov.i16 q6, #0xf8
; CHECK-NEXT: mul r5, r3, r4
; CHECK-NEXT: adds r3, r7, #7
; CHECK-NEXT: bic r3, r3, #7
; CHECK-NEXT: vdup.16 q3, lr
; CHECK-NEXT: subs r3, #8
; CHECK-NEXT: movs r4, #1
; CHECK-NEXT: vdup.16 q0, r5
; CHECK-NEXT: lsls r1, r1, #1
; CHECK-NEXT: add.w r3, r4, r3, lsr #3
; CHECK-NEXT: vstrw.32 q0, [sp, #48] @ 16-byte Spill
; CHECK-NEXT: vmov.i16 q0, #0xf800
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: vdup.16 q5, r6
; CHECK-NEXT: vmov.i16 q7, #0x78
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT: vstrw.32 q2, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vstrw.32 q3, [sp, #16] @ 16-byte Spill
; CHECK-NEXT: .LBB0_3: @ %vector.ph
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB0_4 Depth 2
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: mov r6, r7
; CHECK-NEXT: dls lr, r3
; CHECK-NEXT: .LBB0_4: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB0_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vctp.16 r6
; CHECK-NEXT: subs r6, #8
; CHECK-NEXT: vpst
; CHECK-NEXT: vldrht.u16 q0, [r5]
; CHECK-NEXT: vshr.u16 q1, q0, #3
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vmov q2, q4
; CHECK-NEXT: vmla.i16 q2, q1, r2
; CHECK-NEXT: vshr.u16 q1, q2, #5
; CHECK-NEXT: vshl.i16 q2, q0, #3
; CHECK-NEXT: vand q3, q1, q5
; CHECK-NEXT: vmov q1, q7
; CHECK-NEXT: vand q2, q2, q6
; CHECK-NEXT: vmov q7, q6
; CHECK-NEXT: vmov q6, q5
; CHECK-NEXT: vmov q5, q4
; CHECK-NEXT: vldrw.u32 q4, [sp, #48] @ 16-byte Reload
; CHECK-NEXT: vshr.u16 q0, q0, #9
; CHECK-NEXT: vmla.i16 q4, q2, r2
; CHECK-NEXT: vshr.u16 q2, q4, #11
; CHECK-NEXT: vmov q4, q5
; CHECK-NEXT: vmov q5, q6
; CHECK-NEXT: vmov q6, q7
; CHECK-NEXT: vmov q7, q1
; CHECK-NEXT: vorr q1, q3, q2
; CHECK-NEXT: vldrw.u32 q2, [sp, #16] @ 16-byte Reload
; CHECK-NEXT: vand q0, q0, q7
; CHECK-NEXT: vmla.i16 q2, q0, r2
; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload
; CHECK-NEXT: vand q0, q2, q0
; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vpst
; CHECK-NEXT: vstrht.16 q0, [r5], #16
; CHECK-NEXT: le lr, .LBB0_4
; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us
; CHECK-NEXT: @ in Loop: Header=BB0_3 Depth=1
; CHECK-NEXT: adds r4, #1
; CHECK-NEXT: add r0, r1
; CHECK-NEXT: cmp r4, r12
; CHECK-NEXT: bne .LBB0_3
; CHECK-NEXT: .LBB0_6:
; CHECK-NEXT: add sp, #64
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
; CHECK-NEXT: bx lr
entry:
  ; Field 1 of *%ptCopySize; sign-extended it becomes the outer-loop trip
  ; count (%conv1).
  %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1
  %0 = load i16, ptr %iHeight, align 2
  %conv1 = sext i16 %0 to i32
  ; Split %hwColour into three shift+mask fields (masks 248, 120 and 252).
  %and.i = shl i16 %hwColour, 3
  %shl.i = and i16 %and.i, 248
  %1 = lshr i16 %hwColour, 9
  %shl4.i = and i16 %1, 120
  %2 = lshr i16 %hwColour, 3
  %3 = and i16 %2, 252
  ; %4 is the low 16 bits of %chRatio; %5 is 256 minus that value.
  %4 = trunc i32 %chRatio to i16
  %5 = sub i16 256, %4
  %conv30 = sext i16 %iTargetStride to i32
  ; Nothing to do when field 1 is <= 0.
  %cmp61 = icmp sgt i16 %0, 0
  br i1 %cmp61, label %for.cond3.preheader.lr.ph, label %for.cond.cleanup

for.cond3.preheader.lr.ph: ; preds = %entry
  ; Field 0 of *%ptCopySize: the per-row element count that bounds the active
  ; lanes. Nothing to do when it is <= 0.
  %6 = load i16, ptr %ptCopySize, align 2
  %conv4 = sext i16 %6 to i32
  %cmp558 = icmp sgt i16 %6, 0
  br i1 %cmp558, label %for.cond3.preheader.us.preheader, label %for.cond.cleanup

for.cond3.preheader.us.preheader: ; preds = %for.cond3.preheader.lr.ph
  ; Loop-invariant values: each colour field multiplied by %4, the row length
  ; rounded up to a multiple of 8 (%n.vec), and the four splats consumed by
  ; the vector body.
  %conv15.us = mul i16 %shl.i, %4
  %conv15.us.1 = mul i16 %3, %4
  %conv15.us.2 = mul i16 %shl4.i, %4
  %n.rnd.up = add nsw i32 %conv4, 7
  %n.vec = and i32 %n.rnd.up, -8
  %broadcast.splatinsert75 = insertelement <8 x i16> poison, i16 %5, i32 0
  %broadcast.splat76 = shufflevector <8 x i16> %broadcast.splatinsert75, <8 x i16> poison, <8 x i32> zeroinitializer
  %broadcast.splatinsert77 = insertelement <8 x i16> poison, i16 %conv15.us, i32 0
  %broadcast.splat78 = shufflevector <8 x i16> %broadcast.splatinsert77, <8 x i16> poison, <8 x i32> zeroinitializer
  %broadcast.splatinsert79 = insertelement <8 x i16> poison, i16 %conv15.us.1, i32 0
  %broadcast.splat80 = shufflevector <8 x i16> %broadcast.splatinsert79, <8 x i16> poison, <8 x i32> zeroinitializer
  %broadcast.splatinsert81 = insertelement <8 x i16> poison, i16 %conv15.us.2, i32 0
  %broadcast.splat82 = shufflevector <8 x i16> %broadcast.splatinsert81, <8 x i16> poison, <8 x i32> zeroinitializer
  br label %vector.ph

vector.ph: ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.us.preheader
  ; Outer-loop header: one iteration per row. %y.062.us counts rows and
  ; %phwTargetBase.addr.063.us points at the start of the current row.
  %phwTargetBase.addr.063.us = phi ptr [ %add.ptr.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ %phwTargetBase, %for.cond3.preheader.us.preheader ]
  %y.062.us = phi i32 [ %inc32.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ]
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  ; Inner loop: 8 x i16 per iteration. The lane mask computed from %index and
  ; %conv4 guards both the load and the store to the same address.
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %next.gep = getelementptr i16, ptr %phwTargetBase.addr.063.us, i32 %index
  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %conv4)
  %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %next.gep, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison)
  ; Extract three fields per element with the same shifts and masks that the
  ; entry block applies to %hwColour.
  %7 = shl <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %8 = and <8 x i16> %7, <i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248>
  %9 = lshr <8 x i16> %wide.masked.load, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %10 = and <8 x i16> %9, <i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120>
  %11 = lshr <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %12 = and <8 x i16> %11, <i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252>
  ; Per field: field * splat(%5) + splat(colour field * %4).
  %13 = mul <8 x i16> %8, %broadcast.splat76
  %14 = add <8 x i16> %13, %broadcast.splat78
  %15 = lshr <8 x i16> %14, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
  %16 = mul <8 x i16> %12, %broadcast.splat76
  %17 = add <8 x i16> %16, %broadcast.splat80
  %18 = mul <8 x i16> %10, %broadcast.splat76
  %19 = add <8 x i16> %18, %broadcast.splat82
  ; Recombine: (%17 >> 5) masked with 2016, %14 >> 11, and %19 masked with
  ; -2048 are OR'd together and stored back.
  %20 = lshr <8 x i16> %17, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  %21 = and <8 x i16> %20, <i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016>
  %22 = or <8 x i16> %21, %15
  %23 = and <8 x i16> %19, <i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048>
  %24 = or <8 x i16> %22, %23
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %24, ptr %next.gep, i32 2, <8 x i1> %active.lane.mask)
  %index.next = add i32 %index, 8
  %25 = icmp eq i32 %index.next, %n.vec
  br i1 %25, label %for.cond3.for.cond.cleanup7_crit_edge.us, label %vector.body

for.cond3.for.cond.cleanup7_crit_edge.us: ; preds = %vector.body
  ; Advance the row pointer by %conv30 i16 elements (the sign-extended
  ; %iTargetStride) and repeat until %conv1 rows have been processed.
  %add.ptr.us = getelementptr inbounds i16, ptr %phwTargetBase.addr.063.us, i32 %conv30
  %inc32.us = add nuw nsw i32 %y.062.us, 1
  %exitcond66.not = icmp eq i32 %inc32.us, %conv1
  br i1 %exitcond66.not, label %for.cond.cleanup, label %vector.ph

for.cond.cleanup: ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.lr.ph, %entry
  ret void
}
; Variant carrying "target-cpu"="cortex-m55"; the IR body is the same as in
; the function above. The expected output here uses dlstp.16/letp for the
; inner loop and wraps it in vmov.f64 copies of d4/d5 to and from d8/d9.
define void @__arm_2d_impl_rgb16_colour_filling_with_alpha_sched(ptr noalias nocapture %phwTargetBase, i16 signext %iTargetStride, ptr noalias nocapture readonly %ptCopySize, i16 zeroext %hwColour, i32 %chRatio) "target-cpu"="cortex-m55" {
; CHECK-LABEL: __arm_2d_impl_rgb16_colour_filling_with_alpha_sched:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: ldrsh.w r12, [r2, #2]
; CHECK-NEXT: cmp.w r12, #1
; CHECK-NEXT: blt.w .LBB1_7
; CHECK-NEXT: @ %bb.1: @ %for.cond3.preheader.lr.ph
; CHECK-NEXT: ldrsh.w r2, [r2]
; CHECK-NEXT: cmp r2, #1
; CHECK-NEXT: it lt
; CHECK-NEXT: bxlt lr
; CHECK-NEXT: .LBB1_2: @ %for.cond3.preheader.us.preheader
; CHECK-NEXT: push {r4, r5, r6, r7, lr}
; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: sub sp, #80
; CHECK-NEXT: ldr r7, [sp, #168]
; CHECK-NEXT: movs r5, #120
; CHECK-NEXT: lsls r6, r3, #3
; CHECK-NEXT: movs r4, #252
; CHECK-NEXT: and.w r5, r5, r3, lsr #9
; CHECK-NEXT: uxtb r6, r6
; CHECK-NEXT: and.w r3, r4, r3, lsr #3
; CHECK-NEXT: muls r6, r7, r6
; CHECK-NEXT: mul lr, r3, r7
; CHECK-NEXT: vdup.16 q0, r6
; CHECK-NEXT: vstrw.32 q0, [sp, #64] @ 16-byte Spill
; CHECK-NEXT: vdup.16 q0, lr
; CHECK-NEXT: muls r5, r7, r5
; CHECK-NEXT: vstrw.32 q0, [sp, #48] @ 16-byte Spill
; CHECK-NEXT: vmov.i16 q0, #0xfc
; CHECK-NEXT: mov.w r6, #2016
; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill
; CHECK-NEXT: vdup.16 q0, r5
; CHECK-NEXT: rsb.w r3, r7, #256
; CHECK-NEXT: lsls r7, r1, #1
; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
; CHECK-NEXT: vdup.16 q0, r6
; CHECK-NEXT: vmov.i16 q2, #0xf8
; CHECK-NEXT: vmov.i16 q5, #0x78
; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill
; CHECK-NEXT: vmov.i16 q6, #0xf800
; CHECK-NEXT: movs r4, #0
; CHECK-NEXT: vldrw.u32 q7, [sp] @ 16-byte Reload
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LBB1_3: @ %vector.ph
; CHECK-NEXT: @ =>This Loop Header: Depth=1
; CHECK-NEXT: @ Child Loop BB1_4 Depth 2
; CHECK-NEXT: mov r5, r0
; CHECK-NEXT: dlstp.16 lr, r2
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: .LBB1_4: @ %vector.body
; CHECK-NEXT: @ Parent Loop BB1_3 Depth=1
; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
; CHECK-NEXT: vldrh.u16 q0, [r5]
; CHECK-NEXT: vshl.i16 q1, q0, #3
; CHECK-NEXT: vldrw.u32 q3, [sp, #64] @ 16-byte Reload
; CHECK-NEXT: vand q1, q1, q2
; CHECK-NEXT: vmla.i16 q3, q1, r3
; CHECK-NEXT: vmov.f64 d8, d4
; CHECK-NEXT: vmov.f64 d9, d5
; CHECK-NEXT: vldrw.u32 q1, [sp, #32] @ 16-byte Reload
; CHECK-NEXT: vshr.u16 q2, q0, #9
; CHECK-NEXT: vshr.u16 q0, q0, #3
; CHECK-NEXT: vand q0, q0, q1
; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload
; CHECK-NEXT: vmla.i16 q1, q0, r3
; CHECK-NEXT: vand q2, q2, q5
; CHECK-NEXT: vshr.u16 q0, q3, #11
; CHECK-NEXT: vldrw.u32 q3, [sp, #16] @ 16-byte Reload
; CHECK-NEXT: vshr.u16 q1, q1, #5
; CHECK-NEXT: vmla.i16 q3, q2, r3
; CHECK-NEXT: vand q1, q1, q7
; CHECK-NEXT: vorr q0, q1, q0
; CHECK-NEXT: vand q1, q3, q6
; CHECK-NEXT: vorr q0, q0, q1
; CHECK-NEXT: vstrh.16 q0, [r5], #16
; CHECK-NEXT: vmov.f64 d4, d8
; CHECK-NEXT: vmov.f64 d5, d9
; CHECK-NEXT: letp lr, .LBB1_4
; CHECK-NEXT: @ %bb.5: @ %for.cond3.for.cond.cleanup7_crit_edge.us
; CHECK-NEXT: @ in Loop: Header=BB1_3 Depth=1
; CHECK-NEXT: adds r4, #1
; CHECK-NEXT: add r0, r7
; CHECK-NEXT: cmp r4, r12
; CHECK-NEXT: bne .LBB1_3
; CHECK-NEXT: @ %bb.6:
; CHECK-NEXT: add sp, #80
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr}
; CHECK-NEXT: .LBB1_7: @ %for.cond.cleanup
; CHECK-NEXT: bx lr
entry:
  ; Field 1 of *%ptCopySize; sign-extended it becomes the outer-loop trip
  ; count (%conv1).
  %iHeight = getelementptr inbounds %struct.arm_2d_size_t, ptr %ptCopySize, i32 0, i32 1
  %0 = load i16, ptr %iHeight, align 2
  %conv1 = sext i16 %0 to i32
  ; Split %hwColour into three shift+mask fields (masks 248, 120 and 252).
  %and.i = shl i16 %hwColour, 3
  %shl.i = and i16 %and.i, 248
  %1 = lshr i16 %hwColour, 9
  %shl4.i = and i16 %1, 120
  %2 = lshr i16 %hwColour, 3
  %3 = and i16 %2, 252
  ; %4 is the low 16 bits of %chRatio; %5 is 256 minus that value.
  %4 = trunc i32 %chRatio to i16
  %5 = sub i16 256, %4
  %conv30 = sext i16 %iTargetStride to i32
  ; Nothing to do when field 1 is <= 0.
  %cmp61 = icmp sgt i16 %0, 0
  br i1 %cmp61, label %for.cond3.preheader.lr.ph, label %for.cond.cleanup

for.cond3.preheader.lr.ph: ; preds = %entry
  ; Field 0 of *%ptCopySize: the per-row element count that bounds the active
  ; lanes. Nothing to do when it is <= 0.
  %6 = load i16, ptr %ptCopySize, align 2
  %conv4 = sext i16 %6 to i32
  %cmp558 = icmp sgt i16 %6, 0
  br i1 %cmp558, label %for.cond3.preheader.us.preheader, label %for.cond.cleanup

for.cond3.preheader.us.preheader: ; preds = %for.cond3.preheader.lr.ph
  ; Loop-invariant values: each colour field multiplied by %4, the row length
  ; rounded up to a multiple of 8 (%n.vec), and the four splats consumed by
  ; the vector body.
  %conv15.us = mul i16 %shl.i, %4
  %conv15.us.1 = mul i16 %3, %4
  %conv15.us.2 = mul i16 %shl4.i, %4
  %n.rnd.up = add nsw i32 %conv4, 7
  %n.vec = and i32 %n.rnd.up, -8
  %broadcast.splatinsert75 = insertelement <8 x i16> poison, i16 %5, i32 0
  %broadcast.splat76 = shufflevector <8 x i16> %broadcast.splatinsert75, <8 x i16> poison, <8 x i32> zeroinitializer
  %broadcast.splatinsert77 = insertelement <8 x i16> poison, i16 %conv15.us, i32 0
  %broadcast.splat78 = shufflevector <8 x i16> %broadcast.splatinsert77, <8 x i16> poison, <8 x i32> zeroinitializer
  %broadcast.splatinsert79 = insertelement <8 x i16> poison, i16 %conv15.us.1, i32 0
  %broadcast.splat80 = shufflevector <8 x i16> %broadcast.splatinsert79, <8 x i16> poison, <8 x i32> zeroinitializer
  %broadcast.splatinsert81 = insertelement <8 x i16> poison, i16 %conv15.us.2, i32 0
  %broadcast.splat82 = shufflevector <8 x i16> %broadcast.splatinsert81, <8 x i16> poison, <8 x i32> zeroinitializer
  br label %vector.ph

vector.ph: ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.us.preheader
  ; Outer-loop header: one iteration per row. %y.062.us counts rows and
  ; %phwTargetBase.addr.063.us points at the start of the current row.
  %phwTargetBase.addr.063.us = phi ptr [ %add.ptr.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ %phwTargetBase, %for.cond3.preheader.us.preheader ]
  %y.062.us = phi i32 [ %inc32.us, %for.cond3.for.cond.cleanup7_crit_edge.us ], [ 0, %for.cond3.preheader.us.preheader ]
  br label %vector.body

vector.body: ; preds = %vector.body, %vector.ph
  ; Inner loop: 8 x i16 per iteration. The lane mask computed from %index and
  ; %conv4 guards both the load and the store to the same address.
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %next.gep = getelementptr i16, ptr %phwTargetBase.addr.063.us, i32 %index
  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %conv4)
  %wide.masked.load = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %next.gep, i32 2, <8 x i1> %active.lane.mask, <8 x i16> poison)
  ; Extract three fields per element with the same shifts and masks that the
  ; entry block applies to %hwColour.
  %7 = shl <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %8 = and <8 x i16> %7, <i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248, i16 248>
  %9 = lshr <8 x i16> %wide.masked.load, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %10 = and <8 x i16> %9, <i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120, i16 120>
  %11 = lshr <8 x i16> %wide.masked.load, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %12 = and <8 x i16> %11, <i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252, i16 252>
  ; Per field: field * splat(%5) + splat(colour field * %4).
  %13 = mul <8 x i16> %8, %broadcast.splat76
  %14 = add <8 x i16> %13, %broadcast.splat78
  %15 = lshr <8 x i16> %14, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
  %16 = mul <8 x i16> %12, %broadcast.splat76
  %17 = add <8 x i16> %16, %broadcast.splat80
  %18 = mul <8 x i16> %10, %broadcast.splat76
  %19 = add <8 x i16> %18, %broadcast.splat82
  ; Recombine: (%17 >> 5) masked with 2016, %14 >> 11, and %19 masked with
  ; -2048 are OR'd together and stored back.
  %20 = lshr <8 x i16> %17, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  %21 = and <8 x i16> %20, <i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016, i16 2016>
  %22 = or <8 x i16> %21, %15
  %23 = and <8 x i16> %19, <i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048, i16 -2048>
  %24 = or <8 x i16> %22, %23
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %24, ptr %next.gep, i32 2, <8 x i1> %active.lane.mask)
  %index.next = add i32 %index, 8
  %25 = icmp eq i32 %index.next, %n.vec
  br i1 %25, label %for.cond3.for.cond.cleanup7_crit_edge.us, label %vector.body

for.cond3.for.cond.cleanup7_crit_edge.us: ; preds = %vector.body
  ; Advance the row pointer by %conv30 i16 elements (the sign-extended
  ; %iTargetStride) and repeat until %conv1 rows have been processed.
  %add.ptr.us = getelementptr inbounds i16, ptr %phwTargetBase.addr.063.us, i32 %conv30
  %inc32.us = add nuw nsw i32 %y.062.us, 1
  %exitcond66.not = icmp eq i32 %inc32.us, %conv1
  br i1 %exitcond66.not, label %for.cond.cleanup, label %vector.ph

for.cond.cleanup: ; preds = %for.cond3.for.cond.cleanup7_crit_edge.us, %for.cond3.preheader.lr.ph, %entry
  ret void
}

; Intrinsics called from both vector bodies.
; NOTE(review): attribute groups #1, #2 and #3 are referenced below, but no
; matching `attributes #N = { ... }` definitions are visible in this file;
; confirm that this is intentional.
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32) #1
declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) #2
declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) #3