; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -enable-arm-maskedldst -verify-machineinstrs %s -o - | FileCheck %s

; Threshold-to-zero loops for MVE masked stores.
;
; Every function below has the same shape:
;   * %mul = N * <lanes per vector>, and the loop advances %index by that lane
;     count until %index.next == %mul (the N == 0 case branches straight to
;     the exit block).
;   * Each iteration loads one 128-bit vector from %data + %index, compares
;     every lane against splat(T) and splat(-T), ORs the two lane masks and
;     uses the result as the mask of an llvm.masked.store of zeroinitializer
;     back to the same address.
; The expected assembly for each loop body is one vpte/vcmpt pair followed by a
; predicated vstr with post-increment, closed by a low-overhead `le` branch.
;
; The assertion lines inside each function are generated; regenerate them with
; the script named on the first line instead of editing them by hand.

; i32 lanes: zero the lanes where (x < T) or (x > 0 - T), both signed.
; (The name is spelled `thres_`, unlike the `thresh_` functions that follow; it
; is kept as-is because the label is matched by the assertions.)
define arm_aapcs_vfpcc void @thres_i32(ptr %data, i16 zeroext %N, i32 %T) {
; CHECK-LABEL: thres_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    it eq
; CHECK-NEXT:    popeq {r7, pc}
; CHECK-NEXT:  .LBB0_1: @ %vector.ph
; CHECK-NEXT:    mvn r3, #3
; CHECK-NEXT:    add.w r1, r3, r1, lsl #2
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    add.w lr, r3, r1, lsr #2
; CHECK-NEXT:    rsbs r1, r2, #0
; CHECK-NEXT:  .LBB0_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpte.s32 ge, q1, r2
; CHECK-NEXT:    vcmpt.s32 le, q1, r1
; CHECK-NEXT:    vstrwe.32 q0, [r0], #16
; CHECK-NEXT:    le lr, .LBB0_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
entry:
  %conv = zext i16 %N to i32
  %mul = shl nuw nsw i32 %conv, 2
  %cmp15 = icmp eq i16 %N, 0
  br i1 %cmp15, label %for.cond.cleanup, label %vector.ph

vector.ph:                                        ; preds = %entry
  %sub = sub nsw i32 0, %T
  %broadcast.splatinsert17 = insertelement <4 x i32> undef, i32 %T, i32 0
  %broadcast.splat18 = shufflevector <4 x i32> %broadcast.splatinsert17, <4 x i32> undef, <4 x i32> zeroinitializer
  %broadcast.splatinsert19 = insertelement <4 x i32> undef, i32 %sub, i32 0
  %broadcast.splat20 = shufflevector <4 x i32> %broadcast.splatinsert19, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %data, i32 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = icmp slt <4 x i32> %wide.load, %broadcast.splat18
  %2 = icmp sgt <4 x i32> %wide.load, %broadcast.splat20
  %3 = or <4 x i1> %1, %2
  call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %0, i32 4, <4 x i1> %3)
  %index.next = add i32 %index, 4
  %4 = icmp eq i32 %index.next, %mul
  br i1 %4, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; i16 lanes (8 per vector): zero the lanes where (x < T) or (x > 0 - T), signed.
define arm_aapcs_vfpcc void @thresh_i16(ptr %data, i16 zeroext %N, i16 signext %T) {
; CHECK-LABEL: thresh_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    it eq
; CHECK-NEXT:    popeq {r7, pc}
; CHECK-NEXT:  .LBB1_1: @ %vector.ph
; CHECK-NEXT:    mvn r3, #7
; CHECK-NEXT:    add.w r1, r3, r1, lsl #3
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    add.w lr, r3, r1, lsr #3
; CHECK-NEXT:    rsbs r1, r2, #0
; CHECK-NEXT:  .LBB1_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrh.u16 q1, [r0]
; CHECK-NEXT:    vpte.s16 ge, q1, r2
; CHECK-NEXT:    vcmpt.s16 le, q1, r1
; CHECK-NEXT:    vstrhe.16 q0, [r0], #16
; CHECK-NEXT:    le lr, .LBB1_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
entry:
  %conv2 = zext i16 %N to i32
  %mul = shl nuw nsw i32 %conv2, 3
  %cmp22 = icmp eq i16 %N, 0
  br i1 %cmp22, label %for.cond.cleanup, label %vector.ph

vector.ph:                                        ; preds = %entry
  %sub = sub i16 0, %T
  %broadcast.splatinsert24 = insertelement <8 x i16> undef, i16 %T, i32 0
  %broadcast.splat25 = shufflevector <8 x i16> %broadcast.splatinsert24, <8 x i16> undef, <8 x i32> zeroinitializer
  %broadcast.splatinsert26 = insertelement <8 x i16> undef, i16 %sub, i32 0
  %broadcast.splat27 = shufflevector <8 x i16> %broadcast.splatinsert26, <8 x i16> undef, <8 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i16, ptr %data, i32 %index
  %wide.load = load <8 x i16>, ptr %0, align 2
  %1 = icmp slt <8 x i16> %wide.load, %broadcast.splat25
  %2 = icmp sgt <8 x i16> %wide.load, %broadcast.splat27
  %3 = or <8 x i1> %1, %2
  call void @llvm.masked.store.v8i16.p0(<8 x i16> zeroinitializer, ptr %0, i32 2, <8 x i1> %3)
  %index.next = add i32 %index, 8
  %4 = icmp eq i32 %index.next, %mul
  br i1 %4, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; i8 lanes (16 per vector): zero the lanes where (x < T) or (x > 0 - T), signed.
define arm_aapcs_vfpcc void @thresh_i8(ptr %data, i16 zeroext %N, i8 signext %T) {
; CHECK-LABEL: thresh_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    it eq
; CHECK-NEXT:    popeq {r7, pc}
; CHECK-NEXT:  .LBB2_1: @ %vector.ph
; CHECK-NEXT:    mvn r3, #15
; CHECK-NEXT:    add.w r1, r3, r1, lsl #4
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    add.w lr, r3, r1, lsr #4
; CHECK-NEXT:    rsbs r1, r2, #0
; CHECK-NEXT:  .LBB2_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrb.u8 q1, [r0]
; CHECK-NEXT:    vpte.s8 ge, q1, r2
; CHECK-NEXT:    vcmpt.s8 le, q1, r1
; CHECK-NEXT:    vstrbe.8 q0, [r0], #16
; CHECK-NEXT:    le lr, .LBB2_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
entry:
  %conv2 = zext i16 %N to i32
  %mul = shl nuw nsw i32 %conv2, 4
  %cmp20 = icmp eq i16 %N, 0
  br i1 %cmp20, label %for.cond.cleanup, label %vector.ph

vector.ph:                                        ; preds = %entry
  %sub = sub i8 0, %T
  %broadcast.splatinsert22 = insertelement <16 x i8> undef, i8 %T, i32 0
  %broadcast.splat23 = shufflevector <16 x i8> %broadcast.splatinsert22, <16 x i8> undef, <16 x i32> zeroinitializer
  %broadcast.splatinsert24 = insertelement <16 x i8> undef, i8 %sub, i32 0
  %broadcast.splat25 = shufflevector <16 x i8> %broadcast.splatinsert24, <16 x i8> undef, <16 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8, ptr %data, i32 %index
  %wide.load = load <16 x i8>, ptr %0, align 1
  %1 = icmp slt <16 x i8> %wide.load, %broadcast.splat23
  %2 = icmp sgt <16 x i8> %wide.load, %broadcast.splat25
  %3 = or <16 x i1> %1, %2
  call void @llvm.masked.store.v16i8.p0(<16 x i8> zeroinitializer, ptr %0, i32 1, <16 x i1> %3)
  %index.next = add i32 %index, 16
  %4 = icmp eq i32 %index.next, %mul
  br i1 %4, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; f32 lanes: zero the lanes where (x olt T) or (x ogt fneg T); the fneg and both
; compares carry the `fast` flag.
define arm_aapcs_vfpcc void @thresh_f32(ptr %data, i16 zeroext %N, float %T) {
; CHECK-LABEL: thresh_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    it eq
; CHECK-NEXT:    popeq {r7, pc}
; CHECK-NEXT:  .LBB3_1: @ %vector.ph
; CHECK-NEXT:    mvn r2, #3
; CHECK-NEXT:    add.w r1, r2, r1, lsl #2
; CHECK-NEXT:    movs r2, #1
; CHECK-NEXT:    add.w lr, r2, r1, lsr #2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    eor r2, r1, #-2147483648
; CHECK-NEXT:  .LBB3_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpte.f32 ge, q1, r1
; CHECK-NEXT:    vcmpt.f32 le, q1, r2
; CHECK-NEXT:    vstrwe.32 q0, [r0], #16
; CHECK-NEXT:    le lr, .LBB3_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
entry:
  %conv = zext i16 %N to i32
  %mul = shl nuw nsw i32 %conv, 2
  %cmp15 = icmp eq i16 %N, 0
  br i1 %cmp15, label %for.cond.cleanup, label %vector.ph

vector.ph:                                        ; preds = %entry
  %fneg = fneg fast float %T
  %broadcast.splatinsert17 = insertelement <4 x float> undef, float %T, i32 0
  %broadcast.splat18 = shufflevector <4 x float> %broadcast.splatinsert17, <4 x float> undef, <4 x i32> zeroinitializer
  %broadcast.splatinsert19 = insertelement <4 x float> undef, float %fneg, i32 0
  %broadcast.splat20 = shufflevector <4 x float> %broadcast.splatinsert19, <4 x float> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %data, i32 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = fcmp fast olt <4 x float> %wide.load, %broadcast.splat18
  %2 = fcmp fast ogt <4 x float> %wide.load, %broadcast.splat20
  %3 = or <4 x i1> %1, %2
  call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr %0, i32 4, <4 x i1> %3)
  %index.next = add i32 %index, 4
  %4 = icmp eq i32 %index.next, %mul
  br i1 %4, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; f16 lanes (8 per vector): the threshold arrives as a float whose low 16 bits
; are reinterpreted as a half (bitcast -> trunc -> bitcast). Lanes where
; (x olt T) or (x ogt fneg T) are zeroed.
define arm_aapcs_vfpcc void @thresh_f16(ptr %data, i16 zeroext %N, float %T.coerce) {
; CHECK-LABEL: thresh_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    it eq
; CHECK-NEXT:    popeq {r7, pc}
; CHECK-NEXT:  .LBB4_1: @ %vector.ph
; CHECK-NEXT:    mvn r3, #7
; CHECK-NEXT:    add.w r1, r3, r1, lsl #3
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vneg.f16 s0, s0
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    add.w lr, r3, r1, lsr #3
; CHECK-NEXT:    vmov.f16 r1, s0
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:  .LBB4_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrh.u16 q1, [r0]
; CHECK-NEXT:    vpte.f16 ge, q1, r2
; CHECK-NEXT:    vcmpt.f16 le, q1, r1
; CHECK-NEXT:    vstrhe.16 q0, [r0], #16
; CHECK-NEXT:    le lr, .LBB4_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = bitcast float %T.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = zext i16 %N to i32
  %mul = shl nuw nsw i32 %conv, 3
  %cmp17 = icmp eq i16 %N, 0
  br i1 %cmp17, label %for.cond.cleanup, label %vector.ph

vector.ph:                                        ; preds = %entry
  %fneg = fneg fast half %1
  %broadcast.splatinsert19 = insertelement <8 x half> undef, half %1, i32 0
  %broadcast.splat20 = shufflevector <8 x half> %broadcast.splatinsert19, <8 x half> undef, <8 x i32> zeroinitializer
  %broadcast.splatinsert21 = insertelement <8 x half> undef, half %fneg, i32 0
  %broadcast.splat22 = shufflevector <8 x half> %broadcast.splatinsert21, <8 x half> undef, <8 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %2 = getelementptr inbounds half, ptr %data, i32 %index
  %wide.load = load <8 x half>, ptr %2, align 2
  %3 = fcmp fast olt <8 x half> %wide.load, %broadcast.splat20
  %4 = fcmp fast ogt <8 x half> %wide.load, %broadcast.splat22
  %5 = or <8 x i1> %3, %4
  call void @llvm.masked.store.v8f16.p0(<8 x half> zeroinitializer, ptr %2, i32 2, <8 x i1> %5)
  %index.next = add i32 %index, 8
  %6 = icmp eq i32 %index.next, %mul
  br i1 %6, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; "rev" variants: the same loops with the compare operands swapped in the IR
; (e.g. `icmp sgt splat(T), x` in place of `icmp slt x, splat(T)`). Apart from
; the label names, the expected assembly matches the non-reversed functions.

; Operand-swapped form of thres_i32.
define arm_aapcs_vfpcc void @thres_rev_i32(ptr %data, i16 zeroext %N, i32 %T) {
; CHECK-LABEL: thres_rev_i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    it eq
; CHECK-NEXT:    popeq {r7, pc}
; CHECK-NEXT:  .LBB5_1: @ %vector.ph
; CHECK-NEXT:    mvn r3, #3
; CHECK-NEXT:    add.w r1, r3, r1, lsl #2
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    add.w lr, r3, r1, lsr #2
; CHECK-NEXT:    rsbs r1, r2, #0
; CHECK-NEXT:  .LBB5_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpte.s32 ge, q1, r2
; CHECK-NEXT:    vcmpt.s32 le, q1, r1
; CHECK-NEXT:    vstrwe.32 q0, [r0], #16
; CHECK-NEXT:    le lr, .LBB5_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
entry:
  %conv = zext i16 %N to i32
  %mul = shl nuw nsw i32 %conv, 2
  %cmp15 = icmp eq i16 %N, 0
  br i1 %cmp15, label %for.cond.cleanup, label %vector.ph

vector.ph:                                        ; preds = %entry
  %sub = sub nsw i32 0, %T
  %broadcast.splatinsert17 = insertelement <4 x i32> undef, i32 %T, i32 0
  %broadcast.splat18 = shufflevector <4 x i32> %broadcast.splatinsert17, <4 x i32> undef, <4 x i32> zeroinitializer
  %broadcast.splatinsert19 = insertelement <4 x i32> undef, i32 %sub, i32 0
  %broadcast.splat20 = shufflevector <4 x i32> %broadcast.splatinsert19, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %data, i32 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = icmp sgt <4 x i32> %broadcast.splat18, %wide.load
  %2 = icmp slt <4 x i32> %broadcast.splat20, %wide.load
  %3 = or <4 x i1> %1, %2
  call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %0, i32 4, <4 x i1> %3)
  %index.next = add i32 %index, 4
  %4 = icmp eq i32 %index.next, %mul
  br i1 %4, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Operand-swapped form of thresh_i16.
define arm_aapcs_vfpcc void @thresh_rev_i16(ptr %data, i16 zeroext %N, i16 signext %T) {
; CHECK-LABEL: thresh_rev_i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    it eq
; CHECK-NEXT:    popeq {r7, pc}
; CHECK-NEXT:  .LBB6_1: @ %vector.ph
; CHECK-NEXT:    mvn r3, #7
; CHECK-NEXT:    add.w r1, r3, r1, lsl #3
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    add.w lr, r3, r1, lsr #3
; CHECK-NEXT:    rsbs r1, r2, #0
; CHECK-NEXT:  .LBB6_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrh.u16 q1, [r0]
; CHECK-NEXT:    vpte.s16 ge, q1, r2
; CHECK-NEXT:    vcmpt.s16 le, q1, r1
; CHECK-NEXT:    vstrhe.16 q0, [r0], #16
; CHECK-NEXT:    le lr, .LBB6_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
entry:
  %conv2 = zext i16 %N to i32
  %mul = shl nuw nsw i32 %conv2, 3
  %cmp22 = icmp eq i16 %N, 0
  br i1 %cmp22, label %for.cond.cleanup, label %vector.ph

vector.ph:                                        ; preds = %entry
  %sub = sub i16 0, %T
  %broadcast.splatinsert24 = insertelement <8 x i16> undef, i16 %T, i32 0
  %broadcast.splat25 = shufflevector <8 x i16> %broadcast.splatinsert24, <8 x i16> undef, <8 x i32> zeroinitializer
  %broadcast.splatinsert26 = insertelement <8 x i16> undef, i16 %sub, i32 0
  %broadcast.splat27 = shufflevector <8 x i16> %broadcast.splatinsert26, <8 x i16> undef, <8 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i16, ptr %data, i32 %index
  %wide.load = load <8 x i16>, ptr %0, align 2
  %1 = icmp sgt <8 x i16> %broadcast.splat25, %wide.load
  %2 = icmp slt <8 x i16> %broadcast.splat27, %wide.load
  %3 = or <8 x i1> %1, %2
  call void @llvm.masked.store.v8i16.p0(<8 x i16> zeroinitializer, ptr %0, i32 2, <8 x i1> %3)
  %index.next = add i32 %index, 8
  %4 = icmp eq i32 %index.next, %mul
  br i1 %4, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Operand-swapped form of thresh_i8.
define arm_aapcs_vfpcc void @thresh_rev_i8(ptr %data, i16 zeroext %N, i8 signext %T) {
; CHECK-LABEL: thresh_rev_i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    it eq
; CHECK-NEXT:    popeq {r7, pc}
; CHECK-NEXT:  .LBB7_1: @ %vector.ph
; CHECK-NEXT:    mvn r3, #15
; CHECK-NEXT:    add.w r1, r3, r1, lsl #4
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    add.w lr, r3, r1, lsr #4
; CHECK-NEXT:    rsbs r1, r2, #0
; CHECK-NEXT:  .LBB7_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrb.u8 q1, [r0]
; CHECK-NEXT:    vpte.s8 ge, q1, r2
; CHECK-NEXT:    vcmpt.s8 le, q1, r1
; CHECK-NEXT:    vstrbe.8 q0, [r0], #16
; CHECK-NEXT:    le lr, .LBB7_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
entry:
  %conv2 = zext i16 %N to i32
  %mul = shl nuw nsw i32 %conv2, 4
  %cmp20 = icmp eq i16 %N, 0
  br i1 %cmp20, label %for.cond.cleanup, label %vector.ph

vector.ph:                                        ; preds = %entry
  %sub = sub i8 0, %T
  %broadcast.splatinsert22 = insertelement <16 x i8> undef, i8 %T, i32 0
  %broadcast.splat23 = shufflevector <16 x i8> %broadcast.splatinsert22, <16 x i8> undef, <16 x i32> zeroinitializer
  %broadcast.splatinsert24 = insertelement <16 x i8> undef, i8 %sub, i32 0
  %broadcast.splat25 = shufflevector <16 x i8> %broadcast.splatinsert24, <16 x i8> undef, <16 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i8, ptr %data, i32 %index
  %wide.load = load <16 x i8>, ptr %0, align 1
  %1 = icmp sgt <16 x i8> %broadcast.splat23, %wide.load
  %2 = icmp slt <16 x i8> %broadcast.splat25, %wide.load
  %3 = or <16 x i1> %1, %2
  call void @llvm.masked.store.v16i8.p0(<16 x i8> zeroinitializer, ptr %0, i32 1, <16 x i1> %3)
  %index.next = add i32 %index, 16
  %4 = icmp eq i32 %index.next, %mul
  br i1 %4, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Operand-swapped form of thresh_f32.
define arm_aapcs_vfpcc void @thresh_rev_f32(ptr %data, i16 zeroext %N, float %T) {
; CHECK-LABEL: thresh_rev_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    it eq
; CHECK-NEXT:    popeq {r7, pc}
; CHECK-NEXT:  .LBB8_1: @ %vector.ph
; CHECK-NEXT:    mvn r2, #3
; CHECK-NEXT:    add.w r1, r2, r1, lsl #2
; CHECK-NEXT:    movs r2, #1
; CHECK-NEXT:    add.w lr, r2, r1, lsr #2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:    eor r2, r1, #-2147483648
; CHECK-NEXT:  .LBB8_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrw.u32 q1, [r0]
; CHECK-NEXT:    vpte.f32 ge, q1, r1
; CHECK-NEXT:    vcmpt.f32 le, q1, r2
; CHECK-NEXT:    vstrwe.32 q0, [r0], #16
; CHECK-NEXT:    le lr, .LBB8_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
entry:
  %conv = zext i16 %N to i32
  %mul = shl nuw nsw i32 %conv, 2
  %cmp15 = icmp eq i16 %N, 0
  br i1 %cmp15, label %for.cond.cleanup, label %vector.ph

vector.ph:                                        ; preds = %entry
  %fneg = fneg fast float %T
  %broadcast.splatinsert17 = insertelement <4 x float> undef, float %T, i32 0
  %broadcast.splat18 = shufflevector <4 x float> %broadcast.splatinsert17, <4 x float> undef, <4 x i32> zeroinitializer
  %broadcast.splatinsert19 = insertelement <4 x float> undef, float %fneg, i32 0
  %broadcast.splat20 = shufflevector <4 x float> %broadcast.splatinsert19, <4 x float> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds float, ptr %data, i32 %index
  %wide.load = load <4 x float>, ptr %0, align 4
  %1 = fcmp fast ogt <4 x float> %broadcast.splat18, %wide.load
  %2 = fcmp fast olt <4 x float> %broadcast.splat20, %wide.load
  %3 = or <4 x i1> %1, %2
  call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr %0, i32 4, <4 x i1> %3)
  %index.next = add i32 %index, 4
  %4 = icmp eq i32 %index.next, %mul
  br i1 %4, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Operand-swapped form of thresh_f16.
define arm_aapcs_vfpcc void @thresh_rev_f16(ptr %data, i16 zeroext %N, float %T.coerce) {
; CHECK-LABEL: thresh_rev_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    it eq
; CHECK-NEXT:    popeq {r7, pc}
; CHECK-NEXT:  .LBB9_1: @ %vector.ph
; CHECK-NEXT:    mvn r3, #7
; CHECK-NEXT:    add.w r1, r3, r1, lsl #3
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vneg.f16 s0, s0
; CHECK-NEXT:    movs r3, #1
; CHECK-NEXT:    add.w lr, r3, r1, lsr #3
; CHECK-NEXT:    vmov.f16 r1, s0
; CHECK-NEXT:    vmov.i32 q0, #0x0
; CHECK-NEXT:  .LBB9_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vldrh.u16 q1, [r0]
; CHECK-NEXT:    vpte.f16 ge, q1, r2
; CHECK-NEXT:    vcmpt.f16 le, q1, r1
; CHECK-NEXT:    vstrhe.16 q0, [r0], #16
; CHECK-NEXT:    le lr, .LBB9_2
; CHECK-NEXT:  @ %bb.3: @ %for.cond.cleanup
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = bitcast float %T.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  %conv = zext i16 %N to i32
  %mul = shl nuw nsw i32 %conv, 3
  %cmp17 = icmp eq i16 %N, 0
  br i1 %cmp17, label %for.cond.cleanup, label %vector.ph

vector.ph:                                        ; preds = %entry
  %fneg = fneg fast half %1
  %broadcast.splatinsert19 = insertelement <8 x half> undef, half %1, i32 0
  %broadcast.splat20 = shufflevector <8 x half> %broadcast.splatinsert19, <8 x half> undef, <8 x i32> zeroinitializer
  %broadcast.splatinsert21 = insertelement <8 x half> undef, half %fneg, i32 0
  %broadcast.splat22 = shufflevector <8 x half> %broadcast.splatinsert21, <8 x half> undef, <8 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %2 = getelementptr inbounds half, ptr %data, i32 %index
  %wide.load = load <8 x half>, ptr %2, align 2
  %3 = fcmp fast ogt <8 x half> %broadcast.splat20, %wide.load
  %4 = fcmp fast olt <8 x half> %broadcast.splat22, %wide.load
  %5 = or <8 x i1> %3, %4
  call void @llvm.masked.store.v8f16.p0(<8 x half> zeroinitializer, ptr %2, i32 2, <8 x i1> %5)
  %index.next = add i32 %index, 8
  %6 = icmp eq i32 %index.next, %mul
  br i1 %6, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body, %entry
  ret void
}

; Masked-store intrinsics used above: (value, pointer, alignment immediate, lane mask).
declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>)
declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>)
declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>)
declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>)
declare void @llvm.masked.store.v8f16.p0(<8 x half>, ptr, i32 immarg, <8 x i1>)