; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE

; Code generation tests for llvm.masked.store on MVE, run for both a
; little-endian and a big-endian triple. Each test builds its mask from an
; integer compare against zero. The variants cover:
;   * the plain store at the alignment used by the other tests of that type,
;   * "_align1": the same store with alignment 1 (expected to be scalarized),
;   * "_pre" / "_post": stores whose address is combined with a +4 pointer
;     update (expected to use the writeback addressing forms).
; The expected instructions below are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; <4 x i32> store at alignment 4 under the mask (%a > 0). Expected: one VPT
; block with a predicated vstrwt.32; big-endian first applies vrev64.32.
define arm_aapcs_vfpcc void @masked_v4i32(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vstrwt.32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i32:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vstrwt.32 q1, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  call void @llvm.masked.store.v4i32.p0(<4 x i32> %a, ptr %dest, i32 4, <4 x i1> %c)
  ret void
}

; Same store with alignment 1. Expected: the predicate is read with vmrs,
; repacked to one bit per lane, and every lane is stored by a conditional
; str inside an IT block.
define arm_aapcs_vfpcc void @masked_v4i32_align1(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vcmp.s32 gt, q0, zr
; CHECK-LE-NEXT:    vmrs r2, p0
; CHECK-LE-NEXT:    and r1, r2, #1
; CHECK-LE-NEXT:    rsbs r3, r1, #0
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    bfi r1, r3, #0, #1
; CHECK-LE-NEXT:    ubfx r3, r2, #4, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r1, r3, #1, #1
; CHECK-LE-NEXT:    ubfx r3, r2, #8, #1
; CHECK-LE-NEXT:    ubfx r2, r2, #12, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r1, r3, #2, #1
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    itt ne
; CHECK-LE-NEXT:    vmovne r2, s0
; CHECK-LE-NEXT:    strne r2, [r0]
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi r2, s1
; CHECK-LE-NEXT:    strmi r2, [r0, #4]
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi r2, s2
; CHECK-LE-NEXT:    strmi r2, [r0, #8]
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi r1, s3
; CHECK-LE-NEXT:    strmi r1, [r0, #12]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT:    vmrs r2, p0
; CHECK-BE-NEXT:    ubfx r1, r2, #12, #1
; CHECK-BE-NEXT:    rsbs r3, r1, #0
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    bfi r1, r3, #0, #1
; CHECK-BE-NEXT:    ubfx r3, r2, #8, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
; CHECK-BE-NEXT:    ubfx r3, r2, #4, #1
; CHECK-BE-NEXT:    and r2, r2, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r1, r3, #2, #1
; CHECK-BE-NEXT:    rsbs r2, r2, #0
; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi r2, s4
; CHECK-BE-NEXT:    strmi r2, [r0]
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi r2, s5
; CHECK-BE-NEXT:    strmi r2, [r0, #4]
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi r2, s6
; CHECK-BE-NEXT:    strmi r2, [r0, #8]
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    itt ne
; CHECK-BE-NEXT:    vmovne r1, s7
; CHECK-BE-NEXT:    strne r1, [r0, #12]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  call void @llvm.masked.store.v4i32.p0(<4 x i32> %a, ptr %dest, i32 1, <4 x i1> %c)
  ret void
}

; Masked store to %y + 4 that also returns %y + 4. Expected: the pre-indexed
; writeback form "[r0, #4]!". The function does not use arm_aapcs_vfpcc, so
; the mask source %a is rebuilt from r2/r3 and a stack load.
define ptr @masked_v4i32_pre(ptr %y, ptr %x, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_pre:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vstrwt.32 q1, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i32_pre:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-BE-NEXT:    vrev64.32 q2, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q2, zr
; CHECK-BE-NEXT:    vstrwt.32 q1, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <4 x i32>, ptr %x, align 4
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %z, i32 4, <4 x i1> %c)
  ret ptr %z
}

; Masked store to %y while %y + 4 is returned. Expected: the post-indexed
; writeback form "[r0], #4".
define ptr @masked_v4i32_post(ptr %y, ptr %x, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i32_post:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vstrwt.32 q1, [r0], #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i32_post:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-BE-NEXT:    vrev64.32 q2, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q2, zr
; CHECK-BE-NEXT:    vstrwt.32 q1, [r0], #4
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <4 x i32>, ptr %x, align 4
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  call void @llvm.masked.store.v4i32.p0(<4 x i32> %0, ptr %y, i32 4, <4 x i1> %c)
  ret ptr %z
}


; <8 x i16> store at alignment 2 under the mask (%a > 0). Expected: one VPT
; block with a predicated vstrht.16; big-endian first applies vrev64.16.
define arm_aapcs_vfpcc void @masked_v8i16(ptr %dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vstrht.16 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vstrht.16 q1, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %a, ptr %dest, i32 2, <8 x i1> %c)
  ret void
}

; Same store with alignment 1. Expected: the predicate is repacked to one bit
; per lane and every lane is stored by a conditional strh inside an IT block.
define arm_aapcs_vfpcc void @masked_v8i16_align1(ptr %dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vcmp.s16 gt, q0, zr
; CHECK-LE-NEXT:    vmrs r1, p0
; CHECK-LE-NEXT:    and r2, r1, #1
; CHECK-LE-NEXT:    rsbs r3, r2, #0
; CHECK-LE-NEXT:    movs r2, #0
; CHECK-LE-NEXT:    bfi r2, r3, #0, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #2, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #1, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #4, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #2, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #6, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #3, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #8, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #4, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #10, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #5, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #12, #1
; CHECK-LE-NEXT:    ubfx r1, r1, #14, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #6, #1
; CHECK-LE-NEXT:    rsbs r1, r1, #0
; CHECK-LE-NEXT:    bfi r2, r1, #7, #1
; CHECK-LE-NEXT:    uxtb r1, r2
; CHECK-LE-NEXT:    lsls r2, r2, #31
; CHECK-LE-NEXT:    itt ne
; CHECK-LE-NEXT:    vmovne.u16 r2, q0[0]
; CHECK-LE-NEXT:    strhne r2, [r0]
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi.u16 r2, q0[1]
; CHECK-LE-NEXT:    strhmi r2, [r0, #2]
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi.u16 r2, q0[2]
; CHECK-LE-NEXT:    strhmi r2, [r0, #4]
; CHECK-LE-NEXT:    lsls r2, r1, #28
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi.u16 r2, q0[3]
; CHECK-LE-NEXT:    strhmi r2, [r0, #6]
; CHECK-LE-NEXT:    lsls r2, r1, #27
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi.u16 r2, q0[4]
; CHECK-LE-NEXT:    strhmi r2, [r0, #8]
; CHECK-LE-NEXT:    lsls r2, r1, #26
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi.u16 r2, q0[5]
; CHECK-LE-NEXT:    strhmi r2, [r0, #10]
; CHECK-LE-NEXT:    lsls r2, r1, #25
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi.u16 r2, q0[6]
; CHECK-LE-NEXT:    strhmi r2, [r0, #12]
; CHECK-LE-NEXT:    lsls r1, r1, #24
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi.u16 r1, q0[7]
; CHECK-LE-NEXT:    strhmi r1, [r0, #14]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vcmp.s16 gt, q1, zr
; CHECK-BE-NEXT:    vmrs r1, p0
; CHECK-BE-NEXT:    ubfx r2, r1, #14, #1
; CHECK-BE-NEXT:    rsbs r3, r2, #0
; CHECK-BE-NEXT:    movs r2, #0
; CHECK-BE-NEXT:    bfi r2, r3, #0, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #12, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #1, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #10, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #2, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #8, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #3, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #6, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #4, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #4, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #5, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #2, #1
; CHECK-BE-NEXT:    and r1, r1, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #6, #1
; CHECK-BE-NEXT:    rsbs r1, r1, #0
; CHECK-BE-NEXT:    bfi r2, r1, #7, #1
; CHECK-BE-NEXT:    uxtb r1, r2
; CHECK-BE-NEXT:    lsls r2, r2, #24
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi.u16 r2, q1[0]
; CHECK-BE-NEXT:    strhmi r2, [r0]
; CHECK-BE-NEXT:    lsls r2, r1, #25
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi.u16 r2, q1[1]
; CHECK-BE-NEXT:    strhmi r2, [r0, #2]
; CHECK-BE-NEXT:    lsls r2, r1, #26
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi.u16 r2, q1[2]
; CHECK-BE-NEXT:    strhmi r2, [r0, #4]
; CHECK-BE-NEXT:    lsls r2, r1, #27
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi.u16 r2, q1[3]
; CHECK-BE-NEXT:    strhmi r2, [r0, #6]
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi.u16 r2, q1[4]
; CHECK-BE-NEXT:    strhmi r2, [r0, #8]
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi.u16 r2, q1[5]
; CHECK-BE-NEXT:    strhmi r2, [r0, #10]
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi.u16 r2, q1[6]
; CHECK-BE-NEXT:    strhmi r2, [r0, #12]
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    itt ne
; CHECK-BE-NEXT:    vmovne.u16 r1, q1[7]
; CHECK-BE-NEXT:    strhne r1, [r0, #14]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %a, ptr %dest, i32 1, <8 x i1> %c)
  ret void
}

; <8 x i16> masked store to %y + 4, returning %y + 4. Expected: pre-indexed
; vstrht.16 with writeback.
define ptr @masked_v8i16_pre(ptr %y, ptr %x, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_pre:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vstrht.16 q1, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i16_pre:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vldrh.u16 q1, [r1]
; CHECK-BE-NEXT:    vrev64.16 q2, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q2, zr
; CHECK-BE-NEXT:    vstrht.16 q1, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <8 x i16>, ptr %x, align 4
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %z, i32 2, <8 x i1> %c)
  ret ptr %z
}

; <8 x i16> masked store to %y, returning %y + 4. Expected: post-indexed
; vstrht.16.
define ptr @masked_v8i16_post(ptr %y, ptr %x, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i16_post:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vstrht.16 q1, [r0], #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i16_post:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vldrh.u16 q1, [r1]
; CHECK-BE-NEXT:    vrev64.16 q2, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q2, zr
; CHECK-BE-NEXT:    vstrht.16 q1, [r0], #4
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <8 x i16>, ptr %x, align 4
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %0, ptr %y, i32 2, <8 x i1> %c)
  ret ptr %z
}


; <16 x i8> store at alignment 1 under the mask (%a > 0). Expected: one VPT
; block with a predicated vstrbt.8; big-endian first applies vrev64.8.
define arm_aapcs_vfpcc void @masked_v16i8(ptr %dest, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
; CHECK-LE-NEXT:    vstrbt.8 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v16i8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.8 q1, q0
; CHECK-BE-NEXT:    vpt.s8 gt, q1, zr
; CHECK-BE-NEXT:    vstrbt.8 q1, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <16 x i8> %a, zeroinitializer
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %a, ptr %dest, i32 1, <16 x i1> %c)
  ret void
}

; <16 x i8> masked store to %y + 4, returning %y + 4. Expected: pre-indexed
; vstrbt.8 with writeback.
define ptr @masked_v16i8_pre(ptr %y, ptr %x, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8_pre:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
; CHECK-LE-NEXT:    vstrbt.8 q1, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v16i8_pre:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vldrb.u8 q1, [r1]
; CHECK-BE-NEXT:    vrev64.8 q2, q0
; CHECK-BE-NEXT:    vpt.s8 gt, q2, zr
; CHECK-BE-NEXT:    vstrbt.8 q1, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <16 x i8>, ptr %x, align 4
  %c = icmp sgt <16 x i8> %a, zeroinitializer
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %z, i32 1, <16 x i1> %c)
  ret ptr %z
}

; <16 x i8> masked store to %y, returning %y + 4. Expected: post-indexed
; vstrbt.8.
define ptr @masked_v16i8_post(ptr %y, ptr %x, <16 x i8> %a) {
; CHECK-LE-LABEL: masked_v16i8_post:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-LE-NEXT:    vpt.s8 gt, q0, zr
; CHECK-LE-NEXT:    vstrbt.8 q1, [r0], #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v16i8_post:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vldrb.u8 q1, [r1]
; CHECK-BE-NEXT:    vrev64.8 q2, q0
; CHECK-BE-NEXT:    vpt.s8 gt, q2, zr
; CHECK-BE-NEXT:    vstrbt.8 q1, [r0], #4
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <16 x i8>, ptr %x, align 4
  %c = icmp sgt <16 x i8> %a, zeroinitializer
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %0, ptr %y, i32 1, <16 x i1> %c)
  ret ptr %z
}


; <4 x float> store at alignment 4; the mask is an unsigned (%b > 0) compare
; on a separate integer vector, which is expected to be emitted as a "ne"
; compare. Expected: one VPT block with a predicated vstrwt.32.
define arm_aapcs_vfpcc void @masked_v4f32(ptr %dest, <4 x float> %a, <4 x i32> %b) {
; CHECK-LE-LABEL: masked_v4f32:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.i32 ne, q1, zr
; CHECK-LE-NEXT:    vstrwt.32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f32:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q2, q1
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.i32 ne, q2, zr
; CHECK-BE-NEXT:    vstrwt.32 q1, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp ugt <4 x i32> %b, zeroinitializer
  call void @llvm.masked.store.v4f32.p0(<4 x float> %a, ptr %dest, i32 4, <4 x i1> %c)
  ret void
}

; Same store with alignment 1. Expected: the predicate is repacked to one bit
; per lane and every lane is moved to a core register and stored by a
; conditional str inside an IT block.
define arm_aapcs_vfpcc void @masked_v4f32_align1(ptr %dest, <4 x float> %a, <4 x i32> %b) {
; CHECK-LE-LABEL: masked_v4f32_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vcmp.i32 ne, q1, zr
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    vmrs r2, p0
; CHECK-LE-NEXT:    and r3, r2, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r1, r3, #0, #1
; CHECK-LE-NEXT:    ubfx r3, r2, #4, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r1, r3, #1, #1
; CHECK-LE-NEXT:    ubfx r3, r2, #8, #1
; CHECK-LE-NEXT:    ubfx r2, r2, #12, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r1, r3, #2, #1
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    itt ne
; CHECK-LE-NEXT:    vmovne r2, s0
; CHECK-LE-NEXT:    strne r2, [r0]
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi r2, s1
; CHECK-LE-NEXT:    strmi r2, [r0, #4]
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi r2, s2
; CHECK-LE-NEXT:    strmi r2, [r0, #8]
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi r1, s3
; CHECK-LE-NEXT:    strmi r1, [r0, #12]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f32_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q2, q1
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    vcmp.i32 ne, q2, zr
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vmrs r2, p0
; CHECK-BE-NEXT:    ubfx r3, r2, #12, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r1, r3, #0, #1
; CHECK-BE-NEXT:    ubfx r3, r2, #8, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
; CHECK-BE-NEXT:    ubfx r3, r2, #4, #1
; CHECK-BE-NEXT:    and r2, r2, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r1, r3, #2, #1
; CHECK-BE-NEXT:    rsbs r2, r2, #0
; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi r2, s4
; CHECK-BE-NEXT:    strmi r2, [r0]
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi r2, s5
; CHECK-BE-NEXT:    strmi r2, [r0, #4]
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi r2, s6
; CHECK-BE-NEXT:    strmi r2, [r0, #8]
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    itt ne
; CHECK-BE-NEXT:    vmovne r1, s7
; CHECK-BE-NEXT:    strne r1, [r0, #12]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp ugt <4 x i32> %b, zeroinitializer
  call void @llvm.masked.store.v4f32.p0(<4 x float> %a, ptr %dest, i32 1, <4 x i1> %c)
  ret void
}

; <4 x float> masked store to %y + 4, returning %y + 4. Expected: pre-indexed
; vstrwt.32 with writeback.
define ptr @masked_v4f32_pre(ptr %y, ptr %x, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4f32_pre:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vstrwt.32 q1, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f32_pre:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-BE-NEXT:    vrev64.32 q2, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q2, zr
; CHECK-BE-NEXT:    vstrwt.32 q1, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <4 x float>, ptr %x, align 4
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %z, i32 4, <4 x i1> %c)
  ret ptr %z
}

; <4 x float> masked store to %y, returning %y + 4. Expected: post-indexed
; vstrwt.32.
define ptr @masked_v4f32_post(ptr %y, ptr %x, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4f32_post:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vstrwt.32 q1, [r0], #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f32_post:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-BE-NEXT:    vrev64.32 q2, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q2, zr
; CHECK-BE-NEXT:    vstrwt.32 q1, [r0], #4
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <4 x float>, ptr %x, align 4
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  call void @llvm.masked.store.v4f32.p0(<4 x float> %0, ptr %y, i32 4, <4 x i1> %c)
  ret ptr %z
}


; <8 x half> store at alignment 2; the mask is an unsigned (%b > 0) compare
; on a separate integer vector. Expected: one VPT block with a predicated
; vstrht.16.
define arm_aapcs_vfpcc void @masked_v8f16(ptr %dest, <8 x half> %a, <8 x i16> %b) {
; CHECK-LE-LABEL: masked_v8f16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.i16 ne, q1, zr
; CHECK-LE-NEXT:    vstrht.16 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q2, q1
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.i16 ne, q2, zr
; CHECK-BE-NEXT:    vstrht.16 q1, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp ugt <8 x i16> %b, zeroinitializer
  call void @llvm.masked.store.v8f16.p0(<8 x half> %a, ptr %dest, i32 2, <8 x i1> %c)
  ret void
}

; Same store with alignment 1. Expected: a scalarized sequence that branches
; per lane; each half is written to a stack slot with vstr.16, reloaded with
; ldrh and stored with strh (hence the 36-byte stack adjustment).
define arm_aapcs_vfpcc void @masked_v8f16_align1(ptr %dest, <8 x half> %a, <8 x i16> %b) {
; CHECK-LE-LABEL: masked_v8f16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #36
; CHECK-LE-NEXT:    sub sp, #36
; CHECK-LE-NEXT:    vcmp.i16 ne, q1, zr
; CHECK-LE-NEXT:    movs r2, #0
; CHECK-LE-NEXT:    vmrs r1, p0
; CHECK-LE-NEXT:    and r3, r1, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #0, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #2, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #1, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #4, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #2, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #6, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #3, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #8, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #4, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #10, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #5, #1
; CHECK-LE-NEXT:    ubfx r3, r1, #12, #1
; CHECK-LE-NEXT:    ubfx r1, r1, #14, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r2, r3, #6, #1
; CHECK-LE-NEXT:    rsbs r1, r1, #0
; CHECK-LE-NEXT:    bfi r2, r1, #7, #1
; CHECK-LE-NEXT:    uxtb r1, r2
; CHECK-LE-NEXT:    lsls r2, r2, #31
; CHECK-LE-NEXT:    bne .LBB16_9
; CHECK-LE-NEXT:  @ %bb.1: @ %else
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    bmi .LBB16_10
; CHECK-LE-NEXT:  .LBB16_2: @ %else2
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    bmi .LBB16_11
; CHECK-LE-NEXT:  .LBB16_3: @ %else4
; CHECK-LE-NEXT:    lsls r2, r1, #28
; CHECK-LE-NEXT:    bmi .LBB16_12
; CHECK-LE-NEXT:  .LBB16_4: @ %else6
; CHECK-LE-NEXT:    lsls r2, r1, #27
; CHECK-LE-NEXT:    bmi .LBB16_13
; CHECK-LE-NEXT:  .LBB16_5: @ %else8
; CHECK-LE-NEXT:    lsls r2, r1, #26
; CHECK-LE-NEXT:    bmi .LBB16_14
; CHECK-LE-NEXT:  .LBB16_6: @ %else10
; CHECK-LE-NEXT:    lsls r2, r1, #25
; CHECK-LE-NEXT:    bmi .LBB16_15
; CHECK-LE-NEXT:  .LBB16_7: @ %else12
; CHECK-LE-NEXT:    lsls r1, r1, #24
; CHECK-LE-NEXT:    bmi .LBB16_16
; CHECK-LE-NEXT:  .LBB16_8: @ %else14
; CHECK-LE-NEXT:    add sp, #36
; CHECK-LE-NEXT:    bx lr
; CHECK-LE-NEXT:  .LBB16_9: @ %cond.store
; CHECK-LE-NEXT:    vstr.16 s0, [sp, #28]
; CHECK-LE-NEXT:    ldrh.w r2, [sp, #28]
; CHECK-LE-NEXT:    strh r2, [r0]
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    bpl .LBB16_2
; CHECK-LE-NEXT:  .LBB16_10: @ %cond.store1
; CHECK-LE-NEXT:    vmovx.f16 s0, s0
; CHECK-LE-NEXT:    vstr.16 s0, [sp, #24]
; CHECK-LE-NEXT:    ldrh.w r2, [sp, #24]
; CHECK-LE-NEXT:    strh r2, [r0, #2]
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    bpl .LBB16_3
; CHECK-LE-NEXT:  .LBB16_11: @ %cond.store3
; CHECK-LE-NEXT:    vstr.16 s1, [sp, #20]
; CHECK-LE-NEXT:    ldrh.w r2, [sp, #20]
; CHECK-LE-NEXT:    strh r2, [r0, #4]
; CHECK-LE-NEXT:    lsls r2, r1, #28
; CHECK-LE-NEXT:    bpl .LBB16_4
; CHECK-LE-NEXT:  .LBB16_12: @ %cond.store5
; CHECK-LE-NEXT:    vmovx.f16 s0, s1
; CHECK-LE-NEXT:    vstr.16 s0, [sp, #16]
; CHECK-LE-NEXT:    ldrh.w r2, [sp, #16]
; CHECK-LE-NEXT:    strh r2, [r0, #6]
; CHECK-LE-NEXT:    lsls r2, r1, #27
; CHECK-LE-NEXT:    bpl .LBB16_5
; CHECK-LE-NEXT:  .LBB16_13: @ %cond.store7
; CHECK-LE-NEXT:    vstr.16 s2, [sp, #12]
; CHECK-LE-NEXT:    ldrh.w r2, [sp, #12]
; CHECK-LE-NEXT:    strh r2, [r0, #8]
; CHECK-LE-NEXT:    lsls r2, r1, #26
; CHECK-LE-NEXT:    bpl .LBB16_6
; CHECK-LE-NEXT:  .LBB16_14: @ %cond.store9
; CHECK-LE-NEXT:    vmovx.f16 s0, s2
; CHECK-LE-NEXT:    vstr.16 s0, [sp, #8]
; CHECK-LE-NEXT:    ldrh.w r2, [sp, #8]
; CHECK-LE-NEXT:    strh r2, [r0, #10]
; CHECK-LE-NEXT:    lsls r2, r1, #25
; CHECK-LE-NEXT:    bpl .LBB16_7
; CHECK-LE-NEXT:  .LBB16_15: @ %cond.store11
; CHECK-LE-NEXT:    vstr.16 s3, [sp, #4]
; CHECK-LE-NEXT:    ldrh.w r2, [sp, #4]
; CHECK-LE-NEXT:    strh r2, [r0, #12]
; CHECK-LE-NEXT:    lsls r1, r1, #24
; CHECK-LE-NEXT:    bpl .LBB16_8
; CHECK-LE-NEXT:  .LBB16_16: @ %cond.store13
; CHECK-LE-NEXT:    vmovx.f16 s0, s3
; CHECK-LE-NEXT:    vstr.16 s0, [sp]
; CHECK-LE-NEXT:    ldrh.w r1, [sp]
; CHECK-LE-NEXT:    strh r1, [r0, #14]
; CHECK-LE-NEXT:    add sp, #36
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #36
; CHECK-BE-NEXT:    sub sp, #36
; CHECK-BE-NEXT:    vrev64.16 q2, q1
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vcmp.i16 ne, q2, zr
; CHECK-BE-NEXT:    vmrs r1, p0
; CHECK-BE-NEXT:    ubfx r2, r1, #14, #1
; CHECK-BE-NEXT:    rsbs r3, r2, #0
; CHECK-BE-NEXT:    movs r2, #0
; CHECK-BE-NEXT:    bfi r2, r3, #0, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #12, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #1, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #10, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #2, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #8, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #3, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #6, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #4, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #4, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #5, #1
; CHECK-BE-NEXT:    ubfx r3, r1, #2, #1
; CHECK-BE-NEXT:    and r1, r1, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r2, r3, #6, #1
; CHECK-BE-NEXT:    rsbs r1, r1, #0
; CHECK-BE-NEXT:    bfi r2, r1, #7, #1
; CHECK-BE-NEXT:    uxtb r1, r2
; CHECK-BE-NEXT:    lsls r2, r2, #24
; CHECK-BE-NEXT:    bmi .LBB16_9
; CHECK-BE-NEXT:  @ %bb.1: @ %else
; CHECK-BE-NEXT:    lsls r2, r1, #25
; CHECK-BE-NEXT:    bmi .LBB16_10
; CHECK-BE-NEXT:  .LBB16_2: @ %else2
; CHECK-BE-NEXT:    lsls r2, r1, #26
; CHECK-BE-NEXT:    bmi .LBB16_11
; CHECK-BE-NEXT:  .LBB16_3: @ %else4
; CHECK-BE-NEXT:    lsls r2, r1, #27
; CHECK-BE-NEXT:    bmi .LBB16_12
; CHECK-BE-NEXT:  .LBB16_4: @ %else6
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    bmi .LBB16_13
; CHECK-BE-NEXT:  .LBB16_5: @ %else8
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    bmi .LBB16_14
; CHECK-BE-NEXT:  .LBB16_6: @ %else10
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    bmi .LBB16_15
; CHECK-BE-NEXT:  .LBB16_7: @ %else12
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    bne .LBB16_16
; CHECK-BE-NEXT:  .LBB16_8: @ %else14
; CHECK-BE-NEXT:    add sp, #36
; CHECK-BE-NEXT:    bx lr
; CHECK-BE-NEXT:  .LBB16_9: @ %cond.store
; CHECK-BE-NEXT:    vstr.16 s4, [sp, #28]
; CHECK-BE-NEXT:    ldrh.w r2, [sp, #28]
; CHECK-BE-NEXT:    strh r2, [r0]
; CHECK-BE-NEXT:    lsls r2, r1, #25
; CHECK-BE-NEXT:    bpl .LBB16_2
; CHECK-BE-NEXT:  .LBB16_10: @ %cond.store1
; CHECK-BE-NEXT:    vmovx.f16 s0, s4
; CHECK-BE-NEXT:    vstr.16 s0, [sp, #24]
; CHECK-BE-NEXT:    ldrh.w r2, [sp, #24]
; CHECK-BE-NEXT:    strh r2, [r0, #2]
; CHECK-BE-NEXT:    lsls r2, r1, #26
; CHECK-BE-NEXT:    bpl .LBB16_3
; CHECK-BE-NEXT:  .LBB16_11: @ %cond.store3
; CHECK-BE-NEXT:    vstr.16 s5, [sp, #20]
; CHECK-BE-NEXT:    ldrh.w r2, [sp, #20]
; CHECK-BE-NEXT:    strh r2, [r0, #4]
; CHECK-BE-NEXT:    lsls r2, r1, #27
; CHECK-BE-NEXT:    bpl .LBB16_4
; CHECK-BE-NEXT:  .LBB16_12: @ %cond.store5
; CHECK-BE-NEXT:    vmovx.f16 s0, s5
; CHECK-BE-NEXT:    vstr.16 s0, [sp, #16]
; CHECK-BE-NEXT:    ldrh.w r2, [sp, #16]
; CHECK-BE-NEXT:    strh r2, [r0, #6]
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    bpl .LBB16_5
; CHECK-BE-NEXT:  .LBB16_13: @ %cond.store7
; CHECK-BE-NEXT:    vstr.16 s6, [sp, #12]
; CHECK-BE-NEXT:    ldrh.w r2, [sp, #12]
; CHECK-BE-NEXT:    strh r2, [r0, #8]
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    bpl .LBB16_6
; CHECK-BE-NEXT:  .LBB16_14: @ %cond.store9
; CHECK-BE-NEXT:    vmovx.f16 s0, s6
; CHECK-BE-NEXT:    vstr.16 s0, [sp, #8]
; CHECK-BE-NEXT:    ldrh.w r2, [sp, #8]
; CHECK-BE-NEXT:    strh r2, [r0, #10]
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    bpl .LBB16_7
; CHECK-BE-NEXT:  .LBB16_15: @ %cond.store11
; CHECK-BE-NEXT:    vstr.16 s7, [sp, #4]
; CHECK-BE-NEXT:    ldrh.w r2, [sp, #4]
; CHECK-BE-NEXT:    strh r2, [r0, #12]
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    beq .LBB16_8
; CHECK-BE-NEXT:  .LBB16_16: @ %cond.store13
; CHECK-BE-NEXT:    vmovx.f16 s0, s7
; CHECK-BE-NEXT:    vstr.16 s0, [sp]
; CHECK-BE-NEXT:    ldrh.w r1, [sp]
; CHECK-BE-NEXT:    strh r1, [r0, #14]
; CHECK-BE-NEXT:    add sp, #36
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp ugt <8 x i16> %b, zeroinitializer
  call void @llvm.masked.store.v8f16.p0(<8 x half> %a, ptr %dest, i32 1, <8 x i1> %c)
  ret void
}

; <8 x half> masked store to %y + 4, returning %y + 4. Expected: pre-indexed
; vstrht.16 with writeback.
define ptr @masked_v8f16_pre(ptr %y, ptr %x, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_pre:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vstrht.16 q1, [r0, #4]!
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_pre:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vldrh.u16 q1, [r1]
; CHECK-BE-NEXT:    vrev64.16 q2, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q2, zr
; CHECK-BE-NEXT:    vstrht.16 q1, [r0, #4]!
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <8 x half>, ptr %x, align 4
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %z, i32 2, <8 x i1> %c)
  ret ptr %z
}

; <8 x half> masked store to %y, returning %y + 4. Expected: post-indexed
; vstrht.16.
define ptr @masked_v8f16_post(ptr %y, ptr %x, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8f16_post:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vldr d1, [sp]
; CHECK-LE-NEXT:    vmov d0, r2, r3
; CHECK-LE-NEXT:    vldrw.u32 q1, [r1]
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vstrht.16 q1, [r0], #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8f16_post:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vldr d1, [sp]
; CHECK-BE-NEXT:    vmov d0, r3, r2
; CHECK-BE-NEXT:    vldrh.u16 q1, [r1]
; CHECK-BE-NEXT:    vrev64.16 q2, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q2, zr
; CHECK-BE-NEXT:    vstrht.16 q1, [r0], #4
; CHECK-BE-NEXT:    bx lr
entry:
  %z = getelementptr inbounds i8, ptr %y, i32 4
  %0 = load <8 x half>, ptr %x, align 4
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  call void @llvm.masked.store.v8f16.p0(<8 x half> %0, ptr %y, i32 2, <8 x i1> %c)
  ret ptr %z
}


; <2 x i64> store under the mask (%a > 0). Expected: the 64-bit compare is
; done in core registers (rsbs / sbcs / csetm) and each lane is stored with a
; conditional vstr of a d register.
define arm_aapcs_vfpcc void @masked_v2i64(ptr %dest, <2 x i64> %a) {
; CHECK-LE-LABEL: masked_v2i64:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vmov r2, r3, d0
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    vmov r12, lr, d1
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    sbcs.w r2, r1, r3
; CHECK-LE-NEXT:    csetm r2, lt
; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
; CHECK-LE-NEXT:    sbcs.w r3, r1, lr
; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
; CHECK-LE-NEXT:    csetm r2, lt
; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    it ne
; CHECK-LE-NEXT:    vstrne d0, [r0]
; CHECK-LE-NEXT:    lsls r1, r1, #30
; CHECK-LE-NEXT:    it mi
; CHECK-LE-NEXT:    vstrmi d1, [r0, #8]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    pop {r7, pc}
;
; CHECK-BE-LABEL: masked_v2i64:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    vmov r2, r3, d3
; CHECK-BE-NEXT:    vmov r12, lr, d2
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    sbcs.w r2, r1, r2
; CHECK-BE-NEXT:    csetm r2, lt
; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
; CHECK-BE-NEXT:    sbcs.w r3, r1, r12
; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
; CHECK-BE-NEXT:    csetm r2, lt
; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    it mi
; CHECK-BE-NEXT:    vstrmi d0, [r0]
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    it ne
; CHECK-BE-NEXT:    vstrne d1, [r0, #8]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    pop {r7, pc}
entry:
  %c = icmp sgt <2 x i64> %a, zeroinitializer
  call void @llvm.masked.store.v2i64.p0(<2 x i64> %a, ptr %dest, i32 8, <2 x i1> %c)
  ret void
}

; <2 x double> store under the mask (%b > 0) taken from a separate
; <2 x i64> vector. Expected: the same core-register compare as the i64
; case, followed by conditional vstr of each d register.
define arm_aapcs_vfpcc void @masked_v2f64(ptr %dest, <2 x double> %a, <2 x i64> %b) {
; CHECK-LE-LABEL: masked_v2f64:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .save {r7, lr}
; CHECK-LE-NEXT:    push {r7, lr}
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vmov r2, r3, d2
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    vmov r12, lr, d3
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    sbcs.w r2, r1, r3
; CHECK-LE-NEXT:    csetm r2, lt
; CHECK-LE-NEXT:    rsbs.w r3, r12, #0
; CHECK-LE-NEXT:    sbcs.w r3, r1, lr
; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
; CHECK-LE-NEXT:    csetm r2, lt
; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    it ne
; CHECK-LE-NEXT:    vstrne d0, [r0]
; CHECK-LE-NEXT:    lsls r1, r1, #30
; CHECK-LE-NEXT:    it mi
; CHECK-LE-NEXT:    vstrmi d1, [r0, #8]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    pop {r7, pc}
;
; CHECK-BE-LABEL: masked_v2f64:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .save {r7, lr}
; CHECK-BE-NEXT:    push {r7, lr}
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q2, q1
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    vmov r2, r3, d5
; CHECK-BE-NEXT:    vmov r12, lr, d4
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    sbcs.w r2, r1, r2
; CHECK-BE-NEXT:    csetm r2, lt
; CHECK-BE-NEXT:    rsbs.w r3, lr, #0
; CHECK-BE-NEXT:    sbcs.w r3, r1, r12
; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
; CHECK-BE-NEXT:    csetm r2, lt
; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    it mi
; CHECK-BE-NEXT:    vstrmi d0, [r0]
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    it ne
; CHECK-BE-NEXT:    vstrne d1, [r0, #8]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    pop {r7, pc}
entry:
  %c = icmp sgt <2 x i64> %b, zeroinitializer
  call void @llvm.masked.store.v2f64.p0(<2 x double> %a, ptr %dest, i32 8, <2 x i1> %c)
  ret void
}

; Truncating masked store: %a is truncated from <4 x i32> to <4 x i16> and
; stored at alignment 2 under the mask (%a > 0). Expected: the truncate is
; folded into a predicated narrowing store, vstrht.32.
define arm_aapcs_vfpcc void @masked_v4i16(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i16:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vstrht.32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i16:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vstrht.32 q1, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %trunc = trunc <4 x i32> %a to <4 x i16>
  call void @llvm.masked.store.v4i16.p0(<4 x i16> %trunc, ptr %dest, i32 2, <4 x i1> %c)
  ret void
}
; Truncating store: <4 x i32> %a is truncated to <4 x i8> and stored with
; alignment operand 1 under the mask "%a > 0" (signed). Expected code is a
; single predicated vstrbt.32 (plus a vrev64.32 on big-endian).
define arm_aapcs_vfpcc void @masked_v4i8(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vstrbt.32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vstrbt.32 q1, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %trunc = trunc <4 x i32> %a to <4 x i8>
  call void @llvm.masked.store.v4i8.p0(<4 x i8> %trunc, ptr %dest, i32 1, <4 x i1> %c)
  ret void
}

; Truncating store: <8 x i16> %a is truncated to <8 x i8> and stored with
; alignment operand 1 under the mask "%a > 0" (signed). Expected code is a
; single predicated vstrbt.16 (plus a vrev64.16 on big-endian).
define arm_aapcs_vfpcc void @masked_v8i8(ptr %dest, <8 x i16> %a) {
; CHECK-LE-LABEL: masked_v8i8:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s16 gt, q0, zr
; CHECK-LE-NEXT:    vstrbt.16 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v8i8:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.16 q1, q0
; CHECK-BE-NEXT:    vpt.s16 gt, q1, zr
; CHECK-BE-NEXT:    vstrbt.16 q1, [r0]
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <8 x i16> %a, zeroinitializer
  %trunc = trunc <8 x i16> %a to <8 x i8>
  call void @llvm.masked.store.v8i8.p0(<8 x i8> %trunc, ptr %dest, i32 1, <8 x i1> %c)
  ret void
}

; Same truncating <4 x i16> store as masked_v4i16, but with alignment operand 1.
; Here the expected code is not a single predicated vector store: the predicate
; is moved to a GPR (vmrs), rebuilt into a 4-bit mask, and each lane is stored
; with a conditional strh inside an IT block.
define arm_aapcs_vfpcc void @masked_v4i16_align1(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: masked_v4i16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vcmp.s32 gt, q0, zr
; CHECK-LE-NEXT:    vmrs r2, p0
; CHECK-LE-NEXT:    and r1, r2, #1
; CHECK-LE-NEXT:    rsbs r3, r1, #0
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    bfi r1, r3, #0, #1
; CHECK-LE-NEXT:    ubfx r3, r2, #4, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r1, r3, #1, #1
; CHECK-LE-NEXT:    ubfx r3, r2, #8, #1
; CHECK-LE-NEXT:    ubfx r2, r2, #12, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r1, r3, #2, #1
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    itt ne
; CHECK-LE-NEXT:    vmovne r2, s0
; CHECK-LE-NEXT:    strhne r2, [r0]
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi r2, s1
; CHECK-LE-NEXT:    strhmi r2, [r0, #2]
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi r2, s2
; CHECK-LE-NEXT:    strhmi r2, [r0, #4]
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    vmovmi r1, s3
; CHECK-LE-NEXT:    strhmi r1, [r0, #6]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4i16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT:    vmrs r2, p0
; CHECK-BE-NEXT:    ubfx r1, r2, #12, #1
; CHECK-BE-NEXT:    rsbs r3, r1, #0
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    bfi r1, r3, #0, #1
; CHECK-BE-NEXT:    ubfx r3, r2, #8, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
; CHECK-BE-NEXT:    ubfx r3, r2, #4, #1
; CHECK-BE-NEXT:    and r2, r2, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r1, r3, #2, #1
; CHECK-BE-NEXT:    rsbs r2, r2, #0
; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi r2, s4
; CHECK-BE-NEXT:    strhmi r2, [r0]
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi r2, s5
; CHECK-BE-NEXT:    strhmi r2, [r0, #2]
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    vmovmi r2, s6
; CHECK-BE-NEXT:    strhmi r2, [r0, #4]
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    itt ne
; CHECK-BE-NEXT:    vmovne r1, s7
; CHECK-BE-NEXT:    strhne r1, [r0, #6]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %trunc = trunc <4 x i32> %a to <4 x i16>
  call void @llvm.masked.store.v4i16.p0(<4 x i16> %trunc, ptr %dest, i32 1, <4 x i1> %c)
  ret void
}

; <4 x float> %a is fptrunc'ed to <4 x half> and stored with alignment operand 4
; under the mask "%a > 0.0" (ordered greater-than). Expected code compares each
; lane with scalar vcmp.f32, builds a 4-bit mask in r1, and branches to a
; separate vstr.16 per enabled lane.
define arm_aapcs_vfpcc void @masked_v4f16_align4(ptr %dest, <4 x float> %a) {
; CHECK-LE-LABEL: masked_v4f16_align4:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vcmp.f32 s0, #0
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    vcmp.f32 s1, #0
; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT:    vcvtb.f16.f32 s5, s2
; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
; CHECK-LE-NEXT:    vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    vcmp.f32 s2, #0
; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    vcmp.f32 s3, #0
; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    bfi r1, r2, #2, #1
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    bne .LBB25_5
; CHECK-LE-NEXT:  @ %bb.1: @ %else
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    bmi .LBB25_6
; CHECK-LE-NEXT:  .LBB25_2: @ %else2
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    bmi .LBB25_7
; CHECK-LE-NEXT:  .LBB25_3: @ %else4
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    bmi .LBB25_8
; CHECK-LE-NEXT:  .LBB25_4: @ %else6
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
; CHECK-LE-NEXT:  .LBB25_5: @ %cond.store
; CHECK-LE-NEXT:    vstr.16 s4, [r0]
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    bpl .LBB25_2
; CHECK-LE-NEXT:  .LBB25_6: @ %cond.store1
; CHECK-LE-NEXT:    vmovx.f16 s0, s4
; CHECK-LE-NEXT:    vstr.16 s0, [r0, #2]
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    bpl .LBB25_3
; CHECK-LE-NEXT:  .LBB25_7: @ %cond.store3
; CHECK-LE-NEXT:    vstr.16 s5, [r0, #4]
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    bpl .LBB25_4
; CHECK-LE-NEXT:  .LBB25_8: @ %cond.store5
; CHECK-LE-NEXT:    vmovx.f16 s0, s5
; CHECK-LE-NEXT:    vstr.16 s0, [r0, #6]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f16_align4:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    vcmp.f32 s7, #0
; CHECK-BE-NEXT:    vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    vcmp.f32 s6, #0
; CHECK-BE-NEXT:    vcvtb.f16.f32 s1, s6
; CHECK-BE-NEXT:    vcvtt.f16.f32 s0, s5
; CHECK-BE-NEXT:    vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    vcmp.f32 s5, #0
; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    vcmp.f32 s4, #0
; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    bfi r1, r2, #2, #1
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    bmi .LBB25_5
; CHECK-BE-NEXT:  @ %bb.1: @ %else
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    bmi .LBB25_6
; CHECK-BE-NEXT:  .LBB25_2: @ %else2
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    bmi .LBB25_7
; CHECK-BE-NEXT:  .LBB25_3: @ %else4
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    bne .LBB25_8
; CHECK-BE-NEXT:  .LBB25_4: @ %else6
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
; CHECK-BE-NEXT:  .LBB25_5: @ %cond.store
; CHECK-BE-NEXT:    vstr.16 s0, [r0]
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    bpl .LBB25_2
; CHECK-BE-NEXT:  .LBB25_6: @ %cond.store1
; CHECK-BE-NEXT:    vmovx.f16 s0, s0
; CHECK-BE-NEXT:    vstr.16 s0, [r0, #2]
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    bpl .LBB25_3
; CHECK-BE-NEXT:  .LBB25_7: @ %cond.store3
; CHECK-BE-NEXT:    vstr.16 s1, [r0, #4]
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    beq .LBB25_4
; CHECK-BE-NEXT:  .LBB25_8: @ %cond.store5
; CHECK-BE-NEXT:    vmovx.f16 s0, s1
; CHECK-BE-NEXT:    vstr.16 s0, [r0, #6]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
entry:
  %c = fcmp ogt <4 x float> %a, zeroinitializer
  %trunc = fptrunc <4 x float> %a to <4 x half>
  call void @llvm.masked.store.v4f16.p0(<4 x half> %trunc, ptr %dest, i32 4, <4 x i1> %c)
  ret void
}

; Same IR as masked_v4f16_align4 except the alignment operand is 2. The
; expected instruction sequence is the same per-lane branch + vstr.16 shape.
define arm_aapcs_vfpcc void @masked_v4f16_align2(ptr %dest, <4 x float> %a) {
; CHECK-LE-LABEL: masked_v4f16_align2:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vcmp.f32 s0, #0
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    vcmp.f32 s1, #0
; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT:    vcvtb.f16.f32 s5, s2
; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
; CHECK-LE-NEXT:    vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    vcmp.f32 s2, #0
; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    vcmp.f32 s3, #0
; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    bfi r1, r2, #2, #1
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    bne .LBB26_5
; CHECK-LE-NEXT:  @ %bb.1: @ %else
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    bmi .LBB26_6
; CHECK-LE-NEXT:  .LBB26_2: @ %else2
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    bmi .LBB26_7
; CHECK-LE-NEXT:  .LBB26_3: @ %else4
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    bmi .LBB26_8
; CHECK-LE-NEXT:  .LBB26_4: @ %else6
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
; CHECK-LE-NEXT:  .LBB26_5: @ %cond.store
; CHECK-LE-NEXT:    vstr.16 s4, [r0]
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    bpl .LBB26_2
; CHECK-LE-NEXT:  .LBB26_6: @ %cond.store1
; CHECK-LE-NEXT:    vmovx.f16 s0, s4
; CHECK-LE-NEXT:    vstr.16 s0, [r0, #2]
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    bpl .LBB26_3
; CHECK-LE-NEXT:  .LBB26_7: @ %cond.store3
; CHECK-LE-NEXT:    vstr.16 s5, [r0, #4]
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    bpl .LBB26_4
; CHECK-LE-NEXT:  .LBB26_8: @ %cond.store5
; CHECK-LE-NEXT:    vmovx.f16 s0, s5
; CHECK-LE-NEXT:    vstr.16 s0, [r0, #6]
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f16_align2:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    vcmp.f32 s7, #0
; CHECK-BE-NEXT:    vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    vcmp.f32 s6, #0
; CHECK-BE-NEXT:    vcvtb.f16.f32 s1, s6
; CHECK-BE-NEXT:    vcvtt.f16.f32 s0, s5
; CHECK-BE-NEXT:    vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    vcmp.f32 s5, #0
; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    vcmp.f32 s4, #0
; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    bfi r1, r2, #2, #1
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    bmi .LBB26_5
; CHECK-BE-NEXT:  @ %bb.1: @ %else
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    bmi .LBB26_6
; CHECK-BE-NEXT:  .LBB26_2: @ %else2
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    bmi .LBB26_7
; CHECK-BE-NEXT:  .LBB26_3: @ %else4
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    bne .LBB26_8
; CHECK-BE-NEXT:  .LBB26_4: @ %else6
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
; CHECK-BE-NEXT:  .LBB26_5: @ %cond.store
; CHECK-BE-NEXT:    vstr.16 s0, [r0]
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    bpl .LBB26_2
; CHECK-BE-NEXT:  .LBB26_6: @ %cond.store1
; CHECK-BE-NEXT:    vmovx.f16 s0, s0
; CHECK-BE-NEXT:    vstr.16 s0, [r0, #2]
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    bpl .LBB26_3
; CHECK-BE-NEXT:  .LBB26_7: @ %cond.store3
; CHECK-BE-NEXT:    vstr.16 s1, [r0, #4]
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    beq .LBB26_4
; CHECK-BE-NEXT:  .LBB26_8: @ %cond.store5
; CHECK-BE-NEXT:    vmovx.f16 s0, s1
; CHECK-BE-NEXT:    vstr.16 s0, [r0, #6]
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
entry:
  %c = fcmp ogt <4 x float> %a, zeroinitializer
  %trunc = fptrunc <4 x float> %a to <4 x half>
  call void @llvm.masked.store.v4f16.p0(<4 x half> %trunc, ptr %dest, i32 2, <4 x i1> %c)
  ret void
}

; Same IR as masked_v4f16_align4 except the alignment operand is 1. In the
; expected code each enabled lane is first written to a stack slot with
; vstr.16, reloaded with ldrh.w, and stored to the destination with strh; the
; frame grows to 20 bytes to hold those slots.
define arm_aapcs_vfpcc void @masked_v4f16_align1(ptr %dest, <4 x float> %a) {
; CHECK-LE-LABEL: masked_v4f16_align1:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #20
; CHECK-LE-NEXT:    sub sp, #20
; CHECK-LE-NEXT:    vcmp.f32 s0, #0
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    vcmp.f32 s1, #0
; CHECK-LE-NEXT:    vcvtb.f16.f32 s4, s0
; CHECK-LE-NEXT:    vcvtb.f16.f32 s5, s2
; CHECK-LE-NEXT:    vcvtt.f16.f32 s4, s1
; CHECK-LE-NEXT:    vcvtt.f16.f32 s5, s3
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    vcmp.f32 s2, #0
; CHECK-LE-NEXT:    bfi r1, r2, #0, #1
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    vcmp.f32 s3, #0
; CHECK-LE-NEXT:    bfi r1, r2, #1, #1
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-LE-NEXT:    bfi r1, r2, #2, #1
; CHECK-LE-NEXT:    csetm r2, gt
; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    bne .LBB27_5
; CHECK-LE-NEXT:  @ %bb.1: @ %else
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    bmi .LBB27_6
; CHECK-LE-NEXT:  .LBB27_2: @ %else2
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    bmi .LBB27_7
; CHECK-LE-NEXT:  .LBB27_3: @ %else4
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    bmi .LBB27_8
; CHECK-LE-NEXT:  .LBB27_4: @ %else6
; CHECK-LE-NEXT:    add sp, #20
; CHECK-LE-NEXT:    bx lr
; CHECK-LE-NEXT:  .LBB27_5: @ %cond.store
; CHECK-LE-NEXT:    vstr.16 s4, [sp, #12]
; CHECK-LE-NEXT:    ldrh.w r2, [sp, #12]
; CHECK-LE-NEXT:    strh r2, [r0]
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    bpl .LBB27_2
; CHECK-LE-NEXT:  .LBB27_6: @ %cond.store1
; CHECK-LE-NEXT:    vmovx.f16 s0, s4
; CHECK-LE-NEXT:    vstr.16 s0, [sp, #8]
; CHECK-LE-NEXT:    ldrh.w r2, [sp, #8]
; CHECK-LE-NEXT:    strh r2, [r0, #2]
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    bpl .LBB27_3
; CHECK-LE-NEXT:  .LBB27_7: @ %cond.store3
; CHECK-LE-NEXT:    vstr.16 s5, [sp, #4]
; CHECK-LE-NEXT:    ldrh.w r2, [sp, #4]
; CHECK-LE-NEXT:    strh r2, [r0, #4]
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    bpl .LBB27_4
; CHECK-LE-NEXT:  .LBB27_8: @ %cond.store5
; CHECK-LE-NEXT:    vmovx.f16 s0, s5
; CHECK-LE-NEXT:    vstr.16 s0, [sp]
; CHECK-LE-NEXT:    ldrh.w r1, [sp]
; CHECK-LE-NEXT:    strh r1, [r0, #6]
; CHECK-LE-NEXT:    add sp, #20
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: masked_v4f16_align1:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #20
; CHECK-BE-NEXT:    sub sp, #20
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    vcmp.f32 s7, #0
; CHECK-BE-NEXT:    vcvtb.f16.f32 s0, s4
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    vcmp.f32 s6, #0
; CHECK-BE-NEXT:    vcvtb.f16.f32 s1, s6
; CHECK-BE-NEXT:    vcvtt.f16.f32 s0, s5
; CHECK-BE-NEXT:    vcvtt.f16.f32 s1, s7
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    vcmp.f32 s5, #0
; CHECK-BE-NEXT:    bfi r1, r2, #0, #1
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    vcmp.f32 s4, #0
; CHECK-BE-NEXT:    bfi r1, r2, #1, #1
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    vmrs APSR_nzcv, fpscr
; CHECK-BE-NEXT:    bfi r1, r2, #2, #1
; CHECK-BE-NEXT:    csetm r2, gt
; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    bmi .LBB27_5
; CHECK-BE-NEXT:  @ %bb.1: @ %else
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    bmi .LBB27_6
; CHECK-BE-NEXT:  .LBB27_2: @ %else2
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    bmi .LBB27_7
; CHECK-BE-NEXT:  .LBB27_3: @ %else4
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    bne .LBB27_8
; CHECK-BE-NEXT:  .LBB27_4: @ %else6
; CHECK-BE-NEXT:    add sp, #20
; CHECK-BE-NEXT:    bx lr
; CHECK-BE-NEXT:  .LBB27_5: @ %cond.store
; CHECK-BE-NEXT:    vstr.16 s0, [sp, #12]
; CHECK-BE-NEXT:    ldrh.w r2, [sp, #12]
; CHECK-BE-NEXT:    strh r2, [r0]
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    bpl .LBB27_2
; CHECK-BE-NEXT:  .LBB27_6: @ %cond.store1
; CHECK-BE-NEXT:    vmovx.f16 s0, s0
; CHECK-BE-NEXT:    vstr.16 s0, [sp, #8]
; CHECK-BE-NEXT:    ldrh.w r2, [sp, #8]
; CHECK-BE-NEXT:    strh r2, [r0, #2]
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    bpl .LBB27_3
; CHECK-BE-NEXT:  .LBB27_7: @ %cond.store3
; CHECK-BE-NEXT:    vstr.16 s1, [sp, #4]
; CHECK-BE-NEXT:    ldrh.w r2, [sp, #4]
; CHECK-BE-NEXT:    strh r2, [r0, #4]
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    beq .LBB27_4
; CHECK-BE-NEXT:  .LBB27_8: @ %cond.store5
; CHECK-BE-NEXT:    vmovx.f16 s0, s1
; CHECK-BE-NEXT:    vstr.16 s0, [sp]
; CHECK-BE-NEXT:    ldrh.w r1, [sp]
; CHECK-BE-NEXT:    strh r1, [r0, #6]
; CHECK-BE-NEXT:    add sp, #20
; CHECK-BE-NEXT:    bx lr
entry:
  %c = fcmp ogt <4 x float> %a, zeroinitializer
  %trunc = fptrunc <4 x float> %a to <4 x half>
  call void @llvm.masked.store.v4f16.p0(<4 x half> %trunc, ptr %dest, i32 1, <4 x i1> %c)
  ret void
}

; Declarations of the llvm.masked.store overloads. Operands, in order, are:
; the vector to store, the destination pointer, an i32 alignment, and the
; per-lane <N x i1> mask.
declare void @llvm.masked.store.v4i8.p0(<4 x i8>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>)
declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v4f16.p0(<4 x half>, ptr, i32, <4 x i1>)
declare void @llvm.masked.store.v8f16.p0(<8 x half>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>)
declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>)