1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-LE 3; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-BE 4 5define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_zero(ptr %dest, <4 x i32> %a) { 6; CHECK-LE-LABEL: masked_v4i32_align4_zero: 7; CHECK-LE: @ %bb.0: @ %entry 8; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 9; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] 10; CHECK-LE-NEXT: bx lr 11; 12; CHECK-BE-LABEL: masked_v4i32_align4_zero: 13; CHECK-BE: @ %bb.0: @ %entry 14; CHECK-BE-NEXT: vrev64.32 q1, q0 15; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 16; CHECK-BE-NEXT: vldrwt.u32 q1, [r0] 17; CHECK-BE-NEXT: vrev64.32 q0, q1 18; CHECK-BE-NEXT: bx lr 19entry: 20 %c = icmp sgt <4 x i32> %a, zeroinitializer 21 %l = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x i32> zeroinitializer) 22 ret <4 x i32> %l 23} 24 25define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_undef(ptr %dest, <4 x i32> %a) { 26; CHECK-LE-LABEL: masked_v4i32_align4_undef: 27; CHECK-LE: @ %bb.0: @ %entry 28; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 29; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] 30; CHECK-LE-NEXT: bx lr 31; 32; CHECK-BE-LABEL: masked_v4i32_align4_undef: 33; CHECK-BE: @ %bb.0: @ %entry 34; CHECK-BE-NEXT: vrev64.32 q1, q0 35; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 36; CHECK-BE-NEXT: vldrwt.u32 q1, [r0] 37; CHECK-BE-NEXT: vrev64.32 q0, q1 38; CHECK-BE-NEXT: bx lr 39entry: 40 %c = icmp sgt <4 x i32> %a, zeroinitializer 41 %l = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x i32> undef) 42 ret <4 x i32> %l 43} 44 45define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align1_undef(ptr %dest, <4 x i32> %a) { 46; CHECK-LE-LABEL: masked_v4i32_align1_undef: 47; CHECK-LE: @ %bb.0: @ %entry 48; CHECK-LE-NEXT: .pad #4 49; 
CHECK-LE-NEXT: sub sp, #4 50; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr 51; CHECK-LE-NEXT: @ implicit-def: $q0 52; CHECK-LE-NEXT: vmrs r2, p0 53; CHECK-LE-NEXT: and r1, r2, #1 54; CHECK-LE-NEXT: rsbs r3, r1, #0 55; CHECK-LE-NEXT: movs r1, #0 56; CHECK-LE-NEXT: bfi r1, r3, #0, #1 57; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 58; CHECK-LE-NEXT: rsbs r3, r3, #0 59; CHECK-LE-NEXT: bfi r1, r3, #1, #1 60; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 61; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 62; CHECK-LE-NEXT: rsbs r3, r3, #0 63; CHECK-LE-NEXT: bfi r1, r3, #2, #1 64; CHECK-LE-NEXT: rsbs r2, r2, #0 65; CHECK-LE-NEXT: bfi r1, r2, #3, #1 66; CHECK-LE-NEXT: lsls r2, r1, #31 67; CHECK-LE-NEXT: itt ne 68; CHECK-LE-NEXT: ldrne r2, [r0] 69; CHECK-LE-NEXT: vmovne.32 q0[0], r2 70; CHECK-LE-NEXT: lsls r2, r1, #30 71; CHECK-LE-NEXT: itt mi 72; CHECK-LE-NEXT: ldrmi r2, [r0, #4] 73; CHECK-LE-NEXT: vmovmi.32 q0[1], r2 74; CHECK-LE-NEXT: lsls r2, r1, #29 75; CHECK-LE-NEXT: itt mi 76; CHECK-LE-NEXT: ldrmi r2, [r0, #8] 77; CHECK-LE-NEXT: vmovmi.32 q0[2], r2 78; CHECK-LE-NEXT: lsls r1, r1, #28 79; CHECK-LE-NEXT: itt mi 80; CHECK-LE-NEXT: ldrmi r0, [r0, #12] 81; CHECK-LE-NEXT: vmovmi.32 q0[3], r0 82; CHECK-LE-NEXT: add sp, #4 83; CHECK-LE-NEXT: bx lr 84; 85; CHECK-BE-LABEL: masked_v4i32_align1_undef: 86; CHECK-BE: @ %bb.0: @ %entry 87; CHECK-BE-NEXT: .pad #4 88; CHECK-BE-NEXT: sub sp, #4 89; CHECK-BE-NEXT: vrev64.32 q1, q0 90; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr 91; CHECK-BE-NEXT: @ implicit-def: $q1 92; CHECK-BE-NEXT: vmrs r2, p0 93; CHECK-BE-NEXT: ubfx r1, r2, #12, #1 94; CHECK-BE-NEXT: rsbs r3, r1, #0 95; CHECK-BE-NEXT: movs r1, #0 96; CHECK-BE-NEXT: bfi r1, r3, #0, #1 97; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 98; CHECK-BE-NEXT: rsbs r3, r3, #0 99; CHECK-BE-NEXT: bfi r1, r3, #1, #1 100; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 101; CHECK-BE-NEXT: and r2, r2, #1 102; CHECK-BE-NEXT: rsbs r3, r3, #0 103; CHECK-BE-NEXT: bfi r1, r3, #2, #1 104; CHECK-BE-NEXT: rsbs r2, r2, #0 105; CHECK-BE-NEXT: bfi r1, r2, #3, #1 106; 
CHECK-BE-NEXT: lsls r2, r1, #28 107; CHECK-BE-NEXT: itt mi 108; CHECK-BE-NEXT: ldrmi r2, [r0] 109; CHECK-BE-NEXT: vmovmi.32 q1[0], r2 110; CHECK-BE-NEXT: lsls r2, r1, #29 111; CHECK-BE-NEXT: itt mi 112; CHECK-BE-NEXT: ldrmi r2, [r0, #4] 113; CHECK-BE-NEXT: vmovmi.32 q1[1], r2 114; CHECK-BE-NEXT: lsls r2, r1, #30 115; CHECK-BE-NEXT: itt mi 116; CHECK-BE-NEXT: ldrmi r2, [r0, #8] 117; CHECK-BE-NEXT: vmovmi.32 q1[2], r2 118; CHECK-BE-NEXT: lsls r1, r1, #31 119; CHECK-BE-NEXT: itt ne 120; CHECK-BE-NEXT: ldrne r0, [r0, #12] 121; CHECK-BE-NEXT: vmovne.32 q1[3], r0 122; CHECK-BE-NEXT: vrev64.32 q0, q1 123; CHECK-BE-NEXT: add sp, #4 124; CHECK-BE-NEXT: bx lr 125entry: 126 %c = icmp sgt <4 x i32> %a, zeroinitializer 127 %l = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i32> undef) 128 ret <4 x i32> %l 129} 130 131define arm_aapcs_vfpcc <4 x i32> @masked_v4i32_align4_other(ptr %dest, <4 x i32> %a) { 132; CHECK-LE-LABEL: masked_v4i32_align4_other: 133; CHECK-LE: @ %bb.0: @ %entry 134; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 135; CHECK-LE-NEXT: vldrwt.u32 q1, [r0] 136; CHECK-LE-NEXT: vpsel q0, q1, q0 137; CHECK-LE-NEXT: bx lr 138; 139; CHECK-BE-LABEL: masked_v4i32_align4_other: 140; CHECK-BE: @ %bb.0: @ %entry 141; CHECK-BE-NEXT: vrev64.32 q1, q0 142; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 143; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] 144; CHECK-BE-NEXT: vpsel q1, q0, q1 145; CHECK-BE-NEXT: vrev64.32 q0, q1 146; CHECK-BE-NEXT: bx lr 147entry: 148 %c = icmp sgt <4 x i32> %a, zeroinitializer 149 %l = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x i32> %a) 150 ret <4 x i32> %l 151} 152 153define arm_aapcs_vfpcc <4 x i32> @zext16_masked_v4i32_align2_zero(ptr %dest, <4 x i32> %a) { 154; CHECK-LE-LABEL: zext16_masked_v4i32_align2_zero: 155; CHECK-LE: @ %bb.0: @ %entry 156; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 157; CHECK-LE-NEXT: vldrht.u32 q0, [r0] 158; CHECK-LE-NEXT: bx lr 159; 160; CHECK-BE-LABEL: zext16_masked_v4i32_align2_zero: 161; 
CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u32 q1, [r0]
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> zeroinitializer)
  %ext = zext <4 x i16> %l to <4 x i32>
  ret <4 x i32> %ext
}

; Zero-extending masked load of <4 x i16> (align 2) with an undef passthru.
; The checks expect a single predicated vldrht.u32.
define arm_aapcs_vfpcc <4 x i32> @zext16_masked_v4i32_align2_undef(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: zext16_masked_v4i32_align2_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: zext16_masked_v4i32_align2_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u32 q1, [r0]
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> undef)
  %ext = zext <4 x i16> %l to <4 x i32>
  ret <4 x i32> %ext
}

; Zero-extending masked load of <4 x i16> with only byte alignment (align 1)
; and an undef passthru. Unlike the align-2 variants, the checks expect the
; load to be expanded into per-lane predicated ldrh/vmov sequences, followed
; by vmovlb.u16 for the zero-extension. The sign-extending counterpart is
; sext16_masked_v4i32_align1_undef.
; NOTE(review): the vmovlb.u16 assertions were adjusted by hand together with
; the zext below; regenerate with utils/update_llc_test_checks.py to confirm.
define arm_aapcs_vfpcc <4 x i32> @zext16_masked_v4i32_align1_undef(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: zext16_masked_v4i32_align1_undef:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #4
; CHECK-LE-NEXT:    sub sp, #4
; CHECK-LE-NEXT:    vcmp.s32 gt, q0, zr
; CHECK-LE-NEXT:    @ implicit-def: $q0
; CHECK-LE-NEXT:    vmrs r2, p0
; CHECK-LE-NEXT:    and r1, r2, #1
; CHECK-LE-NEXT:    rsbs r3, r1, #0
; CHECK-LE-NEXT:    movs r1, #0
; CHECK-LE-NEXT:    bfi r1, r3, #0, #1
; CHECK-LE-NEXT:    ubfx r3, r2, #4, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r1, r3, #1, #1
; CHECK-LE-NEXT:    ubfx r3, r2, #8, #1
; CHECK-LE-NEXT:    ubfx r2, r2, #12, #1
; CHECK-LE-NEXT:    rsbs r3, r3, #0
; CHECK-LE-NEXT:    bfi r1, r3, #2, #1
; CHECK-LE-NEXT:    rsbs r2, r2, #0
; CHECK-LE-NEXT:    bfi r1, r2, #3, #1
; CHECK-LE-NEXT:    lsls r2, r1, #31
; CHECK-LE-NEXT:    itt ne
; CHECK-LE-NEXT:    ldrhne r2, [r0]
; CHECK-LE-NEXT:    vmovne.32 q0[0], r2
; CHECK-LE-NEXT:    lsls r2, r1, #30
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    ldrhmi r2, [r0, #2]
; CHECK-LE-NEXT:    vmovmi.32 q0[1], r2
; CHECK-LE-NEXT:    lsls r2, r1, #29
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    ldrhmi r2, [r0, #4]
; CHECK-LE-NEXT:    vmovmi.32 q0[2], r2
; CHECK-LE-NEXT:    lsls r1, r1, #28
; CHECK-LE-NEXT:    itt mi
; CHECK-LE-NEXT:    ldrhmi r0, [r0, #6]
; CHECK-LE-NEXT:    vmovmi.32 q0[3], r0
; CHECK-LE-NEXT:    vmovlb.u16 q0, q0
; CHECK-LE-NEXT:    add sp, #4
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: zext16_masked_v4i32_align1_undef:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #4
; CHECK-BE-NEXT:    sub sp, #4
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    @ implicit-def: $q0
; CHECK-BE-NEXT:    vcmp.s32 gt, q1, zr
; CHECK-BE-NEXT:    vmrs r2, p0
; CHECK-BE-NEXT:    ubfx r1, r2, #12, #1
; CHECK-BE-NEXT:    rsbs r3, r1, #0
; CHECK-BE-NEXT:    movs r1, #0
; CHECK-BE-NEXT:    bfi r1, r3, #0, #1
; CHECK-BE-NEXT:    ubfx r3, r2, #8, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r1, r3, #1, #1
; CHECK-BE-NEXT:    ubfx r3, r2, #4, #1
; CHECK-BE-NEXT:    and r2, r2, #1
; CHECK-BE-NEXT:    rsbs r3, r3, #0
; CHECK-BE-NEXT:    bfi r1, r3, #2, #1
; CHECK-BE-NEXT:    rsbs r2, r2, #0
; CHECK-BE-NEXT:    bfi r1, r2, #3, #1
; CHECK-BE-NEXT:    lsls r2, r1, #28
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    ldrhmi r2, [r0]
; CHECK-BE-NEXT:    vmovmi.32 q0[0], r2
; CHECK-BE-NEXT:    lsls r2, r1, #29
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    ldrhmi r2, [r0, #2]
; CHECK-BE-NEXT:    vmovmi.32 q0[1], r2
; CHECK-BE-NEXT:    lsls r2, r1, #30
; CHECK-BE-NEXT:    itt mi
; CHECK-BE-NEXT:    ldrhmi r2, [r0, #4]
; CHECK-BE-NEXT:    vmovmi.32 q0[2], r2
; CHECK-BE-NEXT:    lsls r1, r1, #31
; CHECK-BE-NEXT:    itt ne
; CHECK-BE-NEXT:    ldrhne r0, [r0, #6]
; CHECK-BE-NEXT:    vmovne.32 q0[3], r0
; CHECK-BE-NEXT:    vmovlb.u16 q1, q0
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    add sp, #4
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i32> %a, zeroinitializer
  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i16> undef)
  ; Zero-extend: this is the zext16 variant of the align-1 test.
  %ext = zext <4 x i16> %l to <4 x i32>
  ret <4 x i32> %ext
}

; Zero-extending masked load of <4 x i16> (align 2) whose passthru is the
; <4 x i16> argument %a, which also feeds the mask compare. The checks expect
; a predicated vldrht.u32 merged with the zero-extended %a through vpsel.
define arm_aapcs_vfpcc <4 x i32> @zext16_masked_v4i32_align2_other(ptr %dest, <4 x i16> %a) {
; CHECK-LE-LABEL: zext16_masked_v4i32_align2_other:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vmovlb.u16 q1, q0
; CHECK-LE-NEXT:    vmovlb.s16 q0, q0
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.u32 q0, [r0]
; CHECK-LE-NEXT:    vpsel q0, q0, q1
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: zext16_masked_v4i32_align2_other:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vmovlb.u16 q0, q1
; CHECK-BE-NEXT:    vmovlb.s16 q1, q1
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.u32 q1, [r0]
; CHECK-BE-NEXT:    vpsel q1, q1, q0
; CHECK-BE-NEXT:    vrev64.32 q0, q1
; CHECK-BE-NEXT:    bx lr
entry:
  %c = icmp sgt <4 x i16> %a, zeroinitializer
  %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> %a)
  %ext = zext <4 x i16> %l to <4 x i32>
  ret <4 x i32> %ext
}

define arm_aapcs_vfpcc <4 x i32> @sext16_masked_v4i32_align2_zero(ptr %dest, <4 x i32> %a) {
; CHECK-LE-LABEL: sext16_masked_v4i32_align2_zero:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    vpt.s32 gt, q0, zr
; CHECK-LE-NEXT:    vldrht.s32 q0, [r0]
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: sext16_masked_v4i32_align2_zero:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    vrev64.32 q1, q0
; CHECK-BE-NEXT:    vpt.s32 gt, q1, zr
; CHECK-BE-NEXT:    vldrht.s32 q1, [r0]
;
CHECK-BE-NEXT: vrev64.32 q0, q1 324; CHECK-BE-NEXT: bx lr 325entry: 326 %c = icmp sgt <4 x i32> %a, zeroinitializer 327 %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> zeroinitializer) 328 %sext = sext <4 x i16> %l to <4 x i32> 329 ret <4 x i32> %sext 330} 331 332define arm_aapcs_vfpcc <4 x i32> @sext16_masked_v4i32_align2_undef(ptr %dest, <4 x i32> %a) { 333; CHECK-LE-LABEL: sext16_masked_v4i32_align2_undef: 334; CHECK-LE: @ %bb.0: @ %entry 335; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 336; CHECK-LE-NEXT: vldrht.s32 q0, [r0] 337; CHECK-LE-NEXT: bx lr 338; 339; CHECK-BE-LABEL: sext16_masked_v4i32_align2_undef: 340; CHECK-BE: @ %bb.0: @ %entry 341; CHECK-BE-NEXT: vrev64.32 q1, q0 342; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 343; CHECK-BE-NEXT: vldrht.s32 q1, [r0] 344; CHECK-BE-NEXT: vrev64.32 q0, q1 345; CHECK-BE-NEXT: bx lr 346entry: 347 %c = icmp sgt <4 x i32> %a, zeroinitializer 348 %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> undef) 349 %sext = sext <4 x i16> %l to <4 x i32> 350 ret <4 x i32> %sext 351} 352 353define arm_aapcs_vfpcc <4 x i32> @sext16_masked_v4i32_align1_undef(ptr %dest, <4 x i32> %a) { 354; CHECK-LE-LABEL: sext16_masked_v4i32_align1_undef: 355; CHECK-LE: @ %bb.0: @ %entry 356; CHECK-LE-NEXT: .pad #4 357; CHECK-LE-NEXT: sub sp, #4 358; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr 359; CHECK-LE-NEXT: @ implicit-def: $q0 360; CHECK-LE-NEXT: vmrs r2, p0 361; CHECK-LE-NEXT: and r1, r2, #1 362; CHECK-LE-NEXT: rsbs r3, r1, #0 363; CHECK-LE-NEXT: movs r1, #0 364; CHECK-LE-NEXT: bfi r1, r3, #0, #1 365; CHECK-LE-NEXT: ubfx r3, r2, #4, #1 366; CHECK-LE-NEXT: rsbs r3, r3, #0 367; CHECK-LE-NEXT: bfi r1, r3, #1, #1 368; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 369; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 370; CHECK-LE-NEXT: rsbs r3, r3, #0 371; CHECK-LE-NEXT: bfi r1, r3, #2, #1 372; CHECK-LE-NEXT: rsbs r2, r2, #0 373; CHECK-LE-NEXT: bfi r1, r2, #3, #1 374; CHECK-LE-NEXT: lsls r2, r1, #31 375; CHECK-LE-NEXT: 
itt ne 376; CHECK-LE-NEXT: ldrhne r2, [r0] 377; CHECK-LE-NEXT: vmovne.32 q0[0], r2 378; CHECK-LE-NEXT: lsls r2, r1, #30 379; CHECK-LE-NEXT: itt mi 380; CHECK-LE-NEXT: ldrhmi r2, [r0, #2] 381; CHECK-LE-NEXT: vmovmi.32 q0[1], r2 382; CHECK-LE-NEXT: lsls r2, r1, #29 383; CHECK-LE-NEXT: itt mi 384; CHECK-LE-NEXT: ldrhmi r2, [r0, #4] 385; CHECK-LE-NEXT: vmovmi.32 q0[2], r2 386; CHECK-LE-NEXT: lsls r1, r1, #28 387; CHECK-LE-NEXT: itt mi 388; CHECK-LE-NEXT: ldrhmi r0, [r0, #6] 389; CHECK-LE-NEXT: vmovmi.32 q0[3], r0 390; CHECK-LE-NEXT: vmovlb.s16 q0, q0 391; CHECK-LE-NEXT: add sp, #4 392; CHECK-LE-NEXT: bx lr 393; 394; CHECK-BE-LABEL: sext16_masked_v4i32_align1_undef: 395; CHECK-BE: @ %bb.0: @ %entry 396; CHECK-BE-NEXT: .pad #4 397; CHECK-BE-NEXT: sub sp, #4 398; CHECK-BE-NEXT: vrev64.32 q1, q0 399; CHECK-BE-NEXT: @ implicit-def: $q0 400; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr 401; CHECK-BE-NEXT: vmrs r2, p0 402; CHECK-BE-NEXT: ubfx r1, r2, #12, #1 403; CHECK-BE-NEXT: rsbs r3, r1, #0 404; CHECK-BE-NEXT: movs r1, #0 405; CHECK-BE-NEXT: bfi r1, r3, #0, #1 406; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 407; CHECK-BE-NEXT: rsbs r3, r3, #0 408; CHECK-BE-NEXT: bfi r1, r3, #1, #1 409; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 410; CHECK-BE-NEXT: and r2, r2, #1 411; CHECK-BE-NEXT: rsbs r3, r3, #0 412; CHECK-BE-NEXT: bfi r1, r3, #2, #1 413; CHECK-BE-NEXT: rsbs r2, r2, #0 414; CHECK-BE-NEXT: bfi r1, r2, #3, #1 415; CHECK-BE-NEXT: lsls r2, r1, #28 416; CHECK-BE-NEXT: itt mi 417; CHECK-BE-NEXT: ldrhmi r2, [r0] 418; CHECK-BE-NEXT: vmovmi.32 q0[0], r2 419; CHECK-BE-NEXT: lsls r2, r1, #29 420; CHECK-BE-NEXT: itt mi 421; CHECK-BE-NEXT: ldrhmi r2, [r0, #2] 422; CHECK-BE-NEXT: vmovmi.32 q0[1], r2 423; CHECK-BE-NEXT: lsls r2, r1, #30 424; CHECK-BE-NEXT: itt mi 425; CHECK-BE-NEXT: ldrhmi r2, [r0, #4] 426; CHECK-BE-NEXT: vmovmi.32 q0[2], r2 427; CHECK-BE-NEXT: lsls r1, r1, #31 428; CHECK-BE-NEXT: itt ne 429; CHECK-BE-NEXT: ldrhne r0, [r0, #6] 430; CHECK-BE-NEXT: vmovne.32 q0[3], r0 431; CHECK-BE-NEXT: 
vmovlb.s16 q1, q0 432; CHECK-BE-NEXT: vrev64.32 q0, q1 433; CHECK-BE-NEXT: add sp, #4 434; CHECK-BE-NEXT: bx lr 435entry: 436 %c = icmp sgt <4 x i32> %a, zeroinitializer 437 %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i16> undef) 438 %sext = sext <4 x i16> %l to <4 x i32> 439 ret <4 x i32> %sext 440} 441 442define arm_aapcs_vfpcc <4 x i32> @sext16_masked_v4i32_align2_other(ptr %dest, <4 x i16> %a) { 443; CHECK-LE-LABEL: sext16_masked_v4i32_align2_other: 444; CHECK-LE: @ %bb.0: @ %entry 445; CHECK-LE-NEXT: vmovlb.s16 q0, q0 446; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 447; CHECK-LE-NEXT: vldrht.s32 q1, [r0] 448; CHECK-LE-NEXT: vpsel q0, q1, q0 449; CHECK-LE-NEXT: bx lr 450; 451; CHECK-BE-LABEL: sext16_masked_v4i32_align2_other: 452; CHECK-BE: @ %bb.0: @ %entry 453; CHECK-BE-NEXT: vrev64.32 q1, q0 454; CHECK-BE-NEXT: vmovlb.s16 q0, q1 455; CHECK-BE-NEXT: vpt.s32 gt, q0, zr 456; CHECK-BE-NEXT: vldrht.s32 q1, [r0] 457; CHECK-BE-NEXT: vpsel q1, q1, q0 458; CHECK-BE-NEXT: vrev64.32 q0, q1 459; CHECK-BE-NEXT: bx lr 460entry: 461 %c = icmp sgt <4 x i16> %a, zeroinitializer 462 %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> %a) 463 %sext = sext <4 x i16> %l to <4 x i32> 464 ret <4 x i32> %sext 465} 466 467define arm_aapcs_vfpcc ptr @masked_v4i32_preinc(ptr %x, ptr %y, <4 x i32> %a) { 468; CHECK-LE-LABEL: masked_v4i32_preinc: 469; CHECK-LE: @ %bb.0: @ %entry 470; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 471; CHECK-LE-NEXT: vldrwt.u32 q0, [r0, #4]! 472; CHECK-LE-NEXT: vstrw.32 q0, [r1] 473; CHECK-LE-NEXT: bx lr 474; 475; CHECK-BE-LABEL: masked_v4i32_preinc: 476; CHECK-BE: @ %bb.0: @ %entry 477; CHECK-BE-NEXT: vrev64.32 q1, q0 478; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 479; CHECK-BE-NEXT: vldrwt.u32 q0, [r0, #4]! 
480; CHECK-BE-NEXT: vstrw.32 q0, [r1] 481; CHECK-BE-NEXT: bx lr 482entry: 483 %z = getelementptr inbounds i8, ptr %x, i32 4 484 %c = icmp sgt <4 x i32> %a, zeroinitializer 485 %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x i32> undef) 486 store <4 x i32> %0, ptr %y, align 4 487 ret ptr %z 488} 489 490define arm_aapcs_vfpcc ptr @masked_v4i32_postinc(ptr %x, ptr %y, <4 x i32> %a) { 491; CHECK-LE-LABEL: masked_v4i32_postinc: 492; CHECK-LE: @ %bb.0: @ %entry 493; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 494; CHECK-LE-NEXT: vldrwt.u32 q0, [r0], #4 495; CHECK-LE-NEXT: vstrw.32 q0, [r1] 496; CHECK-LE-NEXT: bx lr 497; 498; CHECK-BE-LABEL: masked_v4i32_postinc: 499; CHECK-BE: @ %bb.0: @ %entry 500; CHECK-BE-NEXT: vrev64.32 q1, q0 501; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 502; CHECK-BE-NEXT: vldrwt.u32 q0, [r0], #4 503; CHECK-BE-NEXT: vstrw.32 q0, [r1] 504; CHECK-BE-NEXT: bx lr 505entry: 506 %z = getelementptr inbounds i8, ptr %x, i32 4 507 %c = icmp sgt <4 x i32> %a, zeroinitializer 508 %0 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %x, i32 4, <4 x i1> %c, <4 x i32> undef) 509 store <4 x i32> %0, ptr %y, align 4 510 ret ptr %z 511} 512 513define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_zero(ptr %dest, <8 x i16> %a) { 514; CHECK-LE-LABEL: masked_v8i16_align4_zero: 515; CHECK-LE: @ %bb.0: @ %entry 516; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 517; CHECK-LE-NEXT: vldrht.u16 q0, [r0] 518; CHECK-LE-NEXT: bx lr 519; 520; CHECK-BE-LABEL: masked_v8i16_align4_zero: 521; CHECK-BE: @ %bb.0: @ %entry 522; CHECK-BE-NEXT: vrev64.16 q1, q0 523; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 524; CHECK-BE-NEXT: vldrht.u16 q1, [r0] 525; CHECK-BE-NEXT: vrev64.16 q0, q1 526; CHECK-BE-NEXT: bx lr 527entry: 528 %c = icmp sgt <8 x i16> %a, zeroinitializer 529 %l = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x i16> zeroinitializer) 530 ret <8 x i16> %l 531} 532 533define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align2_undef(ptr %dest, <8 x i16> %a) { 
534; CHECK-LE-LABEL: masked_v8i16_align2_undef: 535; CHECK-LE: @ %bb.0: @ %entry 536; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 537; CHECK-LE-NEXT: vldrht.u16 q0, [r0] 538; CHECK-LE-NEXT: bx lr 539; 540; CHECK-BE-LABEL: masked_v8i16_align2_undef: 541; CHECK-BE: @ %bb.0: @ %entry 542; CHECK-BE-NEXT: vrev64.16 q1, q0 543; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 544; CHECK-BE-NEXT: vldrht.u16 q1, [r0] 545; CHECK-BE-NEXT: vrev64.16 q0, q1 546; CHECK-BE-NEXT: bx lr 547entry: 548 %c = icmp sgt <8 x i16> %a, zeroinitializer 549 %l = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x i16> undef) 550 ret <8 x i16> %l 551} 552 553define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align1_undef(ptr %dest, <8 x i16> %a) { 554; CHECK-LE-LABEL: masked_v8i16_align1_undef: 555; CHECK-LE: @ %bb.0: @ %entry 556; CHECK-LE-NEXT: .pad #4 557; CHECK-LE-NEXT: sub sp, #4 558; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr 559; CHECK-LE-NEXT: @ implicit-def: $q0 560; CHECK-LE-NEXT: vmrs r1, p0 561; CHECK-LE-NEXT: and r2, r1, #1 562; CHECK-LE-NEXT: rsbs r3, r2, #0 563; CHECK-LE-NEXT: movs r2, #0 564; CHECK-LE-NEXT: bfi r2, r3, #0, #1 565; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 566; CHECK-LE-NEXT: rsbs r3, r3, #0 567; CHECK-LE-NEXT: bfi r2, r3, #1, #1 568; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 569; CHECK-LE-NEXT: rsbs r3, r3, #0 570; CHECK-LE-NEXT: bfi r2, r3, #2, #1 571; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 572; CHECK-LE-NEXT: rsbs r3, r3, #0 573; CHECK-LE-NEXT: bfi r2, r3, #3, #1 574; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 575; CHECK-LE-NEXT: rsbs r3, r3, #0 576; CHECK-LE-NEXT: bfi r2, r3, #4, #1 577; CHECK-LE-NEXT: ubfx r3, r1, #10, #1 578; CHECK-LE-NEXT: rsbs r3, r3, #0 579; CHECK-LE-NEXT: bfi r2, r3, #5, #1 580; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 581; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 582; CHECK-LE-NEXT: rsbs r3, r3, #0 583; CHECK-LE-NEXT: bfi r2, r3, #6, #1 584; CHECK-LE-NEXT: rsbs r1, r1, #0 585; CHECK-LE-NEXT: bfi r2, r1, #7, #1 586; CHECK-LE-NEXT: uxtb r1, r2 587; CHECK-LE-NEXT: lsls r2, r2, #31 
588; CHECK-LE-NEXT: itt ne 589; CHECK-LE-NEXT: ldrhne r2, [r0] 590; CHECK-LE-NEXT: vmovne.16 q0[0], r2 591; CHECK-LE-NEXT: lsls r2, r1, #30 592; CHECK-LE-NEXT: itt mi 593; CHECK-LE-NEXT: ldrhmi r2, [r0, #2] 594; CHECK-LE-NEXT: vmovmi.16 q0[1], r2 595; CHECK-LE-NEXT: lsls r2, r1, #29 596; CHECK-LE-NEXT: itt mi 597; CHECK-LE-NEXT: ldrhmi r2, [r0, #4] 598; CHECK-LE-NEXT: vmovmi.16 q0[2], r2 599; CHECK-LE-NEXT: lsls r2, r1, #28 600; CHECK-LE-NEXT: itt mi 601; CHECK-LE-NEXT: ldrhmi r2, [r0, #6] 602; CHECK-LE-NEXT: vmovmi.16 q0[3], r2 603; CHECK-LE-NEXT: lsls r2, r1, #27 604; CHECK-LE-NEXT: itt mi 605; CHECK-LE-NEXT: ldrhmi r2, [r0, #8] 606; CHECK-LE-NEXT: vmovmi.16 q0[4], r2 607; CHECK-LE-NEXT: lsls r2, r1, #26 608; CHECK-LE-NEXT: itt mi 609; CHECK-LE-NEXT: ldrhmi r2, [r0, #10] 610; CHECK-LE-NEXT: vmovmi.16 q0[5], r2 611; CHECK-LE-NEXT: lsls r2, r1, #25 612; CHECK-LE-NEXT: itt mi 613; CHECK-LE-NEXT: ldrhmi r2, [r0, #12] 614; CHECK-LE-NEXT: vmovmi.16 q0[6], r2 615; CHECK-LE-NEXT: lsls r1, r1, #24 616; CHECK-LE-NEXT: itt mi 617; CHECK-LE-NEXT: ldrhmi r0, [r0, #14] 618; CHECK-LE-NEXT: vmovmi.16 q0[7], r0 619; CHECK-LE-NEXT: add sp, #4 620; CHECK-LE-NEXT: bx lr 621; 622; CHECK-BE-LABEL: masked_v8i16_align1_undef: 623; CHECK-BE: @ %bb.0: @ %entry 624; CHECK-BE-NEXT: .pad #4 625; CHECK-BE-NEXT: sub sp, #4 626; CHECK-BE-NEXT: vrev64.16 q1, q0 627; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr 628; CHECK-BE-NEXT: @ implicit-def: $q1 629; CHECK-BE-NEXT: vmrs r1, p0 630; CHECK-BE-NEXT: ubfx r2, r1, #14, #1 631; CHECK-BE-NEXT: rsbs r3, r2, #0 632; CHECK-BE-NEXT: movs r2, #0 633; CHECK-BE-NEXT: bfi r2, r3, #0, #1 634; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 635; CHECK-BE-NEXT: rsbs r3, r3, #0 636; CHECK-BE-NEXT: bfi r2, r3, #1, #1 637; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 638; CHECK-BE-NEXT: rsbs r3, r3, #0 639; CHECK-BE-NEXT: bfi r2, r3, #2, #1 640; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 641; CHECK-BE-NEXT: rsbs r3, r3, #0 642; CHECK-BE-NEXT: bfi r2, r3, #3, #1 643; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 
644; CHECK-BE-NEXT: rsbs r3, r3, #0 645; CHECK-BE-NEXT: bfi r2, r3, #4, #1 646; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 647; CHECK-BE-NEXT: rsbs r3, r3, #0 648; CHECK-BE-NEXT: bfi r2, r3, #5, #1 649; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 650; CHECK-BE-NEXT: and r1, r1, #1 651; CHECK-BE-NEXT: rsbs r3, r3, #0 652; CHECK-BE-NEXT: bfi r2, r3, #6, #1 653; CHECK-BE-NEXT: rsbs r1, r1, #0 654; CHECK-BE-NEXT: bfi r2, r1, #7, #1 655; CHECK-BE-NEXT: uxtb r1, r2 656; CHECK-BE-NEXT: lsls r2, r2, #24 657; CHECK-BE-NEXT: itt mi 658; CHECK-BE-NEXT: ldrhmi r2, [r0] 659; CHECK-BE-NEXT: vmovmi.16 q1[0], r2 660; CHECK-BE-NEXT: lsls r2, r1, #25 661; CHECK-BE-NEXT: itt mi 662; CHECK-BE-NEXT: ldrhmi r2, [r0, #2] 663; CHECK-BE-NEXT: vmovmi.16 q1[1], r2 664; CHECK-BE-NEXT: lsls r2, r1, #26 665; CHECK-BE-NEXT: itt mi 666; CHECK-BE-NEXT: ldrhmi r2, [r0, #4] 667; CHECK-BE-NEXT: vmovmi.16 q1[2], r2 668; CHECK-BE-NEXT: lsls r2, r1, #27 669; CHECK-BE-NEXT: itt mi 670; CHECK-BE-NEXT: ldrhmi r2, [r0, #6] 671; CHECK-BE-NEXT: vmovmi.16 q1[3], r2 672; CHECK-BE-NEXT: lsls r2, r1, #28 673; CHECK-BE-NEXT: itt mi 674; CHECK-BE-NEXT: ldrhmi r2, [r0, #8] 675; CHECK-BE-NEXT: vmovmi.16 q1[4], r2 676; CHECK-BE-NEXT: lsls r2, r1, #29 677; CHECK-BE-NEXT: itt mi 678; CHECK-BE-NEXT: ldrhmi r2, [r0, #10] 679; CHECK-BE-NEXT: vmovmi.16 q1[5], r2 680; CHECK-BE-NEXT: lsls r2, r1, #30 681; CHECK-BE-NEXT: itt mi 682; CHECK-BE-NEXT: ldrhmi r2, [r0, #12] 683; CHECK-BE-NEXT: vmovmi.16 q1[6], r2 684; CHECK-BE-NEXT: lsls r1, r1, #31 685; CHECK-BE-NEXT: itt ne 686; CHECK-BE-NEXT: ldrhne r0, [r0, #14] 687; CHECK-BE-NEXT: vmovne.16 q1[7], r0 688; CHECK-BE-NEXT: vrev64.16 q0, q1 689; CHECK-BE-NEXT: add sp, #4 690; CHECK-BE-NEXT: bx lr 691entry: 692 %c = icmp sgt <8 x i16> %a, zeroinitializer 693 %l = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i16> undef) 694 ret <8 x i16> %l 695} 696 697define arm_aapcs_vfpcc <8 x i16> @masked_v8i16_align4_other(ptr %dest, <8 x i16> %a) { 698; CHECK-LE-LABEL: 
masked_v8i16_align4_other: 699; CHECK-LE: @ %bb.0: @ %entry 700; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 701; CHECK-LE-NEXT: vldrht.u16 q1, [r0] 702; CHECK-LE-NEXT: vpsel q0, q1, q0 703; CHECK-LE-NEXT: bx lr 704; 705; CHECK-BE-LABEL: masked_v8i16_align4_other: 706; CHECK-BE: @ %bb.0: @ %entry 707; CHECK-BE-NEXT: vrev64.16 q1, q0 708; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 709; CHECK-BE-NEXT: vldrht.u16 q0, [r0] 710; CHECK-BE-NEXT: vpsel q1, q0, q1 711; CHECK-BE-NEXT: vrev64.16 q0, q1 712; CHECK-BE-NEXT: bx lr 713entry: 714 %c = icmp sgt <8 x i16> %a, zeroinitializer 715 %l = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x i16> %a) 716 ret <8 x i16> %l 717} 718 719define arm_aapcs_vfpcc <8 x i16> @sext8_masked_v8i16_align1_zero(ptr %dest, <8 x i8> %a) { 720; CHECK-LE-LABEL: sext8_masked_v8i16_align1_zero: 721; CHECK-LE: @ %bb.0: @ %entry 722; CHECK-LE-NEXT: vmovlb.s8 q0, q0 723; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 724; CHECK-LE-NEXT: vldrbt.s16 q0, [r0] 725; CHECK-LE-NEXT: bx lr 726; 727; CHECK-BE-LABEL: sext8_masked_v8i16_align1_zero: 728; CHECK-BE: @ %bb.0: @ %entry 729; CHECK-BE-NEXT: vrev64.16 q1, q0 730; CHECK-BE-NEXT: vmovlb.s8 q0, q1 731; CHECK-BE-NEXT: vpt.s16 gt, q0, zr 732; CHECK-BE-NEXT: vldrbt.s16 q1, [r0] 733; CHECK-BE-NEXT: vrev64.16 q0, q1 734; CHECK-BE-NEXT: bx lr 735entry: 736 %c = icmp sgt <8 x i8> %a, zeroinitializer 737 %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> zeroinitializer) 738 %ext = sext <8 x i8> %l to <8 x i16> 739 ret <8 x i16> %ext 740} 741 742define arm_aapcs_vfpcc <8 x i16> @sext8_masked_v8i16_align1_undef(ptr %dest, <8 x i8> %a) { 743; CHECK-LE-LABEL: sext8_masked_v8i16_align1_undef: 744; CHECK-LE: @ %bb.0: @ %entry 745; CHECK-LE-NEXT: vmovlb.s8 q0, q0 746; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 747; CHECK-LE-NEXT: vldrbt.s16 q0, [r0] 748; CHECK-LE-NEXT: bx lr 749; 750; CHECK-BE-LABEL: sext8_masked_v8i16_align1_undef: 751; CHECK-BE: @ %bb.0: @ %entry 752; CHECK-BE-NEXT: vrev64.16 
q1, q0 753; CHECK-BE-NEXT: vmovlb.s8 q0, q1 754; CHECK-BE-NEXT: vpt.s16 gt, q0, zr 755; CHECK-BE-NEXT: vldrbt.s16 q1, [r0] 756; CHECK-BE-NEXT: vrev64.16 q0, q1 757; CHECK-BE-NEXT: bx lr 758entry: 759 %c = icmp sgt <8 x i8> %a, zeroinitializer 760 %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> undef) 761 %ext = sext <8 x i8> %l to <8 x i16> 762 ret <8 x i16> %ext 763} 764 765define arm_aapcs_vfpcc <8 x i16> @sext8_masked_v8i16_align1_other(ptr %dest, <8 x i8> %a) { 766; CHECK-LE-LABEL: sext8_masked_v8i16_align1_other: 767; CHECK-LE: @ %bb.0: @ %entry 768; CHECK-LE-NEXT: vmovlb.s8 q0, q0 769; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 770; CHECK-LE-NEXT: vldrbt.s16 q1, [r0] 771; CHECK-LE-NEXT: vpsel q0, q1, q0 772; CHECK-LE-NEXT: bx lr 773; 774; CHECK-BE-LABEL: sext8_masked_v8i16_align1_other: 775; CHECK-BE: @ %bb.0: @ %entry 776; CHECK-BE-NEXT: vrev64.16 q1, q0 777; CHECK-BE-NEXT: vmovlb.s8 q0, q1 778; CHECK-BE-NEXT: vpt.s16 gt, q0, zr 779; CHECK-BE-NEXT: vldrbt.s16 q1, [r0] 780; CHECK-BE-NEXT: vpsel q1, q1, q0 781; CHECK-BE-NEXT: vrev64.16 q0, q1 782; CHECK-BE-NEXT: bx lr 783entry: 784 %c = icmp sgt <8 x i8> %a, zeroinitializer 785 %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> %a) 786 %ext = sext <8 x i8> %l to <8 x i16> 787 ret <8 x i16> %ext 788} 789 790define arm_aapcs_vfpcc <4 x i32> @sext8_masked_v4i32_align1_zero(ptr %dest, <4 x i8> %a) { 791; CHECK-LE-LABEL: sext8_masked_v4i32_align1_zero: 792; CHECK-LE: @ %bb.0: @ %entry 793; CHECK-LE-NEXT: vmovlb.s8 q0, q0 794; CHECK-LE-NEXT: vmovlb.s16 q0, q0 795; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 796; CHECK-LE-NEXT: vldrbt.s32 q0, [r0] 797; CHECK-LE-NEXT: bx lr 798; 799; CHECK-BE-LABEL: sext8_masked_v4i32_align1_zero: 800; CHECK-BE: @ %bb.0: @ %entry 801; CHECK-BE-NEXT: vrev64.32 q1, q0 802; CHECK-BE-NEXT: vmovlb.s8 q0, q1 803; CHECK-BE-NEXT: vmovlb.s16 q0, q0 804; CHECK-BE-NEXT: vpt.s32 gt, q0, zr 805; CHECK-BE-NEXT: vldrbt.s32 q1, [r0] 806; 
CHECK-BE-NEXT: vrev64.32 q0, q1 807; CHECK-BE-NEXT: bx lr 808entry: 809 %c = icmp sgt <4 x i8> %a, zeroinitializer 810 %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> zeroinitializer) 811 %ext = sext <4 x i8> %l to <4 x i32> 812 ret <4 x i32> %ext 813} 814 815define arm_aapcs_vfpcc <4 x i32> @sext8_masked_v4i32_align1_undef(ptr %dest, <4 x i8> %a) { 816; CHECK-LE-LABEL: sext8_masked_v4i32_align1_undef: 817; CHECK-LE: @ %bb.0: @ %entry 818; CHECK-LE-NEXT: vmovlb.s8 q0, q0 819; CHECK-LE-NEXT: vmovlb.s16 q0, q0 820; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 821; CHECK-LE-NEXT: vldrbt.s32 q0, [r0] 822; CHECK-LE-NEXT: bx lr 823; 824; CHECK-BE-LABEL: sext8_masked_v4i32_align1_undef: 825; CHECK-BE: @ %bb.0: @ %entry 826; CHECK-BE-NEXT: vrev64.32 q1, q0 827; CHECK-BE-NEXT: vmovlb.s8 q0, q1 828; CHECK-BE-NEXT: vmovlb.s16 q0, q0 829; CHECK-BE-NEXT: vpt.s32 gt, q0, zr 830; CHECK-BE-NEXT: vldrbt.s32 q1, [r0] 831; CHECK-BE-NEXT: vrev64.32 q0, q1 832; CHECK-BE-NEXT: bx lr 833entry: 834 %c = icmp sgt <4 x i8> %a, zeroinitializer 835 %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> undef) 836 %ext = sext <4 x i8> %l to <4 x i32> 837 ret <4 x i32> %ext 838} 839 840define arm_aapcs_vfpcc <4 x i32> @sext8_masked_v4i32_align1_other(ptr %dest, <4 x i8> %a) { 841; CHECK-LE-LABEL: sext8_masked_v4i32_align1_other: 842; CHECK-LE: @ %bb.0: @ %entry 843; CHECK-LE-NEXT: vmovlb.s8 q0, q0 844; CHECK-LE-NEXT: vmovlb.s16 q0, q0 845; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 846; CHECK-LE-NEXT: vldrbt.s32 q1, [r0] 847; CHECK-LE-NEXT: vpsel q0, q1, q0 848; CHECK-LE-NEXT: bx lr 849; 850; CHECK-BE-LABEL: sext8_masked_v4i32_align1_other: 851; CHECK-BE: @ %bb.0: @ %entry 852; CHECK-BE-NEXT: vrev64.32 q1, q0 853; CHECK-BE-NEXT: vmovlb.s8 q0, q1 854; CHECK-BE-NEXT: vmovlb.s16 q0, q0 855; CHECK-BE-NEXT: vpt.s32 gt, q0, zr 856; CHECK-BE-NEXT: vldrbt.s32 q1, [r0] 857; CHECK-BE-NEXT: vpsel q1, q1, q0 858; CHECK-BE-NEXT: vrev64.32 q0, q1 859; 
CHECK-BE-NEXT: bx lr 860entry: 861 %c = icmp sgt <4 x i8> %a, zeroinitializer 862 %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> %a) 863 %ext = sext <4 x i8> %l to <4 x i32> 864 ret <4 x i32> %ext 865} 866 867define arm_aapcs_vfpcc <4 x i32> @zext8_masked_v4i32_align1_zero(ptr %dest, <4 x i8> %a) { 868; CHECK-LE-LABEL: zext8_masked_v4i32_align1_zero: 869; CHECK-LE: @ %bb.0: @ %entry 870; CHECK-LE-NEXT: vmovlb.s8 q0, q0 871; CHECK-LE-NEXT: vmovlb.s16 q0, q0 872; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 873; CHECK-LE-NEXT: vldrbt.u32 q0, [r0] 874; CHECK-LE-NEXT: bx lr 875; 876; CHECK-BE-LABEL: zext8_masked_v4i32_align1_zero: 877; CHECK-BE: @ %bb.0: @ %entry 878; CHECK-BE-NEXT: vrev64.32 q1, q0 879; CHECK-BE-NEXT: vmovlb.s8 q0, q1 880; CHECK-BE-NEXT: vmovlb.s16 q0, q0 881; CHECK-BE-NEXT: vpt.s32 gt, q0, zr 882; CHECK-BE-NEXT: vldrbt.u32 q1, [r0] 883; CHECK-BE-NEXT: vrev64.32 q0, q1 884; CHECK-BE-NEXT: bx lr 885entry: 886 %c = icmp sgt <4 x i8> %a, zeroinitializer 887 %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> zeroinitializer) 888 %ext = zext <4 x i8> %l to <4 x i32> 889 ret <4 x i32> %ext 890} 891 892define arm_aapcs_vfpcc <4 x i32> @zext8_masked_v4i32_align1_undef(ptr %dest, <4 x i8> %a) { 893; CHECK-LE-LABEL: zext8_masked_v4i32_align1_undef: 894; CHECK-LE: @ %bb.0: @ %entry 895; CHECK-LE-NEXT: vmovlb.s8 q0, q0 896; CHECK-LE-NEXT: vmovlb.s16 q0, q0 897; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 898; CHECK-LE-NEXT: vldrbt.u32 q0, [r0] 899; CHECK-LE-NEXT: bx lr 900; 901; CHECK-BE-LABEL: zext8_masked_v4i32_align1_undef: 902; CHECK-BE: @ %bb.0: @ %entry 903; CHECK-BE-NEXT: vrev64.32 q1, q0 904; CHECK-BE-NEXT: vmovlb.s8 q0, q1 905; CHECK-BE-NEXT: vmovlb.s16 q0, q0 906; CHECK-BE-NEXT: vpt.s32 gt, q0, zr 907; CHECK-BE-NEXT: vldrbt.u32 q1, [r0] 908; CHECK-BE-NEXT: vrev64.32 q0, q1 909; CHECK-BE-NEXT: bx lr 910entry: 911 %c = icmp sgt <4 x i8> %a, zeroinitializer 912 %l = call <4 x i8> 
@llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> undef) 913 %ext = zext <4 x i8> %l to <4 x i32> 914 ret <4 x i32> %ext 915} 916 917define arm_aapcs_vfpcc <4 x i32> @zext8_masked_v4i32_align1_other(ptr %dest, <4 x i8> %a) { 918; CHECK-LE-LABEL: zext8_masked_v4i32_align1_other: 919; CHECK-LE: @ %bb.0: @ %entry 920; CHECK-LE-NEXT: vmov.i32 q1, #0xff 921; CHECK-LE-NEXT: vand q1, q0, q1 922; CHECK-LE-NEXT: vmovlb.s8 q0, q0 923; CHECK-LE-NEXT: vmovlb.s16 q0, q0 924; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 925; CHECK-LE-NEXT: vldrbt.u32 q0, [r0] 926; CHECK-LE-NEXT: vpsel q0, q0, q1 927; CHECK-LE-NEXT: bx lr 928; 929; CHECK-BE-LABEL: zext8_masked_v4i32_align1_other: 930; CHECK-BE: @ %bb.0: @ %entry 931; CHECK-BE-NEXT: vmov.i32 q1, #0xff 932; CHECK-BE-NEXT: vrev64.32 q2, q0 933; CHECK-BE-NEXT: vand q0, q2, q1 934; CHECK-BE-NEXT: vmovlb.s8 q1, q2 935; CHECK-BE-NEXT: vmovlb.s16 q1, q1 936; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 937; CHECK-BE-NEXT: vldrbt.u32 q1, [r0] 938; CHECK-BE-NEXT: vpsel q1, q1, q0 939; CHECK-BE-NEXT: vrev64.32 q0, q1 940; CHECK-BE-NEXT: bx lr 941entry: 942 %c = icmp sgt <4 x i8> %a, zeroinitializer 943 %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> %a) 944 %ext = zext <4 x i8> %l to <4 x i32> 945 ret <4 x i32> %ext 946} 947 ; Zero-extending masked <8 x i8> load, zero passthru: the checks expect a single predicated vldrbt.u16. 948define arm_aapcs_vfpcc <8 x i16> @zext8_masked_v8i16_align1_zero(ptr %dest, <8 x i8> %a) { 949; CHECK-LE-LABEL: zext8_masked_v8i16_align1_zero: 950; CHECK-LE: @ %bb.0: @ %entry 951; CHECK-LE-NEXT: vmovlb.s8 q0, q0 952; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 953; CHECK-LE-NEXT: vldrbt.u16 q0, [r0] 954; CHECK-LE-NEXT: bx lr 955; 956; CHECK-BE-LABEL: zext8_masked_v8i16_align1_zero: 957; CHECK-BE: @ %bb.0: @ %entry 958; CHECK-BE-NEXT: vrev64.16 q1, q0 959; CHECK-BE-NEXT: vmovlb.s8 q0, q1 960; CHECK-BE-NEXT: vpt.s16 gt, q0, zr 961; CHECK-BE-NEXT: vldrbt.u16 q1, [r0] 962; CHECK-BE-NEXT: vrev64.16 q0, q1 963; CHECK-BE-NEXT: bx lr 964entry: 965 %c = icmp sgt <8 x i8> %a, zeroinitializer 966 %l = 
call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> zeroinitializer) 967 %ext = zext <8 x i8> %l to <8 x i16> 968 ret <8 x i16> %ext 969} 970 971define arm_aapcs_vfpcc <8 x i16> @zext8_masked_v8i16_align1_undef(ptr %dest, <8 x i8> %a) { 972; CHECK-LE-LABEL: zext8_masked_v8i16_align1_undef: 973; CHECK-LE: @ %bb.0: @ %entry 974; CHECK-LE-NEXT: vmovlb.s8 q0, q0 975; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 976; CHECK-LE-NEXT: vldrbt.u16 q0, [r0] 977; CHECK-LE-NEXT: bx lr 978; 979; CHECK-BE-LABEL: zext8_masked_v8i16_align1_undef: 980; CHECK-BE: @ %bb.0: @ %entry 981; CHECK-BE-NEXT: vrev64.16 q1, q0 982; CHECK-BE-NEXT: vmovlb.s8 q0, q1 983; CHECK-BE-NEXT: vpt.s16 gt, q0, zr 984; CHECK-BE-NEXT: vldrbt.u16 q1, [r0] 985; CHECK-BE-NEXT: vrev64.16 q0, q1 986; CHECK-BE-NEXT: bx lr 987entry: 988 %c = icmp sgt <8 x i8> %a, zeroinitializer 989 %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> undef) 990 %ext = zext <8 x i8> %l to <8 x i16> 991 ret <8 x i16> %ext 992} 993 ; Zero-extending masked <8 x i8> load with %a as passthru: the checks expect vldrbt.u16 followed by a vpsel against the vmovlb.u8 of the passthru. 994define arm_aapcs_vfpcc <8 x i16> @zext8_masked_v8i16_align1_other(ptr %dest, <8 x i8> %a) { 995; CHECK-LE-LABEL: zext8_masked_v8i16_align1_other: 996; CHECK-LE: @ %bb.0: @ %entry 997; CHECK-LE-NEXT: vmovlb.u8 q1, q0 998; CHECK-LE-NEXT: vmovlb.s8 q0, q0 999; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 1000; CHECK-LE-NEXT: vldrbt.u16 q0, [r0] 1001; CHECK-LE-NEXT: vpsel q0, q0, q1 1002; CHECK-LE-NEXT: bx lr 1003; 1004; CHECK-BE-LABEL: zext8_masked_v8i16_align1_other: 1005; CHECK-BE: @ %bb.0: @ %entry 1006; CHECK-BE-NEXT: vrev64.16 q1, q0 1007; CHECK-BE-NEXT: vmovlb.u8 q0, q1 1008; CHECK-BE-NEXT: vmovlb.s8 q1, q1 1009; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 1010; CHECK-BE-NEXT: vldrbt.u16 q1, [r0] 1011; CHECK-BE-NEXT: vpsel q1, q1, q0 1012; CHECK-BE-NEXT: vrev64.16 q0, q1 1013; CHECK-BE-NEXT: bx lr 1014entry: 1015 %c = icmp sgt <8 x i8> %a, zeroinitializer 1016 %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> %a) 1017 %ext = zext <8 x 
i8> %l to <8 x i16> 1018 ret <8 x i16> %ext 1019} 1020 1021define ptr @masked_v8i16_preinc(ptr %x, ptr %y, <8 x i16> %a) { 1022; CHECK-LE-LABEL: masked_v8i16_preinc: 1023; CHECK-LE: @ %bb.0: @ %entry 1024; CHECK-LE-NEXT: vldr d1, [sp] 1025; CHECK-LE-NEXT: vmov d0, r2, r3 1026; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 1027; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4]! 1028; CHECK-LE-NEXT: vstrw.32 q0, [r1] 1029; CHECK-LE-NEXT: bx lr 1030; 1031; CHECK-BE-LABEL: masked_v8i16_preinc: 1032; CHECK-BE: @ %bb.0: @ %entry 1033; CHECK-BE-NEXT: vldr d1, [sp] 1034; CHECK-BE-NEXT: vmov d0, r3, r2 1035; CHECK-BE-NEXT: vrev64.16 q1, q0 1036; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 1037; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4]! 1038; CHECK-BE-NEXT: vstrh.16 q0, [r1] 1039; CHECK-BE-NEXT: bx lr 1040entry: 1041 %z = getelementptr inbounds i8, ptr %x, i32 4 1042 %c = icmp sgt <8 x i16> %a, zeroinitializer 1043 %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %z, i32 4, <8 x i1> %c, <8 x i16> undef) 1044 store <8 x i16> %0, ptr %y, align 4 1045 ret ptr %z 1046} 1047 ; Masked <8 x i16> load from %x that returns %x+4: the checks expect the post-indexed form of vldrht.u16. NOTE(review): masked_v8i16_preinc just above is declared without arm_aapcs_vfpcc, unlike this function, and its checks rebuild the vector argument from r2/r3 and the stack - confirm that difference is intentional. 1048define arm_aapcs_vfpcc ptr @masked_v8i16_postinc(ptr %x, ptr %y, <8 x i16> %a) { 1049; CHECK-LE-LABEL: masked_v8i16_postinc: 1050; CHECK-LE: @ %bb.0: @ %entry 1051; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 1052; CHECK-LE-NEXT: vldrht.u16 q0, [r0], #4 1053; CHECK-LE-NEXT: vstrw.32 q0, [r1] 1054; CHECK-LE-NEXT: bx lr 1055; 1056; CHECK-BE-LABEL: masked_v8i16_postinc: 1057; CHECK-BE: @ %bb.0: @ %entry 1058; CHECK-BE-NEXT: vrev64.16 q1, q0 1059; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 1060; CHECK-BE-NEXT: vldrht.u16 q0, [r0], #4 1061; CHECK-BE-NEXT: vstrh.16 q0, [r1] 1062; CHECK-BE-NEXT: bx lr 1063entry: 1064 %z = getelementptr inbounds i8, ptr %x, i32 4 1065 %c = icmp sgt <8 x i16> %a, zeroinitializer 1066 %0 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %x, i32 4, <8 x i1> %c, <8 x i16> undef) 1067 store <8 x i16> %0, ptr %y, align 4 1068 ret ptr %z 1069} 1070 1071 ; Masked <16 x i8> load, zero passthru: the checks expect a single predicated vldrbt.u8. NOTE(review): the name says align4 but the alignment operand of the call is 1. 1072define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_zero(ptr %dest, <16 x i8> 
%a) { 1073; CHECK-LE-LABEL: masked_v16i8_align4_zero: 1074; CHECK-LE: @ %bb.0: @ %entry 1075; CHECK-LE-NEXT: vpt.s8 gt, q0, zr 1076; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] 1077; CHECK-LE-NEXT: bx lr 1078; 1079; CHECK-BE-LABEL: masked_v16i8_align4_zero: 1080; CHECK-BE: @ %bb.0: @ %entry 1081; CHECK-BE-NEXT: vrev64.8 q1, q0 1082; CHECK-BE-NEXT: vpt.s8 gt, q1, zr 1083; CHECK-BE-NEXT: vldrbt.u8 q1, [r0] 1084; CHECK-BE-NEXT: vrev64.8 q0, q1 1085; CHECK-BE-NEXT: bx lr 1086entry: 1087 %c = icmp sgt <16 x i8> %a, zeroinitializer 1088 %l = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %dest, i32 1, <16 x i1> %c, <16 x i8> zeroinitializer) 1089 ret <16 x i8> %l 1090} 1091 ; Masked <16 x i8> load, undef passthru (alignment operand 1 despite the align4 name): the checks expect a single predicated vldrbt.u8. 1092define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_undef(ptr %dest, <16 x i8> %a) { 1093; CHECK-LE-LABEL: masked_v16i8_align4_undef: 1094; CHECK-LE: @ %bb.0: @ %entry 1095; CHECK-LE-NEXT: vpt.s8 gt, q0, zr 1096; CHECK-LE-NEXT: vldrbt.u8 q0, [r0] 1097; CHECK-LE-NEXT: bx lr 1098; 1099; CHECK-BE-LABEL: masked_v16i8_align4_undef: 1100; CHECK-BE: @ %bb.0: @ %entry 1101; CHECK-BE-NEXT: vrev64.8 q1, q0 1102; CHECK-BE-NEXT: vpt.s8 gt, q1, zr 1103; CHECK-BE-NEXT: vldrbt.u8 q1, [r0] 1104; CHECK-BE-NEXT: vrev64.8 q0, q1 1105; CHECK-BE-NEXT: bx lr 1106entry: 1107 %c = icmp sgt <16 x i8> %a, zeroinitializer 1108 %l = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %dest, i32 1, <16 x i1> %c, <16 x i8> undef) 1109 ret <16 x i8> %l 1110} 1111 ; Masked <16 x i8> load with %a as passthru: the checks expect vldrbt.u8 followed by a vpsel. 1112define arm_aapcs_vfpcc <16 x i8> @masked_v16i8_align4_other(ptr %dest, <16 x i8> %a) { 1113; CHECK-LE-LABEL: masked_v16i8_align4_other: 1114; CHECK-LE: @ %bb.0: @ %entry 1115; CHECK-LE-NEXT: vpt.s8 gt, q0, zr 1116; CHECK-LE-NEXT: vldrbt.u8 q1, [r0] 1117; CHECK-LE-NEXT: vpsel q0, q1, q0 1118; CHECK-LE-NEXT: bx lr 1119; 1120; CHECK-BE-LABEL: masked_v16i8_align4_other: 1121; CHECK-BE: @ %bb.0: @ %entry 1122; CHECK-BE-NEXT: vrev64.8 q1, q0 1123; CHECK-BE-NEXT: vpt.s8 gt, q1, zr 1124; CHECK-BE-NEXT: vldrbt.u8 q0, [r0] 1125; CHECK-BE-NEXT: vpsel q1, q0, q1 1126; CHECK-BE-NEXT: vrev64.8 
q0, q1 1127; CHECK-BE-NEXT: bx lr 1128entry: 1129 %c = icmp sgt <16 x i8> %a, zeroinitializer 1130 %l = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %dest, i32 1, <16 x i1> %c, <16 x i8> %a) 1131 ret <16 x i8> %l 1132} 1133 ; Masked <16 x i8> load from %x+4 that also returns %x+4: the checks expect the pre-indexed, writeback form of vldrbt.u8. 1134define arm_aapcs_vfpcc ptr @masked_v16i8_preinc(ptr %x, ptr %y, <16 x i8> %a) { 1135; CHECK-LE-LABEL: masked_v16i8_preinc: 1136; CHECK-LE: @ %bb.0: @ %entry 1137; CHECK-LE-NEXT: vpt.s8 gt, q0, zr 1138; CHECK-LE-NEXT: vldrbt.u8 q0, [r0, #4]! 1139; CHECK-LE-NEXT: vstrw.32 q0, [r1] 1140; CHECK-LE-NEXT: bx lr 1141; 1142; CHECK-BE-LABEL: masked_v16i8_preinc: 1143; CHECK-BE: @ %bb.0: @ %entry 1144; CHECK-BE-NEXT: vrev64.8 q1, q0 1145; CHECK-BE-NEXT: vpt.s8 gt, q1, zr 1146; CHECK-BE-NEXT: vldrbt.u8 q0, [r0, #4]! 1147; CHECK-BE-NEXT: vstrb.8 q0, [r1] 1148; CHECK-BE-NEXT: bx lr 1149entry: 1150 %z = getelementptr inbounds i8, ptr %x, i32 4 1151 %c = icmp sgt <16 x i8> %a, zeroinitializer 1152 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %z, i32 4, <16 x i1> %c, <16 x i8> undef) 1153 store <16 x i8> %0, ptr %y, align 4 1154 ret ptr %z 1155} 1156 ; Masked <16 x i8> load from %x that returns %x+4: the checks expect the post-indexed form of vldrbt.u8. 1157define arm_aapcs_vfpcc ptr @masked_v16i8_postinc(ptr %x, ptr %y, <16 x i8> %a) { 1158; CHECK-LE-LABEL: masked_v16i8_postinc: 1159; CHECK-LE: @ %bb.0: @ %entry 1160; CHECK-LE-NEXT: vpt.s8 gt, q0, zr 1161; CHECK-LE-NEXT: vldrbt.u8 q0, [r0], #4 1162; CHECK-LE-NEXT: vstrw.32 q0, [r1] 1163; CHECK-LE-NEXT: bx lr 1164; 1165; CHECK-BE-LABEL: masked_v16i8_postinc: 1166; CHECK-BE: @ %bb.0: @ %entry 1167; CHECK-BE-NEXT: vrev64.8 q1, q0 1168; CHECK-BE-NEXT: vpt.s8 gt, q1, zr 1169; CHECK-BE-NEXT: vldrbt.u8 q0, [r0], #4 1170; CHECK-BE-NEXT: vstrb.8 q0, [r1] 1171; CHECK-BE-NEXT: bx lr 1172entry: 1173 %z = getelementptr inbounds i8, ptr %x, i32 4 1174 %c = icmp sgt <16 x i8> %a, zeroinitializer 1175 %0 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %x, i32 4, <16 x i1> %c, <16 x i8> undef) 1176 store <16 x i8> %0, ptr %y, align 4 1177 ret ptr %z 1178} 1179 1180 ; Masked <4 x float> load, alignment 4, zero passthru: the checks expect a single predicated vldrwt.u32. 1181define arm_aapcs_vfpcc <4 x float> 
@masked_v4f32_align4_zero(ptr %dest, <4 x i32> %a) { 1182; CHECK-LE-LABEL: masked_v4f32_align4_zero: 1183; CHECK-LE: @ %bb.0: @ %entry 1184; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 1185; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] 1186; CHECK-LE-NEXT: bx lr 1187; 1188; CHECK-BE-LABEL: masked_v4f32_align4_zero: 1189; CHECK-BE: @ %bb.0: @ %entry 1190; CHECK-BE-NEXT: vrev64.32 q1, q0 1191; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 1192; CHECK-BE-NEXT: vldrwt.u32 q1, [r0] 1193; CHECK-BE-NEXT: vrev64.32 q0, q1 1194; CHECK-BE-NEXT: bx lr 1195entry: 1196 %c = icmp sgt <4 x i32> %a, zeroinitializer 1197 %l = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x float> zeroinitializer) 1198 ret <4 x float> %l 1199} 1200 ; Masked <4 x float> load, alignment 4, undef passthru: the checks expect a single predicated vldrwt.u32. 1201define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_undef(ptr %dest, <4 x i32> %a) { 1202; CHECK-LE-LABEL: masked_v4f32_align4_undef: 1203; CHECK-LE: @ %bb.0: @ %entry 1204; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 1205; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] 1206; CHECK-LE-NEXT: bx lr 1207; 1208; CHECK-BE-LABEL: masked_v4f32_align4_undef: 1209; CHECK-BE: @ %bb.0: @ %entry 1210; CHECK-BE-NEXT: vrev64.32 q1, q0 1211; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 1212; CHECK-BE-NEXT: vldrwt.u32 q1, [r0] 1213; CHECK-BE-NEXT: vrev64.32 q0, q1 1214; CHECK-BE-NEXT: bx lr 1215entry: 1216 %c = icmp sgt <4 x i32> %a, zeroinitializer 1217 %l = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x float> undef) 1218 ret <4 x float> %l 1219} 1220 ; Masked <4 x float> load with alignment 1: the checks expect no vector load; the predicate is read with vmrs and each lane is loaded conditionally with ldr and vmov. 1221define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align1_undef(ptr %dest, <4 x i32> %a) { 1222; CHECK-LE-LABEL: masked_v4f32_align1_undef: 1223; CHECK-LE: @ %bb.0: @ %entry 1224; CHECK-LE-NEXT: .pad #4 1225; CHECK-LE-NEXT: sub sp, #4 1226; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr 1227; CHECK-LE-NEXT: @ implicit-def: $q0 1228; CHECK-LE-NEXT: vmrs r2, p0 1229; CHECK-LE-NEXT: and r1, r2, #1 1230; CHECK-LE-NEXT: rsbs r3, r1, #0 1231; CHECK-LE-NEXT: movs r1, #0 1232; CHECK-LE-NEXT: bfi r1, r3, #0, #1 1233; CHECK-LE-NEXT: 
ubfx r3, r2, #4, #1 1234; CHECK-LE-NEXT: rsbs r3, r3, #0 1235; CHECK-LE-NEXT: bfi r1, r3, #1, #1 1236; CHECK-LE-NEXT: ubfx r3, r2, #8, #1 1237; CHECK-LE-NEXT: ubfx r2, r2, #12, #1 1238; CHECK-LE-NEXT: rsbs r3, r3, #0 1239; CHECK-LE-NEXT: bfi r1, r3, #2, #1 1240; CHECK-LE-NEXT: rsbs r2, r2, #0 1241; CHECK-LE-NEXT: bfi r1, r2, #3, #1 1242; CHECK-LE-NEXT: lsls r2, r1, #31 1243; CHECK-LE-NEXT: itt ne 1244; CHECK-LE-NEXT: ldrne r2, [r0] 1245; CHECK-LE-NEXT: vmovne s0, r2 1246; CHECK-LE-NEXT: lsls r2, r1, #30 1247; CHECK-LE-NEXT: itt mi 1248; CHECK-LE-NEXT: ldrmi r2, [r0, #4] 1249; CHECK-LE-NEXT: vmovmi s1, r2 1250; CHECK-LE-NEXT: lsls r2, r1, #29 1251; CHECK-LE-NEXT: itt mi 1252; CHECK-LE-NEXT: ldrmi r2, [r0, #8] 1253; CHECK-LE-NEXT: vmovmi s2, r2 1254; CHECK-LE-NEXT: lsls r1, r1, #28 1255; CHECK-LE-NEXT: itt mi 1256; CHECK-LE-NEXT: ldrmi r0, [r0, #12] 1257; CHECK-LE-NEXT: vmovmi s3, r0 1258; CHECK-LE-NEXT: add sp, #4 1259; CHECK-LE-NEXT: bx lr 1260; 1261; CHECK-BE-LABEL: masked_v4f32_align1_undef: 1262; CHECK-BE: @ %bb.0: @ %entry 1263; CHECK-BE-NEXT: .pad #4 1264; CHECK-BE-NEXT: sub sp, #4 1265; CHECK-BE-NEXT: vrev64.32 q1, q0 1266; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr 1267; CHECK-BE-NEXT: @ implicit-def: $q1 1268; CHECK-BE-NEXT: vmrs r2, p0 1269; CHECK-BE-NEXT: ubfx r1, r2, #12, #1 1270; CHECK-BE-NEXT: rsbs r3, r1, #0 1271; CHECK-BE-NEXT: movs r1, #0 1272; CHECK-BE-NEXT: bfi r1, r3, #0, #1 1273; CHECK-BE-NEXT: ubfx r3, r2, #8, #1 1274; CHECK-BE-NEXT: rsbs r3, r3, #0 1275; CHECK-BE-NEXT: bfi r1, r3, #1, #1 1276; CHECK-BE-NEXT: ubfx r3, r2, #4, #1 1277; CHECK-BE-NEXT: and r2, r2, #1 1278; CHECK-BE-NEXT: rsbs r3, r3, #0 1279; CHECK-BE-NEXT: bfi r1, r3, #2, #1 1280; CHECK-BE-NEXT: rsbs r2, r2, #0 1281; CHECK-BE-NEXT: bfi r1, r2, #3, #1 1282; CHECK-BE-NEXT: lsls r2, r1, #28 1283; CHECK-BE-NEXT: itt mi 1284; CHECK-BE-NEXT: ldrmi r2, [r0] 1285; CHECK-BE-NEXT: vmovmi s4, r2 1286; CHECK-BE-NEXT: lsls r2, r1, #29 1287; CHECK-BE-NEXT: itt mi 1288; CHECK-BE-NEXT: ldrmi r2, [r0, 
#4] 1289; CHECK-BE-NEXT: vmovmi s5, r2 1290; CHECK-BE-NEXT: lsls r2, r1, #30 1291; CHECK-BE-NEXT: itt mi 1292; CHECK-BE-NEXT: ldrmi r2, [r0, #8] 1293; CHECK-BE-NEXT: vmovmi s6, r2 1294; CHECK-BE-NEXT: lsls r1, r1, #31 1295; CHECK-BE-NEXT: itt ne 1296; CHECK-BE-NEXT: ldrne r0, [r0, #12] 1297; CHECK-BE-NEXT: vmovne s7, r0 1298; CHECK-BE-NEXT: vrev64.32 q0, q1 1299; CHECK-BE-NEXT: add sp, #4 1300; CHECK-BE-NEXT: bx lr 1301entry: 1302 %c = icmp sgt <4 x i32> %a, zeroinitializer 1303 %l = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x float> undef) 1304 ret <4 x float> %l 1305} 1306 ; Masked <4 x float> load with a separate passthru %b: the checks expect vldrwt.u32 followed by a vpsel. 1307define arm_aapcs_vfpcc <4 x float> @masked_v4f32_align4_other(ptr %dest, <4 x i32> %a, <4 x float> %b) { 1308; CHECK-LE-LABEL: masked_v4f32_align4_other: 1309; CHECK-LE: @ %bb.0: @ %entry 1310; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 1311; CHECK-LE-NEXT: vldrwt.u32 q0, [r0] 1312; CHECK-LE-NEXT: vpsel q0, q0, q1 1313; CHECK-LE-NEXT: bx lr 1314; 1315; CHECK-BE-LABEL: masked_v4f32_align4_other: 1316; CHECK-BE: @ %bb.0: @ %entry 1317; CHECK-BE-NEXT: vrev64.32 q2, q1 1318; CHECK-BE-NEXT: vrev64.32 q1, q0 1319; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 1320; CHECK-BE-NEXT: vldrwt.u32 q0, [r0] 1321; CHECK-BE-NEXT: vpsel q1, q0, q2 1322; CHECK-BE-NEXT: vrev64.32 q0, q1 1323; CHECK-BE-NEXT: bx lr 1324entry: 1325 %c = icmp sgt <4 x i32> %a, zeroinitializer 1326 %l = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %dest, i32 4, <4 x i1> %c, <4 x float> %b) 1327 ret <4 x float> %l 1328} 1329 ; Masked <4 x float> load from %x+4 that also returns %x+4: the checks expect the pre-indexed, writeback form of vldrwt.u32. 1330define arm_aapcs_vfpcc ptr @masked_v4f32_preinc(ptr %x, ptr %y, <4 x i32> %a) { 1331; CHECK-LE-LABEL: masked_v4f32_preinc: 1332; CHECK-LE: @ %bb.0: @ %entry 1333; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 1334; CHECK-LE-NEXT: vldrwt.u32 q0, [r0, #4]! 
1335; CHECK-LE-NEXT: vstrw.32 q0, [r1] 1336; CHECK-LE-NEXT: bx lr 1337; 1338; CHECK-BE-LABEL: masked_v4f32_preinc: 1339; CHECK-BE: @ %bb.0: @ %entry 1340; CHECK-BE-NEXT: vrev64.32 q1, q0 1341; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 1342; CHECK-BE-NEXT: vldrwt.u32 q0, [r0, #4]! 1343; CHECK-BE-NEXT: vstrw.32 q0, [r1] 1344; CHECK-BE-NEXT: bx lr 1345entry: 1346 %z = getelementptr inbounds i8, ptr %x, i32 4 1347 %c = icmp sgt <4 x i32> %a, zeroinitializer 1348 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %z, i32 4, <4 x i1> %c, <4 x float> undef) 1349 store <4 x float> %0, ptr %y, align 4 1350 ret ptr %z 1351} 1352 ; Masked <4 x float> load from %x that returns %x+4: the checks expect the post-indexed form of vldrwt.u32. 1353define arm_aapcs_vfpcc ptr @masked_v4f32_postinc(ptr %x, ptr %y, <4 x i32> %a) { 1354; CHECK-LE-LABEL: masked_v4f32_postinc: 1355; CHECK-LE: @ %bb.0: @ %entry 1356; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 1357; CHECK-LE-NEXT: vldrwt.u32 q0, [r0], #4 1358; CHECK-LE-NEXT: vstrw.32 q0, [r1] 1359; CHECK-LE-NEXT: bx lr 1360; 1361; CHECK-BE-LABEL: masked_v4f32_postinc: 1362; CHECK-BE: @ %bb.0: @ %entry 1363; CHECK-BE-NEXT: vrev64.32 q1, q0 1364; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 1365; CHECK-BE-NEXT: vldrwt.u32 q0, [r0], #4 1366; CHECK-BE-NEXT: vstrw.32 q0, [r1] 1367; CHECK-BE-NEXT: bx lr 1368entry: 1369 %z = getelementptr inbounds i8, ptr %x, i32 4 1370 %c = icmp sgt <4 x i32> %a, zeroinitializer 1371 %0 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %x, i32 4, <4 x i1> %c, <4 x float> undef) 1372 store <4 x float> %0, ptr %y, align 4 1373 ret ptr %z 1374} 1375 1376 ; Masked <8 x half> load, zero passthru: the checks expect a single predicated vldrht.u16. NOTE(review): the name says align4 but the alignment operand of the call is 2. 1377define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_zero(ptr %dest, <8 x i16> %a) { 1378; CHECK-LE-LABEL: masked_v8f16_align4_zero: 1379; CHECK-LE: @ %bb.0: @ %entry 1380; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 1381; CHECK-LE-NEXT: vldrht.u16 q0, [r0] 1382; CHECK-LE-NEXT: bx lr 1383; 1384; CHECK-BE-LABEL: masked_v8f16_align4_zero: 1385; CHECK-BE: @ %bb.0: @ %entry 1386; CHECK-BE-NEXT: vrev64.16 q1, q0 1387; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 1388; CHECK-BE-NEXT: vldrht.u16 q1, [r0] 1389; 
CHECK-BE-NEXT: vrev64.16 q0, q1 1390; CHECK-BE-NEXT: bx lr 1391entry: 1392 %c = icmp sgt <8 x i16> %a, zeroinitializer 1393 %l = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x half> zeroinitializer) 1394 ret <8 x half> %l 1395} 1396 ; Masked <8 x half> load, undef passthru (alignment operand 2 despite the align4 name): the checks expect a single predicated vldrht.u16. 1397define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_undef(ptr %dest, <8 x i16> %a) { 1398; CHECK-LE-LABEL: masked_v8f16_align4_undef: 1399; CHECK-LE: @ %bb.0: @ %entry 1400; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 1401; CHECK-LE-NEXT: vldrht.u16 q0, [r0] 1402; CHECK-LE-NEXT: bx lr 1403; 1404; CHECK-BE-LABEL: masked_v8f16_align4_undef: 1405; CHECK-BE: @ %bb.0: @ %entry 1406; CHECK-BE-NEXT: vrev64.16 q1, q0 1407; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 1408; CHECK-BE-NEXT: vldrht.u16 q1, [r0] 1409; CHECK-BE-NEXT: vrev64.16 q0, q1 1410; CHECK-BE-NEXT: bx lr 1411entry: 1412 %c = icmp sgt <8 x i16> %a, zeroinitializer 1413 %l = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x half> undef) 1414 ret <8 x half> %l 1415} 1416 ; Masked <8 x half> load with alignment 1: the checks expect no vector load; each lane gets its own branch that does an ldrh, a stack round trip (strh.w then vldr.16) and a lane insert. 1417define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align1_undef(ptr %dest, <8 x i16> %a) { 1418; CHECK-LE-LABEL: masked_v8f16_align1_undef: 1419; CHECK-LE: @ %bb.0: @ %entry 1420; CHECK-LE-NEXT: .pad #36 1421; CHECK-LE-NEXT: sub sp, #36 1422; CHECK-LE-NEXT: vcmp.s16 gt, q0, zr 1423; CHECK-LE-NEXT: @ implicit-def: $q0 1424; CHECK-LE-NEXT: vmrs r1, p0 1425; CHECK-LE-NEXT: and r2, r1, #1 1426; CHECK-LE-NEXT: rsbs r3, r2, #0 1427; CHECK-LE-NEXT: movs r2, #0 1428; CHECK-LE-NEXT: bfi r2, r3, #0, #1 1429; CHECK-LE-NEXT: ubfx r3, r1, #2, #1 1430; CHECK-LE-NEXT: rsbs r3, r3, #0 1431; CHECK-LE-NEXT: bfi r2, r3, #1, #1 1432; CHECK-LE-NEXT: ubfx r3, r1, #4, #1 1433; CHECK-LE-NEXT: rsbs r3, r3, #0 1434; CHECK-LE-NEXT: bfi r2, r3, #2, #1 1435; CHECK-LE-NEXT: ubfx r3, r1, #6, #1 1436; CHECK-LE-NEXT: rsbs r3, r3, #0 1437; CHECK-LE-NEXT: bfi r2, r3, #3, #1 1438; CHECK-LE-NEXT: ubfx r3, r1, #8, #1 1439; CHECK-LE-NEXT: rsbs r3, r3, #0 1440; CHECK-LE-NEXT: bfi r2, r3, #4, #1 1441; 
CHECK-LE-NEXT: ubfx r3, r1, #10, #1 1442; CHECK-LE-NEXT: rsbs r3, r3, #0 1443; CHECK-LE-NEXT: bfi r2, r3, #5, #1 1444; CHECK-LE-NEXT: ubfx r3, r1, #12, #1 1445; CHECK-LE-NEXT: ubfx r1, r1, #14, #1 1446; CHECK-LE-NEXT: rsbs r3, r3, #0 1447; CHECK-LE-NEXT: bfi r2, r3, #6, #1 1448; CHECK-LE-NEXT: rsbs r1, r1, #0 1449; CHECK-LE-NEXT: bfi r2, r1, #7, #1 1450; CHECK-LE-NEXT: uxtb r1, r2 1451; CHECK-LE-NEXT: lsls r2, r2, #31 1452; CHECK-LE-NEXT: bne .LBB45_9 1453; CHECK-LE-NEXT: @ %bb.1: @ %else 1454; CHECK-LE-NEXT: lsls r2, r1, #30 1455; CHECK-LE-NEXT: bmi .LBB45_10 1456; CHECK-LE-NEXT: .LBB45_2: @ %else2 1457; CHECK-LE-NEXT: lsls r2, r1, #29 1458; CHECK-LE-NEXT: bmi .LBB45_11 1459; CHECK-LE-NEXT: .LBB45_3: @ %else5 1460; CHECK-LE-NEXT: lsls r2, r1, #28 1461; CHECK-LE-NEXT: bmi .LBB45_12 1462; CHECK-LE-NEXT: .LBB45_4: @ %else8 1463; CHECK-LE-NEXT: lsls r2, r1, #27 1464; CHECK-LE-NEXT: bmi .LBB45_13 1465; CHECK-LE-NEXT: .LBB45_5: @ %else11 1466; CHECK-LE-NEXT: lsls r2, r1, #26 1467; CHECK-LE-NEXT: bmi .LBB45_14 1468; CHECK-LE-NEXT: .LBB45_6: @ %else14 1469; CHECK-LE-NEXT: lsls r2, r1, #25 1470; CHECK-LE-NEXT: bmi .LBB45_15 1471; CHECK-LE-NEXT: .LBB45_7: @ %else17 1472; CHECK-LE-NEXT: lsls r1, r1, #24 1473; CHECK-LE-NEXT: bmi .LBB45_16 1474; CHECK-LE-NEXT: .LBB45_8: @ %else20 1475; CHECK-LE-NEXT: add sp, #36 1476; CHECK-LE-NEXT: bx lr 1477; CHECK-LE-NEXT: .LBB45_9: @ %cond.load 1478; CHECK-LE-NEXT: ldrh r2, [r0] 1479; CHECK-LE-NEXT: strh.w r2, [sp, #28] 1480; CHECK-LE-NEXT: vldr.16 s0, [sp, #28] 1481; CHECK-LE-NEXT: lsls r2, r1, #30 1482; CHECK-LE-NEXT: bpl .LBB45_2 1483; CHECK-LE-NEXT: .LBB45_10: @ %cond.load1 1484; CHECK-LE-NEXT: ldrh r2, [r0, #2] 1485; CHECK-LE-NEXT: strh.w r2, [sp, #24] 1486; CHECK-LE-NEXT: vldr.16 s4, [sp, #24] 1487; CHECK-LE-NEXT: vins.f16 s0, s4 1488; CHECK-LE-NEXT: lsls r2, r1, #29 1489; CHECK-LE-NEXT: bpl .LBB45_3 1490; CHECK-LE-NEXT: .LBB45_11: @ %cond.load4 1491; CHECK-LE-NEXT: ldrh r2, [r0, #4] 1492; CHECK-LE-NEXT: strh.w r2, [sp, #20] 1493; 
CHECK-LE-NEXT: vldr.16 s4, [sp, #20] 1494; CHECK-LE-NEXT: vmov r2, s4 1495; CHECK-LE-NEXT: vmov.16 q0[2], r2 1496; CHECK-LE-NEXT: lsls r2, r1, #28 1497; CHECK-LE-NEXT: bpl .LBB45_4 1498; CHECK-LE-NEXT: .LBB45_12: @ %cond.load7 1499; CHECK-LE-NEXT: ldrh r2, [r0, #6] 1500; CHECK-LE-NEXT: strh.w r2, [sp, #16] 1501; CHECK-LE-NEXT: vldr.16 s4, [sp, #16] 1502; CHECK-LE-NEXT: vins.f16 s1, s4 1503; CHECK-LE-NEXT: lsls r2, r1, #27 1504; CHECK-LE-NEXT: bpl .LBB45_5 1505; CHECK-LE-NEXT: .LBB45_13: @ %cond.load10 1506; CHECK-LE-NEXT: ldrh r2, [r0, #8] 1507; CHECK-LE-NEXT: strh.w r2, [sp, #12] 1508; CHECK-LE-NEXT: vldr.16 s4, [sp, #12] 1509; CHECK-LE-NEXT: vmov r2, s4 1510; CHECK-LE-NEXT: vmov.16 q0[4], r2 1511; CHECK-LE-NEXT: lsls r2, r1, #26 1512; CHECK-LE-NEXT: bpl .LBB45_6 1513; CHECK-LE-NEXT: .LBB45_14: @ %cond.load13 1514; CHECK-LE-NEXT: ldrh r2, [r0, #10] 1515; CHECK-LE-NEXT: strh.w r2, [sp, #8] 1516; CHECK-LE-NEXT: vldr.16 s4, [sp, #8] 1517; CHECK-LE-NEXT: vins.f16 s2, s4 1518; CHECK-LE-NEXT: lsls r2, r1, #25 1519; CHECK-LE-NEXT: bpl .LBB45_7 1520; CHECK-LE-NEXT: .LBB45_15: @ %cond.load16 1521; CHECK-LE-NEXT: ldrh r2, [r0, #12] 1522; CHECK-LE-NEXT: strh.w r2, [sp, #4] 1523; CHECK-LE-NEXT: vldr.16 s4, [sp, #4] 1524; CHECK-LE-NEXT: vmov r2, s4 1525; CHECK-LE-NEXT: vmov.16 q0[6], r2 1526; CHECK-LE-NEXT: lsls r1, r1, #24 1527; CHECK-LE-NEXT: bpl .LBB45_8 1528; CHECK-LE-NEXT: .LBB45_16: @ %cond.load19 1529; CHECK-LE-NEXT: ldrh r0, [r0, #14] 1530; CHECK-LE-NEXT: strh.w r0, [sp] 1531; CHECK-LE-NEXT: vldr.16 s4, [sp] 1532; CHECK-LE-NEXT: vins.f16 s3, s4 1533; CHECK-LE-NEXT: add sp, #36 1534; CHECK-LE-NEXT: bx lr 1535; 1536; CHECK-BE-LABEL: masked_v8f16_align1_undef: 1537; CHECK-BE: @ %bb.0: @ %entry 1538; CHECK-BE-NEXT: .pad #36 1539; CHECK-BE-NEXT: sub sp, #36 1540; CHECK-BE-NEXT: vrev64.16 q1, q0 1541; CHECK-BE-NEXT: vcmp.s16 gt, q1, zr 1542; CHECK-BE-NEXT: @ implicit-def: $q1 1543; CHECK-BE-NEXT: vmrs r1, p0 1544; CHECK-BE-NEXT: ubfx r2, r1, #14, #1 1545; CHECK-BE-NEXT: rsbs 
r3, r2, #0 1546; CHECK-BE-NEXT: movs r2, #0 1547; CHECK-BE-NEXT: bfi r2, r3, #0, #1 1548; CHECK-BE-NEXT: ubfx r3, r1, #12, #1 1549; CHECK-BE-NEXT: rsbs r3, r3, #0 1550; CHECK-BE-NEXT: bfi r2, r3, #1, #1 1551; CHECK-BE-NEXT: ubfx r3, r1, #10, #1 1552; CHECK-BE-NEXT: rsbs r3, r3, #0 1553; CHECK-BE-NEXT: bfi r2, r3, #2, #1 1554; CHECK-BE-NEXT: ubfx r3, r1, #8, #1 1555; CHECK-BE-NEXT: rsbs r3, r3, #0 1556; CHECK-BE-NEXT: bfi r2, r3, #3, #1 1557; CHECK-BE-NEXT: ubfx r3, r1, #6, #1 1558; CHECK-BE-NEXT: rsbs r3, r3, #0 1559; CHECK-BE-NEXT: bfi r2, r3, #4, #1 1560; CHECK-BE-NEXT: ubfx r3, r1, #4, #1 1561; CHECK-BE-NEXT: rsbs r3, r3, #0 1562; CHECK-BE-NEXT: bfi r2, r3, #5, #1 1563; CHECK-BE-NEXT: ubfx r3, r1, #2, #1 1564; CHECK-BE-NEXT: and r1, r1, #1 1565; CHECK-BE-NEXT: rsbs r3, r3, #0 1566; CHECK-BE-NEXT: bfi r2, r3, #6, #1 1567; CHECK-BE-NEXT: rsbs r1, r1, #0 1568; CHECK-BE-NEXT: bfi r2, r1, #7, #1 1569; CHECK-BE-NEXT: uxtb r1, r2 1570; CHECK-BE-NEXT: lsls r2, r2, #24 1571; CHECK-BE-NEXT: bmi .LBB45_10 1572; CHECK-BE-NEXT: @ %bb.1: @ %else 1573; CHECK-BE-NEXT: lsls r2, r1, #25 1574; CHECK-BE-NEXT: bmi .LBB45_11 1575; CHECK-BE-NEXT: .LBB45_2: @ %else2 1576; CHECK-BE-NEXT: lsls r2, r1, #26 1577; CHECK-BE-NEXT: bmi .LBB45_12 1578; CHECK-BE-NEXT: .LBB45_3: @ %else5 1579; CHECK-BE-NEXT: lsls r2, r1, #27 1580; CHECK-BE-NEXT: bmi .LBB45_13 1581; CHECK-BE-NEXT: .LBB45_4: @ %else8 1582; CHECK-BE-NEXT: lsls r2, r1, #28 1583; CHECK-BE-NEXT: bmi .LBB45_14 1584; CHECK-BE-NEXT: .LBB45_5: @ %else11 1585; CHECK-BE-NEXT: lsls r2, r1, #29 1586; CHECK-BE-NEXT: bmi .LBB45_15 1587; CHECK-BE-NEXT: .LBB45_6: @ %else14 1588; CHECK-BE-NEXT: lsls r2, r1, #30 1589; CHECK-BE-NEXT: bmi .LBB45_16 1590; CHECK-BE-NEXT: .LBB45_7: @ %else17 1591; CHECK-BE-NEXT: lsls r1, r1, #31 1592; CHECK-BE-NEXT: beq .LBB45_9 1593; CHECK-BE-NEXT: .LBB45_8: @ %cond.load19 1594; CHECK-BE-NEXT: ldrh r0, [r0, #14] 1595; CHECK-BE-NEXT: strh.w r0, [sp] 1596; CHECK-BE-NEXT: vldr.16 s0, [sp] 1597; CHECK-BE-NEXT: vins.f16 s7, 
s0 1598; CHECK-BE-NEXT: .LBB45_9: @ %else20 1599; CHECK-BE-NEXT: vrev64.16 q0, q1 1600; CHECK-BE-NEXT: add sp, #36 1601; CHECK-BE-NEXT: bx lr 1602; CHECK-BE-NEXT: .LBB45_10: @ %cond.load 1603; CHECK-BE-NEXT: ldrh r2, [r0] 1604; CHECK-BE-NEXT: strh.w r2, [sp, #28] 1605; CHECK-BE-NEXT: vldr.16 s4, [sp, #28] 1606; CHECK-BE-NEXT: lsls r2, r1, #25 1607; CHECK-BE-NEXT: bpl .LBB45_2 1608; CHECK-BE-NEXT: .LBB45_11: @ %cond.load1 1609; CHECK-BE-NEXT: ldrh r2, [r0, #2] 1610; CHECK-BE-NEXT: strh.w r2, [sp, #24] 1611; CHECK-BE-NEXT: vldr.16 s0, [sp, #24] 1612; CHECK-BE-NEXT: vins.f16 s4, s0 1613; CHECK-BE-NEXT: lsls r2, r1, #26 1614; CHECK-BE-NEXT: bpl .LBB45_3 1615; CHECK-BE-NEXT: .LBB45_12: @ %cond.load4 1616; CHECK-BE-NEXT: ldrh r2, [r0, #4] 1617; CHECK-BE-NEXT: strh.w r2, [sp, #20] 1618; CHECK-BE-NEXT: vldr.16 s0, [sp, #20] 1619; CHECK-BE-NEXT: vmov r2, s0 1620; CHECK-BE-NEXT: vmov.16 q1[2], r2 1621; CHECK-BE-NEXT: lsls r2, r1, #27 1622; CHECK-BE-NEXT: bpl .LBB45_4 1623; CHECK-BE-NEXT: .LBB45_13: @ %cond.load7 1624; CHECK-BE-NEXT: ldrh r2, [r0, #6] 1625; CHECK-BE-NEXT: strh.w r2, [sp, #16] 1626; CHECK-BE-NEXT: vldr.16 s0, [sp, #16] 1627; CHECK-BE-NEXT: vins.f16 s5, s0 1628; CHECK-BE-NEXT: lsls r2, r1, #28 1629; CHECK-BE-NEXT: bpl .LBB45_5 1630; CHECK-BE-NEXT: .LBB45_14: @ %cond.load10 1631; CHECK-BE-NEXT: ldrh r2, [r0, #8] 1632; CHECK-BE-NEXT: strh.w r2, [sp, #12] 1633; CHECK-BE-NEXT: vldr.16 s0, [sp, #12] 1634; CHECK-BE-NEXT: vmov r2, s0 1635; CHECK-BE-NEXT: vmov.16 q1[4], r2 1636; CHECK-BE-NEXT: lsls r2, r1, #29 1637; CHECK-BE-NEXT: bpl .LBB45_6 1638; CHECK-BE-NEXT: .LBB45_15: @ %cond.load13 1639; CHECK-BE-NEXT: ldrh r2, [r0, #10] 1640; CHECK-BE-NEXT: strh.w r2, [sp, #8] 1641; CHECK-BE-NEXT: vldr.16 s0, [sp, #8] 1642; CHECK-BE-NEXT: vins.f16 s6, s0 1643; CHECK-BE-NEXT: lsls r2, r1, #30 1644; CHECK-BE-NEXT: bpl .LBB45_7 1645; CHECK-BE-NEXT: .LBB45_16: @ %cond.load16 1646; CHECK-BE-NEXT: ldrh r2, [r0, #12] 1647; CHECK-BE-NEXT: strh.w r2, [sp, #4] 1648; CHECK-BE-NEXT: 
vldr.16 s0, [sp, #4] 1649; CHECK-BE-NEXT: vmov r2, s0 1650; CHECK-BE-NEXT: vmov.16 q1[6], r2 1651; CHECK-BE-NEXT: lsls r1, r1, #31 1652; CHECK-BE-NEXT: bne .LBB45_8 1653; CHECK-BE-NEXT: b .LBB45_9 1654entry: 1655 %c = icmp sgt <8 x i16> %a, zeroinitializer 1656 %l = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x half> undef) 1657 ret <8 x half> %l 1658} 1659 ; Masked <8 x half> load (alignment operand 2) with a separate passthru %b: the checks expect vldrht.u16 followed by a vpsel. 1660define arm_aapcs_vfpcc <8 x half> @masked_v8f16_align4_other(ptr %dest, <8 x i16> %a, <8 x half> %b) { 1661; CHECK-LE-LABEL: masked_v8f16_align4_other: 1662; CHECK-LE: @ %bb.0: @ %entry 1663; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 1664; CHECK-LE-NEXT: vldrht.u16 q0, [r0] 1665; CHECK-LE-NEXT: vpsel q0, q0, q1 1666; CHECK-LE-NEXT: bx lr 1667; 1668; CHECK-BE-LABEL: masked_v8f16_align4_other: 1669; CHECK-BE: @ %bb.0: @ %entry 1670; CHECK-BE-NEXT: vrev64.16 q2, q1 1671; CHECK-BE-NEXT: vrev64.16 q1, q0 1672; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 1673; CHECK-BE-NEXT: vldrht.u16 q0, [r0] 1674; CHECK-BE-NEXT: vpsel q1, q0, q2 1675; CHECK-BE-NEXT: vrev64.16 q0, q1 1676; CHECK-BE-NEXT: bx lr 1677entry: 1678 %c = icmp sgt <8 x i16> %a, zeroinitializer 1679 %l = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %dest, i32 2, <8 x i1> %c, <8 x half> %b) 1680 ret <8 x half> %l 1681} 1682 ; Masked <8 x half> load from %x+4 that also returns %x+4: the checks expect the pre-indexed, writeback form of vldrht.u16. 1683define arm_aapcs_vfpcc ptr @masked_v8f16_preinc(ptr %x, ptr %y, <8 x i16> %a) { 1684; CHECK-LE-LABEL: masked_v8f16_preinc: 1685; CHECK-LE: @ %bb.0: @ %entry 1686; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 1687; CHECK-LE-NEXT: vldrht.u16 q0, [r0, #4]! 1688; CHECK-LE-NEXT: vstrw.32 q0, [r1] 1689; CHECK-LE-NEXT: bx lr 1690; 1691; CHECK-BE-LABEL: masked_v8f16_preinc: 1692; CHECK-BE: @ %bb.0: @ %entry 1693; CHECK-BE-NEXT: vrev64.16 q1, q0 1694; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 1695; CHECK-BE-NEXT: vldrht.u16 q0, [r0, #4]! 
1696; CHECK-BE-NEXT: vstrh.16 q0, [r1] 1697; CHECK-BE-NEXT: bx lr 1698entry: 1699 %z = getelementptr inbounds i8, ptr %x, i32 4 1700 %c = icmp sgt <8 x i16> %a, zeroinitializer 1701 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %z, i32 4, <8 x i1> %c, <8 x half> undef) 1702 store <8 x half> %0, ptr %y, align 4 1703 ret ptr %z 1704} 1705 ; Masked <8 x half> load from %x that returns %x+4: the checks expect the post-indexed form of vldrht.u16. 1706define arm_aapcs_vfpcc ptr @masked_v8f16_postinc(ptr %x, ptr %y, <8 x i16> %a) { 1707; CHECK-LE-LABEL: masked_v8f16_postinc: 1708; CHECK-LE: @ %bb.0: @ %entry 1709; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 1710; CHECK-LE-NEXT: vldrht.u16 q0, [r0], #4 1711; CHECK-LE-NEXT: vstrw.32 q0, [r1] 1712; CHECK-LE-NEXT: bx lr 1713; 1714; CHECK-BE-LABEL: masked_v8f16_postinc: 1715; CHECK-BE: @ %bb.0: @ %entry 1716; CHECK-BE-NEXT: vrev64.16 q1, q0 1717; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 1718; CHECK-BE-NEXT: vldrht.u16 q0, [r0], #4 1719; CHECK-BE-NEXT: vstrh.16 q0, [r1] 1720; CHECK-BE-NEXT: bx lr 1721entry: 1722 %z = getelementptr inbounds i8, ptr %x, i32 4 1723 %c = icmp sgt <8 x i16> %a, zeroinitializer 1724 %0 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %x, i32 4, <8 x i1> %c, <8 x half> undef) 1725 store <8 x half> %0, ptr %y, align 4 1726 ret ptr %z 1727} 1728 1729 ; Masked <2 x i64> load, alignment operand 8, zero passthru: the checks expect scalarised code that compares each i64 with rsbs/sbcs and loads each element with vldr. 1730define arm_aapcs_vfpcc <2 x i64> @masked_v2i64_align4_zero(ptr %dest, <2 x i64> %a) { 1731; CHECK-LE-LABEL: masked_v2i64_align4_zero: 1732; CHECK-LE: @ %bb.0: @ %entry 1733; CHECK-LE-NEXT: .save {r7, lr} 1734; CHECK-LE-NEXT: push {r7, lr} 1735; CHECK-LE-NEXT: .pad #4 1736; CHECK-LE-NEXT: sub sp, #4 1737; CHECK-LE-NEXT: vmov r2, r3, d0 1738; CHECK-LE-NEXT: movs r1, #0 1739; CHECK-LE-NEXT: vmov r12, lr, d1 1740; CHECK-LE-NEXT: rsbs r2, r2, #0 1741; CHECK-LE-NEXT: sbcs.w r2, r1, r3 1742; CHECK-LE-NEXT: csetm r2, lt 1743; CHECK-LE-NEXT: rsbs.w r3, r12, #0 1744; CHECK-LE-NEXT: sbcs.w r3, r1, lr 1745; CHECK-LE-NEXT: bfi r1, r2, #0, #1 1746; CHECK-LE-NEXT: csetm r2, lt 1747; CHECK-LE-NEXT: bfi r1, r2, #1, #1 1748; CHECK-LE-NEXT: lsls r2, r1, #31 1749; CHECK-LE-NEXT: beq 
.LBB49_2 1750; CHECK-LE-NEXT: @ %bb.1: @ %cond.load 1751; CHECK-LE-NEXT: vldr d1, .LCPI49_0 1752; CHECK-LE-NEXT: vldr d0, [r0] 1753; CHECK-LE-NEXT: b .LBB49_3 1754; CHECK-LE-NEXT: .LBB49_2: 1755; CHECK-LE-NEXT: vmov.i32 q0, #0x0 1756; CHECK-LE-NEXT: .LBB49_3: @ %else 1757; CHECK-LE-NEXT: lsls r1, r1, #30 1758; CHECK-LE-NEXT: it mi 1759; CHECK-LE-NEXT: vldrmi d1, [r0, #8] 1760; CHECK-LE-NEXT: add sp, #4 1761; CHECK-LE-NEXT: pop {r7, pc} 1762; CHECK-LE-NEXT: .p2align 3 1763; CHECK-LE-NEXT: @ %bb.4: 1764; CHECK-LE-NEXT: .LCPI49_0: 1765; CHECK-LE-NEXT: .long 0 @ double 0 1766; CHECK-LE-NEXT: .long 0 1767; 1768; CHECK-BE-LABEL: masked_v2i64_align4_zero: 1769; CHECK-BE: @ %bb.0: @ %entry 1770; CHECK-BE-NEXT: .save {r7, lr} 1771; CHECK-BE-NEXT: push {r7, lr} 1772; CHECK-BE-NEXT: .pad #4 1773; CHECK-BE-NEXT: sub sp, #4 1774; CHECK-BE-NEXT: vrev64.32 q1, q0 1775; CHECK-BE-NEXT: movs r1, #0 1776; CHECK-BE-NEXT: vmov r2, r3, d3 1777; CHECK-BE-NEXT: vmov r12, lr, d2 1778; CHECK-BE-NEXT: rsbs r3, r3, #0 1779; CHECK-BE-NEXT: sbcs.w r2, r1, r2 1780; CHECK-BE-NEXT: csetm r2, lt 1781; CHECK-BE-NEXT: rsbs.w r3, lr, #0 1782; CHECK-BE-NEXT: sbcs.w r3, r1, r12 1783; CHECK-BE-NEXT: bfi r1, r2, #0, #1 1784; CHECK-BE-NEXT: csetm r2, lt 1785; CHECK-BE-NEXT: bfi r1, r2, #1, #1 1786; CHECK-BE-NEXT: lsls r2, r1, #30 1787; CHECK-BE-NEXT: bpl .LBB49_2 1788; CHECK-BE-NEXT: @ %bb.1: @ %cond.load 1789; CHECK-BE-NEXT: vldr d1, .LCPI49_0 1790; CHECK-BE-NEXT: vldr d0, [r0] 1791; CHECK-BE-NEXT: b .LBB49_3 1792; CHECK-BE-NEXT: .LBB49_2: 1793; CHECK-BE-NEXT: vmov.i32 q0, #0x0 1794; CHECK-BE-NEXT: .LBB49_3: @ %else 1795; CHECK-BE-NEXT: lsls r1, r1, #31 1796; CHECK-BE-NEXT: it ne 1797; CHECK-BE-NEXT: vldrne d1, [r0, #8] 1798; CHECK-BE-NEXT: add sp, #4 1799; CHECK-BE-NEXT: pop {r7, pc} 1800; CHECK-BE-NEXT: .p2align 3 1801; CHECK-BE-NEXT: @ %bb.4: 1802; CHECK-BE-NEXT: .LCPI49_0: 1803; CHECK-BE-NEXT: .long 0 @ double 0 1804; CHECK-BE-NEXT: .long 0 1805entry: 1806 %c = icmp sgt <2 x i64> %a, zeroinitializer 
1807 %l = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %dest, i32 8, <2 x i1> %c, <2 x i64> zeroinitializer) 1808 ret <2 x i64> %l 1809} 1810 1811define arm_aapcs_vfpcc <2 x double> @masked_v2f64_align4_zero(ptr %dest, <2 x double> %a, <2 x i64> %b) { 1812; CHECK-LE-LABEL: masked_v2f64_align4_zero: 1813; CHECK-LE: @ %bb.0: @ %entry 1814; CHECK-LE-NEXT: .save {r7, lr} 1815; CHECK-LE-NEXT: push {r7, lr} 1816; CHECK-LE-NEXT: .pad #4 1817; CHECK-LE-NEXT: sub sp, #4 1818; CHECK-LE-NEXT: vmov r2, r3, d2 1819; CHECK-LE-NEXT: movs r1, #0 1820; CHECK-LE-NEXT: vmov r12, lr, d3 1821; CHECK-LE-NEXT: rsbs r2, r2, #0 1822; CHECK-LE-NEXT: sbcs.w r2, r1, r3 1823; CHECK-LE-NEXT: csetm r2, lt 1824; CHECK-LE-NEXT: rsbs.w r3, r12, #0 1825; CHECK-LE-NEXT: sbcs.w r3, r1, lr 1826; CHECK-LE-NEXT: bfi r1, r2, #0, #1 1827; CHECK-LE-NEXT: csetm r2, lt 1828; CHECK-LE-NEXT: bfi r1, r2, #1, #1 1829; CHECK-LE-NEXT: lsls r2, r1, #31 1830; CHECK-LE-NEXT: beq .LBB50_2 1831; CHECK-LE-NEXT: @ %bb.1: @ %cond.load 1832; CHECK-LE-NEXT: vldr d1, .LCPI50_0 1833; CHECK-LE-NEXT: vldr d0, [r0] 1834; CHECK-LE-NEXT: b .LBB50_3 1835; CHECK-LE-NEXT: .LBB50_2: 1836; CHECK-LE-NEXT: vmov.i32 q0, #0x0 1837; CHECK-LE-NEXT: .LBB50_3: @ %else 1838; CHECK-LE-NEXT: lsls r1, r1, #30 1839; CHECK-LE-NEXT: it mi 1840; CHECK-LE-NEXT: vldrmi d1, [r0, #8] 1841; CHECK-LE-NEXT: add sp, #4 1842; CHECK-LE-NEXT: pop {r7, pc} 1843; CHECK-LE-NEXT: .p2align 3 1844; CHECK-LE-NEXT: @ %bb.4: 1845; CHECK-LE-NEXT: .LCPI50_0: 1846; CHECK-LE-NEXT: .long 0 @ double 0 1847; CHECK-LE-NEXT: .long 0 1848; 1849; CHECK-BE-LABEL: masked_v2f64_align4_zero: 1850; CHECK-BE: @ %bb.0: @ %entry 1851; CHECK-BE-NEXT: .save {r7, lr} 1852; CHECK-BE-NEXT: push {r7, lr} 1853; CHECK-BE-NEXT: .pad #4 1854; CHECK-BE-NEXT: sub sp, #4 1855; CHECK-BE-NEXT: vrev64.32 q0, q1 1856; CHECK-BE-NEXT: movs r1, #0 1857; CHECK-BE-NEXT: vmov r2, r3, d1 1858; CHECK-BE-NEXT: vmov r12, lr, d0 1859; CHECK-BE-NEXT: rsbs r3, r3, #0 1860; CHECK-BE-NEXT: sbcs.w r2, r1, r2 1861; 
CHECK-BE-NEXT: csetm r2, lt 1862; CHECK-BE-NEXT: rsbs.w r3, lr, #0 1863; CHECK-BE-NEXT: sbcs.w r3, r1, r12 1864; CHECK-BE-NEXT: bfi r1, r2, #0, #1 1865; CHECK-BE-NEXT: csetm r2, lt 1866; CHECK-BE-NEXT: bfi r1, r2, #1, #1 1867; CHECK-BE-NEXT: lsls r2, r1, #30 1868; CHECK-BE-NEXT: bpl .LBB50_2 1869; CHECK-BE-NEXT: @ %bb.1: @ %cond.load 1870; CHECK-BE-NEXT: vldr d1, .LCPI50_0 1871; CHECK-BE-NEXT: vldr d0, [r0] 1872; CHECK-BE-NEXT: b .LBB50_3 1873; CHECK-BE-NEXT: .LBB50_2: 1874; CHECK-BE-NEXT: vmov.i32 q0, #0x0 1875; CHECK-BE-NEXT: .LBB50_3: @ %else 1876; CHECK-BE-NEXT: lsls r1, r1, #31 1877; CHECK-BE-NEXT: it ne 1878; CHECK-BE-NEXT: vldrne d1, [r0, #8] 1879; CHECK-BE-NEXT: add sp, #4 1880; CHECK-BE-NEXT: pop {r7, pc} 1881; CHECK-BE-NEXT: .p2align 3 1882; CHECK-BE-NEXT: @ %bb.4: 1883; CHECK-BE-NEXT: .LCPI50_0: 1884; CHECK-BE-NEXT: .long 0 @ double 0 1885; CHECK-BE-NEXT: .long 0 1886entry: 1887 %c = icmp sgt <2 x i64> %b, zeroinitializer 1888 %l = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %dest, i32 8, <2 x i1> %c, <2 x double> zeroinitializer) 1889 ret <2 x double> %l 1890} 1891 1892define arm_aapcs_vfpcc <4 x i16> @anyext_v4i16(ptr %dest, <4 x i32> %a) { 1893; CHECK-LE-LABEL: anyext_v4i16: 1894; CHECK-LE: @ %bb.0: @ %entry 1895; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 1896; CHECK-LE-NEXT: vldrht.u32 q0, [r0] 1897; CHECK-LE-NEXT: bx lr 1898; 1899; CHECK-BE-LABEL: anyext_v4i16: 1900; CHECK-BE: @ %bb.0: @ %entry 1901; CHECK-BE-NEXT: vrev64.32 q1, q0 1902; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 1903; CHECK-BE-NEXT: vldrht.u32 q1, [r0] 1904; CHECK-BE-NEXT: vrev64.32 q0, q1 1905; CHECK-BE-NEXT: bx lr 1906entry: 1907 %c = icmp sgt <4 x i32> %a, zeroinitializer 1908 %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> zeroinitializer) 1909 ret <4 x i16> %l 1910} 1911 1912define arm_aapcs_vfpcc <4 x i16> @anyext_v4i16_align1(ptr %dest, <4 x i32> %a) { 1913; CHECK-LE-LABEL: anyext_v4i16_align1: 1914; CHECK-LE: @ %bb.0: @ %entry 1915; 
CHECK-LE-NEXT: .pad #4 1916; CHECK-LE-NEXT: sub sp, #4 1917; CHECK-LE-NEXT: vcmp.s32 gt, q0, zr 1918; CHECK-LE-NEXT: mov.w r12, #0 1919; CHECK-LE-NEXT: vmrs r3, p0 1920; CHECK-LE-NEXT: and r1, r3, #1 1921; CHECK-LE-NEXT: rsbs r2, r1, #0 1922; CHECK-LE-NEXT: movs r1, #0 1923; CHECK-LE-NEXT: bfi r1, r2, #0, #1 1924; CHECK-LE-NEXT: ubfx r2, r3, #4, #1 1925; CHECK-LE-NEXT: rsbs r2, r2, #0 1926; CHECK-LE-NEXT: bfi r1, r2, #1, #1 1927; CHECK-LE-NEXT: ubfx r2, r3, #8, #1 1928; CHECK-LE-NEXT: rsbs r2, r2, #0 1929; CHECK-LE-NEXT: bfi r1, r2, #2, #1 1930; CHECK-LE-NEXT: ubfx r2, r3, #12, #1 1931; CHECK-LE-NEXT: rsbs r2, r2, #0 1932; CHECK-LE-NEXT: bfi r1, r2, #3, #1 1933; CHECK-LE-NEXT: lsls r2, r1, #31 1934; CHECK-LE-NEXT: beq .LBB52_2 1935; CHECK-LE-NEXT: @ %bb.1: @ %cond.load 1936; CHECK-LE-NEXT: ldrh r2, [r0] 1937; CHECK-LE-NEXT: vdup.32 q0, r12 1938; CHECK-LE-NEXT: vmov.32 q0[0], r2 1939; CHECK-LE-NEXT: b .LBB52_3 1940; CHECK-LE-NEXT: .LBB52_2: 1941; CHECK-LE-NEXT: vmov.i32 q0, #0x0 1942; CHECK-LE-NEXT: .LBB52_3: @ %else 1943; CHECK-LE-NEXT: lsls r2, r1, #30 1944; CHECK-LE-NEXT: itt mi 1945; CHECK-LE-NEXT: ldrhmi r2, [r0, #2] 1946; CHECK-LE-NEXT: vmovmi.32 q0[1], r2 1947; CHECK-LE-NEXT: lsls r2, r1, #29 1948; CHECK-LE-NEXT: itt mi 1949; CHECK-LE-NEXT: ldrhmi r2, [r0, #4] 1950; CHECK-LE-NEXT: vmovmi.32 q0[2], r2 1951; CHECK-LE-NEXT: lsls r1, r1, #28 1952; CHECK-LE-NEXT: itt mi 1953; CHECK-LE-NEXT: ldrhmi r0, [r0, #6] 1954; CHECK-LE-NEXT: vmovmi.32 q0[3], r0 1955; CHECK-LE-NEXT: add sp, #4 1956; CHECK-LE-NEXT: bx lr 1957; 1958; CHECK-BE-LABEL: anyext_v4i16_align1: 1959; CHECK-BE: @ %bb.0: @ %entry 1960; CHECK-BE-NEXT: .pad #4 1961; CHECK-BE-NEXT: sub sp, #4 1962; CHECK-BE-NEXT: vrev64.32 q1, q0 1963; CHECK-BE-NEXT: mov.w r12, #0 1964; CHECK-BE-NEXT: vcmp.s32 gt, q1, zr 1965; CHECK-BE-NEXT: vmrs r3, p0 1966; CHECK-BE-NEXT: ubfx r1, r3, #12, #1 1967; CHECK-BE-NEXT: rsbs r2, r1, #0 1968; CHECK-BE-NEXT: movs r1, #0 1969; CHECK-BE-NEXT: bfi r1, r2, #0, #1 1970; CHECK-BE-NEXT: 
ubfx r2, r3, #8, #1 1971; CHECK-BE-NEXT: rsbs r2, r2, #0 1972; CHECK-BE-NEXT: bfi r1, r2, #1, #1 1973; CHECK-BE-NEXT: ubfx r2, r3, #4, #1 1974; CHECK-BE-NEXT: rsbs r2, r2, #0 1975; CHECK-BE-NEXT: bfi r1, r2, #2, #1 1976; CHECK-BE-NEXT: and r2, r3, #1 1977; CHECK-BE-NEXT: rsbs r2, r2, #0 1978; CHECK-BE-NEXT: bfi r1, r2, #3, #1 1979; CHECK-BE-NEXT: lsls r2, r1, #28 1980; CHECK-BE-NEXT: bpl .LBB52_2 1981; CHECK-BE-NEXT: @ %bb.1: @ %cond.load 1982; CHECK-BE-NEXT: ldrh r2, [r0] 1983; CHECK-BE-NEXT: vdup.32 q1, r12 1984; CHECK-BE-NEXT: vmov.32 q1[0], r2 1985; CHECK-BE-NEXT: b .LBB52_3 1986; CHECK-BE-NEXT: .LBB52_2: 1987; CHECK-BE-NEXT: vmov.i32 q1, #0x0 1988; CHECK-BE-NEXT: .LBB52_3: @ %else 1989; CHECK-BE-NEXT: lsls r2, r1, #29 1990; CHECK-BE-NEXT: itt mi 1991; CHECK-BE-NEXT: ldrhmi r2, [r0, #2] 1992; CHECK-BE-NEXT: vmovmi.32 q1[1], r2 1993; CHECK-BE-NEXT: lsls r2, r1, #30 1994; CHECK-BE-NEXT: itt mi 1995; CHECK-BE-NEXT: ldrhmi r2, [r0, #4] 1996; CHECK-BE-NEXT: vmovmi.32 q1[2], r2 1997; CHECK-BE-NEXT: lsls r1, r1, #31 1998; CHECK-BE-NEXT: itt ne 1999; CHECK-BE-NEXT: ldrhne r0, [r0, #6] 2000; CHECK-BE-NEXT: vmovne.32 q1[3], r0 2001; CHECK-BE-NEXT: vrev64.32 q0, q1 2002; CHECK-BE-NEXT: add sp, #4 2003; CHECK-BE-NEXT: bx lr 2004entry: 2005 %c = icmp sgt <4 x i32> %a, zeroinitializer 2006 %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i16> zeroinitializer) 2007 ret <4 x i16> %l 2008} 2009 2010define arm_aapcs_vfpcc <4 x i8> @anyext_v4i8(ptr %dest, <4 x i32> %a) { 2011; CHECK-LE-LABEL: anyext_v4i8: 2012; CHECK-LE: @ %bb.0: @ %entry 2013; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 2014; CHECK-LE-NEXT: vldrbt.u32 q0, [r0] 2015; CHECK-LE-NEXT: bx lr 2016; 2017; CHECK-BE-LABEL: anyext_v4i8: 2018; CHECK-BE: @ %bb.0: @ %entry 2019; CHECK-BE-NEXT: vrev64.32 q1, q0 2020; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 2021; CHECK-BE-NEXT: vldrbt.u32 q1, [r0] 2022; CHECK-BE-NEXT: vrev64.32 q0, q1 2023; CHECK-BE-NEXT: bx lr 2024entry: 2025 %c = icmp sgt <4 x i32> %a, 
zeroinitializer 2026 %l = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %dest, i32 1, <4 x i1> %c, <4 x i8> zeroinitializer) 2027 ret <4 x i8> %l 2028} 2029 2030define arm_aapcs_vfpcc <8 x i8> @anyext_v8i8(ptr %dest, <8 x i16> %a) { 2031; CHECK-LE-LABEL: anyext_v8i8: 2032; CHECK-LE: @ %bb.0: @ %entry 2033; CHECK-LE-NEXT: vpt.s16 gt, q0, zr 2034; CHECK-LE-NEXT: vldrbt.u16 q0, [r0] 2035; CHECK-LE-NEXT: bx lr 2036; 2037; CHECK-BE-LABEL: anyext_v8i8: 2038; CHECK-BE: @ %bb.0: @ %entry 2039; CHECK-BE-NEXT: vrev64.16 q1, q0 2040; CHECK-BE-NEXT: vpt.s16 gt, q1, zr 2041; CHECK-BE-NEXT: vldrbt.u16 q1, [r0] 2042; CHECK-BE-NEXT: vrev64.16 q0, q1 2043; CHECK-BE-NEXT: bx lr 2044entry: 2045 %c = icmp sgt <8 x i16> %a, zeroinitializer 2046 %l = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %dest, i32 1, <8 x i1> %c, <8 x i8> zeroinitializer) 2047 ret <8 x i8> %l 2048} 2049 2050define arm_aapcs_vfpcc <4 x i32> @multi_user_zext(ptr %dest, <4 x i32> %a) { 2051; CHECK-LE-LABEL: multi_user_zext: 2052; CHECK-LE: @ %bb.0: @ %entry 2053; CHECK-LE-NEXT: .save {r7, lr} 2054; CHECK-LE-NEXT: push {r7, lr} 2055; CHECK-LE-NEXT: .vsave {d8, d9} 2056; CHECK-LE-NEXT: vpush {d8, d9} 2057; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 2058; CHECK-LE-NEXT: vldrht.u32 q4, [r0] 2059; CHECK-LE-NEXT: vmov r0, r1, d8 2060; CHECK-LE-NEXT: vmov r2, r3, d9 2061; CHECK-LE-NEXT: bl foo 2062; CHECK-LE-NEXT: vmov q0, q4 2063; CHECK-LE-NEXT: vpop {d8, d9} 2064; CHECK-LE-NEXT: pop {r7, pc} 2065; 2066; CHECK-BE-LABEL: multi_user_zext: 2067; CHECK-BE: @ %bb.0: @ %entry 2068; CHECK-BE-NEXT: .save {r7, lr} 2069; CHECK-BE-NEXT: push {r7, lr} 2070; CHECK-BE-NEXT: .vsave {d8, d9} 2071; CHECK-BE-NEXT: vpush {d8, d9} 2072; CHECK-BE-NEXT: vrev64.32 q1, q0 2073; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 2074; CHECK-BE-NEXT: vldrht.u32 q0, [r0] 2075; CHECK-BE-NEXT: vrev64.32 q4, q0 2076; CHECK-BE-NEXT: vmov r1, r0, d8 2077; CHECK-BE-NEXT: vmov r3, r2, d9 2078; CHECK-BE-NEXT: bl foo 2079; CHECK-BE-NEXT: vmov q0, q4 2080; CHECK-BE-NEXT: vpop {d8, d9} 
2081; CHECK-BE-NEXT: pop {r7, pc} 2082entry: 2083 %c = icmp sgt <4 x i32> %a, zeroinitializer 2084 %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> zeroinitializer) 2085 call void @foo(<4 x i16> %l) 2086 %ext = zext <4 x i16> %l to <4 x i32> 2087 ret <4 x i32> %ext 2088} 2089 2090define arm_aapcs_vfpcc <4 x i32> @multi_user_sext(ptr %dest, <4 x i32> %a) { 2091; CHECK-LE-LABEL: multi_user_sext: 2092; CHECK-LE: @ %bb.0: @ %entry 2093; CHECK-LE-NEXT: .save {r7, lr} 2094; CHECK-LE-NEXT: push {r7, lr} 2095; CHECK-LE-NEXT: .vsave {d8, d9} 2096; CHECK-LE-NEXT: vpush {d8, d9} 2097; CHECK-LE-NEXT: vpt.s32 gt, q0, zr 2098; CHECK-LE-NEXT: vldrht.u32 q4, [r0] 2099; CHECK-LE-NEXT: vmov r0, r1, d8 2100; CHECK-LE-NEXT: vmov r2, r3, d9 2101; CHECK-LE-NEXT: bl foo 2102; CHECK-LE-NEXT: vmovlb.s16 q0, q4 2103; CHECK-LE-NEXT: vpop {d8, d9} 2104; CHECK-LE-NEXT: pop {r7, pc} 2105; 2106; CHECK-BE-LABEL: multi_user_sext: 2107; CHECK-BE: @ %bb.0: @ %entry 2108; CHECK-BE-NEXT: .save {r7, lr} 2109; CHECK-BE-NEXT: push {r7, lr} 2110; CHECK-BE-NEXT: .vsave {d8, d9} 2111; CHECK-BE-NEXT: vpush {d8, d9} 2112; CHECK-BE-NEXT: vrev64.32 q1, q0 2113; CHECK-BE-NEXT: vpt.s32 gt, q1, zr 2114; CHECK-BE-NEXT: vldrht.u32 q4, [r0] 2115; CHECK-BE-NEXT: vrev64.32 q0, q4 2116; CHECK-BE-NEXT: vmov r1, r0, d0 2117; CHECK-BE-NEXT: vmov r3, r2, d1 2118; CHECK-BE-NEXT: bl foo 2119; CHECK-BE-NEXT: vmovlb.s16 q1, q4 2120; CHECK-BE-NEXT: vrev64.32 q0, q1 2121; CHECK-BE-NEXT: vpop {d8, d9} 2122; CHECK-BE-NEXT: pop {r7, pc} 2123entry: 2124 %c = icmp sgt <4 x i32> %a, zeroinitializer 2125 %l = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %dest, i32 2, <4 x i1> %c, <4 x i16> zeroinitializer) 2126 call void @foo(<4 x i16> %l) 2127 %ext = sext <4 x i16> %l to <4 x i32> 2128 ret <4 x i32> %ext 2129} 2130 2131declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>) 2132declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) 2133declare <8 x i16> 
@llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>) 2134declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32, <4 x i1>, <4 x i8>) 2135declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>) 2136declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>) 2137declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>) 2138declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32, <8 x i1>, <8 x half>) 2139declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>) 2140declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>) 2141declare void @foo(<4 x i16>) 2142