; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s

; Code generation tests for MVE vector shifts by an immediate amount.
; Every function returns the shifted vector; the FileCheck directives under
; each "define" line pin the exact instruction sequence expected from llc.
;
; Conventions used by the predicated tests in this file:
;   * the i16 argument %p is zero-extended to i32 and turned into an
;     <N x i1> mask by the matching llvm.arm.mve.pred.i2v intrinsic;
;   * the "_m" tests pass %inactive as the final intrinsic operand;
;   * the "_x" tests pass undef as the final intrinsic operand.

; ---------------------------------------------------------------------------
; Unpredicated left shift, written as a plain IR shl by a splat constant.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vshlq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.i8 q0, q0, #5
; CHECK-NEXT:    bx lr
entry:
  %res = shl <16 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vshlq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.i16 q0, q0, #5
; CHECK-NEXT:    bx lr
entry:
  %res = shl <8 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vshlq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.i32 q0, q0, #18
; CHECK-NEXT:    bx lr
entry:
  %res = shl <4 x i32> %a, <i32 18, i32 18, i32 18, i32 18>
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Unpredicated right shift: ashr in the signed tests, lshr in the unsigned
; tests, each by a splat constant.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vshrq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.s8 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %res = ashr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vshrq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.s16 q0, q0, #10
; CHECK-NEXT:    bx lr
entry:
  %res = ashr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vshrq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.s32 q0, q0, #19
; CHECK-NEXT:    bx lr
entry:
  %res = ashr <4 x i32> %a, <i32 19, i32 19, i32 19, i32 19>
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vshrq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
entry:
  %res = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vshrq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.u16 q0, q0, #10
; CHECK-NEXT:    bx lr
entry:
  %res = lshr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vshrq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshr.u32 q0, q0, #10
; CHECK-NEXT:    bx lr
entry:
  %res = lshr <4 x i32> %a, <i32 10, i32 10, i32 10, i32 10>
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Predicated left shift ("_m"), via llvm.arm.mve.shl.imm.predicated.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.i8 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.i16 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.i32 q0, q1, #0
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Predicated right shift ("_m"), via llvm.arm.mve.shr.imm.predicated.
; The second i32 operand is 0 in the signed tests and 1 in the unsigned tests.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.s8 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.s16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 3, i32 0, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.s32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, i32 0, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_m_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.u8 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.u16 q0, q1, #14
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.u32 q0, q1, #21
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 21, i32 1, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Predicated left shift ("_x"): same intrinsic as the "_m" tests, with undef
; as the final operand.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.i8 q0, q0, #1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %mask, <16 x i8> undef)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.i16 q0, q0, #15
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 15, <8 x i1> %mask, <8 x i16> undef)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.i32 q0, q0, #13
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, <4 x i1> %mask, <4 x i32> undef)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.i8 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, <16 x i1> %mask, <16 x i8> undef)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.i16 q0, q0, #10
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, <8 x i1> %mask, <8 x i16> undef)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshlq_x_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.i32 q0, q0, #30
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 30, <4 x i1> %mask, <4 x i32> undef)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Predicated right shift ("_x"): undef as the final operand.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_x_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.s8 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, <16 x i1> %mask, <16 x i8> undef)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_x_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.s16 q0, q0, #10
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, <8 x i1> %mask, <8 x i16> undef)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_x_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.s32 q0, q0, #7
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 7, i32 0, <4 x i1> %mask, <4 x i32> undef)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_x_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.u8 q0, q0, #7
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %mask, <16 x i8> undef)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_x_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.u16 q0, q0, #7
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 7, i32 1, <8 x i1> %mask, <8 x i16> undef)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshrq_x_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrt.u32 q0, q0, #6
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %mask, <4 x i32> undef)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Unpredicated vqshl by immediate, via llvm.arm.mve.vqshl.imm.
; The trailing i32 operand is 0 in the signed tests and 1 in the unsigned
; tests.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vqshlq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.s8 q0, q0, #3
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> %a, i32 3, i32 0)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vqshlq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.s16 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> %a, i32 4, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vqshlq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.s32 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %a, i32 4, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vqshlq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.u8 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> %a, i32 0, i32 1)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vqshlq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.u16 q0, q0, #13
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> %a, i32 13, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vqshlq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshl.u32 q0, q0, #6
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %a, i32 6, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Unpredicated vqshlu by immediate, via llvm.arm.mve.vqshlu.imm.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vqshluq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshlu.s8 q0, q0, #5
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8> %a, i32 5)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vqshluq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshlu.s16 q0, q0, #5
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16> %a, i32 5)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vqshluq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshlu.s32 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32> %a, i32 4)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Unpredicated vrshr by immediate, via llvm.arm.mve.vrshr.imm.
; The trailing i32 operand is 0 in the signed tests and 1 in the unsigned
; tests.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vrshrq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshr.s8 q0, q0, #4
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> %a, i32 4, i32 0)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vrshrq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshr.s16 q0, q0, #12
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> %a, i32 12, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) {
; CHECK-LABEL: test_vrshrq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshr.s32 q0, q0, #30
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> %a, i32 30, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vrshrq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshr.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> %a, i32 1, i32 1)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vrshrq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshr.u16 q0, q0, #15
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> %a, i32 15, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) {
; CHECK-LABEL: test_vrshrq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshr.u32 q0, q0, #20
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> %a, i32 20, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Predicated vqshl ("_m"), via llvm.arm.mve.vqshl.imm.predicated.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.s8 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, i32 0, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.s16 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, i32 0, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.s32 q0, q1, #14
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 14, i32 0, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.u8 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.u16 q0, q1, #9
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 9, i32 1, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vqshlq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlt.u32 q0, q1, #25
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 25, i32 1, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Predicated vqshlu ("_m"), via llvm.arm.mve.vqshlu.imm.predicated.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vqshluq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vqshluq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlut.s8 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshluq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vqshluq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlut.s16 q0, q1, #12
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 12, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vqshluq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vqshluq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshlut.s32 q0, q1, #24
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Predicated vrshr ("_m"), via llvm.arm.mve.vrshr.imm.predicated.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.s8 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.s16 q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 11, i32 0, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.s32 q0, q1, #24
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, i32 0, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_m_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.u8 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %mask, <16 x i8> %inactive)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.u16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 4, i32 1, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.u32 q0, q1, #27
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 27, i32 1, <4 x i1> %mask, <4 x i32> %inactive)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Predicated vrshr ("_x"): undef as the final operand.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_x_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.s8 q0, q0, #3
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 3, i32 0, <16 x i1> %mask, <16 x i8> undef)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_x_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.s16 q0, q0, #12
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 12, i32 0, <8 x i1> %mask, <8 x i16> undef)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_x_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.s32 q0, q0, #20
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 20, i32 0, <4 x i1> %mask, <4 x i32> undef)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_x_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.u8 q0, q0, #1
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %p.wide)
  %res = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, i32 1, <16 x i1> %mask, <16 x i8> undef)
  ret <16 x i8> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_x_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.u16 q0, q0, #13
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, i32 1, <8 x i1> %mask, <8 x i16> undef)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrq_x_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrt.u32 q0, q0, #6
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %p.wide)
  %res = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %mask, <4 x i32> undef)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Unpredicated widening shift, via llvm.arm.mve.vshll.imm.
; Operand order after the vector: shift amount, then a flag that is 0 in the
; signed tests and 1 in the unsigned tests, then a flag that is 0 in the
; vshllb tests and 1 in the vshllt tests.
; The "_lanewidth" tests shift by the full source lane width (8 or 16).
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vshllbq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.s8 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 2, i32 0, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_s8_lanewidth(<16 x i8> %a) {
; CHECK-LABEL: test_vshllbq_n_s8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.s8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 0, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vshllbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.s16 q0, q0, #13
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 13, i32 0, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_s16_lanewidth(<8 x i16> %a) {
; CHECK-LABEL: test_vshllbq_n_s16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.s16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 0, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vshllbq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.u8 q0, q0, #5
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 5, i32 1, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_u8_lanewidth(<16 x i8> %a) {
; CHECK-LABEL: test_vshllbq_n_u8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.u8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 1, i32 0)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vshllbq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.u16 q0, q0, #6
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 6, i32 1, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_u16_lanewidth(<8 x i16> %a) {
; CHECK-LABEL: test_vshllbq_n_u16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllb.u16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 1, i32 0)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_s8(<16 x i8> %a) {
; CHECK-LABEL: test_vshlltq_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.s8 q0, q0, #7
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 7, i32 0, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_s8_lanewidth(<16 x i8> %a) {
; CHECK-LABEL: test_vshlltq_n_s8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.s8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 0, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vshlltq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.s16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 2, i32 0, i32 1)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_s16_lanewidth(<8 x i16> %a) {
; CHECK-LABEL: test_vshlltq_n_s16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.s16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 0, i32 1)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_u8(<16 x i8> %a) {
; CHECK-LABEL: test_vshlltq_n_u8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.u8 q0, q0, #7
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 7, i32 1, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_u8_lanewidth(<16 x i8> %a) {
; CHECK-LABEL: test_vshlltq_n_u8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.u8 q0, q0, #8
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 1, i32 1)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vshlltq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.u16 q0, q0, #14
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 14, i32 1, i32 1)
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_u16_lanewidth(<8 x i16> %a) {
; CHECK-LABEL: test_vshlltq_n_u16_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshllt.u16 q0, q0, #16
; CHECK-NEXT:    bx lr
entry:
  %res = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 1, i32 1)
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Predicated widening shift ("_m"), via llvm.arm.mve.vshll.imm.predicated.
; The mask has one i1 per result lane (<8 x i1> for an <8 x i16> result).
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_s8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.s8 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 6, i32 0, i32 0, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_s8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_s8_lanewidth:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshllbt.s8 q0, q1, #8
; CHECK-NEXT:    bx lr
entry:
  %p.wide = zext i16 %p to i32
  %mask = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %p.wide)
  %res = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 0, <8 x i1> %mask, <8 x i16> %inactive)
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
; CHECK-LABEL: test_vshllbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
1025; CHECK-NEXT: vshllbt.s16 q0, q1, #10 1026; CHECK-NEXT: bx lr 1027entry: 1028 %0 = zext i16 %p to i32 1029 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1030 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive) 1031 ret <4 x i32> %2 1032} 1033 1034define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_s16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { 1035; CHECK-LABEL: test_vshllbq_m_n_s16_lanewidth: 1036; CHECK: @ %bb.0: @ %entry 1037; CHECK-NEXT: vmsr p0, r0 1038; CHECK-NEXT: vpst 1039; CHECK-NEXT: vshllbt.s16 q0, q1, #16 1040; CHECK-NEXT: bx lr 1041entry: 1042 %0 = zext i16 %p to i32 1043 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1044 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive) 1045 ret <4 x i32> %2 1046} 1047 1048define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { 1049; CHECK-LABEL: test_vshllbq_m_n_u8: 1050; CHECK: @ %bb.0: @ %entry 1051; CHECK-NEXT: vmsr p0, r0 1052; CHECK-NEXT: vpst 1053; CHECK-NEXT: vshllbt.u8 q0, q1, #3 1054; CHECK-NEXT: bx lr 1055entry: 1056 %0 = zext i16 %p to i32 1057 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1058 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 3, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive) 1059 ret <8 x i16> %2 1060} 1061 1062define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_u8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { 1063; CHECK-LABEL: test_vshllbq_m_n_u8_lanewidth: 1064; CHECK: @ %bb.0: @ %entry 1065; CHECK-NEXT: vmsr p0, r0 1066; CHECK-NEXT: vpst 1067; CHECK-NEXT: vshllbt.u8 q0, q1, #8 1068; CHECK-NEXT: bx lr 1069entry: 1070 %0 = zext i16 %p to i32 1071 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1072 %2 = tail 
call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive) 1073 ret <8 x i16> %2 1074} 1075 1076define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { 1077; CHECK-LABEL: test_vshllbq_m_n_u16: 1078; CHECK: @ %bb.0: @ %entry 1079; CHECK-NEXT: vmsr p0, r0 1080; CHECK-NEXT: vpst 1081; CHECK-NEXT: vshllbt.u16 q0, q1, #14 1082; CHECK-NEXT: bx lr 1083entry: 1084 %0 = zext i16 %p to i32 1085 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1086 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 14, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive) 1087 ret <4 x i32> %2 1088} 1089 1090define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_u16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { 1091; CHECK-LABEL: test_vshllbq_m_n_u16_lanewidth: 1092; CHECK: @ %bb.0: @ %entry 1093; CHECK-NEXT: vmsr p0, r0 1094; CHECK-NEXT: vpst 1095; CHECK-NEXT: vshllbt.u16 q0, q1, #16 1096; CHECK-NEXT: bx lr 1097entry: 1098 %0 = zext i16 %p to i32 1099 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1100 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive) 1101 ret <4 x i32> %2 1102} 1103 1104define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { 1105; CHECK-LABEL: test_vshlltq_m_n_s8: 1106; CHECK: @ %bb.0: @ %entry 1107; CHECK-NEXT: vmsr p0, r0 1108; CHECK-NEXT: vpst 1109; CHECK-NEXT: vshlltt.s8 q0, q1, #4 1110; CHECK-NEXT: bx lr 1111entry: 1112 %0 = zext i16 %p to i32 1113 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1114 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 4, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive) 1115 ret <8 x i16> %2 1116} 1117 1118define arm_aapcs_vfpcc <8 x i16> 
@test_vshlltq_m_n_s8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { 1119; CHECK-LABEL: test_vshlltq_m_n_s8_lanewidth: 1120; CHECK: @ %bb.0: @ %entry 1121; CHECK-NEXT: vmsr p0, r0 1122; CHECK-NEXT: vpst 1123; CHECK-NEXT: vshlltt.s8 q0, q1, #8 1124; CHECK-NEXT: bx lr 1125entry: 1126 %0 = zext i16 %p to i32 1127 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1128 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive) 1129 ret <8 x i16> %2 1130} 1131 1132define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { 1133; CHECK-LABEL: test_vshlltq_m_n_s16: 1134; CHECK: @ %bb.0: @ %entry 1135; CHECK-NEXT: vmsr p0, r0 1136; CHECK-NEXT: vpst 1137; CHECK-NEXT: vshlltt.s16 q0, q1, #12 1138; CHECK-NEXT: bx lr 1139entry: 1140 %0 = zext i16 %p to i32 1141 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1142 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 12, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive) 1143 ret <4 x i32> %2 1144} 1145 1146define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_s16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { 1147; CHECK-LABEL: test_vshlltq_m_n_s16_lanewidth: 1148; CHECK: @ %bb.0: @ %entry 1149; CHECK-NEXT: vmsr p0, r0 1150; CHECK-NEXT: vpst 1151; CHECK-NEXT: vshlltt.s16 q0, q1, #16 1152; CHECK-NEXT: bx lr 1153entry: 1154 %0 = zext i16 %p to i32 1155 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1156 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive) 1157 ret <4 x i32> %2 1158} 1159 1160define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { 1161; CHECK-LABEL: test_vshlltq_m_n_u8: 1162; CHECK: @ %bb.0: @ %entry 1163; CHECK-NEXT: vmsr p0, r0 1164; 
CHECK-NEXT: vpst 1165; CHECK-NEXT: vshlltt.u8 q0, q1, #2 1166; CHECK-NEXT: bx lr 1167entry: 1168 %0 = zext i16 %p to i32 1169 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1170 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 2, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive) 1171 ret <8 x i16> %2 1172} 1173 1174define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_u8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { 1175; CHECK-LABEL: test_vshlltq_m_n_u8_lanewidth: 1176; CHECK: @ %bb.0: @ %entry 1177; CHECK-NEXT: vmsr p0, r0 1178; CHECK-NEXT: vpst 1179; CHECK-NEXT: vshlltt.u8 q0, q1, #8 1180; CHECK-NEXT: bx lr 1181entry: 1182 %0 = zext i16 %p to i32 1183 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1184 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive) 1185 ret <8 x i16> %2 1186} 1187 1188define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { 1189; CHECK-LABEL: test_vshlltq_m_n_u16: 1190; CHECK: @ %bb.0: @ %entry 1191; CHECK-NEXT: vmsr p0, r0 1192; CHECK-NEXT: vpst 1193; CHECK-NEXT: vshlltt.u16 q0, q1, #9 1194; CHECK-NEXT: bx lr 1195entry: 1196 %0 = zext i16 %p to i32 1197 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1198 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 9, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive) 1199 ret <4 x i32> %2 1200} 1201 1202define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_u16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { 1203; CHECK-LABEL: test_vshlltq_m_n_u16_lanewidth: 1204; CHECK: @ %bb.0: @ %entry 1205; CHECK-NEXT: vmsr p0, r0 1206; CHECK-NEXT: vpst 1207; CHECK-NEXT: vshlltt.u16 q0, q1, #16 1208; CHECK-NEXT: bx lr 1209entry: 1210 %0 = zext i16 %p to i32 1211 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 
1212 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive) 1213 ret <4 x i32> %2 1214} 1215 1216define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_s8(<16 x i8> %a, i16 zeroext %p) { 1217; CHECK-LABEL: test_vshllbq_x_n_s8: 1218; CHECK: @ %bb.0: @ %entry 1219; CHECK-NEXT: vmsr p0, r0 1220; CHECK-NEXT: vpst 1221; CHECK-NEXT: vshllbt.s8 q0, q0, #1 1222; CHECK-NEXT: bx lr 1223entry: 1224 %0 = zext i16 %p to i32 1225 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1226 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, i32 0, <8 x i1> %1, <8 x i16> undef) 1227 ret <8 x i16> %2 1228} 1229 1230define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_s8_lanewidth(<16 x i8> %a, i16 zeroext %p) { 1231; CHECK-LABEL: test_vshllbq_x_n_s8_lanewidth: 1232; CHECK: @ %bb.0: @ %entry 1233; CHECK-NEXT: vmsr p0, r0 1234; CHECK-NEXT: vpst 1235; CHECK-NEXT: vshllbt.s8 q0, q0, #8 1236; CHECK-NEXT: bx lr 1237entry: 1238 %0 = zext i16 %p to i32 1239 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1240 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 0, <8 x i1> %1, <8 x i16> undef) 1241 ret <8 x i16> %2 1242} 1243 1244define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_s16(<8 x i16> %a, i16 zeroext %p) { 1245; CHECK-LABEL: test_vshllbq_x_n_s16: 1246; CHECK: @ %bb.0: @ %entry 1247; CHECK-NEXT: vmsr p0, r0 1248; CHECK-NEXT: vpst 1249; CHECK-NEXT: vshllbt.s16 q0, q0, #10 1250; CHECK-NEXT: bx lr 1251entry: 1252 %0 = zext i16 %p to i32 1253 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1254 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 0, i32 0, <4 x i1> %1, <4 x i32> undef) 1255 ret <4 x i32> %2 1256} 1257 1258define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_s16_lanewidth(<8 x i16> %a, i16 zeroext %p) { 
1259; CHECK-LABEL: test_vshllbq_x_n_s16_lanewidth: 1260; CHECK: @ %bb.0: @ %entry 1261; CHECK-NEXT: vmsr p0, r0 1262; CHECK-NEXT: vpst 1263; CHECK-NEXT: vshllbt.s16 q0, q0, #16 1264; CHECK-NEXT: bx lr 1265entry: 1266 %0 = zext i16 %p to i32 1267 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1268 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 0, <4 x i1> %1, <4 x i32> undef) 1269 ret <4 x i32> %2 1270} 1271 1272define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_u8(<16 x i8> %a, i16 zeroext %p) { 1273; CHECK-LABEL: test_vshllbq_x_n_u8: 1274; CHECK: @ %bb.0: @ %entry 1275; CHECK-NEXT: vmsr p0, r0 1276; CHECK-NEXT: vpst 1277; CHECK-NEXT: vshllbt.u8 q0, q0, #6 1278; CHECK-NEXT: bx lr 1279entry: 1280 %0 = zext i16 %p to i32 1281 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1282 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 6, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef) 1283 ret <8 x i16> %2 1284} 1285 1286define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_u8_lanewidth(<16 x i8> %a, i16 zeroext %p) { 1287; CHECK-LABEL: test_vshllbq_x_n_u8_lanewidth: 1288; CHECK: @ %bb.0: @ %entry 1289; CHECK-NEXT: vmsr p0, r0 1290; CHECK-NEXT: vpst 1291; CHECK-NEXT: vshllbt.u8 q0, q0, #8 1292; CHECK-NEXT: bx lr 1293entry: 1294 %0 = zext i16 %p to i32 1295 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1296 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 0, <8 x i1> %1, <8 x i16> undef) 1297 ret <8 x i16> %2 1298} 1299 1300define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_u16(<8 x i16> %a, i16 zeroext %p) { 1301; CHECK-LABEL: test_vshllbq_x_n_u16: 1302; CHECK: @ %bb.0: @ %entry 1303; CHECK-NEXT: vmsr p0, r0 1304; CHECK-NEXT: vpst 1305; CHECK-NEXT: vshllbt.u16 q0, q0, #10 1306; CHECK-NEXT: bx lr 1307entry: 1308 %0 = zext i16 %p to i32 1309 %1 = tail call <4 x i1> 
@llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1310 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 10, i32 1, i32 0, <4 x i1> %1, <4 x i32> undef) 1311 ret <4 x i32> %2 1312} 1313 1314define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_u16_lanewidth(<8 x i16> %a, i16 zeroext %p) { 1315; CHECK-LABEL: test_vshllbq_x_n_u16_lanewidth: 1316; CHECK: @ %bb.0: @ %entry 1317; CHECK-NEXT: vmsr p0, r0 1318; CHECK-NEXT: vpst 1319; CHECK-NEXT: vshllbt.u16 q0, q0, #16 1320; CHECK-NEXT: bx lr 1321entry: 1322 %0 = zext i16 %p to i32 1323 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1324 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 0, <4 x i1> %1, <4 x i32> undef) 1325 ret <4 x i32> %2 1326} 1327 1328define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_s8(<16 x i8> %a, i16 zeroext %p) { 1329; CHECK-LABEL: test_vshlltq_x_n_s8: 1330; CHECK: @ %bb.0: @ %entry 1331; CHECK-NEXT: vmsr p0, r0 1332; CHECK-NEXT: vpst 1333; CHECK-NEXT: vshlltt.s8 q0, q0, #2 1334; CHECK-NEXT: bx lr 1335entry: 1336 %0 = zext i16 %p to i32 1337 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1338 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 2, i32 0, i32 1, <8 x i1> %1, <8 x i16> undef) 1339 ret <8 x i16> %2 1340} 1341 1342define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_s8_lanewidth(<16 x i8> %a, i16 zeroext %p) { 1343; CHECK-LABEL: test_vshlltq_x_n_s8_lanewidth: 1344; CHECK: @ %bb.0: @ %entry 1345; CHECK-NEXT: vmsr p0, r0 1346; CHECK-NEXT: vpst 1347; CHECK-NEXT: vshlltt.s8 q0, q0, #8 1348; CHECK-NEXT: bx lr 1349entry: 1350 %0 = zext i16 %p to i32 1351 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1352 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 0, i32 1, <8 x i1> %1, <8 x i16> undef) 1353 ret <8 x i16> %2 1354} 1355 1356define arm_aapcs_vfpcc <4 x i32> 
@test_vshlltq_x_n_s16(<8 x i16> %a, i16 zeroext %p) { 1357; CHECK-LABEL: test_vshlltq_x_n_s16: 1358; CHECK: @ %bb.0: @ %entry 1359; CHECK-NEXT: vmsr p0, r0 1360; CHECK-NEXT: vpst 1361; CHECK-NEXT: vshlltt.s16 q0, q0, #6 1362; CHECK-NEXT: bx lr 1363entry: 1364 %0 = zext i16 %p to i32 1365 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1366 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 6, i32 0, i32 1, <4 x i1> %1, <4 x i32> undef) 1367 ret <4 x i32> %2 1368} 1369 1370define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_s16_lanewidth(<8 x i16> %a, i16 zeroext %p) { 1371; CHECK-LABEL: test_vshlltq_x_n_s16_lanewidth: 1372; CHECK: @ %bb.0: @ %entry 1373; CHECK-NEXT: vmsr p0, r0 1374; CHECK-NEXT: vpst 1375; CHECK-NEXT: vshlltt.s16 q0, q0, #16 1376; CHECK-NEXT: bx lr 1377entry: 1378 %0 = zext i16 %p to i32 1379 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1380 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 0, i32 1, <4 x i1> %1, <4 x i32> undef) 1381 ret <4 x i32> %2 1382} 1383 1384define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_u8(<16 x i8> %a, i16 zeroext %p) { 1385; CHECK-LABEL: test_vshlltq_x_n_u8: 1386; CHECK: @ %bb.0: @ %entry 1387; CHECK-NEXT: vmsr p0, r0 1388; CHECK-NEXT: vpst 1389; CHECK-NEXT: vshlltt.u8 q0, q0, #5 1390; CHECK-NEXT: bx lr 1391entry: 1392 %0 = zext i16 %p to i32 1393 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1394 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 5, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef) 1395 ret <8 x i16> %2 1396} 1397 1398define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_u8_lanewidth(<16 x i8> %a, i16 zeroext %p) { 1399; CHECK-LABEL: test_vshlltq_x_n_u8_lanewidth: 1400; CHECK: @ %bb.0: @ %entry 1401; CHECK-NEXT: vmsr p0, r0 1402; CHECK-NEXT: vpst 1403; CHECK-NEXT: vshlltt.u8 q0, q0, #8 1404; CHECK-NEXT: bx lr 1405entry: 1406 %0 = 
zext i16 %p to i32 1407 %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1408 %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 8, i32 1, i32 1, <8 x i1> %1, <8 x i16> undef) 1409 ret <8 x i16> %2 1410} 1411 1412define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_u16(<8 x i16> %a, i16 zeroext %p) { 1413; CHECK-LABEL: test_vshlltq_x_n_u16: 1414; CHECK: @ %bb.0: @ %entry 1415; CHECK-NEXT: vmsr p0, r0 1416; CHECK-NEXT: vpst 1417; CHECK-NEXT: vshlltt.u16 q0, q0, #3 1418; CHECK-NEXT: bx lr 1419entry: 1420 %0 = zext i16 %p to i32 1421 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1422 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 3, i32 1, i32 1, <4 x i1> %1, <4 x i32> undef) 1423 ret <4 x i32> %2 1424} 1425 1426define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_u16_lanewidth(<8 x i16> %a, i16 zeroext %p) { 1427; CHECK-LABEL: test_vshlltq_x_n_u16_lanewidth: 1428; CHECK: @ %bb.0: @ %entry 1429; CHECK-NEXT: vmsr p0, r0 1430; CHECK-NEXT: vpst 1431; CHECK-NEXT: vshlltt.u16 q0, q0, #16 1432; CHECK-NEXT: bx lr 1433entry: 1434 %0 = zext i16 %p to i32 1435 %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1436 %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 16, i32 1, i32 1, <4 x i1> %1, <4 x i32> undef) 1437 ret <4 x i32> %2 1438} 1439 1440declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) 1441declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) 1442declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) 1443 1444declare <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>) 1445declare <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>) 1446declare <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>) 1447 1448declare <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x 
i1>, <16 x i8>) 1449declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>) 1450declare <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>) 1451 1452declare <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8>, i32, i32) 1453declare <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16>, i32, i32) 1454declare <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32>, i32, i32) 1455declare <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>) 1456declare <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>) 1457declare <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>) 1458 1459declare <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8>, i32) 1460declare <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16>, i32) 1461declare <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32>, i32) 1462declare <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>) 1463declare <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>) 1464declare <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>) 1465 1466declare <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8>, i32, i32) 1467declare <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16>, i32, i32) 1468declare <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32>, i32, i32) 1469declare <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>) 1470declare <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>) 1471declare <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>) 1472 1473declare <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8>, i32, i32, i32) 1474declare <4 x i32> 
@llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16>, i32, i32, i32) 1475declare <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, i32, <8 x i1>, <8 x i16>) 1476declare <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, i32, <4 x i1>, <4 x i32>) 1477