; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s

; Codegen tests for the MVE vshrn-family intrinsics (llvm.arm.mve.vshrn.* and
; llvm.arm.mve.vshrn.predicated.*). Each function makes a single intrinsic
; call and expects it to be selected to exactly one instruction.
;
; Operand layout used by every call in this file:
;   (<N x iM> %a, <N/2 x i2M> %b, i32 shift-amount, five i32 0/1 flags
;    [, <N/2 x i1> predicate])
; In the expected assembly the flags line up with the mnemonic as follows:
;   flag 1 = 1  <->  'q' in the mnemonic
;   flag 2 = 1  <->  'r' in the mnemonic
;   flag 3/4    <->  .u element suffix (both 1) or the 'un' forms (1, 0)
;   flag 5      <->  'b' (0) or 't' (1) suffix
; NOTE(review): presumably saturate, round, unsigned-output, unsigned-input
; and top-half respectively -- confirm against the intrinsic definition.

; Unpredicated vshrnb: flags 1 and 2 are 0, flag 5 is 0.

define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshrnbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnb.i16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 3, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshrnbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnb.i32 q0, q1, #9
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshrnbq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnb.i16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, i32 1, i32 1, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshrnbq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnb.i32 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 3, i32 0, i32 0, i32 1, i32 1, i32 0)
  ret <8 x i16> %0
}

; Unpredicated vshrnt: same as above with flag 5 set to 1.

define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshrntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnt.i16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, i32 0, i32 0, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshrntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnt.i32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 0, i32 0, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vshrntq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnt.i16 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 6, i32 0, i32 0, i32 1, i32 1, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vshrntq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshrnt.i32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 0, i32 1, i32 1, i32 1)
  ret <8 x i16> %0
}

; Predicated forms: the i16 mask %p is zero-extended to i32 and converted to a
; vector predicate with llvm.arm.mve.pred.i2v, which is passed as the trailing
; operand. The expected code is vmsr p0, r0 + vpst + the shift with an extra
; 't' appended to its mnemonic.

define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrnbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrnbt.i16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrnbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrnbt.i32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrnbq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrnbt.i16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 0, i32 0, i32 1, i32 1, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrnbq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrnbt.i32 q0, q1, #15
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 15, i32 0, i32 0, i32 1, i32 1, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrntt.i16 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 6, i32 0, i32 0, i32 0, i32 0, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}

; Remaining predicated vshrnt tests. In every predicated test below the i16
; mask %p is zero-extended, converted with llvm.arm.mve.pred.i2v and passed as
; the trailing operand; the expected code is vmsr p0, r0 + vpst + the shift
; with an extra 't' appended to its mnemonic.

define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrntt.i32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 13, i32 0, i32 0, i32 0, i32 0, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrntq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrntt.i16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 0, i32 1, i32 1, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vshrntq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshrntt.i32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 0, i32 1, i32 1, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}

; vrshrnb / vrshrnt: second flag after the shift amount is 1, first flag is 0.

define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshrnbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnb.i16 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 0, i32 1, i32 0, i32 0, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshrnbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnb.i32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 1, i32 0, i32 0, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshrnbq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnb.i16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 2, i32 0, i32 1, i32 1, i32 1, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshrnbq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnb.i32 q0, q1, #12
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 12, i32 0, i32 1, i32 1, i32 1, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshrntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnt.i16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 4, i32 0, i32 1, i32 0, i32 0, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshrntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnt.i32 q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 11, i32 0, i32 1, i32 0, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vrshrntq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnt.i16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 1, i32 1, i32 1, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vrshrntq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrshrnt.i32 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 6, i32 0, i32 1, i32 1, i32 1, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrnbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrnbt.i16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 0, i32 1, i32 0, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrnbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrnbt.i32 q0, q1, #14
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 14, i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrnbq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrnbt.i16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 2, i32 0, i32 1, i32 1, i32 1, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrnbq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrnbt.i32 q0, q1, #12
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 12, i32 0, i32 1, i32 1, i32 1, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrntt.i16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 0, i32 1, i32 0, i32 0, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrntt.i32 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 6, i32 0, i32 1, i32 0, i32 0, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vrshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrntq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrntt.i16 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 6, i32 0, i32 1, i32 1, i32 1, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vrshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vrshrntq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vrshrntt.i32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 10, i32 0, i32 1, i32 1, i32 1, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}

; vqshrnb / vqshrnt: first flag after the shift amount is 1, second flag is 0.
; The expected element suffix is .sNN or .uNN here rather than .iNN.

define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrnbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnb.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 0, i32 0, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrnbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnb.s32 q0, q1, #15
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 15, i32 1, i32 0, i32 0, i32 0, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrnbq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnb.u16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 3, i32 1, i32 0, i32 1, i32 1, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrnbq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnb.u32 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 3, i32 1, i32 0, i32 1, i32 1, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnt.s16 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 0, i32 0, i32 0, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnt.s32 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 6, i32 1, i32 0, i32 0, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrntq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnt.u16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, i32 1, i32 1, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrntq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrnt.u32 q0, q1, #15
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 15, i32 1, i32 0, i32 1, i32 1, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrnbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrnbt.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 0, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrnbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrnbt.s32 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrnbq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrnbt.u16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, i32 1, i32 1, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrnbq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrnbt.u32 q0, q1, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 8, i32 1, i32 0, i32 1, i32 1, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrntt.s16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 0, i32 0, i32 0, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrntt.s32 q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 11, i32 1, i32 0, i32 0, i32 0, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrntq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrntt.u16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 3, i32 1, i32 0, i32 1, i32 1, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrntq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrntt.u32 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 0, i32 1, i32 1, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}

; vqshrunb / vqshrunt: flags are (1, 0, 1, 0, b/t), i.e. the third flag is 1
; while the fourth is 0. Only .s16 / .s32 variants are tested.

define arm_aapcs_vfpcc <16 x i8> @test_vqshrunbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshrunbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrunb.s16 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 0, i32 1, i32 0, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshrunbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshrunbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrunb.s32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 13, i32 1, i32 0, i32 1, i32 0, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshruntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqshruntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrunt.s16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 2, i32 1, i32 0, i32 1, i32 0, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshruntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqshruntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqshrunt.s32 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshrunbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrunbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrunbt.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshrunbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshrunbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshrunbt.s32 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqshruntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshruntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshruntt.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqshruntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqshruntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqshruntt.s32 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 7, i32 1, i32 0, i32 1, i32 0, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}

; vqrshrnb / vqrshrnt: first and second flags after the shift amount are both 1.

define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrnbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnb.s16 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 1, i32 0, i32 0, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrnbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnb.s32 q0, q1, #13
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 13, i32 1, i32 1, i32 0, i32 0, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrnbq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnb.u16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 1, i32 1, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrnbq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnb.u32 q0, q1, #8
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 8, i32 1, i32 1, i32 1, i32 1, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnt.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 0, i32 0, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnt.s32 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 2, i32 1, i32 1, i32 0, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_n_u16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrntq_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnt.u16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_n_u32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrntq_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrnt.u32 q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 11, i32 1, i32 1, i32 1, i32 1, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrnbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrnbt.s16 q0, q1, #2
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 2, i32 1, i32 1, i32 0, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrnbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrnbt.s32 q0, q1, #12
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 12, i32 1, i32 1, i32 0, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshrnbq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrnbq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrnbt.u16 q0, q1, #5
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 5, i32 1, i32 1, i32 1, i32 1, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshrnbq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrnbq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrnbt.u32 q0, q1, #11
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 11, i32 1, i32 1, i32 1, i32 1, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrntt.s16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 1, i32 1, i32 0, i32 0, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrntq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrntt.s32 q0, q1, #6
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 6, i32 1, i32 1, i32 0, i32 0, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshrntq_m_n_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrntq_m_n_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrntt.u16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 1, i32 1, i32 1, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshrntq_m_n_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrntq_m_n_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrntt.u32 q0, q1, #15
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 15, i32 1, i32 1, i32 1, i32 1, i32 1, <4 x i1> %1)
  ret <8 x i16> %2
}

; vqrshrunb / vqrshrunt: flags are (1, 1, 1, 0, b/t).
; These tests previously passed the vqshrun flag pattern (second flag 0) and
; expected vqshrun*, so the rounding form they are named after was never
; selected. The second flag is now 1 and the expected mnemonics are vqrshrun*.
; NOTE(review): the expectations in this group were edited by hand; re-run
; utils/update_llc_test_checks.py on this file to confirm them.

define arm_aapcs_vfpcc <16 x i8> @test_vqrshrunbq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshrunbq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrunb.s16 q0, q1, #7
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 7, i32 1, i32 1, i32 1, i32 0, i32 0)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshrunbq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshrunbq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrunb.s32 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 1, i32 1, i32 1, i32 1, i32 0, i32 0)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshruntq_n_s16(<16 x i8> %a, <8 x i16> %b) {
; CHECK-LABEL: test_vqrshruntq_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrunt.s16 q0, q1, #1
; CHECK-NEXT:    bx lr
entry:
  %0 = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> %a, <8 x i16> %b, i32 1, i32 1, i32 1, i32 1, i32 0, i32 1)
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshruntq_n_s32(<8 x i16> %a, <4 x i32> %b) {
; CHECK-LABEL: test_vqrshruntq_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vqrshrunt.s32 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> %a, <4 x i32> %b, i32 3, i32 1, i32 1, i32 1, i32 0, i32 1)
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshrunbq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrunbq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrunbt.s16 q0, q1, #4
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 4, i32 1, i32 1, i32 1, i32 0, i32 0, <8 x i1> %1)
  ret <16 x i8> %2
}

define arm_aapcs_vfpcc <8 x i16> @test_vqrshrunbq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshrunbq_m_n_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshrunbt.s32 q0, q1, #10
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 10, i32 1, i32 1, i32 1, i32 0, i32 0, <4 x i1> %1)
  ret <8 x i16> %2
}

define arm_aapcs_vfpcc <16 x i8> @test_vqrshruntq_m_n_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) {
; CHECK-LABEL: test_vqrshruntq_m_n_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqrshruntt.s16 q0, q1, #3
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  %2 = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 3, i32 1, i32 1, i32 1, i32 0, i32 1, <8 x
i1> %1) 947 ret <16 x i8> %2 948} 949 950define arm_aapcs_vfpcc <8 x i16> @test_vqrshruntq_m_n_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { 951; CHECK-LABEL: test_vqrshruntq_m_n_s32: 952; CHECK: @ %bb.0: @ %entry 953; CHECK-NEXT: vmsr p0, r0 954; CHECK-NEXT: vpst 955; CHECK-NEXT: vqshruntt.s32 q0, q1, #13 956; CHECK-NEXT: bx lr 957entry: 958 %0 = zext i16 %p to i32 959 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 960 %2 = call <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 13, i32 1, i32 0, i32 1, i32 0, i32 1, <4 x i1> %1) 961 ret <8 x i16> %2 962} 963 964define arm_aapcs_vfpcc <16 x i8> @test_vsliq_n_s8(<16 x i8> %a, <16 x i8> %b) { 965; CHECK-LABEL: test_vsliq_n_s8: 966; CHECK: @ %bb.0: @ %entry 967; CHECK-NEXT: vsli.8 q0, q1, #2 968; CHECK-NEXT: bx lr 969entry: 970 %0 = call <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 2) 971 ret <16 x i8> %0 972} 973 974define arm_aapcs_vfpcc <8 x i16> @test_vsliq_n_s16(<8 x i16> %a, <8 x i16> %b) { 975; CHECK-LABEL: test_vsliq_n_s16: 976; CHECK: @ %bb.0: @ %entry 977; CHECK-NEXT: vsli.16 q0, q1, #10 978; CHECK-NEXT: bx lr 979entry: 980 %0 = call <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 10) 981 ret <8 x i16> %0 982} 983 984define arm_aapcs_vfpcc <4 x i32> @test_vsliq_n_s32(<4 x i32> %a, <4 x i32> %b) { 985; CHECK-LABEL: test_vsliq_n_s32: 986; CHECK: @ %bb.0: @ %entry 987; CHECK-NEXT: vsli.32 q0, q1, #1 988; CHECK-NEXT: bx lr 989entry: 990 %0 = call <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 1) 991 ret <4 x i32> %0 992} 993 994define arm_aapcs_vfpcc <16 x i8> @test_vsliq_n_u8(<16 x i8> %a, <16 x i8> %b) { 995; CHECK-LABEL: test_vsliq_n_u8: 996; CHECK: @ %bb.0: @ %entry 997; CHECK-NEXT: vsli.8 q0, q1, #1 998; CHECK-NEXT: bx lr 999entry: 1000 %0 = call <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1) 1001 ret <16 x i8> %0 1002} 1003 1004define arm_aapcs_vfpcc <8 x i16> 
@test_vsliq_n_u16(<8 x i16> %a, <8 x i16> %b) { 1005; CHECK-LABEL: test_vsliq_n_u16: 1006; CHECK: @ %bb.0: @ %entry 1007; CHECK-NEXT: vsli.16 q0, q1, #1 1008; CHECK-NEXT: bx lr 1009entry: 1010 %0 = call <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16> %a, <8 x i16> %b, i32 1) 1011 ret <8 x i16> %0 1012} 1013 1014define arm_aapcs_vfpcc <4 x i32> @test_vsliq_n_u32(<4 x i32> %a, <4 x i32> %b) { 1015; CHECK-LABEL: test_vsliq_n_u32: 1016; CHECK: @ %bb.0: @ %entry 1017; CHECK-NEXT: vsli.32 q0, q1, #28 1018; CHECK-NEXT: bx lr 1019entry: 1020 %0 = call <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32> %a, <4 x i32> %b, i32 28) 1021 ret <4 x i32> %0 1022} 1023 1024define arm_aapcs_vfpcc <16 x i8> @test_vsliq_m_n_s8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { 1025; CHECK-LABEL: test_vsliq_m_n_s8: 1026; CHECK: @ %bb.0: @ %entry 1027; CHECK-NEXT: vmsr p0, r0 1028; CHECK-NEXT: vpst 1029; CHECK-NEXT: vslit.8 q0, q1, #4 1030; CHECK-NEXT: bx lr 1031entry: 1032 %0 = zext i16 %p to i32 1033 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1034 %2 = call <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 4, <16 x i1> %1) 1035 ret <16 x i8> %2 1036} 1037 1038define arm_aapcs_vfpcc <8 x i16> @test_vsliq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) { 1039; CHECK-LABEL: test_vsliq_m_n_s16: 1040; CHECK: @ %bb.0: @ %entry 1041; CHECK-NEXT: vmsr p0, r0 1042; CHECK-NEXT: vpst 1043; CHECK-NEXT: vslit.16 q0, q1, #1 1044; CHECK-NEXT: bx lr 1045entry: 1046 %0 = zext i16 %p to i32 1047 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1048 %2 = call <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, <8 x i1> %1) 1049 ret <8 x i16> %2 1050} 1051 1052define arm_aapcs_vfpcc <4 x i32> @test_vsliq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { 1053; CHECK-LABEL: test_vsliq_m_n_s32: 1054; CHECK: @ %bb.0: @ %entry 1055; CHECK-NEXT: vmsr p0, r0 1056; CHECK-NEXT: vpst 1057; CHECK-NEXT: vslit.32 q0, q1, #1 1058; 
CHECK-NEXT: bx lr 1059entry: 1060 %0 = zext i16 %p to i32 1061 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1062 %2 = call <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 1, <4 x i1> %1) 1063 ret <4 x i32> %2 1064} 1065 1066define arm_aapcs_vfpcc <16 x i8> @test_vsliq_m_n_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { 1067; CHECK-LABEL: test_vsliq_m_n_u8: 1068; CHECK: @ %bb.0: @ %entry 1069; CHECK-NEXT: vmsr p0, r0 1070; CHECK-NEXT: vpst 1071; CHECK-NEXT: vslit.8 q0, q1, #5 1072; CHECK-NEXT: bx lr 1073entry: 1074 %0 = zext i16 %p to i32 1075 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1076 %2 = call <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 5, <16 x i1> %1) 1077 ret <16 x i8> %2 1078} 1079 1080define arm_aapcs_vfpcc <8 x i16> @test_vsliq_m_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) { 1081; CHECK-LABEL: test_vsliq_m_n_u16: 1082; CHECK: @ %bb.0: @ %entry 1083; CHECK-NEXT: vmsr p0, r0 1084; CHECK-NEXT: vpst 1085; CHECK-NEXT: vslit.16 q0, q1, #3 1086; CHECK-NEXT: bx lr 1087entry: 1088 %0 = zext i16 %p to i32 1089 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1090 %2 = call <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 3, <8 x i1> %1) 1091 ret <8 x i16> %2 1092} 1093 1094define arm_aapcs_vfpcc <4 x i32> @test_vsliq_m_n_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { 1095; CHECK-LABEL: test_vsliq_m_n_u32: 1096; CHECK: @ %bb.0: @ %entry 1097; CHECK-NEXT: vmsr p0, r0 1098; CHECK-NEXT: vpst 1099; CHECK-NEXT: vslit.32 q0, q1, #9 1100; CHECK-NEXT: bx lr 1101entry: 1102 %0 = zext i16 %p to i32 1103 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1104 %2 = call <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 9, <4 x i1> %1) 1105 ret <4 x i32> %2 1106} 1107 1108define arm_aapcs_vfpcc <16 x i8> @test_vsriq_n_s8(<16 x i8> %a, <16 x i8> %b) { 1109; CHECK-LABEL: test_vsriq_n_s8: 1110; 
CHECK: @ %bb.0: @ %entry 1111; CHECK-NEXT: vsri.8 q0, q1, #3 1112; CHECK-NEXT: bx lr 1113entry: 1114 %0 = call <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 1115 ret <16 x i8> %0 1116} 1117 1118define arm_aapcs_vfpcc <8 x i16> @test_vsriq_n_s16(<8 x i16> %a, <8 x i16> %b) { 1119; CHECK-LABEL: test_vsriq_n_s16: 1120; CHECK: @ %bb.0: @ %entry 1121; CHECK-NEXT: vsri.16 q0, q1, #2 1122; CHECK-NEXT: bx lr 1123entry: 1124 %0 = call <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 2) 1125 ret <8 x i16> %0 1126} 1127 1128define arm_aapcs_vfpcc <4 x i32> @test_vsriq_n_s32(<4 x i32> %a, <4 x i32> %b) { 1129; CHECK-LABEL: test_vsriq_n_s32: 1130; CHECK: @ %bb.0: @ %entry 1131; CHECK-NEXT: vsri.32 q0, q1, #28 1132; CHECK-NEXT: bx lr 1133entry: 1134 %0 = call <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 28) 1135 ret <4 x i32> %0 1136} 1137 1138define arm_aapcs_vfpcc <16 x i8> @test_vsriq_n_u8(<16 x i8> %a, <16 x i8> %b) { 1139; CHECK-LABEL: test_vsriq_n_u8: 1140; CHECK: @ %bb.0: @ %entry 1141; CHECK-NEXT: vsri.8 q0, q1, #3 1142; CHECK-NEXT: bx lr 1143entry: 1144 %0 = call <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 1145 ret <16 x i8> %0 1146} 1147 1148define arm_aapcs_vfpcc <8 x i16> @test_vsriq_n_u16(<8 x i16> %a, <8 x i16> %b) { 1149; CHECK-LABEL: test_vsriq_n_u16: 1150; CHECK: @ %bb.0: @ %entry 1151; CHECK-NEXT: vsri.16 q0, q1, #3 1152; CHECK-NEXT: bx lr 1153entry: 1154 %0 = call <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16> %a, <8 x i16> %b, i32 3) 1155 ret <8 x i16> %0 1156} 1157 1158define arm_aapcs_vfpcc <4 x i32> @test_vsriq_n_u32(<4 x i32> %a, <4 x i32> %b) { 1159; CHECK-LABEL: test_vsriq_n_u32: 1160; CHECK: @ %bb.0: @ %entry 1161; CHECK-NEXT: vsri.32 q0, q1, #26 1162; CHECK-NEXT: bx lr 1163entry: 1164 %0 = call <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32> %a, <4 x i32> %b, i32 26) 1165 ret <4 x i32> %0 1166} 1167 1168define arm_aapcs_vfpcc <16 x i8> @test_vsriq_m_n_s8(<16 x i8> %a, 
<16 x i8> %b, i16 zeroext %p) { 1169; CHECK-LABEL: test_vsriq_m_n_s8: 1170; CHECK: @ %bb.0: @ %entry 1171; CHECK-NEXT: vmsr p0, r0 1172; CHECK-NEXT: vpst 1173; CHECK-NEXT: vsrit.8 q0, q1, #4 1174; CHECK-NEXT: bx lr 1175entry: 1176 %0 = zext i16 %p to i32 1177 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1178 %2 = call <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i8> %b, i32 4, <16 x i1> %1) 1179 ret <16 x i8> %2 1180} 1181 1182define arm_aapcs_vfpcc <8 x i16> @test_vsriq_m_n_s16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) { 1183; CHECK-LABEL: test_vsriq_m_n_s16: 1184; CHECK: @ %bb.0: @ %entry 1185; CHECK-NEXT: vmsr p0, r0 1186; CHECK-NEXT: vpst 1187; CHECK-NEXT: vsrit.16 q0, q1, #1 1188; CHECK-NEXT: bx lr 1189entry: 1190 %0 = zext i16 %p to i32 1191 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1192 %2 = call <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 1, <8 x i1> %1) 1193 ret <8 x i16> %2 1194} 1195 1196define arm_aapcs_vfpcc <4 x i32> @test_vsriq_m_n_s32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { 1197; CHECK-LABEL: test_vsriq_m_n_s32: 1198; CHECK: @ %bb.0: @ %entry 1199; CHECK-NEXT: vmsr p0, r0 1200; CHECK-NEXT: vpst 1201; CHECK-NEXT: vsrit.32 q0, q1, #27 1202; CHECK-NEXT: bx lr 1203entry: 1204 %0 = zext i16 %p to i32 1205 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1206 %2 = call <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 27, <4 x i1> %1) 1207 ret <4 x i32> %2 1208} 1209 1210define arm_aapcs_vfpcc <16 x i8> @test_vsriq_m_n_u8(<16 x i8> %a, <16 x i8> %b, i16 zeroext %p) { 1211; CHECK-LABEL: test_vsriq_m_n_u8: 1212; CHECK: @ %bb.0: @ %entry 1213; CHECK-NEXT: vmsr p0, r0 1214; CHECK-NEXT: vpst 1215; CHECK-NEXT: vsrit.8 q0, q1, #7 1216; CHECK-NEXT: bx lr 1217entry: 1218 %0 = zext i16 %p to i32 1219 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 1220 %2 = call <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x 
i8> %a, <16 x i8> %b, i32 7, <16 x i1> %1) 1221 ret <16 x i8> %2 1222} 1223 1224define arm_aapcs_vfpcc <8 x i16> @test_vsriq_m_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %p) { 1225; CHECK-LABEL: test_vsriq_m_n_u16: 1226; CHECK: @ %bb.0: @ %entry 1227; CHECK-NEXT: vmsr p0, r0 1228; CHECK-NEXT: vpst 1229; CHECK-NEXT: vsrit.16 q0, q1, #9 1230; CHECK-NEXT: bx lr 1231entry: 1232 %0 = zext i16 %p to i32 1233 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 1234 %2 = call <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i16> %b, i32 9, <8 x i1> %1) 1235 ret <8 x i16> %2 1236} 1237 1238define arm_aapcs_vfpcc <4 x i32> @test_vsriq_m_n_u32(<4 x i32> %a, <4 x i32> %b, i16 zeroext %p) { 1239; CHECK-LABEL: test_vsriq_m_n_u32: 1240; CHECK: @ %bb.0: @ %entry 1241; CHECK-NEXT: vmsr p0, r0 1242; CHECK-NEXT: vpst 1243; CHECK-NEXT: vsrit.32 q0, q1, #13 1244; CHECK-NEXT: bx lr 1245entry: 1246 %0 = zext i16 %p to i32 1247 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 1248 %2 = call <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i32> %b, i32 13, <4 x i1> %1) 1249 ret <4 x i32> %2 1250} 1251 1252declare <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8>, <8 x i16>, i32, i32, i32, i32, i32, i32) 1253declare <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16>, <4 x i32>, i32, i32, i32, i32, i32, i32) 1254declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) 1255declare <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, i32, i32, i32, i32, i32, <8 x i1>) 1256declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) 1257declare <8 x i16> @llvm.arm.mve.vshrn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, i32, i32, i32, i32, i32, <4 x i1>) 1258declare <16 x i8> @llvm.arm.mve.vsli.v16i8(<16 x i8>, <16 x i8>, i32) 1259declare <8 x i16> @llvm.arm.mve.vsli.v8i16(<8 x i16>, <8 x i16>, i32) 1260declare <4 x i32> @llvm.arm.mve.vsli.v4i32(<4 x i32>, <4 x i32>, i32) 1261declare <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32) 1262declare <16 x i8> @llvm.arm.mve.vsli.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>) 1263declare <8 x i16> @llvm.arm.mve.vsli.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>) 1264declare <4 x i32> @llvm.arm.mve.vsli.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>) 1265declare <16 x i8> @llvm.arm.mve.vsri.v16i8(<16 x i8>, <16 x i8>, i32) 1266declare <8 x i16> @llvm.arm.mve.vsri.v8i16(<8 x i16>, <8 x i16>, i32) 1267declare <4 x i32> @llvm.arm.mve.vsri.v4i32(<4 x i32>, <4 x i32>, i32) 1268declare <16 x i8> @llvm.arm.mve.vsri.predicated.v16i8.v16i1(<16 x i8>, <16 x i8>, i32, <16 x i1>) 1269declare <8 x i16> @llvm.arm.mve.vsri.predicated.v8i16.v8i1(<8 x i16>, <8 x i16>, i32, <8 x i1>) 1270declare <4 x i32> @llvm.arm.mve.vsri.predicated.v4i32.v4i1(<4 x i32>, <4 x i32>, i32, <4 x i1>) 1271