; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Every test below follows the same shape: compute an operation on %x (and
; %y / %z), then use a vector select, controlled by the mask returned from an
; llvm.arm.mve.vctp* intrinsic, to pick the operation result where the mask is
; true and the original %x otherwise. The expected assembly verifies that the
; select is folded into a single predicated instruction inside a VPST block
; that writes q0 (the register holding %x) in place.

; Integer add: select(mask, x + y, x) -> predicated vadd.
define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: add_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = add <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @add_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: add_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = add <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @add_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: add_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = add <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Integer sub: select(mask, x - y, x) -> predicated vsub.
define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sub_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = sub <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sub_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = sub <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sub_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = sub <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Integer mul: select(mask, x * y, x) -> predicated vmul.
define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: mul_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = mul <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: mul_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = mul <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: mul_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = mul <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Bitwise and: select(mask, x & y, x) -> predicated vand (no element suffix).
define arm_aapcs_vfpcc <4 x i32> @and_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: and_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = and <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @and_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: and_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = and <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @and_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: and_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = and <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Bitwise or: select(mask, x | y, x) -> predicated vorr.
define arm_aapcs_vfpcc <4 x i32> @or_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: or_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = or <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @or_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: or_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = or <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @or_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: or_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = or <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Bitwise xor: select(mask, x ^ y, x) -> predicated veor.
define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: xor_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = xor <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: xor_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = xor <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: xor_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = xor <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Left shift by a vector amount: select(mask, x << y, x) -> predicated vshl.
define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: shl_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = shl <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: shl_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = shl <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: shl_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = shl <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Arithmetic right shift by a vector amount: the expected code negates the
; shift amounts (unpredicated vneg) and then uses a predicated signed vshl.
define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ashr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s32 q1, q1
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = ashr <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ashr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s16 q1, q1
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = ashr <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ashr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s8 q1, q1
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = ashr <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Logical right shift by a vector amount: same shape as the arithmetic case,
; but the predicated vshl uses the unsigned element type.
define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: lshr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s32 q1, q1
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = lshr <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: lshr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s16 q1, q1
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = lshr <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: lshr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s8 q1, q1
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = lshr <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; And-not: x & ~y (the ~y is written as xor with all-ones) -> predicated vbic.
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = and <4 x i32> %x, %y1
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: andnot_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = and <8 x i16> %x, %y1
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: andnot_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = and <16 x i8> %x, %y1
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Or-not: x | ~y (the ~y is written as xor with all-ones) -> predicated vorn.
define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ornot_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = or <4 x i32> %x, %y1
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ornot_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = or <8 x i16> %x, %y1
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ornot_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = or <16 x i8> %x, %y1
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Floating-point add: select(mask, x + y, x) -> predicated vadd.f32 / vadd.f16.
define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fadd_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fadd <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fadd_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fadd <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Floating-point sub: select(mask, x - y, x) -> predicated vsub.f32 / vsub.f16.
define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fsub_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fsub <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fsub_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fsub <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Floating-point mul: select(mask, x * y, x) -> predicated vmul.f32 / vmul.f16.
define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fmul_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fmul <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fmul_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fmul <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Floating-point div: the expected code performs the division lane by lane
; with scalar vdiv instructions into q1 and then merges the result into q0
; with a predicated vmov, rather than using a single predicated vector op.
define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fdiv_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdiv.f32 s7, s3, s7
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vdiv.f32 s6, s2, s6
; CHECK-NEXT:    vdiv.f32 s5, s1, s5
; CHECK-NEXT:    vdiv.f32 s4, s0, s4
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fdiv <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fdiv_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s8, s4
; CHECK-NEXT:    vmovx.f16 s10, s0
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vdiv.f16 s4, s0, s4
; CHECK-NEXT:    vins.f16 s4, s8
; CHECK-NEXT:    vmovx.f16 s8, s5
; CHECK-NEXT:    vmovx.f16 s10, s1
; CHECK-NEXT:    vdiv.f16 s5, s1, s5
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vmovx.f16 s10, s2
; CHECK-NEXT:    vins.f16 s5, s8
; CHECK-NEXT:    vmovx.f16 s8, s6
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vdiv.f16 s6, s2, s6
; CHECK-NEXT:    vins.f16 s6, s8
; CHECK-NEXT:    vmovx.f16 s8, s7
; CHECK-NEXT:    vmovx.f16 s10, s3
; CHECK-NEXT:    vdiv.f16 s7, s3, s7
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vins.f16 s7, s8
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fdiv <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Fused multiply-add via the llvm.fma intrinsic, with %x as the addend:
; select(mask, fma(y, z, x), x) -> predicated vfma accumulating into q0.
define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
; CHECK-LABEL: fmai_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vfmat.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %x)
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_x(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
; CHECK-LABEL: fmai_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vfmat.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Multiply-add written as separate fast-math fmul and fadd: the expected code
; is the same predicated vfma as for the intrinsic form.
define arm_aapcs_vfpcc <4 x float> @fma_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
; CHECK-LABEL: fma_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vfmat.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %m = fmul fast <4 x float> %y, %z
  %a = fadd fast <4 x float> %m, %x
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fma_v8f16_x(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
; CHECK-LABEL: fma_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vfmat.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %m = fmul fast <8 x half> %y, %z
  %a = fadd fast <8 x half> %m, %x
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Signed minimum written as icmp slt + select -> predicated vmin.sN.
define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp slt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp slt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp slt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Signed maximum written as icmp sgt + select -> predicated vmax.sN.
define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp sgt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp sgt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp sgt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Unsigned minimum written as icmp ult + select -> predicated vmin.uN.
define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp ult <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp ult <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp ult <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Unsigned maximum written as icmp ugt + select -> predicated vmax.uN.
define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = icmp ugt <4 x i32> %x, %y
  %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = icmp ugt <8 x i16> %x, %y
  %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a1 = icmp ugt <16 x i8> %x, %y
  %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Floating-point minimum written as fast-math fcmp olt + select -> predicated
; vminnm.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast olt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast olt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Floating-point maximum written as fast-math fcmp ogt + select -> predicated
; vmaxnm.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast ogt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast ogt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

; Signed saturating add via llvm.sadd.sat -> predicated vqadd.sN.
define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; Unsigned saturating add via llvm.uadd.sat -> predicated vqadd.uN.
define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32>
%y) 964 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 965 ret <4 x i32> %b 966} 967 968define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { 969; CHECK-LABEL: uadd_sat_v8i16_x: 970; CHECK: @ %bb.0: @ %entry 971; CHECK-NEXT: vctp.16 r0 972; CHECK-NEXT: vpst 973; CHECK-NEXT: vqaddt.u16 q0, q0, q1 974; CHECK-NEXT: bx lr 975entry: 976 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 977 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 978 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 979 ret <8 x i16> %b 980} 981 982define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { 983; CHECK-LABEL: uadd_sat_v16i8_x: 984; CHECK: @ %bb.0: @ %entry 985; CHECK-NEXT: vctp.8 r0 986; CHECK-NEXT: vpst 987; CHECK-NEXT: vqaddt.u8 q0, q0, q1 988; CHECK-NEXT: bx lr 989entry: 990 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 991 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 992 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 993 ret <16 x i8> %b 994} 995 996define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { 997; CHECK-LABEL: ssub_sat_v4i32_x: 998; CHECK: @ %bb.0: @ %entry 999; CHECK-NEXT: vctp.32 r0 1000; CHECK-NEXT: vpst 1001; CHECK-NEXT: vqsubt.s32 q0, q0, q1 1002; CHECK-NEXT: bx lr 1003entry: 1004 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1005 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 1006 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1007 ret <4 x i32> %b 1008} 1009 1010define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1011; CHECK-LABEL: ssub_sat_v8i16_x: 1012; CHECK: @ %bb.0: @ %entry 1013; CHECK-NEXT: vctp.16 r0 1014; CHECK-NEXT: vpst 1015; CHECK-NEXT: vqsubt.s16 q0, q0, q1 1016; CHECK-NEXT: bx lr 1017entry: 1018 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1019 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 1020 %b = 
select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1021 ret <8 x i16> %b 1022} 1023 1024define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1025; CHECK-LABEL: ssub_sat_v16i8_x: 1026; CHECK: @ %bb.0: @ %entry 1027; CHECK-NEXT: vctp.8 r0 1028; CHECK-NEXT: vpst 1029; CHECK-NEXT: vqsubt.s8 q0, q0, q1 1030; CHECK-NEXT: bx lr 1031entry: 1032 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1033 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 1034 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1035 ret <16 x i8> %b 1036} 1037 1038define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1039; CHECK-LABEL: usub_sat_v4i32_x: 1040; CHECK: @ %bb.0: @ %entry 1041; CHECK-NEXT: vctp.32 r0 1042; CHECK-NEXT: vpst 1043; CHECK-NEXT: vqsubt.u32 q0, q0, q1 1044; CHECK-NEXT: bx lr 1045entry: 1046 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1047 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 1048 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1049 ret <4 x i32> %b 1050} 1051 1052define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1053; CHECK-LABEL: usub_sat_v8i16_x: 1054; CHECK: @ %bb.0: @ %entry 1055; CHECK-NEXT: vctp.16 r0 1056; CHECK-NEXT: vpst 1057; CHECK-NEXT: vqsubt.u16 q0, q0, q1 1058; CHECK-NEXT: bx lr 1059entry: 1060 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1061 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 1062 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1063 ret <8 x i16> %b 1064} 1065 1066define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1067; CHECK-LABEL: usub_sat_v16i8_x: 1068; CHECK: @ %bb.0: @ %entry 1069; CHECK-NEXT: vctp.8 r0 1070; CHECK-NEXT: vpst 1071; CHECK-NEXT: vqsubt.u8 q0, q0, q1 1072; CHECK-NEXT: bx lr 1073entry: 1074 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1075 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x 
i8> %y) 1076 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1077 ret <16 x i8> %b 1078} 1079 1080define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1081; CHECK-LABEL: addqr_v4i32_x: 1082; CHECK: @ %bb.0: @ %entry 1083; CHECK-NEXT: vctp.32 r1 1084; CHECK-NEXT: vpst 1085; CHECK-NEXT: vaddt.i32 q0, q0, r0 1086; CHECK-NEXT: bx lr 1087entry: 1088 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1089 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1090 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1091 %a = add <4 x i32> %x, %ys 1092 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1093 ret <4 x i32> %b 1094} 1095 1096define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1097; CHECK-LABEL: addqr_v8i16_x: 1098; CHECK: @ %bb.0: @ %entry 1099; CHECK-NEXT: vctp.16 r1 1100; CHECK-NEXT: vpst 1101; CHECK-NEXT: vaddt.i16 q0, q0, r0 1102; CHECK-NEXT: bx lr 1103entry: 1104 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1105 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1106 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1107 %a = add <8 x i16> %x, %ys 1108 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1109 ret <8 x i16> %b 1110} 1111 1112define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1113; CHECK-LABEL: addqr_v16i8_x: 1114; CHECK: @ %bb.0: @ %entry 1115; CHECK-NEXT: vctp.8 r1 1116; CHECK-NEXT: vpst 1117; CHECK-NEXT: vaddt.i8 q0, q0, r0 1118; CHECK-NEXT: bx lr 1119entry: 1120 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1121 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1122 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1123 %a = add <16 x i8> %x, %ys 1124 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1125 ret <16 x i8> %b 1126} 1127 1128define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1129; CHECK-LABEL: subqr_v4i32_x: 1130; CHECK: @ %bb.0: @ 
%entry 1131; CHECK-NEXT: vctp.32 r1 1132; CHECK-NEXT: vpst 1133; CHECK-NEXT: vsubt.i32 q0, q0, r0 1134; CHECK-NEXT: bx lr 1135entry: 1136 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1137 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1138 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1139 %a = sub <4 x i32> %x, %ys 1140 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1141 ret <4 x i32> %b 1142} 1143 1144define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1145; CHECK-LABEL: subqr_v8i16_x: 1146; CHECK: @ %bb.0: @ %entry 1147; CHECK-NEXT: vctp.16 r1 1148; CHECK-NEXT: vpst 1149; CHECK-NEXT: vsubt.i16 q0, q0, r0 1150; CHECK-NEXT: bx lr 1151entry: 1152 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1153 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1154 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1155 %a = sub <8 x i16> %x, %ys 1156 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1157 ret <8 x i16> %b 1158} 1159 1160define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1161; CHECK-LABEL: subqr_v16i8_x: 1162; CHECK: @ %bb.0: @ %entry 1163; CHECK-NEXT: vctp.8 r1 1164; CHECK-NEXT: vpst 1165; CHECK-NEXT: vsubt.i8 q0, q0, r0 1166; CHECK-NEXT: bx lr 1167entry: 1168 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1169 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1170 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1171 %a = sub <16 x i8> %x, %ys 1172 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1173 ret <16 x i8> %b 1174} 1175 1176define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1177; CHECK-LABEL: mulqr_v4i32_x: 1178; CHECK: @ %bb.0: @ %entry 1179; CHECK-NEXT: vctp.32 r1 1180; CHECK-NEXT: vpst 1181; CHECK-NEXT: vmult.i32 q0, q0, r0 1182; CHECK-NEXT: bx lr 1183entry: 1184 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1185 %i = insertelement <4 x i32> undef, i32 %y, i32 0 
1186 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1187 %a = mul <4 x i32> %x, %ys 1188 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1189 ret <4 x i32> %b 1190} 1191 1192define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1193; CHECK-LABEL: mulqr_v8i16_x: 1194; CHECK: @ %bb.0: @ %entry 1195; CHECK-NEXT: vctp.16 r1 1196; CHECK-NEXT: vpst 1197; CHECK-NEXT: vmult.i16 q0, q0, r0 1198; CHECK-NEXT: bx lr 1199entry: 1200 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1201 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1202 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1203 %a = mul <8 x i16> %x, %ys 1204 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1205 ret <8 x i16> %b 1206} 1207 1208define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1209; CHECK-LABEL: mulqr_v16i8_x: 1210; CHECK: @ %bb.0: @ %entry 1211; CHECK-NEXT: vctp.8 r1 1212; CHECK-NEXT: vpst 1213; CHECK-NEXT: vmult.i8 q0, q0, r0 1214; CHECK-NEXT: bx lr 1215entry: 1216 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1217 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1218 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1219 %a = mul <16 x i8> %x, %ys 1220 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1221 ret <16 x i8> %b 1222} 1223 1224define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_x(<4 x float> %x, float %y, i32 %n) { 1225; CHECK-LABEL: faddqr_v4f32_x: 1226; CHECK: @ %bb.0: @ %entry 1227; CHECK-NEXT: vmov r1, s4 1228; CHECK-NEXT: vctp.32 r0 1229; CHECK-NEXT: vpst 1230; CHECK-NEXT: vaddt.f32 q0, q0, r1 1231; CHECK-NEXT: bx lr 1232entry: 1233 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1234 %i = insertelement <4 x float> undef, float %y, i32 0 1235 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 1236 %a = fadd <4 x float> %x, %ys 1237 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x 1238 ret <4 x 
float> %b 1239} 1240 1241define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_x(<8 x half> %x, half %y, i32 %n) { 1242; CHECK-LABEL: faddqr_v8f16_x: 1243; CHECK: @ %bb.0: @ %entry 1244; CHECK-NEXT: vmov.f16 r1, s4 1245; CHECK-NEXT: vctp.16 r0 1246; CHECK-NEXT: vpst 1247; CHECK-NEXT: vaddt.f16 q0, q0, r1 1248; CHECK-NEXT: bx lr 1249entry: 1250 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1251 %i = insertelement <8 x half> undef, half %y, i32 0 1252 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 1253 %a = fadd <8 x half> %x, %ys 1254 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x 1255 ret <8 x half> %b 1256} 1257 1258define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_x(<4 x float> %x, float %y, i32 %n) { 1259; CHECK-LABEL: fsubqr_v4f32_x: 1260; CHECK: @ %bb.0: @ %entry 1261; CHECK-NEXT: vmov r1, s4 1262; CHECK-NEXT: vctp.32 r0 1263; CHECK-NEXT: vpst 1264; CHECK-NEXT: vsubt.f32 q0, q0, r1 1265; CHECK-NEXT: bx lr 1266entry: 1267 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1268 %i = insertelement <4 x float> undef, float %y, i32 0 1269 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 1270 %a = fsub <4 x float> %x, %ys 1271 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x 1272 ret <4 x float> %b 1273} 1274 1275define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_x(<8 x half> %x, half %y, i32 %n) { 1276; CHECK-LABEL: fsubqr_v8f16_x: 1277; CHECK: @ %bb.0: @ %entry 1278; CHECK-NEXT: vmov.f16 r1, s4 1279; CHECK-NEXT: vctp.16 r0 1280; CHECK-NEXT: vpst 1281; CHECK-NEXT: vsubt.f16 q0, q0, r1 1282; CHECK-NEXT: bx lr 1283entry: 1284 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1285 %i = insertelement <8 x half> undef, half %y, i32 0 1286 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 1287 %a = fsub <8 x half> %x, %ys 1288 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x 1289 ret <8 x half> %b 1290} 1291 1292define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_x(<4 x float> 
%x, float %y, i32 %n) { 1293; CHECK-LABEL: fmulqr_v4f32_x: 1294; CHECK: @ %bb.0: @ %entry 1295; CHECK-NEXT: vmov r1, s4 1296; CHECK-NEXT: vctp.32 r0 1297; CHECK-NEXT: vpst 1298; CHECK-NEXT: vmult.f32 q0, q0, r1 1299; CHECK-NEXT: bx lr 1300entry: 1301 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1302 %i = insertelement <4 x float> undef, float %y, i32 0 1303 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 1304 %a = fmul <4 x float> %x, %ys 1305 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x 1306 ret <4 x float> %b 1307} 1308 1309define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_x(<8 x half> %x, half %y, i32 %n) { 1310; CHECK-LABEL: fmulqr_v8f16_x: 1311; CHECK: @ %bb.0: @ %entry 1312; CHECK-NEXT: vmov.f16 r1, s4 1313; CHECK-NEXT: vctp.16 r0 1314; CHECK-NEXT: vpst 1315; CHECK-NEXT: vmult.f16 q0, q0, r1 1316; CHECK-NEXT: bx lr 1317entry: 1318 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1319 %i = insertelement <8 x half> undef, half %y, i32 0 1320 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 1321 %a = fmul <8 x half> %x, %ys 1322 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x 1323 ret <8 x half> %b 1324} 1325 1326define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1327; CHECK-LABEL: sadd_satqr_v4i32_x: 1328; CHECK: @ %bb.0: @ %entry 1329; CHECK-NEXT: vctp.32 r1 1330; CHECK-NEXT: vpst 1331; CHECK-NEXT: vqaddt.s32 q0, q0, r0 1332; CHECK-NEXT: bx lr 1333entry: 1334 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1335 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1336 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1337 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1338 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1339 ret <4 x i32> %b 1340} 1341 1342define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1343; CHECK-LABEL: sadd_satqr_v8i16_x: 1344; CHECK: @ 
%bb.0: @ %entry 1345; CHECK-NEXT: vctp.16 r1 1346; CHECK-NEXT: vpst 1347; CHECK-NEXT: vqaddt.s16 q0, q0, r0 1348; CHECK-NEXT: bx lr 1349entry: 1350 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1351 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1352 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1353 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1354 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1355 ret <8 x i16> %b 1356} 1357 1358define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1359; CHECK-LABEL: sadd_satqr_v16i8_x: 1360; CHECK: @ %bb.0: @ %entry 1361; CHECK-NEXT: vctp.8 r1 1362; CHECK-NEXT: vpst 1363; CHECK-NEXT: vqaddt.s8 q0, q0, r0 1364; CHECK-NEXT: bx lr 1365entry: 1366 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1367 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1368 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1369 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1370 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1371 ret <16 x i8> %b 1372} 1373 1374define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1375; CHECK-LABEL: uadd_satqr_v4i32_x: 1376; CHECK: @ %bb.0: @ %entry 1377; CHECK-NEXT: vctp.32 r1 1378; CHECK-NEXT: vpst 1379; CHECK-NEXT: vqaddt.u32 q0, q0, r0 1380; CHECK-NEXT: bx lr 1381entry: 1382 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1383 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1384 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1385 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1386 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1387 ret <4 x i32> %b 1388} 1389 1390define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1391; CHECK-LABEL: uadd_satqr_v8i16_x: 1392; CHECK: @ %bb.0: @ %entry 1393; CHECK-NEXT: vctp.16 r1 1394; CHECK-NEXT: vpst 1395; 
CHECK-NEXT: vqaddt.u16 q0, q0, r0 1396; CHECK-NEXT: bx lr 1397entry: 1398 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1399 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1400 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1401 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1402 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1403 ret <8 x i16> %b 1404} 1405 1406define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1407; CHECK-LABEL: uadd_satqr_v16i8_x: 1408; CHECK: @ %bb.0: @ %entry 1409; CHECK-NEXT: vctp.8 r1 1410; CHECK-NEXT: vpst 1411; CHECK-NEXT: vqaddt.u8 q0, q0, r0 1412; CHECK-NEXT: bx lr 1413entry: 1414 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1415 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1416 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1417 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1418 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1419 ret <16 x i8> %b 1420} 1421 1422define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1423; CHECK-LABEL: ssub_satqr_v4i32_x: 1424; CHECK: @ %bb.0: @ %entry 1425; CHECK-NEXT: vctp.32 r1 1426; CHECK-NEXT: vpst 1427; CHECK-NEXT: vqsubt.s32 q0, q0, r0 1428; CHECK-NEXT: bx lr 1429entry: 1430 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1431 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1432 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1433 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1434 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1435 ret <4 x i32> %b 1436} 1437 1438define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1439; CHECK-LABEL: ssub_satqr_v8i16_x: 1440; CHECK: @ %bb.0: @ %entry 1441; CHECK-NEXT: vctp.16 r1 1442; CHECK-NEXT: vpst 1443; CHECK-NEXT: vqsubt.s16 q0, q0, r0 1444; CHECK-NEXT: bx lr 1445entry: 1446 %c = 
call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1447 %i = insertelement <8 x i16> undef, i16 %y, i32 0 1448 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1449 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1450 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1451 ret <8 x i16> %b 1452} 1453 1454define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1455; CHECK-LABEL: ssub_satqr_v16i8_x: 1456; CHECK: @ %bb.0: @ %entry 1457; CHECK-NEXT: vctp.8 r1 1458; CHECK-NEXT: vpst 1459; CHECK-NEXT: vqsubt.s8 q0, q0, r0 1460; CHECK-NEXT: bx lr 1461entry: 1462 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1463 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1464 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1465 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1466 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1467 ret <16 x i8> %b 1468} 1469 1470define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1471; CHECK-LABEL: usub_satqr_v4i32_x: 1472; CHECK: @ %bb.0: @ %entry 1473; CHECK-NEXT: vctp.32 r1 1474; CHECK-NEXT: vpst 1475; CHECK-NEXT: vqsubt.u32 q0, q0, r0 1476; CHECK-NEXT: bx lr 1477entry: 1478 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1479 %i = insertelement <4 x i32> undef, i32 %y, i32 0 1480 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1481 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1482 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1483 ret <4 x i32> %b 1484} 1485 1486define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1487; CHECK-LABEL: usub_satqr_v8i16_x: 1488; CHECK: @ %bb.0: @ %entry 1489; CHECK-NEXT: vctp.16 r1 1490; CHECK-NEXT: vpst 1491; CHECK-NEXT: vqsubt.u16 q0, q0, r0 1492; CHECK-NEXT: bx lr 1493entry: 1494 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1495 %i = insertelement <8 x i16> 
undef, i16 %y, i32 0 1496 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1497 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1498 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1499 ret <8 x i16> %b 1500} 1501 1502define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1503; CHECK-LABEL: usub_satqr_v16i8_x: 1504; CHECK: @ %bb.0: @ %entry 1505; CHECK-NEXT: vctp.8 r1 1506; CHECK-NEXT: vpst 1507; CHECK-NEXT: vqsubt.u8 q0, q0, r0 1508; CHECK-NEXT: bx lr 1509entry: 1510 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1511 %i = insertelement <16 x i8> undef, i8 %y, i32 0 1512 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1513 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1514 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1515 ret <16 x i8> %b 1516} 1517 1518define arm_aapcs_vfpcc <4 x i32> @add_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1519; CHECK-LABEL: add_v4i32_y: 1520; CHECK: @ %bb.0: @ %entry 1521; CHECK-NEXT: vctp.32 r0 1522; CHECK-NEXT: vpst 1523; CHECK-NEXT: vaddt.i32 q1, q0, q1 1524; CHECK-NEXT: vmov q0, q1 1525; CHECK-NEXT: bx lr 1526entry: 1527 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1528 %a = add <4 x i32> %x, %y 1529 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1530 ret <4 x i32> %b 1531} 1532 1533define arm_aapcs_vfpcc <8 x i16> @add_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1534; CHECK-LABEL: add_v8i16_y: 1535; CHECK: @ %bb.0: @ %entry 1536; CHECK-NEXT: vctp.16 r0 1537; CHECK-NEXT: vpst 1538; CHECK-NEXT: vaddt.i16 q1, q0, q1 1539; CHECK-NEXT: vmov q0, q1 1540; CHECK-NEXT: bx lr 1541entry: 1542 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1543 %a = add <8 x i16> %x, %y 1544 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1545 ret <8 x i16> %b 1546} 1547 1548define arm_aapcs_vfpcc <16 x i8> @add_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1549; CHECK-LABEL: add_v16i8_y: 1550; 
CHECK: @ %bb.0: @ %entry 1551; CHECK-NEXT: vctp.8 r0 1552; CHECK-NEXT: vpst 1553; CHECK-NEXT: vaddt.i8 q1, q0, q1 1554; CHECK-NEXT: vmov q0, q1 1555; CHECK-NEXT: bx lr 1556entry: 1557 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1558 %a = add <16 x i8> %x, %y 1559 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1560 ret <16 x i8> %b 1561} 1562 1563define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1564; CHECK-LABEL: sub_v4i32_y: 1565; CHECK: @ %bb.0: @ %entry 1566; CHECK-NEXT: vctp.32 r0 1567; CHECK-NEXT: vpst 1568; CHECK-NEXT: vsubt.i32 q1, q0, q1 1569; CHECK-NEXT: vmov q0, q1 1570; CHECK-NEXT: bx lr 1571entry: 1572 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1573 %a = sub <4 x i32> %x, %y 1574 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1575 ret <4 x i32> %b 1576} 1577 1578define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1579; CHECK-LABEL: sub_v8i16_y: 1580; CHECK: @ %bb.0: @ %entry 1581; CHECK-NEXT: vctp.16 r0 1582; CHECK-NEXT: vpst 1583; CHECK-NEXT: vsubt.i16 q1, q0, q1 1584; CHECK-NEXT: vmov q0, q1 1585; CHECK-NEXT: bx lr 1586entry: 1587 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1588 %a = sub <8 x i16> %x, %y 1589 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1590 ret <8 x i16> %b 1591} 1592 1593define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1594; CHECK-LABEL: sub_v16i8_y: 1595; CHECK: @ %bb.0: @ %entry 1596; CHECK-NEXT: vctp.8 r0 1597; CHECK-NEXT: vpst 1598; CHECK-NEXT: vsubt.i8 q1, q0, q1 1599; CHECK-NEXT: vmov q0, q1 1600; CHECK-NEXT: bx lr 1601entry: 1602 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1603 %a = sub <16 x i8> %x, %y 1604 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1605 ret <16 x i8> %b 1606} 1607 1608define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1609; CHECK-LABEL: mul_v4i32_y: 1610; CHECK: @ %bb.0: @ %entry 1611; CHECK-NEXT: vctp.32 r0 1612; CHECK-NEXT: vpst 
1613; CHECK-NEXT: vmult.i32 q1, q0, q1 1614; CHECK-NEXT: vmov q0, q1 1615; CHECK-NEXT: bx lr 1616entry: 1617 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1618 %a = mul <4 x i32> %x, %y 1619 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1620 ret <4 x i32> %b 1621} 1622 1623define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1624; CHECK-LABEL: mul_v8i16_y: 1625; CHECK: @ %bb.0: @ %entry 1626; CHECK-NEXT: vctp.16 r0 1627; CHECK-NEXT: vpst 1628; CHECK-NEXT: vmult.i16 q1, q0, q1 1629; CHECK-NEXT: vmov q0, q1 1630; CHECK-NEXT: bx lr 1631entry: 1632 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1633 %a = mul <8 x i16> %x, %y 1634 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1635 ret <8 x i16> %b 1636} 1637 1638define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1639; CHECK-LABEL: mul_v16i8_y: 1640; CHECK: @ %bb.0: @ %entry 1641; CHECK-NEXT: vctp.8 r0 1642; CHECK-NEXT: vpst 1643; CHECK-NEXT: vmult.i8 q1, q0, q1 1644; CHECK-NEXT: vmov q0, q1 1645; CHECK-NEXT: bx lr 1646entry: 1647 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1648 %a = mul <16 x i8> %x, %y 1649 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1650 ret <16 x i8> %b 1651} 1652 1653define arm_aapcs_vfpcc <4 x i32> @and_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1654; CHECK-LABEL: and_v4i32_y: 1655; CHECK: @ %bb.0: @ %entry 1656; CHECK-NEXT: vctp.32 r0 1657; CHECK-NEXT: vpst 1658; CHECK-NEXT: vandt q1, q0, q1 1659; CHECK-NEXT: vmov q0, q1 1660; CHECK-NEXT: bx lr 1661entry: 1662 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1663 %a = and <4 x i32> %x, %y 1664 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1665 ret <4 x i32> %b 1666} 1667 1668define arm_aapcs_vfpcc <8 x i16> @and_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1669; CHECK-LABEL: and_v8i16_y: 1670; CHECK: @ %bb.0: @ %entry 1671; CHECK-NEXT: vctp.16 r0 1672; CHECK-NEXT: vpst 1673; CHECK-NEXT: vandt q1, q0, q1 1674; CHECK-NEXT: vmov q0, q1 1675; 
CHECK-NEXT: bx lr 1676entry: 1677 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1678 %a = and <8 x i16> %x, %y 1679 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1680 ret <8 x i16> %b 1681} 1682 1683define arm_aapcs_vfpcc <16 x i8> @and_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1684; CHECK-LABEL: and_v16i8_y: 1685; CHECK: @ %bb.0: @ %entry 1686; CHECK-NEXT: vctp.8 r0 1687; CHECK-NEXT: vpst 1688; CHECK-NEXT: vandt q1, q0, q1 1689; CHECK-NEXT: vmov q0, q1 1690; CHECK-NEXT: bx lr 1691entry: 1692 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1693 %a = and <16 x i8> %x, %y 1694 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1695 ret <16 x i8> %b 1696} 1697 1698define arm_aapcs_vfpcc <4 x i32> @or_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1699; CHECK-LABEL: or_v4i32_y: 1700; CHECK: @ %bb.0: @ %entry 1701; CHECK-NEXT: vctp.32 r0 1702; CHECK-NEXT: vpst 1703; CHECK-NEXT: vorrt q1, q0, q1 1704; CHECK-NEXT: vmov q0, q1 1705; CHECK-NEXT: bx lr 1706entry: 1707 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1708 %a = or <4 x i32> %x, %y 1709 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1710 ret <4 x i32> %b 1711} 1712 1713define arm_aapcs_vfpcc <8 x i16> @or_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1714; CHECK-LABEL: or_v8i16_y: 1715; CHECK: @ %bb.0: @ %entry 1716; CHECK-NEXT: vctp.16 r0 1717; CHECK-NEXT: vpst 1718; CHECK-NEXT: vorrt q1, q0, q1 1719; CHECK-NEXT: vmov q0, q1 1720; CHECK-NEXT: bx lr 1721entry: 1722 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1723 %a = or <8 x i16> %x, %y 1724 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1725 ret <8 x i16> %b 1726} 1727 1728define arm_aapcs_vfpcc <16 x i8> @or_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1729; CHECK-LABEL: or_v16i8_y: 1730; CHECK: @ %bb.0: @ %entry 1731; CHECK-NEXT: vctp.8 r0 1732; CHECK-NEXT: vpst 1733; CHECK-NEXT: vorrt q1, q0, q1 1734; CHECK-NEXT: vmov q0, q1 1735; CHECK-NEXT: bx lr 1736entry: 1737 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1738 %a = or <16 x 
i8> %x, %y 1739 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1740 ret <16 x i8> %b 1741} 1742 1743define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1744; CHECK-LABEL: xor_v4i32_y: 1745; CHECK: @ %bb.0: @ %entry 1746; CHECK-NEXT: vctp.32 r0 1747; CHECK-NEXT: vpst 1748; CHECK-NEXT: veort q1, q0, q1 1749; CHECK-NEXT: vmov q0, q1 1750; CHECK-NEXT: bx lr 1751entry: 1752 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1753 %a = xor <4 x i32> %x, %y 1754 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1755 ret <4 x i32> %b 1756} 1757 1758define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1759; CHECK-LABEL: xor_v8i16_y: 1760; CHECK: @ %bb.0: @ %entry 1761; CHECK-NEXT: vctp.16 r0 1762; CHECK-NEXT: vpst 1763; CHECK-NEXT: veort q1, q0, q1 1764; CHECK-NEXT: vmov q0, q1 1765; CHECK-NEXT: bx lr 1766entry: 1767 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1768 %a = xor <8 x i16> %x, %y 1769 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1770 ret <8 x i16> %b 1771} 1772 1773define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1774; CHECK-LABEL: xor_v16i8_y: 1775; CHECK: @ %bb.0: @ %entry 1776; CHECK-NEXT: vctp.8 r0 1777; CHECK-NEXT: vpst 1778; CHECK-NEXT: veort q1, q0, q1 1779; CHECK-NEXT: vmov q0, q1 1780; CHECK-NEXT: bx lr 1781entry: 1782 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1783 %a = xor <16 x i8> %x, %y 1784 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1785 ret <16 x i8> %b 1786} 1787 1788define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1789; CHECK-LABEL: shl_v4i32_y: 1790; CHECK: @ %bb.0: @ %entry 1791; CHECK-NEXT: vctp.32 r0 1792; CHECK-NEXT: vpst 1793; CHECK-NEXT: vshlt.u32 q1, q0, q1 1794; CHECK-NEXT: vmov q0, q1 1795; CHECK-NEXT: bx lr 1796entry: 1797 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1798 %a = shl <4 x i32> %x, %y 1799 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1800 ret <4 x i32> 
%b 1801} 1802 1803define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1804; CHECK-LABEL: shl_v8i16_y: 1805; CHECK: @ %bb.0: @ %entry 1806; CHECK-NEXT: vctp.16 r0 1807; CHECK-NEXT: vpst 1808; CHECK-NEXT: vshlt.u16 q1, q0, q1 1809; CHECK-NEXT: vmov q0, q1 1810; CHECK-NEXT: bx lr 1811entry: 1812 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1813 %a = shl <8 x i16> %x, %y 1814 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1815 ret <8 x i16> %b 1816} 1817 1818define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1819; CHECK-LABEL: shl_v16i8_y: 1820; CHECK: @ %bb.0: @ %entry 1821; CHECK-NEXT: vctp.8 r0 1822; CHECK-NEXT: vpst 1823; CHECK-NEXT: vshlt.u8 q1, q0, q1 1824; CHECK-NEXT: vmov q0, q1 1825; CHECK-NEXT: bx lr 1826entry: 1827 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1828 %a = shl <16 x i8> %x, %y 1829 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1830 ret <16 x i8> %b 1831} 1832 1833define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1834; CHECK-LABEL: ashr_v4i32_y: 1835; CHECK: @ %bb.0: @ %entry 1836; CHECK-NEXT: vneg.s32 q2, q1 1837; CHECK-NEXT: vctp.32 r0 1838; CHECK-NEXT: vpst 1839; CHECK-NEXT: vshlt.s32 q1, q0, q2 1840; CHECK-NEXT: vmov q0, q1 1841; CHECK-NEXT: bx lr 1842entry: 1843 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1844 %a = ashr <4 x i32> %x, %y 1845 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1846 ret <4 x i32> %b 1847} 1848 1849define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1850; CHECK-LABEL: ashr_v8i16_y: 1851; CHECK: @ %bb.0: @ %entry 1852; CHECK-NEXT: vneg.s16 q2, q1 1853; CHECK-NEXT: vctp.16 r0 1854; CHECK-NEXT: vpst 1855; CHECK-NEXT: vshlt.s16 q1, q0, q2 1856; CHECK-NEXT: vmov q0, q1 1857; CHECK-NEXT: bx lr 1858entry: 1859 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1860 %a = ashr <8 x i16> %x, %y 1861 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1862 ret <8 x i16> %b 
1863} 1864 1865define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1866; CHECK-LABEL: ashr_v16i8_y: 1867; CHECK: @ %bb.0: @ %entry 1868; CHECK-NEXT: vneg.s8 q2, q1 1869; CHECK-NEXT: vctp.8 r0 1870; CHECK-NEXT: vpst 1871; CHECK-NEXT: vshlt.s8 q1, q0, q2 1872; CHECK-NEXT: vmov q0, q1 1873; CHECK-NEXT: bx lr 1874entry: 1875 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1876 %a = ashr <16 x i8> %x, %y 1877 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1878 ret <16 x i8> %b 1879} 1880 1881define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1882; CHECK-LABEL: lshr_v4i32_y: 1883; CHECK: @ %bb.0: @ %entry 1884; CHECK-NEXT: vneg.s32 q2, q1 1885; CHECK-NEXT: vctp.32 r0 1886; CHECK-NEXT: vpst 1887; CHECK-NEXT: vshlt.u32 q1, q0, q2 1888; CHECK-NEXT: vmov q0, q1 1889; CHECK-NEXT: bx lr 1890entry: 1891 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1892 %a = lshr <4 x i32> %x, %y 1893 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1894 ret <4 x i32> %b 1895} 1896 1897define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1898; CHECK-LABEL: lshr_v8i16_y: 1899; CHECK: @ %bb.0: @ %entry 1900; CHECK-NEXT: vneg.s16 q2, q1 1901; CHECK-NEXT: vctp.16 r0 1902; CHECK-NEXT: vpst 1903; CHECK-NEXT: vshlt.u16 q1, q0, q2 1904; CHECK-NEXT: vmov q0, q1 1905; CHECK-NEXT: bx lr 1906entry: 1907 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1908 %a = lshr <8 x i16> %x, %y 1909 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1910 ret <8 x i16> %b 1911} 1912 1913define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1914; CHECK-LABEL: lshr_v16i8_y: 1915; CHECK: @ %bb.0: @ %entry 1916; CHECK-NEXT: vneg.s8 q2, q1 1917; CHECK-NEXT: vctp.8 r0 1918; CHECK-NEXT: vpst 1919; CHECK-NEXT: vshlt.u8 q1, q0, q2 1920; CHECK-NEXT: vmov q0, q1 1921; CHECK-NEXT: bx lr 1922entry: 1923 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1924 %a = lshr <16 x i8> %x, %y 1925 %b = 
select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1926 ret <16 x i8> %b 1927} 1928 1929define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1930; CHECK-LABEL: andnot_v4i32_y: 1931; CHECK: @ %bb.0: @ %entry 1932; CHECK-NEXT: vctp.32 r0 1933; CHECK-NEXT: vpst 1934; CHECK-NEXT: vbict q1, q0, q1 1935; CHECK-NEXT: vmov q0, q1 1936; CHECK-NEXT: bx lr 1937entry: 1938 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1939 %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> 1940 %a = and <4 x i32> %x, %y1 1941 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1942 ret <4 x i32> %b 1943} 1944 1945define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1946; CHECK-LABEL: andnot_v8i16_y: 1947; CHECK: @ %bb.0: @ %entry 1948; CHECK-NEXT: vctp.16 r0 1949; CHECK-NEXT: vpst 1950; CHECK-NEXT: vbict q1, q0, q1 1951; CHECK-NEXT: vmov q0, q1 1952; CHECK-NEXT: bx lr 1953entry: 1954 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1955 %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1956 %a = and <8 x i16> %x, %y1 1957 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1958 ret <8 x i16> %b 1959} 1960 1961define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1962; CHECK-LABEL: andnot_v16i8_y: 1963; CHECK: @ %bb.0: @ %entry 1964; CHECK-NEXT: vctp.8 r0 1965; CHECK-NEXT: vpst 1966; CHECK-NEXT: vbict q1, q0, q1 1967; CHECK-NEXT: vmov q0, q1 1968; CHECK-NEXT: bx lr 1969entry: 1970 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1971 %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1972 %a = and <16 x i8> %x, %y1 1973 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1974 ret <16 x i8> %b 1975} 1976 1977define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1978; CHECK-LABEL: ornot_v4i32_y: 1979; CHECK: @ %bb.0: @ %entry 1980; 
CHECK-NEXT: vctp.32 r0 1981; CHECK-NEXT: vpst 1982; CHECK-NEXT: vornt q1, q0, q1 1983; CHECK-NEXT: vmov q0, q1 1984; CHECK-NEXT: bx lr 1985entry: 1986 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1987 %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> 1988 %a = or <4 x i32> %x, %y1 1989 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1990 ret <4 x i32> %b 1991} 1992 1993define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1994; CHECK-LABEL: ornot_v8i16_y: 1995; CHECK: @ %bb.0: @ %entry 1996; CHECK-NEXT: vctp.16 r0 1997; CHECK-NEXT: vpst 1998; CHECK-NEXT: vornt q1, q0, q1 1999; CHECK-NEXT: vmov q0, q1 2000; CHECK-NEXT: bx lr 2001entry: 2002 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2003 %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 2004 %a = or <8 x i16> %x, %y1 2005 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2006 ret <8 x i16> %b 2007} 2008 2009define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2010; CHECK-LABEL: ornot_v16i8_y: 2011; CHECK: @ %bb.0: @ %entry 2012; CHECK-NEXT: vctp.8 r0 2013; CHECK-NEXT: vpst 2014; CHECK-NEXT: vornt q1, q0, q1 2015; CHECK-NEXT: vmov q0, q1 2016; CHECK-NEXT: bx lr 2017entry: 2018 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2019 %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 2020 %a = or <16 x i8> %x, %y1 2021 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2022 ret <16 x i8> %b 2023} 2024 2025define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 2026; CHECK-LABEL: fadd_v4f32_y: 2027; CHECK: @ %bb.0: @ %entry 2028; CHECK-NEXT: vctp.32 r0 2029; CHECK-NEXT: vpst 2030; CHECK-NEXT: vaddt.f32 q1, q0, q1 2031; CHECK-NEXT: vmov q0, q1 2032; CHECK-NEXT: bx lr 2033entry: 2034 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2035 %a = fadd <4 x float> %x, %y 2036 %b = select 
<4 x i1> %c, <4 x float> %a, <4 x float> %y 2037 ret <4 x float> %b 2038} 2039 2040define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 2041; CHECK-LABEL: fadd_v8f16_y: 2042; CHECK: @ %bb.0: @ %entry 2043; CHECK-NEXT: vctp.16 r0 2044; CHECK-NEXT: vpst 2045; CHECK-NEXT: vaddt.f16 q1, q0, q1 2046; CHECK-NEXT: vmov q0, q1 2047; CHECK-NEXT: bx lr 2048entry: 2049 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2050 %a = fadd <8 x half> %x, %y 2051 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 2052 ret <8 x half> %b 2053} 2054 2055define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 2056; CHECK-LABEL: fsub_v4f32_y: 2057; CHECK: @ %bb.0: @ %entry 2058; CHECK-NEXT: vctp.32 r0 2059; CHECK-NEXT: vpst 2060; CHECK-NEXT: vsubt.f32 q1, q0, q1 2061; CHECK-NEXT: vmov q0, q1 2062; CHECK-NEXT: bx lr 2063entry: 2064 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2065 %a = fsub <4 x float> %x, %y 2066 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y 2067 ret <4 x float> %b 2068} 2069 2070define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 2071; CHECK-LABEL: fsub_v8f16_y: 2072; CHECK: @ %bb.0: @ %entry 2073; CHECK-NEXT: vctp.16 r0 2074; CHECK-NEXT: vpst 2075; CHECK-NEXT: vsubt.f16 q1, q0, q1 2076; CHECK-NEXT: vmov q0, q1 2077; CHECK-NEXT: bx lr 2078entry: 2079 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2080 %a = fsub <8 x half> %x, %y 2081 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 2082 ret <8 x half> %b 2083} 2084 2085define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 2086; CHECK-LABEL: fmul_v4f32_y: 2087; CHECK: @ %bb.0: @ %entry 2088; CHECK-NEXT: vctp.32 r0 2089; CHECK-NEXT: vpst 2090; CHECK-NEXT: vmult.f32 q1, q0, q1 2091; CHECK-NEXT: vmov q0, q1 2092; CHECK-NEXT: bx lr 2093entry: 2094 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2095 %a = fmul <4 x float> %x, %y 2096 %b = select <4 x i1> %c, <4 x 
float> %a, <4 x float> %y 2097 ret <4 x float> %b 2098} 2099 2100define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 2101; CHECK-LABEL: fmul_v8f16_y: 2102; CHECK: @ %bb.0: @ %entry 2103; CHECK-NEXT: vctp.16 r0 2104; CHECK-NEXT: vpst 2105; CHECK-NEXT: vmult.f16 q1, q0, q1 2106; CHECK-NEXT: vmov q0, q1 2107; CHECK-NEXT: bx lr 2108entry: 2109 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2110 %a = fmul <8 x half> %x, %y 2111 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 2112 ret <8 x half> %b 2113} 2114 2115define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 2116; CHECK-LABEL: fdiv_v4f32_y: 2117; CHECK: @ %bb.0: @ %entry 2118; CHECK-NEXT: vdiv.f32 s3, s3, s7 2119; CHECK-NEXT: vctp.32 r0 2120; CHECK-NEXT: vdiv.f32 s2, s2, s6 2121; CHECK-NEXT: vdiv.f32 s1, s1, s5 2122; CHECK-NEXT: vdiv.f32 s0, s0, s4 2123; CHECK-NEXT: vpst 2124; CHECK-NEXT: vmovt q1, q0 2125; CHECK-NEXT: vmov q0, q1 2126; CHECK-NEXT: bx lr 2127entry: 2128 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2129 %a = fdiv <4 x float> %x, %y 2130 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y 2131 ret <4 x float> %b 2132} 2133 2134define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 2135; CHECK-LABEL: fdiv_v8f16_y: 2136; CHECK: @ %bb.0: @ %entry 2137; CHECK-NEXT: vmovx.f16 s10, s0 2138; CHECK-NEXT: vmovx.f16 s8, s4 2139; CHECK-NEXT: vdiv.f16 s8, s10, s8 2140; CHECK-NEXT: vdiv.f16 s0, s0, s4 2141; CHECK-NEXT: vins.f16 s0, s8 2142; CHECK-NEXT: vmovx.f16 s10, s1 2143; CHECK-NEXT: vmovx.f16 s8, s5 2144; CHECK-NEXT: vdiv.f16 s1, s1, s5 2145; CHECK-NEXT: vdiv.f16 s8, s10, s8 2146; CHECK-NEXT: vmovx.f16 s10, s2 2147; CHECK-NEXT: vins.f16 s1, s8 2148; CHECK-NEXT: vmovx.f16 s8, s6 2149; CHECK-NEXT: vdiv.f16 s8, s10, s8 2150; CHECK-NEXT: vdiv.f16 s2, s2, s6 2151; CHECK-NEXT: vins.f16 s2, s8 2152; CHECK-NEXT: vmovx.f16 s10, s3 2153; CHECK-NEXT: vmovx.f16 s8, s7 2154; CHECK-NEXT: vdiv.f16 s3, 
s3, s7 2155; CHECK-NEXT: vdiv.f16 s8, s10, s8 2156; CHECK-NEXT: vctp.16 r0 2157; CHECK-NEXT: vins.f16 s3, s8 2158; CHECK-NEXT: vpst 2159; CHECK-NEXT: vmovt q1, q0 2160; CHECK-NEXT: vmov q0, q1 2161; CHECK-NEXT: bx lr 2162entry: 2163 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2164 %a = fdiv <8 x half> %x, %y 2165 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 2166 ret <8 x half> %b 2167} 2168 2169define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) { 2170; CHECK-LABEL: fmai_v4f32_y: 2171; CHECK: @ %bb.0: @ %entry 2172; CHECK-NEXT: vfma.f32 q0, q1, q2 2173; CHECK-NEXT: vctp.32 r0 2174; CHECK-NEXT: vpst 2175; CHECK-NEXT: vmovt q1, q0 2176; CHECK-NEXT: vmov q0, q1 2177; CHECK-NEXT: bx lr 2178entry: 2179 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2180 %a = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %x) 2181 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y 2182 ret <4 x float> %b 2183} 2184 2185define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) { 2186; CHECK-LABEL: fmai_v8f16_y: 2187; CHECK: @ %bb.0: @ %entry 2188; CHECK-NEXT: vfma.f16 q0, q1, q2 2189; CHECK-NEXT: vctp.16 r0 2190; CHECK-NEXT: vpst 2191; CHECK-NEXT: vmovt q1, q0 2192; CHECK-NEXT: vmov q0, q1 2193; CHECK-NEXT: bx lr 2194entry: 2195 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2196 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x) 2197 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 2198 ret <8 x half> %b 2199} 2200 2201define arm_aapcs_vfpcc <4 x float> @fma_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) { 2202; CHECK-LABEL: fma_v4f32_y: 2203; CHECK: @ %bb.0: @ %entry 2204; CHECK-NEXT: vfma.f32 q0, q1, q2 2205; CHECK-NEXT: vctp.32 r0 2206; CHECK-NEXT: vpst 2207; CHECK-NEXT: vmovt q1, q0 2208; CHECK-NEXT: vmov q0, q1 2209; CHECK-NEXT: bx lr 2210entry: 2211 %c = call <4 x i1> 
@llvm.arm.mve.vctp32(i32 %n) 2212 %m = fmul fast <4 x float> %y, %z 2213 %a = fadd fast <4 x float> %m, %x 2214 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y 2215 ret <4 x float> %b 2216} 2217 2218define arm_aapcs_vfpcc <8 x half> @fma_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) { 2219; CHECK-LABEL: fma_v8f16_y: 2220; CHECK: @ %bb.0: @ %entry 2221; CHECK-NEXT: vfma.f16 q0, q1, q2 2222; CHECK-NEXT: vctp.16 r0 2223; CHECK-NEXT: vpst 2224; CHECK-NEXT: vmovt q1, q0 2225; CHECK-NEXT: vmov q0, q1 2226; CHECK-NEXT: bx lr 2227entry: 2228 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2229 %m = fmul fast <8 x half> %y, %z 2230 %a = fadd fast <8 x half> %m, %x 2231 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 2232 ret <8 x half> %b 2233} 2234 2235define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2236; CHECK-LABEL: icmp_slt_v4i32_y: 2237; CHECK: @ %bb.0: @ %entry 2238; CHECK-NEXT: vctp.32 r0 2239; CHECK-NEXT: vpst 2240; CHECK-NEXT: vmint.s32 q1, q0, q1 2241; CHECK-NEXT: vmov q0, q1 2242; CHECK-NEXT: bx lr 2243entry: 2244 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2245 %a1 = icmp slt <4 x i32> %x, %y 2246 %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y 2247 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2248 ret <4 x i32> %b 2249} 2250 2251define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2252; CHECK-LABEL: icmp_slt_v8i16_y: 2253; CHECK: @ %bb.0: @ %entry 2254; CHECK-NEXT: vctp.16 r0 2255; CHECK-NEXT: vpst 2256; CHECK-NEXT: vmint.s16 q1, q0, q1 2257; CHECK-NEXT: vmov q0, q1 2258; CHECK-NEXT: bx lr 2259entry: 2260 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2261 %a1 = icmp slt <8 x i16> %x, %y 2262 %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y 2263 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2264 ret <8 x i16> %b 2265} 2266 2267define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2268; 
CHECK-LABEL: icmp_slt_v16i8_y: 2269; CHECK: @ %bb.0: @ %entry 2270; CHECK-NEXT: vctp.8 r0 2271; CHECK-NEXT: vpst 2272; CHECK-NEXT: vmint.s8 q1, q0, q1 2273; CHECK-NEXT: vmov q0, q1 2274; CHECK-NEXT: bx lr 2275entry: 2276 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2277 %a1 = icmp slt <16 x i8> %x, %y 2278 %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y 2279 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2280 ret <16 x i8> %b 2281} 2282 2283define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2284; CHECK-LABEL: icmp_sgt_v4i32_y: 2285; CHECK: @ %bb.0: @ %entry 2286; CHECK-NEXT: vctp.32 r0 2287; CHECK-NEXT: vpst 2288; CHECK-NEXT: vmaxt.s32 q1, q0, q1 2289; CHECK-NEXT: vmov q0, q1 2290; CHECK-NEXT: bx lr 2291entry: 2292 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2293 %a1 = icmp sgt <4 x i32> %x, %y 2294 %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y 2295 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2296 ret <4 x i32> %b 2297} 2298 2299define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2300; CHECK-LABEL: icmp_sgt_v8i16_y: 2301; CHECK: @ %bb.0: @ %entry 2302; CHECK-NEXT: vctp.16 r0 2303; CHECK-NEXT: vpst 2304; CHECK-NEXT: vmaxt.s16 q1, q0, q1 2305; CHECK-NEXT: vmov q0, q1 2306; CHECK-NEXT: bx lr 2307entry: 2308 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2309 %a1 = icmp sgt <8 x i16> %x, %y 2310 %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y 2311 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2312 ret <8 x i16> %b 2313} 2314 2315define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2316; CHECK-LABEL: icmp_sgt_v16i8_y: 2317; CHECK: @ %bb.0: @ %entry 2318; CHECK-NEXT: vctp.8 r0 2319; CHECK-NEXT: vpst 2320; CHECK-NEXT: vmaxt.s8 q1, q0, q1 2321; CHECK-NEXT: vmov q0, q1 2322; CHECK-NEXT: bx lr 2323entry: 2324 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2325 %a1 = icmp sgt <16 x i8> %x, %y 2326 %a = select <16 x i1> 
%a1, <16 x i8> %x, <16 x i8> %y 2327 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2328 ret <16 x i8> %b 2329} 2330 2331define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2332; CHECK-LABEL: icmp_ult_v4i32_y: 2333; CHECK: @ %bb.0: @ %entry 2334; CHECK-NEXT: vctp.32 r0 2335; CHECK-NEXT: vpst 2336; CHECK-NEXT: vmint.u32 q1, q0, q1 2337; CHECK-NEXT: vmov q0, q1 2338; CHECK-NEXT: bx lr 2339entry: 2340 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2341 %a1 = icmp ult <4 x i32> %x, %y 2342 %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y 2343 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2344 ret <4 x i32> %b 2345} 2346 2347define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2348; CHECK-LABEL: icmp_ult_v8i16_y: 2349; CHECK: @ %bb.0: @ %entry 2350; CHECK-NEXT: vctp.16 r0 2351; CHECK-NEXT: vpst 2352; CHECK-NEXT: vmint.u16 q1, q0, q1 2353; CHECK-NEXT: vmov q0, q1 2354; CHECK-NEXT: bx lr 2355entry: 2356 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2357 %a1 = icmp ult <8 x i16> %x, %y 2358 %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y 2359 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2360 ret <8 x i16> %b 2361} 2362 2363define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2364; CHECK-LABEL: icmp_ult_v16i8_y: 2365; CHECK: @ %bb.0: @ %entry 2366; CHECK-NEXT: vctp.8 r0 2367; CHECK-NEXT: vpst 2368; CHECK-NEXT: vmint.u8 q1, q0, q1 2369; CHECK-NEXT: vmov q0, q1 2370; CHECK-NEXT: bx lr 2371entry: 2372 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2373 %a1 = icmp ult <16 x i8> %x, %y 2374 %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y 2375 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2376 ret <16 x i8> %b 2377} 2378 2379define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2380; CHECK-LABEL: icmp_ugt_v4i32_y: 2381; CHECK: @ %bb.0: @ %entry 2382; CHECK-NEXT: vctp.32 r0 2383; CHECK-NEXT: 
vpst 2384; CHECK-NEXT: vmaxt.u32 q1, q0, q1 2385; CHECK-NEXT: vmov q0, q1 2386; CHECK-NEXT: bx lr 2387entry: 2388 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2389 %a1 = icmp ugt <4 x i32> %x, %y 2390 %a = select <4 x i1> %a1, <4 x i32> %x, <4 x i32> %y 2391 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2392 ret <4 x i32> %b 2393} 2394 2395define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2396; CHECK-LABEL: icmp_ugt_v8i16_y: 2397; CHECK: @ %bb.0: @ %entry 2398; CHECK-NEXT: vctp.16 r0 2399; CHECK-NEXT: vpst 2400; CHECK-NEXT: vmaxt.u16 q1, q0, q1 2401; CHECK-NEXT: vmov q0, q1 2402; CHECK-NEXT: bx lr 2403entry: 2404 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2405 %a1 = icmp ugt <8 x i16> %x, %y 2406 %a = select <8 x i1> %a1, <8 x i16> %x, <8 x i16> %y 2407 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2408 ret <8 x i16> %b 2409} 2410 2411define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2412; CHECK-LABEL: icmp_ugt_v16i8_y: 2413; CHECK: @ %bb.0: @ %entry 2414; CHECK-NEXT: vctp.8 r0 2415; CHECK-NEXT: vpst 2416; CHECK-NEXT: vmaxt.u8 q1, q0, q1 2417; CHECK-NEXT: vmov q0, q1 2418; CHECK-NEXT: bx lr 2419entry: 2420 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2421 %a1 = icmp ugt <16 x i8> %x, %y 2422 %a = select <16 x i1> %a1, <16 x i8> %x, <16 x i8> %y 2423 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2424 ret <16 x i8> %b 2425} 2426 2427define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 2428; CHECK-LABEL: fcmp_fast_olt_v4f32_y: 2429; CHECK: @ %bb.0: @ %entry 2430; CHECK-NEXT: vctp.32 r0 2431; CHECK-NEXT: vpst 2432; CHECK-NEXT: vminnmt.f32 q1, q0, q1 2433; CHECK-NEXT: vmov q0, q1 2434; CHECK-NEXT: bx lr 2435entry: 2436 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2437 %a1 = fcmp fast olt <4 x float> %x, %y 2438 %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y 2439 %b = select <4 x i1> %c, <4 x float> %a, <4 
x float> %y 2440 ret <4 x float> %b 2441} 2442 2443define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 2444; CHECK-LABEL: fcmp_fast_olt_v8f16_y: 2445; CHECK: @ %bb.0: @ %entry 2446; CHECK-NEXT: vctp.16 r0 2447; CHECK-NEXT: vpst 2448; CHECK-NEXT: vminnmt.f16 q1, q0, q1 2449; CHECK-NEXT: vmov q0, q1 2450; CHECK-NEXT: bx lr 2451entry: 2452 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2453 %a1 = fcmp fast olt <8 x half> %x, %y 2454 %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y 2455 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 2456 ret <8 x half> %b 2457} 2458 2459define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) { 2460; CHECK-LABEL: fcmp_fast_ogt_v4f32_y: 2461; CHECK: @ %bb.0: @ %entry 2462; CHECK-NEXT: vctp.32 r0 2463; CHECK-NEXT: vpst 2464; CHECK-NEXT: vmaxnmt.f32 q1, q0, q1 2465; CHECK-NEXT: vmov q0, q1 2466; CHECK-NEXT: bx lr 2467entry: 2468 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2469 %a1 = fcmp fast ogt <4 x float> %x, %y 2470 %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y 2471 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y 2472 ret <4 x float> %b 2473} 2474 2475define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) { 2476; CHECK-LABEL: fcmp_fast_ogt_v8f16_y: 2477; CHECK: @ %bb.0: @ %entry 2478; CHECK-NEXT: vctp.16 r0 2479; CHECK-NEXT: vpst 2480; CHECK-NEXT: vmaxnmt.f16 q1, q0, q1 2481; CHECK-NEXT: vmov q0, q1 2482; CHECK-NEXT: bx lr 2483entry: 2484 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2485 %a1 = fcmp fast ogt <8 x half> %x, %y 2486 %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y 2487 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y 2488 ret <8 x half> %b 2489} 2490 2491define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2492; CHECK-LABEL: sadd_sat_v4i32_y: 2493; CHECK: @ %bb.0: @ %entry 2494; CHECK-NEXT: vctp.32 r0 2495; 
CHECK-NEXT: vpst 2496; CHECK-NEXT: vqaddt.s32 q1, q0, q1 2497; CHECK-NEXT: vmov q0, q1 2498; CHECK-NEXT: bx lr 2499entry: 2500 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2501 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2502 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2503 ret <4 x i32> %b 2504} 2505 2506define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2507; CHECK-LABEL: sadd_sat_v8i16_y: 2508; CHECK: @ %bb.0: @ %entry 2509; CHECK-NEXT: vctp.16 r0 2510; CHECK-NEXT: vpst 2511; CHECK-NEXT: vqaddt.s16 q1, q0, q1 2512; CHECK-NEXT: vmov q0, q1 2513; CHECK-NEXT: bx lr 2514entry: 2515 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2516 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2517 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2518 ret <8 x i16> %b 2519} 2520 2521define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2522; CHECK-LABEL: sadd_sat_v16i8_y: 2523; CHECK: @ %bb.0: @ %entry 2524; CHECK-NEXT: vctp.8 r0 2525; CHECK-NEXT: vpst 2526; CHECK-NEXT: vqaddt.s8 q1, q0, q1 2527; CHECK-NEXT: vmov q0, q1 2528; CHECK-NEXT: bx lr 2529entry: 2530 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2531 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2532 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2533 ret <16 x i8> %b 2534} 2535 2536define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2537; CHECK-LABEL: uadd_sat_v4i32_y: 2538; CHECK: @ %bb.0: @ %entry 2539; CHECK-NEXT: vctp.32 r0 2540; CHECK-NEXT: vpst 2541; CHECK-NEXT: vqaddt.u32 q1, q0, q1 2542; CHECK-NEXT: vmov q0, q1 2543; CHECK-NEXT: bx lr 2544entry: 2545 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2546 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2547 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2548 ret <4 x i32> %b 2549} 2550 2551define arm_aapcs_vfpcc <8 x i16> 
@uadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2552; CHECK-LABEL: uadd_sat_v8i16_y: 2553; CHECK: @ %bb.0: @ %entry 2554; CHECK-NEXT: vctp.16 r0 2555; CHECK-NEXT: vpst 2556; CHECK-NEXT: vqaddt.u16 q1, q0, q1 2557; CHECK-NEXT: vmov q0, q1 2558; CHECK-NEXT: bx lr 2559entry: 2560 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2561 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2562 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2563 ret <8 x i16> %b 2564} 2565 2566define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2567; CHECK-LABEL: uadd_sat_v16i8_y: 2568; CHECK: @ %bb.0: @ %entry 2569; CHECK-NEXT: vctp.8 r0 2570; CHECK-NEXT: vpst 2571; CHECK-NEXT: vqaddt.u8 q1, q0, q1 2572; CHECK-NEXT: vmov q0, q1 2573; CHECK-NEXT: bx lr 2574entry: 2575 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2576 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2577 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2578 ret <16 x i8> %b 2579} 2580 2581define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2582; CHECK-LABEL: ssub_sat_v4i32_y: 2583; CHECK: @ %bb.0: @ %entry 2584; CHECK-NEXT: vctp.32 r0 2585; CHECK-NEXT: vpst 2586; CHECK-NEXT: vqsubt.s32 q1, q0, q1 2587; CHECK-NEXT: vmov q0, q1 2588; CHECK-NEXT: bx lr 2589entry: 2590 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2591 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2592 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2593 ret <4 x i32> %b 2594} 2595 2596define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2597; CHECK-LABEL: ssub_sat_v8i16_y: 2598; CHECK: @ %bb.0: @ %entry 2599; CHECK-NEXT: vctp.16 r0 2600; CHECK-NEXT: vpst 2601; CHECK-NEXT: vqsubt.s16 q1, q0, q1 2602; CHECK-NEXT: vmov q0, q1 2603; CHECK-NEXT: bx lr 2604entry: 2605 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2606 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, 
<8 x i16> %y) 2607 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2608 ret <8 x i16> %b 2609} 2610 2611define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2612; CHECK-LABEL: ssub_sat_v16i8_y: 2613; CHECK: @ %bb.0: @ %entry 2614; CHECK-NEXT: vctp.8 r0 2615; CHECK-NEXT: vpst 2616; CHECK-NEXT: vqsubt.s8 q1, q0, q1 2617; CHECK-NEXT: vmov q0, q1 2618; CHECK-NEXT: bx lr 2619entry: 2620 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 2621 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 2622 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 2623 ret <16 x i8> %b 2624} 2625 2626define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 2627; CHECK-LABEL: usub_sat_v4i32_y: 2628; CHECK: @ %bb.0: @ %entry 2629; CHECK-NEXT: vctp.32 r0 2630; CHECK-NEXT: vpst 2631; CHECK-NEXT: vqsubt.u32 q1, q0, q1 2632; CHECK-NEXT: vmov q0, q1 2633; CHECK-NEXT: bx lr 2634entry: 2635 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2636 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 2637 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 2638 ret <4 x i32> %b 2639} 2640 2641define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 2642; CHECK-LABEL: usub_sat_v8i16_y: 2643; CHECK: @ %bb.0: @ %entry 2644; CHECK-NEXT: vctp.16 r0 2645; CHECK-NEXT: vpst 2646; CHECK-NEXT: vqsubt.u16 q1, q0, q1 2647; CHECK-NEXT: vmov q0, q1 2648; CHECK-NEXT: bx lr 2649entry: 2650 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2651 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 2652 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 2653 ret <8 x i16> %b 2654} 2655 2656define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 2657; CHECK-LABEL: usub_sat_v16i8_y: 2658; CHECK: @ %bb.0: @ %entry 2659; CHECK-NEXT: vctp.8 r0 2660; CHECK-NEXT: vpst 2661; CHECK-NEXT: vqsubt.u8 q1, q0, q1 2662; CHECK-NEXT: vmov q0, q1 2663; 
; CHECK-NEXT: bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; ---------------------------------------------------------------------------
; Scalar-operand ("qr") tests where the select falls through to the splat.
;
; Every function below splats the scalar %y, applies the operation to %x and
; the splat, and then selects between that result and the *splat* using the
; predicate returned by the vctp intrinsic on %n. Because lanes with a false
; predicate take the splat rather than %x, the expected output keeps a vdup of
; the scalar in q1, lets the predicated instruction write q1, and finally
; copies q1 into q0.
; ---------------------------------------------------------------------------

; Integer add, 32/16/8-bit lanes.
define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: addqr_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i32 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %ins = insertelement <4 x i32> undef, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %val = add <4 x i32> %x, %splat
  %res = select <4 x i1> %pred, <4 x i32> %val, <4 x i32> %splat
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: addqr_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.16 q1, r0
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i16 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %ins = insertelement <8 x i16> undef, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %val = add <8 x i16> %x, %splat
  %res = select <8 x i1> %pred, <8 x i16> %val, <8 x i16> %splat
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: addqr_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.8 q1, r0
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.i8 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %ins = insertelement <16 x i8> undef, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  %val = add <16 x i8> %x, %splat
  %res = select <16 x i1> %pred, <16 x i8> %val, <16 x i8> %splat
  ret <16 x i8> %res
}

; Integer sub, 32/16/8-bit lanes.
define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: subqr_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i32 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %ins = insertelement <4 x i32> undef, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %val = sub <4 x i32> %x, %splat
  %res = select <4 x i1> %pred, <4 x i32> %val, <4 x i32> %splat
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: subqr_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.16 q1, r0
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i16 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %ins = insertelement <8 x i16> undef, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %val = sub <8 x i16> %x, %splat
  %res = select <8 x i1> %pred, <8 x i16> %val, <8 x i16> %splat
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: subqr_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.8 q1, r0
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.i8 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %ins = insertelement <16 x i8> undef, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  %val = sub <16 x i8> %x, %splat
  %res = select <16 x i1> %pred, <16 x i8> %val, <16 x i8> %splat
  ret <16 x i8> %res
}

; Integer mul, 32/16/8-bit lanes.
define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: mulqr_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i32 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %ins = insertelement <4 x i32> undef, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %val = mul <4 x i32> %x, %splat
  %res = select <4 x i1> %pred, <4 x i32> %val, <4 x i32> %splat
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: mulqr_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.16 q1, r0
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i16 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %ins = insertelement <8 x i16> undef, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %val = mul <8 x i16> %x, %splat
  %res = select <8 x i1> %pred, <8 x i16> %val, <8 x i16> %splat
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: mulqr_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.8 q1, r0
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.i8 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %ins = insertelement <16 x i8> undef, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  %val = mul <16 x i8> %x, %splat
  %res = select <16 x i1> %pred, <16 x i8> %val, <16 x i8> %splat
  ret <16 x i8> %res
}

; Floating-point add, f32/f16 lanes. Here the expected code first moves the
; scalar out of s4 into r1, so the predicate count is taken from r0 and both
; the vdup and the predicated instruction use r1.
define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: faddqr_v4f32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vdup.32 q1, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.f32 q1, q0, r1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %ins = insertelement <4 x float> undef, float %y, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %val = fadd <4 x float> %x, %splat
  %res = select <4 x i1> %pred, <4 x float> %val, <4 x float> %splat
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: faddqr_v8f16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r1, s4
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vdup.16 q1, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vaddt.f16 q1, q0, r1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %ins = insertelement <8 x half> undef, half %y, i32 0
  %splat = shufflevector <8 x half> %ins, <8 x half> undef, <8 x i32> zeroinitializer
  %val = fadd <8 x half> %x, %splat
  %res = select <8 x i1> %pred, <8 x half> %val, <8 x half> %splat
  ret <8 x half> %res
}

; Floating-point sub, f32/f16 lanes.
define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: fsubqr_v4f32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vdup.32 q1, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.f32 q1, q0, r1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %ins = insertelement <4 x float> undef, float %y, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %val = fsub <4 x float> %x, %splat
  %res = select <4 x i1> %pred, <4 x float> %val, <4 x float> %splat
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: fsubqr_v8f16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r1, s4
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vdup.16 q1, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vsubt.f16 q1, q0, r1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %ins = insertelement <8 x half> undef, half %y, i32 0
  %splat = shufflevector <8 x half> %ins, <8 x half> undef, <8 x i32> zeroinitializer
  %val = fsub <8 x half> %x, %splat
  %res = select <8 x i1> %pred, <8 x half> %val, <8 x half> %splat
  ret <8 x half> %res
}

; Floating-point mul, f32/f16 lanes.
define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_y(<4 x float> %x, float %y, i32 %n) {
; CHECK-LABEL: fmulqr_v4f32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s4
; CHECK-NEXT: vctp.32 r0
; CHECK-NEXT: vdup.32 q1, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.f32 q1, q0, r1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %ins = insertelement <4 x float> undef, float %y, i32 0
  %splat = shufflevector <4 x float> %ins, <4 x float> undef, <4 x i32> zeroinitializer
  %val = fmul <4 x float> %x, %splat
  %res = select <4 x i1> %pred, <4 x float> %val, <4 x float> %splat
  ret <4 x float> %res
}

define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_y(<8 x half> %x, half %y, i32 %n) {
; CHECK-LABEL: fmulqr_v8f16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f16 r1, s4
; CHECK-NEXT: vctp.16 r0
; CHECK-NEXT: vdup.16 q1, r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vmult.f16 q1, q0, r1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %ins = insertelement <8 x half> undef, half %y, i32 0
  %splat = shufflevector <8 x half> %ins, <8 x half> undef, <8 x i32> zeroinitializer
  %val = fmul <8 x half> %x, %splat
  %res = select <8 x i1> %pred, <8 x half> %val, <8 x half> %splat
  ret <8 x half> %res
}

; Signed saturating add (llvm.sadd.sat), 32/16/8-bit lanes.
define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: sadd_satqr_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.s32 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %ins = insertelement <4 x i32> undef, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %val = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %splat)
  %res = select <4 x i1> %pred, <4 x i32> %val, <4 x i32> %splat
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: sadd_satqr_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.16 q1, r0
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.s16 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %ins = insertelement <8 x i16> undef, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %val = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %splat)
  %res = select <8 x i1> %pred, <8 x i16> %val, <8 x i16> %splat
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: sadd_satqr_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.8 q1, r0
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.s8 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %ins = insertelement <16 x i8> undef, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  %val = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %splat)
  %res = select <16 x i1> %pred, <16 x i8> %val, <16 x i8> %splat
  ret <16 x i8> %res
}

; Unsigned saturating add (llvm.uadd.sat), 32/16/8-bit lanes.
define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: uadd_satqr_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.u32 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %ins = insertelement <4 x i32> undef, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %val = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %splat)
  %res = select <4 x i1> %pred, <4 x i32> %val, <4 x i32> %splat
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: uadd_satqr_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.16 q1, r0
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.u16 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %ins = insertelement <8 x i16> undef, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %val = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %splat)
  %res = select <8 x i1> %pred, <8 x i16> %val, <8 x i16> %splat
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: uadd_satqr_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.8 q1, r0
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqaddt.u8 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %ins = insertelement <16 x i8> undef, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  %val = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %splat)
  %res = select <16 x i1> %pred, <16 x i8> %val, <16 x i8> %splat
  ret <16 x i8> %res
}

; Signed saturating sub (llvm.ssub.sat), 32/16/8-bit lanes.
define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: ssub_satqr_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.s32 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %ins = insertelement <4 x i32> undef, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %val = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %splat)
  %res = select <4 x i1> %pred, <4 x i32> %val, <4 x i32> %splat
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: ssub_satqr_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.16 q1, r0
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.s16 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %ins = insertelement <8 x i16> undef, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %val = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %splat)
  %res = select <8 x i1> %pred, <8 x i16> %val, <8 x i16> %splat
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: ssub_satqr_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.8 q1, r0
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.s8 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %ins = insertelement <16 x i8> undef, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  %val = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %splat)
  %res = select <16 x i1> %pred, <16 x i8> %val, <16 x i8> %splat
  ret <16 x i8> %res
}

; Unsigned saturating sub (llvm.usub.sat), 32/16/8-bit lanes.
define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: usub_satqr_v4i32_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vctp.32 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.u32 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %ins = insertelement <4 x i32> undef, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %val = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %splat)
  %res = select <4 x i1> %pred, <4 x i32> %val, <4 x i32> %splat
  ret <4 x i32> %res
}

define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: usub_satqr_v8i16_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.16 q1, r0
; CHECK-NEXT: vctp.16 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.u16 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %ins = insertelement <8 x i16> undef, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer
  %val = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %splat)
  %res = select <8 x i1> %pred, <8 x i16> %val, <8 x i16> %splat
  ret <8 x i16> %res
}

define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: usub_satqr_v16i8_y:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.8 q1, r0
; CHECK-NEXT: vctp.8 r1
; CHECK-NEXT: vpst
; CHECK-NEXT: vqsubt.u8 q1, q0, r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %pred = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %ins = insertelement <16 x i8> undef, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> undef, <16 x i32> zeroinitializer
  %val = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %splat)
  %res = select <16 x i1> %pred, <16 x i8> %val, <16 x i8> %splat
  ret <16 x i8> %res
}

; Saturating add/sub intrinsics called by the *_sat tests.
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>)
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>)
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)

; Fused multiply-add intrinsics. NOTE(review): not referenced by any function
; in this part of the file; presumably used by earlier tests - confirm before
; removing.
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)

; MVE vctp intrinsics that produce the lane predicate from the i32 %n argument.
declare <16 x i1> @llvm.arm.mve.vctp8(i32)
declare <8 x i1> @llvm.arm.mve.vctp16(i32)
declare <4 x i1> @llvm.arm.mve.vctp32(i32)