; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

; Every function below builds a lane predicate with one of the
; llvm.arm.mve.vctp{8,16,32} intrinsics from %n and uses it in a vector select.
; In most functions the select picks between %y (or a value derived from it)
; and a splat constant, and the select result feeds a binary operation with
; %x. The expected assembly shows whether the select is emitted as a single
; predicated instruction inside a VPT block (vpst + "...t" instruction) or
; whether a separate predicated move is still required.

; --- Integer add: false lanes select 0 -------------------------------------

define arm_aapcs_vfpcc <4 x i32> @add_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: add_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = add <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @add_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: add_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = add <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @add_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: add_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = add <16 x i8> %a, %x
  ret <16 x i8> %b
}

; --- Integer sub: false lanes select 0, select result is the subtrahend ----

define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sub_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = sub <4 x i32> %x, %a
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sub_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = sub <8 x i16> %x, %a
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sub_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = sub <16 x i8> %x, %a
  ret <16 x i8> %b
}

; --- Integer mul: false lanes select 1 -------------------------------------

define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: mul_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %b = mul <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: mul_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %b = mul <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: mul_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %b = mul <16 x i8> %a, %x
  ret <16 x i8> %b
}

; --- Bitwise and: false lanes select all-ones ------------------------------

define arm_aapcs_vfpcc <4 x i32> @and_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: and_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = and <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @and_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: and_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %b = and <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @and_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: and_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %b = and <16 x i8> %a, %x
  ret <16 x i8> %b
}

; --- Bitwise or: false lanes select 0 --------------------------------------

define arm_aapcs_vfpcc <4 x i32> @or_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: or_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = or <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @or_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: or_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = or <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @or_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: or_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = or <16 x i8> %a, %x
  ret <16 x i8> %b
}

; --- Bitwise xor: false lanes select 0 -------------------------------------

define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: xor_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = xor <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: xor_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = xor <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: xor_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = xor <16 x i8> %a, %x
  ret <16 x i8> %b
}

; --- Shift left: false lanes select a shift amount of 0 --------------------

define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: shl_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = shl <4 x i32> %x, %a
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: shl_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = shl <8 x i16> %x, %a
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: shl_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = shl <16 x i8> %x, %a
  ret <16 x i8> %b
}

; --- Arithmetic shift right: expected code negates the amount, then uses a
; --- predicated signed vshl ------------------------------------------------

define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ashr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s32 q1, q1
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = ashr <4 x i32> %x, %a
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ashr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s16 q1, q1
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = ashr <8 x i16> %x, %a
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ashr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s8 q1, q1
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = ashr <16 x i8> %x, %a
  ret <16 x i8> %b
}

; --- Logical shift right: expected code negates the amount, then uses a
; --- predicated unsigned vshl ----------------------------------------------

define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: lshr_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s32 q1, q1
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %y, <4 x i32> zeroinitializer
  %b = lshr <4 x i32> %x, %a
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: lshr_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s16 q1, q1
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %y, <8 x i16> zeroinitializer
  %b = lshr <8 x i16> %x, %a
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: lshr_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s8 q1, q1
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %y, <16 x i8> zeroinitializer
  %b = lshr <16 x i8> %x, %a
  ret <16 x i8> %b
}

; --- and-not: %y is inverted before the select; false lanes select all-ones -

define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = select <4 x i1> %c, <4 x i32> %y1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = and <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: andnot_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = select <8 x i1> %c, <8 x i16> %y1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %b = and <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: andnot_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = select <16 x i1> %c, <16 x i8> %y1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %b = and <16 x i8> %a, %x
  ret <16 x i8> %b
}

; --- or-not: %y is inverted before the select; false lanes select 0 --------

define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ornot_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = select <4 x i1> %c, <4 x i32> %y1, <4 x i32> zeroinitializer
  %b = or <4 x i32> %a, %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ornot_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = select <8 x i1> %c, <8 x i16> %y1, <8 x i16> zeroinitializer
  %b = or <8 x i16> %a, %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ornot_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = select <16 x i1> %c, <16 x i8> %y1, <16 x i8> zeroinitializer
  %b = or <16 x i8> %a, %x
  ret <16 x i8> %b
}

; --- Floating-point add ----------------------------------------------------
; _x  : false lanes select -0.0; expected code is a single predicated vadd.
; _x2 : false lanes select +0.0 and the fadd has no fast-math flags; expected
;       code keeps a separate predicated vmov followed by an unpredicated vadd.
; _x3 : false lanes select +0.0 and the fadd carries nsz; expected code is a
;       single predicated vadd again.

define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fadd_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
  %b = fadd <4 x float> %a, %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x2(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fadd_v4f32_x2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q2, #0x0
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q2, q1
; CHECK-NEXT:    vadd.f32 q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
  %b = fadd <4 x float> %a, %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_x3(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fadd_v4f32_x3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
  %b = fadd nsz <4 x float> %a, %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fadd_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
  %b = fadd <8 x half> %a, %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x2(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fadd_v8f16_x2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q2, #0x0
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q2, q1
; CHECK-NEXT:    vadd.f16 q0, q2, q0
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; +0.0 written in the 0xH half form used by the other half constants here.
  %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000>
  %b = fadd <8 x half> %a, %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_x3(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fadd_v8f16_x3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  ; +0.0 written in the 0xH half form used by the other half constants here.
  %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000, half 0xH0000>
  %b = fadd nsz <8 x half> %a, %x
  ret <8 x half> %b
}

; --- Floating-point sub: false lanes select +0.0 as the subtrahend ---------

define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fsub_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> zeroinitializer
  %b = fsub <4 x float> %x, %a
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fsub_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x half> %y, <8 x half> zeroinitializer
  %b = fsub <8 x half> %x, %a
  ret <8 x half> %b
}

; --- Floating-point mul: false lanes select 1.0 ----------------------------

define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fmul_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
  %b = fmul <4 x float> %a, %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fmul_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
  %b = fmul <8 x half> %a, %x
  ret <8 x half> %b
}

; --- Floating-point div: false lanes select 1.0 as the divisor. The expected
; --- code performs scalar vdiv per element and then a predicated vmov -------

define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fdiv_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdiv.f32 s7, s3, s7
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vdiv.f32 s6, s2, s6
; CHECK-NEXT:    vdiv.f32 s5, s1, s5
; CHECK-NEXT:    vdiv.f32 s4, s0, s4
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x float> %y, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
  %b = fdiv <4 x float> %x, %a
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fdiv_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s8, s4
; CHECK-NEXT:    vmovx.f16 s10, s0
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vdiv.f16 s4, s0, s4
; CHECK-NEXT:    vins.f16 s4, s8
; CHECK-NEXT:    vmovx.f16 s8, s5
; CHECK-NEXT:    vmovx.f16 s10, s1
; CHECK-NEXT:    vdiv.f16 s5, s1, s5
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vmovx.f16 s10, s2
; CHECK-NEXT:    vins.f16 s5, s8
; CHECK-NEXT:    vmovx.f16 s8, s6
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vdiv.f16 s6, s2, s6
; CHECK-NEXT:    vins.f16 s6, s8
; CHECK-NEXT:    vmovx.f16 s8, s7
; CHECK-NEXT:    vmovx.f16 s10, s3
; CHECK-NEXT:    vdiv.f16 s7, s3, s7
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vins.f16 s7, s8
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x half> %y, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
  %b = fdiv <8 x half> %x, %a
  ret <8 x half> %b
}

; --- llvm.fma with the select (%x or -0.0) as the addend. The expected code
; --- keeps a predicated vmov followed by an unpredicated vfma ---------------

define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
; CHECK-LABEL: fmai_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q3, #0x80000000
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q3, q0
; CHECK-NEXT:    vfma.f32 q3, q1, q2
; CHECK-NEXT:    vmov q0, q3
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
  %b = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %a)
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_x(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
; CHECK-LABEL: fmai_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i16 q3, #0x8000
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q3, q0
; CHECK-NEXT:    vfma.f16 q3, q1, q2
; CHECK-NEXT:    vmov q0, q3
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
  %b = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %a)
  ret <8 x half> %b
}

; --- fast fmul feeding a select (-0.0 on false lanes) and a fast fadd; the
; --- expected code is a single predicated vfma -----------------------------

define arm_aapcs_vfpcc <4 x float> @fma_v4f32_x(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
; CHECK-LABEL: fma_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vfmat.f32 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %m = fmul fast <4 x float> %y, %z
  %a = select <4 x i1> %c, <4 x float> %m, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
  %b = fadd fast <4 x float> %a, %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fma_v8f16_x(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
; CHECK-LABEL: fma_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vfmat.f16 q0, q1, q2
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %m = fmul fast <8 x half> %y, %z
  %a = select <8 x i1> %c, <8 x half> %m, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
  %b = fadd fast <8 x half> %a, %x
  ret <8 x half> %b
}

; --- Integer min/max intrinsics: the select is applied to the intrinsic
; --- result, with %x on the false lanes ------------------------------------

define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v4i32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v8i16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v16i8_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x
  ret <16 x i8> %b
}

; --- fast fcmp + select min/max pattern, then the vctp select with %x on the
; --- false lanes -----------------------------------------------------------

define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast olt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vminnmt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast olt <8 x half> %x, %y
  %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x
  ret <8 x half> %b
}

define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_x(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmt.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast ogt <4 x float> %x, %y
  %a = select <4 x i1> %a1, <4 x float> %x, <4 x float> %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %x
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_x(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16_x:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxnmt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
959 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 960 %a1 = fcmp fast ogt <8 x half> %x, %y 961 %a = select <8 x i1> %a1, <8 x half> %x, <8 x half> %y 962 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %x 963 ret <8 x half> %b 964} 965 966define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { 967; CHECK-LABEL: sadd_sat_v4i32_x: 968; CHECK: @ %bb.0: @ %entry 969; CHECK-NEXT: vctp.32 r0 970; CHECK-NEXT: vpst 971; CHECK-NEXT: vqaddt.s32 q0, q0, q1 972; CHECK-NEXT: bx lr 973entry: 974 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 975 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 976 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 977 ret <4 x i32> %b 978} 979 980define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { 981; CHECK-LABEL: sadd_sat_v8i16_x: 982; CHECK: @ %bb.0: @ %entry 983; CHECK-NEXT: vctp.16 r0 984; CHECK-NEXT: vpst 985; CHECK-NEXT: vqaddt.s16 q0, q0, q1 986; CHECK-NEXT: bx lr 987entry: 988 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 989 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 990 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 991 ret <8 x i16> %b 992} 993 994define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { 995; CHECK-LABEL: sadd_sat_v16i8_x: 996; CHECK: @ %bb.0: @ %entry 997; CHECK-NEXT: vctp.8 r0 998; CHECK-NEXT: vpst 999; CHECK-NEXT: vqaddt.s8 q0, q0, q1 1000; CHECK-NEXT: bx lr 1001entry: 1002 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1003 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 1004 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1005 ret <16 x i8> %b 1006} 1007 1008define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1009; CHECK-LABEL: uadd_sat_v4i32_x: 1010; CHECK: @ %bb.0: @ %entry 1011; CHECK-NEXT: vctp.32 r0 1012; CHECK-NEXT: vpst 1013; CHECK-NEXT: vqaddt.u32 q0, q0, q1 1014; CHECK-NEXT: bx lr 
1015entry: 1016 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1017 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 1018 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1019 ret <4 x i32> %b 1020} 1021 1022define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1023; CHECK-LABEL: uadd_sat_v8i16_x: 1024; CHECK: @ %bb.0: @ %entry 1025; CHECK-NEXT: vctp.16 r0 1026; CHECK-NEXT: vpst 1027; CHECK-NEXT: vqaddt.u16 q0, q0, q1 1028; CHECK-NEXT: bx lr 1029entry: 1030 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1031 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 1032 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1033 ret <8 x i16> %b 1034} 1035 1036define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1037; CHECK-LABEL: uadd_sat_v16i8_x: 1038; CHECK: @ %bb.0: @ %entry 1039; CHECK-NEXT: vctp.8 r0 1040; CHECK-NEXT: vpst 1041; CHECK-NEXT: vqaddt.u8 q0, q0, q1 1042; CHECK-NEXT: bx lr 1043entry: 1044 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1045 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 1046 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1047 ret <16 x i8> %b 1048} 1049 1050define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1051; CHECK-LABEL: ssub_sat_v4i32_x: 1052; CHECK: @ %bb.0: @ %entry 1053; CHECK-NEXT: vctp.32 r0 1054; CHECK-NEXT: vpst 1055; CHECK-NEXT: vqsubt.s32 q0, q0, q1 1056; CHECK-NEXT: bx lr 1057entry: 1058 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1059 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 1060 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1061 ret <4 x i32> %b 1062} 1063 1064define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1065; CHECK-LABEL: ssub_sat_v8i16_x: 1066; CHECK: @ %bb.0: @ %entry 1067; CHECK-NEXT: vctp.16 r0 1068; CHECK-NEXT: vpst 1069; CHECK-NEXT: vqsubt.s16 q0, q0, q1 
1070; CHECK-NEXT: bx lr 1071entry: 1072 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1073 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 1074 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1075 ret <8 x i16> %b 1076} 1077 1078define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1079; CHECK-LABEL: ssub_sat_v16i8_x: 1080; CHECK: @ %bb.0: @ %entry 1081; CHECK-NEXT: vctp.8 r0 1082; CHECK-NEXT: vpst 1083; CHECK-NEXT: vqsubt.s8 q0, q0, q1 1084; CHECK-NEXT: bx lr 1085entry: 1086 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1087 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 1088 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1089 ret <16 x i8> %b 1090} 1091 1092define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_x(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1093; CHECK-LABEL: usub_sat_v4i32_x: 1094; CHECK: @ %bb.0: @ %entry 1095; CHECK-NEXT: vctp.32 r0 1096; CHECK-NEXT: vpst 1097; CHECK-NEXT: vqsubt.u32 q0, q0, q1 1098; CHECK-NEXT: bx lr 1099entry: 1100 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1101 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y) 1102 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1103 ret <4 x i32> %b 1104} 1105 1106define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_x(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1107; CHECK-LABEL: usub_sat_v8i16_x: 1108; CHECK: @ %bb.0: @ %entry 1109; CHECK-NEXT: vctp.16 r0 1110; CHECK-NEXT: vpst 1111; CHECK-NEXT: vqsubt.u16 q0, q0, q1 1112; CHECK-NEXT: bx lr 1113entry: 1114 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1115 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y) 1116 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1117 ret <8 x i16> %b 1118} 1119 1120define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_x(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1121; CHECK-LABEL: usub_sat_v16i8_x: 1122; CHECK: @ %bb.0: @ %entry 1123; CHECK-NEXT: vctp.8 r0 1124; CHECK-NEXT: vpst 1125; CHECK-NEXT: 
vqsubt.u8 q0, q0, q1 1126; CHECK-NEXT: bx lr 1127entry: 1128 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1129 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y) 1130 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1131 ret <16 x i8> %b 1132} 1133 1134define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1135; CHECK-LABEL: addqr_v4i32_x: 1136; CHECK: @ %bb.0: @ %entry 1137; CHECK-NEXT: vctp.32 r1 1138; CHECK-NEXT: vpst 1139; CHECK-NEXT: vaddt.i32 q0, q0, r0 1140; CHECK-NEXT: bx lr 1141entry: 1142 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1143 %i = insertelement <4 x i32> undef, i32 %y, i64 0 1144 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1145 %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer 1146 %b = add <4 x i32> %a, %x 1147 ret <4 x i32> %b 1148} 1149 1150define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1151; CHECK-LABEL: addqr_v8i16_x: 1152; CHECK: @ %bb.0: @ %entry 1153; CHECK-NEXT: vctp.16 r1 1154; CHECK-NEXT: vpst 1155; CHECK-NEXT: vaddt.i16 q0, q0, r0 1156; CHECK-NEXT: bx lr 1157entry: 1158 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1159 %i = insertelement <8 x i16> undef, i16 %y, i64 0 1160 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1161 %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> zeroinitializer 1162 %b = add <8 x i16> %a, %x 1163 ret <8 x i16> %b 1164} 1165 1166define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1167; CHECK-LABEL: addqr_v16i8_x: 1168; CHECK: @ %bb.0: @ %entry 1169; CHECK-NEXT: vctp.8 r1 1170; CHECK-NEXT: vpst 1171; CHECK-NEXT: vaddt.i8 q0, q0, r0 1172; CHECK-NEXT: bx lr 1173entry: 1174 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1175 %i = insertelement <16 x i8> undef, i8 %y, i64 0 1176 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1177 %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> 
zeroinitializer 1178 %b = add <16 x i8> %a, %x 1179 ret <16 x i8> %b 1180} 1181 1182define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1183; CHECK-LABEL: subqr_v4i32_x: 1184; CHECK: @ %bb.0: @ %entry 1185; CHECK-NEXT: vctp.32 r1 1186; CHECK-NEXT: vpst 1187; CHECK-NEXT: vsubt.i32 q0, q0, r0 1188; CHECK-NEXT: bx lr 1189entry: 1190 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1191 %i = insertelement <4 x i32> undef, i32 %y, i64 0 1192 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1193 %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> zeroinitializer 1194 %b = sub <4 x i32> %x, %a 1195 ret <4 x i32> %b 1196} 1197 1198define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1199; CHECK-LABEL: subqr_v8i16_x: 1200; CHECK: @ %bb.0: @ %entry 1201; CHECK-NEXT: vctp.16 r1 1202; CHECK-NEXT: vpst 1203; CHECK-NEXT: vsubt.i16 q0, q0, r0 1204; CHECK-NEXT: bx lr 1205entry: 1206 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1207 %i = insertelement <8 x i16> undef, i16 %y, i64 0 1208 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1209 %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> zeroinitializer 1210 %b = sub <8 x i16> %x, %a 1211 ret <8 x i16> %b 1212} 1213 1214define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1215; CHECK-LABEL: subqr_v16i8_x: 1216; CHECK: @ %bb.0: @ %entry 1217; CHECK-NEXT: vctp.8 r1 1218; CHECK-NEXT: vpst 1219; CHECK-NEXT: vsubt.i8 q0, q0, r0 1220; CHECK-NEXT: bx lr 1221entry: 1222 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1223 %i = insertelement <16 x i8> undef, i8 %y, i64 0 1224 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1225 %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> zeroinitializer 1226 %b = sub <16 x i8> %x, %a 1227 ret <16 x i8> %b 1228} 1229 1230define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1231; CHECK-LABEL: mulqr_v4i32_x: 
1232; CHECK: @ %bb.0: @ %entry 1233; CHECK-NEXT: vctp.32 r1 1234; CHECK-NEXT: vpst 1235; CHECK-NEXT: vmult.i32 q0, q0, r0 1236; CHECK-NEXT: bx lr 1237entry: 1238 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1239 %i = insertelement <4 x i32> undef, i32 %y, i64 0 1240 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1241 %a = select <4 x i1> %c, <4 x i32> %ys, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1242 %b = mul <4 x i32> %a, %x 1243 ret <4 x i32> %b 1244} 1245 1246define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1247; CHECK-LABEL: mulqr_v8i16_x: 1248; CHECK: @ %bb.0: @ %entry 1249; CHECK-NEXT: vctp.16 r1 1250; CHECK-NEXT: vpst 1251; CHECK-NEXT: vmult.i16 q0, q0, r0 1252; CHECK-NEXT: bx lr 1253entry: 1254 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1255 %i = insertelement <8 x i16> undef, i16 %y, i64 0 1256 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1257 %a = select <8 x i1> %c, <8 x i16> %ys, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1258 %b = mul <8 x i16> %a, %x 1259 ret <8 x i16> %b 1260} 1261 1262define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1263; CHECK-LABEL: mulqr_v16i8_x: 1264; CHECK: @ %bb.0: @ %entry 1265; CHECK-NEXT: vctp.8 r1 1266; CHECK-NEXT: vpst 1267; CHECK-NEXT: vmult.i8 q0, q0, r0 1268; CHECK-NEXT: bx lr 1269entry: 1270 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1271 %i = insertelement <16 x i8> undef, i8 %y, i64 0 1272 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1273 %a = select <16 x i1> %c, <16 x i8> %ys, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1274 %b = mul <16 x i8> %a, %x 1275 ret <16 x i8> %b 1276} 1277 1278define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_x(<4 x float> %x, float %y, i32 %n) { 1279; CHECK-LABEL: faddqr_v4f32_x: 1280; CHECK: @ %bb.0: @ %entry 1281; CHECK-NEXT: vmov r1, 
s4 1282; CHECK-NEXT: vctp.32 r0 1283; CHECK-NEXT: vpst 1284; CHECK-NEXT: vaddt.f32 q0, q0, r1 1285; CHECK-NEXT: bx lr 1286entry: 1287 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1288 %i = insertelement <4 x float> undef, float %y, i64 0 1289 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 1290 %a = select <4 x i1> %c, <4 x float> %ys, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00> 1291 %b = fadd <4 x float> %a, %x 1292 ret <4 x float> %b 1293} 1294 1295define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_x(<8 x half> %x, half %y, i32 %n) { 1296; CHECK-LABEL: faddqr_v8f16_x: 1297; CHECK: @ %bb.0: @ %entry 1298; CHECK-NEXT: vmov.f16 r1, s4 1299; CHECK-NEXT: vctp.16 r0 1300; CHECK-NEXT: vpst 1301; CHECK-NEXT: vaddt.f16 q0, q0, r1 1302; CHECK-NEXT: bx lr 1303entry: 1304 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1305 %i = insertelement <8 x half> undef, half %y, i64 0 1306 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 1307 %a = select <8 x i1> %c, <8 x half> %ys, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000> 1308 %b = fadd <8 x half> %a, %x 1309 ret <8 x half> %b 1310} 1311 1312define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_x(<4 x float> %x, float %y, i32 %n) { 1313; CHECK-LABEL: fsubqr_v4f32_x: 1314; CHECK: @ %bb.0: @ %entry 1315; CHECK-NEXT: vmov r1, s4 1316; CHECK-NEXT: vctp.32 r0 1317; CHECK-NEXT: vpst 1318; CHECK-NEXT: vsubt.f32 q0, q0, r1 1319; CHECK-NEXT: bx lr 1320entry: 1321 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1322 %i = insertelement <4 x float> undef, float %y, i64 0 1323 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 1324 %a = select <4 x i1> %c, <4 x float> %ys, <4 x float> zeroinitializer 1325 %b = fsub <4 x float> %x, %a 1326 ret <4 x float> %b 1327} 1328 1329define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_x(<8 x 
half> %x, half %y, i32 %n) { 1330; CHECK-LABEL: fsubqr_v8f16_x: 1331; CHECK: @ %bb.0: @ %entry 1332; CHECK-NEXT: vmov.f16 r1, s4 1333; CHECK-NEXT: vctp.16 r0 1334; CHECK-NEXT: vpst 1335; CHECK-NEXT: vsubt.f16 q0, q0, r1 1336; CHECK-NEXT: bx lr 1337entry: 1338 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1339 %i = insertelement <8 x half> undef, half %y, i64 0 1340 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 1341 %a = select <8 x i1> %c, <8 x half> %ys, <8 x half> zeroinitializer 1342 %b = fsub <8 x half> %x, %a 1343 ret <8 x half> %b 1344} 1345 1346define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_x(<4 x float> %x, float %y, i32 %n) { 1347; CHECK-LABEL: fmulqr_v4f32_x: 1348; CHECK: @ %bb.0: @ %entry 1349; CHECK-NEXT: vmov r1, s4 1350; CHECK-NEXT: vctp.32 r0 1351; CHECK-NEXT: vpst 1352; CHECK-NEXT: vmult.f32 q0, q0, r1 1353; CHECK-NEXT: bx lr 1354entry: 1355 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1356 %i = insertelement <4 x float> undef, float %y, i64 0 1357 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 1358 %a = select <4 x i1> %c, <4 x float> %ys, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> 1359 %b = fmul <4 x float> %a, %x 1360 ret <4 x float> %b 1361} 1362 1363define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_x(<8 x half> %x, half %y, i32 %n) { 1364; CHECK-LABEL: fmulqr_v8f16_x: 1365; CHECK: @ %bb.0: @ %entry 1366; CHECK-NEXT: vmov.f16 r1, s4 1367; CHECK-NEXT: vctp.16 r0 1368; CHECK-NEXT: vpst 1369; CHECK-NEXT: vmult.f16 q0, q0, r1 1370; CHECK-NEXT: bx lr 1371entry: 1372 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1373 %i = insertelement <8 x half> undef, half %y, i64 0 1374 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 1375 %a = select <8 x i1> %c, <8 x half> %ys, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00> 1376 %b = 
fmul <8 x half> %a, %x 1377 ret <8 x half> %b 1378} 1379 1380define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1381; CHECK-LABEL: sadd_satqr_v4i32_x: 1382; CHECK: @ %bb.0: @ %entry 1383; CHECK-NEXT: vctp.32 r1 1384; CHECK-NEXT: vpst 1385; CHECK-NEXT: vqaddt.s32 q0, q0, r0 1386; CHECK-NEXT: bx lr 1387entry: 1388 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1389 %i = insertelement <4 x i32> undef, i32 %y, i64 0 1390 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1391 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1392 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1393 ret <4 x i32> %b 1394} 1395 1396define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1397; CHECK-LABEL: sadd_satqr_v8i16_x: 1398; CHECK: @ %bb.0: @ %entry 1399; CHECK-NEXT: vctp.16 r1 1400; CHECK-NEXT: vpst 1401; CHECK-NEXT: vqaddt.s16 q0, q0, r0 1402; CHECK-NEXT: bx lr 1403entry: 1404 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1405 %i = insertelement <8 x i16> undef, i16 %y, i64 0 1406 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1407 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1408 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1409 ret <8 x i16> %b 1410} 1411 1412define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1413; CHECK-LABEL: sadd_satqr_v16i8_x: 1414; CHECK: @ %bb.0: @ %entry 1415; CHECK-NEXT: vctp.8 r1 1416; CHECK-NEXT: vpst 1417; CHECK-NEXT: vqaddt.s8 q0, q0, r0 1418; CHECK-NEXT: bx lr 1419entry: 1420 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1421 %i = insertelement <16 x i8> undef, i8 %y, i64 0 1422 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1423 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1424 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1425 ret <16 x i8> %b 1426} 1427 1428define 
arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1429; CHECK-LABEL: uadd_satqr_v4i32_x: 1430; CHECK: @ %bb.0: @ %entry 1431; CHECK-NEXT: vctp.32 r1 1432; CHECK-NEXT: vpst 1433; CHECK-NEXT: vqaddt.u32 q0, q0, r0 1434; CHECK-NEXT: bx lr 1435entry: 1436 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1437 %i = insertelement <4 x i32> undef, i32 %y, i64 0 1438 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1439 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1440 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1441 ret <4 x i32> %b 1442} 1443 1444define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1445; CHECK-LABEL: uadd_satqr_v8i16_x: 1446; CHECK: @ %bb.0: @ %entry 1447; CHECK-NEXT: vctp.16 r1 1448; CHECK-NEXT: vpst 1449; CHECK-NEXT: vqaddt.u16 q0, q0, r0 1450; CHECK-NEXT: bx lr 1451entry: 1452 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1453 %i = insertelement <8 x i16> undef, i16 %y, i64 0 1454 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1455 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1456 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1457 ret <8 x i16> %b 1458} 1459 1460define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1461; CHECK-LABEL: uadd_satqr_v16i8_x: 1462; CHECK: @ %bb.0: @ %entry 1463; CHECK-NEXT: vctp.8 r1 1464; CHECK-NEXT: vpst 1465; CHECK-NEXT: vqaddt.u8 q0, q0, r0 1466; CHECK-NEXT: bx lr 1467entry: 1468 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1469 %i = insertelement <16 x i8> undef, i8 %y, i64 0 1470 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1471 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1472 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1473 ret <16 x i8> %b 1474} 1475 1476define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 
1477; CHECK-LABEL: ssub_satqr_v4i32_x: 1478; CHECK: @ %bb.0: @ %entry 1479; CHECK-NEXT: vctp.32 r1 1480; CHECK-NEXT: vpst 1481; CHECK-NEXT: vqsubt.s32 q0, q0, r0 1482; CHECK-NEXT: bx lr 1483entry: 1484 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1485 %i = insertelement <4 x i32> undef, i32 %y, i64 0 1486 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1487 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1488 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1489 ret <4 x i32> %b 1490} 1491 1492define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1493; CHECK-LABEL: ssub_satqr_v8i16_x: 1494; CHECK: @ %bb.0: @ %entry 1495; CHECK-NEXT: vctp.16 r1 1496; CHECK-NEXT: vpst 1497; CHECK-NEXT: vqsubt.s16 q0, q0, r0 1498; CHECK-NEXT: bx lr 1499entry: 1500 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1501 %i = insertelement <8 x i16> undef, i16 %y, i64 0 1502 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1503 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1504 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1505 ret <8 x i16> %b 1506} 1507 1508define arm_aapcs_vfpcc <16 x i8> @ssub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1509; CHECK-LABEL: ssub_satqr_v16i8_x: 1510; CHECK: @ %bb.0: @ %entry 1511; CHECK-NEXT: vctp.8 r1 1512; CHECK-NEXT: vpst 1513; CHECK-NEXT: vqsubt.s8 q0, q0, r0 1514; CHECK-NEXT: bx lr 1515entry: 1516 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1517 %i = insertelement <16 x i8> undef, i8 %y, i64 0 1518 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1519 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1520 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1521 ret <16 x i8> %b 1522} 1523 1524define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_x(<4 x i32> %x, i32 %y, i32 %n) { 1525; CHECK-LABEL: usub_satqr_v4i32_x: 1526; CHECK: @ %bb.0: @ %entry 1527; 
CHECK-NEXT: vctp.32 r1 1528; CHECK-NEXT: vpst 1529; CHECK-NEXT: vqsubt.u32 q0, q0, r0 1530; CHECK-NEXT: bx lr 1531entry: 1532 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1533 %i = insertelement <4 x i32> undef, i32 %y, i64 0 1534 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 1535 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 1536 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %x 1537 ret <4 x i32> %b 1538} 1539 1540define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_x(<8 x i16> %x, i16 %y, i32 %n) { 1541; CHECK-LABEL: usub_satqr_v8i16_x: 1542; CHECK: @ %bb.0: @ %entry 1543; CHECK-NEXT: vctp.16 r1 1544; CHECK-NEXT: vpst 1545; CHECK-NEXT: vqsubt.u16 q0, q0, r0 1546; CHECK-NEXT: bx lr 1547entry: 1548 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1549 %i = insertelement <8 x i16> undef, i16 %y, i64 0 1550 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 1551 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 1552 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %x 1553 ret <8 x i16> %b 1554} 1555 1556define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_x(<16 x i8> %x, i8 %y, i32 %n) { 1557; CHECK-LABEL: usub_satqr_v16i8_x: 1558; CHECK: @ %bb.0: @ %entry 1559; CHECK-NEXT: vctp.8 r1 1560; CHECK-NEXT: vpst 1561; CHECK-NEXT: vqsubt.u8 q0, q0, r0 1562; CHECK-NEXT: bx lr 1563entry: 1564 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1565 %i = insertelement <16 x i8> undef, i8 %y, i64 0 1566 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 1567 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 1568 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %x 1569 ret <16 x i8> %b 1570} 1571 1572define arm_aapcs_vfpcc <4 x i32> @add_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1573; CHECK-LABEL: add_v4i32_y: 1574; CHECK: @ %bb.0: @ %entry 1575; CHECK-NEXT: vctp.32 r0 1576; CHECK-NEXT: vpst 1577; CHECK-NEXT: vaddt.i32 q1, q1, q0 
1578; CHECK-NEXT: vmov q0, q1 1579; CHECK-NEXT: bx lr 1580entry: 1581 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1582 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer 1583 %b = add <4 x i32> %a, %y 1584 ret <4 x i32> %b 1585} 1586 1587define arm_aapcs_vfpcc <8 x i16> @add_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1588; CHECK-LABEL: add_v8i16_y: 1589; CHECK: @ %bb.0: @ %entry 1590; CHECK-NEXT: vctp.16 r0 1591; CHECK-NEXT: vpst 1592; CHECK-NEXT: vaddt.i16 q1, q1, q0 1593; CHECK-NEXT: vmov q0, q1 1594; CHECK-NEXT: bx lr 1595entry: 1596 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1597 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer 1598 %b = add <8 x i16> %a, %y 1599 ret <8 x i16> %b 1600} 1601 1602define arm_aapcs_vfpcc <16 x i8> @add_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1603; CHECK-LABEL: add_v16i8_y: 1604; CHECK: @ %bb.0: @ %entry 1605; CHECK-NEXT: vctp.8 r0 1606; CHECK-NEXT: vpst 1607; CHECK-NEXT: vaddt.i8 q1, q1, q0 1608; CHECK-NEXT: vmov q0, q1 1609; CHECK-NEXT: bx lr 1610entry: 1611 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1612 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer 1613 %b = add <16 x i8> %a, %y 1614 ret <16 x i8> %b 1615} 1616 1617define arm_aapcs_vfpcc <4 x i32> @sub_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1618; CHECK-LABEL: sub_v4i32_y: 1619; CHECK: @ %bb.0: @ %entry 1620; CHECK-NEXT: vctp.32 r0 1621; CHECK-NEXT: vpst 1622; CHECK-NEXT: vsubt.i32 q1, q0, q1 1623; CHECK-NEXT: vmov q0, q1 1624; CHECK-NEXT: bx lr 1625entry: 1626 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1627 %a = sub <4 x i32> %x, %y 1628 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y 1629 ret <4 x i32> %b 1630} 1631 1632define arm_aapcs_vfpcc <8 x i16> @sub_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1633; CHECK-LABEL: sub_v8i16_y: 1634; CHECK: @ %bb.0: @ %entry 1635; CHECK-NEXT: vctp.16 r0 1636; CHECK-NEXT: vpst 1637; CHECK-NEXT: vsubt.i16 q1, q0, q1 1638; CHECK-NEXT: vmov q0, q1 1639; 
CHECK-NEXT: bx lr 1640entry: 1641 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1642 %a = sub <8 x i16> %x, %y 1643 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y 1644 ret <8 x i16> %b 1645} 1646 1647define arm_aapcs_vfpcc <16 x i8> @sub_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1648; CHECK-LABEL: sub_v16i8_y: 1649; CHECK: @ %bb.0: @ %entry 1650; CHECK-NEXT: vctp.8 r0 1651; CHECK-NEXT: vpst 1652; CHECK-NEXT: vsubt.i8 q1, q0, q1 1653; CHECK-NEXT: vmov q0, q1 1654; CHECK-NEXT: bx lr 1655entry: 1656 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1657 %a = sub <16 x i8> %x, %y 1658 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y 1659 ret <16 x i8> %b 1660} 1661 1662define arm_aapcs_vfpcc <4 x i32> @mul_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) { 1663; CHECK-LABEL: mul_v4i32_y: 1664; CHECK: @ %bb.0: @ %entry 1665; CHECK-NEXT: vctp.32 r0 1666; CHECK-NEXT: vpst 1667; CHECK-NEXT: vmult.i32 q1, q1, q0 1668; CHECK-NEXT: vmov q0, q1 1669; CHECK-NEXT: bx lr 1670entry: 1671 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 1672 %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 1673 %b = mul <4 x i32> %a, %y 1674 ret <4 x i32> %b 1675} 1676 1677define arm_aapcs_vfpcc <8 x i16> @mul_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) { 1678; CHECK-LABEL: mul_v8i16_y: 1679; CHECK: @ %bb.0: @ %entry 1680; CHECK-NEXT: vctp.16 r0 1681; CHECK-NEXT: vpst 1682; CHECK-NEXT: vmult.i16 q1, q1, q0 1683; CHECK-NEXT: vmov q0, q1 1684; CHECK-NEXT: bx lr 1685entry: 1686 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 1687 %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 1688 %b = mul <8 x i16> %a, %y 1689 ret <8 x i16> %b 1690} 1691 1692define arm_aapcs_vfpcc <16 x i8> @mul_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) { 1693; CHECK-LABEL: mul_v16i8_y: 1694; CHECK: @ %bb.0: @ %entry 1695; CHECK-NEXT: vctp.8 r0 1696; CHECK-NEXT: vpst 1697; CHECK-NEXT: vmult.i8 q1, q1, q0 1698; CHECK-NEXT: vmov q0, q1 
1699; CHECK-NEXT: bx lr 1700entry: 1701 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 1702 %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1703 %b = mul <16 x i8> %a, %y 1704 ret <16 x i8> %b 1705}
; NOTE(review): the line above is the tail of a test whose define line lies
; before this span; it is kept exactly as found (still collapsed, with fused
; listing numbers) and should be restored together with its header.

; and, %y form: inactive lanes select all-ones, so the and returns %y there.
; Expected output is a single predicated vandt that writes %y's register (q1).
define arm_aapcs_vfpcc <4 x i32> @and_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: and_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %b = and <4 x i32> %a, %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @and_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: and_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %b = and <8 x i16> %a, %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @and_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: and_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vandt q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %b = and <16 x i8> %a, %y
  ret <16 x i8> %b
}

; or, %y form: inactive lanes select zero before the or; expected output is a
; predicated vorrt into q1.
define arm_aapcs_vfpcc <4 x i32> @or_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: or_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
  %b = or <4 x i32> %a, %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @or_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: or_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
  %b = or <8 x i16> %a, %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @or_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: or_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vorrt q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
  %b = or <16 x i8> %a, %y
  ret <16 x i8> %b
}

; xor, %y form: inactive lanes select zero before the xor; expected output is
; a predicated veort into q1.
define arm_aapcs_vfpcc <4 x i32> @xor_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: xor_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
  %b = xor <4 x i32> %a, %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @xor_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: xor_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
  %b = xor <8 x i16> %a, %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @xor_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: xor_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    veort q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
  %b = xor <16 x i8> %a, %y
  ret <16 x i8> %b
}

; shl, %y form: the shift is computed first and the select returns %y in
; inactive lanes; expected output is a predicated vshlt into q1.
define arm_aapcs_vfpcc <4 x i32> @shl_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: shl_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = shl <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @shl_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: shl_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = shl <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @shl_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: shl_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = shl <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; ashr, %y form: expected output negates the shift amount into q2 (vneg) and
; then uses a predicated signed vshlt into q1.
define arm_aapcs_vfpcc <4 x i32> @ashr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ashr_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s32 q2, q1
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s32 q1, q0, q2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = ashr <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ashr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ashr_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s16 q2, q1
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s16 q1, q0, q2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = ashr <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ashr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ashr_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s8 q2, q1
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.s8 q1, q0, q2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = ashr <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; lshr, %y form: same shape as ashr, but the predicated shift is the unsigned
; vshlt.
define arm_aapcs_vfpcc <4 x i32> @lshr_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: lshr_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s32 q2, q1
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u32 q1, q0, q2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = lshr <4 x i32> %x, %y
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @lshr_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: lshr_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s16 q2, q1
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u16 q1, q0, q2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = lshr <8 x i16> %x, %y
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @lshr_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: lshr_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.s8 q2, q1
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vshlt.u8 q1, q0, q2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = lshr <16 x i8> %x, %y
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; and-not, %y form: computes %x & ~%y and selects %y in inactive lanes;
; expected output is a predicated vbict.
define arm_aapcs_vfpcc <4 x i32> @andnot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: andnot_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = and <4 x i32> %y1, %x
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @andnot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: andnot_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = and <8 x i16> %y1, %x
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @andnot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: andnot_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vbict q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = and <16 x i8> %y1, %x
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; or-not, %y form: computes %x | ~%y and selects %y in inactive lanes;
; expected output is a predicated vornt.
define arm_aapcs_vfpcc <4 x i32> @ornot_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ornot_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %y1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = or <4 x i32> %y1, %x
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ornot_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ornot_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %y1 = xor <8 x i16> %y, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %a = or <8 x i16> %y1, %x
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ornot_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ornot_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vornt q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %y1 = xor <16 x i8> %y, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %a = or <16 x i8> %y1, %x
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; fadd, %y form: inactive lanes select -0.0 (0xH8000 for half) before the
; fadd; expected output is a predicated vaddt into q1.
define arm_aapcs_vfpcc <4 x float> @fadd_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fadd_v4f32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f32 q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>
  %b = fadd <4 x float> %a, %y
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fadd_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fadd_v8f16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.f16 q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>
  %b = fadd <8 x half> %a, %y
  ret <8 x half> %b
}

; fsub, %y form: the subtraction is computed first and the select returns %y
; in inactive lanes; expected output is a predicated vsubt into q1.
define arm_aapcs_vfpcc <4 x float> @fsub_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fsub_v4f32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fsub <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fsub_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fsub_v8f16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.f16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fsub <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
  ret <8 x half> %b
}

; fmul, %y form: inactive lanes select 1.0 (0xH3C00 for half) before the
; fmul; expected output is a predicated vmult into q1.
define arm_aapcs_vfpcc <4 x float> @fmul_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fmul_v4f32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f32 q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
  %b = fmul <4 x float> %a, %y
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmul_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fmul_v8f16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.f16 q1, q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>
  %b = fmul <8 x half> %a, %y
  ret <8 x half> %b
}

; fdiv, %y form: the expected output divides lane by lane with scalar vdiv
; and applies the predicate afterwards with a vmovt into q1.
define arm_aapcs_vfpcc <4 x float> @fdiv_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fdiv_v4f32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdiv.f32 s3, s3, s7
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vdiv.f32 s2, s2, s6
; CHECK-NEXT:    vdiv.f32 s1, s1, s5
; CHECK-NEXT:    vdiv.f32 s0, s0, s4
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = fdiv <4 x float> %x, %y
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fdiv_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fdiv_v8f16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s10, s0
; CHECK-NEXT:    vmovx.f16 s8, s4
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vdiv.f16 s0, s0, s4
; CHECK-NEXT:    vins.f16 s0, s8
; CHECK-NEXT:    vmovx.f16 s10, s1
; CHECK-NEXT:    vmovx.f16 s8, s5
; CHECK-NEXT:    vdiv.f16 s1, s1, s5
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vmovx.f16 s10, s2
; CHECK-NEXT:    vins.f16 s1, s8
; CHECK-NEXT:    vmovx.f16 s8, s6
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vdiv.f16 s2, s2, s6
; CHECK-NEXT:    vins.f16 s2, s8
; CHECK-NEXT:    vmovx.f16 s10, s3
; CHECK-NEXT:    vmovx.f16 s8, s7
; CHECK-NEXT:    vdiv.f16 s3, s3, s7
; CHECK-NEXT:    vdiv.f16 s8, s10, s8
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vins.f16 s3, s8
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = fdiv <8 x half> %x, %y
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
  ret <8 x half> %b
}

; llvm.fma intrinsic, %y form: the expected output keeps an unpredicated vfma
; and merges the result into q1 with a predicated vmovt.
define arm_aapcs_vfpcc <4 x float> @fmai_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
; CHECK-LABEL: fmai_v4f32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vfma.f32 q0, q1, q2
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x float> @llvm.fma.v4f32(<4 x float> %y, <4 x float> %z, <4 x float> %x)
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fmai_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
; CHECK-LABEL: fmai_v8f16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vfma.f16 q0, q1, q2
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
  ret <8 x half> %b
}

; fast fmul + fadd, %y form: same expected output as the intrinsic version
; above (unpredicated vfma followed by a predicated vmovt).
define arm_aapcs_vfpcc <4 x float> @fma_v4f32_y(<4 x float> %x, <4 x float> %y, <4 x float> %z, i32 %n) {
; CHECK-LABEL: fma_v4f32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vfma.f32 q0, q1, q2
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %m = fmul fast <4 x float> %y, %z
  %a = fadd fast <4 x float> %m, %x
  %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %y
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fma_v8f16_y(<8 x half> %x, <8 x half> %y, <8 x half> %z, i32 %n) {
; CHECK-LABEL: fma_v8f16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vfma.f16 q0, q1, q2
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %m = fmul fast <8 x half> %y, %z
  %a = fadd fast <8 x half> %m, %x
  %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %y
  ret <8 x half> %b
}

; Signed minimum (llvm.smin), %y form: expected output is a predicated
; vmint.s into q1.
define arm_aapcs_vfpcc <4 x i32> @icmp_slt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_slt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_slt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_slt_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.s8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Signed maximum (llvm.smax), %y form: expected output is a predicated
; vmaxt.s into q1.
define arm_aapcs_vfpcc <4 x i32> @icmp_sgt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_sgt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_sgt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_sgt_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.s8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Unsigned minimum (llvm.umin), %y form: expected output is a predicated
; vmint.u into q1.
define arm_aapcs_vfpcc <4 x i32> @icmp_ult_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ult_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ult_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ult_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmint.u8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Unsigned maximum (llvm.umax), %y form: expected output is a predicated
; vmaxt.u into q1.
define arm_aapcs_vfpcc <4 x i32> @icmp_ugt_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @icmp_ugt_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @icmp_ugt_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: icmp_ugt_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmaxt.u8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; fast olt compare and-ed with the tail predicate, then select of %x over %y:
; expected output is a vpstt block holding a predicated vcmpt and vmovt.
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_olt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v4f32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpstt
; CHECK-NEXT:    vcmpt.f32 gt, q1, q0
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast olt <4 x float> %x, %y
  %0 = and <4 x i1> %c, %a1
  %b = select <4 x i1> %0, <4 x float> %x, <4 x float> %y
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_olt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_olt_v8f16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpstt
; CHECK-NEXT:    vcmpt.f16 gt, q1, q0
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast olt <8 x half> %x, %y
  %0 = and <8 x i1> %c, %a1
  %b = select <8 x i1> %0, <8 x half> %x, <8 x half> %y
  ret <8 x half> %b
}

; fast ogt compare, %y form: same shape as the olt tests with the compare
; operands in the opposite order (vcmpt gt, q0, q1).
define arm_aapcs_vfpcc <4 x float> @fcmp_fast_ogt_v4f32_y(<4 x float> %x, <4 x float> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpstt
; CHECK-NEXT:    vcmpt.f32 gt, q0, q1
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a1 = fcmp fast ogt <4 x float> %x, %y
  %0 = and <4 x i1> %c, %a1
  %b = select <4 x i1> %0, <4 x float> %x, <4 x float> %y
  ret <4 x float> %b
}

define arm_aapcs_vfpcc <8 x half> @fcmp_fast_ogt_v8f16_y(<8 x half> %x, <8 x half> %y, i32 %n) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpstt
; CHECK-NEXT:    vcmpt.f16 gt, q0, q1
; CHECK-NEXT:    vmovt q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a1 = fcmp fast ogt <8 x half> %x, %y
  %0 = and <8 x i1> %c, %a1
  %b = select <8 x i1> %0, <8 x half> %x, <8 x half> %y
  ret <8 x half> %b
}

; Signed saturating add (llvm.sadd.sat), %y form: expected output is a
; predicated vqaddt.s into q1.
define arm_aapcs_vfpcc <4 x i32> @sadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @sadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @sadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: sadd_sat_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.s8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Unsigned saturating add (llvm.uadd.sat), %y form: expected output is a
; predicated vqaddt.u into q1.
define arm_aapcs_vfpcc <4 x i32> @uadd_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @uadd_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @uadd_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: uadd_sat_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqaddt.u8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Signed saturating subtract (llvm.ssub.sat), %y form: expected output is a
; predicated vqsubt.s into q1.
define arm_aapcs_vfpcc <4 x i32> @ssub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @ssub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @ssub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: ssub_sat_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.s8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; Unsigned saturating subtract (llvm.usub.sat), %y form: expected output is a
; predicated vqsubt.u into q1.
define arm_aapcs_vfpcc <4 x i32> @usub_sat_v4i32_y(<4 x i32> %x, <4 x i32> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.32 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u32 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %y)
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %y
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @usub_sat_v8i16_y(<8 x i16> %x, <8 x i16> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.16 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u16 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %y
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @usub_sat_v16i8_y(<16 x i8> %x, <16 x i8> %y, i32 %n) {
; CHECK-LABEL: usub_sat_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vctp.8 r0
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vqsubt.u8 q1, q0, q1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %y)
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %y
  ret <16 x i8> %b
}

; add with a splatted scalar %y: inactive lanes of %x select zero before the
; add. Expected output splats r0 into q1 (vdup), then uses a predicated
; vector-by-scalar vaddt; the predicate count is taken from r1.
define arm_aapcs_vfpcc <4 x i32> @addqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: addqr_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i64 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> zeroinitializer
  %b = add <4 x i32> %ys, %a
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @addqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: addqr_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i64 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> zeroinitializer
  %b = add <8 x i16> %ys, %a
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @addqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: addqr_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.8 q1, r0
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vaddt.i8 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i64 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> zeroinitializer
  %b = add <16 x i8> %ys, %a
  ret <16 x i8> %b
}

; sub of a splatted scalar %y: the subtraction is computed first and the
; select returns the splat in inactive lanes. Expected output is a vdup
; followed by a predicated vector-by-scalar vsubt.
define arm_aapcs_vfpcc <4 x i32> @subqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: subqr_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i64 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = sub <4 x i32> %x, %ys
  %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @subqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: subqr_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i64 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = sub <8 x i16> %x, %ys
  %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @subqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: subqr_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.8 q1, r0
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vsubt.i8 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i64 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = sub <16 x i8> %x, %ys
  %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys
  ret <16 x i8> %b
}

; mul by a splatted scalar %y: inactive lanes of %x select one before the
; mul. Expected output is a vdup followed by a predicated vector-by-scalar
; vmult.
define arm_aapcs_vfpcc <4 x i32> @mulqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) {
; CHECK-LABEL: mulqr_v4i32_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.32 q1, r0
; CHECK-NEXT:    vctp.32 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i32 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n)
  %i = insertelement <4 x i32> undef, i32 %y, i64 0
  %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer
  %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  %b = mul <4 x i32> %ys, %a
  ret <4 x i32> %b
}

define arm_aapcs_vfpcc <8 x i16> @mulqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) {
; CHECK-LABEL: mulqr_v8i16_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.16 q1, r0
; CHECK-NEXT:    vctp.16 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i16 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n)
  %i = insertelement <8 x i16> undef, i16 %y, i64 0
  %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer
  %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %b = mul <8 x i16> %ys, %a
  ret <8 x i16> %b
}

define arm_aapcs_vfpcc <16 x i8> @mulqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) {
; CHECK-LABEL: mulqr_v16i8_y:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vdup.8 q1, r0
; CHECK-NEXT:    vctp.8 r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vmult.i8 q1, q0, r0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n)
  %i = insertelement <16 x i8> undef, i8 %y, i64 0
  %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer
  %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %b = mul <16 x i8> %ys, %a
  ret <16 x i8> %b
}

; NOTE(review): the line below is the head of a test whose body continues past
; this span; it is kept exactly as found (still collapsed, with fused listing
; numbers) and should be restored together with the rest of that test.
2879define arm_aapcs_vfpcc <4 x float> @faddqr_v4f32_y(<4 x float> %x, float %y, i32 %n) { 2880; CHECK-LABEL: faddqr_v4f32_y: 2881; CHECK: @ %bb.0: @ %entry 2882; CHECK-NEXT: vmov r1, s4 2883; CHECK-NEXT: vctp.32 r0 2884; CHECK-NEXT: vdup.32 q1, r1 2885; CHECK-NEXT: vpst 2886;
CHECK-NEXT: vaddt.f32 q1, q0, r1 2887; CHECK-NEXT: vmov q0, q1 2888; CHECK-NEXT: bx lr 2889entry: 2890 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2891 %i = insertelement <4 x float> undef, float %y, i64 0 2892 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 2893 %a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00> 2894 %b = fadd <4 x float> %ys, %a 2895 ret <4 x float> %b 2896} 2897 2898define arm_aapcs_vfpcc <8 x half> @faddqr_v8f16_y(<8 x half> %x, half %y, i32 %n) { 2899; CHECK-LABEL: faddqr_v8f16_y: 2900; CHECK: @ %bb.0: @ %entry 2901; CHECK-NEXT: vmov.f16 r1, s4 2902; CHECK-NEXT: vctp.16 r0 2903; CHECK-NEXT: vdup.16 q1, r1 2904; CHECK-NEXT: vpst 2905; CHECK-NEXT: vaddt.f16 q1, q0, r1 2906; CHECK-NEXT: vmov q0, q1 2907; CHECK-NEXT: bx lr 2908entry: 2909 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2910 %i = insertelement <8 x half> undef, half %y, i64 0 2911 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 2912 %a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000> 2913 %b = fadd <8 x half> %ys, %a 2914 ret <8 x half> %b 2915} 2916 2917define arm_aapcs_vfpcc <4 x float> @fsubqr_v4f32_y(<4 x float> %x, float %y, i32 %n) { 2918; CHECK-LABEL: fsubqr_v4f32_y: 2919; CHECK: @ %bb.0: @ %entry 2920; CHECK-NEXT: vmov r1, s4 2921; CHECK-NEXT: vctp.32 r0 2922; CHECK-NEXT: vdup.32 q1, r1 2923; CHECK-NEXT: vpst 2924; CHECK-NEXT: vsubt.f32 q1, q0, r1 2925; CHECK-NEXT: vmov q0, q1 2926; CHECK-NEXT: bx lr 2927entry: 2928 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2929 %i = insertelement <4 x float> undef, float %y, i64 0 2930 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 2931 %a = fsub <4 x float> %x, %ys 2932 %b = select <4 x i1> %c, <4 x float> %a, <4 x float> %ys 2933 ret <4 
x float> %b 2934} 2935 2936define arm_aapcs_vfpcc <8 x half> @fsubqr_v8f16_y(<8 x half> %x, half %y, i32 %n) { 2937; CHECK-LABEL: fsubqr_v8f16_y: 2938; CHECK: @ %bb.0: @ %entry 2939; CHECK-NEXT: vmov.f16 r1, s4 2940; CHECK-NEXT: vctp.16 r0 2941; CHECK-NEXT: vdup.16 q1, r1 2942; CHECK-NEXT: vpst 2943; CHECK-NEXT: vsubt.f16 q1, q0, r1 2944; CHECK-NEXT: vmov q0, q1 2945; CHECK-NEXT: bx lr 2946entry: 2947 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2948 %i = insertelement <8 x half> undef, half %y, i64 0 2949 %ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 2950 %a = fsub <8 x half> %x, %ys 2951 %b = select <8 x i1> %c, <8 x half> %a, <8 x half> %ys 2952 ret <8 x half> %b 2953} 2954 2955define arm_aapcs_vfpcc <4 x float> @fmulqr_v4f32_y(<4 x float> %x, float %y, i32 %n) { 2956; CHECK-LABEL: fmulqr_v4f32_y: 2957; CHECK: @ %bb.0: @ %entry 2958; CHECK-NEXT: vmov r1, s4 2959; CHECK-NEXT: vctp.32 r0 2960; CHECK-NEXT: vdup.32 q1, r1 2961; CHECK-NEXT: vpst 2962; CHECK-NEXT: vmult.f32 q1, q0, r1 2963; CHECK-NEXT: vmov q0, q1 2964; CHECK-NEXT: bx lr 2965entry: 2966 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 2967 %i = insertelement <4 x float> undef, float %y, i64 0 2968 %ys = shufflevector <4 x float> %i, <4 x float> undef, <4 x i32> zeroinitializer 2969 %a = select <4 x i1> %c, <4 x float> %x, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00> 2970 %b = fmul <4 x float> %ys, %a 2971 ret <4 x float> %b 2972} 2973 2974define arm_aapcs_vfpcc <8 x half> @fmulqr_v8f16_y(<8 x half> %x, half %y, i32 %n) { 2975; CHECK-LABEL: fmulqr_v8f16_y: 2976; CHECK: @ %bb.0: @ %entry 2977; CHECK-NEXT: vmov.f16 r1, s4 2978; CHECK-NEXT: vctp.16 r0 2979; CHECK-NEXT: vdup.16 q1, r1 2980; CHECK-NEXT: vpst 2981; CHECK-NEXT: vmult.f16 q1, q0, r1 2982; CHECK-NEXT: vmov q0, q1 2983; CHECK-NEXT: bx lr 2984entry: 2985 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 2986 %i = insertelement <8 x half> undef, half %y, i64 0 2987 
%ys = shufflevector <8 x half> %i, <8 x half> undef, <8 x i32> zeroinitializer 2988 %a = select <8 x i1> %c, <8 x half> %x, <8 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00> 2989 %b = fmul <8 x half> %ys, %a 2990 ret <8 x half> %b 2991} 2992 2993define arm_aapcs_vfpcc <4 x i32> @sadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 2994; CHECK-LABEL: sadd_satqr_v4i32_y: 2995; CHECK: @ %bb.0: @ %entry 2996; CHECK-NEXT: vdup.32 q1, r0 2997; CHECK-NEXT: vctp.32 r1 2998; CHECK-NEXT: vpst 2999; CHECK-NEXT: vqaddt.s32 q1, q0, r0 3000; CHECK-NEXT: vmov q0, q1 3001; CHECK-NEXT: bx lr 3002entry: 3003 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 3004 %i = insertelement <4 x i32> undef, i32 %y, i64 0 3005 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 3006 %a = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 3007 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 3008 ret <4 x i32> %b 3009} 3010 3011define arm_aapcs_vfpcc <8 x i16> @sadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 3012; CHECK-LABEL: sadd_satqr_v8i16_y: 3013; CHECK: @ %bb.0: @ %entry 3014; CHECK-NEXT: vdup.16 q1, r0 3015; CHECK-NEXT: vctp.16 r1 3016; CHECK-NEXT: vpst 3017; CHECK-NEXT: vqaddt.s16 q1, q0, r0 3018; CHECK-NEXT: vmov q0, q1 3019; CHECK-NEXT: bx lr 3020entry: 3021 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 3022 %i = insertelement <8 x i16> undef, i16 %y, i64 0 3023 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 3024 %a = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 3025 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 3026 ret <8 x i16> %b 3027} 3028 3029define arm_aapcs_vfpcc <16 x i8> @sadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 3030; CHECK-LABEL: sadd_satqr_v16i8_y: 3031; CHECK: @ %bb.0: @ %entry 3032; CHECK-NEXT: vdup.8 q1, r0 3033; CHECK-NEXT: vctp.8 r1 3034; CHECK-NEXT: vpst 3035; CHECK-NEXT: vqaddt.s8 q1, 
q0, r0 3036; CHECK-NEXT: vmov q0, q1 3037; CHECK-NEXT: bx lr 3038entry: 3039 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 3040 %i = insertelement <16 x i8> undef, i8 %y, i64 0 3041 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 3042 %a = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 3043 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 3044 ret <16 x i8> %b 3045} 3046 3047define arm_aapcs_vfpcc <4 x i32> @uadd_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 3048; CHECK-LABEL: uadd_satqr_v4i32_y: 3049; CHECK: @ %bb.0: @ %entry 3050; CHECK-NEXT: vdup.32 q1, r0 3051; CHECK-NEXT: vctp.32 r1 3052; CHECK-NEXT: vpst 3053; CHECK-NEXT: vqaddt.u32 q1, q0, r0 3054; CHECK-NEXT: vmov q0, q1 3055; CHECK-NEXT: bx lr 3056entry: 3057 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 3058 %i = insertelement <4 x i32> undef, i32 %y, i64 0 3059 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 3060 %a = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 3061 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 3062 ret <4 x i32> %b 3063} 3064 3065define arm_aapcs_vfpcc <8 x i16> @uadd_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 3066; CHECK-LABEL: uadd_satqr_v8i16_y: 3067; CHECK: @ %bb.0: @ %entry 3068; CHECK-NEXT: vdup.16 q1, r0 3069; CHECK-NEXT: vctp.16 r1 3070; CHECK-NEXT: vpst 3071; CHECK-NEXT: vqaddt.u16 q1, q0, r0 3072; CHECK-NEXT: vmov q0, q1 3073; CHECK-NEXT: bx lr 3074entry: 3075 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 3076 %i = insertelement <8 x i16> undef, i16 %y, i64 0 3077 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 3078 %a = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 3079 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 3080 ret <8 x i16> %b 3081} 3082 3083define arm_aapcs_vfpcc <16 x i8> @uadd_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 3084; CHECK-LABEL: uadd_satqr_v16i8_y: 3085; CHECK: @ %bb.0: @ %entry 
3086; CHECK-NEXT: vdup.8 q1, r0 3087; CHECK-NEXT: vctp.8 r1 3088; CHECK-NEXT: vpst 3089; CHECK-NEXT: vqaddt.u8 q1, q0, r0 3090; CHECK-NEXT: vmov q0, q1 3091; CHECK-NEXT: bx lr 3092entry: 3093 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 3094 %i = insertelement <16 x i8> undef, i8 %y, i64 0 3095 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 3096 %a = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 3097 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 3098 ret <16 x i8> %b 3099} 3100 3101define arm_aapcs_vfpcc <4 x i32> @ssub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 3102; CHECK-LABEL: ssub_satqr_v4i32_y: 3103; CHECK: @ %bb.0: @ %entry 3104; CHECK-NEXT: vdup.32 q1, r0 3105; CHECK-NEXT: vctp.32 r1 3106; CHECK-NEXT: vpst 3107; CHECK-NEXT: vqsubt.s32 q1, q0, r0 3108; CHECK-NEXT: vmov q0, q1 3109; CHECK-NEXT: bx lr 3110entry: 3111 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 3112 %i = insertelement <4 x i32> undef, i32 %y, i64 0 3113 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 3114 %a = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 3115 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 3116 ret <4 x i32> %b 3117} 3118 3119define arm_aapcs_vfpcc <8 x i16> @ssub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 3120; CHECK-LABEL: ssub_satqr_v8i16_y: 3121; CHECK: @ %bb.0: @ %entry 3122; CHECK-NEXT: vdup.16 q1, r0 3123; CHECK-NEXT: vctp.16 r1 3124; CHECK-NEXT: vpst 3125; CHECK-NEXT: vqsubt.s16 q1, q0, r0 3126; CHECK-NEXT: vmov q0, q1 3127; CHECK-NEXT: bx lr 3128entry: 3129 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 3130 %i = insertelement <8 x i16> undef, i16 %y, i64 0 3131 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 3132 %a = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 3133 %b = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 3134 ret <8 x i16> %b 3135} 3136 3137define arm_aapcs_vfpcc <16 x i8> 
@ssub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 3138; CHECK-LABEL: ssub_satqr_v16i8_y: 3139; CHECK: @ %bb.0: @ %entry 3140; CHECK-NEXT: vdup.8 q1, r0 3141; CHECK-NEXT: vctp.8 r1 3142; CHECK-NEXT: vpst 3143; CHECK-NEXT: vqsubt.s8 q1, q0, r0 3144; CHECK-NEXT: vmov q0, q1 3145; CHECK-NEXT: bx lr 3146entry: 3147 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 3148 %i = insertelement <16 x i8> undef, i8 %y, i64 0 3149 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 3150 %a = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 3151 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 3152 ret <16 x i8> %b 3153} 3154 3155define arm_aapcs_vfpcc <4 x i32> @usub_satqr_v4i32_y(<4 x i32> %x, i32 %y, i32 %n) { 3156; CHECK-LABEL: usub_satqr_v4i32_y: 3157; CHECK: @ %bb.0: @ %entry 3158; CHECK-NEXT: vdup.32 q1, r0 3159; CHECK-NEXT: vctp.32 r1 3160; CHECK-NEXT: vpst 3161; CHECK-NEXT: vqsubt.u32 q1, q0, r0 3162; CHECK-NEXT: vmov q0, q1 3163; CHECK-NEXT: bx lr 3164entry: 3165 %c = call <4 x i1> @llvm.arm.mve.vctp32(i32 %n) 3166 %i = insertelement <4 x i32> undef, i32 %y, i64 0 3167 %ys = shufflevector <4 x i32> %i, <4 x i32> undef, <4 x i32> zeroinitializer 3168 %a = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %x, <4 x i32> %ys) 3169 %b = select <4 x i1> %c, <4 x i32> %a, <4 x i32> %ys 3170 ret <4 x i32> %b 3171} 3172 3173define arm_aapcs_vfpcc <8 x i16> @usub_satqr_v8i16_y(<8 x i16> %x, i16 %y, i32 %n) { 3174; CHECK-LABEL: usub_satqr_v8i16_y: 3175; CHECK: @ %bb.0: @ %entry 3176; CHECK-NEXT: vdup.16 q1, r0 3177; CHECK-NEXT: vctp.16 r1 3178; CHECK-NEXT: vpst 3179; CHECK-NEXT: vqsubt.u16 q1, q0, r0 3180; CHECK-NEXT: vmov q0, q1 3181; CHECK-NEXT: bx lr 3182entry: 3183 %c = call <8 x i1> @llvm.arm.mve.vctp16(i32 %n) 3184 %i = insertelement <8 x i16> undef, i16 %y, i64 0 3185 %ys = shufflevector <8 x i16> %i, <8 x i16> undef, <8 x i32> zeroinitializer 3186 %a = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %x, <8 x i16> %ys) 3187 %b = 
select <8 x i1> %c, <8 x i16> %a, <8 x i16> %ys 3188 ret <8 x i16> %b 3189} 3190 3191define arm_aapcs_vfpcc <16 x i8> @usub_satqr_v16i8_y(<16 x i8> %x, i8 %y, i32 %n) { 3192; CHECK-LABEL: usub_satqr_v16i8_y: 3193; CHECK: @ %bb.0: @ %entry 3194; CHECK-NEXT: vdup.8 q1, r0 3195; CHECK-NEXT: vctp.8 r1 3196; CHECK-NEXT: vpst 3197; CHECK-NEXT: vqsubt.u8 q1, q0, r0 3198; CHECK-NEXT: vmov q0, q1 3199; CHECK-NEXT: bx lr 3200entry: 3201 %c = call <16 x i1> @llvm.arm.mve.vctp8(i32 %n) 3202 %i = insertelement <16 x i8> undef, i8 %y, i64 0 3203 %ys = shufflevector <16 x i8> %i, <16 x i8> undef, <16 x i32> zeroinitializer 3204 %a = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %x, <16 x i8> %ys) 3205 %b = select <16 x i1> %c, <16 x i8> %a, <16 x i8> %ys 3206 ret <16 x i8> %b 3207} 3208 3209declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) 3210declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) 3211declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) 3212declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) 3213declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) 3214declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) 3215declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) 3216declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) 3217declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) 3218declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) 3219declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) 3220declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) 3221 3222declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>) 3223declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>) 3224declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>) 3225declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>) 3226declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>) 3227declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) 3228declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>) 
3229declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) 3230declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) 3231declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>) 3232declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) 3233declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) 3234 3235declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) 3236declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) 3237 3238declare <16 x i1> @llvm.arm.mve.vctp8(i32) 3239declare <8 x i1> @llvm.arm.mve.vctp16(i32) 3240declare <4 x i1> @llvm.arm.mve.vctp32(i32) 3241