; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+neon,+sve2 -verify-machineinstrs %s -o - | FileCheck %s

; Every function in this part of the file has the shape
;   select (%z == 0), op(%x, %y), %z
; on scalable vectors. In the checked output the result of the operation is
; merged into %z (z0) by a mov predicated on the compare result.
; The CHECK lines are generated; regenerate them with the script named in the
; NOTE line rather than editing them by hand.

; Integer add: unpredicated add, then predicated merge.
define <vscale x 4 x i32> @add_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: add_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add z1.s, z1.s, z2.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a = add <vscale x 4 x i32> %x, %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @add_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: add_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add z1.h, z1.h, z2.h
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a = add <vscale x 8 x i16> %x, %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @add_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: add_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add z1.b, z1.b, z2.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a = add <vscale x 16 x i8> %x, %y
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

; Integer sub.
define <vscale x 4 x i32> @sub_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: sub_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sub z1.s, z1.s, z2.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a = sub <vscale x 4 x i32> %x, %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @sub_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: sub_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sub z1.h, z1.h, z2.h
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a = sub <vscale x 8 x i16> %x, %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @sub_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: sub_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sub z1.b, z1.b, z2.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a = sub <vscale x 16 x i8> %x, %y
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

; Integer mul.
define <vscale x 4 x i32> @mul_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: mul_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mul z1.s, z1.s, z2.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a = mul <vscale x 4 x i32> %x, %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @mul_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: mul_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mul z1.h, z1.h, z2.h
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a = mul <vscale x 8 x i16> %x, %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @mul_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: mul_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mul z1.b, z1.b, z2.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a = mul <vscale x 16 x i8> %x, %y
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

; Bitwise and/or/xor: the checked logical instruction is written with .d
; operands for every IR element type; only the compare and merge use the
; element size of the IR type.
define <vscale x 4 x i32> @and_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: and_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    and z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a = and <vscale x 4 x i32> %x, %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @and_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: and_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    and z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a = and <vscale x 8 x i16> %x, %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @and_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: and_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    and z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a = and <vscale x 16 x i8> %x, %y
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

define <vscale x 4 x i32> @or_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: or_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    orr z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a = or <vscale x 4 x i32> %x, %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @or_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: or_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    orr z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a = or <vscale x 8 x i16> %x, %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @or_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: or_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    orr z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a = or <vscale x 16 x i8> %x, %y
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

define <vscale x 4 x i32> @xor_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: xor_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    eor z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a = xor <vscale x 4 x i32> %x, %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @xor_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: xor_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    eor z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a = xor <vscale x 8 x i16> %x, %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @xor_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: xor_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    eor z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a = xor <vscale x 16 x i8> %x, %y
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

; and-not: %x & ~%y (xor with an all-ones splat, then and) is checked as a
; single bic.
define <vscale x 4 x i32> @andnot_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: andnot_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    bic z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %y1 = xor <vscale x 4 x i32> %y, splat (i32 -1)
  %a = and <vscale x 4 x i32> %x, %y1
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @andnot_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: andnot_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    bic z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %y1 = xor <vscale x 8 x i16> %y, splat (i16 -1)
  %a = and <vscale x 8 x i16> %x, %y1
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @andnot_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: andnot_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    bic z1.d, z1.d, z2.d
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %y1 = xor <vscale x 16 x i8> %y, splat (i8 -1)
  %a = and <vscale x 16 x i8> %x, %y1
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

; or-not: %x | ~%y is checked as three instructions (materialise all-ones,
; eor, orr) rather than one combined instruction.
define <vscale x 4 x i32> @ornot_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: ornot_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z3.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    eor z2.d, z2.d, z3.d
; CHECK-NEXT:    orr z1.d, z1.d, z2.d
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %y1 = xor <vscale x 4 x i32> %y, splat (i32 -1)
  %a = or <vscale x 4 x i32> %x, %y1
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @ornot_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: ornot_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z3.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    eor z2.d, z2.d, z3.d
; CHECK-NEXT:    orr z1.d, z1.d, z2.d
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %y1 = xor <vscale x 8 x i16> %y, splat (i16 -1)
  %a = or <vscale x 8 x i16> %x, %y1
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @ornot_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: ornot_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov z3.b, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    eor z2.d, z2.d, z3.d
; CHECK-NEXT:    orr z1.d, z1.d, z2.d
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %y1 = xor <vscale x 16 x i8> %y, splat (i8 -1)
  %a = or <vscale x 16 x i8> %x, %y1
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

; Floating-point add/sub/mul: the select condition is an ordered-equal
; compare of %z against zero (fcmeq ... #0.0 in the checks).
define <vscale x 4 x float> @fadd_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
; CHECK-LABEL: fadd_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fadd z1.s, z1.s, z2.s
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
  %a = fadd <vscale x 4 x float> %x, %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z
  ret <vscale x 4 x float> %b
}

define <vscale x 8 x half> @fadd_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, <vscale x 8 x half> %y) {
; CHECK-LABEL: fadd_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fadd z1.h, z1.h, z2.h
; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
  %a = fadd <vscale x 8 x half> %x, %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z
  ret <vscale x 8 x half> %b
}

define <vscale x 4 x float> @fsub_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
; CHECK-LABEL: fsub_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fsub z1.s, z1.s, z2.s
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
  %a = fsub <vscale x 4 x float> %x, %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z
  ret <vscale x 4 x float> %b
}

define <vscale x 8 x half> @fsub_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, <vscale x 8 x half> %y) {
; CHECK-LABEL: fsub_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fsub z1.h, z1.h, z2.h
; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
  %a = fsub <vscale x 8 x half> %x, %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z
  ret <vscale x 8 x half> %b
}

define <vscale x 4 x float> @fmul_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
; CHECK-LABEL: fmul_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmul z1.s, z1.s, z2.s
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
  %a = fmul <vscale x 4 x float> %x, %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z
  ret <vscale x 4 x float> %b
}

define <vscale x 8 x half> @fmul_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, <vscale x 8 x half> %y) {
; CHECK-LABEL: fmul_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmul z1.h, z1.h, z2.h
; CHECK-NEXT:    fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
  %a = fmul <vscale x 8 x half> %x, %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z
  ret <vscale x 8 x half> %b
}

; Integer min/max written as compare + select. The checks expect a single
; smin/smax/umin/umax governed by the all-true predicate p0, with the %z == 0
; compare result kept in p1 for the final merge.
define <vscale x 4 x i32> @icmp_slt_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: icmp_slt_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT:    smin z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    mov z0.s, p1/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a1 = icmp slt <vscale x 4 x i32> %x, %y
  %a = select <vscale x 4 x i1> %a1, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @icmp_slt_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: icmp_slt_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT:    smin z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    mov z0.h, p1/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a1 = icmp slt <vscale x 8 x i16> %x, %y
  %a = select <vscale x 8 x i1> %a1, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @icmp_slt_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: icmp_slt_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT:    smin z1.b, p0/m, z1.b, z2.b
; CHECK-NEXT:    mov z0.b, p1/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a1 = icmp slt <vscale x 16 x i8> %x, %y
  %a = select <vscale x 16 x i1> %a1, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

define <vscale x 4 x i32> @icmp_sgt_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: icmp_sgt_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT:    smax z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    mov z0.s, p1/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a1 = icmp sgt <vscale x 4 x i32> %x, %y
  %a = select <vscale x 4 x i1> %a1, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @icmp_sgt_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: icmp_sgt_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT:    smax z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    mov z0.h, p1/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a1 = icmp sgt <vscale x 8 x i16> %x, %y
  %a = select <vscale x 8 x i1> %a1, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @icmp_sgt_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: icmp_sgt_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT:    smax z1.b, p0/m, z1.b, z2.b
; CHECK-NEXT:    mov z0.b, p1/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a1 = icmp sgt <vscale x 16 x i8> %x, %y
  %a = select <vscale x 16 x i1> %a1, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

define <vscale x 4 x i32> @icmp_ult_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: icmp_ult_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT:    umin z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    mov z0.s, p1/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a1 = icmp ult <vscale x 4 x i32> %x, %y
  %a = select <vscale x 4 x i1> %a1, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @icmp_ult_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: icmp_ult_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT:    umin z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    mov z0.h, p1/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a1 = icmp ult <vscale x 8 x i16> %x, %y
  %a = select <vscale x 8 x i1> %a1, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @icmp_ult_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: icmp_ult_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT:    umin z1.b, p0/m, z1.b, z2.b
; CHECK-NEXT:    mov z0.b, p1/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a1 = icmp ult <vscale x 16 x i8> %x, %y
  %a = select <vscale x 16 x i1> %a1, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

define <vscale x 4 x i32> @icmp_ugt_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: icmp_ugt_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT:    umax z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    mov z0.s, p1/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a1 = icmp ugt <vscale x 4 x i32> %x, %y
  %a = select <vscale x 4 x i1> %a1, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @icmp_ugt_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: icmp_ugt_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT:    umax z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    mov z0.h, p1/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a1 = icmp ugt <vscale x 8 x i16> %x, %y
  %a = select <vscale x 8 x i1> %a1, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @icmp_ugt_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: icmp_ugt_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT:    umax z1.b, p0/m, z1.b, z2.b
; CHECK-NEXT:    mov z0.b, p1/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a1 = icmp ugt <vscale x 16 x i8> %x, %y
  %a = select <vscale x 16 x i1> %a1, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

; Floating-point min/max written as a `fast` ordered compare + select. The
; checks expect fminnm/fmaxnm governed by the all-true predicate p0, with the
; %z == 0.0 compare result kept in p1 for the final merge into %z.
define <vscale x 4 x float> @fcmp_fast_olt_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
; CHECK-LABEL: fcmp_fast_olt_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fminnm z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    mov z0.s, p1/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
  %a1 = fcmp fast olt <vscale x 4 x float> %x, %y
  %a = select <vscale x 4 x i1> %a1, <vscale x 4 x float> %x, <vscale x 4 x float> %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z
  ret <vscale x 4 x float> %b
}

define <vscale x 8 x half> @fcmp_fast_olt_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, <vscale x 8 x half> %y) {
; CHECK-LABEL: fcmp_fast_olt_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT:    fminnm z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    mov z0.h, p1/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
  %a1 = fcmp fast olt <vscale x 8 x half> %x, %y
  %a = select <vscale x 8 x i1> %a1, <vscale x 8 x half> %x, <vscale x 8 x half> %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z
  ret <vscale x 8 x half> %b
}

define <vscale x 4 x float> @fcmp_fast_ogt_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
; CHECK-LABEL: fcmp_fast_ogt_v4f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fmaxnm z1.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    mov z0.s, p1/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer
  %a1 = fcmp fast ogt <vscale x 4 x float> %x, %y
  %a = select <vscale x 4 x i1> %a1, <vscale x 4 x float> %x, <vscale x 4 x float> %y
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z
  ret <vscale x 4 x float> %b
}

define <vscale x 8 x half> @fcmp_fast_ogt_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, <vscale x 8 x half> %y) {
; CHECK-LABEL: fcmp_fast_ogt_v8f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fcmeq p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT:    fmaxnm z1.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    mov z0.h, p1/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer
  %a1 = fcmp fast ogt <vscale x 8 x half> %x, %y
  %a = select <vscale x 8 x i1> %a1, <vscale x 8 x half> %x, <vscale x 8 x half> %y
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z
  ret <vscale x 8 x half> %b
}

; Saturating add/sub intrinsics: the checks expect an unpredicated
; sqadd/uqadd/sqsub/uqsub followed by a merge into %z predicated on %z == 0.
define <vscale x 4 x i32> @sadd_sat_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: sadd_sat_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sqadd z1.s, z1.s, z2.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a = call <vscale x 4 x i32> @llvm.sadd.sat.v4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @sadd_sat_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: sadd_sat_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sqadd z1.h, z1.h, z2.h
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a = call <vscale x 8 x i16> @llvm.sadd.sat.v8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @sadd_sat_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: sadd_sat_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sqadd z1.b, z1.b, z2.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a = call <vscale x 16 x i8> @llvm.sadd.sat.v16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

define <vscale x 4 x i32> @uadd_sat_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: uadd_sat_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uqadd z1.s, z1.s, z2.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a = call <vscale x 4 x i32> @llvm.uadd.sat.v4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @uadd_sat_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: uadd_sat_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uqadd z1.h, z1.h, z2.h
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a = call <vscale x 8 x i16> @llvm.uadd.sat.v8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @uadd_sat_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: uadd_sat_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    uqadd z1.b, z1.b, z2.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a = call <vscale x 16 x i8> @llvm.uadd.sat.v16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

define <vscale x 4 x i32> @ssub_sat_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: ssub_sat_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sqsub z1.s, z1.s, z2.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a = call <vscale x 4 x i32> @llvm.ssub.sat.v4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @ssub_sat_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: ssub_sat_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sqsub z1.h, z1.h, z2.h
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a = call <vscale x 8 x i16> @llvm.ssub.sat.v8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @ssub_sat_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: ssub_sat_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sqsub z1.b, z1.b, z2.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a = call <vscale x 16 x i8> @llvm.ssub.sat.v16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

define <vscale x 4 x i32> @usub_sat_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
; CHECK-LABEL: usub_sat_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uqsub z1.s, z1.s, z2.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %a = call <vscale x 4 x i32> @llvm.usub.sat.v4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y)
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @usub_sat_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
; CHECK-LABEL: usub_sat_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uqsub z1.h, z1.h, z2.h
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %a = call <vscale x 8 x i16> @llvm.usub.sat.v8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y)
  %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z
  ret <vscale x 8 x i16> %b
}

define <vscale x 16 x i8> @usub_sat_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
; CHECK-LABEL: usub_sat_v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    uqsub z1.b, z1.b, z2.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %a = call <vscale x 16 x i8> @llvm.usub.sat.v16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y)
  %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %b
}

; Add with a scalar right-hand side: %y is splatted with the
; insertelement + shufflevector idiom (checked as `mov zN.<T>, w0`) and then
; added to %x. The splat placeholders are poison, not undef: only lane 0 of
; the insertelement result is read by the all-zero shuffle mask, so the
; placeholder lanes never reach the result.
define <vscale x 4 x i32> @addqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, i32 %y) {
; CHECK-LABEL: addqr_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z2.s, w0
; CHECK-NEXT:    cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    add z1.s, z1.s, z2.s
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer
  %i = insertelement <vscale x 4 x i32> poison, i32 %y, i32 0
  %ys = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %a = add <vscale x 4 x i32> %x, %ys
  %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z
  ret <vscale x 4 x i32> %b
}

define <vscale x 8 x i16> @addqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, i16 %y) {
; CHECK-LABEL: addqr_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z2.h, w0
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    add z1.h, z1.h, z2.h
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
entry:
  %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer
  %i = insertelement <vscale x 8 x i16> poison, i16 %y, i32 0
  %ys = shufflevector <vscale x 8 x i16> %i, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
  %a = add <vscale x 8 x i16> %x, %ys
  %b = select <vscale x 8 x i1> %c,
<vscale x 8 x i16> %a, <vscale x 8 x i16> %z 935 ret <vscale x 8 x i16> %b 936} 937 938define <vscale x 16 x i8> @addqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, i8 %y) { 939; CHECK-LABEL: addqr_v16i8: 940; CHECK: // %bb.0: // %entry 941; CHECK-NEXT: ptrue p0.b 942; CHECK-NEXT: mov z2.b, w0 943; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 944; CHECK-NEXT: add z1.b, z1.b, z2.b 945; CHECK-NEXT: mov z0.b, p0/m, z1.b 946; CHECK-NEXT: ret 947entry: 948 %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer 949 %i = insertelement <vscale x 16 x i8> undef, i8 %y, i32 0 950 %ys = shufflevector <vscale x 16 x i8> %i, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer 951 %a = add <vscale x 16 x i8> %x, %ys 952 %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z 953 ret <vscale x 16 x i8> %b 954} 955 956define <vscale x 4 x i32> @subqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, i32 %y) { 957; CHECK-LABEL: subqr_v4i32: 958; CHECK: // %bb.0: // %entry 959; CHECK-NEXT: ptrue p0.s 960; CHECK-NEXT: mov z2.s, w0 961; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0 962; CHECK-NEXT: sub z1.s, z1.s, z2.s 963; CHECK-NEXT: mov z0.s, p0/m, z1.s 964; CHECK-NEXT: ret 965entry: 966 %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer 967 %i = insertelement <vscale x 4 x i32> undef, i32 %y, i32 0 968 %ys = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer 969 %a = sub <vscale x 4 x i32> %x, %ys 970 %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z 971 ret <vscale x 4 x i32> %b 972} 973 974define <vscale x 8 x i16> @subqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, i16 %y) { 975; CHECK-LABEL: subqr_v8i16: 976; CHECK: // %bb.0: // %entry 977; CHECK-NEXT: ptrue p0.h 978; CHECK-NEXT: mov z2.h, w0 979; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0 980; CHECK-NEXT: sub z1.h, z1.h, z2.h 981; CHECK-NEXT: mov z0.h, p0/m, z1.h 982; CHECK-NEXT: ret 983entry: 984 %c = icmp eq <vscale x 
8 x i16> %z, zeroinitializer 985 %i = insertelement <vscale x 8 x i16> undef, i16 %y, i32 0 986 %ys = shufflevector <vscale x 8 x i16> %i, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer 987 %a = sub <vscale x 8 x i16> %x, %ys 988 %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z 989 ret <vscale x 8 x i16> %b 990} 991 992define <vscale x 16 x i8> @subqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, i8 %y) { 993; CHECK-LABEL: subqr_v16i8: 994; CHECK: // %bb.0: // %entry 995; CHECK-NEXT: ptrue p0.b 996; CHECK-NEXT: mov z2.b, w0 997; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 998; CHECK-NEXT: sub z1.b, z1.b, z2.b 999; CHECK-NEXT: mov z0.b, p0/m, z1.b 1000; CHECK-NEXT: ret 1001entry: 1002 %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer 1003 %i = insertelement <vscale x 16 x i8> undef, i8 %y, i32 0 1004 %ys = shufflevector <vscale x 16 x i8> %i, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer 1005 %a = sub <vscale x 16 x i8> %x, %ys 1006 %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z 1007 ret <vscale x 16 x i8> %b 1008} 1009 1010define <vscale x 4 x i32> @mulqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, i32 %y) { 1011; CHECK-LABEL: mulqr_v4i32: 1012; CHECK: // %bb.0: // %entry 1013; CHECK-NEXT: mov z2.s, w0 1014; CHECK-NEXT: ptrue p0.s 1015; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0 1016; CHECK-NEXT: mul z1.s, z1.s, z2.s 1017; CHECK-NEXT: mov z0.s, p0/m, z1.s 1018; CHECK-NEXT: ret 1019entry: 1020 %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer 1021 %i = insertelement <vscale x 4 x i32> undef, i32 %y, i32 0 1022 %ys = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer 1023 %a = mul <vscale x 4 x i32> %x, %ys 1024 %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z 1025 ret <vscale x 4 x i32> %b 1026} 1027 1028define <vscale x 8 x i16> @mulqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, 
i16 %y) { 1029; CHECK-LABEL: mulqr_v8i16: 1030; CHECK: // %bb.0: // %entry 1031; CHECK-NEXT: mov z2.h, w0 1032; CHECK-NEXT: ptrue p0.h 1033; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0 1034; CHECK-NEXT: mul z1.h, z1.h, z2.h 1035; CHECK-NEXT: mov z0.h, p0/m, z1.h 1036; CHECK-NEXT: ret 1037entry: 1038 %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer 1039 %i = insertelement <vscale x 8 x i16> undef, i16 %y, i32 0 1040 %ys = shufflevector <vscale x 8 x i16> %i, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer 1041 %a = mul <vscale x 8 x i16> %x, %ys 1042 %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z 1043 ret <vscale x 8 x i16> %b 1044} 1045 1046define <vscale x 16 x i8> @mulqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, i8 %y) { 1047; CHECK-LABEL: mulqr_v16i8: 1048; CHECK: // %bb.0: // %entry 1049; CHECK-NEXT: mov z2.b, w0 1050; CHECK-NEXT: ptrue p0.b 1051; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 1052; CHECK-NEXT: mul z1.b, z1.b, z2.b 1053; CHECK-NEXT: mov z0.b, p0/m, z1.b 1054; CHECK-NEXT: ret 1055entry: 1056 %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer 1057 %i = insertelement <vscale x 16 x i8> undef, i8 %y, i32 0 1058 %ys = shufflevector <vscale x 16 x i8> %i, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer 1059 %a = mul <vscale x 16 x i8> %x, %ys 1060 %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z 1061 ret <vscale x 16 x i8> %b 1062} 1063 1064define <vscale x 4 x float> @faddqr_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, float %y) { 1065; CHECK-LABEL: faddqr_v4f32: 1066; CHECK: // %bb.0: // %entry 1067; CHECK-NEXT: // kill: def $s2 killed $s2 def $z2 1068; CHECK-NEXT: ptrue p0.s 1069; CHECK-NEXT: mov z2.s, s2 1070; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 1071; CHECK-NEXT: fadd z1.s, z1.s, z2.s 1072; CHECK-NEXT: mov z0.s, p0/m, z1.s 1073; CHECK-NEXT: ret 1074entry: 1075 %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer 1076 %i = insertelement 
<vscale x 4 x float> undef, float %y, i32 0 1077 %ys = shufflevector <vscale x 4 x float> %i, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer 1078 %a = fadd <vscale x 4 x float> %x, %ys 1079 %b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z 1080 ret <vscale x 4 x float> %b 1081} 1082 1083define <vscale x 8 x half> @faddqr_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, half %y) { 1084; CHECK-LABEL: faddqr_v8f16: 1085; CHECK: // %bb.0: // %entry 1086; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 1087; CHECK-NEXT: ptrue p0.h 1088; CHECK-NEXT: mov z2.h, h2 1089; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 1090; CHECK-NEXT: fadd z1.h, z1.h, z2.h 1091; CHECK-NEXT: mov z0.h, p0/m, z1.h 1092; CHECK-NEXT: ret 1093entry: 1094 %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer 1095 %i = insertelement <vscale x 8 x half> undef, half %y, i32 0 1096 %ys = shufflevector <vscale x 8 x half> %i, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer 1097 %a = fadd <vscale x 8 x half> %x, %ys 1098 %b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z 1099 ret <vscale x 8 x half> %b 1100} 1101 1102define <vscale x 4 x float> @fsubqr_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, float %y) { 1103; CHECK-LABEL: fsubqr_v4f32: 1104; CHECK: // %bb.0: // %entry 1105; CHECK-NEXT: // kill: def $s2 killed $s2 def $z2 1106; CHECK-NEXT: ptrue p0.s 1107; CHECK-NEXT: mov z2.s, s2 1108; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 1109; CHECK-NEXT: fsub z1.s, z1.s, z2.s 1110; CHECK-NEXT: mov z0.s, p0/m, z1.s 1111; CHECK-NEXT: ret 1112entry: 1113 %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer 1114 %i = insertelement <vscale x 4 x float> undef, float %y, i32 0 1115 %ys = shufflevector <vscale x 4 x float> %i, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer 1116 %a = fsub <vscale x 4 x float> %x, %ys 1117 %b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x 
float> %z 1118 ret <vscale x 4 x float> %b 1119} 1120 1121define <vscale x 8 x half> @fsubqr_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, half %y) { 1122; CHECK-LABEL: fsubqr_v8f16: 1123; CHECK: // %bb.0: // %entry 1124; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 1125; CHECK-NEXT: ptrue p0.h 1126; CHECK-NEXT: mov z2.h, h2 1127; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 1128; CHECK-NEXT: fsub z1.h, z1.h, z2.h 1129; CHECK-NEXT: mov z0.h, p0/m, z1.h 1130; CHECK-NEXT: ret 1131entry: 1132 %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer 1133 %i = insertelement <vscale x 8 x half> undef, half %y, i32 0 1134 %ys = shufflevector <vscale x 8 x half> %i, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer 1135 %a = fsub <vscale x 8 x half> %x, %ys 1136 %b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z 1137 ret <vscale x 8 x half> %b 1138} 1139 1140define <vscale x 4 x float> @fmulqr_v4f32(<vscale x 4 x float> %z, <vscale x 4 x float> %x, float %y) { 1141; CHECK-LABEL: fmulqr_v4f32: 1142; CHECK: // %bb.0: // %entry 1143; CHECK-NEXT: // kill: def $s2 killed $s2 def $z2 1144; CHECK-NEXT: ptrue p0.s 1145; CHECK-NEXT: mov z2.s, s2 1146; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 1147; CHECK-NEXT: fmul z1.s, z1.s, z2.s 1148; CHECK-NEXT: mov z0.s, p0/m, z1.s 1149; CHECK-NEXT: ret 1150entry: 1151 %c = fcmp oeq <vscale x 4 x float> %z, zeroinitializer 1152 %i = insertelement <vscale x 4 x float> undef, float %y, i32 0 1153 %ys = shufflevector <vscale x 4 x float> %i, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer 1154 %a = fmul <vscale x 4 x float> %x, %ys 1155 %b = select <vscale x 4 x i1> %c, <vscale x 4 x float> %a, <vscale x 4 x float> %z 1156 ret <vscale x 4 x float> %b 1157} 1158 1159define <vscale x 8 x half> @fmulqr_v8f16(<vscale x 8 x half> %z, <vscale x 8 x half> %x, half %y) { 1160; CHECK-LABEL: fmulqr_v8f16: 1161; CHECK: // %bb.0: // %entry 1162; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 1163; 
CHECK-NEXT: ptrue p0.h 1164; CHECK-NEXT: mov z2.h, h2 1165; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 1166; CHECK-NEXT: fmul z1.h, z1.h, z2.h 1167; CHECK-NEXT: mov z0.h, p0/m, z1.h 1168; CHECK-NEXT: ret 1169entry: 1170 %c = fcmp oeq <vscale x 8 x half> %z, zeroinitializer 1171 %i = insertelement <vscale x 8 x half> undef, half %y, i32 0 1172 %ys = shufflevector <vscale x 8 x half> %i, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer 1173 %a = fmul <vscale x 8 x half> %x, %ys 1174 %b = select <vscale x 8 x i1> %c, <vscale x 8 x half> %a, <vscale x 8 x half> %z 1175 ret <vscale x 8 x half> %b 1176} 1177 1178define <vscale x 4 x i32> @sadd_satqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, i32 %y) { 1179; CHECK-LABEL: sadd_satqr_v4i32: 1180; CHECK: // %bb.0: // %entry 1181; CHECK-NEXT: mov z2.s, w0 1182; CHECK-NEXT: ptrue p0.s 1183; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0 1184; CHECK-NEXT: sqadd z1.s, z1.s, z2.s 1185; CHECK-NEXT: mov z0.s, p0/m, z1.s 1186; CHECK-NEXT: ret 1187entry: 1188 %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer 1189 %i = insertelement <vscale x 4 x i32> undef, i32 %y, i32 0 1190 %ys = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer 1191 %a = call <vscale x 4 x i32> @llvm.sadd.sat.v4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %ys) 1192 %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z 1193 ret <vscale x 4 x i32> %b 1194} 1195 1196define <vscale x 8 x i16> @sadd_satqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, i16 %y) { 1197; CHECK-LABEL: sadd_satqr_v8i16: 1198; CHECK: // %bb.0: // %entry 1199; CHECK-NEXT: mov z2.h, w0 1200; CHECK-NEXT: ptrue p0.h 1201; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0 1202; CHECK-NEXT: sqadd z1.h, z1.h, z2.h 1203; CHECK-NEXT: mov z0.h, p0/m, z1.h 1204; CHECK-NEXT: ret 1205entry: 1206 %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer 1207 %i = insertelement <vscale x 8 x i16> undef, i16 %y, i32 0 1208 %ys = 
shufflevector <vscale x 8 x i16> %i, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer 1209 %a = call <vscale x 8 x i16> @llvm.sadd.sat.v8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %ys) 1210 %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z 1211 ret <vscale x 8 x i16> %b 1212} 1213 1214define <vscale x 16 x i8> @sadd_satqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, i8 %y) { 1215; CHECK-LABEL: sadd_satqr_v16i8: 1216; CHECK: // %bb.0: // %entry 1217; CHECK-NEXT: mov z2.b, w0 1218; CHECK-NEXT: ptrue p0.b 1219; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 1220; CHECK-NEXT: sqadd z1.b, z1.b, z2.b 1221; CHECK-NEXT: mov z0.b, p0/m, z1.b 1222; CHECK-NEXT: ret 1223entry: 1224 %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer 1225 %i = insertelement <vscale x 16 x i8> undef, i8 %y, i32 0 1226 %ys = shufflevector <vscale x 16 x i8> %i, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer 1227 %a = call <vscale x 16 x i8> @llvm.sadd.sat.v16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %ys) 1228 %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z 1229 ret <vscale x 16 x i8> %b 1230} 1231 1232define <vscale x 4 x i32> @uadd_satqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, i32 %y) { 1233; CHECK-LABEL: uadd_satqr_v4i32: 1234; CHECK: // %bb.0: // %entry 1235; CHECK-NEXT: mov z2.s, w0 1236; CHECK-NEXT: ptrue p0.s 1237; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0 1238; CHECK-NEXT: uqadd z1.s, z1.s, z2.s 1239; CHECK-NEXT: mov z0.s, p0/m, z1.s 1240; CHECK-NEXT: ret 1241entry: 1242 %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer 1243 %i = insertelement <vscale x 4 x i32> undef, i32 %y, i32 0 1244 %ys = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer 1245 %a = call <vscale x 4 x i32> @llvm.uadd.sat.v4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %ys) 1246 %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z 1247 ret 
<vscale x 4 x i32> %b 1248} 1249 1250define <vscale x 8 x i16> @uadd_satqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, i16 %y) { 1251; CHECK-LABEL: uadd_satqr_v8i16: 1252; CHECK: // %bb.0: // %entry 1253; CHECK-NEXT: mov z2.h, w0 1254; CHECK-NEXT: ptrue p0.h 1255; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0 1256; CHECK-NEXT: uqadd z1.h, z1.h, z2.h 1257; CHECK-NEXT: mov z0.h, p0/m, z1.h 1258; CHECK-NEXT: ret 1259entry: 1260 %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer 1261 %i = insertelement <vscale x 8 x i16> undef, i16 %y, i32 0 1262 %ys = shufflevector <vscale x 8 x i16> %i, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer 1263 %a = call <vscale x 8 x i16> @llvm.uadd.sat.v8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %ys) 1264 %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z 1265 ret <vscale x 8 x i16> %b 1266} 1267 1268define <vscale x 16 x i8> @uadd_satqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, i8 %y) { 1269; CHECK-LABEL: uadd_satqr_v16i8: 1270; CHECK: // %bb.0: // %entry 1271; CHECK-NEXT: mov z2.b, w0 1272; CHECK-NEXT: ptrue p0.b 1273; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 1274; CHECK-NEXT: uqadd z1.b, z1.b, z2.b 1275; CHECK-NEXT: mov z0.b, p0/m, z1.b 1276; CHECK-NEXT: ret 1277entry: 1278 %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer 1279 %i = insertelement <vscale x 16 x i8> undef, i8 %y, i32 0 1280 %ys = shufflevector <vscale x 16 x i8> %i, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer 1281 %a = call <vscale x 16 x i8> @llvm.uadd.sat.v16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %ys) 1282 %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z 1283 ret <vscale x 16 x i8> %b 1284} 1285 1286define <vscale x 4 x i32> @ssub_satqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, i32 %y) { 1287; CHECK-LABEL: ssub_satqr_v4i32: 1288; CHECK: // %bb.0: // %entry 1289; CHECK-NEXT: mov z2.s, w0 1290; CHECK-NEXT: ptrue p0.s 1291; CHECK-NEXT: cmpeq p0.s, 
p0/z, z0.s, #0 1292; CHECK-NEXT: sqsub z1.s, z1.s, z2.s 1293; CHECK-NEXT: mov z0.s, p0/m, z1.s 1294; CHECK-NEXT: ret 1295entry: 1296 %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer 1297 %i = insertelement <vscale x 4 x i32> undef, i32 %y, i32 0 1298 %ys = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer 1299 %a = call <vscale x 4 x i32> @llvm.ssub.sat.v4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %ys) 1300 %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z 1301 ret <vscale x 4 x i32> %b 1302} 1303 1304define <vscale x 8 x i16> @ssub_satqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, i16 %y) { 1305; CHECK-LABEL: ssub_satqr_v8i16: 1306; CHECK: // %bb.0: // %entry 1307; CHECK-NEXT: mov z2.h, w0 1308; CHECK-NEXT: ptrue p0.h 1309; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0 1310; CHECK-NEXT: sqsub z1.h, z1.h, z2.h 1311; CHECK-NEXT: mov z0.h, p0/m, z1.h 1312; CHECK-NEXT: ret 1313entry: 1314 %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer 1315 %i = insertelement <vscale x 8 x i16> undef, i16 %y, i32 0 1316 %ys = shufflevector <vscale x 8 x i16> %i, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer 1317 %a = call <vscale x 8 x i16> @llvm.ssub.sat.v8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %ys) 1318 %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z 1319 ret <vscale x 8 x i16> %b 1320} 1321 1322define <vscale x 16 x i8> @ssub_satqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, i8 %y) { 1323; CHECK-LABEL: ssub_satqr_v16i8: 1324; CHECK: // %bb.0: // %entry 1325; CHECK-NEXT: mov z2.b, w0 1326; CHECK-NEXT: ptrue p0.b 1327; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 1328; CHECK-NEXT: sqsub z1.b, z1.b, z2.b 1329; CHECK-NEXT: mov z0.b, p0/m, z1.b 1330; CHECK-NEXT: ret 1331entry: 1332 %c = icmp eq <vscale x 16 x i8> %z, zeroinitializer 1333 %i = insertelement <vscale x 16 x i8> undef, i8 %y, i32 0 1334 %ys = shufflevector <vscale x 16 x i8> %i, 
<vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer 1335 %a = call <vscale x 16 x i8> @llvm.ssub.sat.v16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %ys) 1336 %b = select <vscale x 16 x i1> %c, <vscale x 16 x i8> %a, <vscale x 16 x i8> %z 1337 ret <vscale x 16 x i8> %b 1338} 1339 1340define <vscale x 4 x i32> @usub_satqr_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, i32 %y) { 1341; CHECK-LABEL: usub_satqr_v4i32: 1342; CHECK: // %bb.0: // %entry 1343; CHECK-NEXT: mov z2.s, w0 1344; CHECK-NEXT: ptrue p0.s 1345; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0 1346; CHECK-NEXT: uqsub z1.s, z1.s, z2.s 1347; CHECK-NEXT: mov z0.s, p0/m, z1.s 1348; CHECK-NEXT: ret 1349entry: 1350 %c = icmp eq <vscale x 4 x i32> %z, zeroinitializer 1351 %i = insertelement <vscale x 4 x i32> undef, i32 %y, i32 0 1352 %ys = shufflevector <vscale x 4 x i32> %i, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer 1353 %a = call <vscale x 4 x i32> @llvm.usub.sat.v4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %ys) 1354 %b = select <vscale x 4 x i1> %c, <vscale x 4 x i32> %a, <vscale x 4 x i32> %z 1355 ret <vscale x 4 x i32> %b 1356} 1357 1358define <vscale x 8 x i16> @usub_satqr_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, i16 %y) { 1359; CHECK-LABEL: usub_satqr_v8i16: 1360; CHECK: // %bb.0: // %entry 1361; CHECK-NEXT: mov z2.h, w0 1362; CHECK-NEXT: ptrue p0.h 1363; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0 1364; CHECK-NEXT: uqsub z1.h, z1.h, z2.h 1365; CHECK-NEXT: mov z0.h, p0/m, z1.h 1366; CHECK-NEXT: ret 1367entry: 1368 %c = icmp eq <vscale x 8 x i16> %z, zeroinitializer 1369 %i = insertelement <vscale x 8 x i16> undef, i16 %y, i32 0 1370 %ys = shufflevector <vscale x 8 x i16> %i, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer 1371 %a = call <vscale x 8 x i16> @llvm.usub.sat.v8i16(<vscale x 8 x i16> %x, <vscale x 8 x i16> %ys) 1372 %b = select <vscale x 8 x i1> %c, <vscale x 8 x i16> %a, <vscale x 8 x i16> %z 1373 ret <vscale x 8 x i16> %b 1374} 1375 
; Per-lane select (i8 lanes): where %z == 0 take llvm.usub.sat(%x, splat(%y)),
; otherwise keep %z. The scalar %y is broadcast via insertelement into lane 0
; followed by a shufflevector with an all-zero mask.
define <vscale x 16 x i8> @usub_satqr_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, i8 %y) {
; CHECK-LABEL: usub_satqr_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov z2.b, w0
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: uqsub z1.b, z1.b, z2.b
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
entry:
  %mask = icmp eq <vscale x 16 x i8> %z, zeroinitializer
  %ins = insertelement <vscale x 16 x i8> undef, i8 %y, i32 0
  %splat = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %val = call <vscale x 16 x i8> @llvm.usub.sat.v16i8(<vscale x 16 x i8> %x, <vscale x 16 x i8> %splat)
  %res = select <vscale x 16 x i1> %mask, <vscale x 16 x i8> %val, <vscale x 16 x i8> %z
  ret <vscale x 16 x i8> %res
}

; Saturating add/sub intrinsic declarations referenced by the *_sat_* and
; *_satqr_* tests in this file (signed/unsigned x add/sub x i8/i16/i32 lanes).
declare <vscale x 16 x i8> @llvm.sadd.sat.v16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.sadd.sat.v8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.sadd.sat.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i8> @llvm.uadd.sat.v16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.uadd.sat.v8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.uadd.sat.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i8> @llvm.ssub.sat.v16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.ssub.sat.v8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.ssub.sat.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i8> @llvm.usub.sat.v16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.usub.sat.v8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.usub.sat.v4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)