; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s

; Code generation tests for the IR `select` instruction on AArch64 with SVE
; enabled. The file is organised in sections:
;   * select with a scalar i1 condition and scalable-vector operands,
;   * select with a scalable predicate condition (integer, then floating point),
;   * select whose scalar condition comes from an icmp,
;   * select combined with a floating-point binary operation, where the
;     expected assembly either inverts the compare or deliberately does not.
; The expected assembly is autogenerated; regenerate it with the script named
; on the first line instead of editing it by hand.

; Scalar condition select
;
; The condition is a scalar i1 argument. The expected assembly sign-extends
; bit 0 of the condition with sbfx, builds a predicate with whilelo (xzr as the
; first operand) and then uses sel between the two vector operands.

define <vscale x 1 x i8> @select_nxv1i8(i1 %cond, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b) {
; CHECK-LABEL: select_nxv1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b
  ret <vscale x 1 x i8> %res
}

define <vscale x 16 x i8> @select_nxv16i8(i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: select_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
  ret <vscale x 16 x i8> %res
}

define <vscale x 1 x i16> @select_nxv1i16(i1 %cond, <vscale x 1 x i16> %a, <vscale x 1 x i16> %b) {
; CHECK-LABEL: select_nxv1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 1 x i16> %a, <vscale x 1 x i16> %b
  ret <vscale x 1 x i16> %res
}

define <vscale x 8 x i16> @select_nxv8i16(i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: select_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
  ret <vscale x 8 x i16> %res
}

define <vscale x 1 x i32> @select_nxv1i32(i1 %cond, <vscale x 1 x i32> %a, <vscale x 1 x i32> %b) {
; CHECK-LABEL: select_nxv1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 1 x i32> %a, <vscale x 1 x i32> %b
  ret <vscale x 1 x i32> %res
}

define <vscale x 4 x i32> @select_nxv4i32(i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: select_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
  ret <vscale x 4 x i32> %res
}

define <vscale x 1 x i64> @select_nxv1i64(i1 %cond, <vscale x 1 x i64> %a, <vscale x 1 x i64> %b) {
; CHECK-LABEL: select_nxv1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 1 x i64> %a, <vscale x 1 x i64> %b
  ret <vscale x 1 x i64> %res
}

define <vscale x 2 x i64> @select_nxv2i64(i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: select_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @select_nxv8f16(i1 %cond, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: select_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 8 x half> %a, <vscale x 8 x half> %b
  ret <vscale x 8 x half> %res
}

define <vscale x 4 x float> @select_nxv4f32(i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: select_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @select_nxv2f64(i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: select_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b
  ret <vscale x 2 x double> %res
}

; Scalar condition select of predicate (i1 element) vectors.
; Here the operands arrive in p0/p1, so the expected assembly builds the
; governing predicate in p2 and selects between predicate registers.

define <vscale x 16 x i1> @select_nxv16i1(i1 %cond, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: select_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p2.b, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b
  ret <vscale x 16 x i1> %res
}

define <vscale x 8 x i1> @select_nxv8i1(i1 %cond, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: select_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p2.h, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b
  ret <vscale x 8 x i1> %res
}

define <vscale x 4 x i1> @select_nxv4i1(i1 %cond, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: select_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p2.s, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b
  ret <vscale x 4 x i1> %res
}

define <vscale x 2 x i1> @select_nxv2i1(i1 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: select_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p2.d, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b
  ret <vscale x 2 x i1> %res
}

; The <vscale x 1 x i1> case differs from the <vscale x 2 x i1> case only by an
; extra punpklo applied to the governing predicate in the expected assembly.
define <vscale x 1 x i1> @select_nxv1i1(i1 %cond, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b) {
; CHECK-LABEL: select_nxv1i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    sbfx x8, x0, #0, #1
; CHECK-NEXT:    whilelo p2.d, xzr, x8
; CHECK-NEXT:    punpklo p2.h, p2.b
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %res = select i1 %cond, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b
  ret <vscale x 1 x i1> %res
}

; Integer vector select
;
; The condition is a scalable predicate %p. Each select picks %a where %p is
; true and %dst otherwise; the expected assembly is a single merging mov of z1
; into z0 under p0.

define <vscale x 16 x i8> @sel_nxv16i8(<vscale x 16 x i1> %p, <vscale x 16 x i8> %dst, <vscale x 16 x i8> %a) {
; CHECK-LABEL: sel_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, p0/m, z1.b
; CHECK-NEXT:    ret
  %sel = select <vscale x 16 x i1> %p, <vscale x 16 x i8> %a, <vscale x 16 x i8> %dst
  ret <vscale x 16 x i8> %sel
}

define <vscale x 8 x i16> @sel_nxv8i16(<vscale x 8 x i1> %p, <vscale x 8 x i16> %dst, <vscale x 8 x i16> %a) {
; CHECK-LABEL: sel_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x i16> %a, <vscale x 8 x i16> %dst
  ret <vscale x 8 x i16> %sel
}

define <vscale x 4 x i32> @sel_nxv4i32(<vscale x 4 x i1> %p, <vscale x 4 x i32> %dst, <vscale x 4 x i32> %a) {
; CHECK-LABEL: sel_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %a, <vscale x 4 x i32> %dst
  ret <vscale x 4 x i32> %sel
}

; With a <vscale x 1 x i1> predicate the expected assembly applies uzp1 to the
; predicate before the merging mov.
define <vscale x 1 x i64> @sel_nxv1i64(<vscale x 1 x i1> %p, <vscale x 1 x i64> %dst, <vscale x 1 x i64> %a) {
; CHECK-LABEL: sel_nxv1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 p0.d, p0.d, p0.d
; CHECK-NEXT:    mov z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %sel = select <vscale x 1 x i1> %p, <vscale x 1 x i64> %a, <vscale x 1 x i64> %dst
  ret <vscale x 1 x i64> %sel
}

define <vscale x 2 x i64> @sel_nxv2i64(<vscale x 2 x i1> %p, <vscale x 2 x i64> %dst, <vscale x 2 x i64> %a) {
; CHECK-LABEL: sel_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %a, <vscale x 2 x i64> %dst
  ret <vscale x 2 x i64> %sel
}

; Floating point vector select

define <vscale x 8 x half> @sel_nxv8f16(<vscale x 8 x i1> %p, <vscale x 8 x half> %dst, <vscale x 8 x half> %a) {
; CHECK-LABEL: sel_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %sel = select <vscale x 8 x i1> %p, <vscale x 8 x half> %a, <vscale x 8 x half> %dst
  ret <vscale x 8 x half> %sel
}

define <vscale x 4 x float> @sel_nxv4f32(<vscale x 4 x i1> %p, <vscale x 4 x float> %dst, <vscale x 4 x float> %a) {
; CHECK-LABEL: sel_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %dst
  ret <vscale x 4 x float> %sel
}

; <vscale x 2 x float> with a <vscale x 2 x i1> predicate: the expected mov
; uses the .d element size rather than .s.
define <vscale x 2 x float> @sel_nxv2f32(<vscale x 2 x i1> %p, <vscale x 2 x float> %dst, <vscale x 2 x float> %a) {
; CHECK-LABEL: sel_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x float> %a, <vscale x 2 x float> %dst
  ret <vscale x 2 x float> %sel
}

; NOTE(review): the function name says nxv8f64 but every type in it is
; <vscale x 2 x double>; the name looks like a typo for nxv2f64. It is left
; unchanged here because the label assertion below depends on it.
define <vscale x 2 x double> @sel_nxv8f64(<vscale x 2 x i1> %p, <vscale x 2 x double> %dst, <vscale x 2 x double> %a) {
; CHECK-LABEL: sel_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %sel = select <vscale x 2 x i1> %p, <vscale x 2 x double> %a, <vscale x 2 x double> %dst
  ret <vscale x 2 x double> %sel
}

; Check icmp+select
;
; The scalar condition is computed in the function as `icmp eq i64 %x0, 0`.
; The expected assembly materialises it with cmp/cset and then follows the same
; sbfx + whilelo + sel sequence as the scalar condition tests above.

define <vscale x 2 x half> @icmp_select_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 2 x half> %a, <vscale x 2 x half> %b
  ret <vscale x 2 x half> %sel
}

define <vscale x 2 x float> @icmp_select_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 2 x float> %a, <vscale x 2 x float> %b
  ret <vscale x 2 x float> %sel
}

define <vscale x 2 x double> @icmp_select_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 2 x double> %a, <vscale x 2 x double> %b
  ret <vscale x 2 x double> %sel
}

define <vscale x 4 x half> @icmp_select_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 4 x half> %a, <vscale x 4 x half> %b
  ret <vscale x 4 x half> %sel
}

define <vscale x 4 x float> @icmp_select_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 4 x float> %a, <vscale x 4 x float> %b
  ret <vscale x 4 x float> %sel
}

define <vscale x 8 x half> @icmp_select_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 8 x half> %a, <vscale x 8 x half> %b
  ret <vscale x 8 x half> %sel
}

define <vscale x 1 x i64> @icmp_select_nxv1i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 1 x i64> %a, <vscale x 1 x i64> %b
  ret <vscale x 1 x i64> %sel
}

define <vscale x 2 x i64> @icmp_select_nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.d, xzr, x8
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b
  ret <vscale x 2 x i64> %sel
}

define <vscale x 1 x i32> @icmp_select_nxv1i32(<vscale x 1 x i32> %a, <vscale x 1 x i32> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 1 x i32> %a, <vscale x 1 x i32> %b
  ret <vscale x 1 x i32> %sel
}

define <vscale x 4 x i32> @icmp_select_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b
  ret <vscale x 4 x i32> %sel
}

define <vscale x 1 x i16> @icmp_select_nxv1i16(<vscale x 1 x i16> %a, <vscale x 1 x i16> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 1 x i16> %a, <vscale x 1 x i16> %b
  ret <vscale x 1 x i16> %sel
}

define <vscale x 8 x i16> @icmp_select_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.h, xzr, x8
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b
  ret <vscale x 8 x i16> %sel
}

define <vscale x 1 x i8> @icmp_select_nxv1i8(<vscale x 1 x i8> %a, <vscale x 1 x i8> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 1 x i8> %a, <vscale x 1 x i8> %b
  ret <vscale x 1 x i8> %sel
}

define <vscale x 16 x i8> @icmp_select_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b
  ret <vscale x 16 x i8> %sel
}

; icmp+select of predicate (i1 element) vectors: as in the scalar condition
; section, the governing predicate is expected in p2 and the <vscale x 1 x i1>
; case has an extra punpklo.

define <vscale x 1 x i1> @icmp_select_nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv1i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p2.d, xzr, x8
; CHECK-NEXT:    punpklo p2.h, p2.b
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b
  ret <vscale x 1 x i1> %sel
}

define <vscale x 2 x i1> @icmp_select_nxv2i1(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p2.d, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 2 x i1> %a, <vscale x 2 x i1> %b
  ret <vscale x 2 x i1> %sel
}
define <vscale x 4 x i1> @icmp_select_nxv4i1(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p2.s, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 4 x i1> %a, <vscale x 4 x i1> %b
  ret <vscale x 4 x i1> %sel
}
define <vscale x 8 x i1> @icmp_select_nxv8i1(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p2.h, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 8 x i1> %a, <vscale x 8 x i1> %b
  ret <vscale x 8 x i1> %sel
}
define <vscale x 16 x i1> @icmp_select_nxv16i1(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b, i64 %x0) {
; CHECK-LABEL: icmp_select_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    cset w8, eq
; CHECK-NEXT:    sbfx x8, x8, #0, #1
; CHECK-NEXT:    whilelo p2.b, xzr, x8
; CHECK-NEXT:    sel p0.b, p2, p0.b, p1.b
; CHECK-NEXT:    ret
  %mask = icmp eq i64 %x0, 0
  %sel = select i1 %mask, <vscale x 16 x i1> %a, <vscale x 16 x i1> %b
  ret <vscale x 16 x i1> %sel
}

; Select combined with a floating-point binary operation
;
; In the "invert" tests the select returns %a where the compare is true and the
; result of the binary operation otherwise. The expected assembly uses the
; inverted compare (fcmne / cmpne for an "equal" compare in the IR) followed by
; a merging predicated operation on z0, with no separate select instruction.
;
; NOTE(review): attribute group #0 is referenced by some functions below, but
; no definition of it is visible in this view of the file.

define <vscale x 4 x float> @select_f32_invert_fmul(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: select_f32_invert_fmul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
  %fmul = fmul <vscale x 4 x float> %a, %b
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %fmul
  ret <vscale x 4 x float> %sel
}

define <vscale x 4 x float> @select_f32_invert_fadd(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: select_f32_invert_fadd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
  %fadd = fadd <vscale x 4 x float> %a, %b
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %fadd
  ret <vscale x 4 x float> %sel
}

; Same shape as above, but the predicate comes from an integer compare on a
; separate operand %c.
define <vscale x 4 x float> @select_f32_invert_fsub(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: select_f32_invert_fsub:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpne p0.s, p0/z, z2.s, #0
; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %p = icmp eq <vscale x 4 x i32> %c, zeroinitializer
  %fsub = fsub <vscale x 4 x float> %a, %b
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %fsub
  ret <vscale x 4 x float> %sel
}

; Negative tests: the expected assembly keeps the original (non-inverted)
; compare.

; The binary operation is the true operand of the select, so the expected
; assembly uses fcmeq directly with the merging fmul.
define <vscale x 4 x float> @select_f32_no_invert_op_lhs(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: select_f32_no_invert_op_lhs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
  %fmul = fmul <vscale x 4 x float> %a, %b
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %fmul, <vscale x 4 x float> %a
  ret <vscale x 4 x float> %sel
}

; Both select operands are results of an fmul; the expected assembly computes
; both products unpredicated and merges them with a predicated mov.
define <vscale x 4 x float> @select_f32_no_invert_2_op(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x float> %d) {
; CHECK-LABEL: select_f32_no_invert_2_op:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmul z1.s, z0.s, z1.s
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fmul z0.s, z2.s, z3.s
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
  %fmul1 = fmul <vscale x 4 x float> %a, %b
  %fmul2 = fmul <vscale x 4 x float> %c, %d
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %fmul1, <vscale x 4 x float> %fmul2
  ret <vscale x 4 x float> %sel
}

; Both select operands are the same value %m; the expected assembly contains
; only the fmul, with no compare or select.
define <vscale x 4 x float> @select_f32_no_invert_equal_ops(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: select_f32_no_invert_equal_ops:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %m = fmul <vscale x 4 x float> %a, %b
  %p = fcmp oeq <vscale x 4 x float> %m, zeroinitializer
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %m, <vscale x 4 x float> %m
  ret <vscale x 4 x float> %sel
}

; The compare result %p feeds two selects; the expected assembly keeps fcmeq
; and emits an explicit sel followed by a merging mov.
; NOTE(review): the name mentions fmul but the operation in the body is fadd,
; and the %len argument is not used in the body.
define <vscale x 4 x float> @select_f32_no_invert_fmul_two_setcc_uses(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, i32 %len) #0 {
; CHECK-LABEL: select_f32_no_invert_fmul_two_setcc_uses:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fadd z1.s, z0.s, z1.s
; CHECK-NEXT:    fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p0/m, z2.s
; CHECK-NEXT:    ret
  %p = fcmp oeq <vscale x 4 x float> %a, zeroinitializer
  %fadd = fadd <vscale x 4 x float> %a, %b
  %sel = select <vscale x 4 x i1> %p, <vscale x 4 x float> %a, <vscale x 4 x float> %fadd
  %sel2 = select <vscale x 4 x i1> %p, <vscale x 4 x float> %c, <vscale x 4 x float> %sel
  ret <vscale x 4 x float> %sel2
}

; Fixed-width <4 x float> version of the invert pattern; the expected assembly
; uses v-register fcmeq / fmul / bif rather than SVE instructions.
define <4 x float> @select_f32_no_invert_not_scalable(<4 x float> %a, <4 x float> %b) #0 {
; CHECK-LABEL: select_f32_no_invert_not_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fcmeq v2.4s, v0.4s, #0.0
; CHECK-NEXT:    fmul v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %p = fcmp oeq <4 x float> %a, zeroinitializer
  %fmul = fmul <4 x float> %a, %b
  %sel = select <4 x i1> %p, <4 x float> %a, <4 x float> %fmul
  ret <4 x float> %sel
}