; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

; Code generation tests for fixed-length vector `select` with a per-lane
; (vector of i1) condition, compiled for AArch64 with SVE enabled through
; attribute group #0.
;
; The llc invocations above differ only in the minimum SVE register width they
; assume (256, 512 and 2048 bits). The 512-bit and 2048-bit invocations share
; one set of expectations. Functions whose expected output is the same for
; every invocation use the common prefix only.
;
; The expected assembly is autogenerated (see the note on the first line); do
; not edit it by hand.

target triple = "aarch64-unknown-linux-gnu"

;
; i8 elements
;

; Don't use SVE for 64-bit vectors.
; The expected code turns bit 0 of every mask lane in v2 into a full-lane mask
; (shl + cmlt) and blends the operands with NEON bif.
define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, <8 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.8b, v2.8b, #7
; CHECK-NEXT:    cmlt v2.8b, v2.8b, #0
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select <8 x i1> %mask, <8 x i8> %op1, <8 x i8> %op2
  ret <8 x i8> %sel
}

; Don't use SVE for 128-bit vectors.
; Same shl/cmlt/bif sequence as the 64-bit case, on full 128-bit registers.
define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, <16 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.16b, v2.16b, #7
; CHECK-NEXT:    cmlt v2.16b, v2.16b, #0
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select <16 x i1> %mask, <16 x i8> %op1, <16 x i8> %op2
  ret <16 x i8> %sel
}

; 256-bit vector: operands are loaded from %a and %b, the mask is their
; lane-wise equality, and the result is stored back to %a. The expected code is
; one predicated SVE compare + sel under a vl32 predicate, identical for every
; llc invocation.
define void @select_v32i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %mask = icmp eq <32 x i8> %op1, %op2
  %sel = select <32 x i1> %mask, <32 x i8> %op1, <32 x i8> %op2
  store <32 x i8> %sel, ptr %a
  ret void
}

; 512-bit vector with no vscale_range attribute, so the expected code depends
; on the minimum width passed to llc: the 256-bit configuration processes two
; vl32 halves (the upper half is addressed through x8 = 32), the wider
; configurations use a single vl64 operation.
define void @select_v64i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_256-NEXT:    cmpeq p2.b, p0/z, z2.b, z3.b
; VBITS_GE_256-NEXT:    sel z0.b, p1, z0.b, z1.b
; VBITS_GE_256-NEXT:    sel z1.b, p2, z2.b, z3.b
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; VBITS_GE_512-NEXT:    sel z0.b, p1, z0.b, z1.b
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <64 x i8>, ptr %a
  %op2 = load <64 x i8>, ptr %b
  %mask = icmp eq <64 x i8> %op1, %op2
  %sel = select <64 x i1> %mask, <64 x i8> %op1, <64 x i8> %op2
  store <64 x i8> %sel, ptr %a
  ret void
}

; 1024-bit vector; vscale_range(8,0) lets every configuration use one vl128
; operation.
define void @select_v128i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i8>, ptr %a
  %op2 = load <128 x i8>, ptr %b
  %mask = icmp eq <128 x i8> %op1, %op2
  %sel = select <128 x i1> %mask, <128 x i8> %op1, <128 x i8> %op2
  store <128 x i8> %sel, ptr %a
  ret void
}

; 2048-bit vector; vscale_range(16,0) lets every configuration use one vl256
; operation.
define void @select_v256i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.b, p0/z, z0.b, z1.b
; CHECK-NEXT:    sel z0.b, p1, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <256 x i8>, ptr %a
  %op2 = load <256 x i8>, ptr %b
  %mask = icmp eq <256 x i8> %op1, %op2
  %sel = select <256 x i1> %mask, <256 x i8> %op1, <256 x i8> %op2
  store <256 x i8> %sel, ptr %a
  ret void
}

;
; i16 elements
;

; Don't use SVE for 64-bit vectors.
; The mask is expanded per 16-bit lane (shl #15 + cmlt) before the NEON bif.
define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, <4 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.4h, v2.4h, #15
; CHECK-NEXT:    cmlt v2.4h, v2.4h, #0
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %mask, <4 x i16> %op1, <4 x i16> %op2
  ret <4 x i16> %sel
}

; Don't use SVE for 128-bit vectors.
; The mask is first widened from 8-bit to 16-bit lanes (ushll), then expanded
; as in the 64-bit case.
define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, <8 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v2.8h, v2.8b, #0
; CHECK-NEXT:    shl v2.8h, v2.8h, #15
; CHECK-NEXT:    cmlt v2.8h, v2.8h, #0
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select <8 x i1> %mask, <8 x i16> %op1, <8 x i16> %op2
  ret <8 x i16> %sel
}

; 256-bit vector: one predicated SVE compare + sel under a vl16 predicate.
define void @select_v16i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i16>, ptr %a
  %op2 = load <16 x i16>, ptr %b
  %mask = icmp eq <16 x i16> %op1, %op2
  %sel = select <16 x i1> %mask, <16 x i16> %op1, <16 x i16> %op2
  store <16 x i16> %sel, ptr %a
  ret void
}

; 512-bit vector with no vscale_range attribute: split into two vl16 halves
; (upper half at element index x8 = 16) for the 256-bit configuration, one vl32
; operation otherwise.
define void @select_v32i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_256-NEXT:    cmpeq p2.h, p0/z, z2.h, z3.h
; VBITS_GE_256-NEXT:    sel z0.h, p1, z0.h, z1.h
; VBITS_GE_256-NEXT:    sel z1.h, p2, z2.h, z3.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; VBITS_GE_512-NEXT:    sel z0.h, p1, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <32 x i16>, ptr %a
  %op2 = load <32 x i16>, ptr %b
  %mask = icmp eq <32 x i16> %op1, %op2
  %sel = select <32 x i1> %mask, <32 x i16> %op1, <32 x i16> %op2
  store <32 x i16> %sel, ptr %a
  ret void
}

; 1024-bit vector; vscale_range(8,0) lets every configuration use one vl64
; operation.
define void @select_v64i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i16>, ptr %a
  %op2 = load <64 x i16>, ptr %b
  %mask = icmp eq <64 x i16> %op1, %op2
  %sel = select <64 x i1> %mask, <64 x i16> %op1, <64 x i16> %op2
  store <64 x i16> %sel, ptr %a
  ret void
}

; 2048-bit vector; vscale_range(16,0) lets every configuration use one vl128
; operation.
define void @select_v128i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <128 x i16>, ptr %a
  %op2 = load <128 x i16>, ptr %b
  %mask = icmp eq <128 x i16> %op1, %op2
  %sel = select <128 x i1> %mask, <128 x i16> %op1, <128 x i16> %op2
  store <128 x i16> %sel, ptr %a
  ret void
}

;
; i32 elements
;

; Don't use SVE for 64-bit vectors.
; The mask is expanded per 32-bit lane (shl #31 + cmlt) before the NEON bif.
define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, <2 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v2.2s, v2.2s, #31
; CHECK-NEXT:    cmlt v2.2s, v2.2s, #0
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select <2 x i1> %mask, <2 x i32> %op1, <2 x i32> %op2
  ret <2 x i32> %sel
}

; Don't use SVE for 128-bit vectors.
; 128-bit NEON case: the mask is first widened from 16-bit to 32-bit lanes
; (ushll), then turned into a full-lane mask (shl #31 + cmlt) and used by bif.
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, <4 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v2.4s, v2.4h, #0
; CHECK-NEXT:    shl v2.4s, v2.4s, #31
; CHECK-NEXT:    cmlt v2.4s, v2.4s, #0
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select <4 x i1> %mask, <4 x i32> %op1, <4 x i32> %op2
  ret <4 x i32> %sel
}

; 256-bit vector: operands are loaded from %a and %b, the mask is their
; lane-wise equality, and the result is stored back to %a. The expected code is
; one predicated SVE compare + sel under a vl8 predicate, identical for every
; llc invocation.
define void @select_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %mask = icmp eq <8 x i32> %op1, %op2
  %sel = select <8 x i1> %mask, <8 x i32> %op1, <8 x i32> %op2
  store <8 x i32> %sel, ptr %a
  ret void
}

; 512-bit vector with no vscale_range attribute, so the expected code depends
; on the minimum width passed to llc: the 256-bit configuration processes two
; vl8 halves (upper half at element index x8 = 8), the wider configurations use
; a single vl16 operation.
define void @select_v16i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_256-NEXT:    cmpeq p2.s, p0/z, z2.s, z3.s
; VBITS_GE_256-NEXT:    sel z0.s, p1, z0.s, z1.s
; VBITS_GE_256-NEXT:    sel z1.s, p2, z2.s, z3.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; VBITS_GE_512-NEXT:    sel z0.s, p1, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <16 x i32>, ptr %a
  %op2 = load <16 x i32>, ptr %b
  %mask = icmp eq <16 x i32> %op1, %op2
  %sel = select <16 x i1> %mask, <16 x i32> %op1, <16 x i32> %op2
  store <16 x i32> %sel, ptr %a
  ret void
}

; 1024-bit vector; vscale_range(8,0) lets every configuration use one vl32
; operation.
define void @select_v32i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i32>, ptr %a
  %op2 = load <32 x i32>, ptr %b
  %mask = icmp eq <32 x i32> %op1, %op2
  %sel = select <32 x i1> %mask, <32 x i32> %op1, <32 x i32> %op2
  store <32 x i32> %sel, ptr %a
  ret void
}

; 2048-bit vector; vscale_range(16,0) lets every configuration use one vl64
; operation.
define void @select_v64i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.s, p0/z, z0.s, z1.s
; CHECK-NEXT:    sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <64 x i32>, ptr %a
  %op2 = load <64 x i32>, ptr %b
  %mask = icmp eq <64 x i32> %op1, %op2
  %sel = select <64 x i1> %mask, <64 x i32> %op1, <64 x i32> %op2
  store <64 x i32> %sel, ptr %a
  ret void
}

;
; i64 elements
;

; Don't use SVE for 64-bit vectors.
; 64-bit NEON case with a single lane: the <1 x i1> condition is tested in w0,
; turned into an all-ones / all-zeros value (tst + csetm), moved into d2 and
; used as the bif mask.
define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm x8, ne
; CHECK-NEXT:    fmov d2, x8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select <1 x i1> %mask, <1 x i64> %op1, <1 x i64> %op2
  ret <1 x i64> %sel
}

; Don't use SVE for 128-bit vectors.
; The mask is first widened from 32-bit to 64-bit lanes (ushll), then turned
; into a full-lane mask (shl #63 + cmlt) and used by NEON bif.
define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, <2 x i1> %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ushll v2.2d, v2.2s, #0
; CHECK-NEXT:    shl v2.2d, v2.2d, #63
; CHECK-NEXT:    cmlt v2.2d, v2.2d, #0
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select <2 x i1> %mask, <2 x i64> %op1, <2 x i64> %op2
  ret <2 x i64> %sel
}

; 256-bit vector: operands are loaded from %a and %b, the mask is their
; lane-wise equality, and the result is stored back to %a. The expected code is
; one predicated SVE compare + sel under a vl4 predicate, identical for every
; llc invocation.
define void @select_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %mask = icmp eq <4 x i64> %op1, %op2
  %sel = select <4 x i1> %mask, <4 x i64> %op1, <4 x i64> %op2
  store <4 x i64> %sel, ptr %a
  ret void
}

; 512-bit vector with no vscale_range attribute, so the expected code depends
; on the minimum width passed to llc: the 256-bit configuration processes two
; vl4 halves (upper half at element index x8 = 4), the wider configurations use
; a single vl8 operation.
define void @select_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p0/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_256-NEXT:    cmpeq p2.d, p0/z, z2.d, z3.d
; VBITS_GE_256-NEXT:    sel z0.d, p1, z0.d, z1.d
; VBITS_GE_256-NEXT:    sel z1.d, p2, z2.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; VBITS_GE_512-NEXT:    sel z0.d, p1, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load <8 x i64>, ptr %a
  %op2 = load <8 x i64>, ptr %b
  %mask = icmp eq <8 x i64> %op1, %op2
  %sel = select <8 x i1> %mask, <8 x i64> %op1, <8 x i64> %op2
  store <8 x i64> %sel, ptr %a
  ret void
}

; 1024-bit vector; vscale_range(8,0) lets every configuration use one vl16
; operation.
define void @select_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <16 x i64>, ptr %a
  %op2 = load <16 x i64>, ptr %b
  %mask = icmp eq <16 x i64> %op1, %op2
  %sel = select <16 x i1> %mask, <16 x i64> %op1, <16 x i64> %op2
  store <16 x i64> %sel, ptr %a
  ret void
}

; 2048-bit vector; vscale_range(16,0) lets every configuration use one vl32
; operation.
define void @select_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x1]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, z1.d
; CHECK-NEXT:    sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %op1 = load <32 x i64>, ptr %a
  %op2 = load <32 x i64>, ptr %b
  %mask = icmp eq <32 x i64> %op1, %op2
  %sel = select <32 x i1> %mask, <32 x i64> %op1, <32 x i64> %op2
  store <32 x i64> %sel, ptr %a
  ret void
}

; Attribute group shared by every function in this file: enables the SVE
; target feature and requests unwind tables.
attributes #0 = { "target-features"="+sve" uwtable }