; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"
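
; Scalar-condition selects should use NEON for 64-bit and 128-bit vectors
; (csetm + dup + bif) and SVE for anything wider: the i1 mask is splatted
; into a vector, compared against zero to form a predicate, and the two
; operands are combined with a predicated SEL.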

; Don't use SVE for 64-bit vectors.
define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.8b, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <8 x i8> %op1, <8 x i8> %op2
  ret <8 x i8> %sel
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.16b, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <16 x i8> %op1, <16 x i8> %op2
  ret <16 x i8> %sel
}

define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w2
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ptrue p1.b, vl32
; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p1/z, [x1]
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x i8>, ptr %a
  %op2 = load volatile <32 x i8>, ptr %b
  %sel = select i1 %mask, <32 x i8> %op1, <32 x i8> %op2
  store <32 x i8> %sel, ptr %a
  ret void
}
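
; At the minimum 256-bit vector length a 64-byte select is split in two, the
; high half addressed through an x8 offset; from 512 bits up a single vl64
; predicate covers the whole vector.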
define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.b, w2
; VBITS_GE_256-NEXT:    ptrue p0.b
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    ptrue p1.b, vl32
; VBITS_GE_256-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; VBITS_GE_256-NEXT:    ld1b { z0.b }, p1/z, [x0, x8]
; VBITS_GE_256-NEXT:    ld1b { z1.b }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1b { z2.b }, p1/z, [x1, x8]
; VBITS_GE_256-NEXT:    ld1b { z3.b }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.b, p0, z0.b, z2.b
; VBITS_GE_256-NEXT:    sel z1.b, p0, z1.b, z3.b
; VBITS_GE_256-NEXT:    st1b { z0.b }, p1, [x0, x8]
; VBITS_GE_256-NEXT:    st1b { z1.b }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.b, w2
; VBITS_GE_512-NEXT:    ptrue p0.b
; VBITS_GE_512-NEXT:    ptrue p1.b, vl64
; VBITS_GE_512-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; VBITS_GE_512-NEXT:    ld1b { z0.b }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1b { z1.b }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.b, p0, z0.b, z1.b
; VBITS_GE_512-NEXT:    st1b { z0.b }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <64 x i8>, ptr %a
  %op2 = load volatile <64 x i8>, ptr %b
  %sel = select i1 %mask, <64 x i8> %op1, <64 x i8> %op2
  store <64 x i8> %sel, ptr %a
  ret void
}

define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w2
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ptrue p1.b, vl128
; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p1/z, [x1]
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <128 x i8>, ptr %a
  %op2 = load volatile <128 x i8>, ptr %b
  %sel = select i1 %mask, <128 x i8> %op1, <128 x i8> %op2
  store <128 x i8> %sel, ptr %a
  ret void
}

define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w2
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ptrue p1.b, vl256
; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
; CHECK-NEXT:    ld1b { z0.b }, p1/z, [x0]
; CHECK-NEXT:    ld1b { z1.b }, p1/z, [x1]
; CHECK-NEXT:    sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT:    st1b { z0.b }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <256 x i8>, ptr %a
  %op2 = load volatile <256 x i8>, ptr %b
  %sel = select i1 %mask, <256 x i8> %op1, <256 x i8> %op2
  store <256 x i8> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.4h, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <4 x i16> %op1, <4 x i16> %op2
  ret <4 x i16> %sel
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.8h, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <8 x i16> %op1, <8 x i16> %op2
  ret <8 x i16> %sel
}
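
; For i16 elements the splatted mask is ANDed with #0x1 before the CMPNE:
; once the condition is widened into a 16-bit lane, the bits above bit 0 are
; not guaranteed to be zero. The i8 cases above omit the AND, presumably on
; the assumption that an i1 argument arrives zero-extended to at least a byte.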
define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ptrue p1.h, vl16
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <16 x i16>, ptr %a
  %op2 = load volatile <16 x i16>, ptr %b
  %sel = select i1 %mask, <16 x i16> %op1, <16 x i16> %op2
  store <16 x i16> %sel, ptr %a
  ret void
}

define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.h, w2
; VBITS_GE_256-NEXT:    ptrue p0.h
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ptrue p1.h, vl16
; VBITS_GE_256-NEXT:    and z0.h, z0.h, #0x1
; VBITS_GE_256-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p1/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1h { z2.h }, p1/z, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z3.h }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.h, p0, z0.h, z2.h
; VBITS_GE_256-NEXT:    sel z1.h, p0, z1.h, z3.h
; VBITS_GE_256-NEXT:    st1h { z0.h }, p1, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.h, w2
; VBITS_GE_512-NEXT:    ptrue p0.h
; VBITS_GE_512-NEXT:    ptrue p1.h, vl32
; VBITS_GE_512-NEXT:    and z0.h, z0.h, #0x1
; VBITS_GE_512-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1h { z1.h }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.h, p0, z0.h, z1.h
; VBITS_GE_512-NEXT:    st1h { z0.h }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <32 x i16>, ptr %a
  %op2 = load volatile <32 x i16>, ptr %b
  %sel = select i1 %mask, <32 x i16> %op1, <32 x i16> %op2
  store <32 x i16> %sel, ptr %a
  ret void
}

define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ptrue p1.h, vl64
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <64 x i16>, ptr %a
  %op2 = load volatile <64 x i16>, ptr %b
  %sel = select i1 %mask, <64 x i16> %op1, <64 x i16> %op2
  store <64 x i16> %sel, ptr %a
  ret void
}

define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ptrue p1.h, vl128
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
; CHECK-NEXT:    ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p1/z, [x1]
; CHECK-NEXT:    sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT:    st1h { z0.h }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <128 x i16>, ptr %a
  %op2 = load volatile <128 x i16>, ptr %b
  %sel = select i1 %mask, <128 x i16> %op1, <128 x i16> %op2
  store <128 x i16> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.2s, w8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <2 x i32> %op1, <2 x i32> %op2
  ret <2 x i32> %sel
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm w8, ne
; CHECK-NEXT:    dup v2.4s, w8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <4 x i32> %op1, <4 x i32> %op2
  ret <4 x i32> %sel
}
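
; The i32 cases mask the condition on the scalar side (an AND on the w
; register) before splatting it, rather than using a vector AND.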
define void @select_v8i32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ptrue p1.s, vl8
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <8 x i32>, ptr %a
  %op2 = load volatile <8 x i32>, ptr %b
  %sel = select i1 %mask, <8 x i32> %op1, <8 x i32> %op2
  store <8 x i32> %sel, ptr %a
  ret void
}

define void @select_v16i32(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    and w8, w2, #0x1
; VBITS_GE_256-NEXT:    ptrue p0.s
; VBITS_GE_256-NEXT:    mov z0.s, w8
; VBITS_GE_256-NEXT:    ptrue p1.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; VBITS_GE_256-NEXT:    ld1w { z0.s }, p1/z, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1w { z2.s }, p1/z, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    ld1w { z3.s }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.s, p0, z0.s, z2.s
; VBITS_GE_256-NEXT:    sel z1.s, p0, z1.s, z3.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p1, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z1.s }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    and w8, w2, #0x1
; VBITS_GE_512-NEXT:    ptrue p0.s
; VBITS_GE_512-NEXT:    mov z0.s, w8
; VBITS_GE_512-NEXT:    ptrue p1.s, vl16
; VBITS_GE_512-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.s, p0, z0.s, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <16 x i32>, ptr %a
  %op2 = load volatile <16 x i32>, ptr %b
  %sel = select i1 %mask, <16 x i32> %op1, <16 x i32> %op2
  store <16 x i32> %sel, ptr %a
  ret void
}

define void @select_v32i32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ptrue p1.s, vl32
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x i32>, ptr %a
  %op2 = load volatile <32 x i32>, ptr %b
  %sel = select i1 %mask, <32 x i32> %op1, <32 x i32> %op2
  store <32 x i32> %sel, ptr %a
  ret void
}

define void @select_v64i32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w2, #0x1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z0.s, w8
; CHECK-NEXT:    ptrue p1.s, vl64
; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
; CHECK-NEXT:    ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p1/z, [x1]
; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT:    st1w { z0.s }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <64 x i32>, ptr %a
  %op2 = load volatile <64 x i32>, ptr %b
  %sel = select i1 %mask, <64 x i32> %op1, <64 x i32> %op2
  store <64 x i32> %sel, ptr %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm x8, ne
; CHECK-NEXT:    fmov d2, x8
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <1 x i64> %op1, <1 x i64> %op2
  ret <1 x i64> %sel
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    tst w0, #0x1
; CHECK-NEXT:    csetm x8, ne
; CHECK-NEXT:    dup v2.2d, x8
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %sel = select i1 %mask, <2 x i64> %op1, <2 x i64> %op2
  ret <2 x i64> %sel
}
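
; For i64 elements the condition is additionally widened to an x register
; before the AND; the "kill" comment below marks w2 being re-read as x2.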
define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 {
; CHECK-LABEL: select_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ptrue p1.d, vl4
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <4 x i64>, ptr %a
  %op2 = load volatile <4 x i64>, ptr %b
  %sel = select i1 %mask, <4 x i64> %op1, <4 x i64> %op2
  store <4 x i64> %sel, ptr %a
  ret void
}

define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 {
; VBITS_GE_256-LABEL: select_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    // kill: def $w2 killed $w2 def $x2
; VBITS_GE_256-NEXT:    and x8, x2, #0x1
; VBITS_GE_256-NEXT:    ptrue p0.d
; VBITS_GE_256-NEXT:    mov z0.d, x8
; VBITS_GE_256-NEXT:    ptrue p1.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p1/z, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p1/z, [x0]
; VBITS_GE_256-NEXT:    ld1d { z2.d }, p1/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z3.d }, p1/z, [x1]
; VBITS_GE_256-NEXT:    sel z0.d, p0, z0.d, z2.d
; VBITS_GE_256-NEXT:    sel z1.d, p0, z1.d, z3.d
; VBITS_GE_256-NEXT:    st1d { z0.d }, p1, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: select_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    // kill: def $w2 killed $w2 def $x2
; VBITS_GE_512-NEXT:    and x8, x2, #0x1
; VBITS_GE_512-NEXT:    ptrue p0.d
; VBITS_GE_512-NEXT:    mov z0.d, x8
; VBITS_GE_512-NEXT:    ptrue p1.d, vl8
; VBITS_GE_512-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p1/z, [x0]
; VBITS_GE_512-NEXT:    ld1d { z1.d }, p1/z, [x1]
; VBITS_GE_512-NEXT:    sel z0.d, p0, z0.d, z1.d
; VBITS_GE_512-NEXT:    st1d { z0.d }, p1, [x0]
; VBITS_GE_512-NEXT:    ret
  %op1 = load volatile <8 x i64>, ptr %a
  %op2 = load volatile <8 x i64>, ptr %b
  %sel = select i1 %mask, <8 x i64> %op1, <8 x i64> %op2
  store <8 x i64> %sel, ptr %a
  ret void
}

define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
; CHECK-LABEL: select_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ptrue p1.d, vl16
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <16 x i64>, ptr %a
  %op2 = load volatile <16 x i64>, ptr %b
  %sel = select i1 %mask, <16 x i64> %op1, <16 x i64> %op2
  store <16 x i64> %sel, ptr %a
  ret void
}

define void @select_v32i64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
; CHECK-LABEL: select_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w2 killed $w2 def $x2
; CHECK-NEXT:    and x8, x2, #0x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    ptrue p1.d, vl32
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ld1d { z0.d }, p1/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p1/z, [x1]
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    st1d { z0.d }, p1, [x0]
; CHECK-NEXT:    ret
  %op1 = load volatile <32 x i64>, ptr %a
  %op2 = load volatile <32 x i64>, ptr %b
  %sel = select i1 %mask, <32 x i64> %op1, <32 x i64> %op2
  store <32 x i64> %sel, ptr %a
  ret void
}

attributes #0 = { "target-features"="+sve" }