; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; == Scalable ==

define <vscale x 16 x i1> @lane_mask_nxv16i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv16i1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.b, w0, w1
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 %index, i32 %TC)
  ret <vscale x 16 x i1> %active.lane.mask
}

define <vscale x 8 x i1> @lane_mask_nxv8i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv8i1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.h, w0, w1
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 %index, i32 %TC)
  ret <vscale x 8 x i1> %active.lane.mask
}

define <vscale x 4 x i1> @lane_mask_nxv4i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv4i1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.s, w0, w1
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 %index, i32 %TC)
  ret <vscale x 4 x i1> %active.lane.mask
}

define <vscale x 2 x i1> @lane_mask_nxv2i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv2i1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.d, w0, w1
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 %index, i32 %TC)
  ret <vscale x 2 x i1> %active.lane.mask
}

define <vscale x 16 x i1> @lane_mask_nxv16i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv16i1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.b, x0, x1
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 %index, i64 %TC)
  ret <vscale x 16 x i1> %active.lane.mask
}

define <vscale x 8 x i1> @lane_mask_nxv8i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv8i1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.h, x0, x1
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 %index, i64 %TC)
  ret <vscale x 8 x i1> %active.lane.mask
}

define <vscale x 4 x i1> @lane_mask_nxv4i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv4i1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.s, x0, x1
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 %index, i64 %TC)
  ret <vscale x 4 x i1> %active.lane.mask
}

define <vscale x 2 x i1> @lane_mask_nxv2i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv2i1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.d, x0, x1
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 %index, i64 %TC)
  ret <vscale x 2 x i1> %active.lane.mask
}

define <vscale x 16 x i1> @lane_mask_nxv16i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv16i1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.b, #0, #1
; CHECK-NEXT:    mov z1.b, w0
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    uqadd z0.b, z0.b, z1.b
; CHECK-NEXT:    mov z1.b, w1
; CHECK-NEXT:    cmphi p0.b, p0/z, z1.b, z0.b
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i8(i8 %index, i8 %TC)
  ret <vscale x 16 x i1> %active.lane.mask
}
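
; Note: whilelo only accepts i32/i64 scalar operands, so the i8 variants
; above and below are expected to be expanded instead: the lane indices and
; the splatted %index are added with unsigned saturation (or clamped with
; umin once the values have been widened past i8), then compared unsigned
; against the splatted trip count to form the predicate.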

define <vscale x 8 x i1> @lane_mask_nxv8i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv8i1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.h, #0, #1
; CHECK-NEXT:    mov z1.h, w0
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    and z1.h, z1.h, #0xff
; CHECK-NEXT:    and z0.h, z0.h, #0xff
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    mov z1.h, w1
; CHECK-NEXT:    umin z0.h, z0.h, #255
; CHECK-NEXT:    and z1.h, z1.h, #0xff
; CHECK-NEXT:    cmphi p0.h, p0/z, z1.h, z0.h
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i8(i8 %index, i8 %TC)
  ret <vscale x 8 x i1> %active.lane.mask
}

define <vscale x 4 x i1> @lane_mask_nxv4i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv4i1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.s, #0, #1
; CHECK-NEXT:    and w8, w0, #0xff
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    and w8, w1, #0xff
; CHECK-NEXT:    and z0.s, z0.s, #0xff
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    umin z0.s, z0.s, #255
; CHECK-NEXT:    cmphi p0.s, p0/z, z1.s, z0.s
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i8(i8 %index, i8 %TC)
  ret <vscale x 4 x i1> %active.lane.mask
}

define <vscale x 2 x i1> @lane_mask_nxv2i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv2i1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.d, #0, #1
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    and x8, x0, #0xff
; CHECK-NEXT:    // kill: def $w1 killed $w1 def $x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    and x8, x1, #0xff
; CHECK-NEXT:    and z0.d, z0.d, #0xff
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    umin z0.d, z0.d, #255
; CHECK-NEXT:    cmphi p0.d, p0/z, z1.d, z0.d
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i8(i8 %index, i8 %TC)
  ret <vscale x 2 x i1> %active.lane.mask
}


; Illegal types
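; An nxv32i1 result is wider than the nxv16i1 held by a single SVE predicate
; register, so the masks below are expected to be computed as several legal
; compares whose results are re-packed with uzp1, spilling predicate
; registers where needed.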

define <vscale x 32 x i1> @lane_mask_nxv32i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_nxv32i1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str p7, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    index z0.s, #0, #1
; CHECK-NEXT:    mov z1.s, w0
; CHECK-NEXT:    mov z25.s, w1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z2.d, z0.d
; CHECK-NEXT:    mov z3.d, z0.d
; CHECK-NEXT:    uqadd z6.s, z0.s, z1.s
; CHECK-NEXT:    incw z0.s, all, mul #4
; CHECK-NEXT:    incw z2.s
; CHECK-NEXT:    incw z3.s, all, mul #2
; CHECK-NEXT:    uqadd z0.s, z0.s, z1.s
; CHECK-NEXT:    cmphi p2.s, p0/z, z25.s, z6.s
; CHECK-NEXT:    mov z4.d, z2.d
; CHECK-NEXT:    uqadd z5.s, z2.s, z1.s
; CHECK-NEXT:    uqadd z7.s, z3.s, z1.s
; CHECK-NEXT:    incw z2.s, all, mul #4
; CHECK-NEXT:    incw z3.s, all, mul #4
; CHECK-NEXT:    cmphi p5.s, p0/z, z25.s, z0.s
; CHECK-NEXT:    incw z4.s, all, mul #2
; CHECK-NEXT:    uqadd z2.s, z2.s, z1.s
; CHECK-NEXT:    uqadd z3.s, z3.s, z1.s
; CHECK-NEXT:    cmphi p1.s, p0/z, z25.s, z5.s
; CHECK-NEXT:    cmphi p3.s, p0/z, z25.s, z7.s
; CHECK-NEXT:    uqadd z24.s, z4.s, z1.s
; CHECK-NEXT:    incw z4.s, all, mul #4
; CHECK-NEXT:    cmphi p6.s, p0/z, z25.s, z2.s
; CHECK-NEXT:    cmphi p7.s, p0/z, z25.s, z3.s
; CHECK-NEXT:    uzp1 p1.h, p2.h, p1.h
; CHECK-NEXT:    uqadd z1.s, z4.s, z1.s
; CHECK-NEXT:    cmphi p4.s, p0/z, z25.s, z24.s
; CHECK-NEXT:    cmphi p0.s, p0/z, z25.s, z1.s
; CHECK-NEXT:    uzp1 p2.h, p3.h, p4.h
; CHECK-NEXT:    uzp1 p3.h, p5.h, p6.h
; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p4.h, p7.h, p0.h
; CHECK-NEXT:    ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p0.b, p1.b, p2.b
; CHECK-NEXT:    uzp1 p1.b, p3.b, p4.b
; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i32(i32 %index, i32 %TC)
  ret <vscale x 32 x i1> %active.lane.mask
}

define <vscale x 32 x i1> @lane_mask_nxv32i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv32i1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    str p9, [sp, #2, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p8, [sp, #3, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p7, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str z8, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 16 - 8 * VG
; CHECK-NEXT:    index z5.d, #0, #1
; CHECK-NEXT:    mov z0.d, x0
; CHECK-NEXT:    mov z3.d, x1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z2.d, z5.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    mov z4.d, z5.d
; CHECK-NEXT:    uqadd z25.d, z5.d, z0.d
; CHECK-NEXT:    incd z5.d, all, mul #8
; CHECK-NEXT:    incd z2.d
; CHECK-NEXT:    incd z1.d, all, mul #2
; CHECK-NEXT:    incd z4.d, all, mul #4
; CHECK-NEXT:    uqadd z5.d, z5.d, z0.d
; CHECK-NEXT:    cmphi p3.d, p0/z, z3.d, z25.d
; CHECK-NEXT:    mov z6.d, z2.d
; CHECK-NEXT:    mov z7.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    uqadd z26.d, z2.d, z0.d
; CHECK-NEXT:    uqadd z27.d, z1.d, z0.d
; CHECK-NEXT:    uqadd z28.d, z4.d, z0.d
; CHECK-NEXT:    incd z2.d, all, mul #8
; CHECK-NEXT:    incd z1.d, all, mul #8
; CHECK-NEXT:    incd z4.d, all, mul #8
; CHECK-NEXT:    incd z6.d, all, mul #2
; CHECK-NEXT:    incd z7.d, all, mul #4
; CHECK-NEXT:    incd z24.d, all, mul #4
; CHECK-NEXT:    cmphi p4.d, p0/z, z3.d, z26.d
; CHECK-NEXT:    cmphi p2.d, p0/z, z3.d, z27.d
; CHECK-NEXT:    cmphi p1.d, p0/z, z3.d, z28.d
; CHECK-NEXT:    mov z31.d, z6.d
; CHECK-NEXT:    uqadd z29.d, z6.d, z0.d
; CHECK-NEXT:    uqadd z30.d, z7.d, z0.d
; CHECK-NEXT:    uqadd z8.d, z24.d, z0.d
; CHECK-NEXT:    incd z6.d, all, mul #8
; CHECK-NEXT:    incd z7.d, all, mul #8
; CHECK-NEXT:    incd z24.d, all, mul #8
; CHECK-NEXT:    uqadd z2.d, z2.d, z0.d
; CHECK-NEXT:    uqadd z1.d, z1.d, z0.d
; CHECK-NEXT:    incd z31.d, all, mul #4
; CHECK-NEXT:    uqadd z4.d, z4.d, z0.d
; CHECK-NEXT:    uzp1 p3.s, p3.s, p4.s
; CHECK-NEXT:    cmphi p5.d, p0/z, z3.d, z29.d
; CHECK-NEXT:    cmphi p7.d, p0/z, z3.d, z30.d
; CHECK-NEXT:    uqadd z6.d, z6.d, z0.d
; CHECK-NEXT:    cmphi p6.d, p0/z, z3.d, z8.d
; CHECK-NEXT:    ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    uqadd z7.d, z7.d, z0.d
; CHECK-NEXT:    uqadd z25.d, z31.d, z0.d
; CHECK-NEXT:    incd z31.d, all, mul #8
; CHECK-NEXT:    uqadd z24.d, z24.d, z0.d
; CHECK-NEXT:    cmphi p4.d, p0/z, z3.d, z5.d
; CHECK-NEXT:    uzp1 p2.s, p2.s, p5.s
; CHECK-NEXT:    cmphi p5.d, p0/z, z3.d, z2.d
; CHECK-NEXT:    cmphi p9.d, p0/z, z3.d, z6.d
; CHECK-NEXT:    uqadd z0.d, z31.d, z0.d
; CHECK-NEXT:    uzp1 p1.s, p1.s, p7.s
; CHECK-NEXT:    cmphi p7.d, p0/z, z3.d, z1.d
; CHECK-NEXT:    cmphi p8.d, p0/z, z3.d, z25.d
; CHECK-NEXT:    uzp1 p2.h, p3.h, p2.h
; CHECK-NEXT:    cmphi p3.d, p0/z, z3.d, z7.d
; CHECK-NEXT:    uzp1 p4.s, p4.s, p5.s
; CHECK-NEXT:    uzp1 p5.s, p7.s, p9.s
; CHECK-NEXT:    ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p6.s, p6.s, p8.s
; CHECK-NEXT:    cmphi p8.d, p0/z, z3.d, z4.d
; CHECK-NEXT:    ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p4.h, p4.h, p5.h
; CHECK-NEXT:    ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p1.h, p1.h, p6.h
; CHECK-NEXT:    cmphi p6.d, p0/z, z3.d, z24.d
; CHECK-NEXT:    cmphi p0.d, p0/z, z3.d, z0.d
; CHECK-NEXT:    uzp1 p3.s, p8.s, p3.s
; CHECK-NEXT:    ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p0.s, p6.s, p0.s
; CHECK-NEXT:    ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    uzp1 p3.h, p3.h, p0.h
; CHECK-NEXT:    uzp1 p0.b, p2.b, p1.b
; CHECK-NEXT:    uzp1 p1.b, p4.b, p3.b
; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 %index, i64 %TC)
  ret <vscale x 32 x i1> %active.lane.mask
}
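
; With i8 operands both halves of an nxv32i1 mask can stay in byte elements:
; the second half's lane indices are offset by the vector length in bytes
; (rdvl) before the saturating add and compare, so no predicate spills are
; expected below.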

define <vscale x 32 x i1> @lane_mask_nxv32i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_nxv32i1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.b, #0, #1
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    mov z2.b, w0
; CHECK-NEXT:    mov z1.b, w8
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    add z1.b, z0.b, z1.b
; CHECK-NEXT:    uqadd z0.b, z0.b, z2.b
; CHECK-NEXT:    uqadd z1.b, z1.b, z2.b
; CHECK-NEXT:    mov z2.b, w1
; CHECK-NEXT:    cmphi p0.b, p1/z, z2.b, z0.b
; CHECK-NEXT:    cmphi p1.b, p1/z, z2.b, z1.b
; CHECK-NEXT:    ret
  %active.lane.mask = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i8(i8 %index, i8 %TC)
  ret <vscale x 32 x i1> %active.lane.mask
}

; UTC_ARGS: --disable
; This test exists to protect against a compiler crash caused by an attempt to
; convert (via changeVectorElementType) an MVT into an EVT, which is impossible.
; The test's output is large and not relevant, so the check lines have been
; disabled.
define <vscale x 64 x i1> @lane_mask_nxv64i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_nxv64i1_i64:
  %active.lane.mask = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 %index, i64 %TC)
  ret <vscale x 64 x i1> %active.lane.mask
}
; UTC_ARGS: --enable

; == Fixed width ==
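; Note: with SVE available, fixed-length masks still appear to be formed with
; whilelo where the operand types allow it; the predicate is then expanded
; into a fixed-length vector via a predicated mov.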

define <16 x i1> @lane_mask_v16i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_v16i1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.b, w0, w1
; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %TC)
  ret <16 x i1> %active.lane.mask
}

define <8 x i1> @lane_mask_v8i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_v8i1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.b, w0, w1
; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 %index, i32 %TC)
  ret <8 x i1> %active.lane.mask
}

define <4 x i1> @lane_mask_v4i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_v4i1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.h, w0, w1
; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %index, i32 %TC)
  ret <4 x i1> %active.lane.mask
}

define <2 x i1> @lane_mask_v2i1_i32(i32 %index, i32 %TC) {
; CHECK-LABEL: lane_mask_v2i1_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.s, w0, w1
; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 %index, i32 %TC)
  ret <2 x i1> %active.lane.mask
}

define <16 x i1> @lane_mask_v16i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_v16i1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.b, x0, x1
; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
  %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 %index, i64 %TC)
  ret <16 x i1> %active.lane.mask
}

define <8 x i1> @lane_mask_v8i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_v8i1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.b, x0, x1
; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %TC)
  ret <8 x i1> %active.lane.mask
}

define <4 x i1> @lane_mask_v4i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_v4i1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.h, x0, x1
; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %TC)
  ret <4 x i1> %active.lane.mask
}

define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
; CHECK-LABEL: lane_mask_v2i1_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    whilelo p0.s, x0, x1
; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %TC)
  ret <2 x i1> %active.lane.mask
}
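
; For the fixed-length i8 cases plain NEON is enough: dup the index and trip
; count, saturate the add (uqadd directly on bytes, or add plus a umin clamp
; once the elements are widened), then cmhi against the trip count.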

define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v16i1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.b, #0, #1
; CHECK-NEXT:    dup v1.16b, w0
; CHECK-NEXT:    uqadd v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    dup v1.16b, w1
; CHECK-NEXT:    cmhi v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    ret
  %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8 %index, i8 %TC)
  ret <16 x i1> %active.lane.mask
}

define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v8i1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.b, #0, #1
; CHECK-NEXT:    dup v1.8b, w0
; CHECK-NEXT:    uqadd v0.8b, v1.8b, v0.8b
; CHECK-NEXT:    dup v1.8b, w1
; CHECK-NEXT:    cmhi v0.8b, v1.8b, v0.8b
; CHECK-NEXT:    ret
  %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
  ret <8 x i1> %active.lane.mask
}

define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v4i1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4h, w0
; CHECK-NEXT:    index z1.h, #0, #1
; CHECK-NEXT:    movi d2, #0xff00ff00ff00ff
; CHECK-NEXT:    dup v3.4h, w1
; CHECK-NEXT:    bic v0.4h, #255, lsl #8
; CHECK-NEXT:    bic v3.4h, #255, lsl #8
; CHECK-NEXT:    add v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    umin v0.4h, v0.4h, v2.4h
; CHECK-NEXT:    cmhi v0.4h, v3.4h, v0.4h
; CHECK-NEXT:    ret
  %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
  ret <4 x i1> %active.lane.mask
}

define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
; CHECK-LABEL: lane_mask_v2i1_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d0, #0x0000ff000000ff
; CHECK-NEXT:    dup v1.2s, w0
; CHECK-NEXT:    index z2.s, #0, #1
; CHECK-NEXT:    dup v3.2s, w1
; CHECK-NEXT:    and v1.8b, v1.8b, v0.8b
; CHECK-NEXT:    add v1.2s, v1.2s, v2.2s
; CHECK-NEXT:    and v2.8b, v3.8b, v0.8b
; CHECK-NEXT:    umin v0.2s, v1.2s, v0.2s
; CHECK-NEXT:    cmhi v0.2s, v2.2s, v0.2s
; CHECK-NEXT:    ret
  %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC)
  ret <2 x i1> %active.lane.mask
}
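
; With constant operands the mask can fold to a ptrue with a fixed-length
; pattern, but only when the count is known to fit in the minimum vector
; length: a ptrue vl pattern yields an all-false predicate when the count
; does not fit, whereas the intrinsic clamps, so a count of 5 for nxv4i1
; still goes through whilelo.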

define <vscale x 4 x i1> @lane_mask_nxv4i1_imm3() {
; CHECK-LABEL: lane_mask_nxv4i1_imm3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s, vl3
; CHECK-NEXT:    ret
entry:
  %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 3)
  ret <vscale x 4 x i1> %active.lane.mask
}

define <vscale x 4 x i1> @lane_mask_nxv4i1_imm5() {
; CHECK-LABEL: lane_mask_nxv4i1_imm5:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #5 // =0x5
; CHECK-NEXT:    whilelo p0.s, xzr, x8
; CHECK-NEXT:    ret
entry:
  %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 5)
  ret <vscale x 4 x i1> %active.lane.mask
}

define <vscale x 4 x i1> @lane_mask_nxv4i1_imm4() {
; CHECK-LABEL: lane_mask_nxv4i1_imm4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    ret
entry:
  %active.lane.mask = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 10, i64 14)
  ret <vscale x 4 x i1> %active.lane.mask
}

define <vscale x 16 x i1> @lane_mask_nxv16i1_imm10() {
; CHECK-LABEL: lane_mask_nxv16i1_imm10:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov w8, #10 // =0xa
; CHECK-NEXT:    whilelo p0.b, xzr, x8
; CHECK-NEXT:    ret
entry:
  %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 10)
  ret <vscale x 16 x i1> %active.lane.mask
}

define <vscale x 16 x i1> @lane_mask_nxv16i1_imm256() vscale_range(16,16) {
; CHECK-LABEL: lane_mask_nxv16i1_imm256:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    ret
entry:
  %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 256)
  ret <vscale x 16 x i1> %active.lane.mask
}


declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i32(i32, i32)
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32)
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32, i32)
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32)
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32, i32)

declare <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64, i64)
declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64, i64)
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64)
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64)
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64, i64)

declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i8(i8, i8)
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i8(i8, i8)
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i8(i8, i8)
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i8(i8, i8)
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i8(i8, i8)


declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32, i32)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32, i32)
declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32, i32)

declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64, i64)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64, i64)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64, i64)
declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64, i64)

declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i8(i8, i8)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8, i8)
declare <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8, i8)
declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8, i8)