; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK

define <vscale x 1 x i1> @get_lane_mask(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: get_lane_mask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 %index, i64 %tc)
  ret <vscale x 1 x i1> %mask
}

define <vscale x 1 x i1> @constant_zero_index(ptr %p, i64 %tc) {
; CHECK-LABEL: constant_zero_index:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vmsltu.vx v0, v8, a1
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 0, i64 %tc)
  ret <vscale x 1 x i1> %mask
}

define <vscale x 1 x i1> @constant_nonzero_index(ptr %p, i64 %tc) {
; CHECK-LABEL: constant_nonzero_index:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 24
; CHECK-NEXT:    vsaddu.vx v8, v8, a0
; CHECK-NEXT:    vmsltu.vx v0, v8, a1
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 24, i64 %tc)
  ret <vscale x 1 x i1> %mask
}

define <vscale x 1 x i1> @constant_tripcount(ptr %p, i64 %index) {
; CHECK-LABEL: constant_tripcount:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    li a0, 1024
; CHECK-NEXT:    vmsltu.vx v0, v8, a0
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 %index, i64 1024)
  ret <vscale x 1 x i1> %mask
}

define <vscale x 1 x i1> @constant_both(ptr %p) {
; CHECK-LABEL: constant_both:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 1024
; CHECK-NEXT:    vmsltu.vx v0, v8, a0
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 0, i64 1024)
  ret <vscale x 1 x i1> %mask
}

; Architectural max VLEN=64k, so result is "as-if" TC=1024 (see the note at the end of this file)
define <vscale x 1 x i1> @above_maxvl(ptr %p) {
; CHECK-LABEL: above_maxvl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    li a0, 1
; CHECK-NEXT:    slli a0, a0, 11
; CHECK-NEXT:    vmsltu.vx v0, v8, a0
; CHECK-NEXT:    ret
  %mask = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 0, i64 2048)
  ret <vscale x 1 x i1> %mask
}

define <2 x i1> @fv2(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    ret
  %mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 %index, i64 %tc)
  ret <2 x i1> %mask
}

define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    ret
  %mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 %index, i64 %tc)
  ret <8 x i1> %mask
}

define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI8_0)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vid.v v16
; CHECK-NEXT:    vsaddu.vx v16, v16, a1
; CHECK-NEXT:    vmsltu.vx v0, v16, a2
; CHECK-NEXT:    vsext.vf8 v16, v8
; CHECK-NEXT:    vsaddu.vx v8, v16, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 2
; CHECK-NEXT:    ret
  %mask = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 %index, i64 %tc)
  ret <32 x i1> %mask
}

define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_0)
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_1)
; CHECK-NEXT:    vle8.v v17, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI9_2)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_2)
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vle8.v v18, (a0)
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v16
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v17
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v17, v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 2
; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v0, v17, 4
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vsext.vf8 v8, v18
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v16, 6
; CHECK-NEXT:    ret
  %mask = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc)
  ret <64 x i1> %mask
}

define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
; CHECK-LABEL: fv128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    lui a0, %hi(.LCPI10_0)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_0)
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vle8.v v16, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_1)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_1)
; CHECK-NEXT:    vle8.v v17, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_2)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_2)
; CHECK-NEXT:    vle8.v v18, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_3)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_3)
; CHECK-NEXT:    vid.v v8
; CHECK-NEXT:    vle8.v v19, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_4)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_4)
; CHECK-NEXT:    vle8.v v20, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_5)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_5)
; CHECK-NEXT:    vle8.v v21, (a0)
; CHECK-NEXT:    lui a0, %hi(.LCPI10_6)
; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_6)
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vle8.v v22, (a0)
; CHECK-NEXT:    vmsltu.vx v0, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v16
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v16, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v17
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v17, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v18
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v18, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v19
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v19, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v20
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v20, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v21
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v21, v8, a2
; CHECK-NEXT:    vsext.vf8 v8, v22
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vmsltu.vx v22, v8, a2
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v17, v16, 2
; CHECK-NEXT:    vslideup.vi v0, v20, 2
; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
; CHECK-NEXT:    vslideup.vi v17, v18, 4
; CHECK-NEXT:    vslideup.vi v0, v21, 4
; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
; CHECK-NEXT:    vslideup.vi v17, v19, 6
; CHECK-NEXT:    vslideup.vi v0, v22, 6
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT:    vslideup.vi v0, v17, 8
; CHECK-NEXT:    ret
  %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
  ret <128 x i1> %mask
}

declare <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64, i64)
declare <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64, i64)
declare <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64, i64)
declare <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64, i64)
declare <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64, i64)
declare <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64, i64)
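
; Note on @above_maxvl: the architectural maximum VLEN is 65536 bits, and the
; index vector in that test is computed at SEW=64 with LMUL=1, so VLMAX is at
; most 65536 / 64 = 1024 lanes. With a start index of 0, every lane index is
; therefore below 1024, and a trip count of 2048 produces the same (all-true)
; mask that a trip count of 1024 would, which is why the result is "as-if"
; TC=1024 even though the constant 2048 is still materialized.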