; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s

; This tests that various ands, sexts, and zexts (and other operations)
; operating on vscale or the SVE count instructions can be eliminated
; (via demanded bits) due to their known limited range.

; On AArch64 vscale can be at most 16 (for a 2048-bit vector).
; The counting instructions (sans multiplier) have a value of at most 256
; (for a 2048-bit vector of i8s).

define i32 @vscale_and_elimination() vscale_range(1,16) {
; CHECK-LABEL: vscale_and_elimination:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    lsr x8, x8, #4
; CHECK-NEXT:    and w9, w8, #0x1c
; CHECK-NEXT:    add w0, w8, w9
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %and_redundant = and i32 %vscale, 31
  %and_required = and i32 %vscale, 17179869180
  %result = add i32 %and_redundant, %and_required
  ret i32 %result
}

define i64 @cntb_and_elimination() {
; CHECK-LABEL: cntb_and_elimination:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x8
; CHECK-NEXT:    and x9, x8, #0x1fc
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %cntb = call i64 @llvm.aarch64.sve.cntb(i32 31)
  %and_redundant = and i64 %cntb, 511
  %and_required = and i64 %cntb, 17179869180
  %result = add i64 %and_redundant, %and_required
  ret i64 %result
}

define i64 @cnth_and_elimination() {
; CHECK-LABEL: cnth_and_elimination:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x8
; CHECK-NEXT:    and x9, x8, #0xfc
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %cnth = call i64 @llvm.aarch64.sve.cnth(i32 31)
  %and_redundant = and i64 %cnth, 1023
  %and_required = and i64 %cnth, 17179869180
  %result = add i64 %and_redundant, %and_required
  ret i64 %result
}

define i64 @cntw_and_elimination() {
; CHECK-LABEL: cntw_and_elimination:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x8
; CHECK-NEXT:    and x9, x8, #0x7c
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %cntw = call i64 @llvm.aarch64.sve.cntw(i32 31)
  %and_redundant = and i64 %cntw, 127
  %and_required = and i64 %cntw, 17179869180
  %result = add i64 %and_redundant, %and_required
  ret i64 %result
}

define i64 @cntd_and_elimination() {
; CHECK-LABEL: cntd_and_elimination:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    and x9, x8, #0x3c
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %cntd = call i64 @llvm.aarch64.sve.cntd(i32 31)
  %and_redundant = and i64 %cntd, 63
  %and_required = and i64 %cntd, 17179869180
  %result = add i64 %and_redundant, %and_required
  ret i64 %result
}

define i64 @vscale_trunc_zext() vscale_range(1,16) {
; CHECK-LABEL: vscale_trunc_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    lsr x0, x8, #4
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %zext = zext i32 %vscale to i64
  ret i64 %zext
}

define i64 @vscale_trunc_sext() vscale_range(1,16) {
; CHECK-LABEL: vscale_trunc_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    lsr x0, x8, #4
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %sext = sext i32 %vscale to i64
  ret i64 %sext
}

define i64 @count_bytes_trunc_zext() {
; CHECK-LABEL: count_bytes_trunc_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 31)
  %trunc = trunc i64 %cnt to i32
  %zext = zext i32 %trunc to i64
  ret i64 %zext
}

define i64 @count_halfs_trunc_zext() {
; CHECK-LABEL: count_halfs_trunc_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 31)
  %trunc = trunc i64 %cnt to i32
  %zext = zext i32 %trunc to i64
  ret i64 %zext
}

define i64 @count_words_trunc_zext() {
; CHECK-LABEL: count_words_trunc_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
  %trunc = trunc i64 %cnt to i32
  %zext = zext i32 %trunc to i64
  ret i64 %zext
}

define i64 @count_doubles_trunc_zext() {
; CHECK-LABEL: count_doubles_trunc_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
  %trunc = trunc i64 %cnt to i32
  %zext = zext i32 %trunc to i64
  ret i64 %zext
}

define i64 @count_bytes_trunc_sext() {
; CHECK-LABEL: count_bytes_trunc_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntb x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntb(i32 31)
  %trunc = trunc i64 %cnt to i32
  %sext = sext i32 %trunc to i64
  ret i64 %sext
}

define i64 @count_halfs_trunc_sext() {
; CHECK-LABEL: count_halfs_trunc_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cnth x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cnth(i32 31)
  %trunc = trunc i64 %cnt to i32
  %sext = sext i32 %trunc to i64
  ret i64 %sext
}

define i64 @count_words_trunc_sext() {
; CHECK-LABEL: count_words_trunc_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntw x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
  %trunc = trunc i64 %cnt to i32
  %sext = sext i32 %trunc to i64
  ret i64 %sext
}

define i64 @count_doubles_trunc_sext() {
; CHECK-LABEL: count_doubles_trunc_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x0
; CHECK-NEXT:    ret
  %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
  %trunc = trunc i64 %cnt to i32
  %sext = sext i32 %trunc to i64
  ret i64 %sext
}

define i32 @vscale_with_multiplier() vscale_range(1,16) {
; CHECK-LABEL: vscale_with_multiplier:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    mov w9, #5 // =0x5
; CHECK-NEXT:    lsr x8, x8, #4
; CHECK-NEXT:    mul x8, x8, x9
; CHECK-NEXT:    and w9, w8, #0x3f
; CHECK-NEXT:    add w0, w8, w9
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %mul = mul i32 %vscale, 5
  %and_redundant = and i32 %mul, 127
  %and_required = and i32 %mul, 63
  %result = add i32 %and_redundant, %and_required
  ret i32 %result
}

define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
; CHECK-LABEL: vscale_with_negative_multiplier:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    mov x9, #-5 // =0xfffffffffffffffb
; CHECK-NEXT:    lsr x8, x8, #4
; CHECK-NEXT:    mul x8, x8, x9
; CHECK-NEXT:    and w9, w8, #0xffffffc0
; CHECK-NEXT:    add w0, w8, w9
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %mul = mul i32 %vscale, -5
  %or_redundant = or i32 %mul, 4294967168
  %or_required = and i32 %mul, 4294967232
  %result = add i32 %or_redundant, %or_required
  ret i32 %result
}

define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntd x8
; CHECK-NEXT:    neg x9, x8
; CHECK-NEXT:    orr w9, w9, #0xfffffff0
; CHECK-NEXT:    sub w0, w9, w8
; CHECK-NEXT:    ret
  %vscale = call i32 @llvm.vscale.i32()
  %mul = mul i32 %vscale, -2
  %or_redundant = or i32 %mul, 4294967264
  %or_required = or i32 %mul, 4294967280
  %result = add i32 %or_redundant, %or_required
  ret i32 %result
}

declare i32 @llvm.vscale.i32()
declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
declare i64 @llvm.aarch64.sve.cntw(i32 %pattern)
declare i64 @llvm.aarch64.sve.cntd(i32 %pattern)