; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; INCP
;
; Adding the result of a CNTP should lower to INCP when CNTP's governing
; predicate is all active or is the same as the predicate being counted.
; The fold is skipped when the CNTP result has other uses (see *_multiuse).

define i32 @cntp_add_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.b
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

define i32 @cntp_add_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

define i32 @cntp_add_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.s
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

define i32 @cntp_add_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.d
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  ret i32 %add
}

define i32 @cntp_add_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv8i1_via_cast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
  %3 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %2, <vscale x 8 x i1> %pg)
  %4 = trunc i64 %3 to i32
  %add = add i32 %4, %x
  ret i32 %add
}

define i64 @cntp_add_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_all_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    cntp x8, p1, p0.d
; CHECK-NEXT:    add w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %add = add i32 %3, %x
  %add.ext = zext i32 %add to i64
  %mul = mul i64 %2, %y
  %res = add i64 %add.ext, %mul
  ret i64 %res
}

define i32 @cntp_add_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.b
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

define i32 @cntp_add_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

define i32 @cntp_add_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.s
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

define i32 @cntp_add_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    incp x0, p0.d
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  ret i32 %add
}

define i64 @cntp_add_same_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_add_same_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.d
; CHECK-NEXT:    add w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %add = add i32 %2, %x
  %add.ext = zext i32 %add to i64
  %mul = mul i64 %1, %y
  %res = add i64 %add.ext, %mul
  ret i64 %res
}

; DECP
;
; As above, but subtracting the CNTP result should lower to DECP.

define i32 @cntp_sub_all_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.b
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %1, <vscale x 16 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

define i32 @cntp_sub_all_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %1, <vscale x 8 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

define i32 @cntp_sub_all_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.s
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %1, <vscale x 4 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

define i32 @cntp_sub_all_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.d
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %x, %3
  ret i32 %sub
}

define i32 @cntp_sub_all_active_nxv8i1_via_cast(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv8i1_via_cast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %2 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %1)
  %3 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %2, <vscale x 8 x i1> %pg)
  %4 = trunc i64 %3 to i32
  %sub = sub i32 %x, %4
  ret i32 %sub
}

define i64 @cntp_sub_all_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_all_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    cntp x8, p1, p0.d
; CHECK-NEXT:    sub w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %1, <vscale x 2 x i1> %pg)
  %3 = trunc i64 %2 to i32
  %sub = sub i32 %3, %x
  %sub.ext = zext i32 %sub to i64
  %mul = mul i64 %2, %y
  %res = add i64 %sub.ext, %mul
  ret i64 %res
}

define i32 @cntp_sub_same_active_nxv16i1(i32 %x, <vscale x 16 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv16i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.b
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

define i32 @cntp_sub_same_active_nxv8i1(i32 %x, <vscale x 8 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv8i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.h
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1> %pg, <vscale x 8 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

define i32 @cntp_sub_same_active_nxv4i1(i32 %x, <vscale x 4 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv4i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.s
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1> %pg, <vscale x 4 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

define i32 @cntp_sub_same_active_nxv2i1(i32 %x, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv2i1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    decp x0, p0.d
; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %x, %2
  ret i32 %sub
}

define i64 @cntp_sub_same_active_nxv2i1_multiuse(i32 %x, i64 %y, <vscale x 2 x i1> %pg) #0 {
; CHECK-LABEL: cntp_sub_same_active_nxv2i1_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cntp x8, p0, p0.d
; CHECK-NEXT:    sub w9, w8, w0
; CHECK-NEXT:    madd x0, x8, x1, x9
; CHECK-NEXT:    ret
  %1 = tail call i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1> %pg, <vscale x 2 x i1> %pg)
  %2 = trunc i64 %1 to i32
  %sub = sub i32 %2, %x
  %sub.ext = zext i32 %sub to i64
  %mul = mul i64 %1, %y
  %res = add i64 %sub.ext, %mul
  ret i64 %res
}

declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

declare i64 @llvm.aarch64.sve.cntp.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare i64 @llvm.aarch64.sve.cntp.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>)

attributes #0 = { "target-features"="+sve" }