; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s -check-prefix=NO_SCALAR_INC
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -sve-use-scalar-inc-vl=true -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -sve-use-scalar-inc-vl=false -verify-machineinstrs < %s | FileCheck %s -check-prefix=NO_SCALAR_INC
;
; Overview: each function adds or subtracts a constant multiple of llvm.vscale
; to/from either a scalable vector (as a splat) or a scalar, and the generated
; AArch64 SVE code is verified.
;
; Two sets of expectations are used. The +sve run that passes
; -sve-use-scalar-inc-vl=true and the plain +sve2 run share the default prefix;
; the plain +sve run and the +sve2 run that passes -sve-use-scalar-inc-vl=false
; share the no-scalar-inc prefix. The vector tests expect identical code under
; both prefixes; the scalar tests expect different code.

; Vector add of splat(vscale * 8) to <vscale x 8 x i16>: a single `inch z0.h`
; is expected under both prefixes.
define <vscale x 8 x i16> @inch_vec(<vscale x 8 x i16> %a) {
; NO_SCALAR_INC-LABEL: inch_vec:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: inch z0.h
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: inch_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: inch z0.h
; CHECK-NEXT: ret
  %vscale = call i16 @llvm.vscale.i16()
  %mul = mul i16 %vscale, 8
  %vl = insertelement <vscale x 8 x i16> undef, i16 %mul, i32 0
  %vl.splat = shufflevector <vscale x 8 x i16> %vl, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %res = add <vscale x 8 x i16> %a, %vl.splat
  ret <vscale x 8 x i16> %res
}

; Vector add of splat(vscale * 4) to <vscale x 4 x i32>: a single `incw z0.s`
; is expected under both prefixes.
define <vscale x 4 x i32> @incw_vec(<vscale x 4 x i32> %a) {
; NO_SCALAR_INC-LABEL: incw_vec:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: incw z0.s
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: incw_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: incw z0.s
; CHECK-NEXT: ret
  %vscale = call i32 @llvm.vscale.i32()
  %mul = mul i32 %vscale, 4
  %vl = insertelement <vscale x 4 x i32> undef, i32 %mul, i32 0
  %vl.splat = shufflevector <vscale x 4 x i32> %vl, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %res = add <vscale x 4 x i32> %a, %vl.splat
  ret <vscale x 4 x i32> %res
}

; Vector add of splat(vscale * 2) to <vscale x 2 x i64>: a single `incd z0.d`
; is expected under both prefixes.
define <vscale x 2 x i64> @incd_vec(<vscale x 2 x i64> %a) {
; NO_SCALAR_INC-LABEL: incd_vec:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: incd z0.d
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: incd_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: incd z0.d
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 2
  %vl = insertelement <vscale x 2 x i64> undef, i64 %mul, i32 0
  %vl.splat = shufflevector <vscale x 2 x i64> %vl, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %res = add <vscale x 2 x i64> %a, %vl.splat
  ret <vscale x 2 x i64> %res
}

; Vector subtract of splat(vscale * 16) from <vscale x 8 x i16>: expected to
; use the multiplier form, `dech z0.h, all, mul #2`, under both prefixes.
define <vscale x 8 x i16> @dech_vec(<vscale x 8 x i16> %a) {
; NO_SCALAR_INC-LABEL: dech_vec:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: dech z0.h, all, mul #2
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: dech_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: dech z0.h, all, mul #2
; CHECK-NEXT: ret
  %vscale = call i16 @llvm.vscale.i16()
  %mul = mul i16 %vscale, 16
  %vl = insertelement <vscale x 8 x i16> undef, i16 %mul, i32 0
  %vl.splat = shufflevector <vscale x 8 x i16> %vl, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  %res = sub <vscale x 8 x i16> %a, %vl.splat
  ret <vscale x 8 x i16> %res
}

; Vector subtract of splat(vscale * 16) from <vscale x 4 x i32>: expected to
; select `decw z0.s, all, mul #4` under both prefixes.
define <vscale x 4 x i32> @decw_vec(<vscale x 4 x i32> %a) {
; NO_SCALAR_INC-LABEL: decw_vec:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: decw z0.s, all, mul #4
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decw_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: decw z0.s, all, mul #4
; CHECK-NEXT: ret
  %vscale = call i32 @llvm.vscale.i32()
  %mul = mul i32 %vscale, 16
  %vl = insertelement <vscale x 4 x i32> undef, i32 %mul, i32 0
  %vl.splat = shufflevector <vscale x 4 x i32> %vl, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %res = sub <vscale x 4 x i32> %a, %vl.splat
  ret <vscale x 4 x i32> %res
}

; Vector subtract of splat(vscale * 16) from <vscale x 2 x i64>: expected to
; select `decd z0.d, all, mul #8` under both prefixes.
define <vscale x 2 x i64> @decd_vec(<vscale x 2 x i64> %a) {
; NO_SCALAR_INC-LABEL: decd_vec:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: decd z0.d, all, mul #8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decd_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: decd z0.d, all, mul #8
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 16
  %vl = insertelement <vscale x 2 x i64> undef, i64 %mul, i32 0
  %vl.splat = shufflevector <vscale x 2 x i64> %vl, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  %res = sub <vscale x 2 x i64> %a, %vl.splat
  ret <vscale x 2 x i64> %res
}

; NOTE: As there is no need for the predicate pattern we
; fall back to using ADDVL with its larger immediate range.
; i64 a + vscale * 16. Default-prefix runs expect `addvl x0, x0, #1`; the
; no-scalar-inc runs expect `rdvl x8, #1` followed by a register `add`.
define i64 @incb_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: incb_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: rdvl x8, #1
; NO_SCALAR_INC-NEXT: add x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: incb_scalar_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x0, x0, #1
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 16
  %add = add i64 %a, %mul
  ret i64 %add
}

; i64 a + vscale * 8. Default-prefix runs expect `inch x0`; the no-scalar-inc
; runs expect `cnth x8` followed by a register `add`.
define i64 @inch_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: inch_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cnth x8
; NO_SCALAR_INC-NEXT: add x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: inch_scalar_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: inch x0
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 8
  %add = add i64 %a, %mul
  ret i64 %add
}

; i64 a + vscale * 4. Default-prefix runs expect `incw x0`; the no-scalar-inc
; runs expect `cntw x8` followed by a register `add`.
define i64 @incw_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: incw_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntw x8
; NO_SCALAR_INC-NEXT: add x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: incw_scalar_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: incw x0
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 4
  %add = add i64 %a, %mul
  ret i64 %add
}

; i64 a + vscale * 2. Default-prefix runs expect `incd x0`; the no-scalar-inc
; runs expect `cntd x8` followed by a register `add`.
define i64 @incd_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: incd_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntd x8
; NO_SCALAR_INC-NEXT: add x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: incd_scalar_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: incd x0
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 2
  %add = add i64 %a, %mul
  ret i64 %add
}

; NOTE: As there is no need for the predicate pattern we
; fall back to using ADDVL with its larger immediate range.
; i64 a - vscale * 32. Default-prefix runs expect `addvl x0, x0, #-2`; the
; no-scalar-inc runs expect `cnth x8, all, mul #4` followed by a `sub`.
define i64 @decb_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decb_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cnth x8, all, mul #4
; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decb_scalar_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: addvl x0, x0, #-2
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 32
  %sub = sub i64 %a, %mul
  ret i64 %sub
}

; i64 a - vscale * 24. Default-prefix runs expect `dech x0, all, mul #3`; the
; no-scalar-inc runs expect `cnth x8, all, mul #3` followed by a `sub`.
define i64 @dech_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: dech_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cnth x8, all, mul #3
; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: dech_scalar_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: dech x0, all, mul #3
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 24
  %sub = sub i64 %a, %mul
  ret i64 %sub
}

; i64 a - vscale * 12. Default-prefix runs expect `decw x0, all, mul #3`; the
; no-scalar-inc runs expect `cntw x8, all, mul #3` followed by a `sub`.
define i64 @decw_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decw_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntw x8, all, mul #3
; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decw_scalar_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: decw x0, all, mul #3
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 12
  %sub = sub i64 %a, %mul
  ret i64 %sub
}

; i64 a - vscale * 6. Default-prefix runs expect `decd x0, all, mul #3`; the
; no-scalar-inc runs expect `cntd x8, all, mul #3` followed by a `sub`.
define i64 @decd_scalar_i64(i64 %a) {
; NO_SCALAR_INC-LABEL: decd_scalar_i64:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntd x8, all, mul #3
; NO_SCALAR_INC-NEXT: sub x0, x0, x8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decd_scalar_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: decd x0, all, mul #3
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 6
  %sub = sub i64 %a, %mul
  ret i64 %sub
}

; NOTE: As there is no need for the predicate pattern we
; fall back to using ADDVL with its larger immediate range.
; i32 variant: vscale * 48 is computed in i64, truncated to i32, then added.
; Default-prefix runs expect `addvl x0, x0, #3` on the 64-bit register
; (bracketed by kill annotations for w0/x0); the no-scalar-inc runs expect
; `rdvl x8, #3` followed by a 32-bit `add`.
define i32 @incb_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: incb_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: rdvl x8, #3
; NO_SCALAR_INC-NEXT: add w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: incb_scalar_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: addvl x0, x0, #3
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 48
  %vl = trunc i64 %mul to i32
  %add = add i32 %a, %vl
  ret i32 %add
}

; i32 a + trunc(vscale * 56). Default-prefix runs expect
; `inch x0, all, mul #7` on the 64-bit register; the no-scalar-inc runs expect
; `cnth x8, all, mul #7` followed by a 32-bit `add`.
define i32 @inch_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: inch_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cnth x8, all, mul #7
; NO_SCALAR_INC-NEXT: add w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: inch_scalar_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: inch x0, all, mul #7
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 56
  %vl = trunc i64 %mul to i32
  %add = add i32 %a, %vl
  ret i32 %add
}

; i32 a + trunc(vscale * 28). Default-prefix runs expect
; `incw x0, all, mul #7` on the 64-bit register; the no-scalar-inc runs expect
; `cntw x8, all, mul #7` followed by a 32-bit `add`.
define i32 @incw_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: incw_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntw x8, all, mul #7
; NO_SCALAR_INC-NEXT: add w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: incw_scalar_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: incw x0, all, mul #7
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 28
  %vl = trunc i64 %mul to i32
  %add = add i32 %a, %vl
  ret i32 %add
}

; i32 a + trunc(vscale * 14). Default-prefix runs expect
; `incd x0, all, mul #7` on the 64-bit register; the no-scalar-inc runs expect
; `cntd x8, all, mul #7` followed by a 32-bit `add`.
define i32 @incd_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: incd_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntd x8, all, mul #7
; NO_SCALAR_INC-NEXT: add w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: incd_scalar_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: incd x0, all, mul #7
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 14
  %vl = trunc i64 %mul to i32
  %add = add i32 %a, %vl
  ret i32 %add
}

; NOTE: As there is no need for the predicate pattern we
; fall back to using ADDVL with its larger immediate range.
; i32 a - trunc(vscale * 64): the product is computed in i64 and truncated
; before the subtract. Default-prefix runs expect `addvl x0, x0, #-4` on the
; 64-bit register (bracketed by kill annotations for w0/x0); the no-scalar-inc
; runs expect `cnth x8, all, mul #8` followed by a 32-bit `sub`.
define i32 @decb_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decb_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cnth x8, all, mul #8
; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decb_scalar_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: addvl x0, x0, #-4
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 64
  %vl = trunc i64 %mul to i32
  %sub = sub i32 %a, %vl
  ret i32 %sub
}

; i32 a - trunc(vscale * 8). Default-prefix runs expect `dech x0` on the
; 64-bit register; the no-scalar-inc runs expect `cnth x8` followed by a
; 32-bit `sub`.
define i32 @dech_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: dech_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cnth x8
; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: dech_scalar_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: dech x0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 8
  %vl = trunc i64 %mul to i32
  %sub = sub i32 %a, %vl
  ret i32 %sub
}

; i32 a - trunc(vscale * 4). Default-prefix runs expect `decw x0` on the
; 64-bit register; the no-scalar-inc runs expect `cntw x8` followed by a
; 32-bit `sub`.
define i32 @decw_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decw_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntw x8
; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decw_scalar_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: decw x0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret

  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 4
  %vl = trunc i64 %mul to i32
  %sub = sub i32 %a, %vl
  ret i32 %sub
}

; i32 a - trunc(vscale * 2). Default-prefix runs expect `decd x0` on the
; 64-bit register; the no-scalar-inc runs expect `cntd x8` followed by a
; 32-bit `sub`.
define i32 @decd_scalar_i32(i32 %a) {
; NO_SCALAR_INC-LABEL: decd_scalar_i32:
; NO_SCALAR_INC: // %bb.0:
; NO_SCALAR_INC-NEXT: cntd x8
; NO_SCALAR_INC-NEXT: sub w0, w0, w8
; NO_SCALAR_INC-NEXT: ret
;
; CHECK-LABEL: decd_scalar_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: decd x0
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; CHECK-NEXT: ret
  %vscale = call i64 @llvm.vscale.i64()
  %mul = mul i64 %vscale, 2
  %vl = trunc i64 %mul to i32
  %sub = sub i32 %a, %vl
  ret i32 %sub
}

; Declarations of the llvm.vscale intrinsic at each integer width called by
; the tests in this file.
declare i16 @llvm.vscale.i16()
declare i32 @llvm.vscale.i32()
declare i64 @llvm.vscale.i64()