; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,NOZBA
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,ZBA
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
; RUN:   | FileCheck %s --check-prefixes=CHECK,NOMUL

define void @lmul1() nounwind {
; CHECK-LABEL: lmul1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    ret
  %v = alloca <vscale x 1 x i64>
  ret void
}

define void @lmul2() nounwind {
; NOZBA-LABEL: lmul2:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    slli a0, a0, 1
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    slli a0, a0, 1
; NOZBA-NEXT:    add sp, sp, a0
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: lmul2:
; ZBA:       # %bb.0:
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    slli a0, a0, 1
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    sh1add sp, a0, sp
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: lmul2:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    add sp, sp, a0
; NOMUL-NEXT:    ret
  %v = alloca <vscale x 2 x i64>
  ret void
}

define void @lmul4() nounwind {
; CHECK-LABEL: lmul4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -48
; CHECK-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 48
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 2
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -32
; CHECK-NEXT:    addi sp, s0, -48
; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 48
; CHECK-NEXT:    ret
  %v = alloca <vscale x 4 x i64>
  ret void
}

define void @lmul8() nounwind {
; CHECK-LABEL: lmul8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -80
; CHECK-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 80
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    addi sp, s0, -80
; CHECK-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 80
; CHECK-NEXT:    ret
  %v = alloca <vscale x 8 x i64>
  ret void
}
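
; The mixed-LMUL tests below are sensitive to declaration order: lmul1_and_2
; reserves 4*vlenb, while lmul2_and_1 (the same two objects in the opposite
; order) needs only 3*vlenb, presumably because the nxv1i64 slot picks up
; padding when laid out against the 2*vlenb-aligned nxv2i64 slot.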
define void @lmul1_and_2() nounwind {
; NOZBA-LABEL: lmul1_and_2:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    slli a0, a0, 2
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    slli a0, a0, 2
; NOZBA-NEXT:    add sp, sp, a0
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: lmul1_and_2:
; ZBA:       # %bb.0:
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    slli a0, a0, 2
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    sh2add sp, a0, sp
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: lmul1_and_2:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 2
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 2
; NOMUL-NEXT:    add sp, sp, a0
; NOMUL-NEXT:    ret
  %v1 = alloca <vscale x 1 x i64>
  %v2 = alloca <vscale x 2 x i64>
  ret void
}

define void @lmul2_and_4() nounwind {
; CHECK-LABEL: lmul2_and_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -48
; CHECK-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 48
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -32
; CHECK-NEXT:    addi sp, s0, -48
; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 48
; CHECK-NEXT:    ret
  %v1 = alloca <vscale x 2 x i64>
  %v2 = alloca <vscale x 4 x i64>
  ret void
}

define void @lmul1_and_4() nounwind {
; CHECK-LABEL: lmul1_and_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -48
; CHECK-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 48
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -32
; CHECK-NEXT:    addi sp, s0, -48
; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 48
; CHECK-NEXT:    ret
  %v1 = alloca <vscale x 1 x i64>
  %v2 = alloca <vscale x 4 x i64>
  ret void
}

define void @lmul2_and_1() nounwind {
; NOZBA-LABEL: lmul2_and_1:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    slli a1, a0, 1
; NOZBA-NEXT:    add a0, a1, a0
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    slli a1, a0, 1
; NOZBA-NEXT:    add a0, a1, a0
; NOZBA-NEXT:    add sp, sp, a0
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: lmul2_and_1:
; ZBA:       # %bb.0:
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    sh1add a0, a0, a0
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    sh1add a0, a0, a0
; ZBA-NEXT:    add sp, sp, a0
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: lmul2_and_1:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a1, a0, 1
; NOMUL-NEXT:    add a0, a1, a0
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a1, a0, 1
; NOMUL-NEXT:    add a0, a1, a0
; NOMUL-NEXT:    add sp, sp, a0
; NOMUL-NEXT:    ret
  %v1 = alloca <vscale x 2 x i64>
  %v2 = alloca <vscale x 1 x i64>
  ret void
}
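
; lmul4_and_1 and lmul4_and_2 both reserve 6*vlenb: once the LMUL4 object
; forces 32-byte realignment, the nxv1i64 slot is apparently padded out to
; the same 2*vlenb granule as an nxv2i64 slot.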
define void @lmul4_and_1() nounwind {
; NOZBA-LABEL: lmul4_and_1:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    addi sp, sp, -48
; NOZBA-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    addi s0, sp, 48
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    li a1, 6
; NOZBA-NEXT:    mul a0, a0, a1
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    andi sp, sp, -32
; NOZBA-NEXT:    addi sp, s0, -48
; NOZBA-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    addi sp, sp, 48
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: lmul4_and_1:
; ZBA:       # %bb.0:
; ZBA-NEXT:    addi sp, sp, -48
; ZBA-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; ZBA-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; ZBA-NEXT:    addi s0, sp, 48
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    slli a0, a0, 1
; ZBA-NEXT:    sh1add a0, a0, a0
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    andi sp, sp, -32
; ZBA-NEXT:    addi sp, s0, -48
; ZBA-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; ZBA-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; ZBA-NEXT:    addi sp, sp, 48
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: lmul4_and_1:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    addi sp, sp, -48
; NOMUL-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    addi s0, sp, 48
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    mv a1, a0
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    add a0, a0, a1
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    andi sp, sp, -32
; NOMUL-NEXT:    addi sp, s0, -48
; NOMUL-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    addi sp, sp, 48
; NOMUL-NEXT:    ret
  %v1 = alloca <vscale x 4 x i64>
  %v2 = alloca <vscale x 1 x i64>
  ret void
}

define void @lmul4_and_2() nounwind {
; NOZBA-LABEL: lmul4_and_2:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    addi sp, sp, -48
; NOZBA-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    addi s0, sp, 48
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    li a1, 6
; NOZBA-NEXT:    mul a0, a0, a1
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    andi sp, sp, -32
; NOZBA-NEXT:    addi sp, s0, -48
; NOZBA-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    addi sp, sp, 48
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: lmul4_and_2:
; ZBA:       # %bb.0:
; ZBA-NEXT:    addi sp, sp, -48
; ZBA-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; ZBA-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; ZBA-NEXT:    addi s0, sp, 48
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    slli a0, a0, 1
; ZBA-NEXT:    sh1add a0, a0, a0
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    andi sp, sp, -32
; ZBA-NEXT:    addi sp, s0, -48
; ZBA-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; ZBA-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; ZBA-NEXT:    addi sp, sp, 48
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: lmul4_and_2:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    addi sp, sp, -48
; NOMUL-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    addi s0, sp, 48
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    mv a1, a0
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    add a0, a0, a1
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    andi sp, sp, -32
; NOMUL-NEXT:    addi sp, s0, -48
; NOMUL-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    addi sp, sp, 48
; NOMUL-NEXT:    ret
  %v1 = alloca <vscale x 4 x i64>
  %v2 = alloca <vscale x 2 x i64>
  ret void
}
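
; Two 4+2 pairs: the interleaved declaration order (4, 2, 4, 2) costs
; 14*vlenb, while the size-sorted order in lmul4_and_2_x2_1 costs 12*vlenb.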
define void @lmul4_and_2_x2_0() nounwind {
; NOZBA-LABEL: lmul4_and_2_x2_0:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    addi sp, sp, -48
; NOZBA-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    addi s0, sp, 48
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    li a1, 14
; NOZBA-NEXT:    mul a0, a0, a1
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    andi sp, sp, -32
; NOZBA-NEXT:    addi sp, s0, -48
; NOZBA-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    addi sp, sp, 48
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: lmul4_and_2_x2_0:
; ZBA:       # %bb.0:
; ZBA-NEXT:    addi sp, sp, -48
; ZBA-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; ZBA-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; ZBA-NEXT:    addi s0, sp, 48
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    li a1, 14
; ZBA-NEXT:    mul a0, a0, a1
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    andi sp, sp, -32
; ZBA-NEXT:    addi sp, s0, -48
; ZBA-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; ZBA-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; ZBA-NEXT:    addi sp, sp, 48
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: lmul4_and_2_x2_0:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    addi sp, sp, -48
; NOMUL-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    addi s0, sp, 48
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    mv a1, a0
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    add a1, a1, a0
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    add a0, a0, a1
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    andi sp, sp, -32
; NOMUL-NEXT:    addi sp, s0, -48
; NOMUL-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    addi sp, sp, 48
; NOMUL-NEXT:    ret
  %v1 = alloca <vscale x 4 x i64>
  %v2 = alloca <vscale x 2 x i64>
  %v3 = alloca <vscale x 4 x i64>
  %v4 = alloca <vscale x 2 x i64>
  ret void
}

define void @lmul4_and_2_x2_1() nounwind {
; NOZBA-LABEL: lmul4_and_2_x2_1:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    addi sp, sp, -48
; NOZBA-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    addi s0, sp, 48
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    li a1, 12
; NOZBA-NEXT:    mul a0, a0, a1
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    andi sp, sp, -32
; NOZBA-NEXT:    addi sp, s0, -48
; NOZBA-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    addi sp, sp, 48
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: lmul4_and_2_x2_1:
; ZBA:       # %bb.0:
; ZBA-NEXT:    addi sp, sp, -48
; ZBA-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; ZBA-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; ZBA-NEXT:    addi s0, sp, 48
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    slli a0, a0, 2
; ZBA-NEXT:    sh1add a0, a0, a0
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    andi sp, sp, -32
; ZBA-NEXT:    addi sp, s0, -48
; ZBA-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; ZBA-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; ZBA-NEXT:    addi sp, sp, 48
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: lmul4_and_2_x2_1:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    addi sp, sp, -48
; NOMUL-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    addi s0, sp, 48
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 2
; NOMUL-NEXT:    mv a1, a0
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    add a0, a0, a1
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    andi sp, sp, -32
; NOMUL-NEXT:    addi sp, s0, -48
; NOMUL-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    addi sp, sp, 48
; NOMUL-NEXT:    ret
  %v1 = alloca <vscale x 4 x i64>
  %v3 = alloca <vscale x 4 x i64>
  %v2 = alloca <vscale x 2 x i64>
  %v4 = alloca <vscale x 2 x i64>
  ret void
}
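
; A scalar local alongside scalable vectors: the i64 lives in the fixed
; 16-byte area at 8(sp) below the 4*vlenb RVV region, and no frame pointer
; is needed until an over-aligned RVV object appears (gpr_and_lmul1_and_4).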
define void @gpr_and_lmul1_and_2() nounwind {
; NOZBA-LABEL: gpr_and_lmul1_and_2:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    addi sp, sp, -16
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    slli a0, a0, 2
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    li a0, 3
; NOZBA-NEXT:    sd a0, 8(sp)
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    slli a0, a0, 2
; NOZBA-NEXT:    add sp, sp, a0
; NOZBA-NEXT:    addi sp, sp, 16
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: gpr_and_lmul1_and_2:
; ZBA:       # %bb.0:
; ZBA-NEXT:    addi sp, sp, -16
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    slli a0, a0, 2
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    li a0, 3
; ZBA-NEXT:    sd a0, 8(sp)
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    sh2add sp, a0, sp
; ZBA-NEXT:    addi sp, sp, 16
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: gpr_and_lmul1_and_2:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    addi sp, sp, -16
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 2
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    li a0, 3
; NOMUL-NEXT:    sd a0, 8(sp)
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 2
; NOMUL-NEXT:    add sp, sp, a0
; NOMUL-NEXT:    addi sp, sp, 16
; NOMUL-NEXT:    ret
  %x1 = alloca i64
  %v1 = alloca <vscale x 1 x i64>
  %v2 = alloca <vscale x 2 x i64>
  store volatile i64 3, ptr %x1
  ret void
}

define void @gpr_and_lmul1_and_4() nounwind {
; CHECK-LABEL: gpr_and_lmul1_and_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -48
; CHECK-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 48
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 3
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -32
; CHECK-NEXT:    li a0, 3
; CHECK-NEXT:    sd a0, 8(sp)
; CHECK-NEXT:    addi sp, s0, -48
; CHECK-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 48
; CHECK-NEXT:    ret
  %x1 = alloca i64
  %v1 = alloca <vscale x 1 x i64>
  %v2 = alloca <vscale x 4 x i64>
  store volatile i64 3, ptr %x1
  ret void
}

define void @lmul_1_2_4_8() nounwind {
; CHECK-LABEL: lmul_1_2_4_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -80
; CHECK-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 80
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    addi sp, s0, -80
; CHECK-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 80
; CHECK-NEXT:    ret
  %v1 = alloca <vscale x 1 x i64>
  %v2 = alloca <vscale x 2 x i64>
  %v4 = alloca <vscale x 4 x i64>
  %v8 = alloca <vscale x 8 x i64>
  ret void
}

define void @lmul_1_2_4_8_x2_0() nounwind {
; CHECK-LABEL: lmul_1_2_4_8_x2_0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -80
; CHECK-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 80
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    addi sp, s0, -80
; CHECK-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 80
; CHECK-NEXT:    ret
  %v1 = alloca <vscale x 1 x i64>
  %v2 = alloca <vscale x 1 x i64>
  %v3 = alloca <vscale x 2 x i64>
  %v4 = alloca <vscale x 2 x i64>
  %v5 = alloca <vscale x 4 x i64>
  %v6 = alloca <vscale x 4 x i64>
  %v7 = alloca <vscale x 8 x i64>
  %v8 = alloca <vscale x 8 x i64>
  ret void
}
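
; The same eight allocas in reverse declaration order still total 32*vlenb;
; the payload is 2*(1+2+4+8) = 30*vlenb, so two granules of padding appear
; either way.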
define void @lmul_1_2_4_8_x2_1() nounwind {
; CHECK-LABEL: lmul_1_2_4_8_x2_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -80
; CHECK-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; CHECK-NEXT:    addi s0, sp, 80
; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 5
; CHECK-NEXT:    sub sp, sp, a0
; CHECK-NEXT:    andi sp, sp, -64
; CHECK-NEXT:    addi sp, s0, -80
; CHECK-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT:    addi sp, sp, 80
; CHECK-NEXT:    ret
  %v8 = alloca <vscale x 8 x i64>
  %v7 = alloca <vscale x 8 x i64>
  %v6 = alloca <vscale x 4 x i64>
  %v5 = alloca <vscale x 4 x i64>
  %v4 = alloca <vscale x 2 x i64>
  %v3 = alloca <vscale x 2 x i64>
  %v2 = alloca <vscale x 1 x i64>
  %v1 = alloca <vscale x 1 x i64>
  ret void
}

define void @masks() nounwind {
; NOZBA-LABEL: masks:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    slli a0, a0, 2
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    slli a0, a0, 2
; NOZBA-NEXT:    add sp, sp, a0
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: masks:
; ZBA:       # %bb.0:
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    slli a0, a0, 2
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    sh2add sp, a0, sp
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: masks:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 2
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 2
; NOMUL-NEXT:    add sp, sp, a0
; NOMUL-NEXT:    ret
  %v1 = alloca <vscale x 1 x i1>
  %v2 = alloca <vscale x 2 x i1>
  %v4 = alloca <vscale x 4 x i1>
  %v8 = alloca <vscale x 8 x i1>
  ret void
}
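
; Large vlenb multiples exercise the constant-multiply lowering: 40*vlenb
; here (and 72*vlenb in lmul_8_x9) use li+mul with M, slli+shNadd with Zba,
; and a shift-and-add expansion when neither is available.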
define void @lmul_8_x5() nounwind {
; NOZBA-LABEL: lmul_8_x5:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    addi sp, sp, -80
; NOZBA-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    addi s0, sp, 80
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    li a1, 40
; NOZBA-NEXT:    mul a0, a0, a1
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    andi sp, sp, -64
; NOZBA-NEXT:    addi sp, s0, -80
; NOZBA-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    addi sp, sp, 80
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: lmul_8_x5:
; ZBA:       # %bb.0:
; ZBA-NEXT:    addi sp, sp, -80
; ZBA-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; ZBA-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; ZBA-NEXT:    addi s0, sp, 80
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    slli a0, a0, 3
; ZBA-NEXT:    sh2add a0, a0, a0
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    andi sp, sp, -64
; ZBA-NEXT:    addi sp, s0, -80
; ZBA-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; ZBA-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; ZBA-NEXT:    addi sp, sp, 80
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: lmul_8_x5:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    addi sp, sp, -80
; NOMUL-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    addi s0, sp, 80
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 3
; NOMUL-NEXT:    mv a1, a0
; NOMUL-NEXT:    slli a0, a0, 2
; NOMUL-NEXT:    add a0, a0, a1
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    andi sp, sp, -64
; NOMUL-NEXT:    addi sp, s0, -80
; NOMUL-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    addi sp, sp, 80
; NOMUL-NEXT:    ret
  %v1 = alloca <vscale x 8 x i64>
  %v2 = alloca <vscale x 8 x i64>
  %v3 = alloca <vscale x 8 x i64>
  %v4 = alloca <vscale x 8 x i64>
  %v5 = alloca <vscale x 8 x i64>
  ret void
}

define void @lmul_8_x9() nounwind {
; NOZBA-LABEL: lmul_8_x9:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    addi sp, sp, -80
; NOZBA-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    addi s0, sp, 80
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    li a1, 72
; NOZBA-NEXT:    mul a0, a0, a1
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    andi sp, sp, -64
; NOZBA-NEXT:    addi sp, s0, -80
; NOZBA-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    addi sp, sp, 80
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: lmul_8_x9:
; ZBA:       # %bb.0:
; ZBA-NEXT:    addi sp, sp, -80
; ZBA-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; ZBA-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; ZBA-NEXT:    addi s0, sp, 80
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    slli a0, a0, 3
; ZBA-NEXT:    sh3add a0, a0, a0
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    andi sp, sp, -64
; ZBA-NEXT:    addi sp, s0, -80
; ZBA-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; ZBA-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; ZBA-NEXT:    addi sp, sp, 80
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: lmul_8_x9:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    addi sp, sp, -80
; NOMUL-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    addi s0, sp, 80
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 3
; NOMUL-NEXT:    mv a1, a0
; NOMUL-NEXT:    slli a0, a0, 3
; NOMUL-NEXT:    add a0, a0, a1
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    andi sp, sp, -64
; NOMUL-NEXT:    addi sp, s0, -80
; NOMUL-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    addi sp, sp, 80
; NOMUL-NEXT:    ret
  %v1 = alloca <vscale x 8 x i64>
  %v2 = alloca <vscale x 8 x i64>
  %v3 = alloca <vscale x 8 x i64>
  %v4 = alloca <vscale x 8 x i64>
  %v5 = alloca <vscale x 8 x i64>
  %v6 = alloca <vscale x 8 x i64>
  %v7 = alloca <vscale x 8 x i64>
  %v8 = alloca <vscale x 8 x i64>
  %v9 = alloca <vscale x 8 x i64>
  ret void
}
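
; An nxv16i64 store is split into two vs8r.v stores and the frame is
; realigned to 128 bytes; with the extra nxv1i64 slot the RVV area rounds up
; to 24*vlenb.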
define void @lmul_16_align() nounwind {
; NOZBA-LABEL: lmul_16_align:
; NOZBA:       # %bb.0:
; NOZBA-NEXT:    addi sp, sp, -144
; NOZBA-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
; NOZBA-NEXT:    addi s0, sp, 144
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    li a1, 24
; NOZBA-NEXT:    mul a0, a0, a1
; NOZBA-NEXT:    sub sp, sp, a0
; NOZBA-NEXT:    andi sp, sp, -128
; NOZBA-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; NOZBA-NEXT:    vmv.v.i v8, 0
; NOZBA-NEXT:    csrr a0, vlenb
; NOZBA-NEXT:    add a0, sp, a0
; NOZBA-NEXT:    addi a0, a0, 128
; NOZBA-NEXT:    csrr a1, vlenb
; NOZBA-NEXT:    vs8r.v v8, (a0)
; NOZBA-NEXT:    slli a1, a1, 3
; NOZBA-NEXT:    add a0, a0, a1
; NOZBA-NEXT:    vs8r.v v8, (a0)
; NOZBA-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; NOZBA-NEXT:    vmv.v.i v8, 0
; NOZBA-NEXT:    addi a0, sp, 128
; NOZBA-NEXT:    vs1r.v v8, (a0)
; NOZBA-NEXT:    addi sp, s0, -144
; NOZBA-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
; NOZBA-NEXT:    addi sp, sp, 144
; NOZBA-NEXT:    ret
;
; ZBA-LABEL: lmul_16_align:
; ZBA:       # %bb.0:
; ZBA-NEXT:    addi sp, sp, -144
; ZBA-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; ZBA-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
; ZBA-NEXT:    addi s0, sp, 144
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    slli a0, a0, 3
; ZBA-NEXT:    sh1add a0, a0, a0
; ZBA-NEXT:    sub sp, sp, a0
; ZBA-NEXT:    andi sp, sp, -128
; ZBA-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; ZBA-NEXT:    vmv.v.i v8, 0
; ZBA-NEXT:    csrr a0, vlenb
; ZBA-NEXT:    add a0, sp, a0
; ZBA-NEXT:    addi a0, a0, 128
; ZBA-NEXT:    csrr a1, vlenb
; ZBA-NEXT:    vs8r.v v8, (a0)
; ZBA-NEXT:    sh3add a0, a1, a0
; ZBA-NEXT:    vs8r.v v8, (a0)
; ZBA-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; ZBA-NEXT:    vmv.v.i v8, 0
; ZBA-NEXT:    addi a0, sp, 128
; ZBA-NEXT:    vs1r.v v8, (a0)
; ZBA-NEXT:    addi sp, s0, -144
; ZBA-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; ZBA-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
; ZBA-NEXT:    addi sp, sp, 144
; ZBA-NEXT:    ret
;
; NOMUL-LABEL: lmul_16_align:
; NOMUL:       # %bb.0:
; NOMUL-NEXT:    addi sp, sp, -144
; NOMUL-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
; NOMUL-NEXT:    addi s0, sp, 144
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    slli a0, a0, 3
; NOMUL-NEXT:    mv a1, a0
; NOMUL-NEXT:    slli a0, a0, 1
; NOMUL-NEXT:    add a0, a0, a1
; NOMUL-NEXT:    sub sp, sp, a0
; NOMUL-NEXT:    andi sp, sp, -128
; NOMUL-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
; NOMUL-NEXT:    vmv.v.i v8, 0
; NOMUL-NEXT:    csrr a0, vlenb
; NOMUL-NEXT:    add a0, sp, a0
; NOMUL-NEXT:    addi a0, a0, 128
; NOMUL-NEXT:    csrr a1, vlenb
; NOMUL-NEXT:    vs8r.v v8, (a0)
; NOMUL-NEXT:    slli a1, a1, 3
; NOMUL-NEXT:    add a0, a0, a1
; NOMUL-NEXT:    vs8r.v v8, (a0)
; NOMUL-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
; NOMUL-NEXT:    vmv.v.i v8, 0
; NOMUL-NEXT:    addi a0, sp, 128
; NOMUL-NEXT:    vs1r.v v8, (a0)
; NOMUL-NEXT:    addi sp, s0, -144
; NOMUL-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
; NOMUL-NEXT:    addi sp, sp, 144
; NOMUL-NEXT:    ret
  %v1 = alloca <vscale x 16 x i64>
  %v2 = alloca <vscale x 1 x i64>
  store <vscale x 16 x i64> zeroinitializer, ptr %v1
  store <vscale x 1 x i64> zeroinitializer, ptr %v2
  ret void
}