; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Check that we correctly scale the split part indirect offsets by VSCALE.
define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y) {
; CHECK-LABEL: callee_scalable_vector_split_indirect:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vl8re32.v v0, (a1)
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v24
; CHECK-NEXT:    vadd.vv v16, v16, v0
; CHECK-NEXT:    ret
  %a = add <vscale x 32 x i32> %x, %y
  ret <vscale x 32 x i32> %a
}

; Call the function above. Check that we set the arguments correctly.
define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x i32> %x) {
; RV32-LABEL: caller_scalable_vector_split_indirect:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -144
; RV32-NEXT:    .cfi_def_cfa_offset 144
; RV32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 144
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    addi a0, sp, 128
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    vs8r.v v8, (a0)
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    addi a0, sp, 128
; RV32-NEXT:    vs8r.v v16, (a1)
; RV32-NEXT:    vmv.v.i v16, 0
; RV32-NEXT:    call callee_scalable_vector_split_indirect
; RV32-NEXT:    addi sp, s0, -144
; RV32-NEXT:    .cfi_def_cfa sp, 144
; RV32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 144
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_scalable_vector_split_indirect:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -144
; RV64-NEXT:    .cfi_def_cfa_offset 144
; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 144
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    sub sp, sp, a0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    addi a0, sp, 128
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    vs8r.v v8, (a0)
; RV64-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, a0, a1
; RV64-NEXT:    addi a0, sp, 128
; RV64-NEXT:    vs8r.v v16, (a1)
; RV64-NEXT:    vmv.v.i v16, 0
; RV64-NEXT:    call callee_scalable_vector_split_indirect
; RV64-NEXT:    addi sp, s0, -144
; RV64-NEXT:    .cfi_def_cfa sp, 144
; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 144
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %c = alloca i64
  %a = call <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> %x)
  ret <vscale x 32 x i32> %a
}

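; Check the handling of a two-field riscv.vector.tuple return value. The IR
; below rebuilds the returned tuple with its two fields swapped, which is why
; the caller is expected to exchange the two LMUL=2 register groups (v8-v9 and
; v10-v11) after the call; see the vmv2r sequence in the checks.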
define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @caller_tuple_return() {
; RV32-LABEL: caller_tuple_return:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    call callee_tuple_return
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv2r.v v6, v8
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    vmv2r.v v10, v6
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_tuple_return:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    call callee_tuple_return
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv2r.v v6, v8
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    vmv2r.v v10, v6
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %a = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @callee_tuple_return()
  %b = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %a, i32 0)
  %c = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %a, i32 1)
  %d = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) poison, <vscale x 4 x i32> %c, i32 0)
  %e = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %d, <vscale x 4 x i32> %b, i32 1)
  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %e
}

declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @callee_tuple_return()

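; Same tuple type, but passed as an argument. The IR swaps the two fields
; before the call, so the caller is expected to exchange v8-v9 and v10-v11
; ahead of calling callee_tuple_argument.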
define void @caller_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x) {
; RV32-LABEL: caller_tuple_argument:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv2r.v v6, v8
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    vmv2r.v v10, v6
; RV32-NEXT:    call callee_tuple_argument
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_tuple_argument:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv2r.v v6, v8
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    vmv2r.v v10, v6
; RV64-NEXT:    call callee_tuple_argument
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x, i32 0)
  %b = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x, i32 1)
  %c = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) poison, <vscale x 4 x i32> %b, i32 0)
  %d = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %c, <vscale x 4 x i32> %a, i32 1)
  call void @callee_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %d)
  ret void
}

declare void @callee_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2))