; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefixes=CHECK,RV64

; Check that we correctly scale the split-part indirect offsets by VSCALE.
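; Each <vscale x 32 x i32> is legalized as two <vscale x 16 x i32> halves. The
; first argument consumes v8-v23, so the second is passed indirectly through a
; pointer in a0; its second half lives one LMUL=8 group (vlenb * 8 bytes) past
; the first, hence the csrr/slli sequence below.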
define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y) {
; CHECK-LABEL: callee_scalable_vector_split_indirect:
; CHECK:       # %bb.0:
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
; CHECK-NEXT:    add a1, a0, a1
; CHECK-NEXT:    vl8re32.v v24, (a0)
; CHECK-NEXT:    vl8re32.v v0, (a1)
; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT:    vadd.vv v8, v8, v24
; CHECK-NEXT:    vadd.vv v16, v16, v0
; CHECK-NEXT:    ret
  %a = add <vscale x 32 x i32> %x, %y
  ret <vscale x 32 x i32> %a
}

; Call the function above. Check that we set the arguments correctly.
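; Here the zeroinitializer operand occupies v8-v23, while %x is stored to a
; 128-byte-aligned scalable stack area with two vs8r.v stores (the second at
; offset vlenb * 8) and its base address is passed in a0.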
define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x i32> %x) {
; RV32-LABEL: caller_scalable_vector_split_indirect:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -144
; RV32-NEXT:    .cfi_def_cfa_offset 144
; RV32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    .cfi_offset s0, -8
; RV32-NEXT:    addi s0, sp, 144
; RV32-NEXT:    .cfi_def_cfa s0, 0
; RV32-NEXT:    csrr a0, vlenb
; RV32-NEXT:    slli a0, a0, 4
; RV32-NEXT:    sub sp, sp, a0
; RV32-NEXT:    andi sp, sp, -128
; RV32-NEXT:    addi a0, sp, 128
; RV32-NEXT:    csrr a1, vlenb
; RV32-NEXT:    vs8r.v v8, (a0)
; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV32-NEXT:    vmv.v.i v8, 0
; RV32-NEXT:    slli a1, a1, 3
; RV32-NEXT:    add a1, a0, a1
; RV32-NEXT:    addi a0, sp, 128
; RV32-NEXT:    vs8r.v v16, (a1)
; RV32-NEXT:    vmv.v.i v16, 0
; RV32-NEXT:    call callee_scalable_vector_split_indirect
; RV32-NEXT:    addi sp, s0, -144
; RV32-NEXT:    .cfi_def_cfa sp, 144
; RV32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    .cfi_restore s0
; RV32-NEXT:    addi sp, sp, 144
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_scalable_vector_split_indirect:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -144
; RV64-NEXT:    .cfi_def_cfa_offset 144
; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    .cfi_offset s0, -16
; RV64-NEXT:    addi s0, sp, 144
; RV64-NEXT:    .cfi_def_cfa s0, 0
; RV64-NEXT:    csrr a0, vlenb
; RV64-NEXT:    slli a0, a0, 4
; RV64-NEXT:    sub sp, sp, a0
; RV64-NEXT:    andi sp, sp, -128
; RV64-NEXT:    addi a0, sp, 128
; RV64-NEXT:    csrr a1, vlenb
; RV64-NEXT:    vs8r.v v8, (a0)
; RV64-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
; RV64-NEXT:    vmv.v.i v8, 0
; RV64-NEXT:    slli a1, a1, 3
; RV64-NEXT:    add a1, a0, a1
; RV64-NEXT:    addi a0, sp, 128
; RV64-NEXT:    vs8r.v v16, (a1)
; RV64-NEXT:    vmv.v.i v16, 0
; RV64-NEXT:    call callee_scalable_vector_split_indirect
; RV64-NEXT:    addi sp, s0, -144
; RV64-NEXT:    .cfi_def_cfa sp, 144
; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    .cfi_restore s0
; RV64-NEXT:    addi sp, sp, 144
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %c = alloca i64
  %a = call <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> %x)
  ret <vscale x 32 x i32> %a
}

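; Each field of the tuple is a <vscale x 16 x i8>, i.e. an LMUL=2 register
; group, so the tuple is returned in v8-v11. The IR below swaps the two fields,
; which lowers to exchanging v8m2 and v10m2 through a temporary group.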
define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @caller_tuple_return() {
; RV32-LABEL: caller_tuple_return:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    call callee_tuple_return
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv2r.v v6, v8
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    vmv2r.v v10, v6
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_tuple_return:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    call callee_tuple_return
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv2r.v v6, v8
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    vmv2r.v v10, v6
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %a = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @callee_tuple_return()
  %b = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %a, i32 0)
  %c = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %a, i32 1)
  %d = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) poison, <vscale x 4 x i32> %c, i32 0)
  %e = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %d, <vscale x 4 x i32> %b, i32 1)
  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %e
}

declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @callee_tuple_return()

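; Same layout for arguments: the tuple is passed in v8-v11, and swapping the
; fields before the call again exchanges v8m2 and v10m2.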
define void @caller_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x) {
; RV32-LABEL: caller_tuple_argument:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    .cfi_offset ra, -4
; RV32-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV32-NEXT:    vmv2r.v v6, v8
; RV32-NEXT:    vmv2r.v v8, v10
; RV32-NEXT:    vmv2r.v v10, v6
; RV32-NEXT:    call callee_tuple_argument
; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    .cfi_restore ra
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    .cfi_def_cfa_offset 0
; RV32-NEXT:    ret
;
; RV64-LABEL: caller_tuple_argument:
; RV64:       # %bb.0:
; RV64-NEXT:    addi sp, sp, -16
; RV64-NEXT:    .cfi_def_cfa_offset 16
; RV64-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    .cfi_offset ra, -8
; RV64-NEXT:    vsetivli zero, 1, e8, m1, ta, ma
; RV64-NEXT:    vmv2r.v v6, v8
; RV64-NEXT:    vmv2r.v v8, v10
; RV64-NEXT:    vmv2r.v v10, v6
; RV64-NEXT:    call callee_tuple_argument
; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    .cfi_restore ra
; RV64-NEXT:    addi sp, sp, 16
; RV64-NEXT:    .cfi_def_cfa_offset 0
; RV64-NEXT:    ret
  %a = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x, i32 0)
  %b = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %x, i32 1)
  %c = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) poison, <vscale x 4 x i32> %b, i32 0)
  %d = call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv16i8_2t.nxv4i32(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %c, <vscale x 4 x i32> %a, i32 1)
  call void @callee_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %d)
  ret void
}

declare void @callee_tuple_argument(target("riscv.vector.tuple", <vscale x 16 x i8>, 2))