xref: /llvm-project/llvm/test/CodeGen/NVPTX/param-overalign.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
; Regression test for NVPTX parameter alignment: a by-value %struct.float2
; argument is expected to be emitted as an 8-byte-aligned .param array, both
; for @callee_md (named by the !nvvm.annotations "align" entry at the end of
; the module) and for @callee (alignstack(8) parameter attribute).
;
; First lit line: compile with llc and match the PTX against the FileCheck
; directives in this file. Second lit line: only when a ptxas binary is
; available and it is not ptxas-12.0, recompile with -verify-machineinstrs and
; hand the PTX to the ptxas verifier.
; NOTE(review): both command lines pass -mtriple=nvptx while the module below
; declares nvptx64-nvidia-cuda; presumably the command-line triple wins --
; confirm that exercising only that target is intended.
1; RUN: llc < %s -mtriple=nvptx | FileCheck %s
2; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}
3
4target triple = "nvptx64-nvidia-cuda"
5
; Two-float aggregate used as the by-value argument type throughout this test.
6%struct.float2 = type { float, float }
7
; Expected prototype of callee_md (signature followed by a bare ';'): its
; parameter is declared as an 8-byte-aligned, 8-byte .param array.
8; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) callee_md
9; CHECK-NEXT: (
10; CHECK-NEXT:         .param .align 8 .b8 callee_md_param_0[8]
11; CHECK-NEXT: )
12; CHECK-NEXT: ;
13
; Expected prototype of callee, with the same 8-byte-aligned parameter shape.
14; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) callee
15; CHECK-NEXT: (
16; CHECK-NEXT:         .param .align 8 .b8 callee_param_0[8]
17; CHECK-NEXT: )
18; CHECK-NEXT: ;
19
; caller_md: packs its two float arguments into a %struct.float2, passes the
; aggregate by value to @callee_md, and returns the callee's float result.
; The expected PTX declares the outgoing argument as an 8-byte-aligned .param
; array (param0) and fills it with one vectorized st.param.v2.f32 rather than
; two scalar stores.
20define float @caller_md(float %a, float %b) {
; Expected function header: two scalar .b32 parameters, scalar .b32 return.
21; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) caller_md(
22; CHECK-NEXT:         .param .b32 caller_md_param_0,
23; CHECK-NEXT:         .param .b32 caller_md_param_1
24; CHECK-NEXT: )
25; CHECK-NEXT: {
26
; Expected body: load both arguments, set up the call sequence inside a
; braced scope, read the return value back, and forward it to the caller.
27; CHECK:         ld.param.f32 %f1, [caller_md_param_0];
28; CHECK-NEXT:    ld.param.f32 %f2, [caller_md_param_1];
29; CHECK-NEXT:    {
30; CHECK-NEXT:    .param .align 8 .b8 param0[8];
31; CHECK-NEXT:    st.param.v2.f32 [param0], {%f1, %f2};
32; CHECK-NEXT:    .param .b32 retval0;
33; CHECK-NEXT:    call.uni (retval0),
34; CHECK-NEXT:    callee_md,
35; CHECK-NEXT:    (
36; CHECK-NEXT:    param0
37; CHECK-NEXT:    );
38; CHECK-NEXT:    ld.param.f32 %f3, [retval0];
39; CHECK-NEXT:    }
40; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
41; CHECK-NEXT:    ret;
  ; Build {%a, %b} field by field starting from poison, then make the call.
42  %s1 = insertvalue %struct.float2 poison, float %a, 0
43  %s2 = insertvalue %struct.float2 %s1, float %b, 1
44  %r = call float @callee_md(%struct.float2 %s2)
45  ret float %r
46}
47
; callee_md: returns the sum of the two float fields of its by-value
; %struct.float2 argument. The IR signature carries no alignment attribute;
; this function is the one named by the !nvvm.annotations "align" entry at the
; end of the module, which is presumably what produces the `.align 8` expected
; on callee_md_param_0 below.
48define float @callee_md(%struct.float2 %a) {
; Expected function header: one 8-byte-aligned, 8-byte .param array.
49; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) callee_md(
50; CHECK-NEXT:         .param .align 8 .b8 callee_md_param_0[8]
51; CHECK-NEXT: )
52; CHECK-NEXT: {
53
; Expected body: both fields arrive through a single vectorized
; ld.param.v2.f32, are added, and the sum is stored as the return value.
54; CHECK:         ld.param.v2.f32 {%f1, %f2}, [callee_md_param_0];
55; CHECK-NEXT:    add.rn.f32 %f3, %f1, %f2;
56; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
57; CHECK-NEXT:    ret;
  ; Split the aggregate into its two fields and add them.
58  %v0 = extractvalue %struct.float2 %a, 0
59  %v1 = extractvalue %struct.float2 %a, 1
  ; NOTE(review): %2 is the only unnamed instruction in this function; if the
  ; implicit entry label takes %0, this skips %1 -- confirm the IR parser in
  ; use accepts non-consecutive unnamed-value numbers.
60  %2 = fadd float %v0, %v1
61  ret float %2
62}
63
; caller: packs its two float arguments into a %struct.float2, passes the
; aggregate by value to @callee (whose parameter is marked alignstack(8)), and
; returns the callee's float result. The expected PTX declares the outgoing
; argument as an 8-byte-aligned .param array (param0) and fills it with one
; vectorized st.param.v2.f32.
64define float @caller(float %a, float %b) {
; Expected function header: two scalar .b32 parameters, scalar .b32 return.
65; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) caller(
66; CHECK-NEXT:         .param .b32 caller_param_0,
67; CHECK-NEXT:         .param .b32 caller_param_1
68; CHECK-NEXT: )
69; CHECK-NEXT: {
70
; Expected body: load both arguments, set up the call sequence inside a
; braced scope, read the return value back, and forward it to the caller.
71; CHECK:         ld.param.f32 %f1, [caller_param_0];
72; CHECK-NEXT:    ld.param.f32 %f2, [caller_param_1];
73; CHECK-NEXT:    {
74; CHECK-NEXT:    .param .align 8 .b8 param0[8];
75; CHECK-NEXT:    st.param.v2.f32 [param0], {%f1, %f2};
76; CHECK-NEXT:    .param .b32 retval0;
77; CHECK-NEXT:    call.uni (retval0),
78; CHECK-NEXT:    callee,
79; CHECK-NEXT:    (
80; CHECK-NEXT:    param0
81; CHECK-NEXT:    );
82; CHECK-NEXT:    ld.param.f32 %f3, [retval0];
83; CHECK-NEXT:    }
84; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
85; CHECK-NEXT:    ret;
  ; Build {%a, %b} field by field starting from poison, then make the call.
86  %s1 = insertvalue %struct.float2 poison, float %a, 0
87  %s2 = insertvalue %struct.float2 %s1, float %b, 1
88  %r = call float @callee(%struct.float2 %s2)
89  ret float %r
90}
91
; callee: returns the sum of the two float fields of its by-value
; %struct.float2 argument. Here the alignment request is written directly in
; the IR signature as the alignstack(8) parameter attribute, and the expected
; PTX declares callee_param_0 with `.align 8`.
92define float @callee(%struct.float2 alignstack(8) %a ) {
; Expected function header: one 8-byte-aligned, 8-byte .param array.
93; CHECK-LABEL: .visible .func  (.param .b32 func_retval0) callee(
94; CHECK-NEXT:         .param .align 8 .b8 callee_param_0[8]
95; CHECK-NEXT: )
96; CHECK-NEXT: {
97
; Expected body: both fields arrive through a single vectorized
; ld.param.v2.f32, are added, and the sum is stored as the return value.
98; CHECK:         ld.param.v2.f32 {%f1, %f2}, [callee_param_0];
99; CHECK-NEXT:    add.rn.f32 %f3, %f1, %f2;
100; CHECK-NEXT:    st.param.f32 [func_retval0], %f3;
101; CHECK-NEXT:    ret;
  ; Split the aggregate into its two fields and add them.
102  %v0 = extractvalue %struct.float2 %a, 0
103  %v1 = extractvalue %struct.float2 %a, 1
  ; NOTE(review): %2 is the only unnamed instruction in this function; if the
  ; implicit entry label takes %0, this skips %1 -- confirm the IR parser in
  ; use accepts non-consecutive unnamed-value numbers.
104  %2 = fadd float %v0, %v1
105  ret float %2
106}
107
; Module-level NVVM annotation list. Its single entry !0 attaches an "align"
; annotation with the value 0x00010008 to @callee_md.
; NOTE(review): presumably the upper 16 bits (1) select the first parameter and
; the lower 16 bits (8) give the alignment in bytes, which would match the
; `.align 8` expected on callee_md_param_0 -- confirm against the NVPTX
; documentation for !nvvm.annotations.
108!nvvm.annotations = !{!0}
109!0 = !{ptr @callee_md, !"align", i32 u0x00010008}
110