xref: /llvm-project/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
1; Verifies correctness of load/store of parameters and return values.
2; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
3; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}
4
; Aggregate types under test. Each one wraps a packed struct <{ T, i8, T }>
; followed by an i64. Per the expected PTX in the tests below, the first T is
; accessed whole at offset 0, while the second T (placed directly after the
; i8) is accessed one byte at a time starting at offset 3, 5, or 9.
5%s_i8i16p = type { <{ i16, i8, i16 }>, i64 }
6%s_i8i32p = type { <{ i32, i8, i32 }>, i64 }
7%s_i8i64p = type { <{ i64, i8, i64 }>, i64 }
8%s_i8f16p = type { <{ half, i8, half }>, i64 }
9%s_i8f16x2p = type { <{ <2 x half>, i8, <2 x half> }>, i64 }
10%s_i8f32p = type { <{ float, i8, float }>, i64 }
11%s_i8f64p = type { <{ double, i8, double }>, i64 }
12
13; -- All loads/stores from parameters aligned by one must be done one
14;    byte at a time.
15; -- Notes:
16;   -- There are two fields of interest in the packed part of the struct: one
17;      at a naturally aligned offset and one at a misaligned offset. The former
18;      should be loaded or stored as a whole, and the latter byte by byte.
19;   -- Only the loads and stores of these two fields are checked in the
20;      following series of tests, which keeps the checks concise.
21
22; CHECK:       .visible .func (.param .align 8 .b8 func_retval0[16])
23; CHECK-LABEL: test_s_i8i16p(
24; CHECK:        .param .align 8 .b8 test_s_i8i16p_param_0[16]
25; CHECK-DAG:    ld.param.u16 [[P0:%rs[0-9]+]],   [test_s_i8i16p_param_0];
26; CHECK-DAG:    ld.param.u8 [[P2_0:%rs[0-9]+]],   [test_s_i8i16p_param_0+3];
27; CHECK-DAG:    ld.param.u8 [[P2_1:%rs[0-9]+]],   [test_s_i8i16p_param_0+4];
28; CHECK-DAG:    shl.b16     [[P2_1_shl:%rs[0-9]+]], [[P2_1]], 8;
29; CHECK-DAG:    or.b16      [[P2_1_or:%rs[0-9]+]], [[P2_1_shl]], [[P2_0]];
30; CHECK:        { // callseq
31; CHECK:        .param .align 8 .b8 param0[16];
32; CHECK-DAG:    st.param.b16 [param0], [[P0]];
33; CHECK-DAG:    st.param.b8  [param0+3], [[P2_1_or]];
34; CHECK-DAG:    st.param.b8  [param0+4], [[P2_1]];
35; CHECK:        .param .align 8 .b8 retval0[16];
36; CHECK-NEXT:   call.uni (retval0),
37; CHECK-NEXT:   test_s_i8i16p,
38; CHECK-NEXT:   (
39; CHECK-NEXT:   param0
40; CHECK-NEXT:   );
41; CHECK-DAG:    ld.param.b16 [[R0:%rs[0-9]+]],   [retval0];
42; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+3];
43; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+4];
44; CHECK:        } // callseq
45; CHECK-DAG:    st.param.b16 [func_retval0], [[R0]];
46; CHECK-DAG:    shl.b16      [[R2_1_shl:%rs[0-9]+]], [[R2_1]], 8;
47; CHECK-DAG:    and.b16      [[R2_0_and:%rs[0-9]+]], [[R2_0]], 255;
48; CHECK-DAG:    or.b16       [[R2:%rs[0-9]+]], [[R2_0_and]], [[R2_1_shl]];
49; CHECK-DAG:    st.param.b8  [func_retval0+3], [[R2]];
50; CHECK-DAG:    and.b16      [[R2_1_and:%rs[0-9]+]], [[R2_1]], 255;
51; CHECK-DAG:    st.param.b8  [func_retval0+4], [[R2_1_and]];
52; CHECK:        ret;
53
; Test stimulus for %s_i8i16p: receives the aggregate by value, forwards it
; unchanged to a tail call of this same function, and returns the call's
; result. The aggregate is therefore read from the incoming parameter, written
; to the outgoing call parameter, read back from the call's return value, and
; written to this function's return value -- the four accesses that the
; FileCheck patterns above verify (whole access at offset 0, bytewise at
; offsets 3 and 4).
54define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
55       %r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a)
56       ret %s_i8i16p %r
57}
58
59; CHECK:       .visible .func (.param .align 8 .b8 func_retval0[24])
60; CHECK-LABEL: test_s_i8i32p(
61; CHECK:        .param .align 8 .b8 test_s_i8i32p_param_0[24]
62; CHECK-DAG:    ld.param.u32 [[P0:%r[0-9]+]],   [test_s_i8i32p_param_0];
63; CHECK-DAG:    ld.param.u8 [[P2_0:%r[0-9]+]],   [test_s_i8i32p_param_0+5];
64; CHECK-DAG:    ld.param.u8 [[P2_1:%r[0-9]+]],   [test_s_i8i32p_param_0+6];
65; CHECK-DAG:    ld.param.u8 [[P2_2:%r[0-9]+]],   [test_s_i8i32p_param_0+7];
66; CHECK-DAG:    ld.param.u8 [[P2_3:%r[0-9]+]],   [test_s_i8i32p_param_0+8];
67; CHECK-DAG:    shl.b32     [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8;
68; CHECK-DAG:    shl.b32     [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16;
69; CHECK-DAG:    shl.b32     [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24;
70; CHECK-DAG:    or.b32      [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]];
71; CHECK-DAG:    or.b32      [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
72; CHECK-DAG:    or.b32      [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]];
73; CHECK-DAG:    shr.u32     [[P2_1_shr:%r[0-9]+]], [[P2]], 8;
74; CHECK-DAG:    shr.u32     [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
75; CHECK:        { // callseq
76; CHECK-DAG:    .param .align 8 .b8 param0[24];
77; CHECK-DAG:    st.param.b32 [param0], [[P0]];
78; CHECK-DAG:    st.param.b8  [param0+5], [[P2]];
79; CHECK-DAG:    st.param.b8  [param0+6], [[P2_1_shr]];
80; CHECK-DAG:    st.param.b8  [param0+7], [[P2_2_shr]];
81; CHECK-DAG:    st.param.b8  [param0+8], [[P2_3]];
82; CHECK:        .param .align 8 .b8 retval0[24];
83; CHECK-NEXT:   call.uni (retval0),
84; CHECK-NEXT:   test_s_i8i32p,
85; CHECK-NEXT:   (
86; CHECK-NEXT:   param0
87; CHECK-NEXT:   );
88; CHECK-DAG:    ld.param.b32 [[R0:%r[0-9]+]],   [retval0];
89; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+5];
90; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+6];
91; CHECK-DAG:    ld.param.b8  [[R2_2:%rs[0-9]+]], [retval0+7];
92; CHECK-DAG:    ld.param.b8  [[R2_3:%rs[0-9]+]], [retval0+8];
93; CHECK:        } // callseq
94; CHECK-DAG:    st.param.b32 [func_retval0], [[R0]];
95; CHECK-DAG:    st.param.b8  [func_retval0+5],
96; CHECK-DAG:    st.param.b8  [func_retval0+6],
97; CHECK-DAG:    st.param.b8  [func_retval0+7],
98; CHECK-DAG:    st.param.b8  [func_retval0+8],
99; CHECK:        ret;
100
; Test stimulus for %s_i8i32p: receives the aggregate by value, forwards it
; unchanged to a tail call of this same function, and returns the call's
; result. This exercises parameter load, call-argument store, call-result load,
; and return-value store of the aggregate; the FileCheck patterns above expect
; a whole 32-bit access at offset 0 and bytewise accesses at offsets 5 to 8.
101define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
102       %r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a)
103       ret %s_i8i32p %r
104}
105
106; CHECK:       .visible .func (.param .align 8 .b8 func_retval0[32])
107; CHECK-LABEL: test_s_i8i64p(
108; CHECK:        .param .align 8 .b8 test_s_i8i64p_param_0[32]
109; CHECK-DAG:    ld.param.u64 [[P0:%rd[0-9]+]],   [test_s_i8i64p_param_0];
110; CHECK-DAG:    ld.param.u8 [[P2_0:%rd[0-9]+]],   [test_s_i8i64p_param_0+9];
111; CHECK-DAG:    ld.param.u8 [[P2_1:%rd[0-9]+]],   [test_s_i8i64p_param_0+10];
112; CHECK-DAG:    ld.param.u8 [[P2_2:%rd[0-9]+]],   [test_s_i8i64p_param_0+11];
113; CHECK-DAG:    ld.param.u8 [[P2_3:%rd[0-9]+]],   [test_s_i8i64p_param_0+12];
114; CHECK-DAG:    ld.param.u8 [[P2_4:%rd[0-9]+]],   [test_s_i8i64p_param_0+13];
115; CHECK-DAG:    ld.param.u8 [[P2_5:%rd[0-9]+]],   [test_s_i8i64p_param_0+14];
116; CHECK-DAG:    ld.param.u8 [[P2_6:%rd[0-9]+]],   [test_s_i8i64p_param_0+15];
117; CHECK-DAG:    ld.param.u8 [[P2_7:%rd[0-9]+]],   [test_s_i8i64p_param_0+16];
118; CHECK-DAG:    shl.b64      [[P2_1_shl:%rd[0-9]+]], [[P2_1]], 8;
119; CHECK-DAG:    shl.b64      [[P2_2_shl:%rd[0-9]+]], [[P2_2]], 16;
120; CHECK-DAG:    shl.b64      [[P2_3_shl:%rd[0-9]+]], [[P2_3]], 24;
121; CHECK-DAG:    or.b64       [[P2_or_0:%rd[0-9]+]], [[P2_1_shl]], [[P2_0]];
122; CHECK-DAG:    or.b64       [[P2_or_1:%rd[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
123; CHECK-DAG:    or.b64       [[P2_or_2:%rd[0-9]+]], [[P2_or_1]], [[P2_or_0]];
124; CHECK-DAG:    shl.b64      [[P2_5_shl:%rd[0-9]+]], [[P2_5]], 8;
125; CHECK-DAG:    shl.b64      [[P2_6_shl:%rd[0-9]+]], [[P2_6]], 16;
126; CHECK-DAG:    shl.b64      [[P2_7_shl:%rd[0-9]+]], [[P2_7]], 24;
127; CHECK-DAG:    or.b64       [[P2_or_3:%rd[0-9]+]], [[P2_5_shl]], [[P2_4]];
128; CHECK-DAG:    or.b64       [[P2_or_4:%rd[0-9]+]], [[P2_7_shl]], [[P2_6_shl]];
129; CHECK-DAG:    or.b64       [[P2_or_5:%rd[0-9]+]], [[P2_or_4]], [[P2_or_3]];
130; CHECK-DAG:    shl.b64      [[P2_or_shl:%rd[0-9]+]], [[P2_or_5]], 32;
131; CHECK-DAG:    or.b64       [[P2:%rd[0-9]+]], [[P2_or_shl]], [[P2_or_2]];
132; CHECK-DAG:    shr.u64      [[P2_shr_1:%rd[0-9]+]], [[P2]], 8;
133; CHECK-DAG:    shr.u64      [[P2_shr_2:%rd[0-9]+]], [[P2]], 16;
134; CHECK-DAG:    shr.u64      [[P2_shr_3:%rd[0-9]+]], [[P2]], 24;
135; CHECK-DAG:    bfe.u64      [[P2_bfe_4:%rd[0-9]+]], [[P2_or_5]], 8, 24;
136; CHECK-DAG:    bfe.u64      [[P2_bfe_5:%rd[0-9]+]], [[P2_or_5]], 16, 16;
137; CHECK-DAG:    bfe.u64      [[P2_bfe_6:%rd[0-9]+]], [[P2_or_5]], 24, 8;
138; CHECK:        { // callseq
139; CHECK:        .param .align 8 .b8 param0[32];
140; CHECK-DAG:    st.param.b64 [param0],  [[P0]];
141; CHECK-DAG:    st.param.b8  [param0+9],  [[P2]];
142; CHECK-DAG:    st.param.b8  [param0+10], [[P2_shr_1]];
143; CHECK-DAG:    st.param.b8  [param0+11], [[P2_shr_2]];
144; CHECK-DAG:    st.param.b8  [param0+12], [[P2_shr_3]];
145; CHECK-DAG:    st.param.b8  [param0+13], [[P2_or_5]];
146; CHECK-DAG:    st.param.b8  [param0+14], [[P2_bfe_4]];
147; CHECK-DAG:    st.param.b8  [param0+15], [[P2_bfe_5]];
148; CHECK-DAG:    st.param.b8  [param0+16], [[P2_bfe_6]];
149; CHECK:        .param .align 8 .b8 retval0[32];
150; CHECK-NEXT:   call.uni (retval0),
151; CHECK-NEXT:   test_s_i8i64p,
152; CHECK-NEXT:   (
153; CHECK-NEXT:   param0
154; CHECK-NEXT:   );
155; CHECK-DAG:    ld.param.b64 [[R0:%rd[0-9]+]],   [retval0];
156; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+9];
157; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+10];
158; CHECK-DAG:    ld.param.b8  [[R2_2:%rs[0-9]+]], [retval0+11];
159; CHECK-DAG:    ld.param.b8  [[R2_3:%rs[0-9]+]], [retval0+12];
160; CHECK-DAG:    ld.param.b8  [[R2_4:%rs[0-9]+]], [retval0+13];
161; CHECK-DAG:    ld.param.b8  [[R2_5:%rs[0-9]+]], [retval0+14];
162; CHECK-DAG:    ld.param.b8  [[R2_6:%rs[0-9]+]], [retval0+15];
163; CHECK-DAG:    ld.param.b8  [[R2_7:%rs[0-9]+]], [retval0+16];
164; CHECK:        } // callseq
165; CHECK-DAG:    st.param.b64 [func_retval0], [[R0]];
166; CHECK-DAG:    st.param.b8  [func_retval0+9],
167; CHECK-DAG:    st.param.b8  [func_retval0+10],
168; CHECK-DAG:    st.param.b8  [func_retval0+11],
169; CHECK-DAG:    st.param.b8  [func_retval0+12],
170; CHECK-DAG:    st.param.b8  [func_retval0+13],
171; CHECK-DAG:    st.param.b8  [func_retval0+14],
172; CHECK-DAG:    st.param.b8  [func_retval0+15],
173; CHECK-DAG:    st.param.b8  [func_retval0+16],
174; CHECK:        ret;
175
; Test stimulus for %s_i8i64p: receives the aggregate by value, forwards it
; unchanged to a tail call of this same function, and returns the call's
; result. This exercises parameter load, call-argument store, call-result load,
; and return-value store of the aggregate; the FileCheck patterns above expect
; a whole 64-bit access at offset 0 and bytewise accesses at offsets 9 to 16.
176define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
177       %r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a)
178       ret %s_i8i64p %r
179}
180
181; CHECK:       .visible .func (.param .align 8 .b8 func_retval0[16])
182; CHECK-LABEL: test_s_i8f16p(
183; CHECK:        .param .align 8 .b8 test_s_i8f16p_param_0[16]
184; CHECK-DAG:    ld.param.b16 [[P0:%rs[0-9]+]],     [test_s_i8f16p_param_0];
185; CHECK-DAG:    ld.param.u8  [[P2_0:%rs[0-9]+]],   [test_s_i8f16p_param_0+3];
186; CHECK-DAG:    ld.param.u8  [[P2_1:%rs[0-9]+]],   [test_s_i8f16p_param_0+4];
187; CHECK-DAG:    shl.b16      [[P2_1_shl:%rs[0-9]+]], [[P2_1]], 8;
188; CHECK-DAG:    or.b16       [[P2_1_or:%rs[0-9]+]], [[P2_1_shl]], [[P2_0]];
189; CHECK:        { // callseq
190; CHECK:        .param .align 8 .b8 param0[16];
191; CHECK-DAG:    st.param.b16 [param0], [[P0]];
192; CHECK-DAG:    st.param.b8  [param0+3], [[P2_1_or]];
193; CHECK-DAG:    st.param.b8  [param0+4], [[P2_1]];
194; CHECK:        .param .align 8 .b8 retval0[16];
195; CHECK-NEXT:   call.uni (retval0),
196; CHECK-NEXT:   test_s_i8f16p,
197; CHECK-NEXT:   (
198; CHECK-NEXT:   param0
199; CHECK-NEXT:   );
200; CHECK-DAG:    ld.param.b16 [[R0:%rs[0-9]+]],     [retval0];
201; CHECK-DAG:    ld.param.b8  [[R2I_0:%rs[0-9]+]], [retval0+3];
202; CHECK-DAG:    ld.param.b8  [[R2I_1:%rs[0-9]+]], [retval0+4];
203; CHECK:        } // callseq
204; CHECK-DAG:    st.param.b16 [func_retval0], [[R0]];
205; CHECK-DAG:    shl.b16      [[R2I_1_shl:%rs[0-9]+]], [[R2I_1]], 8;
206; CHECK-DAG:    and.b16      [[R2I_0_and:%rs[0-9]+]], [[R2I_0]], 255;
207; CHECK-DAG:    or.b16       [[R2I:%rs[0-9]+]], [[R2I_0_and]], [[R2I_1_shl]];
208; CHECK-DAG:    st.param.b8  [func_retval0+3],  [[R2I]];
209; CHECK-DAG:    and.b16      [[R2I_1_and:%rs[0-9]+]], [[R2I_1]], 255;
210; CHECK-DAG:    st.param.b8  [func_retval0+4],  [[R2I_1_and]];
211; CHECK:        ret;
212
; Test stimulus for %s_i8f16p (half-precision variant): receives the aggregate
; by value, forwards it unchanged to a tail call of this same function, and
; returns the call's result. The FileCheck patterns above expect a whole
; 16-bit access at offset 0 and bytewise accesses at offsets 3 and 4.
213define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
214       %r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a)
215       ret %s_i8f16p %r
216}
217
218; CHECK:       .visible .func (.param .align 8 .b8 func_retval0[24])
219; CHECK-LABEL: test_s_i8f16x2p(
220; CHECK:        .param .align 8 .b8 test_s_i8f16x2p_param_0[24]
221; CHECK-DAG:    ld.param.b32 [[P0:%r[0-9]+]],  [test_s_i8f16x2p_param_0];
222; CHECK-DAG:    ld.param.u8  [[P2_0:%r[0-9]+]],   [test_s_i8f16x2p_param_0+5];
223; CHECK-DAG:    ld.param.u8  [[P2_1:%r[0-9]+]],   [test_s_i8f16x2p_param_0+6];
224; CHECK-DAG:    ld.param.u8  [[P2_2:%r[0-9]+]],   [test_s_i8f16x2p_param_0+7];
225; CHECK-DAG:    ld.param.u8  [[P2_3:%r[0-9]+]],   [test_s_i8f16x2p_param_0+8];
226; CHECK-DAG:    shl.b32      [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8;
227; CHECK-DAG:    shl.b32      [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16;
228; CHECK-DAG:    shl.b32      [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24;
229; CHECK-DAG:    or.b32       [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]];
230; CHECK-DAG:    or.b32       [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
231; CHECK-DAG:    or.b32       [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]];
232; CHECK-DAG:    shr.u32      [[P2_1_shr:%r[0-9]+]], [[P2]], 8;
233; CHECK-DAG:    shr.u32      [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
234; CHECK:        { // callseq
235; CHECK-DAG:    .param .align 8 .b8 param0[24];
236; CHECK-DAG:    st.param.b32 [param0], [[P0]];
237; CHECK-DAG:    st.param.b8  [param0+5], [[P2]];
238; CHECK-DAG:    st.param.b8  [param0+6], [[P2_1_shr]];
239; CHECK-DAG:    st.param.b8  [param0+7], [[P2_2_shr]];
240; CHECK-DAG:    st.param.b8  [param0+8], [[P2_3]];
241; CHECK:        .param .align 8 .b8 retval0[24];
242; CHECK-NEXT:   call.uni (retval0),
243; CHECK-NEXT:   test_s_i8f16x2p,
244; CHECK-NEXT:   (
245; CHECK-NEXT:   param0
246; CHECK-NEXT:   );
247; CHECK-DAG:    ld.param.b32 [[R0:%r[0-9]+]],   [retval0];
248; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+5];
249; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+6];
250; CHECK-DAG:    ld.param.b8  [[R2_2:%rs[0-9]+]], [retval0+7];
251; CHECK-DAG:    ld.param.b8  [[R2_3:%rs[0-9]+]], [retval0+8];
252; CHECK:        } // callseq
253; CHECK-DAG:    st.param.b32 [func_retval0], [[R0]];
254; CHECK-DAG:    st.param.b8  [func_retval0+5],
255; CHECK-DAG:    st.param.b8  [func_retval0+6],
256; CHECK-DAG:    st.param.b8  [func_retval0+7],
257; CHECK-DAG:    st.param.b8  [func_retval0+8],
258; CHECK:        ret;
259
; Test stimulus for %s_i8f16x2p (<2 x half> variant): receives the aggregate
; by value, forwards it unchanged to a tail call of this same function, and
; returns the call's result. The FileCheck patterns above expect a whole
; 32-bit access at offset 0 and bytewise accesses at offsets 5 to 8.
260define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
261       %r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
262       ret %s_i8f16x2p %r
263}
264
265; CHECK:       .visible .func (.param .align 8 .b8 func_retval0[24])
266; CHECK-LABEL: test_s_i8f32p(
267; CHECK:        .param .align 8 .b8 test_s_i8f32p_param_0[24]
268; CHECK-DAG:    ld.param.f32 [[P0:%f[0-9]+]],    [test_s_i8f32p_param_0];
269; CHECK-DAG:    ld.param.u8  [[P2_0:%r[0-9]+]],   [test_s_i8f32p_param_0+5];
270; CHECK-DAG:    ld.param.u8  [[P2_1:%r[0-9]+]],   [test_s_i8f32p_param_0+6];
271; CHECK-DAG:    ld.param.u8  [[P2_2:%r[0-9]+]],   [test_s_i8f32p_param_0+7];
272; CHECK-DAG:    ld.param.u8  [[P2_3:%r[0-9]+]],   [test_s_i8f32p_param_0+8];
273; CHECK-DAG:    shl.b32      [[P2_1_shl:%r[0-9]+]], [[P2_1]], 8;
274; CHECK-DAG:    shl.b32      [[P2_2_shl:%r[0-9]+]], [[P2_2]], 16;
275; CHECK-DAG:    shl.b32      [[P2_3_shl:%r[0-9]+]], [[P2_3]], 24;
276; CHECK-DAG:    or.b32       [[P2_or:%r[0-9]+]], [[P2_1_shl]], [[P2_0]];
277; CHECK-DAG:    or.b32       [[P2_or_1:%r[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
278; CHECK-DAG:    or.b32       [[P2:%r[0-9]+]], [[P2_or_1]], [[P2_or]];
279; CHECK-DAG:    shr.u32      [[P2_1_shr:%r[0-9]+]], [[P2]], 8;
280; CHECK-DAG:    shr.u32      [[P2_2_shr:%r[0-9]+]], [[P2_or_1]], 16;
281; CHECK:        { // callseq
282; CHECK-DAG:    .param .align 8 .b8 param0[24];
283; CHECK-DAG:    st.param.f32 [param0], [[P0]];
284; CHECK-DAG:    st.param.b8  [param0+5], [[P2]];
285; CHECK-DAG:    st.param.b8  [param0+6], [[P2_1_shr]];
286; CHECK-DAG:    st.param.b8  [param0+7], [[P2_2_shr]];
287; CHECK-DAG:    st.param.b8  [param0+8], [[P2_3]];
288; CHECK:        .param .align 8 .b8 retval0[24];
289; CHECK-NEXT:   call.uni (retval0),
290; CHECK-NEXT:   test_s_i8f32p,
291; CHECK-NEXT:   (
292; CHECK-NEXT:   param0
293; CHECK-NEXT:   );
294; CHECK-DAG:    ld.param.f32 [[R0:%f[0-9]+]],    [retval0];
295; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+5];
296; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+6];
297; CHECK-DAG:    ld.param.b8  [[R2_2:%rs[0-9]+]], [retval0+7];
298; CHECK-DAG:    ld.param.b8  [[R2_3:%rs[0-9]+]], [retval0+8];
299; CHECK:        } // callseq
300; CHECK-DAG:    st.param.f32 [func_retval0], [[R0]];
301; CHECK-DAG:    st.param.b8  [func_retval0+5],
302; CHECK-DAG:    st.param.b8  [func_retval0+6],
303; CHECK-DAG:    st.param.b8  [func_retval0+7],
304; CHECK-DAG:    st.param.b8  [func_retval0+8],
305; CHECK:        ret;
306
; Test stimulus for %s_i8f32p (float variant): receives the aggregate by
; value, forwards it unchanged to a tail call of this same function, and
; returns the call's result. The FileCheck patterns above expect a whole f32
; access at offset 0 and bytewise accesses at offsets 5 to 8.
307define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
308       %r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
309       ret %s_i8f32p %r
310}
311
312; CHECK:       .visible .func (.param .align 8 .b8 func_retval0[32])
313; CHECK-LABEL: test_s_i8f64p(
314; CHECK:        .param .align 8 .b8 test_s_i8f64p_param_0[32]
315; CHECK-DAG:    ld.param.f64 [[P0:%fd[0-9]+]],    [test_s_i8f64p_param_0];
316; CHECK-DAG:    ld.param.u8  [[P2_0:%rd[0-9]+]],   [test_s_i8f64p_param_0+9];
317; CHECK-DAG:    ld.param.u8  [[P2_1:%rd[0-9]+]],   [test_s_i8f64p_param_0+10];
318; CHECK-DAG:    ld.param.u8  [[P2_2:%rd[0-9]+]],   [test_s_i8f64p_param_0+11];
319; CHECK-DAG:    ld.param.u8  [[P2_3:%rd[0-9]+]],   [test_s_i8f64p_param_0+12];
320; CHECK-DAG:    ld.param.u8  [[P2_4:%rd[0-9]+]],   [test_s_i8f64p_param_0+13];
321; CHECK-DAG:    ld.param.u8  [[P2_5:%rd[0-9]+]],   [test_s_i8f64p_param_0+14];
322; CHECK-DAG:    ld.param.u8  [[P2_6:%rd[0-9]+]],   [test_s_i8f64p_param_0+15];
323; CHECK-DAG:    ld.param.u8  [[P2_7:%rd[0-9]+]],   [test_s_i8f64p_param_0+16];
324; CHECK-DAG:    shl.b64      [[P2_1_shl:%rd[0-9]+]], [[P2_1]], 8;
325; CHECK-DAG:    shl.b64      [[P2_2_shl:%rd[0-9]+]], [[P2_2]], 16;
326; CHECK-DAG:    shl.b64      [[P2_3_shl:%rd[0-9]+]], [[P2_3]], 24;
327; CHECK-DAG:    or.b64       [[P2_or_0:%rd[0-9]+]], [[P2_1_shl]], [[P2_0]];
328; CHECK-DAG:    or.b64       [[P2_or_1:%rd[0-9]+]], [[P2_3_shl]], [[P2_2_shl]];
329; CHECK-DAG:    or.b64       [[P2_or_2:%rd[0-9]+]], [[P2_or_1]], [[P2_or_0]];
330; CHECK-DAG:    shl.b64      [[P2_5_shl:%rd[0-9]+]], [[P2_5]], 8;
331; CHECK-DAG:    shl.b64      [[P2_6_shl:%rd[0-9]+]], [[P2_6]], 16;
332; CHECK-DAG:    shl.b64      [[P2_7_shl:%rd[0-9]+]], [[P2_7]], 24;
333; CHECK-DAG:    or.b64       [[P2_or_3:%rd[0-9]+]], [[P2_5_shl]], [[P2_4]];
334; CHECK-DAG:    or.b64       [[P2_or_4:%rd[0-9]+]], [[P2_7_shl]], [[P2_6_shl]];
335; CHECK-DAG:    or.b64       [[P2_or_5:%rd[0-9]+]], [[P2_or_4]], [[P2_or_3]];
336; CHECK-DAG:    shl.b64      [[P2_or_shl:%rd[0-9]+]], [[P2_or_5]], 32;
337; CHECK-DAG:    or.b64       [[P2:%rd[0-9]+]], [[P2_or_shl]], [[P2_or_2]];
338; CHECK-DAG:    shr.u64      [[P2_shr_1:%rd[0-9]+]], [[P2]], 8;
339; CHECK-DAG:    shr.u64      [[P2_shr_2:%rd[0-9]+]], [[P2]], 16;
340; CHECK-DAG:    shr.u64      [[P2_shr_3:%rd[0-9]+]], [[P2]], 24;
341; CHECK-DAG:    bfe.u64      [[P2_bfe_4:%rd[0-9]+]], [[P2_or_5]], 8, 24;
342; CHECK-DAG:    bfe.u64      [[P2_bfe_5:%rd[0-9]+]], [[P2_or_5]], 16, 16;
343; CHECK-DAG:    bfe.u64      [[P2_bfe_6:%rd[0-9]+]], [[P2_or_5]], 24, 8;
344; CHECK:        { // callseq
345; CHECK:        .param .align 8 .b8 param0[32];
346; CHECK-DAG:    st.param.f64 [param0],  [[P0]];
347; CHECK-DAG:    st.param.b8  [param0+9],  [[P2]];
348; CHECK-DAG:    st.param.b8  [param0+10], [[P2_shr_1]];
349; CHECK-DAG:    st.param.b8  [param0+11], [[P2_shr_2]];
350; CHECK-DAG:    st.param.b8  [param0+12], [[P2_shr_3]];
351; CHECK-DAG:    st.param.b8  [param0+13], [[P2_or_5]];
352; CHECK-DAG:    st.param.b8  [param0+14], [[P2_bfe_4]];
353; CHECK-DAG:    st.param.b8  [param0+15], [[P2_bfe_5]];
354; CHECK-DAG:    st.param.b8  [param0+16], [[P2_bfe_6]];
355; CHECK:        .param .align 8 .b8 retval0[32];
356; CHECK-NEXT:   call.uni (retval0),
357; CHECK-NEXT:   test_s_i8f64p,
358; CHECK-NEXT:   (
359; CHECK-NEXT:   param0
360; CHECK-NEXT:   );
361; CHECK-DAG:    ld.param.f64 [[R0:%fd[0-9]+]],   [retval0];
362; CHECK-DAG:    ld.param.b8  [[R2_0:%rs[0-9]+]], [retval0+9];
363; CHECK-DAG:    ld.param.b8  [[R2_1:%rs[0-9]+]], [retval0+10];
364; CHECK-DAG:    ld.param.b8  [[R2_2:%rs[0-9]+]], [retval0+11];
365; CHECK-DAG:    ld.param.b8  [[R2_3:%rs[0-9]+]], [retval0+12];
366; CHECK-DAG:    ld.param.b8  [[R2_4:%rs[0-9]+]], [retval0+13];
367; CHECK-DAG:    ld.param.b8  [[R2_5:%rs[0-9]+]], [retval0+14];
368; CHECK-DAG:    ld.param.b8  [[R2_6:%rs[0-9]+]], [retval0+15];
369; CHECK-DAG:    ld.param.b8  [[R2_7:%rs[0-9]+]], [retval0+16];
370; CHECK:        } // callseq
371; CHECK-DAG:    st.param.f64 [func_retval0], [[R0]];
372; CHECK-DAG:    st.param.b8  [func_retval0+9],
373; CHECK-DAG:    st.param.b8  [func_retval0+10],
374; CHECK-DAG:    st.param.b8  [func_retval0+11],
375; CHECK-DAG:    st.param.b8  [func_retval0+12],
376; CHECK-DAG:    st.param.b8  [func_retval0+13],
377; CHECK-DAG:    st.param.b8  [func_retval0+14],
378; CHECK-DAG:    st.param.b8  [func_retval0+15],
379; CHECK-DAG:    st.param.b8  [func_retval0+16],
380; CHECK:        ret;
381
; Test stimulus for %s_i8f64p (double variant): receives the aggregate by
; value, forwards it unchanged to a tail call of this same function, and
; returns the call's result. The FileCheck patterns above expect a whole f64
; access at offset 0 and bytewise accesses at offsets 9 to 16.
382define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) {
383       %r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a)
384       ret %s_i8f64p %r
385}
386