xref: /llvm-project/llvm/test/CodeGen/NVPTX/param-load-store.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
1; Verifies correctness of load/store of parameters and return values.
2; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
3; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}
4
; Named struct types wrapping exactly one scalar each (i1 through double).
; NOTE(review): none of these types is referenced in the part of the file
; reviewed here; presumably the aggregate tests further down use them -- confirm.
5%s_i1 = type { i1 }
6%s_i8 = type { i8 }
7%s_i16 = type { i16 }
8%s_f16 = type { half }
9%s_i32 = type { i32 }
10%s_f32 = type { float }
11%s_i64 = type { i64 }
12%s_f64 = type { double }
13
14; More complicated types. i64 is used to increase natural alignment
15; requirement for the type.
; Four i32 fields followed by the trailing i64.
16%s_i32x4 = type { i32, i32, i32, i32, i64}
; Alternating i32/float fields followed by the trailing i64.
17%s_i32f32 = type { i32, float, i32, float, i64}
; An i8 placed between i32 fields, followed by the trailing i64.
18%s_i8i32x4 = type { i32, i32, i8, i32, i32, i64}
; Same field list as %s_i8i32x4, but declared with the packed-struct syntax <{ ... }>.
19%s_i8i32x4p = type <{ i32, i32, i8, i32, i32, i64}>
; Mixes a scalar, an array, a vector, and an array of anonymous structs.
20%s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}]}
21; All scalar parameters must be at least 32 bits in size.
22; i1 is loaded/stored as i8.
23
24; CHECK: .func  (.param .b32 func_retval0)
25; CHECK-LABEL: test_i1(
26; CHECK-NEXT: .param .b32 test_i1_param_0
27; CHECK:      ld.param.u8 [[A8:%rs[0-9]+]], [test_i1_param_0];
28; CHECK:      and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
29; CHECK:      setp.eq.b16 %p1, [[A]], 1
30; CHECK:      cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
31; CHECK:      and.b32 [[C:%r[0-9]+]], [[B]], 1;
32; CHECK:      .param .b32 param0;
33; CHECK:      st.param.b32    [param0], [[C]]
34; CHECK:      .param .b32 retval0;
35; CHECK:      call.uni
36; CHECK-NEXT: test_i1,
37; CHECK:      ld.param.b32    [[R8:%r[0-9]+]], [retval0];
38; CHECK:      and.b32         [[R:%r[0-9]+]], [[R8]], 1;
39; CHECK:      st.param.b32    [func_retval0], [[R]];
40; CHECK:      ret;
; Identity round-trip for i1: forwards %a to a recursive tail call and returns
; the callee's result, so llc has to lower the i1 parameter and the i1 return
; value on both the incoming and the outgoing side.
41define i1 @test_i1(i1 %a) {
42  %r = tail call i1 @test_i1(i1 %a);
43  ret i1 %r;
44}
45
46; Signed i1 is a somewhat special case. We only care about one bit and
47; then use neg.s32 to convert it to 32-bit -1 if it's set.
48; CHECK: .func  (.param .b32 func_retval0)
49; CHECK-LABEL: test_i1s(
50; CHECK-NEXT: .param .b32 test_i1s_param_0
51; CHECK:      ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
52; CHECK:      cvt.u32.u16     [[A32:%r[0-9]+]], [[A8]];
53; CHECK:      and.b32         [[A1:%r[0-9]+]], [[A32]], 1;
54; CHECK:      neg.s32         [[A:%r[0-9]+]], [[A1]];
55; CHECK:      .param .b32 param0;
56; CHECK:      st.param.b32    [param0], [[A]];
57; CHECK:      .param .b32 retval0;
58; CHECK:      call.uni
59; CHECK:      ld.param.b32    [[R8:%r[0-9]+]], [retval0];
60; CHECK:      and.b32         [[R1:%r[0-9]+]], [[R8]], 1;
61; CHECK:      neg.s32         [[R:%r[0-9]+]], [[R1]];
62; CHECK:      st.param.b32    [func_retval0], [[R]];
63; CHECK-NEXT: ret;
; Same round-trip as the plain i1 case, except that the parameter, the call
; argument, and the return value all carry the signext attribute.
64define signext i1 @test_i1s(i1 signext %a) {
65       %r = tail call signext i1 @test_i1s(i1 signext %a);
66       ret i1 %r;
67}
68
69; Make sure that i1 loads are vectorized as i8 loads, respecting each element alignment.
70; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
71; CHECK-LABEL: test_v3i1(
72; CHECK-NEXT: .param .align 1 .b8 test_v3i1_param_0[1]
73; CHECK-DAG:  ld.param.u8     [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
74; CHECK-DAG:  ld.param.u8     [[E0:%rs[0-9]+]], [test_v3i1_param_0]
75; CHECK:      .param .align 1 .b8 param0[1];
76; CHECK-DAG:  st.param.b8     [param0], [[E0]];
77; CHECK-DAG:  st.param.b8     [param0+2], [[E2]];
78; CHECK:      .param .align 1 .b8 retval0[1];
79; CHECK:      call.uni (retval0),
80; CHECK-NEXT: test_v3i1,
81; CHECK-DAG:  ld.param.b8     [[RE0:%rs[0-9]+]], [retval0];
82; CHECK-DAG:  ld.param.b8     [[RE2:%rs[0-9]+]], [retval0+2];
83; CHECK-DAG:  st.param.b8     [func_retval0], [[RE0]]
84; CHECK-DAG:  st.param.b8     [func_retval0+2], [[RE2]];
85; CHECK-NEXT: ret;
; Round-trips a <3 x i1> vector through a recursive tail call and returns the
; callee's result. The expectations above pin only elements 0 and 2.
86define <3 x i1> @test_v3i1(<3 x i1> %a) {
87       %r = tail call <3 x i1> @test_v3i1(<3 x i1> %a);
88       ret <3 x i1> %r;
89}
90
91; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
92; CHECK-LABEL: test_v4i1(
93; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1]
94; CHECK:      ld.param.u8 [[E0:%rs[0-9]+]], [test_v4i1_param_0]
95; CHECK:      .param .align 1 .b8 param0[1];
96; CHECK:      st.param.b8  [param0], [[E0]];
97; CHECK:      .param .align 1 .b8 retval0[1];
98; CHECK:      call.uni (retval0),
99; CHECK:      test_v4i1,
100; CHECK:      ld.param.b8  [[RE0:%rs[0-9]+]], [retval0];
101; CHECK:      ld.param.b8  [[RE1:%rs[0-9]+]], [retval0+1];
102; CHECK:      ld.param.b8  [[RE2:%rs[0-9]+]], [retval0+2];
103; CHECK:      ld.param.b8  [[RE3:%rs[0-9]+]], [retval0+3];
104; CHECK:      st.param.b8  [func_retval0], [[RE0]];
105; CHECK:      st.param.b8  [func_retval0+1], [[RE1]];
106; CHECK:      st.param.b8  [func_retval0+2], [[RE2]];
107; CHECK:      st.param.b8  [func_retval0+3], [[RE3]];
108; CHECK-NEXT: ret;
; Round-trips a <4 x i1> vector through a recursive tail call and returns the
; callee's result. On the return path the expectations above cover all four
; byte offsets (0 through 3).
109define <4 x i1> @test_v4i1(<4 x i1> %a) {
110       %r = tail call <4 x i1> @test_v4i1(<4 x i1> %a);
111       ret <4 x i1> %r;
112}
113
114; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
115; CHECK-LABEL: test_v5i1(
116; CHECK-NEXT: .param .align 1 .b8 test_v5i1_param_0[1]
117; CHECK-DAG:  ld.param.u8     [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
118; CHECK-DAG:  ld.param.u8     [[E0:%rs[0-9]+]], [test_v5i1_param_0]
119; CHECK:      .param .align 1 .b8 param0[1];
120; CHECK-DAG:  st.param.b8     [param0], [[E0]];
121; CHECK-DAG:  st.param.b8     [param0+4], [[E4]];
122; CHECK:      .param .align 1 .b8 retval0[1];
123; CHECK:      call.uni (retval0),
124; CHECK-NEXT: test_v5i1,
125; CHECK-DAG:  ld.param.b8  [[RE0:%rs[0-9]+]], [retval0];
126; CHECK-DAG:  ld.param.b8     [[RE4:%rs[0-9]+]], [retval0+4];
127; CHECK-DAG:  st.param.b8  [func_retval0], [[RE0]]
128; CHECK-DAG:  st.param.b8     [func_retval0+4], [[RE4]];
129; CHECK-NEXT: ret;
; Round-trips a <5 x i1> vector through a recursive tail call and returns the
; callee's result. The expectations above pin only elements 0 and 4.
130define <5 x i1> @test_v5i1(<5 x i1> %a) {
131       %r = tail call <5 x i1> @test_v5i1(<5 x i1> %a);
132       ret <5 x i1> %r;
133}
134
135; CHECK: .func  (.param .b32 func_retval0)
136; CHECK-LABEL: test_i2(
137; CHECK-NEXT: .param .b32 test_i2_param_0
138; CHECK:      ld.param.u8 {{%rs[0-9]+}}, [test_i2_param_0];
139; CHECK:      .param .b32 param0;
140; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
141; CHECK:      .param .b32 retval0;
142; CHECK:      call.uni (retval0),
143; CHECK:      test_i2,
144; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
145; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
146; CHECK-NEXT: ret;
; Round-trips an i2 (an integer narrower than a byte) through a recursive tail
; call and returns the callee's result. The expectations above match registers
; with anonymous patterns only, so the value conversions are not pinned.
147define i2 @test_i2(i2 %a) {
148       %r = tail call i2 @test_i2(i2 %a);
149       ret i2 %r;
150}
151
152; CHECK: .func  (.param .b32 func_retval0)
153; CHECK-LABEL: test_i3(
154; CHECK-NEXT: .param .b32 test_i3_param_0
155; CHECK:      ld.param.u8 {{%rs[0-9]+}}, [test_i3_param_0];
156; CHECK:      .param .b32 param0;
157; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
158; CHECK:      .param .b32 retval0;
159; CHECK:      call.uni (retval0),
160; CHECK:      test_i3,
161; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
162; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
163; CHECK-NEXT: ret;
; Round-trips an i3 through a recursive tail call and returns the callee's
; result; structurally identical to the i2 case, only the bit width differs.
164define i3 @test_i3(i3 %a) {
165       %r = tail call i3 @test_i3(i3 %a);
166       ret i3 %r;
167}
168
169; Unsigned i8 is loaded into a 16-bit register, zero-extended to 32 bits, and masked with 255.
170; CHECK: .func  (.param .b32 func_retval0)
171; CHECK-LABEL: test_i8(
172; CHECK-NEXT: .param .b32 test_i8_param_0
173; CHECK:      ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0];
174; CHECK:      cvt.u32.u16     [[A32:%r[0-9]+]], [[A8]];
175; CHECK:      and.b32         [[A:%r[0-9]+]], [[A32]], 255;
176; CHECK:      .param .b32 param0;
177; CHECK:      st.param.b32    [param0], [[A]];
178; CHECK:      .param .b32 retval0;
179; CHECK:      call.uni (retval0),
180; CHECK:      test_i8,
181; CHECK:      ld.param.b32    [[R32:%r[0-9]+]], [retval0];
182; CHECK:      and.b32         [[R:%r[0-9]+]], [[R32]], 255;
183; CHECK:      st.param.b32    [func_retval0], [[R]];
184; CHECK-NEXT: ret;
; Round-trips an i8 with no extension attribute through a recursive tail call
; and returns the callee's result.
185define i8 @test_i8(i8 %a) {
186       %r = tail call i8 @test_i8(i8 %a);
187       ret i8 %r;
188}
189
190; signed i8 is loaded into 16-bit register which is then sign-extended to i32.
191; CHECK: .func  (.param .b32 func_retval0)
192; CHECK-LABEL: test_i8s(
193; CHECK-NEXT: .param .b32 test_i8s_param_0
194; CHECK:      ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
195; CHECK:      cvt.s32.s16     [[A:%r[0-9]+]], [[A8]];
196; CHECK:      .param .b32 param0;
197; CHECK:      st.param.b32    [param0], [[A]];
198; CHECK:      .param .b32 retval0;
199; CHECK:      call.uni (retval0),
200; CHECK:      test_i8s,
201; CHECK:      ld.param.b32    [[R32:%r[0-9]+]], [retval0];
202; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
203; CHECK:      cvt.u16.u32     [[R16:%rs[0-9]+]], [[R32]];
204; CHECK:      cvt.s32.s16     [[R:%r[0-9]+]], [[R16]];
205; CHECK:      st.param.b32    [func_retval0], [[R]];
206; CHECK-NEXT: ret;
; Round-trips an i8 whose parameter, call argument, and return value are all
; marked signext, through a recursive tail call.
207define signext i8 @test_i8s(i8 signext %a) {
208       %r = tail call signext i8 @test_i8s(i8 signext %a);
209       ret i8 %r;
210}
211
212; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
213; CHECK-LABEL: test_v3i8(
214; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
215; CHECK:      ld.param.u32     [[R:%r[0-9]+]], [test_v3i8_param_0];
216; CHECK:      .param .align 4 .b8 param0[4];
217; CHECK:      st.param.b32  [param0], [[R]]
218; CHECK:      .param .align 4 .b8 retval0[4];
219; CHECK:      call.uni (retval0),
220; CHECK-NEXT: test_v3i8,
221; CHECK:      ld.param.b32  [[RE:%r[0-9]+]], [retval0];
222; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very
223; interesting here, so it's skipped.
224; CHECK:      st.param.b32  [func_retval0],
225; CHECK-NEXT: ret;
; Round-trips a <3 x i8> vector through a recursive tail call and returns the
; callee's result. The final return-value store is matched without pinning its
; source register (see the note in the expectations above).
226define <3 x i8> @test_v3i8(<3 x i8> %a) {
227       %r = tail call <3 x i8> @test_v3i8(<3 x i8> %a);
228       ret <3 x i8> %r;
229}
230
231; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
232; CHECK-LABEL: test_v4i8(
233; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
234; CHECK:      ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_param_0]
235; CHECK:      .param .align 4 .b8 param0[4];
236; CHECK:      st.param.b32  [param0], [[R]];
237; CHECK:      .param .align 4 .b8 retval0[4];
238; CHECK:      call.uni (retval0),
239; CHECK-NEXT: test_v4i8,
240; CHECK:      ld.param.b32  [[RET:%r[0-9]+]], [retval0];
241; CHECK:      st.param.b32  [func_retval0], [[RET]];
242; CHECK-NEXT: ret;
; Round-trips a <4 x i8> vector through a recursive tail call and returns the
; callee's result. The expectations above treat the vector as one 32-bit value.
243define <4 x i8> @test_v4i8(<4 x i8> %a) {
244       %r = tail call <4 x i8> @test_v4i8(<4 x i8> %a);
245       ret <4 x i8> %r;
246}
247
248; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
249; CHECK-LABEL: test_v5i8(
250; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
251; CHECK-DAG:  ld.param.u32    [[E0:%r[0-9]+]], [test_v5i8_param_0]
252; CHECK-DAG:  ld.param.u8     [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
253; CHECK:      .param .align 8 .b8 param0[8];
254; CHECK-DAG:  st.param.v4.b8  [param0],
255; CHECK-DAG:  st.param.b8     [param0+4], [[E4]];
256; CHECK:      .param .align 8 .b8 retval0[8];
257; CHECK:      call.uni (retval0),
258; CHECK-NEXT: test_v5i8,
259; CHECK-DAG:  ld.param.v4.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
260; CHECK-DAG:  ld.param.b8     [[RE4:%rs[0-9]+]], [retval0+4];
261; CHECK-DAG:  st.param.v4.b8  [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
262; CHECK-DAG:  st.param.b8     [func_retval0+4], [[RE4]];
263; CHECK-NEXT: ret;
; Round-trips a <5 x i8> vector through a recursive tail call and returns the
; callee's result. The expectations above split it into a four-element part at
; offset 0 and a single trailing byte at offset 4.
264define <5 x i8> @test_v5i8(<5 x i8> %a) {
265       %r = tail call <5 x i8> @test_v5i8(<5 x i8> %a);
266       ret <5 x i8> %r;
267}
268
269; CHECK: .func  (.param .b32 func_retval0)
270; CHECK-LABEL: test_i11(
271; CHECK-NEXT: .param .b32 test_i11_param_0
272; CHECK:      ld.param.u16    {{%rs[0-9]+}}, [test_i11_param_0];
273; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
274; CHECK:      .param .b32 retval0;
275; CHECK:      call.uni (retval0),
276; CHECK-NEXT: test_i11,
277; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
278; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
279; CHECK-NEXT: ret;
; Round-trips an i11 (wider than a byte, narrower than 16 bits) through a
; recursive tail call and returns the callee's result.
280define i11 @test_i11(i11 %a) {
281       %r = tail call i11 @test_i11(i11 %a);
282       ret i11 %r;
283}
284
285; CHECK: .func  (.param .b32 func_retval0)
286; CHECK-LABEL: test_i16(
287; CHECK-NEXT: .param .b32 test_i16_param_0
288; CHECK:      ld.param.u16    [[E16:%rs[0-9]+]], [test_i16_param_0];
289; CHECK:      cvt.u32.u16     [[E32:%r[0-9]+]], [[E16]];
290; CHECK:      .param .b32 param0;
291; CHECK:      st.param.b32    [param0], [[E32]];
292; CHECK:      .param .b32 retval0;
293; CHECK:      call.uni (retval0),
294; CHECK-NEXT: test_i16,
295; CHECK:      ld.param.b32    [[RE32:%r[0-9]+]], [retval0];
296; CHECK:      and.b32         [[R:%r[0-9]+]], [[RE32]], 65535;
297; CHECK:      st.param.b32    [func_retval0], [[R]];
298; CHECK-NEXT: ret;
; Round-trips an i16 with no extension attribute through a recursive tail call
; and returns the callee's result.
299define i16 @test_i16(i16 %a) {
300       %r = tail call i16 @test_i16(i16 %a);
301       ret i16 %r;
302}
303
304; CHECK: .func  (.param .b32 func_retval0)
305; CHECK-LABEL: test_i16s(
306; CHECK-NEXT: .param .b32 test_i16s_param_0
307; CHECK:      ld.param.u16    [[E16:%rs[0-9]+]], [test_i16s_param_0];
308; CHECK:      cvt.s32.s16     [[E32:%r[0-9]+]], [[E16]];
309; CHECK:      .param .b32 param0;
310; CHECK:      st.param.b32    [param0], [[E32]];
311; CHECK:      .param .b32 retval0;
312; CHECK:      call.uni (retval0),
313; CHECK-NEXT: test_i16s,
314; CHECK:      ld.param.b32    [[RE32:%r[0-9]+]], [retval0];
315; CHECK:      cvt.s32.s16     [[R:%r[0-9]+]], [[RE32]];
316; CHECK:      st.param.b32    [func_retval0], [[R]];
317; CHECK-NEXT: ret;
; Round-trips an i16 whose parameter, call argument, and return value are all
; marked signext, through a recursive tail call.
318define signext i16 @test_i16s(i16 signext %a) {
319       %r = tail call signext i16 @test_i16s(i16 signext %a);
320       ret i16 %r;
321}
322
323; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
324; CHECK-LABEL: test_v3i16(
325; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
326; CHECK-DAG:  ld.param.u16    [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
327; CHECK-DAG:  ld.param.u32    [[R:%r[0-9]+]], [test_v3i16_param_0];
328; CHECK-DAG:  mov.b32 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[R]];
329; CHECK:      .param .align 8 .b8 param0[8];
330; CHECK:      st.param.v2.b16 [param0], {[[E0]], [[E1]]};
331; CHECK:      st.param.b16    [param0+4], [[E2]];
332; CHECK:      .param .align 8 .b8 retval0[8];
333; CHECK:      call.uni (retval0),
334; CHECK-NEXT: test_v3i16,
335; CHECK:      ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0];
336; CHECK:      ld.param.b16    [[RE2:%rs[0-9]+]], [retval0+4];
337; CHECK-DAG:  st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]};
338; CHECK-DAG:  st.param.b16    [func_retval0+4], [[RE2]];
339; CHECK-NEXT: ret;
; Round-trips a <3 x i16> vector through a recursive tail call and returns the
; callee's result. The expectations above handle it as a two-element part at
; offset 0 plus one element at offset 4.
340define <3 x i16> @test_v3i16(<3 x i16> %a) {
341       %r = tail call <3 x i16> @test_v3i16(<3 x i16> %a);
342       ret <3 x i16> %r;
343}
344
345; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
346; CHECK-LABEL: test_v4i16(
347; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
348; CHECK:      ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v4i16_param_0]
349; CHECK:      .param .align 8 .b8 param0[8];
350; CHECK:      st.param.v2.b32 [param0], {[[E0]], [[E1]]};
351; CHECK:      .param .align 8 .b8 retval0[8];
352; CHECK:      call.uni (retval0),
353; CHECK-NEXT: test_v4i16,
354; CHECK:      ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
355; CHECK:      st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
356; CHECK-NEXT: ret;
; Round-trips a <4 x i16> vector through a recursive tail call and returns the
; callee's result. The expectations above move it as a pair of 32-bit values.
357define <4 x i16> @test_v4i16(<4 x i16> %a) {
358       %r = tail call <4 x i16> @test_v4i16(<4 x i16> %a);
359       ret <4 x i16> %r;
360}
361
362; CHECK: .func  (.param .align 16 .b8 func_retval0[16])
363; CHECK-LABEL: test_v5i16(
364; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
365; CHECK-DAG:  ld.param.u16    [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
366; CHECK-DAG:  ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
367; CHECK:      .param .align 16 .b8 param0[16];
368; CHECK-DAG:  st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
369; CHECK-DAG:  st.param.b16    [param0+8], [[E4]];
370; CHECK:      .param .align 16 .b8 retval0[16];
371; CHECK:      call.uni (retval0),
372; CHECK-NEXT: test_v5i16,
373; CHECK-DAG:  ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
374; CHECK-DAG:  ld.param.b16    [[RE4:%rs[0-9]+]], [retval0+8];
375; CHECK-DAG:  st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
376; CHECK-DAG:  st.param.b16    [func_retval0+8], [[RE4]];
377; CHECK-NEXT: ret;
; Round-trips a <5 x i16> vector through a recursive tail call and returns the
; callee's result. The expectations above handle it as a four-element part at
; offset 0 plus one element at offset 8.
378define <5 x i16> @test_v5i16(<5 x i16> %a) {
379       %r = tail call <5 x i16> @test_v5i16(<5 x i16> %a);
380       ret <5 x i16> %r;
381}
382
383; CHECK: .func  (.param .align 2 .b8 func_retval0[2])
384; CHECK-LABEL: test_f16(
385; CHECK-NEXT: .param .align 2 .b8 test_f16_param_0[2]
386; CHECK:      ld.param.b16    [[E:%rs[0-9]+]], [test_f16_param_0];
387; CHECK:      .param .align 2 .b8 param0[2];
388; CHECK:      st.param.b16    [param0], [[E]];
389; CHECK:      .param .align 2 .b8 retval0[2];
390; CHECK:      call.uni (retval0),
391; CHECK-NEXT: test_f16,
392; CHECK:      ld.param.b16    [[R:%rs[0-9]+]], [retval0];
393; CHECK:      st.param.b16    [func_retval0], [[R]]
394; CHECK-NEXT: ret;
; Round-trips a scalar half through a recursive tail call and returns the
; callee's result. The expectations above move it as a raw 16-bit value.
395define half @test_f16(half %a) {
396       %r = tail call half @test_f16(half %a);
397       ret half %r;
398}
399
400; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
401; CHECK-LABEL: test_v2f16(
402; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
403; CHECK:      ld.param.b32    [[E:%r[0-9]+]], [test_v2f16_param_0];
404; CHECK:      .param .align 4 .b8 param0[4];
405; CHECK:      st.param.b32    [param0], [[E]];
406; CHECK:      .param .align 4 .b8 retval0[4];
407; CHECK:      call.uni (retval0),
408; CHECK-NEXT: test_v2f16,
409; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0];
410; CHECK:      st.param.b32    [func_retval0], [[R]]
411; CHECK-NEXT: ret;
; Round-trips a <2 x half> vector through a recursive tail call and returns the
; callee's result. The expectations above move it as a single 32-bit value.
412define <2 x half> @test_v2f16(<2 x half> %a) {
413       %r = tail call <2 x half> @test_v2f16(<2 x half> %a);
414       ret <2 x half> %r;
415}
416
417; CHECK: .func  (.param .align 2 .b8 func_retval0[2])
418; CHECK-LABEL: test_bf16(
419; CHECK-NEXT: .param .align 2 .b8 test_bf16_param_0[2]
420; CHECK:      ld.param.b16    [[E:%rs[0-9]+]], [test_bf16_param_0];
421; CHECK:      .param .align 2 .b8 param0[2];
422; CHECK:      st.param.b16    [param0], [[E]];
423; CHECK:      .param .align 2 .b8 retval0[2];
424; CHECK:      call.uni (retval0),
425; CHECK-NEXT: test_bf16,
426; CHECK:      ld.param.b16    [[R:%rs[0-9]+]], [retval0];
427; CHECK:      st.param.b16    [func_retval0], [[R]]
428; CHECK-NEXT: ret;
; Round-trips a scalar bfloat through a recursive tail call and returns the
; callee's result; the expectations above mirror the scalar half case.
429define bfloat @test_bf16(bfloat %a) {
430       %r = tail call bfloat @test_bf16(bfloat %a);
431       ret bfloat %r;
432}
433
434; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
435; CHECK-LABEL: test_v2bf16(
436; CHECK-NEXT: .param .align 4 .b8 test_v2bf16_param_0[4]
437; CHECK:      ld.param.b32    [[E:%r[0-9]+]], [test_v2bf16_param_0];
438; CHECK:      .param .align 4 .b8 param0[4];
439; CHECK:      st.param.b32    [param0], [[E]];
440; CHECK:      .param .align 4 .b8 retval0[4];
441; CHECK:      call.uni (retval0),
442; CHECK-NEXT: test_v2bf16,
443; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0];
444; CHECK:      st.param.b32    [func_retval0], [[R]]
445; CHECK-NEXT: ret;
; Round-trips a <2 x bfloat> vector through a recursive tail call and returns
; the callee's result; the expectations above mirror the <2 x half> case.
446define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
447       %r = tail call <2 x bfloat> @test_v2bf16(<2 x bfloat> %a);
448       ret <2 x bfloat> %r;
449}
450
451
452; CHECK:.func  (.param .align 8 .b8 func_retval0[8])
453; CHECK-LABEL: test_v3f16(
454; CHECK:      .param .align 8 .b8 test_v3f16_param_0[8]
455; CHECK-DAG:  ld.param.b32    [[HH01:%r[0-9]+]], [test_v3f16_param_0];
456; CHECK-DAG:  mov.b32         {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [[HH01]];
457; CHECK-DAG:  ld.param.b16    [[E2:%rs[0-9]+]], [test_v3f16_param_0+4];
458; CHECK:      .param .align 8 .b8 param0[8];
459; CHECK-DAG:  st.param.v2.b16 [param0], {[[E0]], [[E1]]};
460; CHECK-DAG:  st.param.b16    [param0+4], [[E2]];
461; CHECK:      .param .align 8 .b8 retval0[8];
462; CHECK:      call.uni (retval0),
463; CHECK:      test_v3f16,
464; CHECK-DAG:  ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0];
465; CHECK-DAG:  ld.param.b16    [[R2:%rs[0-9]+]], [retval0+4];
466; CHECK-DAG:  st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]};
467; CHECK-DAG:  st.param.b16    [func_retval0+4], [[R2]];
468; CHECK:      ret;
; Round-trips a <3 x half> vector through a recursive tail call and returns the
; callee's result. The expectations above handle it as a two-element part at
; offset 0 plus one element at offset 4.
469define <3 x half> @test_v3f16(<3 x half> %a) {
470       %r = tail call <3 x half> @test_v3f16(<3 x half> %a);
471       ret <3 x half> %r;
472}
473
474; CHECK:.func  (.param .align 8 .b8 func_retval0[8])
475; CHECK-LABEL: test_v4f16(
476; CHECK:      .param .align 8 .b8 test_v4f16_param_0[8]
477; CHECK:      ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
478; CHECK:      .param .align 8 .b8 param0[8];
479; CHECK:      st.param.v2.b32 [param0], {[[R01]], [[R23]]};
480; CHECK:      .param .align 8 .b8 retval0[8];
481; CHECK:      call.uni (retval0),
482; CHECK:      test_v4f16,
483; CHECK:      ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0];
484; CHECK:      st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]};
485; CHECK:      ret;
; Round-trips a <4 x half> vector through a recursive tail call and returns the
; callee's result. The expectations above move it as a pair of 32-bit values.
486define <4 x half> @test_v4f16(<4 x half> %a) {
487       %r = tail call <4 x half> @test_v4f16(<4 x half> %a);
488       ret <4 x half> %r;
489}
490
491; CHECK:.func  (.param .align 16 .b8 func_retval0[16])
492; CHECK-LABEL: test_v5f16(
493; CHECK:      .param .align 16 .b8 test_v5f16_param_0[16]
494; CHECK-DAG:  ld.param.v4.b16  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0];
495; CHECK-DAG:  ld.param.b16    [[E4:%rs[0-9]+]], [test_v5f16_param_0+8];
496; CHECK:      .param .align 16 .b8 param0[16];
497; CHECK-DAG:  st.param.v4.b16 [param0],
498; CHECK-DAG:  st.param.b16    [param0+8], [[E4]];
499; CHECK:      .param .align 16 .b8 retval0[16];
500; CHECK:      call.uni (retval0),
501; CHECK:      test_v5f16,
502; CHECK-DAG:  ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
503; CHECK-DAG:  ld.param.b16    [[R4:%rs[0-9]+]], [retval0+8];
504; CHECK-DAG:  st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
505; CHECK-DAG:  st.param.b16    [func_retval0+8], [[R4]];
506; CHECK:      ret;
; Round-trips a <5 x half> vector through a recursive tail call and returns the
; callee's result. The expectations above handle it as a four-element part at
; offset 0 plus one element at offset 8.
507define <5 x half> @test_v5f16(<5 x half> %a) {
508       %r = tail call <5 x half> @test_v5f16(<5 x half> %a);
509       ret <5 x half> %r;
510}
511
512; CHECK:.func  (.param .align 16 .b8 func_retval0[16])
513; CHECK-LABEL: test_v8f16(
514; CHECK:      .param .align 16 .b8 test_v8f16_param_0[16]
515; CHECK:      ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
516; CHECK:      .param .align 16 .b8 param0[16];
517; CHECK:      st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
518; CHECK:      .param .align 16 .b8 retval0[16];
519; CHECK:      call.uni (retval0),
520; CHECK:      test_v8f16,
521; CHECK:      ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0];
522; CHECK:      st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
523; CHECK:      ret;
; Round-trips a <8 x half> vector through a recursive tail call and returns the
; callee's result. The expectations above move it as four 32-bit values.
524define <8 x half> @test_v8f16(<8 x half> %a) {
525       %r = tail call <8 x half> @test_v8f16(<8 x half> %a);
526       ret <8 x half> %r;
527}
528
529; CHECK:.func  (.param .align 32 .b8 func_retval0[32])
530; CHECK-LABEL: test_v9f16(
531; CHECK:      .param .align 32 .b8 test_v9f16_param_0[32]
532; CHECK-DAG:  ld.param.v4.b16  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0];
533; CHECK-DAG:  ld.param.v4.b16  {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8];
534; CHECK-DAG:  ld.param.b16     [[E8:%rs[0-9]+]], [test_v9f16_param_0+16];
535; CHECK:      .param .align 32 .b8 param0[32];
536; CHECK-DAG:  st.param.v4.b16 [param0],
537; CHECK-DAG:  st.param.v4.b16 [param0+8],
538; CHECK-DAG:  st.param.b16    [param0+16], [[E8]];
539; CHECK:      .param .align 32 .b8 retval0[32];
540; CHECK:      call.uni (retval0),
541; CHECK:      test_v9f16,
542; CHECK-DAG:  ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
543; CHECK-DAG:  ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
544; CHECK-DAG:  ld.param.b16    [[R8:%rs[0-9]+]], [retval0+16];
545; CHECK-DAG:  st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
546; CHECK-DAG:  st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
547; CHECK-DAG:  st.param.b16    [func_retval0+16], [[R8]];
548; CHECK:      ret;
; Round-trips a <9 x half> vector through a recursive tail call and returns the
; callee's result. The expectations above handle it as two four-element parts
; (offsets 0 and 8) plus one element at offset 16.
549define <9 x half> @test_v9f16(<9 x half> %a) {
550       %r = tail call <9 x half> @test_v9f16(<9 x half> %a);
551       ret <9 x half> %r;
552}
553
554; CHECK: .func  (.param .b32 func_retval0)
555; CHECK-LABEL: test_i19(
556; CHECK-NEXT: .param .b32 test_i19_param_0
557; CHECK-DAG:  ld.param.u16    {{%r[0-9]+}}, [test_i19_param_0];
558; CHECK-DAG:  ld.param.u8     {{%r[0-9]+}}, [test_i19_param_0+2];
559; CHECK:      .param .b32 param0;
560; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
561; CHECK:      .param .b32 retval0;
562; CHECK:      call.uni (retval0),
563; CHECK-NEXT: test_i19,
564; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
565; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
566; CHECK-NEXT: ret;
; Round-trips an i19 (wider than 16 bits, narrower than 24) through a recursive
; tail call and returns the callee's result. The expectations above read the
; parameter as a 16-bit piece at offset 0 and an 8-bit piece at offset 2.
567define i19 @test_i19(i19 %a) {
568       %r = tail call i19 @test_i19(i19 %a);
569       ret i19 %r;
570}
571
572; CHECK: .func  (.param .b32 func_retval0)
573; CHECK-LABEL: test_i23(
574; CHECK-NEXT: .param .b32 test_i23_param_0
575; CHECK-DAG:  ld.param.u16    {{%r[0-9]+}}, [test_i23_param_0];
576; CHECK-DAG:  ld.param.u8     {{%r[0-9]+}}, [test_i23_param_0+2];
577; CHECK:      .param .b32 param0;
578; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
579; CHECK:      .param .b32 retval0;
580; CHECK:      call.uni (retval0),
581; CHECK-NEXT: test_i23,
582; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
583; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
584; CHECK-NEXT: ret;
; Round-trips an i23 through a recursive tail call and returns the callee's
; result; the expectations above have the same shape as the i19 case.
585define i23 @test_i23(i23 %a) {
586       %r = tail call i23 @test_i23(i23 %a);
587       ret i23 %r;
588}
589
590; CHECK: .func  (.param .b32 func_retval0)
591; CHECK-LABEL: test_i24(
592; CHECK-NEXT: .param .b32 test_i24_param_0
593; CHECK-DAG:  ld.param.u8     {{%r[0-9]+}}, [test_i24_param_0+2];
594; CHECK-DAG:  ld.param.u16    {{%r[0-9]+}}, [test_i24_param_0];
595; CHECK:      .param .b32 param0;
596; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
597; CHECK:      .param .b32 retval0;
598; CHECK:      call.uni (retval0),
599; CHECK-NEXT: test_i24,
600; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
601; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
602; CHECK-NEXT: ret;
; Round-trips an i24 (exactly three bytes) through a recursive tail call and
; returns the callee's result. The expectations above read the parameter as a
; 16-bit piece at offset 0 and an 8-bit piece at offset 2.
603define i24 @test_i24(i24 %a) {
604       %r = tail call i24 @test_i24(i24 %a);
605       ret i24 %r;
606}
607
608; CHECK: .func  (.param .b32 func_retval0)
609; CHECK-LABEL: test_i29(
610; CHECK-NEXT: .param .b32 test_i29_param_0
611; CHECK:      ld.param.u32    {{%r[0-9]+}}, [test_i29_param_0];
612; CHECK:      .param .b32 param0;
613; CHECK:      st.param.b32    [param0], {{%r[0-9]+}};
614; CHECK:      .param .b32 retval0;
615; CHECK:      call.uni (retval0),
616; CHECK-NEXT: test_i29,
617; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0];
618; CHECK:      st.param.b32    [func_retval0], {{%r[0-9]+}};
619; CHECK-NEXT: ret;
; Round-trips an i29 through a recursive tail call and returns the callee's
; result. The expectations above read the parameter with one 32-bit load.
620define i29 @test_i29(i29 %a) {
621       %r = tail call i29 @test_i29(i29 %a);
622       ret i29 %r;
623}
624
625; CHECK: .func  (.param .b32 func_retval0)
626; CHECK-LABEL: test_i32(
627; CHECK-NEXT: .param .b32 test_i32_param_0
628; CHECK:      ld.param.u32    [[E:%r[0-9]+]], [test_i32_param_0];
629; CHECK:      .param .b32 param0;
630; CHECK:      st.param.b32    [param0], [[E]];
631; CHECK:      .param .b32 retval0;
632; CHECK:      call.uni (retval0),
633; CHECK-NEXT: test_i32,
634; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0];
635; CHECK:      st.param.b32    [func_retval0], [[R]];
636; CHECK-NEXT: ret;
; Round-trips an i32 through a recursive tail call and returns the callee's
; result. The expectations above require the loaded registers to be stored
; unchanged, with no conversion in between.
637define i32 @test_i32(i32 %a) {
638       %r = tail call i32 @test_i32(i32 %a);
639       ret i32 %r;
640}
641
642; CHECK: .func  (.param .align 16 .b8 func_retval0[16])
643; CHECK-LABEL: test_v3i32(
644; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
645; CHECK-DAG:  ld.param.u32     [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
646; CHECK-DAG:  ld.param.v2.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
647; CHECK:      .param .align 16 .b8 param0[16];
648; CHECK:      st.param.v2.b32  [param0], {[[E0]], [[E1]]};
649; CHECK:      st.param.b32     [param0+8], [[E2]];
650; CHECK:      .param .align 16 .b8 retval0[16];
651; CHECK:      call.uni (retval0),
652; CHECK-NEXT: test_v3i32,
653; CHECK:      ld.param.v2.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
654; CHECK:      ld.param.b32     [[RE2:%r[0-9]+]], [retval0+8];
655; CHECK-DAG:  st.param.v2.b32  [func_retval0], {[[RE0]], [[RE1]]};
656; CHECK-DAG:  st.param.b32     [func_retval0+8], [[RE2]];
657; CHECK-NEXT: ret;
; Round-trips a <3 x i32> vector through a recursive tail call and returns the
; callee's result. The expectations above handle it as a two-element part at
; offset 0 plus one element at offset 8.
658define <3 x i32> @test_v3i32(<3 x i32> %a) {
659       %r = tail call <3 x i32> @test_v3i32(<3 x i32> %a);
660       ret <3 x i32> %r;
661}
662
663; CHECK: .func  (.param .align 16 .b8 func_retval0[16])
664; CHECK-LABEL: test_v4i32(
665; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
666; CHECK:      ld.param.v4.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
667; CHECK:      .param .align 16 .b8 param0[16];
668; CHECK:      st.param.v4.b32  [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
669; CHECK:      .param .align 16 .b8 retval0[16];
670; CHECK:      call.uni (retval0),
671; CHECK-NEXT: test_v4i32,
672; CHECK:      ld.param.v4.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
673; CHECK:      st.param.v4.b32  [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
674; CHECK-NEXT: ret;
; Round-trips a <4 x i32> vector through a recursive tail call and returns the
; callee's result. The expectations above move all four elements with single
; four-element vector accesses.
675define <4 x i32> @test_v4i32(<4 x i32> %a) {
676       %r = tail call <4 x i32> @test_v4i32(<4 x i32> %a);
677       ret <4 x i32> %r;
678}
679
680; CHECK: .func  (.param .align 32 .b8 func_retval0[32])
681; CHECK-LABEL: test_v5i32(
682; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
683; CHECK-DAG:  ld.param.u32     [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
684; CHECK-DAG:  ld.param.v4.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
685; CHECK:      .param .align 32 .b8 param0[32];
686; CHECK-DAG:  st.param.v4.b32  [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
687; CHECK-DAG:  st.param.b32     [param0+16], [[E4]];
688; CHECK:      .param .align 32 .b8 retval0[32];
689; CHECK:      call.uni (retval0),
690; CHECK-NEXT: test_v5i32,
691; CHECK-DAG:  ld.param.v4.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
692; CHECK-DAG:  ld.param.b32     [[RE4:%r[0-9]+]], [retval0+16];
693; CHECK-DAG:  st.param.v4.b32  [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
694; CHECK-DAG:  st.param.b32     [func_retval0+16], [[RE4]];
695; CHECK-NEXT: ret;
; Round-trips a <5 x i32> vector through a recursive tail call and returns the
; callee's result. The expectations above handle it as a four-element part at
; offset 0 plus one element at offset 16.
696define <5 x i32> @test_v5i32(<5 x i32> %a) {
697       %r = tail call <5 x i32> @test_v5i32(<5 x i32> %a);
698       ret <5 x i32> %r;
699}
700
; Scalar float: expected to travel as a plain .b32 param and to be loaded and
; stored with the .f32 forms through %f registers. The loaded argument must be
; the value stored to param0, and the call result the value returned.
701; CHECK: .func  (.param .b32 func_retval0)
702; CHECK-LABEL: test_f32(
703; CHECK-NEXT: .param .b32 test_f32_param_0
704; CHECK:      ld.param.f32    [[E:%f[0-9]+]], [test_f32_param_0];
705; CHECK:      .param .b32 param0;
706; CHECK:      st.param.f32    [param0], [[E]];
707; CHECK:      .param .b32 retval0;
708; CHECK:      call.uni (retval0),
709; CHECK-NEXT: test_f32,
710; CHECK:      ld.param.f32    [[R:%f[0-9]+]], [retval0];
711; CHECK:      st.param.f32    [func_retval0], [[R]];
712; CHECK-NEXT: ret;
713define float @test_f32(float %a) {
714       %r = tail call float @test_f32(float %a);
715       ret float %r;
716}
717
; i40 occupies a .b64 param. The incoming argument is expected to be read in
; pieces (a u32 at offset 0 and a u8 at offset 4), while the outgoing argument
; and the return value are moved as whole b64 values. Register names are not
; captured here; only the register class (%rd) is matched.
718; CHECK: .func  (.param .b64 func_retval0)
719; CHECK-LABEL: test_i40(
720; CHECK-NEXT: .param .b64 test_i40_param_0
721; CHECK-DAG:  ld.param.u8    {{%rd[0-9]+}}, [test_i40_param_0+4];
722; CHECK-DAG:  ld.param.u32   {{%rd[0-9]+}}, [test_i40_param_0];
723; CHECK:      .param .b64 param0;
724; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
725; CHECK:      .param .b64 retval0;
726; CHECK:      call.uni (retval0),
727; CHECK-NEXT: test_i40,
728; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
729; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
730; CHECK-NEXT: ret;
731define i40 @test_i40(i40 %a) {
732       %r = tail call i40 @test_i40(i40 %a);
733       ret i40 %r;
734}
735
; i47 occupies a .b64 param. The incoming argument is expected to be read as a
; u32 at offset 0 plus a u16 at offset 4; the outgoing argument and the return
; value are moved as whole b64 values.
736; CHECK: .func  (.param .b64 func_retval0)
737; CHECK-LABEL: test_i47(
738; CHECK-NEXT: .param .b64 test_i47_param_0
739; CHECK-DAG:  ld.param.u16   {{%rd[0-9]+}}, [test_i47_param_0+4];
740; CHECK-DAG:  ld.param.u32   {{%rd[0-9]+}}, [test_i47_param_0];
741; CHECK:      .param .b64 param0;
742; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
743; CHECK:      .param .b64 retval0;
744; CHECK:      call.uni (retval0),
745; CHECK-NEXT: test_i47,
746; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
747; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
748; CHECK-NEXT: ret;
749define i47 @test_i47(i47 %a) {
750       %r = tail call i47 @test_i47(i47 %a);
751       ret i47 %r;
752}
753
; i48 occupies a .b64 param and is expected to use the same access pattern as
; i47: a u32 at offset 0 plus a u16 at offset 4 for the incoming argument, and
; whole b64 moves for the outgoing argument and the return value.
754; CHECK: .func  (.param .b64 func_retval0)
755; CHECK-LABEL: test_i48(
756; CHECK-NEXT: .param .b64 test_i48_param_0
757; CHECK-DAG:  ld.param.u16   {{%rd[0-9]+}}, [test_i48_param_0+4];
758; CHECK-DAG:  ld.param.u32   {{%rd[0-9]+}}, [test_i48_param_0];
759; CHECK:      .param .b64 param0;
760; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
761; CHECK:      .param .b64 retval0;
762; CHECK:      call.uni (retval0),
763; CHECK-NEXT: test_i48,
764; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
765; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
766; CHECK-NEXT: ret;
767define i48 @test_i48(i48 %a) {
768       %r = tail call i48 @test_i48(i48 %a);
769       ret i48 %r;
770}
771
; i51 occupies a .b64 param. The incoming argument is expected to be read in
; three pieces: a u32 at offset 0, a u16 at offset 4 and a u8 at offset 6. The
; outgoing argument and the return value are moved as whole b64 values.
772; CHECK: .func  (.param .b64 func_retval0)
773; CHECK-LABEL: test_i51(
774; CHECK-NEXT: .param .b64 test_i51_param_0
775; CHECK-DAG:  ld.param.u8    {{%rd[0-9]+}}, [test_i51_param_0+6];
776; CHECK-DAG:  ld.param.u16   {{%rd[0-9]+}}, [test_i51_param_0+4];
777; CHECK-DAG:  ld.param.u32   {{%rd[0-9]+}}, [test_i51_param_0];
778; CHECK:      .param .b64 param0;
779; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
780; CHECK:      .param .b64 retval0;
781; CHECK:      call.uni (retval0),
782; CHECK-NEXT: test_i51,
783; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
784; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
785; CHECK-NEXT: ret;
786define i51 @test_i51(i51 %a) {
787       %r = tail call i51 @test_i51(i51 %a);
788       ret i51 %r;
789}
790
; i56 occupies a .b64 param and is expected to use the same three-piece read
; as i51 (u32 at offset 0, u16 at offset 4, u8 at offset 6) for the incoming
; argument, with whole b64 moves for the outgoing argument and return value.
791; CHECK: .func  (.param .b64 func_retval0)
792; CHECK-LABEL: test_i56(
793; CHECK-NEXT: .param .b64 test_i56_param_0
794; CHECK-DAG:  ld.param.u8    {{%rd[0-9]+}}, [test_i56_param_0+6];
795; CHECK-DAG:  ld.param.u16   {{%rd[0-9]+}}, [test_i56_param_0+4];
796; CHECK-DAG:  ld.param.u32   {{%rd[0-9]+}}, [test_i56_param_0];
797; CHECK:      .param .b64 param0;
798; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
799; CHECK:      .param .b64 retval0;
800; CHECK:      call.uni (retval0),
801; CHECK-NEXT: test_i56,
802; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
803; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
804; CHECK-NEXT: ret;
805define i56 @test_i56(i56 %a) {
806       %r = tail call i56 @test_i56(i56 %a);
807       ret i56 %r;
808}
809
; i57 occupies a .b64 param. Unlike the narrower odd-sized integers above, the
; incoming argument is expected to be read with a single u64 load.
810; CHECK: .func  (.param .b64 func_retval0)
811; CHECK-LABEL: test_i57(
812; CHECK-NEXT: .param .b64 test_i57_param_0
813; CHECK:      ld.param.u64    {{%rd[0-9]+}}, [test_i57_param_0];
814; CHECK:      .param .b64 param0;
815; CHECK:      st.param.b64    [param0], {{%rd[0-9]+}};
816; CHECK:      .param .b64 retval0;
817; CHECK:      call.uni (retval0),
818; CHECK-NEXT: test_i57,
819; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0];
820; CHECK:      st.param.b64    [func_retval0], {{%rd[0-9]+}};
821; CHECK-NEXT: ret;
822define i57 @test_i57(i57 %a) {
823       %r = tail call i57 @test_i57(i57 %a);
824       ret i57 %r;
825}
826
; Scalar i64: a .b64 param read with one u64 load. The loaded register is
; captured and must be the one stored to param0; likewise the register loaded
; from retval0 must be the one stored to func_retval0.
827; CHECK: .func  (.param .b64 func_retval0)
828; CHECK-LABEL: test_i64(
829; CHECK-NEXT: .param .b64 test_i64_param_0
830; CHECK:      ld.param.u64    [[E:%rd[0-9]+]], [test_i64_param_0];
831; CHECK:      .param .b64 param0;
832; CHECK:      st.param.b64    [param0], [[E]];
833; CHECK:      .param .b64 retval0;
834; CHECK:      call.uni (retval0),
835; CHECK-NEXT: test_i64,
836; CHECK:      ld.param.b64    [[R:%rd[0-9]+]], [retval0];
837; CHECK:      st.param.b64    [func_retval0], [[R]];
838; CHECK-NEXT: ret;
839define i64 @test_i64(i64 %a) {
840       %r = tail call i64 @test_i64(i64 %a);
841       ret i64 %r;
842}
843
; <3 x i64> argument and return value: both are declared as 32-byte,
; 32-aligned .b8 arrays. Elements 0-1 are expected to move as one v2 access
; and element 2 as a scalar access at offset 16. The two return-value stores
; may be emitted in either order, so each is listed exactly once as an
; order-independent directive.
844; CHECK: .func  (.param .align 32 .b8 func_retval0[32])
845; CHECK-LABEL: test_v3i64(
846; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32]
847; CHECK-DAG:  ld.param.u64     [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
848; CHECK-DAG:  ld.param.v2.u64  {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
849; CHECK:      .param .align 32 .b8 param0[32];
850; CHECK:      st.param.v2.b64  [param0], {[[E0]], [[E1]]};
851; CHECK:      st.param.b64     [param0+16], [[E2]];
852; CHECK:      .param .align 32 .b8 retval0[32];
853; CHECK:      call.uni (retval0),
854; CHECK-NEXT: test_v3i64,
855; CHECK:      ld.param.v2.b64  {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
856; CHECK:      ld.param.b64     [[RE2:%rd[0-9]+]], [retval0+16];
857; CHECK-DAG:  st.param.v2.b64  [func_retval0], {[[RE0]], [[RE1]]};
858; CHECK-DAG:  st.param.b64     [func_retval0+16], [[RE2]];
861; CHECK-NEXT: ret;
862define <3 x i64> @test_v3i64(<3 x i64> %a) {
863       %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a);
864       ret <3 x i64> %r;
865}
866
867; For i64, vector loads are limited by PTX to 2 elements, so the directives
; below expect <4 x i64> to be split into two v2 accesses, at offsets 0 and 16,
; for the argument, the outgoing param, the call result and the return value.
868; CHECK: .func  (.param .align 32 .b8 func_retval0[32])
869; CHECK-LABEL: test_v4i64(
870; CHECK-NEXT: .param .align 32 .b8 test_v4i64_param_0[32]
871; CHECK-DAG:  ld.param.v2.u64  {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
872; CHECK-DAG:  ld.param.v2.u64  {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
873; CHECK:      .param .align 32 .b8 param0[32];
874; CHECK:      st.param.v2.b64  [param0], {[[E0]], [[E1]]};
875; CHECK:      st.param.v2.b64  [param0+16], {[[E2]], [[E3]]};
876; CHECK:      .param .align 32 .b8 retval0[32];
877; CHECK:      call.uni (retval0),
878; CHECK-NEXT: test_v4i64,
879; CHECK:      ld.param.v2.b64  {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
880; CHECK:      ld.param.v2.b64  {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
881; CHECK-DAG:  st.param.v2.b64  [func_retval0+16], {[[RE2]], [[RE3]]};
882; CHECK-DAG:  st.param.v2.b64  [func_retval0], {[[RE0]], [[RE1]]};
883; CHECK-NEXT: ret;
884define <4 x i64> @test_v4i64(<4 x i64> %a) {
885       %r = tail call <4 x i64> @test_v4i64(<4 x i64> %a);
886       ret <4 x i64> %r;
887}
888
889; Aggregates, on the other hand, do not get extended.
890
; Single-i1 aggregate (%s_i1): expected as a 1-byte, 1-aligned .b8 array that
; is loaded and stored with 8-bit accesses through %rs registers. The parameter
; declaration is matched including its .param keyword, like the other
; aggregate tests in this file.
891; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
892; CHECK-LABEL: test_s_i1(
893; CHECK-NEXT: .param .align 1 .b8 test_s_i1_param_0[1]
894; CHECK:      ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
895; CHECK:      .param .align 1 .b8 param0[1];
896; CHECK:      st.param.b8    [param0], [[A]]
897; CHECK:      .param .align 1 .b8 retval0[1];
898; CHECK:      call.uni
899; CHECK-NEXT: test_s_i1,
900; CHECK:      ld.param.b8    [[R:%rs[0-9]+]], [retval0];
901; CHECK:      st.param.b8    [func_retval0], [[R]];
902; CHECK-NEXT: ret;
903define %s_i1 @test_s_i1(%s_i1 %a) {
904       %r = tail call %s_i1 @test_s_i1(%s_i1 %a);
905       ret %s_i1 %r;
906}
907
; Single-i8 aggregate (%s_i8): expected as a 1-byte, 1-aligned .b8 array that
; is loaded and stored with 8-bit accesses through %rs registers.
908; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
909; CHECK-LABEL: test_s_i8(
910; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
911; CHECK:      ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
912; CHECK:      .param .align 1 .b8 param0[1];
913; CHECK:      st.param.b8    [param0], [[A]]
914; CHECK:      .param .align 1 .b8 retval0[1];
915; CHECK:      call.uni
916; CHECK-NEXT: test_s_i8,
917; CHECK:      ld.param.b8    [[R:%rs[0-9]+]], [retval0];
918; CHECK:      st.param.b8    [func_retval0], [[R]];
919; CHECK-NEXT: ret;
920define %s_i8 @test_s_i8(%s_i8 %a) {
921       %r = tail call %s_i8 @test_s_i8(%s_i8 %a);
922       ret %s_i8 %r;
923}
924
; Single-i16 aggregate (%s_i16): expected as a 2-byte, 2-aligned .b8 array
; that is loaded and stored with 16-bit accesses through %rs registers.
925; CHECK: .func  (.param .align 2 .b8 func_retval0[2])
926; CHECK-LABEL: test_s_i16(
927; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
928; CHECK:      ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
929; CHECK:      .param .align 2 .b8 param0[2];
930; CHECK:      st.param.b16    [param0], [[A]]
931; CHECK:      .param .align 2 .b8 retval0[2];
932; CHECK:      call.uni
933; CHECK-NEXT: test_s_i16,
934; CHECK:      ld.param.b16    [[R:%rs[0-9]+]], [retval0];
935; CHECK:      st.param.b16    [func_retval0], [[R]];
936; CHECK-NEXT: ret;
937define %s_i16 @test_s_i16(%s_i16 %a) {
938       %r = tail call %s_i16 @test_s_i16(%s_i16 %a);
939       ret %s_i16 %r;
940}
941
; Single-half aggregate (%s_f16): expected as a 2-byte, 2-aligned .b8 array.
; The half value is moved with untyped b16 accesses through %rs registers.
942; CHECK: .func  (.param .align 2 .b8 func_retval0[2])
943; CHECK-LABEL: test_s_f16(
944; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
945; CHECK:      ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0];
946; CHECK:      .param .align 2 .b8 param0[2];
947; CHECK:      st.param.b16    [param0], [[A]]
948; CHECK:      .param .align 2 .b8 retval0[2];
949; CHECK:      call.uni
950; CHECK-NEXT: test_s_f16,
951; CHECK:      ld.param.b16    [[R:%rs[0-9]+]], [retval0];
952; CHECK:      st.param.b16    [func_retval0], [[R]];
953; CHECK-NEXT: ret;
954define %s_f16 @test_s_f16(%s_f16 %a) {
955       %r = tail call %s_f16 @test_s_f16(%s_f16 %a);
956       ret %s_f16 %r;
957}
958
; Single-i32 aggregate (%s_i32): expected as a 4-byte, 4-aligned .b8 array
; that is loaded and stored with 32-bit accesses through %r registers.
959; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
960; CHECK-LABEL: test_s_i32(
961; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
962; CHECK:      ld.param.u32    [[E:%r[0-9]+]], [test_s_i32_param_0];
963; CHECK:      .param .align 4 .b8 param0[4]
964; CHECK:      st.param.b32    [param0], [[E]];
965; CHECK:      .param .align 4 .b8 retval0[4];
966; CHECK:      call.uni (retval0),
967; CHECK-NEXT: test_s_i32,
968; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0];
969; CHECK:      st.param.b32    [func_retval0], [[R]];
970; CHECK-NEXT: ret;
971define %s_i32 @test_s_i32(%s_i32 %a) {
972       %r = tail call %s_i32 @test_s_i32(%s_i32 %a);
973       ret %s_i32 %r;
974}
975
; Single-float aggregate (%s_f32): expected as a 4-byte, 4-aligned .b8 array
; that is loaded and stored with the .f32 forms through %f registers.
976; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
977; CHECK-LABEL: test_s_f32(
978; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
979; CHECK:      ld.param.f32    [[E:%f[0-9]+]], [test_s_f32_param_0];
980; CHECK:      .param .align 4 .b8 param0[4]
981; CHECK:      st.param.f32    [param0], [[E]];
982; CHECK:      .param .align 4 .b8 retval0[4];
983; CHECK:      call.uni (retval0),
984; CHECK-NEXT: test_s_f32,
985; CHECK:      ld.param.f32    [[R:%f[0-9]+]], [retval0];
986; CHECK:      st.param.f32    [func_retval0], [[R]];
987; CHECK-NEXT: ret;
988define %s_f32 @test_s_f32(%s_f32 %a) {
989       %r = tail call %s_f32 @test_s_f32(%s_f32 %a);
990       ret %s_f32 %r;
991}
992
; Single-i64 aggregate (%s_i64): expected as an 8-byte, 8-aligned .b8 array
; that is loaded and stored with 64-bit accesses through %rd registers.
993; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
994; CHECK-LABEL: test_s_i64(
995; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
996; CHECK:      ld.param.u64    [[E:%rd[0-9]+]], [test_s_i64_param_0];
997; CHECK:      .param .align 8 .b8 param0[8];
998; CHECK:      st.param.b64    [param0], [[E]];
999; CHECK:      .param .align 8 .b8 retval0[8];
1000; CHECK:      call.uni (retval0),
1001; CHECK-NEXT: test_s_i64,
1002; CHECK:      ld.param.b64    [[R:%rd[0-9]+]], [retval0];
1003; CHECK:      st.param.b64    [func_retval0], [[R]];
1004; CHECK-NEXT: ret;
1005define %s_i64 @test_s_i64(%s_i64 %a) {
1006       %r = tail call %s_i64 @test_s_i64(%s_i64 %a);
1007       ret %s_i64 %r;
1008}
1009
1010; Fields that have different types, but identical sizes are not vectorized.
; %s_i32f32 is { i32, float, i32, float, i64 }, passed as a 24-byte, 8-aligned
; .b8 array. Each field is expected to get its own scalar access, at offsets
; 0, 4, 8, 12 and 16, with the float fields going through %f registers.
1011; CHECK: .func  (.param .align 8 .b8 func_retval0[24])
1012; CHECK-LABEL: test_s_i32f32(
1013; CHECK:        .param .align 8 .b8 test_s_i32f32_param_0[24]
1014; CHECK-DAG:    ld.param.u64    [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16];
1015; CHECK-DAG:    ld.param.f32    [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12];
1016; CHECK-DAG:    ld.param.u32    [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8];
1017; CHECK-DAG:    ld.param.f32    [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4];
1018; CHECK-DAG:    ld.param.u32    [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
1019; CHECK:        .param .align 8 .b8 param0[24];
1020; CHECK-DAG:    st.param.b32    [param0], [[E0]];
1021; CHECK-DAG:    st.param.f32    [param0+4], [[E1]];
1022; CHECK-DAG:    st.param.b32    [param0+8], [[E2]];
1023; CHECK-DAG:    st.param.f32    [param0+12], [[E3]];
1024; CHECK-DAG:    st.param.b64    [param0+16], [[E4]];
1025; CHECK:        .param .align 8 .b8 retval0[24];
1026; CHECK:        call.uni (retval0),
1027; CHECK-NEXT:   test_s_i32f32,
1028; CHECK-DAG:    ld.param.b32    [[RE0:%r[0-9]+]], [retval0];
1029; CHECK-DAG:    ld.param.f32    [[RE1:%f[0-9]+]], [retval0+4];
1030; CHECK-DAG:    ld.param.b32    [[RE2:%r[0-9]+]], [retval0+8];
1031; CHECK-DAG:    ld.param.f32    [[RE3:%f[0-9]+]], [retval0+12];
1032; CHECK-DAG:    ld.param.b64    [[RE4:%rd[0-9]+]], [retval0+16];
1033; CHECK-DAG:    st.param.b32    [func_retval0], [[RE0]];
1034; CHECK-DAG:    st.param.f32    [func_retval0+4], [[RE1]];
1035; CHECK-DAG:    st.param.b32    [func_retval0+8], [[RE2]];
1036; CHECK-DAG:    st.param.f32    [func_retval0+12], [[RE3]];
1037; CHECK-DAG:    st.param.b64    [func_retval0+16], [[RE4]];
1038; CHECK:        ret;
1039define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
1040       %r = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a);
1041       ret %s_i32f32 %r;
1042}
1043
1044; We do vectorize consecutive fields with matching types.
; %s_i32x4 is { i32, i32, i32, i32, i64 }, passed as a 24-byte, 8-aligned .b8
; array. The four i32 fields are expected to move as two v2 accesses (offsets
; 0 and 8) and the i64 as a scalar access at offset 16. The i64 load is
; captured as E4 so that the store to param0+16 is matched against the
; register loaded in this function, not against a value left over from an
; earlier test (FileCheck variables are not reset at label boundaries with
; the RUN line used by this file).
1045; CHECK:.visible .func  (.param .align 8 .b8 func_retval0[24])
1046; CHECK-LABEL: test_s_i32x4(
1047; CHECK:        .param .align 8 .b8 test_s_i32x4_param_0[24]
1048; CHECK-DAG:    ld.param.u64    [[E4:%rd[0-9]+]], [test_s_i32x4_param_0+16];
1049; CHECK-DAG:    ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
1050; CHECK-DAG:    ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
1051; CHECK:        .param .align 8 .b8 param0[24];
1052; CHECK:        st.param.v2.b32 [param0], {[[E0]], [[E1]]};
1053; CHECK:        st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
1054; CHECK:        st.param.b64    [param0+16], [[E4]];
1055; CHECK:        .param .align 8 .b8 retval0[24];
1056; CHECK:        call.uni (retval0),
1057; CHECK-NEXT:   test_s_i32x4,
1058; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
1059; CHECK:        ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
1060; CHECK:        ld.param.b64    [[RE4:%rd[0-9]+]], [retval0+16];
1061; CHECK-DAG:    st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]};
1062; CHECK-DAG:    st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]};
1063; CHECK-DAG:    st.param.b64    [func_retval0+16], [[RE4]];
1064; CHECK:        ret;
1065
1066define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
1067       %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a);
1068       ret %s_i32x4 %r;
1069}
1070
; Despite the "i1" in its name, this test passes %s_i8i32x4, i.e.
; { i32, i32, i8, i32, i32, i64 }, as a 32-byte, 8-aligned .b8 array. The two
; leading i32 fields are expected to move as one v2 access; the i8 at offset 8
; and the i32 fields at offsets 12 and 16 are expected to be accessed
; individually, and the i64 is accessed at offset 24.
1071; CHECK:.visible .func  (.param .align 8 .b8 func_retval0[32])
1072; CHECK-LABEL: test_s_i1i32x4(
1073; CHECK:        .param .align 8 .b8 test_s_i1i32x4_param_0[32]
1074; CHECK:        ld.param.u64    [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24];
1075; CHECK:        ld.param.u32    [[E4:%r[0-9]+]], [test_s_i1i32x4_param_0+16];
1076; CHECK:        ld.param.u32    [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12];
1077; CHECK:        ld.param.u8     [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
1078; CHECK:        ld.param.v2.u32         {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
1079; CHECK:        .param .align 8 .b8 param0[32];
1080; CHECK:        st.param.v2.b32 [param0], {[[E0]], [[E1]]};
1081; CHECK:        st.param.b8     [param0+8], [[E2]];
1082; CHECK:        st.param.b32    [param0+12], [[E3]];
1083; CHECK:        st.param.b32    [param0+16], [[E4]];
1084; CHECK:        st.param.b64    [param0+24], [[E5]];
1085; CHECK:        .param .align 8 .b8 retval0[32];
1086; CHECK:        call.uni (retval0),
1087; CHECK:        test_s_i1i32x4,
1088; CHECK:        (
1089; CHECK:        param0
1090; CHECK:        );
1091; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
1092; CHECK:        ld.param.b8     [[RE2:%rs[0-9]+]], [retval0+8];
1093; CHECK:        ld.param.b32    [[RE3:%r[0-9]+]], [retval0+12];
1094; CHECK:        ld.param.b32    [[RE4:%r[0-9]+]], [retval0+16];
1095; CHECK:        ld.param.b64    [[RE5:%rd[0-9]+]], [retval0+24];
1096; CHECK:        st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]};
1097; CHECK:        st.param.b8     [func_retval0+8], [[RE2]];
1098; CHECK:        st.param.b32    [func_retval0+12], [[RE3]];
1099; CHECK:        st.param.b32    [func_retval0+16], [[RE4]];
1100; CHECK:        st.param.b64    [func_retval0+24], [[RE5]];
1101; CHECK:        ret;
1102
1103define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
1104       %r = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a);
1105       ret %s_i8i32x4 %r;
1106}
1107
1108; -- All loads/stores from parameters aligned by one must be done one
1109; -- byte at a time.
; This test passes %s_i8i32x4p, the packed form <{ i32, i32, i8, i32, i32, i64 }>,
; as a 25-byte, 1-aligned .b8 array. The directives enumerate a separate 8-bit
; access for every offset 0..24 of the incoming argument, the outgoing param,
; the call result and the return value; within each group the order is free.
; NOTE(review): unlike the neighbouring tests, this one has no trailing
; directive for the final ret.
1110; CHECK:.visible .func  (.param .align 1 .b8 func_retval0[25])
1111; CHECK-LABEL: test_s_i1i32x4p(
1112; CHECK-DAG:        .param .align 1 .b8 test_s_i1i32x4p_param_0[25]
1113; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+24];
1114; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+23];
1115; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+22];
1116; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+21];
1117; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+20];
1118; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+19];
1119; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+18];
1120; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+17];
1121; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+16];
1122; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+15];
1123; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+14];
1124; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+13];
1125; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+12];
1126; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+11];
1127; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+10];
1128; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+9];
1129; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+8];
1130; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+7];
1131; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+6];
1132; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+5];
1133; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+4];
1134; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+3];
1135; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+2];
1136; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+1];
1137; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0];
1138; CHECK:        .param .align 1 .b8 param0[25];
1139; CHECK-DAG:        st.param.b8     [param0],
1140; CHECK-DAG:        st.param.b8     [param0+1],
1141; CHECK-DAG:        st.param.b8     [param0+2],
1142; CHECK-DAG:        st.param.b8     [param0+3],
1143; CHECK-DAG:        st.param.b8     [param0+4],
1144; CHECK-DAG:        st.param.b8     [param0+5],
1145; CHECK-DAG:        st.param.b8     [param0+6],
1146; CHECK-DAG:        st.param.b8     [param0+7],
1147; CHECK-DAG:        st.param.b8     [param0+8],
1148; CHECK-DAG:        st.param.b8     [param0+9],
1149; CHECK-DAG:        st.param.b8     [param0+10],
1150; CHECK-DAG:        st.param.b8     [param0+11],
1151; CHECK-DAG:        st.param.b8     [param0+12],
1152; CHECK-DAG:        st.param.b8     [param0+13],
1153; CHECK-DAG:        st.param.b8     [param0+14],
1154; CHECK-DAG:        st.param.b8     [param0+15],
1155; CHECK-DAG:        st.param.b8     [param0+16],
1156; CHECK-DAG:        st.param.b8     [param0+17],
1157; CHECK-DAG:        st.param.b8     [param0+18],
1158; CHECK-DAG:        st.param.b8     [param0+19],
1159; CHECK-DAG:        st.param.b8     [param0+20],
1160; CHECK-DAG:        st.param.b8     [param0+21],
1161; CHECK-DAG:        st.param.b8     [param0+22],
1162; CHECK-DAG:        st.param.b8     [param0+23],
1163; CHECK-DAG:        st.param.b8     [param0+24],
1164; CHECK:            .param .align 1 .b8 retval0[25];
1165; CHECK:            call.uni (retval0),
1166; CHECK-NEXT:       test_s_i1i32x4p,
1167; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0];
1168; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+1];
1169; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+2];
1170; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+3];
1171; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+4];
1172; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+5];
1173; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+6];
1174; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+7];
1175; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+8];
1176; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+9];
1177; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+10];
1178; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+11];
1179; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+12];
1180; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+13];
1181; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+14];
1182; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+15];
1183; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+16];
1184; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+17];
1185; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+18];
1186; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+19];
1187; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+20];
1188; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+21];
1189; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+22];
1190; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+23];
1191; CHECK-DAG:        ld.param.b8 %rs{{[0-9]+}}, [retval0+24];
1192; CHECK:            } // callseq
1193; CHECK-DAG:        st.param.b8     [func_retval0],
1194; CHECK-DAG:        st.param.b8     [func_retval0+1],
1195; CHECK-DAG:        st.param.b8     [func_retval0+2],
1196; CHECK-DAG:        st.param.b8     [func_retval0+3],
1197; CHECK-DAG:        st.param.b8     [func_retval0+4],
1198; CHECK-DAG:        st.param.b8     [func_retval0+5],
1199; CHECK-DAG:        st.param.b8     [func_retval0+6],
1200; CHECK-DAG:        st.param.b8     [func_retval0+7],
1201; CHECK-DAG:        st.param.b8     [func_retval0+8],
1202; CHECK-DAG:        st.param.b8     [func_retval0+9],
1203; CHECK-DAG:        st.param.b8     [func_retval0+10],
1204; CHECK-DAG:        st.param.b8     [func_retval0+11],
1205; CHECK-DAG:        st.param.b8     [func_retval0+12],
1206; CHECK-DAG:        st.param.b8     [func_retval0+13],
1207; CHECK-DAG:        st.param.b8     [func_retval0+14],
1208; CHECK-DAG:        st.param.b8     [func_retval0+15],
1209; CHECK-DAG:        st.param.b8     [func_retval0+16],
1210; CHECK-DAG:        st.param.b8     [func_retval0+17],
1211; CHECK-DAG:        st.param.b8     [func_retval0+18],
1212; CHECK-DAG:        st.param.b8     [func_retval0+19],
1213; CHECK-DAG:        st.param.b8     [func_retval0+20],
1214; CHECK-DAG:        st.param.b8     [func_retval0+21],
1215; CHECK-DAG:        st.param.b8     [func_retval0+22],
1216; CHECK-DAG:        st.param.b8     [func_retval0+23],
1217; CHECK-DAG:        st.param.b8     [func_retval0+24],
1218
1219define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
1220       %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a);
1221       ret %s_i8i32x4p %r;
1222}
1223
1224; Check that we can vectorize loads that span multiple aggregate fields.
; %s_crossfield is { i32, [2 x i32], <4 x i32>, [3 x {i32, i32, i32}] },
; passed as an 80-byte, 16-aligned .b8 array. The expected access layout is a
; v2 at offset 0, a scalar at offset 8, v4 accesses at offsets 16, 32 and 48,
; and a final scalar at offset 64; each loaded register must be forwarded to
; the store at the same offset.
1225; CHECK:.visible .func  (.param .align 16 .b8 func_retval0[80])
1226; CHECK-LABEL: test_s_crossfield(
1227; CHECK:        .param .align 16 .b8 test_s_crossfield_param_0[80]
1228; CHECK:        ld.param.u32    [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64];
1229; CHECK:        ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48];
1230; CHECK:        ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32];
1231; CHECK:        ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16];
1232; CHECK:        ld.param.u32    [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
1233; CHECK:        ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
1234; CHECK:        .param .align 16 .b8 param0[80];
1235; CHECK:        st.param.v2.b32 [param0], {[[E0]], [[E1]]};
1236; CHECK:        st.param.b32    [param0+8], [[E2]];
1237; CHECK:        st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
1238; CHECK:        st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
1239; CHECK:        st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
1240; CHECK:        st.param.b32    [param0+64], [[E15]];
1241; CHECK:        .param .align 16 .b8 retval0[80];
1242; CHECK:        call.uni (retval0),
1243; CHECK:        test_s_crossfield,
1244; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
1245; CHECK:        ld.param.b32    [[RE2:%r[0-9]+]], [retval0+8];
1246; CHECK:        ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
1247; CHECK:        ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32];
1248; CHECK:        ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48];
1249; CHECK:        ld.param.b32    [[RE15:%r[0-9]+]], [retval0+64];
1250; CHECK:        st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]};
1251; CHECK:        st.param.b32    [func_retval0+8], [[RE2]];
1252; CHECK:        st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]};
1253; CHECK:        st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]};
1254; CHECK:        st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]};
1255; CHECK:        st.param.b32    [func_retval0+64], [[RE15]];
1256; CHECK:        ret;
1257
1258define %s_crossfield @test_s_crossfield(%s_crossfield %a) {
1259       %r = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a);
1260       ret %s_crossfield %r;
1261}
1262