xref: /llvm-project/llvm/test/CodeGen/NVPTX/rotate.ll (revision 0f0a96b8621fcc8e1d6b6a3d047c263bb17a7f39)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc < %s --mtriple=nvptx64 -mcpu=sm_20 | FileCheck --check-prefix=SM20 %s
3; RUN: llc < %s --mtriple=nvptx64 -mcpu=sm_35 | FileCheck --check-prefix=SM35 %s
4; RUN: %if ptxas %{ llc < %s --mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
5; RUN: %if ptxas %{ llc < %s --mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
6
7
; NVVM rotate intrinsics, called by rotate32, rotate64 and rotateright64.
; All three take the rotate amount as an i32, including the 64-bit variants.
8declare i32 @llvm.nvvm.rotate.b32(i32, i32)
9declare i64 @llvm.nvvm.rotate.b64(i64, i32)
10declare i64 @llvm.nvvm.rotate.right.b64(i64, i32)
11
; Generic funnel-shift intrinsics. The tests in this file call them both with
; identical data operands (a rotate) and with distinct data operands (a true
; funnel shift).
12declare i64 @llvm.fshl.i64(i64, i64, i64)
13declare i64 @llvm.fshr.i64(i64, i64, i64)
14declare i32 @llvm.fshl.i32(i32, i32, i32)
15declare i32 @llvm.fshr.i32(i32, i32, i32)
16
17
18; SM20: rotate32
19; SM35: rotate32
; 32-bit rotate through @llvm.nvvm.rotate.b32 with a variable amount %b.
; For sm_20 the expected PTX is a shl / shr / or expansion with both shift
; amounts masked to 31; for sm_35 it is a single shf.l.wrap.b32 whose two
; data operands are both %a.
20define i32 @rotate32(i32 %a, i32 %b) {
21; SM20-LABEL: rotate32(
22; SM20:       {
23; SM20-NEXT:    .reg .b32 %r<9>;
24; SM20-EMPTY:
25; SM20-NEXT:  // %bb.0:
26; SM20-NEXT:    ld.param.u32 %r1, [rotate32_param_0];
27; SM20-NEXT:    ld.param.u32 %r2, [rotate32_param_1];
28; SM20-NEXT:    and.b32 %r3, %r2, 31;
29; SM20-NEXT:    shl.b32 %r4, %r1, %r3;
30; SM20-NEXT:    neg.s32 %r5, %r2;
31; SM20-NEXT:    and.b32 %r6, %r5, 31;
32; SM20-NEXT:    shr.u32 %r7, %r1, %r6;
33; SM20-NEXT:    or.b32 %r8, %r4, %r7;
34; SM20-NEXT:    st.param.b32 [func_retval0], %r8;
35; SM20-NEXT:    ret;
36;
37; SM35-LABEL: rotate32(
38; SM35:       {
39; SM35-NEXT:    .reg .b32 %r<4>;
40; SM35-EMPTY:
41; SM35-NEXT:  // %bb.0:
42; SM35-NEXT:    ld.param.u32 %r1, [rotate32_param_0];
43; SM35-NEXT:    ld.param.u32 %r2, [rotate32_param_1];
44; SM35-NEXT:    shf.l.wrap.b32 %r3, %r1, %r1, %r2;
45; SM35-NEXT:    st.param.b32 [func_retval0], %r3;
46; SM35-NEXT:    ret;
47  %val = tail call i32 @llvm.nvvm.rotate.b32(i32 %a, i32 %b)
48  ret i32 %val
49}
50
51; SM20: rotate64
52; SM35: rotate64
; 64-bit rotate through @llvm.nvvm.rotate.b64 with a variable i32 amount %b.
; Both sm_20 and sm_35 are expected to produce the same expansion:
; shl.b64 by (%b & 63), shr.u64 by (-%b & 63), then or.b64 of the two halves.
53define i64 @rotate64(i64 %a, i32 %b) {
54; SM20-LABEL: rotate64(
55; SM20:       {
56; SM20-NEXT:    .reg .b32 %r<5>;
57; SM20-NEXT:    .reg .b64 %rd<5>;
58; SM20-EMPTY:
59; SM20-NEXT:  // %bb.0:
60; SM20-NEXT:    ld.param.u64 %rd1, [rotate64_param_0];
61; SM20-NEXT:    ld.param.u32 %r1, [rotate64_param_1];
62; SM20-NEXT:    and.b32 %r2, %r1, 63;
63; SM20-NEXT:    shl.b64 %rd2, %rd1, %r2;
64; SM20-NEXT:    neg.s32 %r3, %r1;
65; SM20-NEXT:    and.b32 %r4, %r3, 63;
66; SM20-NEXT:    shr.u64 %rd3, %rd1, %r4;
67; SM20-NEXT:    or.b64 %rd4, %rd2, %rd3;
68; SM20-NEXT:    st.param.b64 [func_retval0], %rd4;
69; SM20-NEXT:    ret;
70;
71; SM35-LABEL: rotate64(
72; SM35:       {
73; SM35-NEXT:    .reg .b32 %r<5>;
74; SM35-NEXT:    .reg .b64 %rd<5>;
75; SM35-EMPTY:
76; SM35-NEXT:  // %bb.0:
77; SM35-NEXT:    ld.param.u64 %rd1, [rotate64_param_0];
78; SM35-NEXT:    ld.param.u32 %r1, [rotate64_param_1];
79; SM35-NEXT:    and.b32 %r2, %r1, 63;
80; SM35-NEXT:    shl.b64 %rd2, %rd1, %r2;
81; SM35-NEXT:    neg.s32 %r3, %r1;
82; SM35-NEXT:    and.b32 %r4, %r3, 63;
83; SM35-NEXT:    shr.u64 %rd3, %rd1, %r4;
84; SM35-NEXT:    or.b64 %rd4, %rd2, %rd3;
85; SM35-NEXT:    st.param.b64 [func_retval0], %rd4;
86; SM35-NEXT:    ret;
87  %val = tail call i64 @llvm.nvvm.rotate.b64(i64 %a, i32 %b)
88  ret i64 %val
89}
90
91; SM20: rotateright64
92; SM35: rotateright64
; 64-bit right rotate through @llvm.nvvm.rotate.right.b64 with a variable i32
; amount %b. Mirror image of rotate64: both targets are expected to produce
; shr.u64 by (%b & 63), shl.b64 by (-%b & 63), then or.b64.
93define i64 @rotateright64(i64 %a, i32 %b) {
94; SM20-LABEL: rotateright64(
95; SM20:       {
96; SM20-NEXT:    .reg .b32 %r<5>;
97; SM20-NEXT:    .reg .b64 %rd<5>;
98; SM20-EMPTY:
99; SM20-NEXT:  // %bb.0:
100; SM20-NEXT:    ld.param.u64 %rd1, [rotateright64_param_0];
101; SM20-NEXT:    ld.param.u32 %r1, [rotateright64_param_1];
102; SM20-NEXT:    and.b32 %r2, %r1, 63;
103; SM20-NEXT:    shr.u64 %rd2, %rd1, %r2;
104; SM20-NEXT:    neg.s32 %r3, %r1;
105; SM20-NEXT:    and.b32 %r4, %r3, 63;
106; SM20-NEXT:    shl.b64 %rd3, %rd1, %r4;
107; SM20-NEXT:    or.b64 %rd4, %rd2, %rd3;
108; SM20-NEXT:    st.param.b64 [func_retval0], %rd4;
109; SM20-NEXT:    ret;
110;
111; SM35-LABEL: rotateright64(
112; SM35:       {
113; SM35-NEXT:    .reg .b32 %r<5>;
114; SM35-NEXT:    .reg .b64 %rd<5>;
115; SM35-EMPTY:
116; SM35-NEXT:  // %bb.0:
117; SM35-NEXT:    ld.param.u64 %rd1, [rotateright64_param_0];
118; SM35-NEXT:    ld.param.u32 %r1, [rotateright64_param_1];
119; SM35-NEXT:    and.b32 %r2, %r1, 63;
120; SM35-NEXT:    shr.u64 %rd2, %rd1, %r2;
121; SM35-NEXT:    neg.s32 %r3, %r1;
122; SM35-NEXT:    and.b32 %r4, %r3, 63;
123; SM35-NEXT:    shl.b64 %rd3, %rd1, %r4;
124; SM35-NEXT:    or.b64 %rd4, %rd2, %rd3;
125; SM35-NEXT:    st.param.b64 [func_retval0], %rd4;
126; SM35-NEXT:    ret;
127  %val = tail call i64 @llvm.nvvm.rotate.right.b64(i64 %a, i32 %b)
128  ret i64 %val
129}
130
131; SM20: rotl0
132; SM35: rotl0
; Rotate-left-by-8 spelled out in plain IR as (x << 8) | (x >> 24), with no
; intrinsic involved. For sm_35 the expected PTX folds the pattern into one
; shf.l.wrap.b32 with immediate 8; for sm_20 the shr / shl / or triple stays.
133define i32 @rotl0(i32 %x) {
134; SM20-LABEL: rotl0(
135; SM20:       {
136; SM20-NEXT:    .reg .b32 %r<5>;
137; SM20-EMPTY:
138; SM20-NEXT:  // %bb.0:
139; SM20-NEXT:    ld.param.u32 %r1, [rotl0_param_0];
140; SM20-NEXT:    shr.u32 %r2, %r1, 24;
141; SM20-NEXT:    shl.b32 %r3, %r1, 8;
142; SM20-NEXT:    or.b32 %r4, %r3, %r2;
143; SM20-NEXT:    st.param.b32 [func_retval0], %r4;
144; SM20-NEXT:    ret;
145;
146; SM35-LABEL: rotl0(
147; SM35:       {
148; SM35-NEXT:    .reg .b32 %r<3>;
149; SM35-EMPTY:
150; SM35-NEXT:  // %bb.0:
151; SM35-NEXT:    ld.param.u32 %r1, [rotl0_param_0];
152; SM35-NEXT:    shf.l.wrap.b32 %r2, %r1, %r1, 8;
153; SM35-NEXT:    st.param.b32 [func_retval0], %r2;
154; SM35-NEXT:    ret;
155  %t0 = shl i32 %x, 8
156  %t1 = lshr i32 %x, 24
157  %t2 = or i32 %t0, %t1
158  ret i32 %t2
159}
160
161; SM35: rotl64
; 64-bit rotate left expressed as @llvm.fshl.i64 with both data operands equal
; to %a and a variable i64 amount %n. Both targets are expected to produce the
; shl.b64 / shr.u64 / or.b64 expansion. Although %n is an i64, the expected
; PTX loads it with ld.param.u32, i.e. only its low 32 bits are read.
162define i64 @rotl64(i64 %a, i64 %n) {
163; SM20-LABEL: rotl64(
164; SM20:       {
165; SM20-NEXT:    .reg .b32 %r<5>;
166; SM20-NEXT:    .reg .b64 %rd<5>;
167; SM20-EMPTY:
168; SM20-NEXT:  // %bb.0:
169; SM20-NEXT:    ld.param.u64 %rd1, [rotl64_param_0];
170; SM20-NEXT:    ld.param.u32 %r1, [rotl64_param_1];
171; SM20-NEXT:    and.b32 %r2, %r1, 63;
172; SM20-NEXT:    shl.b64 %rd2, %rd1, %r2;
173; SM20-NEXT:    neg.s32 %r3, %r1;
174; SM20-NEXT:    and.b32 %r4, %r3, 63;
175; SM20-NEXT:    shr.u64 %rd3, %rd1, %r4;
176; SM20-NEXT:    or.b64 %rd4, %rd2, %rd3;
177; SM20-NEXT:    st.param.b64 [func_retval0], %rd4;
178; SM20-NEXT:    ret;
179;
180; SM35-LABEL: rotl64(
181; SM35:       {
182; SM35-NEXT:    .reg .b32 %r<5>;
183; SM35-NEXT:    .reg .b64 %rd<5>;
184; SM35-EMPTY:
185; SM35-NEXT:  // %bb.0:
186; SM35-NEXT:    ld.param.u64 %rd1, [rotl64_param_0];
187; SM35-NEXT:    ld.param.u32 %r1, [rotl64_param_1];
188; SM35-NEXT:    and.b32 %r2, %r1, 63;
189; SM35-NEXT:    shl.b64 %rd2, %rd1, %r2;
190; SM35-NEXT:    neg.s32 %r3, %r1;
191; SM35-NEXT:    and.b32 %r4, %r3, 63;
192; SM35-NEXT:    shr.u64 %rd3, %rd1, %r4;
193; SM35-NEXT:    or.b64 %rd4, %rd2, %rd3;
194; SM35-NEXT:    st.param.b64 [func_retval0], %rd4;
195; SM35-NEXT:    ret;
196  %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %n)
197  ret i64 %val
198}
199
200; SM35: rotl64_imm
; 64-bit rotate left by the constant 66 via @llvm.fshl.i64 with both data
; operands equal to %a. The amount exceeds the bit width on purpose: the
; expected PTX shifts left by 2 and right by 62, i.e. the amount is reduced
; modulo 64. Both targets are expected to produce the same shift/or sequence.
201define i64 @rotl64_imm(i64 %a) {
202; SM20-LABEL: rotl64_imm(
203; SM20:       {
204; SM20-NEXT:    .reg .b64 %rd<5>;
205; SM20-EMPTY:
206; SM20-NEXT:  // %bb.0:
207; SM20-NEXT:    ld.param.u64 %rd1, [rotl64_imm_param_0];
208; SM20-NEXT:    shr.u64 %rd2, %rd1, 62;
209; SM20-NEXT:    shl.b64 %rd3, %rd1, 2;
210; SM20-NEXT:    or.b64 %rd4, %rd3, %rd2;
211; SM20-NEXT:    st.param.b64 [func_retval0], %rd4;
212; SM20-NEXT:    ret;
213;
214; SM35-LABEL: rotl64_imm(
215; SM35:       {
216; SM35-NEXT:    .reg .b64 %rd<5>;
217; SM35-EMPTY:
218; SM35-NEXT:  // %bb.0:
219; SM35-NEXT:    ld.param.u64 %rd1, [rotl64_imm_param_0];
220; SM35-NEXT:    shr.u64 %rd2, %rd1, 62;
221; SM35-NEXT:    shl.b64 %rd3, %rd1, 2;
222; SM35-NEXT:    or.b64 %rd4, %rd3, %rd2;
223; SM35-NEXT:    st.param.b64 [func_retval0], %rd4;
224; SM35-NEXT:    ret;
225  %val = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 66)
226  ret i64 %val
227}
228
229; SM35: rotr64
; 64-bit rotate right expressed as @llvm.fshr.i64 with both data operands
; equal to %a and a variable i64 amount %n. Both targets are expected to
; produce shr.u64 by (%n & 63), shl.b64 by (-%n & 63), then or.b64. As in
; rotl64, the expected PTX reads only the low 32 bits of %n (ld.param.u32).
230define i64 @rotr64(i64 %a, i64 %n) {
231; SM20-LABEL: rotr64(
232; SM20:       {
233; SM20-NEXT:    .reg .b32 %r<5>;
234; SM20-NEXT:    .reg .b64 %rd<5>;
235; SM20-EMPTY:
236; SM20-NEXT:  // %bb.0:
237; SM20-NEXT:    ld.param.u64 %rd1, [rotr64_param_0];
238; SM20-NEXT:    ld.param.u32 %r1, [rotr64_param_1];
239; SM20-NEXT:    and.b32 %r2, %r1, 63;
240; SM20-NEXT:    shr.u64 %rd2, %rd1, %r2;
241; SM20-NEXT:    neg.s32 %r3, %r1;
242; SM20-NEXT:    and.b32 %r4, %r3, 63;
243; SM20-NEXT:    shl.b64 %rd3, %rd1, %r4;
244; SM20-NEXT:    or.b64 %rd4, %rd2, %rd3;
245; SM20-NEXT:    st.param.b64 [func_retval0], %rd4;
246; SM20-NEXT:    ret;
247;
248; SM35-LABEL: rotr64(
249; SM35:       {
250; SM35-NEXT:    .reg .b32 %r<5>;
251; SM35-NEXT:    .reg .b64 %rd<5>;
252; SM35-EMPTY:
253; SM35-NEXT:  // %bb.0:
254; SM35-NEXT:    ld.param.u64 %rd1, [rotr64_param_0];
255; SM35-NEXT:    ld.param.u32 %r1, [rotr64_param_1];
256; SM35-NEXT:    and.b32 %r2, %r1, 63;
257; SM35-NEXT:    shr.u64 %rd2, %rd1, %r2;
258; SM35-NEXT:    neg.s32 %r3, %r1;
259; SM35-NEXT:    and.b32 %r4, %r3, 63;
260; SM35-NEXT:    shl.b64 %rd3, %rd1, %r4;
261; SM35-NEXT:    or.b64 %rd4, %rd2, %rd3;
262; SM35-NEXT:    st.param.b64 [func_retval0], %rd4;
263; SM35-NEXT:    ret;
264  %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %n)
265  ret i64 %val
266}
267
268; SM35: rotr64_imm
; 64-bit rotate right by the constant 66 via @llvm.fshr.i64 with both data
; operands equal to %a. The expected PTX shifts right by 2 and left by 62,
; i.e. the out-of-range amount is reduced modulo 64. Both targets are
; expected to produce the same shift/or sequence.
269define i64 @rotr64_imm(i64 %a) {
270; SM20-LABEL: rotr64_imm(
271; SM20:       {
272; SM20-NEXT:    .reg .b64 %rd<5>;
273; SM20-EMPTY:
274; SM20-NEXT:  // %bb.0:
275; SM20-NEXT:    ld.param.u64 %rd1, [rotr64_imm_param_0];
276; SM20-NEXT:    shl.b64 %rd2, %rd1, 62;
277; SM20-NEXT:    shr.u64 %rd3, %rd1, 2;
278; SM20-NEXT:    or.b64 %rd4, %rd3, %rd2;
279; SM20-NEXT:    st.param.b64 [func_retval0], %rd4;
280; SM20-NEXT:    ret;
281;
282; SM35-LABEL: rotr64_imm(
283; SM35:       {
284; SM35-NEXT:    .reg .b64 %rd<5>;
285; SM35-EMPTY:
286; SM35-NEXT:  // %bb.0:
287; SM35-NEXT:    ld.param.u64 %rd1, [rotr64_imm_param_0];
288; SM35-NEXT:    shl.b64 %rd2, %rd1, 62;
289; SM35-NEXT:    shr.u64 %rd3, %rd1, 2;
290; SM35-NEXT:    or.b64 %rd4, %rd3, %rd2;
291; SM35-NEXT:    st.param.b64 [func_retval0], %rd4;
292; SM35-NEXT:    ret;
293  %val = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 66)
294  ret i64 %val
295}
296
; True 32-bit funnel shift right: @llvm.fshr.i32 with distinct data operands
; %a and %b and a variable amount %c. For sm_35 the expected PTX is a single
; shf.r.wrap.b32 taking %b then %a as its data sources. For sm_20 it is an
; expansion: %b >> (%c & 31) or-ed with (%a << 1) << (~%c & 31).
297define i32 @funnel_shift_right_32(i32 %a, i32 %b, i32 %c) {
298; SM20-LABEL: funnel_shift_right_32(
299; SM20:       {
300; SM20-NEXT:    .reg .b32 %r<11>;
301; SM20-EMPTY:
302; SM20-NEXT:  // %bb.0:
303; SM20-NEXT:    ld.param.u32 %r1, [funnel_shift_right_32_param_0];
304; SM20-NEXT:    ld.param.u32 %r2, [funnel_shift_right_32_param_2];
305; SM20-NEXT:    and.b32 %r3, %r2, 31;
306; SM20-NEXT:    ld.param.u32 %r4, [funnel_shift_right_32_param_1];
307; SM20-NEXT:    shr.u32 %r5, %r4, %r3;
308; SM20-NEXT:    shl.b32 %r6, %r1, 1;
309; SM20-NEXT:    not.b32 %r7, %r2;
310; SM20-NEXT:    and.b32 %r8, %r7, 31;
311; SM20-NEXT:    shl.b32 %r9, %r6, %r8;
312; SM20-NEXT:    or.b32 %r10, %r9, %r5;
313; SM20-NEXT:    st.param.b32 [func_retval0], %r10;
314; SM20-NEXT:    ret;
315;
316; SM35-LABEL: funnel_shift_right_32(
317; SM35:       {
318; SM35-NEXT:    .reg .b32 %r<5>;
319; SM35-EMPTY:
320; SM35-NEXT:  // %bb.0:
321; SM35-NEXT:    ld.param.u32 %r1, [funnel_shift_right_32_param_0];
322; SM35-NEXT:    ld.param.u32 %r2, [funnel_shift_right_32_param_1];
323; SM35-NEXT:    ld.param.u32 %r3, [funnel_shift_right_32_param_2];
324; SM35-NEXT:    shf.r.wrap.b32 %r4, %r2, %r1, %r3;
325; SM35-NEXT:    st.param.b32 [func_retval0], %r4;
326; SM35-NEXT:    ret;
327  %val = call i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
328  ret i32 %val
329}
330
; True 32-bit funnel shift left: @llvm.fshl.i32 with distinct data operands
; %a and %b and a variable amount %c. For sm_35 the expected PTX is a single
; shf.l.wrap.b32 taking %b then %a as its data sources. For sm_20 it is an
; expansion: %a << (%c & 31) or-ed with (%b >> 1) >> (~%c & 31).
331define i32 @funnel_shift_left_32(i32 %a, i32 %b, i32 %c) {
332; SM20-LABEL: funnel_shift_left_32(
333; SM20:       {
334; SM20-NEXT:    .reg .b32 %r<11>;
335; SM20-EMPTY:
336; SM20-NEXT:  // %bb.0:
337; SM20-NEXT:    ld.param.u32 %r1, [funnel_shift_left_32_param_0];
338; SM20-NEXT:    ld.param.u32 %r2, [funnel_shift_left_32_param_2];
339; SM20-NEXT:    and.b32 %r3, %r2, 31;
340; SM20-NEXT:    shl.b32 %r4, %r1, %r3;
341; SM20-NEXT:    ld.param.u32 %r5, [funnel_shift_left_32_param_1];
342; SM20-NEXT:    shr.u32 %r6, %r5, 1;
343; SM20-NEXT:    not.b32 %r7, %r2;
344; SM20-NEXT:    and.b32 %r8, %r7, 31;
345; SM20-NEXT:    shr.u32 %r9, %r6, %r8;
346; SM20-NEXT:    or.b32 %r10, %r4, %r9;
347; SM20-NEXT:    st.param.b32 [func_retval0], %r10;
348; SM20-NEXT:    ret;
349;
350; SM35-LABEL: funnel_shift_left_32(
351; SM35:       {
352; SM35-NEXT:    .reg .b32 %r<5>;
353; SM35-EMPTY:
354; SM35-NEXT:  // %bb.0:
355; SM35-NEXT:    ld.param.u32 %r1, [funnel_shift_left_32_param_0];
356; SM35-NEXT:    ld.param.u32 %r2, [funnel_shift_left_32_param_1];
357; SM35-NEXT:    ld.param.u32 %r3, [funnel_shift_left_32_param_2];
358; SM35-NEXT:    shf.l.wrap.b32 %r4, %r2, %r1, %r3;
359; SM35-NEXT:    st.param.b32 [func_retval0], %r4;
360; SM35-NEXT:    ret;
361  %val = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
362  ret i32 %val
363}
364
; True 64-bit funnel shift right: @llvm.fshr.i64 with distinct data operands
; %a and %b and a variable i64 amount %c. Both targets are expected to produce
; the same expansion: %b >> (%c & 63) or-ed with (%a << 1) << (~%c & 63).
; The expected PTX reads only the low 32 bits of %c (ld.param.u32).
365define i64 @funnel_shift_right_64(i64 %a, i64 %b, i64 %c) {
366; SM20-LABEL: funnel_shift_right_64(
367; SM20:       {
368; SM20-NEXT:    .reg .b32 %r<5>;
369; SM20-NEXT:    .reg .b64 %rd<7>;
370; SM20-EMPTY:
371; SM20-NEXT:  // %bb.0:
372; SM20-NEXT:    ld.param.u64 %rd1, [funnel_shift_right_64_param_0];
373; SM20-NEXT:    ld.param.u32 %r1, [funnel_shift_right_64_param_2];
374; SM20-NEXT:    and.b32 %r2, %r1, 63;
375; SM20-NEXT:    ld.param.u64 %rd2, [funnel_shift_right_64_param_1];
376; SM20-NEXT:    shr.u64 %rd3, %rd2, %r2;
377; SM20-NEXT:    shl.b64 %rd4, %rd1, 1;
378; SM20-NEXT:    not.b32 %r3, %r1;
379; SM20-NEXT:    and.b32 %r4, %r3, 63;
380; SM20-NEXT:    shl.b64 %rd5, %rd4, %r4;
381; SM20-NEXT:    or.b64 %rd6, %rd5, %rd3;
382; SM20-NEXT:    st.param.b64 [func_retval0], %rd6;
383; SM20-NEXT:    ret;
384;
385; SM35-LABEL: funnel_shift_right_64(
386; SM35:       {
387; SM35-NEXT:    .reg .b32 %r<5>;
388; SM35-NEXT:    .reg .b64 %rd<7>;
389; SM35-EMPTY:
390; SM35-NEXT:  // %bb.0:
391; SM35-NEXT:    ld.param.u64 %rd1, [funnel_shift_right_64_param_0];
392; SM35-NEXT:    ld.param.u32 %r1, [funnel_shift_right_64_param_2];
393; SM35-NEXT:    and.b32 %r2, %r1, 63;
394; SM35-NEXT:    ld.param.u64 %rd2, [funnel_shift_right_64_param_1];
395; SM35-NEXT:    shr.u64 %rd3, %rd2, %r2;
396; SM35-NEXT:    shl.b64 %rd4, %rd1, 1;
397; SM35-NEXT:    not.b32 %r3, %r1;
398; SM35-NEXT:    and.b32 %r4, %r3, 63;
399; SM35-NEXT:    shl.b64 %rd5, %rd4, %r4;
400; SM35-NEXT:    or.b64 %rd6, %rd5, %rd3;
401; SM35-NEXT:    st.param.b64 [func_retval0], %rd6;
402; SM35-NEXT:    ret;
403  %val = call i64 @llvm.fshr.i64(i64 %a, i64 %b, i64 %c)
404  ret i64 %val
405}
406
; True 64-bit funnel shift left: @llvm.fshl.i64 with distinct data operands
; %a and %b and a variable i64 amount %c. Both targets are expected to produce
; the same expansion: %a << (%c & 63) or-ed with (%b >> 1) >> (~%c & 63).
; The expected PTX reads only the low 32 bits of %c (ld.param.u32).
407define i64 @funnel_shift_left_64(i64 %a, i64 %b, i64 %c) {
408; SM20-LABEL: funnel_shift_left_64(
409; SM20:       {
410; SM20-NEXT:    .reg .b32 %r<5>;
411; SM20-NEXT:    .reg .b64 %rd<7>;
412; SM20-EMPTY:
413; SM20-NEXT:  // %bb.0:
414; SM20-NEXT:    ld.param.u64 %rd1, [funnel_shift_left_64_param_0];
415; SM20-NEXT:    ld.param.u32 %r1, [funnel_shift_left_64_param_2];
416; SM20-NEXT:    and.b32 %r2, %r1, 63;
417; SM20-NEXT:    shl.b64 %rd2, %rd1, %r2;
418; SM20-NEXT:    ld.param.u64 %rd3, [funnel_shift_left_64_param_1];
419; SM20-NEXT:    shr.u64 %rd4, %rd3, 1;
420; SM20-NEXT:    not.b32 %r3, %r1;
421; SM20-NEXT:    and.b32 %r4, %r3, 63;
422; SM20-NEXT:    shr.u64 %rd5, %rd4, %r4;
423; SM20-NEXT:    or.b64 %rd6, %rd2, %rd5;
424; SM20-NEXT:    st.param.b64 [func_retval0], %rd6;
425; SM20-NEXT:    ret;
426;
427; SM35-LABEL: funnel_shift_left_64(
428; SM35:       {
429; SM35-NEXT:    .reg .b32 %r<5>;
430; SM35-NEXT:    .reg .b64 %rd<7>;
431; SM35-EMPTY:
432; SM35-NEXT:  // %bb.0:
433; SM35-NEXT:    ld.param.u64 %rd1, [funnel_shift_left_64_param_0];
434; SM35-NEXT:    ld.param.u32 %r1, [funnel_shift_left_64_param_2];
435; SM35-NEXT:    and.b32 %r2, %r1, 63;
436; SM35-NEXT:    shl.b64 %rd2, %rd1, %r2;
437; SM35-NEXT:    ld.param.u64 %rd3, [funnel_shift_left_64_param_1];
438; SM35-NEXT:    shr.u64 %rd4, %rd3, 1;
439; SM35-NEXT:    not.b32 %r3, %r1;
440; SM35-NEXT:    and.b32 %r4, %r3, 63;
441; SM35-NEXT:    shr.u64 %rd5, %rd4, %r4;
442; SM35-NEXT:    or.b64 %rd6, %rd2, %rd5;
443; SM35-NEXT:    st.param.b64 [func_retval0], %rd6;
444; SM35-NEXT:    ret;
445  %val = call i64 @llvm.fshl.i64(i64 %a, i64 %b, i64 %c)
446  ret i64 %val
447}
448
449