; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=ppc32-- | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_32
; RUN: llc < %s -mtriple=ppc32-- -mcpu=ppc64 | FileCheck %s --check-prefixes=CHECK,CHECK32,CHECK32_64
; RUN: llc < %s -mtriple=powerpc64le-- | FileCheck %s --check-prefixes=CHECK,CHECK64

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare i128 @llvm.fshl.i128(i128, i128, i128)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables.
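; For a w-bit type, fshl(x, y, z) concatenates x (high) with y (low), shifts
; the pair left by z % w, and returns the top w bits:
; (x << (z % w)) | (y >> (w - z % w)), which is just x when z % w == 0.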

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshl_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrlwi 5, 5, 27
; CHECK-NEXT:    slw 3, 3, 5
; CHECK-NEXT:    subfic 5, 5, 32
; CHECK-NEXT:    srw 4, 4, 5
; CHECK-NEXT:    or 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}
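
; Note that slw/srw use the low six bits of the shift amount, so amounts of
; 32..63 produce 0: when z % 32 == 0 the srw by 32 contributes nothing and
; the or leaves x unchanged.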

define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32-LABEL: fshl_i64:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    andi. 7, 8, 32
; CHECK32-NEXT:    mr 7, 5
; CHECK32-NEXT:    bne 0, .LBB1_2
; CHECK32-NEXT:  # %bb.1:
; CHECK32-NEXT:    mr 7, 4
; CHECK32-NEXT:  .LBB1_2:
; CHECK32-NEXT:    clrlwi 8, 8, 27
; CHECK32-NEXT:    subfic 9, 8, 32
; CHECK32-NEXT:    srw 10, 7, 9
; CHECK32-NEXT:    bne 0, .LBB1_4
; CHECK32-NEXT:  # %bb.3:
; CHECK32-NEXT:    mr 4, 3
; CHECK32-NEXT:  .LBB1_4:
; CHECK32-NEXT:    slw 3, 4, 8
; CHECK32-NEXT:    or 3, 3, 10
; CHECK32-NEXT:    bne 0, .LBB1_6
; CHECK32-NEXT:  # %bb.5:
; CHECK32-NEXT:    mr 6, 5
; CHECK32-NEXT:  .LBB1_6:
; CHECK32-NEXT:    srw 4, 6, 9
; CHECK32-NEXT:    slw 5, 7, 8
; CHECK32-NEXT:    or 4, 5, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshl_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    subfic 5, 5, 64
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}
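
; On PPC32, the i64 funnel shift is expanded into 32-bit pieces: andi. tests
; bit z & 32 to select which word halves feed each slw/srw pair.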

define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
; CHECK32_32-LABEL: fshl_i128:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    lwz 12, 52(1)
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    andi. 11, 12, 64
; CHECK32_32-NEXT:    mcrf 1, 0
; CHECK32_32-NEXT:    mr 11, 6
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    bne 0, .LBB2_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    mr 11, 4
; CHECK32_32-NEXT:  .LBB2_2:
; CHECK32_32-NEXT:    mr 30, 7
; CHECK32_32-NEXT:    bne 1, .LBB2_4
; CHECK32_32-NEXT:  # %bb.3:
; CHECK32_32-NEXT:    mr 30, 5
; CHECK32_32-NEXT:  .LBB2_4:
; CHECK32_32-NEXT:    andi. 4, 12, 32
; CHECK32_32-NEXT:    mr 4, 30
; CHECK32_32-NEXT:    beq 0, .LBB2_18
; CHECK32_32-NEXT:  # %bb.5:
; CHECK32_32-NEXT:    beq 1, .LBB2_19
; CHECK32_32-NEXT:  .LBB2_6:
; CHECK32_32-NEXT:    beq 0, .LBB2_20
; CHECK32_32-NEXT:  .LBB2_7:
; CHECK32_32-NEXT:    mr 5, 8
; CHECK32_32-NEXT:    beq 1, .LBB2_21
; CHECK32_32-NEXT:  .LBB2_8:
; CHECK32_32-NEXT:    mr 3, 5
; CHECK32_32-NEXT:    beq 0, .LBB2_22
; CHECK32_32-NEXT:  .LBB2_9:
; CHECK32_32-NEXT:    clrlwi 6, 12, 27
; CHECK32_32-NEXT:    bne 1, .LBB2_11
; CHECK32_32-NEXT:  .LBB2_10:
; CHECK32_32-NEXT:    mr 9, 7
; CHECK32_32-NEXT:  .LBB2_11:
; CHECK32_32-NEXT:    subfic 7, 6, 32
; CHECK32_32-NEXT:    mr 12, 9
; CHECK32_32-NEXT:    bne 0, .LBB2_13
; CHECK32_32-NEXT:  # %bb.12:
; CHECK32_32-NEXT:    mr 12, 5
; CHECK32_32-NEXT:  .LBB2_13:
; CHECK32_32-NEXT:    srw 5, 4, 7
; CHECK32_32-NEXT:    slw 11, 11, 6
; CHECK32_32-NEXT:    srw 0, 3, 7
; CHECK32_32-NEXT:    slw 4, 4, 6
; CHECK32_32-NEXT:    srw 30, 12, 7
; CHECK32_32-NEXT:    slw 29, 3, 6
; CHECK32_32-NEXT:    bne 1, .LBB2_15
; CHECK32_32-NEXT:  # %bb.14:
; CHECK32_32-NEXT:    mr 10, 8
; CHECK32_32-NEXT:  .LBB2_15:
; CHECK32_32-NEXT:    or 3, 11, 5
; CHECK32_32-NEXT:    or 4, 4, 0
; CHECK32_32-NEXT:    or 5, 29, 30
; CHECK32_32-NEXT:    bne 0, .LBB2_17
; CHECK32_32-NEXT:  # %bb.16:
; CHECK32_32-NEXT:    mr 10, 9
; CHECK32_32-NEXT:  .LBB2_17:
; CHECK32_32-NEXT:    srw 7, 10, 7
; CHECK32_32-NEXT:    slw 6, 12, 6
; CHECK32_32-NEXT:    or 6, 6, 7
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    blr
; CHECK32_32-NEXT:  .LBB2_18:
; CHECK32_32-NEXT:    mr 4, 11
; CHECK32_32-NEXT:    bne 1, .LBB2_6
; CHECK32_32-NEXT:  .LBB2_19:
; CHECK32_32-NEXT:    mr 5, 3
; CHECK32_32-NEXT:    bne 0, .LBB2_7
; CHECK32_32-NEXT:  .LBB2_20:
; CHECK32_32-NEXT:    mr 11, 5
; CHECK32_32-NEXT:    mr 5, 8
; CHECK32_32-NEXT:    bne 1, .LBB2_8
; CHECK32_32-NEXT:  .LBB2_21:
; CHECK32_32-NEXT:    mr 5, 6
; CHECK32_32-NEXT:    mr 3, 5
; CHECK32_32-NEXT:    bne 0, .LBB2_9
; CHECK32_32-NEXT:  .LBB2_22:
; CHECK32_32-NEXT:    mr 3, 30
; CHECK32_32-NEXT:    clrlwi 6, 12, 27
; CHECK32_32-NEXT:    beq 1, .LBB2_10
; CHECK32_32-NEXT:    b .LBB2_11
;
; CHECK32_64-LABEL: fshl_i128:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    lwz 12, 52(1)
; CHECK32_64-NEXT:    andi. 11, 12, 64
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mcrf 1, 0
; CHECK32_64-NEXT:    mr 11, 6
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    bne 0, .LBB2_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    mr 11, 4
; CHECK32_64-NEXT:  .LBB2_2:
; CHECK32_64-NEXT:    mr 30, 7
; CHECK32_64-NEXT:    bne 1, .LBB2_4
; CHECK32_64-NEXT:  # %bb.3:
; CHECK32_64-NEXT:    mr 30, 5
; CHECK32_64-NEXT:  .LBB2_4:
; CHECK32_64-NEXT:    andi. 4, 12, 32
; CHECK32_64-NEXT:    mr 4, 30
; CHECK32_64-NEXT:    beq 0, .LBB2_18
; CHECK32_64-NEXT:  # %bb.5:
; CHECK32_64-NEXT:    beq 1, .LBB2_19
; CHECK32_64-NEXT:  .LBB2_6:
; CHECK32_64-NEXT:    beq 0, .LBB2_20
; CHECK32_64-NEXT:  .LBB2_7:
; CHECK32_64-NEXT:    mr 5, 8
; CHECK32_64-NEXT:    beq 1, .LBB2_21
; CHECK32_64-NEXT:  .LBB2_8:
; CHECK32_64-NEXT:    mr 3, 5
; CHECK32_64-NEXT:    beq 0, .LBB2_22
; CHECK32_64-NEXT:  .LBB2_9:
; CHECK32_64-NEXT:    clrlwi 6, 12, 27
; CHECK32_64-NEXT:    bne 1, .LBB2_11
; CHECK32_64-NEXT:  .LBB2_10:
; CHECK32_64-NEXT:    mr 9, 7
; CHECK32_64-NEXT:  .LBB2_11:
; CHECK32_64-NEXT:    subfic 7, 6, 32
; CHECK32_64-NEXT:    mr 12, 9
; CHECK32_64-NEXT:    bne 0, .LBB2_13
; CHECK32_64-NEXT:  # %bb.12:
; CHECK32_64-NEXT:    mr 12, 5
; CHECK32_64-NEXT:  .LBB2_13:
; CHECK32_64-NEXT:    srw 5, 4, 7
; CHECK32_64-NEXT:    slw 11, 11, 6
; CHECK32_64-NEXT:    srw 0, 3, 7
; CHECK32_64-NEXT:    slw 4, 4, 6
; CHECK32_64-NEXT:    srw 30, 12, 7
; CHECK32_64-NEXT:    slw 29, 3, 6
; CHECK32_64-NEXT:    bne 1, .LBB2_15
; CHECK32_64-NEXT:  # %bb.14:
; CHECK32_64-NEXT:    mr 10, 8
; CHECK32_64-NEXT:  .LBB2_15:
; CHECK32_64-NEXT:    or 3, 11, 5
; CHECK32_64-NEXT:    or 4, 4, 0
; CHECK32_64-NEXT:    or 5, 29, 30
; CHECK32_64-NEXT:    bne 0, .LBB2_17
; CHECK32_64-NEXT:  # %bb.16:
; CHECK32_64-NEXT:    mr 10, 9
; CHECK32_64-NEXT:  .LBB2_17:
; CHECK32_64-NEXT:    srw 7, 10, 7
; CHECK32_64-NEXT:    slw 6, 12, 6
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 6, 6, 7
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    blr
; CHECK32_64-NEXT:  .LBB2_18:
; CHECK32_64-NEXT:    mr 4, 11
; CHECK32_64-NEXT:    bne 1, .LBB2_6
; CHECK32_64-NEXT:  .LBB2_19:
; CHECK32_64-NEXT:    mr 5, 3
; CHECK32_64-NEXT:    bne 0, .LBB2_7
; CHECK32_64-NEXT:  .LBB2_20:
; CHECK32_64-NEXT:    mr 11, 5
; CHECK32_64-NEXT:    mr 5, 8
; CHECK32_64-NEXT:    bne 1, .LBB2_8
; CHECK32_64-NEXT:  .LBB2_21:
; CHECK32_64-NEXT:    mr 5, 6
; CHECK32_64-NEXT:    mr 3, 5
; CHECK32_64-NEXT:    bne 0, .LBB2_9
; CHECK32_64-NEXT:  .LBB2_22:
; CHECK32_64-NEXT:    mr 3, 30
; CHECK32_64-NEXT:    clrlwi 6, 12, 27
; CHECK32_64-NEXT:    beq 1, .LBB2_10
; CHECK32_64-NEXT:    b .LBB2_11
;
; CHECK64-LABEL: fshl_i128:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    andi. 8, 7, 64
; CHECK64-NEXT:    clrlwi 7, 7, 26
; CHECK64-NEXT:    subfic 8, 7, 64
; CHECK64-NEXT:    iseleq 5, 6, 5
; CHECK64-NEXT:    iseleq 6, 3, 6
; CHECK64-NEXT:    iseleq 3, 4, 3
; CHECK64-NEXT:    srd 5, 5, 8
; CHECK64-NEXT:    sld 9, 6, 7
; CHECK64-NEXT:    srd 6, 6, 8
; CHECK64-NEXT:    sld 3, 3, 7
; CHECK64-NEXT:    or 5, 9, 5
; CHECK64-NEXT:    or 4, 3, 6
; CHECK64-NEXT:    mr 3, 5
; CHECK64-NEXT:    blr
  %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
  ret i128 %f
}
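
; On PPC64 the i128 halves are selected branch-free with isel (iseleq) keyed
; on z & 64, then each half is funnel-shifted with one sld/srd pair.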

; Verify that weird types are minimally supported.
declare i37 @llvm.fshl.i37(i37, i37, i37)
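; The shift amount must first be reduced modulo 37: the 32-bit targets call
; __umoddi3, while ppc64le forms z % 37 inline with a magic-number multiply
; (mulhdu/mulli/sub).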
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshl_i37:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mflr 0
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    stw 0, 36(1)
; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_32-NEXT:    .cfi_offset lr, 4
; CHECK32_32-NEXT:    .cfi_offset r27, -20
; CHECK32_32-NEXT:    .cfi_offset r28, -16
; CHECK32_32-NEXT:    .cfi_offset r29, -12
; CHECK32_32-NEXT:    .cfi_offset r30, -8
; CHECK32_32-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 27, 5
; CHECK32_32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 28, 3
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 29, 4
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 30, 6
; CHECK32_32-NEXT:    clrlwi 3, 7, 27
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    li 5, 0
; CHECK32_32-NEXT:    li 6, 37
; CHECK32_32-NEXT:    bl __umoddi3
; CHECK32_32-NEXT:    rotlwi 5, 30, 27
; CHECK32_32-NEXT:    rlwimi 5, 27, 27, 0, 4
; CHECK32_32-NEXT:    andi. 3, 4, 32
; CHECK32_32-NEXT:    mr 6, 5
; CHECK32_32-NEXT:    bne 0, .LBB3_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    mr 6, 29
; CHECK32_32-NEXT:  .LBB3_2:
; CHECK32_32-NEXT:    clrlwi 4, 4, 27
; CHECK32_32-NEXT:    subfic 7, 4, 32
; CHECK32_32-NEXT:    srw 3, 6, 7
; CHECK32_32-NEXT:    bne 0, .LBB3_4
; CHECK32_32-NEXT:  # %bb.3:
; CHECK32_32-NEXT:    mr 29, 28
; CHECK32_32-NEXT:  .LBB3_4:
; CHECK32_32-NEXT:    slw 8, 29, 4
; CHECK32_32-NEXT:    or 3, 8, 3
; CHECK32_32-NEXT:    beq 0, .LBB3_6
; CHECK32_32-NEXT:  # %bb.5:
; CHECK32_32-NEXT:    slwi 5, 30, 27
; CHECK32_32-NEXT:  .LBB3_6:
; CHECK32_32-NEXT:    srw 5, 5, 7
; CHECK32_32-NEXT:    slw 4, 6, 4
; CHECK32_32-NEXT:    or 4, 4, 5
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 0, 36(1)
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    mtlr 0
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshl_i37:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    mflr 0
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    stw 0, 36(1)
; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_64-NEXT:    .cfi_offset lr, 4
; CHECK32_64-NEXT:    .cfi_offset r27, -20
; CHECK32_64-NEXT:    .cfi_offset r28, -16
; CHECK32_64-NEXT:    .cfi_offset r29, -12
; CHECK32_64-NEXT:    .cfi_offset r30, -8
; CHECK32_64-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 27, 5
; CHECK32_64-NEXT:    li 5, 0
; CHECK32_64-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 28, 3
; CHECK32_64-NEXT:    clrlwi 3, 7, 27
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 29, 4
; CHECK32_64-NEXT:    mr 4, 8
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 30, 6
; CHECK32_64-NEXT:    li 6, 37
; CHECK32_64-NEXT:    bl __umoddi3
; CHECK32_64-NEXT:    rotlwi 5, 30, 27
; CHECK32_64-NEXT:    andi. 3, 4, 32
; CHECK32_64-NEXT:    rlwimi 5, 27, 27, 0, 4
; CHECK32_64-NEXT:    mr 6, 5
; CHECK32_64-NEXT:    bne 0, .LBB3_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    mr 6, 29
; CHECK32_64-NEXT:  .LBB3_2:
; CHECK32_64-NEXT:    clrlwi 4, 4, 27
; CHECK32_64-NEXT:    subfic 7, 4, 32
; CHECK32_64-NEXT:    srw 3, 6, 7
; CHECK32_64-NEXT:    bne 0, .LBB3_4
; CHECK32_64-NEXT:  # %bb.3:
; CHECK32_64-NEXT:    mr 29, 28
; CHECK32_64-NEXT:  .LBB3_4:
; CHECK32_64-NEXT:    slw 8, 29, 4
; CHECK32_64-NEXT:    or 3, 8, 3
; CHECK32_64-NEXT:    beq 0, .LBB3_6
; CHECK32_64-NEXT:  # %bb.5:
; CHECK32_64-NEXT:    slwi 5, 30, 27
; CHECK32_64-NEXT:  .LBB3_6:
; CHECK32_64-NEXT:    srw 5, 5, 7
; CHECK32_64-NEXT:    slw 4, 6, 4
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 4, 4, 5
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    lwz 0, 36(1)
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    mtlr 0
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshl_i37:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    lis 7, 1771
; CHECK64-NEXT:    clrldi 6, 5, 27
; CHECK64-NEXT:    sldi 4, 4, 27
; CHECK64-NEXT:    ori 7, 7, 15941
; CHECK64-NEXT:    rldic 7, 7, 32, 5
; CHECK64-NEXT:    oris 7, 7, 12398
; CHECK64-NEXT:    ori 7, 7, 46053
; CHECK64-NEXT:    mulhdu 6, 6, 7
; CHECK64-NEXT:    mulli 6, 6, 37
; CHECK64-NEXT:    sub 5, 5, 6
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    subfic 5, 5, 64
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011
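; i.e. fshl(112, 127, 2) = ((112 << 2) | (127 >> 5)) & 127 = 64 | 3 = 67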

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; CHECK-LABEL: fshl_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 67
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).
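; fshl(x, y, 9) = (x << 9) | (y >> 23): rotlwi rotates y left by 9, then
; rlwimi overwrites the top 23 bits (0..22) with the same bits of x rotated
; left by 9.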

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 9
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 9
; CHECK-NEXT:    rlwimi 4, 3, 9, 0, 22
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshl_i64_const_overshift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 6, 6, 9
; CHECK32-NEXT:    rotlwi 3, 5, 9
; CHECK32-NEXT:    rlwimi 6, 5, 9, 0, 22
; CHECK32-NEXT:    rlwimi 3, 4, 9, 0, 22
; CHECK32-NEXT:    mr 4, 6
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshl_i64_const_overshift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 4, 4, 41
; CHECK64-NEXT:    rldimi 4, 3, 41, 0
; CHECK64-NEXT:    mr 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
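; fshl(255, 0, 7) = ((255 << 7) | (0 >> 1)) & 255 = 128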

define i8 @fshl_i8_const_fold() {
; CHECK-LABEL: fshl_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 128
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables.
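; fshr(x, y, z) returns the bottom w bits of the concatenation shifted right
; by z % w: (x << (w - z % w)) | (y >> (z % w)), which is just y when
; z % w == 0.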

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: fshr_i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    clrlwi 5, 5, 27
; CHECK-NEXT:    srw 4, 4, 5
; CHECK-NEXT:    subfic 5, 5, 32
; CHECK-NEXT:    slw 3, 3, 5
; CHECK-NEXT:    or 3, 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
; CHECK32-LABEL: fshr_i64:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    andi. 7, 8, 32
; CHECK32-NEXT:    mr 7, 5
; CHECK32-NEXT:    beq 0, .LBB10_2
; CHECK32-NEXT:  # %bb.1:
; CHECK32-NEXT:    mr 7, 4
; CHECK32-NEXT:  .LBB10_2:
; CHECK32-NEXT:    clrlwi 8, 8, 27
; CHECK32-NEXT:    srw 10, 7, 8
; CHECK32-NEXT:    beq 0, .LBB10_4
; CHECK32-NEXT:  # %bb.3:
; CHECK32-NEXT:    mr 4, 3
; CHECK32-NEXT:  .LBB10_4:
; CHECK32-NEXT:    subfic 9, 8, 32
; CHECK32-NEXT:    slw 3, 4, 9
; CHECK32-NEXT:    or 3, 3, 10
; CHECK32-NEXT:    beq 0, .LBB10_6
; CHECK32-NEXT:  # %bb.5:
; CHECK32-NEXT:    mr 6, 5
; CHECK32-NEXT:  .LBB10_6:
; CHECK32-NEXT:    srw 4, 6, 8
; CHECK32-NEXT:    slw 5, 7, 9
; CHECK32-NEXT:    or 4, 5, 4
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshr_i64:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    subfic 5, 5, 64
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
  ret i64 %f
}

; Verify that weird types are minimally supported.
declare i37 @llvm.fshr.i37(i37, i37, i37)
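; Besides the modulo-37 reduction, fshr rebases into the promoted width:
; y is pre-shifted left by 27 bits and 27 is added to z % 37 (the addi
; below), turning the i37 funnel into an equivalent one over the wider
; registers.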
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
; CHECK32_32-LABEL: fshr_i37:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mflr 0
; CHECK32_32-NEXT:    stwu 1, -32(1)
; CHECK32_32-NEXT:    stw 0, 36(1)
; CHECK32_32-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_32-NEXT:    .cfi_offset lr, 4
; CHECK32_32-NEXT:    .cfi_offset r27, -20
; CHECK32_32-NEXT:    .cfi_offset r28, -16
; CHECK32_32-NEXT:    .cfi_offset r29, -12
; CHECK32_32-NEXT:    .cfi_offset r30, -8
; CHECK32_32-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 27, 5
; CHECK32_32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 28, 3
; CHECK32_32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 29, 4
; CHECK32_32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_32-NEXT:    mr 30, 6
; CHECK32_32-NEXT:    clrlwi 3, 7, 27
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    li 5, 0
; CHECK32_32-NEXT:    li 6, 37
; CHECK32_32-NEXT:    bl __umoddi3
; CHECK32_32-NEXT:    rotlwi 5, 30, 27
; CHECK32_32-NEXT:    addi 3, 4, 27
; CHECK32_32-NEXT:    andi. 4, 3, 32
; CHECK32_32-NEXT:    rlwimi 5, 27, 27, 0, 4
; CHECK32_32-NEXT:    mr 4, 5
; CHECK32_32-NEXT:    beq 0, .LBB11_2
; CHECK32_32-NEXT:  # %bb.1:
; CHECK32_32-NEXT:    mr 4, 29
; CHECK32_32-NEXT:  .LBB11_2:
; CHECK32_32-NEXT:    clrlwi 6, 3, 27
; CHECK32_32-NEXT:    srw 3, 4, 6
; CHECK32_32-NEXT:    beq 0, .LBB11_4
; CHECK32_32-NEXT:  # %bb.3:
; CHECK32_32-NEXT:    mr 29, 28
; CHECK32_32-NEXT:  .LBB11_4:
; CHECK32_32-NEXT:    subfic 7, 6, 32
; CHECK32_32-NEXT:    slw 8, 29, 7
; CHECK32_32-NEXT:    or 3, 8, 3
; CHECK32_32-NEXT:    bne 0, .LBB11_6
; CHECK32_32-NEXT:  # %bb.5:
; CHECK32_32-NEXT:    slwi 5, 30, 27
; CHECK32_32-NEXT:  .LBB11_6:
; CHECK32_32-NEXT:    srw 5, 5, 6
; CHECK32_32-NEXT:    slw 4, 4, 7
; CHECK32_32-NEXT:    or 4, 4, 5
; CHECK32_32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_32-NEXT:    lwz 0, 36(1)
; CHECK32_32-NEXT:    addi 1, 1, 32
; CHECK32_32-NEXT:    mtlr 0
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_i37:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    mflr 0
; CHECK32_64-NEXT:    stwu 1, -32(1)
; CHECK32_64-NEXT:    stw 0, 36(1)
; CHECK32_64-NEXT:    .cfi_def_cfa_offset 32
; CHECK32_64-NEXT:    .cfi_offset lr, 4
; CHECK32_64-NEXT:    .cfi_offset r27, -20
; CHECK32_64-NEXT:    .cfi_offset r28, -16
; CHECK32_64-NEXT:    .cfi_offset r29, -12
; CHECK32_64-NEXT:    .cfi_offset r30, -8
; CHECK32_64-NEXT:    stw 27, 12(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 27, 5
; CHECK32_64-NEXT:    li 5, 0
; CHECK32_64-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 28, 3
; CHECK32_64-NEXT:    clrlwi 3, 7, 27
; CHECK32_64-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 29, 4
; CHECK32_64-NEXT:    mr 4, 8
; CHECK32_64-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
; CHECK32_64-NEXT:    mr 30, 6
; CHECK32_64-NEXT:    li 6, 37
; CHECK32_64-NEXT:    bl __umoddi3
; CHECK32_64-NEXT:    rotlwi 5, 30, 27
; CHECK32_64-NEXT:    addi 3, 4, 27
; CHECK32_64-NEXT:    andi. 4, 3, 32
; CHECK32_64-NEXT:    rlwimi 5, 27, 27, 0, 4
; CHECK32_64-NEXT:    mr 4, 5
; CHECK32_64-NEXT:    beq 0, .LBB11_2
; CHECK32_64-NEXT:  # %bb.1:
; CHECK32_64-NEXT:    mr 4, 29
; CHECK32_64-NEXT:  .LBB11_2:
; CHECK32_64-NEXT:    clrlwi 6, 3, 27
; CHECK32_64-NEXT:    srw 3, 4, 6
; CHECK32_64-NEXT:    beq 0, .LBB11_4
; CHECK32_64-NEXT:  # %bb.3:
; CHECK32_64-NEXT:    mr 29, 28
; CHECK32_64-NEXT:  .LBB11_4:
; CHECK32_64-NEXT:    subfic 7, 6, 32
; CHECK32_64-NEXT:    slw 8, 29, 7
; CHECK32_64-NEXT:    or 3, 8, 3
; CHECK32_64-NEXT:    bne 0, .LBB11_6
; CHECK32_64-NEXT:  # %bb.5:
; CHECK32_64-NEXT:    slwi 5, 30, 27
; CHECK32_64-NEXT:  .LBB11_6:
; CHECK32_64-NEXT:    srw 5, 5, 6
; CHECK32_64-NEXT:    slw 4, 4, 7
; CHECK32_64-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    or 4, 4, 5
; CHECK32_64-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    lwz 27, 12(1) # 4-byte Folded Reload
; CHECK32_64-NEXT:    lwz 0, 36(1)
; CHECK32_64-NEXT:    addi 1, 1, 32
; CHECK32_64-NEXT:    mtlr 0
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_i37:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    lis 7, 1771
; CHECK64-NEXT:    clrldi 6, 5, 27
; CHECK64-NEXT:    sldi 4, 4, 27
; CHECK64-NEXT:    ori 7, 7, 15941
; CHECK64-NEXT:    rldic 7, 7, 32, 5
; CHECK64-NEXT:    oris 7, 7, 12398
; CHECK64-NEXT:    ori 7, 7, 46053
; CHECK64-NEXT:    mulhdu 6, 6, 7
; CHECK64-NEXT:    mulli 6, 6, 37
; CHECK64-NEXT:    sub 5, 5, 6
; CHECK64-NEXT:    addi 5, 5, 27
; CHECK64-NEXT:    clrlwi 5, 5, 26
; CHECK64-NEXT:    srd 4, 4, 5
; CHECK64-NEXT:    subfic 5, 5, 64
; CHECK64-NEXT:    sld 3, 3, 5
; CHECK64-NEXT:    or 3, 3, 4
; CHECK64-NEXT:    blr
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111
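; i.e. fshr(112, 127, 2) = ((112 << 5) | (127 >> 2)) & 127 = 0 | 31 = 31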

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() {
; CHECK-LABEL: fshr_i7_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 31
; CHECK-NEXT:    blr
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is rotate + insert (missing extended mnemonics).

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_shift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 23
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_const_overshift:
; CHECK:       # %bb.0:
; CHECK-NEXT:    rotlwi 4, 4, 23
; CHECK-NEXT:    rlwimi 4, 3, 23, 0, 8
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
; CHECK32-LABEL: fshr_i64_const_overshift:
; CHECK32:       # %bb.0:
; CHECK32-NEXT:    rotlwi 6, 4, 23
; CHECK32-NEXT:    rotlwi 5, 5, 23
; CHECK32-NEXT:    rlwimi 6, 3, 23, 0, 8
; CHECK32-NEXT:    rlwimi 5, 4, 23, 0, 8
; CHECK32-NEXT:    mr 3, 6
; CHECK32-NEXT:    mr 4, 5
; CHECK32-NEXT:    blr
;
; CHECK64-LABEL: fshr_i64_const_overshift:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    rotldi 4, 4, 23
; CHECK64-NEXT:    rldimi 4, 3, 23, 0
; CHECK64-NEXT:    mr 3, 4
; CHECK64-NEXT:    blr
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
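; fshr(255, 0, 7) = ((255 << 1) | (0 >> 7)) & 255 = 254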

define i8 @fshr_i8_const_fold() {
; CHECK-LABEL: fshr_i8_const_fold:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li 3, 254
; CHECK-NEXT:    blr
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

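; A shift amount equal to the bitwidth reduces to z % 32 == 0, so fshl
; returns x and fshr returns y; each case below folds to plain register
; moves (or nothing at all).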
define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    mr 3, 4
; CHECK-NEXT:    blr
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
; CHECK:       # %bb.0:
; CHECK-NEXT:    blr
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
; CHECK32_32-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_32:       # %bb.0:
; CHECK32_32-NEXT:    mr 6, 10
; CHECK32_32-NEXT:    mr 5, 9
; CHECK32_32-NEXT:    mr 4, 8
; CHECK32_32-NEXT:    mr 3, 7
; CHECK32_32-NEXT:    blr
;
; CHECK32_64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK32_64:       # %bb.0:
; CHECK32_64-NEXT:    vmr 2, 3
; CHECK32_64-NEXT:    blr
;
; CHECK64-LABEL: fshr_v4i32_shift_by_bitwidth:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    vmr 2, 3
; CHECK64-NEXT:    blr
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}