xref: /llvm-project/llvm/test/CodeGen/PowerPC/wide-scalar-shift-legalization.ll (revision 032014ef103157bfd8403418538e25f3f58efa9d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,LE,LE-64BIT
3; RUN: llc < %s -mcpu=ppc -mtriple=powerpc64-unknown-linux-gnu   | FileCheck %s --check-prefixes=ALL,BE
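4; RUN: llc < %s -mtriple=ppc32--                       | FileCheck %s --check-prefixes=ALL,LE,LE-32BIT
; Note: despite the LE / LE-32BIT prefix names, the ppc32 checks below follow
; big-endian layout: like the powerpc64 checks they read the low word of a
; multi-word shift amount from the highest offset (e.g. offset 4 of an i64).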
5
; Logical right shift of a 4-byte value: loads an i32 operand and an i32 shift
; amount (both with align 1), applies lshr, and stores the i32 result to %dst.
; All three llc configurations share one expected sequence (a single srw).
6define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
7; ALL-LABEL: lshr_4bytes:
8; ALL:       # %bb.0:
9; ALL-NEXT:    lwz 3, 0(3)
10; ALL-NEXT:    lwz 4, 0(4)
11; ALL-NEXT:    srw 3, 3, 4
12; ALL-NEXT:    stw 3, 0(5)
13; ALL-NEXT:    blr
14  %src = load i32, ptr %src.ptr, align 1
15  %bitOff = load i32, ptr %bitOff.ptr, align 1
16  %res = lshr i32 %src, %bitOff
17  store i32 %res, ptr %dst, align 1
18  ret void
19}
; Left shift of a 4-byte value: loads an i32 operand and an i32 shift amount
; (both with align 1), applies shl, and stores the i32 result to %dst.
; All three llc configurations share one expected sequence (a single slw).
20define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
21; ALL-LABEL: shl_4bytes:
22; ALL:       # %bb.0:
23; ALL-NEXT:    lwz 3, 0(3)
24; ALL-NEXT:    lwz 4, 0(4)
25; ALL-NEXT:    slw 3, 3, 4
26; ALL-NEXT:    stw 3, 0(5)
27; ALL-NEXT:    blr
28  %src = load i32, ptr %src.ptr, align 1
29  %bitOff = load i32, ptr %bitOff.ptr, align 1
30  %res = shl i32 %src, %bitOff
31  store i32 %res, ptr %dst, align 1
32  ret void
33}
; Arithmetic right shift of a 4-byte value: loads an i32 operand and an i32
; shift amount (both with align 1), applies ashr, and stores the i32 result to
; %dst. All three llc configurations share one expected sequence (a single
; sraw).
34define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
35; ALL-LABEL: ashr_4bytes:
36; ALL:       # %bb.0:
37; ALL-NEXT:    lwz 3, 0(3)
38; ALL-NEXT:    lwz 4, 0(4)
39; ALL-NEXT:    sraw 3, 3, 4
40; ALL-NEXT:    stw 3, 0(5)
41; ALL-NEXT:    blr
42  %src = load i32, ptr %src.ptr, align 1
43  %bitOff = load i32, ptr %bitOff.ptr, align 1
44  %res = ashr i32 %src, %bitOff
45  store i32 %res, ptr %dst, align 1
46  ret void
47}
48
; Logical right shift of an 8-byte value: the operand and the shift amount are
; both loaded as i64 with align 1. The two 64-bit configurations are expected
; to use one srd and to read only a 32-bit word of the amount (offset 0 in the
; powerpc64le checks, offset 4 in the powerpc64 checks). The ppc32 checks
; expect the shift to be expanded into 32-bit srw/slw pieces merged with or,
; using the helper values 32 - amount (subfic) and amount - 32 (addi).
49define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
50; LE-64BIT-LABEL: lshr_8bytes:
51; LE-64BIT:       # %bb.0:
52; LE-64BIT-NEXT:    ld 3, 0(3)
53; LE-64BIT-NEXT:    lwz 4, 0(4)
54; LE-64BIT-NEXT:    srd 3, 3, 4
55; LE-64BIT-NEXT:    std 3, 0(5)
56; LE-64BIT-NEXT:    blr
57;
58; BE-LABEL: lshr_8bytes:
59; BE:       # %bb.0:
60; BE-NEXT:    ld 3, 0(3)
61; BE-NEXT:    lwz 4, 4(4)
62; BE-NEXT:    srd 3, 3, 4
63; BE-NEXT:    std 3, 0(5)
64; BE-NEXT:    blr
65;
66; LE-32BIT-LABEL: lshr_8bytes:
67; LE-32BIT:       # %bb.0:
68; LE-32BIT-NEXT:    lwz 4, 4(4)
69; LE-32BIT-NEXT:    lwz 6, 4(3)
70; LE-32BIT-NEXT:    lwz 3, 0(3)
71; LE-32BIT-NEXT:    subfic 7, 4, 32
72; LE-32BIT-NEXT:    srw 6, 6, 4
73; LE-32BIT-NEXT:    addi 8, 4, -32
74; LE-32BIT-NEXT:    slw 7, 3, 7
75; LE-32BIT-NEXT:    srw 4, 3, 4
76; LE-32BIT-NEXT:    srw 3, 3, 8
77; LE-32BIT-NEXT:    or 6, 6, 7
78; LE-32BIT-NEXT:    or 3, 6, 3
79; LE-32BIT-NEXT:    stw 4, 0(5)
80; LE-32BIT-NEXT:    stw 3, 4(5)
81; LE-32BIT-NEXT:    blr
82  %src = load i64, ptr %src.ptr, align 1
83  %bitOff = load i64, ptr %bitOff.ptr, align 1
84  %res = lshr i64 %src, %bitOff
85  store i64 %res, ptr %dst, align 1
86  ret void
87}
; Left shift of an 8-byte value: the operand and the shift amount are both
; loaded as i64 with align 1. The two 64-bit configurations are expected to
; use one sld and to read only a 32-bit word of the amount (offset 0 in the
; powerpc64le checks, offset 4 in the powerpc64 checks). The ppc32 checks
; expect the shift to be expanded into 32-bit slw/srw pieces merged with or,
; using the helper values 32 - amount (subfic) and amount - 32 (addi).
88define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
89; LE-64BIT-LABEL: shl_8bytes:
90; LE-64BIT:       # %bb.0:
91; LE-64BIT-NEXT:    ld 3, 0(3)
92; LE-64BIT-NEXT:    lwz 4, 0(4)
93; LE-64BIT-NEXT:    sld 3, 3, 4
94; LE-64BIT-NEXT:    std 3, 0(5)
95; LE-64BIT-NEXT:    blr
96;
97; BE-LABEL: shl_8bytes:
98; BE:       # %bb.0:
99; BE-NEXT:    ld 3, 0(3)
100; BE-NEXT:    lwz 4, 4(4)
101; BE-NEXT:    sld 3, 3, 4
102; BE-NEXT:    std 3, 0(5)
103; BE-NEXT:    blr
104;
105; LE-32BIT-LABEL: shl_8bytes:
106; LE-32BIT:       # %bb.0:
107; LE-32BIT-NEXT:    lwz 4, 4(4)
108; LE-32BIT-NEXT:    lwz 6, 0(3)
109; LE-32BIT-NEXT:    lwz 3, 4(3)
110; LE-32BIT-NEXT:    subfic 7, 4, 32
111; LE-32BIT-NEXT:    slw 6, 6, 4
112; LE-32BIT-NEXT:    addi 8, 4, -32
113; LE-32BIT-NEXT:    srw 7, 3, 7
114; LE-32BIT-NEXT:    slw 4, 3, 4
115; LE-32BIT-NEXT:    slw 3, 3, 8
116; LE-32BIT-NEXT:    or 6, 6, 7
117; LE-32BIT-NEXT:    or 3, 6, 3
118; LE-32BIT-NEXT:    stw 4, 4(5)
119; LE-32BIT-NEXT:    stw 3, 0(5)
120; LE-32BIT-NEXT:    blr
121  %src = load i64, ptr %src.ptr, align 1
122  %bitOff = load i64, ptr %bitOff.ptr, align 1
123  %res = shl i64 %src, %bitOff
124  store i64 %res, ptr %dst, align 1
125  ret void
126}
; Arithmetic right shift of an 8-byte value: the operand and the shift amount
; are both loaded as i64 with align 1. The two 64-bit configurations are
; expected to use one srad and to read only a 32-bit word of the amount.
; The ppc32 checks expect a branching expansion keyed on amount - 32: when
; that value is below 1 the low result word is merged from both input words
; with srw/slw/or, otherwise it is the high input word shifted by amount - 32
; with sraw. The test is spelled "cmpwi 7, 1" followed by "blt", the same
; form the ashr_16bytes checks use for the equivalent amount - 64 comparison.
127define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
128; LE-64BIT-LABEL: ashr_8bytes:
129; LE-64BIT:       # %bb.0:
130; LE-64BIT-NEXT:    ld 3, 0(3)
131; LE-64BIT-NEXT:    lwz 4, 0(4)
132; LE-64BIT-NEXT:    srad 3, 3, 4
133; LE-64BIT-NEXT:    std 3, 0(5)
134; LE-64BIT-NEXT:    blr
135;
136; BE-LABEL: ashr_8bytes:
137; BE:       # %bb.0:
138; BE-NEXT:    ld 3, 0(3)
139; BE-NEXT:    lwz 4, 4(4)
140; BE-NEXT:    srad 3, 3, 4
141; BE-NEXT:    std 3, 0(5)
142; BE-NEXT:    blr
143;
144; LE-32BIT-LABEL: ashr_8bytes:
145; LE-32BIT:       # %bb.0:
146; LE-32BIT-NEXT:    lwz 4, 4(4)
147; LE-32BIT-NEXT:    lwz 6, 0(3)
148; LE-32BIT-NEXT:    addi 7, 4, -32
149; LE-32BIT-NEXT:    cmpwi 7, 1
150; LE-32BIT-NEXT:    blt 0, .LBB5_2
151; LE-32BIT-NEXT:  # %bb.1:
152; LE-32BIT-NEXT:    sraw 3, 6, 7
153; LE-32BIT-NEXT:    b .LBB5_3
154; LE-32BIT-NEXT:  .LBB5_2:
155; LE-32BIT-NEXT:    lwz 3, 4(3)
156; LE-32BIT-NEXT:    subfic 7, 4, 32
157; LE-32BIT-NEXT:    slw 7, 6, 7
158; LE-32BIT-NEXT:    srw 3, 3, 4
159; LE-32BIT-NEXT:    or 3, 3, 7
160; LE-32BIT-NEXT:  .LBB5_3:
161; LE-32BIT-NEXT:    sraw 4, 6, 4
162; LE-32BIT-NEXT:    stw 4, 0(5)
163; LE-32BIT-NEXT:    stw 3, 4(5)
164; LE-32BIT-NEXT:    blr
165  %src = load i64, ptr %src.ptr, align 1
166  %bitOff = load i64, ptr %bitOff.ptr, align 1
167  %res = ashr i64 %src, %bitOff
168  store i64 %res, ptr %dst, align 1
169  ret void
170}
171
; Logical right shift of a 16-byte value (i128 operand and i128 shift amount,
; both loaded with align 1). The 64-bit checks expect a register-only
; expansion built from srd/sld with the helper values 64 - amount (subfic)
; and amount - 64 (addi). The ppc32 checks expect a stack-based sequence in a
; 48-byte frame: the operand words are stored next to 16 zero bytes, an
; offset derived from the amount (rlwinm) is subtracted from the operand's
; slot address to pick the first word to reload, and the low 5 bits of the
; amount (clrlwi ..., 27) drive the per-word srw/slw/or merging.
172define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
173; LE-64BIT-LABEL: lshr_16bytes:
174; LE-64BIT:       # %bb.0:
175; LE-64BIT-NEXT:    lwz 4, 0(4)
176; LE-64BIT-NEXT:    ld 6, 8(3)
177; LE-64BIT-NEXT:    subfic 7, 4, 64
178; LE-64BIT-NEXT:    ld 3, 0(3)
179; LE-64BIT-NEXT:    srd 3, 3, 4
180; LE-64BIT-NEXT:    sld 7, 6, 7
181; LE-64BIT-NEXT:    or 3, 3, 7
182; LE-64BIT-NEXT:    addi 7, 4, -64
183; LE-64BIT-NEXT:    srd 4, 6, 4
184; LE-64BIT-NEXT:    srd 7, 6, 7
185; LE-64BIT-NEXT:    std 4, 8(5)
186; LE-64BIT-NEXT:    or 3, 3, 7
187; LE-64BIT-NEXT:    std 3, 0(5)
188; LE-64BIT-NEXT:    blr
189;
190; BE-LABEL: lshr_16bytes:
191; BE:       # %bb.0:
192; BE-NEXT:    lwz 4, 12(4)
193; BE-NEXT:    ld 6, 0(3)
194; BE-NEXT:    ld 3, 8(3)
195; BE-NEXT:    subfic 7, 4, 64
196; BE-NEXT:    srd 3, 3, 4
197; BE-NEXT:    sld 7, 6, 7
198; BE-NEXT:    addi 8, 4, -64
199; BE-NEXT:    or 3, 3, 7
200; BE-NEXT:    srd 7, 6, 8
201; BE-NEXT:    srd 4, 6, 4
202; BE-NEXT:    or 3, 3, 7
203; BE-NEXT:    std 4, 0(5)
204; BE-NEXT:    std 3, 8(5)
205; BE-NEXT:    blr
206;
207; LE-32BIT-LABEL: lshr_16bytes:
208; LE-32BIT:       # %bb.0:
209; LE-32BIT-NEXT:    stwu 1, -48(1)
210; LE-32BIT-NEXT:    lwz 7, 0(3)
211; LE-32BIT-NEXT:    li 6, 0
212; LE-32BIT-NEXT:    lwz 8, 4(3)
213; LE-32BIT-NEXT:    lwz 9, 8(3)
214; LE-32BIT-NEXT:    lwz 3, 12(3)
215; LE-32BIT-NEXT:    lwz 4, 12(4)
216; LE-32BIT-NEXT:    stw 6, 28(1)
217; LE-32BIT-NEXT:    stw 6, 24(1)
218; LE-32BIT-NEXT:    stw 6, 20(1)
219; LE-32BIT-NEXT:    stw 6, 16(1)
220; LE-32BIT-NEXT:    rlwinm 6, 4, 29, 28, 29
221; LE-32BIT-NEXT:    stw 3, 44(1)
222; LE-32BIT-NEXT:    addi 3, 1, 32
223; LE-32BIT-NEXT:    stw 9, 40(1)
224; LE-32BIT-NEXT:    sub 3, 3, 6
225; LE-32BIT-NEXT:    stw 8, 36(1)
226; LE-32BIT-NEXT:    clrlwi 4, 4, 27
227; LE-32BIT-NEXT:    stw 7, 32(1)
228; LE-32BIT-NEXT:    subfic 9, 4, 32
229; LE-32BIT-NEXT:    lwz 6, 4(3)
230; LE-32BIT-NEXT:    lwz 7, 0(3)
231; LE-32BIT-NEXT:    lwz 8, 12(3)
232; LE-32BIT-NEXT:    srw 10, 6, 4
233; LE-32BIT-NEXT:    lwz 3, 8(3)
234; LE-32BIT-NEXT:    slw 11, 7, 9
235; LE-32BIT-NEXT:    slw 6, 6, 9
236; LE-32BIT-NEXT:    srw 8, 8, 4
237; LE-32BIT-NEXT:    slw 9, 3, 9
238; LE-32BIT-NEXT:    srw 3, 3, 4
239; LE-32BIT-NEXT:    or 3, 6, 3
240; LE-32BIT-NEXT:    stw 3, 8(5)
241; LE-32BIT-NEXT:    or 3, 9, 8
242; LE-32BIT-NEXT:    srw 4, 7, 4
243; LE-32BIT-NEXT:    stw 3, 12(5)
244; LE-32BIT-NEXT:    or 3, 11, 10
245; LE-32BIT-NEXT:    stw 4, 0(5)
246; LE-32BIT-NEXT:    stw 3, 4(5)
247; LE-32BIT-NEXT:    addi 1, 1, 48
248; LE-32BIT-NEXT:    blr
249  %src = load i128, ptr %src.ptr, align 1
250  %bitOff = load i128, ptr %bitOff.ptr, align 1
251  %res = lshr i128 %src, %bitOff
252  store i128 %res, ptr %dst, align 1
253  ret void
254}
; Left shift of a 16-byte value (i128 operand and i128 shift amount, both
; loaded with align 1). The 64-bit checks expect a register-only expansion
; built from sld/srd with the helper values 64 - amount (subfic) and
; amount - 64 (addi). The ppc32 checks expect a stack-based sequence in a
; 48-byte frame: the operand words are stored below 16 zero bytes, an offset
; derived from the amount (rlwinm) is added to the operand's slot address by
; the update-form load (lwzux), and the low 5 bits of the amount
; (clrlwi ..., 27) drive the per-word slw/srw/or merging.
255define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
256; LE-64BIT-LABEL: shl_16bytes:
257; LE-64BIT:       # %bb.0:
258; LE-64BIT-NEXT:    lwz 4, 0(4)
259; LE-64BIT-NEXT:    ld 6, 0(3)
260; LE-64BIT-NEXT:    subfic 7, 4, 64
261; LE-64BIT-NEXT:    ld 3, 8(3)
262; LE-64BIT-NEXT:    sld 3, 3, 4
263; LE-64BIT-NEXT:    srd 7, 6, 7
264; LE-64BIT-NEXT:    or 3, 3, 7
265; LE-64BIT-NEXT:    addi 7, 4, -64
266; LE-64BIT-NEXT:    sld 4, 6, 4
267; LE-64BIT-NEXT:    sld 7, 6, 7
268; LE-64BIT-NEXT:    std 4, 0(5)
269; LE-64BIT-NEXT:    or 3, 3, 7
270; LE-64BIT-NEXT:    std 3, 8(5)
271; LE-64BIT-NEXT:    blr
272;
273; BE-LABEL: shl_16bytes:
274; BE:       # %bb.0:
275; BE-NEXT:    lwz 4, 12(4)
276; BE-NEXT:    ld 6, 8(3)
277; BE-NEXT:    ld 3, 0(3)
278; BE-NEXT:    subfic 7, 4, 64
279; BE-NEXT:    sld 3, 3, 4
280; BE-NEXT:    srd 7, 6, 7
281; BE-NEXT:    addi 8, 4, -64
282; BE-NEXT:    or 3, 3, 7
283; BE-NEXT:    sld 7, 6, 8
284; BE-NEXT:    sld 4, 6, 4
285; BE-NEXT:    or 3, 3, 7
286; BE-NEXT:    std 4, 8(5)
287; BE-NEXT:    std 3, 0(5)
288; BE-NEXT:    blr
289;
290; LE-32BIT-LABEL: shl_16bytes:
291; LE-32BIT:       # %bb.0:
292; LE-32BIT-NEXT:    stwu 1, -48(1)
293; LE-32BIT-NEXT:    lwz 7, 0(3)
294; LE-32BIT-NEXT:    li 6, 0
295; LE-32BIT-NEXT:    lwz 8, 4(3)
296; LE-32BIT-NEXT:    lwz 9, 8(3)
297; LE-32BIT-NEXT:    lwz 3, 12(3)
298; LE-32BIT-NEXT:    lwz 4, 12(4)
299; LE-32BIT-NEXT:    stw 6, 44(1)
300; LE-32BIT-NEXT:    stw 6, 40(1)
301; LE-32BIT-NEXT:    stw 6, 36(1)
302; LE-32BIT-NEXT:    stw 6, 32(1)
303; LE-32BIT-NEXT:    rlwinm 6, 4, 29, 28, 29
304; LE-32BIT-NEXT:    stw 3, 28(1)
305; LE-32BIT-NEXT:    addi 3, 1, 16
306; LE-32BIT-NEXT:    stw 9, 24(1)
307; LE-32BIT-NEXT:    clrlwi 4, 4, 27
308; LE-32BIT-NEXT:    stw 8, 20(1)
309; LE-32BIT-NEXT:    subfic 8, 4, 32
310; LE-32BIT-NEXT:    stw 7, 16(1)
311; LE-32BIT-NEXT:    lwzux 3, 6, 3
312; LE-32BIT-NEXT:    lwz 9, 4(6)
313; LE-32BIT-NEXT:    slw 3, 3, 4
314; LE-32BIT-NEXT:    lwz 7, 8(6)
315; LE-32BIT-NEXT:    lwz 6, 12(6)
316; LE-32BIT-NEXT:    slw 11, 9, 4
317; LE-32BIT-NEXT:    srw 9, 9, 8
318; LE-32BIT-NEXT:    srw 10, 7, 8
319; LE-32BIT-NEXT:    srw 8, 6, 8
320; LE-32BIT-NEXT:    slw 7, 7, 4
321; LE-32BIT-NEXT:    slw 4, 6, 4
322; LE-32BIT-NEXT:    or 3, 3, 9
323; LE-32BIT-NEXT:    stw 4, 12(5)
324; LE-32BIT-NEXT:    or 4, 7, 8
325; LE-32BIT-NEXT:    stw 3, 0(5)
326; LE-32BIT-NEXT:    or 3, 11, 10
327; LE-32BIT-NEXT:    stw 4, 8(5)
328; LE-32BIT-NEXT:    stw 3, 4(5)
329; LE-32BIT-NEXT:    addi 1, 1, 48
330; LE-32BIT-NEXT:    blr
331  %src = load i128, ptr %src.ptr, align 1
332  %bitOff = load i128, ptr %bitOff.ptr, align 1
333  %res = shl i128 %src, %bitOff
334  store i128 %res, ptr %dst, align 1
335  ret void
336}
; Arithmetic right shift of a 16-byte value (i128 operand and i128 shift
; amount, both loaded with align 1). Both 64-bit configurations compare
; amount - 64 against 1 to choose the low result doubleword: the powerpc64le
; checks expect a branch-free isellt, while the powerpc64 checks (run with
; -mcpu=ppc) expect a blt over two basic blocks. The ppc32 checks expect a
; stack-based sequence in a 48-byte frame: the operand words are stored next
; to 16 bytes filled with the sign word (srawi ..., 31), an offset derived
; from the amount (rlwinm) selects the first word to reload, and the low
; 5 bits of the amount (clrlwi ..., 27) drive the per-word merging, with sraw
; used only for the most significant result word.
337define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
338; LE-64BIT-LABEL: ashr_16bytes:
339; LE-64BIT:       # %bb.0:
340; LE-64BIT-NEXT:    lwz 4, 0(4)
341; LE-64BIT-NEXT:    ld 6, 8(3)
342; LE-64BIT-NEXT:    subfic 7, 4, 64
343; LE-64BIT-NEXT:    ld 3, 0(3)
344; LE-64BIT-NEXT:    srd 3, 3, 4
345; LE-64BIT-NEXT:    sld 7, 6, 7
346; LE-64BIT-NEXT:    or 3, 3, 7
347; LE-64BIT-NEXT:    addi 7, 4, -64
348; LE-64BIT-NEXT:    srad 4, 6, 4
349; LE-64BIT-NEXT:    cmpwi 7, 1
350; LE-64BIT-NEXT:    srad 8, 6, 7
351; LE-64BIT-NEXT:    std 4, 8(5)
352; LE-64BIT-NEXT:    isellt 3, 3, 8
353; LE-64BIT-NEXT:    std 3, 0(5)
354; LE-64BIT-NEXT:    blr
355;
356; BE-LABEL: ashr_16bytes:
357; BE:       # %bb.0:
358; BE-NEXT:    lwz 4, 12(4)
359; BE-NEXT:    ld 6, 0(3)
360; BE-NEXT:    addi 7, 4, -64
361; BE-NEXT:    cmpwi 7, 1
362; BE-NEXT:    blt 0, .LBB8_2
363; BE-NEXT:  # %bb.1:
364; BE-NEXT:    srad 3, 6, 7
365; BE-NEXT:    b .LBB8_3
366; BE-NEXT:  .LBB8_2:
367; BE-NEXT:    ld 3, 8(3)
368; BE-NEXT:    subfic 7, 4, 64
369; BE-NEXT:    sld 7, 6, 7
370; BE-NEXT:    srd 3, 3, 4
371; BE-NEXT:    or 3, 3, 7
372; BE-NEXT:  .LBB8_3:
373; BE-NEXT:    srad 4, 6, 4
374; BE-NEXT:    std 3, 8(5)
375; BE-NEXT:    std 4, 0(5)
376; BE-NEXT:    blr
377;
378; LE-32BIT-LABEL: ashr_16bytes:
379; LE-32BIT:       # %bb.0:
380; LE-32BIT-NEXT:    stwu 1, -48(1)
381; LE-32BIT-NEXT:    lwz 7, 0(3)
382; LE-32BIT-NEXT:    addi 6, 1, 32
383; LE-32BIT-NEXT:    lwz 8, 4(3)
384; LE-32BIT-NEXT:    lwz 9, 8(3)
385; LE-32BIT-NEXT:    lwz 3, 12(3)
386; LE-32BIT-NEXT:    lwz 4, 12(4)
387; LE-32BIT-NEXT:    stw 3, 44(1)
388; LE-32BIT-NEXT:    srawi 3, 7, 31
389; LE-32BIT-NEXT:    stw 7, 32(1)
390; LE-32BIT-NEXT:    rlwinm 7, 4, 29, 28, 29
391; LE-32BIT-NEXT:    stw 9, 40(1)
392; LE-32BIT-NEXT:    clrlwi 4, 4, 27
393; LE-32BIT-NEXT:    stw 8, 36(1)
394; LE-32BIT-NEXT:    subfic 9, 4, 32
395; LE-32BIT-NEXT:    stw 3, 28(1)
396; LE-32BIT-NEXT:    stw 3, 24(1)
397; LE-32BIT-NEXT:    stw 3, 20(1)
398; LE-32BIT-NEXT:    stw 3, 16(1)
399; LE-32BIT-NEXT:    sub 3, 6, 7
400; LE-32BIT-NEXT:    lwz 6, 4(3)
401; LE-32BIT-NEXT:    lwz 7, 0(3)
402; LE-32BIT-NEXT:    lwz 8, 12(3)
403; LE-32BIT-NEXT:    srw 10, 6, 4
404; LE-32BIT-NEXT:    lwz 3, 8(3)
405; LE-32BIT-NEXT:    slw 11, 7, 9
406; LE-32BIT-NEXT:    slw 6, 6, 9
407; LE-32BIT-NEXT:    srw 8, 8, 4
408; LE-32BIT-NEXT:    slw 9, 3, 9
409; LE-32BIT-NEXT:    srw 3, 3, 4
410; LE-32BIT-NEXT:    or 3, 6, 3
411; LE-32BIT-NEXT:    stw 3, 8(5)
412; LE-32BIT-NEXT:    or 3, 9, 8
413; LE-32BIT-NEXT:    sraw 4, 7, 4
414; LE-32BIT-NEXT:    stw 3, 12(5)
415; LE-32BIT-NEXT:    or 3, 11, 10
416; LE-32BIT-NEXT:    stw 4, 0(5)
417; LE-32BIT-NEXT:    stw 3, 4(5)
418; LE-32BIT-NEXT:    addi 1, 1, 48
419; LE-32BIT-NEXT:    blr
420  %src = load i128, ptr %src.ptr, align 1
421  %bitOff = load i128, ptr %bitOff.ptr, align 1
422  %res = ashr i128 %src, %bitOff
423  store i128 %res, ptr %dst, align 1
424  ret void
425}
426
; Logical right shift of a 32-byte value (i256 operand and i256 shift amount,
; both loaded with align 1). All three configurations are expected to go
; through a stack buffer holding the operand next to 32 zero bytes, reload
; from an address offset derived from the amount (rlwinm), and then merge
; neighbouring pieces with shifts by the remaining low bits of the amount.
; The powerpc64le checks copy the operand with lxvd2x/stxvd2x and zero the
; padding via xxlxor; the powerpc64 checks use ld/std below the stack
; pointer; the ppc32 checks allocate a 112-byte frame and spill/reload
; registers 25 through 30 around the 32-bit srw/slw/or merging.
427define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
428; LE-64BIT-LABEL: lshr_32bytes:
429; LE-64BIT:       # %bb.0:
430; LE-64BIT-NEXT:    li 6, 16
431; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
432; LE-64BIT-NEXT:    xxlxor 2, 2, 2
433; LE-64BIT-NEXT:    addi 7, 1, -64
434; LE-64BIT-NEXT:    li 8, 32
435; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
436; LE-64BIT-NEXT:    lwz 3, 0(4)
437; LE-64BIT-NEXT:    li 4, 48
438; LE-64BIT-NEXT:    stxvd2x 2, 7, 8
439; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
440; LE-64BIT-NEXT:    rlwinm 4, 3, 29, 27, 28
441; LE-64BIT-NEXT:    stxvd2x 1, 0, 7
442; LE-64BIT-NEXT:    clrlwi 3, 3, 26
443; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
444; LE-64BIT-NEXT:    xori 8, 3, 63
445; LE-64BIT-NEXT:    ldux 6, 4, 7
446; LE-64BIT-NEXT:    ld 7, 16(4)
447; LE-64BIT-NEXT:    ld 9, 8(4)
448; LE-64BIT-NEXT:    ld 4, 24(4)
449; LE-64BIT-NEXT:    srd 6, 6, 3
450; LE-64BIT-NEXT:    sldi 11, 7, 1
451; LE-64BIT-NEXT:    srd 10, 9, 3
452; LE-64BIT-NEXT:    srd 7, 7, 3
453; LE-64BIT-NEXT:    sld 8, 11, 8
454; LE-64BIT-NEXT:    or 8, 10, 8
455; LE-64BIT-NEXT:    subfic 10, 3, 64
456; LE-64BIT-NEXT:    srd 3, 4, 3
457; LE-64BIT-NEXT:    sld 11, 4, 10
458; LE-64BIT-NEXT:    sld 9, 9, 10
459; LE-64BIT-NEXT:    std 3, 24(5)
460; LE-64BIT-NEXT:    std 8, 8(5)
461; LE-64BIT-NEXT:    or 6, 9, 6
462; LE-64BIT-NEXT:    or 3, 11, 7
463; LE-64BIT-NEXT:    std 6, 0(5)
464; LE-64BIT-NEXT:    std 3, 16(5)
465; LE-64BIT-NEXT:    blr
466;
467; BE-LABEL: lshr_32bytes:
468; BE:       # %bb.0:
469; BE-NEXT:    ld 6, 0(3)
470; BE-NEXT:    ld 7, 8(3)
471; BE-NEXT:    ld 8, 16(3)
472; BE-NEXT:    ld 3, 24(3)
473; BE-NEXT:    lwz 4, 28(4)
474; BE-NEXT:    li 9, 0
475; BE-NEXT:    addi 10, 1, -32
476; BE-NEXT:    std 9, -40(1)
477; BE-NEXT:    std 9, -48(1)
478; BE-NEXT:    std 9, -56(1)
479; BE-NEXT:    std 9, -64(1)
480; BE-NEXT:    std 3, -8(1)
481; BE-NEXT:    rlwinm 3, 4, 29, 27, 28
482; BE-NEXT:    neg 3, 3
483; BE-NEXT:    std 8, -16(1)
484; BE-NEXT:    std 7, -24(1)
485; BE-NEXT:    std 6, -32(1)
486; BE-NEXT:    extsw 3, 3
487; BE-NEXT:    ldux 3, 10, 3
488; BE-NEXT:    clrlwi 4, 4, 26
489; BE-NEXT:    subfic 9, 4, 64
490; BE-NEXT:    ld 6, 8(10)
491; BE-NEXT:    ld 7, 24(10)
492; BE-NEXT:    ld 8, 16(10)
493; BE-NEXT:    sld 10, 3, 9
494; BE-NEXT:    srd 3, 3, 4
495; BE-NEXT:    std 3, 0(5)
496; BE-NEXT:    srd 11, 6, 4
497; BE-NEXT:    srd 7, 7, 4
498; BE-NEXT:    sld 6, 6, 9
499; BE-NEXT:    sld 9, 8, 9
500; BE-NEXT:    srd 8, 8, 4
501; BE-NEXT:    or 10, 10, 11
502; BE-NEXT:    or 7, 9, 7
503; BE-NEXT:    or 6, 6, 8
504; BE-NEXT:    std 6, 16(5)
505; BE-NEXT:    std 7, 24(5)
506; BE-NEXT:    std 10, 8(5)
507; BE-NEXT:    blr
508;
509; LE-32BIT-LABEL: lshr_32bytes:
510; LE-32BIT:       # %bb.0:
511; LE-32BIT-NEXT:    stwu 1, -112(1)
512; LE-32BIT-NEXT:    lwz 7, 0(3)
513; LE-32BIT-NEXT:    li 6, 0
514; LE-32BIT-NEXT:    lwz 8, 4(3)
515; LE-32BIT-NEXT:    lwz 9, 8(3)
516; LE-32BIT-NEXT:    lwz 10, 12(3)
517; LE-32BIT-NEXT:    lwz 11, 16(3)
518; LE-32BIT-NEXT:    lwz 12, 20(3)
519; LE-32BIT-NEXT:    lwz 0, 24(3)
520; LE-32BIT-NEXT:    lwz 3, 28(3)
521; LE-32BIT-NEXT:    lwz 4, 28(4)
522; LE-32BIT-NEXT:    stw 6, 44(1)
523; LE-32BIT-NEXT:    stw 6, 40(1)
524; LE-32BIT-NEXT:    stw 6, 36(1)
525; LE-32BIT-NEXT:    stw 6, 32(1)
526; LE-32BIT-NEXT:    stw 6, 28(1)
527; LE-32BIT-NEXT:    stw 6, 24(1)
528; LE-32BIT-NEXT:    stw 6, 20(1)
529; LE-32BIT-NEXT:    stw 6, 16(1)
530; LE-32BIT-NEXT:    rlwinm 6, 4, 29, 27, 29
531; LE-32BIT-NEXT:    stw 3, 76(1)
532; LE-32BIT-NEXT:    addi 3, 1, 48
533; LE-32BIT-NEXT:    stw 25, 84(1) # 4-byte Folded Spill
534; LE-32BIT-NEXT:    sub 3, 3, 6
535; LE-32BIT-NEXT:    stw 26, 88(1) # 4-byte Folded Spill
536; LE-32BIT-NEXT:    clrlwi 4, 4, 27
537; LE-32BIT-NEXT:    stw 27, 92(1) # 4-byte Folded Spill
538; LE-32BIT-NEXT:    stw 28, 96(1) # 4-byte Folded Spill
539; LE-32BIT-NEXT:    stw 29, 100(1) # 4-byte Folded Spill
540; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
541; LE-32BIT-NEXT:    subfic 30, 4, 32
542; LE-32BIT-NEXT:    stw 0, 72(1)
543; LE-32BIT-NEXT:    stw 12, 68(1)
544; LE-32BIT-NEXT:    xori 12, 4, 31
545; LE-32BIT-NEXT:    stw 11, 64(1)
546; LE-32BIT-NEXT:    stw 10, 60(1)
547; LE-32BIT-NEXT:    stw 9, 56(1)
548; LE-32BIT-NEXT:    stw 8, 52(1)
549; LE-32BIT-NEXT:    stw 7, 48(1)
550; LE-32BIT-NEXT:    lwz 6, 8(3)
551; LE-32BIT-NEXT:    lwz 7, 4(3)
552; LE-32BIT-NEXT:    lwz 8, 0(3)
553; LE-32BIT-NEXT:    srw 29, 6, 4
554; LE-32BIT-NEXT:    lwz 9, 12(3)
555; LE-32BIT-NEXT:    slw 6, 6, 30
556; LE-32BIT-NEXT:    lwz 10, 20(3)
557; LE-32BIT-NEXT:    slw 28, 8, 30
558; LE-32BIT-NEXT:    lwz 11, 16(3)
559; LE-32BIT-NEXT:    srw 27, 9, 4
560; LE-32BIT-NEXT:    lwz 0, 28(3)
561; LE-32BIT-NEXT:    srw 26, 10, 4
562; LE-32BIT-NEXT:    lwz 3, 24(3)
563; LE-32BIT-NEXT:    slw 25, 11, 30
564; LE-32BIT-NEXT:    slw 9, 9, 30
565; LE-32BIT-NEXT:    slw 10, 10, 30
566; LE-32BIT-NEXT:    slw 30, 3, 30
567; LE-32BIT-NEXT:    srw 3, 3, 4
568; LE-32BIT-NEXT:    srw 0, 0, 4
569; LE-32BIT-NEXT:    or 3, 10, 3
570; LE-32BIT-NEXT:    srw 11, 11, 4
571; LE-32BIT-NEXT:    stw 3, 24(5)
572; LE-32BIT-NEXT:    or 3, 30, 0
573; LE-32BIT-NEXT:    stw 3, 28(5)
574; LE-32BIT-NEXT:    or 3, 9, 11
575; LE-32BIT-NEXT:    stw 3, 16(5)
576; LE-32BIT-NEXT:    or 3, 25, 26
577; LE-32BIT-NEXT:    srw 8, 8, 4
578; LE-32BIT-NEXT:    srw 4, 7, 4
579; LE-32BIT-NEXT:    slwi 7, 7, 1
580; LE-32BIT-NEXT:    stw 3, 20(5)
581; LE-32BIT-NEXT:    or 3, 6, 27
582; LE-32BIT-NEXT:    slw 7, 7, 12
583; LE-32BIT-NEXT:    stw 3, 12(5)
584; LE-32BIT-NEXT:    or 3, 28, 4
585; LE-32BIT-NEXT:    stw 3, 4(5)
586; LE-32BIT-NEXT:    or 3, 29, 7
587; LE-32BIT-NEXT:    stw 8, 0(5)
588; LE-32BIT-NEXT:    stw 3, 8(5)
589; LE-32BIT-NEXT:    lwz 30, 104(1) # 4-byte Folded Reload
590; LE-32BIT-NEXT:    lwz 29, 100(1) # 4-byte Folded Reload
591; LE-32BIT-NEXT:    lwz 28, 96(1) # 4-byte Folded Reload
592; LE-32BIT-NEXT:    lwz 27, 92(1) # 4-byte Folded Reload
593; LE-32BIT-NEXT:    lwz 26, 88(1) # 4-byte Folded Reload
594; LE-32BIT-NEXT:    lwz 25, 84(1) # 4-byte Folded Reload
595; LE-32BIT-NEXT:    addi 1, 1, 112
596; LE-32BIT-NEXT:    blr
597  %src = load i256, ptr %src.ptr, align 1
598  %bitOff = load i256, ptr %bitOff.ptr, align 1
599  %res = lshr i256 %src, %bitOff
600  store i256 %res, ptr %dst, align 1
601  ret void
602}
; Left shift of a 32-byte value (i256 operand and i256 shift amount, both
; loaded with align 1). All three configurations are expected to go through a
; stack buffer holding the operand next to 32 zero bytes, reload from an
; address offset derived from the amount (rlwinm) via an update-form load,
; and then merge neighbouring pieces with shifts by the remaining low bits of
; the amount. The powerpc64le checks copy the operand with lxvd2x/stxvd2x and
; zero the padding via xxlxor; the powerpc64 checks use ld/std below the
; stack pointer; the ppc32 checks allocate a 112-byte frame and spill/reload
; registers 25 through 30 around the 32-bit slw/srw/or merging.
603define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
604; LE-64BIT-LABEL: shl_32bytes:
605; LE-64BIT:       # %bb.0:
606; LE-64BIT-NEXT:    li 6, 16
607; LE-64BIT-NEXT:    lwz 4, 0(4)
608; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
609; LE-64BIT-NEXT:    addi 7, 1, -64
610; LE-64BIT-NEXT:    li 8, 48
611; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
612; LE-64BIT-NEXT:    rlwinm 3, 4, 29, 27, 28
613; LE-64BIT-NEXT:    clrlwi 4, 4, 26
614; LE-64BIT-NEXT:    neg 3, 3
615; LE-64BIT-NEXT:    stxvd2x 0, 7, 8
616; LE-64BIT-NEXT:    xxlxor 0, 0, 0
617; LE-64BIT-NEXT:    li 8, 32
618; LE-64BIT-NEXT:    extsw 3, 3
619; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
620; LE-64BIT-NEXT:    addi 6, 1, -32
621; LE-64BIT-NEXT:    stxvd2x 0, 0, 7
622; LE-64BIT-NEXT:    stxvd2x 1, 7, 8
623; LE-64BIT-NEXT:    subfic 7, 4, 64
624; LE-64BIT-NEXT:    ldux 3, 6, 3
625; LE-64BIT-NEXT:    ld 8, 16(6)
626; LE-64BIT-NEXT:    ld 9, 24(6)
627; LE-64BIT-NEXT:    ld 6, 8(6)
628; LE-64BIT-NEXT:    srd 10, 8, 7
629; LE-64BIT-NEXT:    sld 9, 9, 4
630; LE-64BIT-NEXT:    sld 8, 8, 4
631; LE-64BIT-NEXT:    or 9, 9, 10
632; LE-64BIT-NEXT:    srd 10, 6, 7
633; LE-64BIT-NEXT:    srd 7, 3, 7
634; LE-64BIT-NEXT:    sld 6, 6, 4
635; LE-64BIT-NEXT:    sld 3, 3, 4
636; LE-64BIT-NEXT:    or 6, 6, 7
637; LE-64BIT-NEXT:    std 3, 0(5)
638; LE-64BIT-NEXT:    or 3, 8, 10
639; LE-64BIT-NEXT:    std 9, 24(5)
640; LE-64BIT-NEXT:    std 6, 8(5)
641; LE-64BIT-NEXT:    std 3, 16(5)
642; LE-64BIT-NEXT:    blr
643;
644; BE-LABEL: shl_32bytes:
645; BE:       # %bb.0:
646; BE-NEXT:    ld 6, 0(3)
647; BE-NEXT:    ld 7, 8(3)
648; BE-NEXT:    ld 8, 16(3)
649; BE-NEXT:    ld 3, 24(3)
650; BE-NEXT:    lwz 4, 28(4)
651; BE-NEXT:    li 9, 0
652; BE-NEXT:    addi 10, 1, -64
653; BE-NEXT:    std 9, -8(1)
654; BE-NEXT:    std 9, -16(1)
655; BE-NEXT:    std 9, -24(1)
656; BE-NEXT:    std 9, -32(1)
657; BE-NEXT:    std 3, -40(1)
658; BE-NEXT:    std 8, -48(1)
659; BE-NEXT:    std 7, -56(1)
660; BE-NEXT:    std 6, -64(1)
661; BE-NEXT:    rlwinm 3, 4, 29, 27, 28
662; BE-NEXT:    ldux 6, 3, 10
663; BE-NEXT:    clrlwi 4, 4, 26
664; BE-NEXT:    subfic 9, 4, 64
665; BE-NEXT:    ld 7, 16(3)
666; BE-NEXT:    ld 8, 8(3)
667; BE-NEXT:    ld 3, 24(3)
668; BE-NEXT:    sld 6, 6, 4
669; BE-NEXT:    srd 10, 7, 9
670; BE-NEXT:    sld 11, 8, 4
671; BE-NEXT:    srd 8, 8, 9
672; BE-NEXT:    srd 9, 3, 9
673; BE-NEXT:    sld 7, 7, 4
674; BE-NEXT:    sld 3, 3, 4
675; BE-NEXT:    or 10, 11, 10
676; BE-NEXT:    or 6, 6, 8
677; BE-NEXT:    or 7, 7, 9
678; BE-NEXT:    std 3, 24(5)
679; BE-NEXT:    std 7, 16(5)
680; BE-NEXT:    std 6, 0(5)
681; BE-NEXT:    std 10, 8(5)
682; BE-NEXT:    blr
683;
684; LE-32BIT-LABEL: shl_32bytes:
685; LE-32BIT:       # %bb.0:
686; LE-32BIT-NEXT:    stwu 1, -112(1)
687; LE-32BIT-NEXT:    lwz 7, 0(3)
688; LE-32BIT-NEXT:    li 6, 0
689; LE-32BIT-NEXT:    lwz 8, 4(3)
690; LE-32BIT-NEXT:    lwz 9, 8(3)
691; LE-32BIT-NEXT:    lwz 10, 12(3)
692; LE-32BIT-NEXT:    lwz 11, 16(3)
693; LE-32BIT-NEXT:    lwz 12, 20(3)
694; LE-32BIT-NEXT:    lwz 0, 24(3)
695; LE-32BIT-NEXT:    lwz 3, 28(3)
696; LE-32BIT-NEXT:    lwz 4, 28(4)
697; LE-32BIT-NEXT:    stw 25, 84(1) # 4-byte Folded Spill
698; LE-32BIT-NEXT:    stw 26, 88(1) # 4-byte Folded Spill
699; LE-32BIT-NEXT:    stw 27, 92(1) # 4-byte Folded Spill
700; LE-32BIT-NEXT:    stw 28, 96(1) # 4-byte Folded Spill
701; LE-32BIT-NEXT:    stw 29, 100(1) # 4-byte Folded Spill
702; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
703; LE-32BIT-NEXT:    stw 6, 76(1)
704; LE-32BIT-NEXT:    stw 6, 72(1)
705; LE-32BIT-NEXT:    stw 6, 68(1)
706; LE-32BIT-NEXT:    stw 6, 64(1)
707; LE-32BIT-NEXT:    stw 6, 60(1)
708; LE-32BIT-NEXT:    stw 6, 56(1)
709; LE-32BIT-NEXT:    stw 6, 52(1)
710; LE-32BIT-NEXT:    stw 6, 48(1)
711; LE-32BIT-NEXT:    rlwinm 6, 4, 29, 27, 29
712; LE-32BIT-NEXT:    stw 3, 44(1)
713; LE-32BIT-NEXT:    addi 3, 1, 16
714; LE-32BIT-NEXT:    stw 0, 40(1)
715; LE-32BIT-NEXT:    clrlwi 4, 4, 27
716; LE-32BIT-NEXT:    stw 12, 36(1)
717; LE-32BIT-NEXT:    subfic 12, 4, 32
718; LE-32BIT-NEXT:    stw 11, 32(1)
719; LE-32BIT-NEXT:    stw 10, 28(1)
720; LE-32BIT-NEXT:    stw 9, 24(1)
721; LE-32BIT-NEXT:    stw 8, 20(1)
722; LE-32BIT-NEXT:    stw 7, 16(1)
723; LE-32BIT-NEXT:    lwzux 3, 6, 3
724; LE-32BIT-NEXT:    lwz 7, 8(6)
725; LE-32BIT-NEXT:    slw 3, 3, 4
726; LE-32BIT-NEXT:    lwz 8, 4(6)
727; LE-32BIT-NEXT:    lwz 9, 16(6)
728; LE-32BIT-NEXT:    srw 30, 7, 12
729; LE-32BIT-NEXT:    lwz 10, 12(6)
730; LE-32BIT-NEXT:    slw 29, 8, 4
731; LE-32BIT-NEXT:    lwz 11, 24(6)
732; LE-32BIT-NEXT:    srw 8, 8, 12
733; LE-32BIT-NEXT:    lwz 0, 20(6)
734; LE-32BIT-NEXT:    srw 28, 9, 12
735; LE-32BIT-NEXT:    lwz 6, 28(6)
736; LE-32BIT-NEXT:    slw 27, 10, 4
737; LE-32BIT-NEXT:    srw 10, 10, 12
738; LE-32BIT-NEXT:    slw 7, 7, 4
739; LE-32BIT-NEXT:    srw 26, 11, 12
740; LE-32BIT-NEXT:    slw 25, 0, 4
741; LE-32BIT-NEXT:    srw 0, 0, 12
742; LE-32BIT-NEXT:    slw 9, 9, 4
743; LE-32BIT-NEXT:    srw 12, 6, 12
744; LE-32BIT-NEXT:    slw 11, 11, 4
745; LE-32BIT-NEXT:    slw 4, 6, 4
746; LE-32BIT-NEXT:    stw 4, 28(5)
747; LE-32BIT-NEXT:    or 4, 11, 12
748; LE-32BIT-NEXT:    stw 4, 24(5)
749; LE-32BIT-NEXT:    or 4, 9, 0
750; LE-32BIT-NEXT:    stw 4, 16(5)
751; LE-32BIT-NEXT:    or 4, 25, 26
752; LE-32BIT-NEXT:    stw 4, 20(5)
753; LE-32BIT-NEXT:    or 4, 7, 10
754; LE-32BIT-NEXT:    or 3, 3, 8
755; LE-32BIT-NEXT:    stw 4, 8(5)
756; LE-32BIT-NEXT:    or 4, 27, 28
757; LE-32BIT-NEXT:    stw 3, 0(5)
758; LE-32BIT-NEXT:    or 3, 29, 30
759; LE-32BIT-NEXT:    stw 4, 12(5)
760; LE-32BIT-NEXT:    stw 3, 4(5)
761; LE-32BIT-NEXT:    lwz 30, 104(1) # 4-byte Folded Reload
762; LE-32BIT-NEXT:    lwz 29, 100(1) # 4-byte Folded Reload
763; LE-32BIT-NEXT:    lwz 28, 96(1) # 4-byte Folded Reload
764; LE-32BIT-NEXT:    lwz 27, 92(1) # 4-byte Folded Reload
765; LE-32BIT-NEXT:    lwz 26, 88(1) # 4-byte Folded Reload
766; LE-32BIT-NEXT:    lwz 25, 84(1) # 4-byte Folded Reload
767; LE-32BIT-NEXT:    addi 1, 1, 112
768; LE-32BIT-NEXT:    blr
769  %src = load i256, ptr %src.ptr, align 1
770  %bitOff = load i256, ptr %bitOff.ptr, align 1
771  %res = shl i256 %src, %bitOff
772  store i256 %res, ptr %dst, align 1
773  ret void
774}
; Arithmetic right shift of a 32-byte value (i256 operand and i256 shift
; amount, both loaded with align 1). All three configurations are expected to
; go through a stack buffer holding the operand next to 32 bytes filled with
; its sign (sradi ..., 63 on the 64-bit targets, srawi ..., 31 on ppc32),
; reload from an address offset derived from the amount (rlwinm), and then
; merge neighbouring pieces with shifts by the remaining low bits of the
; amount. Only the most significant result piece uses an arithmetic shift
; (srad / sraw); the other pieces use logical shifts combined with or. The
; ppc32 checks allocate a 112-byte frame and spill/reload registers 25
; through 30.
775define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
776; LE-64BIT-LABEL: ashr_32bytes:
777; LE-64BIT:       # %bb.0:
778; LE-64BIT-NEXT:    ld 6, 24(3)
779; LE-64BIT-NEXT:    lxvd2x 0, 0, 3
780; LE-64BIT-NEXT:    lwz 4, 0(4)
781; LE-64BIT-NEXT:    addi 7, 1, -64
782; LE-64BIT-NEXT:    ld 3, 16(3)
783; LE-64BIT-NEXT:    sradi 8, 6, 63
784; LE-64BIT-NEXT:    rlwinm 9, 4, 29, 27, 28
785; LE-64BIT-NEXT:    std 6, -40(1)
786; LE-64BIT-NEXT:    std 3, -48(1)
787; LE-64BIT-NEXT:    clrlwi 4, 4, 26
788; LE-64BIT-NEXT:    stxvd2x 0, 0, 7
789; LE-64BIT-NEXT:    std 8, -8(1)
790; LE-64BIT-NEXT:    std 8, -16(1)
791; LE-64BIT-NEXT:    std 8, -24(1)
792; LE-64BIT-NEXT:    std 8, -32(1)
793; LE-64BIT-NEXT:    ldux 3, 9, 7
794; LE-64BIT-NEXT:    xori 7, 4, 63
795; LE-64BIT-NEXT:    ld 6, 16(9)
796; LE-64BIT-NEXT:    ld 8, 8(9)
797; LE-64BIT-NEXT:    ld 9, 24(9)
798; LE-64BIT-NEXT:    srd 3, 3, 4
799; LE-64BIT-NEXT:    sldi 11, 6, 1
800; LE-64BIT-NEXT:    srd 10, 8, 4
801; LE-64BIT-NEXT:    srd 6, 6, 4
802; LE-64BIT-NEXT:    sld 7, 11, 7
803; LE-64BIT-NEXT:    or 7, 10, 7
804; LE-64BIT-NEXT:    subfic 10, 4, 64
805; LE-64BIT-NEXT:    srad 4, 9, 4
806; LE-64BIT-NEXT:    sld 8, 8, 10
807; LE-64BIT-NEXT:    sld 11, 9, 10
808; LE-64BIT-NEXT:    std 4, 24(5)
809; LE-64BIT-NEXT:    std 7, 8(5)
810; LE-64BIT-NEXT:    or 3, 8, 3
811; LE-64BIT-NEXT:    std 3, 0(5)
812; LE-64BIT-NEXT:    or 3, 11, 6
813; LE-64BIT-NEXT:    std 3, 16(5)
814; LE-64BIT-NEXT:    blr
815;
816; BE-LABEL: ashr_32bytes:
817; BE:       # %bb.0:
818; BE-NEXT:    ld 7, 0(3)
819; BE-NEXT:    ld 8, 8(3)
820; BE-NEXT:    ld 9, 16(3)
821; BE-NEXT:    ld 3, 24(3)
822; BE-NEXT:    lwz 4, 28(4)
823; BE-NEXT:    addi 6, 1, -32
824; BE-NEXT:    std 3, -8(1)
825; BE-NEXT:    std 7, -32(1)
826; BE-NEXT:    sradi 3, 7, 63
827; BE-NEXT:    rlwinm 7, 4, 29, 27, 28
828; BE-NEXT:    std 3, -40(1)
829; BE-NEXT:    std 3, -48(1)
830; BE-NEXT:    std 3, -56(1)
831; BE-NEXT:    std 3, -64(1)
832; BE-NEXT:    neg 3, 7
833; BE-NEXT:    std 9, -16(1)
834; BE-NEXT:    std 8, -24(1)
835; BE-NEXT:    extsw 3, 3
836; BE-NEXT:    ldux 3, 6, 3
837; BE-NEXT:    clrlwi 4, 4, 26
838; BE-NEXT:    subfic 9, 4, 64
839; BE-NEXT:    ld 7, 8(6)
840; BE-NEXT:    ld 8, 24(6)
841; BE-NEXT:    ld 6, 16(6)
842; BE-NEXT:    sld 10, 3, 9
843; BE-NEXT:    srad 3, 3, 4
844; BE-NEXT:    std 3, 0(5)
845; BE-NEXT:    srd 11, 7, 4
846; BE-NEXT:    srd 8, 8, 4
847; BE-NEXT:    sld 7, 7, 9
848; BE-NEXT:    sld 9, 6, 9
849; BE-NEXT:    srd 6, 6, 4
850; BE-NEXT:    or 10, 10, 11
851; BE-NEXT:    or 8, 9, 8
852; BE-NEXT:    or 6, 7, 6
853; BE-NEXT:    std 6, 16(5)
854; BE-NEXT:    std 8, 24(5)
855; BE-NEXT:    std 10, 8(5)
856; BE-NEXT:    blr
857;
858; LE-32BIT-LABEL: ashr_32bytes:
859; LE-32BIT:       # %bb.0:
860; LE-32BIT-NEXT:    stwu 1, -112(1)
861; LE-32BIT-NEXT:    lwz 7, 0(3)
862; LE-32BIT-NEXT:    addi 6, 1, 48
863; LE-32BIT-NEXT:    lwz 8, 4(3)
864; LE-32BIT-NEXT:    lwz 9, 8(3)
865; LE-32BIT-NEXT:    lwz 10, 12(3)
866; LE-32BIT-NEXT:    lwz 11, 16(3)
867; LE-32BIT-NEXT:    lwz 12, 20(3)
868; LE-32BIT-NEXT:    lwz 0, 24(3)
869; LE-32BIT-NEXT:    lwz 3, 28(3)
870; LE-32BIT-NEXT:    lwz 4, 28(4)
871; LE-32BIT-NEXT:    stw 3, 76(1)
872; LE-32BIT-NEXT:    srawi 3, 7, 31
873; LE-32BIT-NEXT:    stw 7, 48(1)
874; LE-32BIT-NEXT:    rlwinm 7, 4, 29, 27, 29
875; LE-32BIT-NEXT:    stw 25, 84(1) # 4-byte Folded Spill
876; LE-32BIT-NEXT:    clrlwi 4, 4, 27
877; LE-32BIT-NEXT:    stw 26, 88(1) # 4-byte Folded Spill
878; LE-32BIT-NEXT:    stw 27, 92(1) # 4-byte Folded Spill
879; LE-32BIT-NEXT:    stw 28, 96(1) # 4-byte Folded Spill
880; LE-32BIT-NEXT:    stw 29, 100(1) # 4-byte Folded Spill
881; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
882; LE-32BIT-NEXT:    subfic 30, 4, 32
883; LE-32BIT-NEXT:    stw 0, 72(1)
884; LE-32BIT-NEXT:    stw 12, 68(1)
885; LE-32BIT-NEXT:    xori 12, 4, 31
886; LE-32BIT-NEXT:    stw 11, 64(1)
887; LE-32BIT-NEXT:    stw 10, 60(1)
888; LE-32BIT-NEXT:    stw 9, 56(1)
889; LE-32BIT-NEXT:    stw 8, 52(1)
890; LE-32BIT-NEXT:    stw 3, 44(1)
891; LE-32BIT-NEXT:    stw 3, 40(1)
892; LE-32BIT-NEXT:    stw 3, 36(1)
893; LE-32BIT-NEXT:    stw 3, 32(1)
894; LE-32BIT-NEXT:    stw 3, 28(1)
895; LE-32BIT-NEXT:    stw 3, 24(1)
896; LE-32BIT-NEXT:    stw 3, 20(1)
897; LE-32BIT-NEXT:    stw 3, 16(1)
898; LE-32BIT-NEXT:    sub 3, 6, 7
899; LE-32BIT-NEXT:    lwz 6, 8(3)
900; LE-32BIT-NEXT:    lwz 7, 4(3)
901; LE-32BIT-NEXT:    lwz 8, 0(3)
902; LE-32BIT-NEXT:    srw 29, 6, 4
903; LE-32BIT-NEXT:    lwz 9, 12(3)
904; LE-32BIT-NEXT:    slw 6, 6, 30
905; LE-32BIT-NEXT:    lwz 10, 20(3)
906; LE-32BIT-NEXT:    slw 28, 8, 30
907; LE-32BIT-NEXT:    lwz 11, 16(3)
908; LE-32BIT-NEXT:    srw 27, 9, 4
909; LE-32BIT-NEXT:    lwz 0, 28(3)
910; LE-32BIT-NEXT:    srw 26, 10, 4
911; LE-32BIT-NEXT:    lwz 3, 24(3)
912; LE-32BIT-NEXT:    slw 25, 11, 30
913; LE-32BIT-NEXT:    slw 9, 9, 30
914; LE-32BIT-NEXT:    slw 10, 10, 30
915; LE-32BIT-NEXT:    slw 30, 3, 30
916; LE-32BIT-NEXT:    srw 3, 3, 4
917; LE-32BIT-NEXT:    srw 0, 0, 4
918; LE-32BIT-NEXT:    or 3, 10, 3
919; LE-32BIT-NEXT:    srw 11, 11, 4
920; LE-32BIT-NEXT:    stw 3, 24(5)
921; LE-32BIT-NEXT:    or 3, 30, 0
922; LE-32BIT-NEXT:    stw 3, 28(5)
923; LE-32BIT-NEXT:    or 3, 9, 11
924; LE-32BIT-NEXT:    stw 3, 16(5)
925; LE-32BIT-NEXT:    or 3, 25, 26
926; LE-32BIT-NEXT:    sraw 8, 8, 4
927; LE-32BIT-NEXT:    srw 4, 7, 4
928; LE-32BIT-NEXT:    slwi 7, 7, 1
929; LE-32BIT-NEXT:    stw 3, 20(5)
930; LE-32BIT-NEXT:    or 3, 6, 27
931; LE-32BIT-NEXT:    slw 7, 7, 12
932; LE-32BIT-NEXT:    stw 3, 12(5)
933; LE-32BIT-NEXT:    or 3, 28, 4
934; LE-32BIT-NEXT:    stw 3, 4(5)
935; LE-32BIT-NEXT:    or 3, 29, 7
936; LE-32BIT-NEXT:    stw 8, 0(5)
937; LE-32BIT-NEXT:    stw 3, 8(5)
938; LE-32BIT-NEXT:    lwz 30, 104(1) # 4-byte Folded Reload
939; LE-32BIT-NEXT:    lwz 29, 100(1) # 4-byte Folded Reload
940; LE-32BIT-NEXT:    lwz 28, 96(1) # 4-byte Folded Reload
941; LE-32BIT-NEXT:    lwz 27, 92(1) # 4-byte Folded Reload
942; LE-32BIT-NEXT:    lwz 26, 88(1) # 4-byte Folded Reload
943; LE-32BIT-NEXT:    lwz 25, 84(1) # 4-byte Folded Reload
944; LE-32BIT-NEXT:    addi 1, 1, 112
945; LE-32BIT-NEXT:    blr
946  %src = load i256, ptr %src.ptr, align 1
947  %bitOff = load i256, ptr %bitOff.ptr, align 1
948  %res = ashr i256 %src, %bitOff
949  store i256 %res, ptr %dst, align 1
950  ret void
951}
952;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
953; LE: {{.*}}
954