xref: /llvm-project/llvm/test/CodeGen/PowerPC/wide-scalar-shift-by-byte-multiple-legalization.ll (revision 032014ef103157bfd8403418538e25f3f58efa9d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu | FileCheck %s --check-prefixes=ALL,LE,LE-64BIT
3; RUN: llc < %s -mcpu=ppc -mtriple=powerpc64-unknown-linux-gnu   | FileCheck %s --check-prefixes=ALL,BE
4; RUN: llc < %s -mtriple=ppc32--                       | FileCheck %s --check-prefixes=ALL,LE,LE-32BIT
5
; lshr_4bytes - logical right shift of an i32 by a byte count.
; Loads the value and the byte offset as i32 (both align 1), scales the
; offset to bits with shl-by-3, shifts, and stores the result to %dst.
; One expected instruction sequence is shared by all three RUN lines.
6define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
7; ALL-LABEL: lshr_4bytes:
8; ALL:       # %bb.0:
9; ALL-NEXT:    lwz 4, 0(4)
10; ALL-NEXT:    lwz 3, 0(3)
11; ALL-NEXT:    slwi 4, 4, 3
12; ALL-NEXT:    srw 3, 3, 4
13; ALL-NEXT:    stw 3, 0(5)
14; ALL-NEXT:    blr
15  %src = load i32, ptr %src.ptr, align 1
16  %byteOff = load i32, ptr %byteOff.ptr, align 1
17  %bitOff = shl i32 %byteOff, 3
18  %res = lshr i32 %src, %bitOff
19  store i32 %res, ptr %dst, align 1
20  ret void
21}
; shl_4bytes - left shift of an i32 by a byte count.
; Loads the value and the byte offset as i32 (both align 1), scales the
; offset to bits with shl-by-3, shifts left, and stores the result to %dst.
; One expected instruction sequence is shared by all three RUN lines.
22define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
23; ALL-LABEL: shl_4bytes:
24; ALL:       # %bb.0:
25; ALL-NEXT:    lwz 4, 0(4)
26; ALL-NEXT:    lwz 3, 0(3)
27; ALL-NEXT:    slwi 4, 4, 3
28; ALL-NEXT:    slw 3, 3, 4
29; ALL-NEXT:    stw 3, 0(5)
30; ALL-NEXT:    blr
31  %src = load i32, ptr %src.ptr, align 1
32  %byteOff = load i32, ptr %byteOff.ptr, align 1
33  %bitOff = shl i32 %byteOff, 3
34  %res = shl i32 %src, %bitOff
35  store i32 %res, ptr %dst, align 1
36  ret void
37}
; ashr_4bytes - arithmetic right shift of an i32 by a byte count.
; Loads the value and the byte offset as i32 (both align 1), scales the
; offset to bits with shl-by-3, shifts (sign-filling), and stores to %dst.
; One expected instruction sequence is shared by all three RUN lines.
38define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
39; ALL-LABEL: ashr_4bytes:
40; ALL:       # %bb.0:
41; ALL-NEXT:    lwz 4, 0(4)
42; ALL-NEXT:    lwz 3, 0(3)
43; ALL-NEXT:    slwi 4, 4, 3
44; ALL-NEXT:    sraw 3, 3, 4
45; ALL-NEXT:    stw 3, 0(5)
46; ALL-NEXT:    blr
47  %src = load i32, ptr %src.ptr, align 1
48  %byteOff = load i32, ptr %byteOff.ptr, align 1
49  %bitOff = shl i32 %byteOff, 3
50  %res = ashr i32 %src, %bitOff
51  store i32 %res, ptr %dst, align 1
52  ret void
53}
54
; lshr_8bytes - logical right shift of an i64 by a byte count.
; The IR loads both the value and the byte offset as i64 (align 1) and scales
; the offset to bits with shl-by-3. In every expected sequence only one 32-bit
; word of the offset is read (lwz): at offset 0 for the powerpc64le run and at
; offset 4 for the other two runs. The 64-bit runs use a single srd; the ppc32
; run assembles the result from two 32-bit halves.
; NOTE(review): the ppc32 run is checked under the prefix named LE-32BIT, yet
; its loads take the low word from offset 4 - confirm the prefix name is only
; historical.
55define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
56; LE-64BIT-LABEL: lshr_8bytes:
57; LE-64BIT:       # %bb.0:
58; LE-64BIT-NEXT:    lwz 4, 0(4)
59; LE-64BIT-NEXT:    ld 3, 0(3)
60; LE-64BIT-NEXT:    slwi 4, 4, 3
61; LE-64BIT-NEXT:    srd 3, 3, 4
62; LE-64BIT-NEXT:    std 3, 0(5)
63; LE-64BIT-NEXT:    blr
64;
65; BE-LABEL: lshr_8bytes:
66; BE:       # %bb.0:
67; BE-NEXT:    lwz 4, 4(4)
68; BE-NEXT:    ld 3, 0(3)
69; BE-NEXT:    slwi 4, 4, 3
70; BE-NEXT:    srd 3, 3, 4
71; BE-NEXT:    std 3, 0(5)
72; BE-NEXT:    blr
73;
74; LE-32BIT-LABEL: lshr_8bytes:
75; LE-32BIT:       # %bb.0:
76; LE-32BIT-NEXT:    lwz 4, 4(4)
77; LE-32BIT-NEXT:    lwz 6, 4(3)
78; LE-32BIT-NEXT:    lwz 3, 0(3)
79; LE-32BIT-NEXT:    slwi 4, 4, 3
80; LE-32BIT-NEXT:    subfic 7, 4, 32
81; LE-32BIT-NEXT:    srw 6, 6, 4
82; LE-32BIT-NEXT:    addi 8, 4, -32
83; LE-32BIT-NEXT:    slw 7, 3, 7
84; LE-32BIT-NEXT:    srw 4, 3, 4
85; LE-32BIT-NEXT:    srw 3, 3, 8
86; LE-32BIT-NEXT:    or 6, 6, 7
87; LE-32BIT-NEXT:    or 3, 6, 3
88; LE-32BIT-NEXT:    stw 4, 0(5)
89; LE-32BIT-NEXT:    stw 3, 4(5)
90; LE-32BIT-NEXT:    blr
91  %src = load i64, ptr %src.ptr, align 1
92  %byteOff = load i64, ptr %byteOff.ptr, align 1
93  %bitOff = shl i64 %byteOff, 3
94  %res = lshr i64 %src, %bitOff
95  store i64 %res, ptr %dst, align 1
96  ret void
97}
; shl_8bytes - left shift of an i64 by a byte count.
; The IR loads both the value and the byte offset as i64 (align 1) and scales
; the offset to bits with shl-by-3. The 64-bit runs expect a single sld; the
; ppc32 run expects the result to be assembled from two 32-bit halves
; (slw/srw pairs combined with or).
98define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
99; LE-64BIT-LABEL: shl_8bytes:
100; LE-64BIT:       # %bb.0:
101; LE-64BIT-NEXT:    lwz 4, 0(4)
102; LE-64BIT-NEXT:    ld 3, 0(3)
103; LE-64BIT-NEXT:    slwi 4, 4, 3
104; LE-64BIT-NEXT:    sld 3, 3, 4
105; LE-64BIT-NEXT:    std 3, 0(5)
106; LE-64BIT-NEXT:    blr
107;
108; BE-LABEL: shl_8bytes:
109; BE:       # %bb.0:
110; BE-NEXT:    lwz 4, 4(4)
111; BE-NEXT:    ld 3, 0(3)
112; BE-NEXT:    slwi 4, 4, 3
113; BE-NEXT:    sld 3, 3, 4
114; BE-NEXT:    std 3, 0(5)
115; BE-NEXT:    blr
116;
117; LE-32BIT-LABEL: shl_8bytes:
118; LE-32BIT:       # %bb.0:
119; LE-32BIT-NEXT:    lwz 4, 4(4)
120; LE-32BIT-NEXT:    lwz 6, 0(3)
121; LE-32BIT-NEXT:    lwz 3, 4(3)
122; LE-32BIT-NEXT:    slwi 4, 4, 3
123; LE-32BIT-NEXT:    subfic 7, 4, 32
124; LE-32BIT-NEXT:    slw 6, 6, 4
125; LE-32BIT-NEXT:    addi 8, 4, -32
126; LE-32BIT-NEXT:    srw 7, 3, 7
127; LE-32BIT-NEXT:    slw 4, 3, 4
128; LE-32BIT-NEXT:    slw 3, 3, 8
129; LE-32BIT-NEXT:    or 6, 6, 7
130; LE-32BIT-NEXT:    or 3, 6, 3
131; LE-32BIT-NEXT:    stw 4, 4(5)
132; LE-32BIT-NEXT:    stw 3, 0(5)
133; LE-32BIT-NEXT:    blr
134  %src = load i64, ptr %src.ptr, align 1
135  %byteOff = load i64, ptr %byteOff.ptr, align 1
136  %bitOff = shl i64 %byteOff, 3
137  %res = shl i64 %src, %bitOff
138  store i64 %res, ptr %dst, align 1
139  ret void
140}
; ashr_8bytes - arithmetic right shift of an i64 by a byte count.
; The IR loads both the value and the byte offset as i64 (align 1) and scales
; the offset to bits with shl-by-3. The 64-bit runs expect a single srad. The
; ppc32 run expects a compare-and-branch on (bitOff - 32) that picks one of
; two ways to compute the word stored at offset 4 of %dst, followed by a sraw
; that produces the word stored at offset 0.
141define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
142; LE-64BIT-LABEL: ashr_8bytes:
143; LE-64BIT:       # %bb.0:
144; LE-64BIT-NEXT:    lwz 4, 0(4)
145; LE-64BIT-NEXT:    ld 3, 0(3)
146; LE-64BIT-NEXT:    slwi 4, 4, 3
147; LE-64BIT-NEXT:    srad 3, 3, 4
148; LE-64BIT-NEXT:    std 3, 0(5)
149; LE-64BIT-NEXT:    blr
150;
151; BE-LABEL: ashr_8bytes:
152; BE:       # %bb.0:
153; BE-NEXT:    lwz 4, 4(4)
154; BE-NEXT:    ld 3, 0(3)
155; BE-NEXT:    slwi 4, 4, 3
156; BE-NEXT:    srad 3, 3, 4
157; BE-NEXT:    std 3, 0(5)
158; BE-NEXT:    blr
159;
160; LE-32BIT-LABEL: ashr_8bytes:
161; LE-32BIT:       # %bb.0:
162; LE-32BIT-NEXT:    lwz 4, 4(4)
163; LE-32BIT-NEXT:    lwz 6, 0(3)
164; LE-32BIT-NEXT:    slwi 4, 4, 3
165; LE-32BIT-NEXT:    addi 7, 4, -32
166; LE-32BIT-NEXT:    cmpwi 7, 0
167; LE-32BIT-NEXT:    ble 0, .LBB5_2
168; LE-32BIT-NEXT:  # %bb.1:
169; LE-32BIT-NEXT:    sraw 3, 6, 7
170; LE-32BIT-NEXT:    b .LBB5_3
171; LE-32BIT-NEXT:  .LBB5_2:
172; LE-32BIT-NEXT:    lwz 3, 4(3)
173; LE-32BIT-NEXT:    subfic 7, 4, 32
174; LE-32BIT-NEXT:    slw 7, 6, 7
175; LE-32BIT-NEXT:    srw 3, 3, 4
176; LE-32BIT-NEXT:    or 3, 3, 7
177; LE-32BIT-NEXT:  .LBB5_3:
178; LE-32BIT-NEXT:    sraw 4, 6, 4
179; LE-32BIT-NEXT:    stw 4, 0(5)
180; LE-32BIT-NEXT:    stw 3, 4(5)
181; LE-32BIT-NEXT:    blr
182  %src = load i64, ptr %src.ptr, align 1
183  %byteOff = load i64, ptr %byteOff.ptr, align 1
184  %bitOff = shl i64 %byteOff, 3
185  %res = ashr i64 %src, %bitOff
186  store i64 %res, ptr %dst, align 1
187  ret void
188}
189
; lshr_16bytes - logical right shift of an i128 by a byte count.
; The IR loads both the value and the byte offset as i128 (align 1) and scales
; the offset to bits with shl-by-3. The 64-bit runs expect the result to be
; combined in registers from doubleword shifts (srd/sld/or). The ppc32 run
; expects the value to be written to a stack buffer next to four zero words,
; re-read from an address adjusted by the masked byte offset, and then
; finished with 32-bit shifts by the remaining bit amount.
190define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
191; LE-64BIT-LABEL: lshr_16bytes:
192; LE-64BIT:       # %bb.0:
193; LE-64BIT-NEXT:    lwz 4, 0(4)
194; LE-64BIT-NEXT:    ld 6, 8(3)
195; LE-64BIT-NEXT:    ld 3, 0(3)
196; LE-64BIT-NEXT:    slwi 4, 4, 3
197; LE-64BIT-NEXT:    subfic 7, 4, 64
198; LE-64BIT-NEXT:    srd 3, 3, 4
199; LE-64BIT-NEXT:    sld 7, 6, 7
200; LE-64BIT-NEXT:    or 3, 3, 7
201; LE-64BIT-NEXT:    addi 7, 4, -64
202; LE-64BIT-NEXT:    srd 4, 6, 4
203; LE-64BIT-NEXT:    srd 7, 6, 7
204; LE-64BIT-NEXT:    std 4, 8(5)
205; LE-64BIT-NEXT:    or 3, 3, 7
206; LE-64BIT-NEXT:    std 3, 0(5)
207; LE-64BIT-NEXT:    blr
208;
209; BE-LABEL: lshr_16bytes:
210; BE:       # %bb.0:
211; BE-NEXT:    lwz 4, 12(4)
212; BE-NEXT:    ld 6, 0(3)
213; BE-NEXT:    ld 3, 8(3)
214; BE-NEXT:    slwi 4, 4, 3
215; BE-NEXT:    subfic 7, 4, 64
216; BE-NEXT:    srd 3, 3, 4
217; BE-NEXT:    sld 7, 6, 7
218; BE-NEXT:    addi 8, 4, -64
219; BE-NEXT:    or 3, 3, 7
220; BE-NEXT:    srd 7, 6, 8
221; BE-NEXT:    srd 4, 6, 4
222; BE-NEXT:    or 3, 3, 7
223; BE-NEXT:    std 4, 0(5)
224; BE-NEXT:    std 3, 8(5)
225; BE-NEXT:    blr
226;
227; LE-32BIT-LABEL: lshr_16bytes:
228; LE-32BIT:       # %bb.0:
229; LE-32BIT-NEXT:    stwu 1, -48(1)
230; LE-32BIT-NEXT:    lwz 7, 0(3)
231; LE-32BIT-NEXT:    li 6, 0
232; LE-32BIT-NEXT:    lwz 8, 4(3)
233; LE-32BIT-NEXT:    lwz 9, 8(3)
234; LE-32BIT-NEXT:    lwz 3, 12(3)
235; LE-32BIT-NEXT:    lwz 4, 12(4)
236; LE-32BIT-NEXT:    stw 6, 28(1)
237; LE-32BIT-NEXT:    stw 6, 24(1)
238; LE-32BIT-NEXT:    stw 6, 20(1)
239; LE-32BIT-NEXT:    stw 6, 16(1)
240; LE-32BIT-NEXT:    rlwinm 6, 4, 0, 28, 29
241; LE-32BIT-NEXT:    stw 3, 44(1)
242; LE-32BIT-NEXT:    addi 3, 1, 32
243; LE-32BIT-NEXT:    stw 9, 40(1)
244; LE-32BIT-NEXT:    sub 3, 3, 6
245; LE-32BIT-NEXT:    stw 8, 36(1)
246; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
247; LE-32BIT-NEXT:    stw 7, 32(1)
248; LE-32BIT-NEXT:    subfic 9, 4, 32
249; LE-32BIT-NEXT:    lwz 6, 4(3)
250; LE-32BIT-NEXT:    lwz 7, 0(3)
251; LE-32BIT-NEXT:    lwz 8, 12(3)
252; LE-32BIT-NEXT:    srw 10, 6, 4
253; LE-32BIT-NEXT:    lwz 3, 8(3)
254; LE-32BIT-NEXT:    slw 11, 7, 9
255; LE-32BIT-NEXT:    slw 6, 6, 9
256; LE-32BIT-NEXT:    srw 8, 8, 4
257; LE-32BIT-NEXT:    slw 9, 3, 9
258; LE-32BIT-NEXT:    srw 3, 3, 4
259; LE-32BIT-NEXT:    or 3, 6, 3
260; LE-32BIT-NEXT:    stw 3, 8(5)
261; LE-32BIT-NEXT:    or 3, 9, 8
262; LE-32BIT-NEXT:    srw 4, 7, 4
263; LE-32BIT-NEXT:    stw 3, 12(5)
264; LE-32BIT-NEXT:    or 3, 11, 10
265; LE-32BIT-NEXT:    stw 4, 0(5)
266; LE-32BIT-NEXT:    stw 3, 4(5)
267; LE-32BIT-NEXT:    addi 1, 1, 48
268; LE-32BIT-NEXT:    blr
269  %src = load i128, ptr %src.ptr, align 1
270  %byteOff = load i128, ptr %byteOff.ptr, align 1
271  %bitOff = shl i128 %byteOff, 3
272  %res = lshr i128 %src, %bitOff
273  store i128 %res, ptr %dst, align 1
274  ret void
275}
276
; lshr_16bytes_wordOff - logical right shift of an i128 by a count of 32-bit
; words. Same shape as lshr_16bytes, but the loaded offset is scaled with
; shl-by-5 instead of shl-by-3. The 64-bit runs expect the same register
; sequence with "slwi 4, 4, 5". The ppc32 run expects no shift instructions
; on the data at all: the value is written to a stack buffer next to four
; zero words and four words are copied out from an address adjusted by the
; scaled, masked offset.
277define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
278; LE-64BIT-LABEL: lshr_16bytes_wordOff:
279; LE-64BIT:       # %bb.0:
280; LE-64BIT-NEXT:    lwz 4, 0(4)
281; LE-64BIT-NEXT:    ld 6, 8(3)
282; LE-64BIT-NEXT:    ld 3, 0(3)
283; LE-64BIT-NEXT:    slwi 4, 4, 5
284; LE-64BIT-NEXT:    subfic 7, 4, 64
285; LE-64BIT-NEXT:    srd 3, 3, 4
286; LE-64BIT-NEXT:    sld 7, 6, 7
287; LE-64BIT-NEXT:    or 3, 3, 7
288; LE-64BIT-NEXT:    addi 7, 4, -64
289; LE-64BIT-NEXT:    srd 4, 6, 4
290; LE-64BIT-NEXT:    srd 7, 6, 7
291; LE-64BIT-NEXT:    std 4, 8(5)
292; LE-64BIT-NEXT:    or 3, 3, 7
293; LE-64BIT-NEXT:    std 3, 0(5)
294; LE-64BIT-NEXT:    blr
295;
296; BE-LABEL: lshr_16bytes_wordOff:
297; BE:       # %bb.0:
298; BE-NEXT:    lwz 4, 12(4)
299; BE-NEXT:    ld 6, 0(3)
300; BE-NEXT:    ld 3, 8(3)
301; BE-NEXT:    slwi 4, 4, 5
302; BE-NEXT:    subfic 7, 4, 64
303; BE-NEXT:    srd 3, 3, 4
304; BE-NEXT:    sld 7, 6, 7
305; BE-NEXT:    addi 8, 4, -64
306; BE-NEXT:    or 3, 3, 7
307; BE-NEXT:    srd 7, 6, 8
308; BE-NEXT:    srd 4, 6, 4
309; BE-NEXT:    or 3, 3, 7
310; BE-NEXT:    std 4, 0(5)
311; BE-NEXT:    std 3, 8(5)
312; BE-NEXT:    blr
313;
314; LE-32BIT-LABEL: lshr_16bytes_wordOff:
315; LE-32BIT:       # %bb.0:
316; LE-32BIT-NEXT:    stwu 1, -48(1)
317; LE-32BIT-NEXT:    lwz 7, 0(3)
318; LE-32BIT-NEXT:    li 6, 0
319; LE-32BIT-NEXT:    lwz 8, 4(3)
320; LE-32BIT-NEXT:    lwz 9, 8(3)
321; LE-32BIT-NEXT:    lwz 3, 12(3)
322; LE-32BIT-NEXT:    lwz 4, 12(4)
323; LE-32BIT-NEXT:    stw 3, 44(1)
324; LE-32BIT-NEXT:    addi 3, 1, 32
325; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 28, 29
326; LE-32BIT-NEXT:    stw 6, 28(1)
327; LE-32BIT-NEXT:    sub 3, 3, 4
328; LE-32BIT-NEXT:    stw 6, 24(1)
329; LE-32BIT-NEXT:    stw 6, 20(1)
330; LE-32BIT-NEXT:    stw 6, 16(1)
331; LE-32BIT-NEXT:    stw 9, 40(1)
332; LE-32BIT-NEXT:    stw 8, 36(1)
333; LE-32BIT-NEXT:    stw 7, 32(1)
334; LE-32BIT-NEXT:    lwz 4, 4(3)
335; LE-32BIT-NEXT:    lwz 6, 0(3)
336; LE-32BIT-NEXT:    lwz 7, 8(3)
337; LE-32BIT-NEXT:    lwz 3, 12(3)
338; LE-32BIT-NEXT:    stw 7, 8(5)
339; LE-32BIT-NEXT:    stw 3, 12(5)
340; LE-32BIT-NEXT:    stw 6, 0(5)
341; LE-32BIT-NEXT:    stw 4, 4(5)
342; LE-32BIT-NEXT:    addi 1, 1, 48
343; LE-32BIT-NEXT:    blr
344  %src = load i128, ptr %src.ptr, align 1
345  %wordOff = load i128, ptr %wordOff.ptr, align 1
346  %bitOff = shl i128 %wordOff, 5
347  %res = lshr i128 %src, %bitOff
348  store i128 %res, ptr %dst, align 1
349  ret void
350}
351
; shl_16bytes - left shift of an i128 by a byte count.
; The IR loads both the value and the byte offset as i128 (align 1) and scales
; the offset to bits with shl-by-3. The 64-bit runs expect the result to be
; combined in registers from doubleword shifts (sld/srd/or). The ppc32 run
; expects the value to be written to a stack buffer followed by four zero
; words, re-read starting at the masked byte offset into that buffer
; (lwzux), and then finished with 32-bit shifts by the remaining bit amount.
352define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
353; LE-64BIT-LABEL: shl_16bytes:
354; LE-64BIT:       # %bb.0:
355; LE-64BIT-NEXT:    lwz 4, 0(4)
356; LE-64BIT-NEXT:    ld 6, 0(3)
357; LE-64BIT-NEXT:    ld 3, 8(3)
358; LE-64BIT-NEXT:    slwi 4, 4, 3
359; LE-64BIT-NEXT:    subfic 7, 4, 64
360; LE-64BIT-NEXT:    sld 3, 3, 4
361; LE-64BIT-NEXT:    srd 7, 6, 7
362; LE-64BIT-NEXT:    or 3, 3, 7
363; LE-64BIT-NEXT:    addi 7, 4, -64
364; LE-64BIT-NEXT:    sld 4, 6, 4
365; LE-64BIT-NEXT:    sld 7, 6, 7
366; LE-64BIT-NEXT:    std 4, 0(5)
367; LE-64BIT-NEXT:    or 3, 3, 7
368; LE-64BIT-NEXT:    std 3, 8(5)
369; LE-64BIT-NEXT:    blr
370;
371; BE-LABEL: shl_16bytes:
372; BE:       # %bb.0:
373; BE-NEXT:    lwz 4, 12(4)
374; BE-NEXT:    ld 6, 8(3)
375; BE-NEXT:    ld 3, 0(3)
376; BE-NEXT:    slwi 4, 4, 3
377; BE-NEXT:    subfic 7, 4, 64
378; BE-NEXT:    sld 3, 3, 4
379; BE-NEXT:    srd 7, 6, 7
380; BE-NEXT:    addi 8, 4, -64
381; BE-NEXT:    or 3, 3, 7
382; BE-NEXT:    sld 7, 6, 8
383; BE-NEXT:    sld 4, 6, 4
384; BE-NEXT:    or 3, 3, 7
385; BE-NEXT:    std 4, 8(5)
386; BE-NEXT:    std 3, 0(5)
387; BE-NEXT:    blr
388;
389; LE-32BIT-LABEL: shl_16bytes:
390; LE-32BIT:       # %bb.0:
391; LE-32BIT-NEXT:    stwu 1, -48(1)
392; LE-32BIT-NEXT:    lwz 7, 0(3)
393; LE-32BIT-NEXT:    li 6, 0
394; LE-32BIT-NEXT:    lwz 8, 4(3)
395; LE-32BIT-NEXT:    lwz 9, 8(3)
396; LE-32BIT-NEXT:    lwz 3, 12(3)
397; LE-32BIT-NEXT:    lwz 4, 12(4)
398; LE-32BIT-NEXT:    stw 6, 44(1)
399; LE-32BIT-NEXT:    stw 6, 40(1)
400; LE-32BIT-NEXT:    stw 6, 36(1)
401; LE-32BIT-NEXT:    stw 6, 32(1)
402; LE-32BIT-NEXT:    rlwinm 6, 4, 0, 28, 29
403; LE-32BIT-NEXT:    stw 3, 28(1)
404; LE-32BIT-NEXT:    addi 3, 1, 16
405; LE-32BIT-NEXT:    stw 9, 24(1)
406; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
407; LE-32BIT-NEXT:    stw 8, 20(1)
408; LE-32BIT-NEXT:    subfic 8, 4, 32
409; LE-32BIT-NEXT:    stw 7, 16(1)
410; LE-32BIT-NEXT:    lwzux 3, 6, 3
411; LE-32BIT-NEXT:    lwz 9, 4(6)
412; LE-32BIT-NEXT:    slw 3, 3, 4
413; LE-32BIT-NEXT:    lwz 7, 8(6)
414; LE-32BIT-NEXT:    lwz 6, 12(6)
415; LE-32BIT-NEXT:    slw 11, 9, 4
416; LE-32BIT-NEXT:    srw 9, 9, 8
417; LE-32BIT-NEXT:    srw 10, 7, 8
418; LE-32BIT-NEXT:    srw 8, 6, 8
419; LE-32BIT-NEXT:    slw 7, 7, 4
420; LE-32BIT-NEXT:    slw 4, 6, 4
421; LE-32BIT-NEXT:    or 3, 3, 9
422; LE-32BIT-NEXT:    stw 4, 12(5)
423; LE-32BIT-NEXT:    or 4, 7, 8
424; LE-32BIT-NEXT:    stw 3, 0(5)
425; LE-32BIT-NEXT:    or 3, 11, 10
426; LE-32BIT-NEXT:    stw 4, 8(5)
427; LE-32BIT-NEXT:    stw 3, 4(5)
428; LE-32BIT-NEXT:    addi 1, 1, 48
429; LE-32BIT-NEXT:    blr
430  %src = load i128, ptr %src.ptr, align 1
431  %byteOff = load i128, ptr %byteOff.ptr, align 1
432  %bitOff = shl i128 %byteOff, 3
433  %res = shl i128 %src, %bitOff
434  store i128 %res, ptr %dst, align 1
435  ret void
436}
437
; shl_16bytes_wordOff - left shift of an i128 by a count of 32-bit words.
; Same shape as shl_16bytes, but the loaded offset is scaled with shl-by-5
; instead of shl-by-3. The 64-bit runs expect the same register sequence with
; "slwi 4, 4, 5". The ppc32 run expects no shift instructions on the data:
; the value is written to a stack buffer followed by four zero words and four
; words are copied out starting at the scaled, masked offset (lwzux).
438define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
439; LE-64BIT-LABEL: shl_16bytes_wordOff:
440; LE-64BIT:       # %bb.0:
441; LE-64BIT-NEXT:    lwz 4, 0(4)
442; LE-64BIT-NEXT:    ld 6, 0(3)
443; LE-64BIT-NEXT:    ld 3, 8(3)
444; LE-64BIT-NEXT:    slwi 4, 4, 5
445; LE-64BIT-NEXT:    subfic 7, 4, 64
446; LE-64BIT-NEXT:    sld 3, 3, 4
447; LE-64BIT-NEXT:    srd 7, 6, 7
448; LE-64BIT-NEXT:    or 3, 3, 7
449; LE-64BIT-NEXT:    addi 7, 4, -64
450; LE-64BIT-NEXT:    sld 4, 6, 4
451; LE-64BIT-NEXT:    sld 7, 6, 7
452; LE-64BIT-NEXT:    std 4, 0(5)
453; LE-64BIT-NEXT:    or 3, 3, 7
454; LE-64BIT-NEXT:    std 3, 8(5)
455; LE-64BIT-NEXT:    blr
456;
457; BE-LABEL: shl_16bytes_wordOff:
458; BE:       # %bb.0:
459; BE-NEXT:    lwz 4, 12(4)
460; BE-NEXT:    ld 6, 8(3)
461; BE-NEXT:    ld 3, 0(3)
462; BE-NEXT:    slwi 4, 4, 5
463; BE-NEXT:    subfic 7, 4, 64
464; BE-NEXT:    sld 3, 3, 4
465; BE-NEXT:    srd 7, 6, 7
466; BE-NEXT:    addi 8, 4, -64
467; BE-NEXT:    or 3, 3, 7
468; BE-NEXT:    sld 7, 6, 8
469; BE-NEXT:    sld 4, 6, 4
470; BE-NEXT:    or 3, 3, 7
471; BE-NEXT:    std 4, 8(5)
472; BE-NEXT:    std 3, 0(5)
473; BE-NEXT:    blr
474;
475; LE-32BIT-LABEL: shl_16bytes_wordOff:
476; LE-32BIT:       # %bb.0:
477; LE-32BIT-NEXT:    stwu 1, -48(1)
478; LE-32BIT-NEXT:    lwz 7, 0(3)
479; LE-32BIT-NEXT:    li 6, 0
480; LE-32BIT-NEXT:    lwz 8, 4(3)
481; LE-32BIT-NEXT:    lwz 9, 8(3)
482; LE-32BIT-NEXT:    lwz 3, 12(3)
483; LE-32BIT-NEXT:    lwz 4, 12(4)
484; LE-32BIT-NEXT:    stw 6, 44(1)
485; LE-32BIT-NEXT:    stw 6, 40(1)
486; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 28, 29
487; LE-32BIT-NEXT:    stw 6, 36(1)
488; LE-32BIT-NEXT:    stw 6, 32(1)
489; LE-32BIT-NEXT:    stw 3, 28(1)
490; LE-32BIT-NEXT:    addi 3, 1, 16
491; LE-32BIT-NEXT:    stw 9, 24(1)
492; LE-32BIT-NEXT:    stw 8, 20(1)
493; LE-32BIT-NEXT:    stw 7, 16(1)
494; LE-32BIT-NEXT:    lwzux 3, 4, 3
495; LE-32BIT-NEXT:    lwz 6, 4(4)
496; LE-32BIT-NEXT:    lwz 7, 12(4)
497; LE-32BIT-NEXT:    lwz 4, 8(4)
498; LE-32BIT-NEXT:    stw 3, 0(5)
499; LE-32BIT-NEXT:    stw 4, 8(5)
500; LE-32BIT-NEXT:    stw 7, 12(5)
501; LE-32BIT-NEXT:    stw 6, 4(5)
502; LE-32BIT-NEXT:    addi 1, 1, 48
503; LE-32BIT-NEXT:    blr
504  %src = load i128, ptr %src.ptr, align 1
505  %wordOff = load i128, ptr %wordOff.ptr, align 1
506  %bitOff = shl i128 %wordOff, 5
507  %res = shl i128 %src, %bitOff
508  store i128 %res, ptr %dst, align 1
509  ret void
510}
511
; ashr_16bytes - arithmetic right shift of an i128 by a byte count.
; The IR loads both the value and the byte offset as i128 (align 1) and scales
; the offset to bits with shl-by-3. The powerpc64le run expects a branch-free
; sequence that picks one of two candidate doublewords with isellt after
; comparing (bitOff - 64) against 1; the powerpc64 run expects the same
; comparison followed by a branch. The ppc32 run expects the value to be
; written to a stack buffer next to four words filled with "srawi 3, 7, 31"
; (the sign of the word loaded from offset 0 of the source), re-read from an
; address adjusted by the masked byte offset, and finished with 32-bit shifts.
512define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
513; LE-64BIT-LABEL: ashr_16bytes:
514; LE-64BIT:       # %bb.0:
515; LE-64BIT-NEXT:    lwz 4, 0(4)
516; LE-64BIT-NEXT:    ld 6, 8(3)
517; LE-64BIT-NEXT:    ld 3, 0(3)
518; LE-64BIT-NEXT:    slwi 4, 4, 3
519; LE-64BIT-NEXT:    subfic 7, 4, 64
520; LE-64BIT-NEXT:    srd 3, 3, 4
521; LE-64BIT-NEXT:    sld 7, 6, 7
522; LE-64BIT-NEXT:    or 3, 3, 7
523; LE-64BIT-NEXT:    addi 7, 4, -64
524; LE-64BIT-NEXT:    srad 4, 6, 4
525; LE-64BIT-NEXT:    cmpwi 7, 1
526; LE-64BIT-NEXT:    srad 8, 6, 7
527; LE-64BIT-NEXT:    std 4, 8(5)
528; LE-64BIT-NEXT:    isellt 3, 3, 8
529; LE-64BIT-NEXT:    std 3, 0(5)
530; LE-64BIT-NEXT:    blr
531;
532; BE-LABEL: ashr_16bytes:
533; BE:       # %bb.0:
534; BE-NEXT:    lwz 4, 12(4)
535; BE-NEXT:    ld 6, 0(3)
536; BE-NEXT:    slwi 4, 4, 3
537; BE-NEXT:    addi 7, 4, -64
538; BE-NEXT:    cmpwi 7, 1
539; BE-NEXT:    blt 0, .LBB10_2
540; BE-NEXT:  # %bb.1:
541; BE-NEXT:    srad 3, 6, 7
542; BE-NEXT:    b .LBB10_3
543; BE-NEXT:  .LBB10_2:
544; BE-NEXT:    ld 3, 8(3)
545; BE-NEXT:    subfic 7, 4, 64
546; BE-NEXT:    sld 7, 6, 7
547; BE-NEXT:    srd 3, 3, 4
548; BE-NEXT:    or 3, 3, 7
549; BE-NEXT:  .LBB10_3:
550; BE-NEXT:    srad 4, 6, 4
551; BE-NEXT:    std 3, 8(5)
552; BE-NEXT:    std 4, 0(5)
553; BE-NEXT:    blr
554;
555; LE-32BIT-LABEL: ashr_16bytes:
556; LE-32BIT:       # %bb.0:
557; LE-32BIT-NEXT:    stwu 1, -48(1)
558; LE-32BIT-NEXT:    lwz 7, 0(3)
559; LE-32BIT-NEXT:    addi 6, 1, 32
560; LE-32BIT-NEXT:    lwz 8, 4(3)
561; LE-32BIT-NEXT:    lwz 9, 8(3)
562; LE-32BIT-NEXT:    lwz 3, 12(3)
563; LE-32BIT-NEXT:    lwz 4, 12(4)
564; LE-32BIT-NEXT:    stw 3, 44(1)
565; LE-32BIT-NEXT:    srawi 3, 7, 31
566; LE-32BIT-NEXT:    stw 7, 32(1)
567; LE-32BIT-NEXT:    rlwinm 7, 4, 0, 28, 29
568; LE-32BIT-NEXT:    stw 9, 40(1)
569; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
570; LE-32BIT-NEXT:    stw 8, 36(1)
571; LE-32BIT-NEXT:    subfic 9, 4, 32
572; LE-32BIT-NEXT:    stw 3, 28(1)
573; LE-32BIT-NEXT:    stw 3, 24(1)
574; LE-32BIT-NEXT:    stw 3, 20(1)
575; LE-32BIT-NEXT:    stw 3, 16(1)
576; LE-32BIT-NEXT:    sub 3, 6, 7
577; LE-32BIT-NEXT:    lwz 6, 4(3)
578; LE-32BIT-NEXT:    lwz 7, 0(3)
579; LE-32BIT-NEXT:    lwz 8, 12(3)
580; LE-32BIT-NEXT:    srw 10, 6, 4
581; LE-32BIT-NEXT:    lwz 3, 8(3)
582; LE-32BIT-NEXT:    slw 11, 7, 9
583; LE-32BIT-NEXT:    slw 6, 6, 9
584; LE-32BIT-NEXT:    srw 8, 8, 4
585; LE-32BIT-NEXT:    slw 9, 3, 9
586; LE-32BIT-NEXT:    srw 3, 3, 4
587; LE-32BIT-NEXT:    or 3, 6, 3
588; LE-32BIT-NEXT:    stw 3, 8(5)
589; LE-32BIT-NEXT:    or 3, 9, 8
590; LE-32BIT-NEXT:    sraw 4, 7, 4
591; LE-32BIT-NEXT:    stw 3, 12(5)
592; LE-32BIT-NEXT:    or 3, 11, 10
593; LE-32BIT-NEXT:    stw 4, 0(5)
594; LE-32BIT-NEXT:    stw 3, 4(5)
595; LE-32BIT-NEXT:    addi 1, 1, 48
596; LE-32BIT-NEXT:    blr
597  %src = load i128, ptr %src.ptr, align 1
598  %byteOff = load i128, ptr %byteOff.ptr, align 1
599  %bitOff = shl i128 %byteOff, 3
600  %res = ashr i128 %src, %bitOff
601  store i128 %res, ptr %dst, align 1
602  ret void
603}
604
; ashr_16bytes_wordOff - arithmetic right shift of an i128 by a count of
; 32-bit words. Same shape as ashr_16bytes, but the loaded offset is scaled
; with shl-by-5 instead of shl-by-3. The 64-bit runs expect the same
; sequences with "slwi 4, 4, 5". The ppc32 run expects no shift instructions
; on the data: the value is written to a stack buffer next to four words
; filled with "srawi 3, 7, 31", and four words are copied out from an address
; adjusted by the scaled, masked offset.
605define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
606; LE-64BIT-LABEL: ashr_16bytes_wordOff:
607; LE-64BIT:       # %bb.0:
608; LE-64BIT-NEXT:    lwz 4, 0(4)
609; LE-64BIT-NEXT:    ld 6, 8(3)
610; LE-64BIT-NEXT:    ld 3, 0(3)
611; LE-64BIT-NEXT:    slwi 4, 4, 5
612; LE-64BIT-NEXT:    subfic 7, 4, 64
613; LE-64BIT-NEXT:    srd 3, 3, 4
614; LE-64BIT-NEXT:    sld 7, 6, 7
615; LE-64BIT-NEXT:    or 3, 3, 7
616; LE-64BIT-NEXT:    addi 7, 4, -64
617; LE-64BIT-NEXT:    srad 4, 6, 4
618; LE-64BIT-NEXT:    cmpwi 7, 1
619; LE-64BIT-NEXT:    srad 8, 6, 7
620; LE-64BIT-NEXT:    std 4, 8(5)
621; LE-64BIT-NEXT:    isellt 3, 3, 8
622; LE-64BIT-NEXT:    std 3, 0(5)
623; LE-64BIT-NEXT:    blr
624;
625; BE-LABEL: ashr_16bytes_wordOff:
626; BE:       # %bb.0:
627; BE-NEXT:    lwz 4, 12(4)
628; BE-NEXT:    ld 6, 0(3)
629; BE-NEXT:    slwi 4, 4, 5
630; BE-NEXT:    addi 7, 4, -64
631; BE-NEXT:    cmpwi 7, 1
632; BE-NEXT:    blt 0, .LBB11_2
633; BE-NEXT:  # %bb.1:
634; BE-NEXT:    srad 3, 6, 7
635; BE-NEXT:    b .LBB11_3
636; BE-NEXT:  .LBB11_2:
637; BE-NEXT:    ld 3, 8(3)
638; BE-NEXT:    subfic 7, 4, 64
639; BE-NEXT:    sld 7, 6, 7
640; BE-NEXT:    srd 3, 3, 4
641; BE-NEXT:    or 3, 3, 7
642; BE-NEXT:  .LBB11_3:
643; BE-NEXT:    srad 4, 6, 4
644; BE-NEXT:    std 3, 8(5)
645; BE-NEXT:    std 4, 0(5)
646; BE-NEXT:    blr
647;
648; LE-32BIT-LABEL: ashr_16bytes_wordOff:
649; LE-32BIT:       # %bb.0:
650; LE-32BIT-NEXT:    stwu 1, -48(1)
651; LE-32BIT-NEXT:    lwz 7, 0(3)
652; LE-32BIT-NEXT:    addi 6, 1, 32
653; LE-32BIT-NEXT:    lwz 8, 4(3)
654; LE-32BIT-NEXT:    lwz 9, 8(3)
655; LE-32BIT-NEXT:    lwz 3, 12(3)
656; LE-32BIT-NEXT:    lwz 4, 12(4)
657; LE-32BIT-NEXT:    stw 3, 44(1)
658; LE-32BIT-NEXT:    srawi 3, 7, 31
659; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 28, 29
660; LE-32BIT-NEXT:    stw 9, 40(1)
661; LE-32BIT-NEXT:    stw 8, 36(1)
662; LE-32BIT-NEXT:    stw 7, 32(1)
663; LE-32BIT-NEXT:    stw 3, 28(1)
664; LE-32BIT-NEXT:    stw 3, 24(1)
665; LE-32BIT-NEXT:    stw 3, 20(1)
666; LE-32BIT-NEXT:    stw 3, 16(1)
667; LE-32BIT-NEXT:    sub 3, 6, 4
668; LE-32BIT-NEXT:    lwz 4, 4(3)
669; LE-32BIT-NEXT:    lwz 6, 0(3)
670; LE-32BIT-NEXT:    lwz 7, 8(3)
671; LE-32BIT-NEXT:    lwz 3, 12(3)
672; LE-32BIT-NEXT:    stw 7, 8(5)
673; LE-32BIT-NEXT:    stw 3, 12(5)
674; LE-32BIT-NEXT:    stw 6, 0(5)
675; LE-32BIT-NEXT:    stw 4, 4(5)
676; LE-32BIT-NEXT:    addi 1, 1, 48
677; LE-32BIT-NEXT:    blr
678  %src = load i128, ptr %src.ptr, align 1
679  %wordOff = load i128, ptr %wordOff.ptr, align 1
680  %bitOff = shl i128 %wordOff, 5
681  %res = ashr i128 %src, %bitOff
682  store i128 %res, ptr %dst, align 1
683  ret void
684}
685
; lshr_32bytes - logical right shift of an i256 by a byte count.
; The IR loads both the value and the byte offset as i256 (align 1) and scales
; the offset to bits with shl-by-3. All three runs expect the shift to go
; through a stack buffer: the value is stored next to zero padding, re-read
; from an address derived from the masked byte offset, and finished with
; register shifts by the remaining bit amount. The powerpc64le run fills the
; buffer with vector stores (stxvd2x), the powerpc64 run with std at negative
; offsets from r1, and the ppc32 run allocates a 112-byte frame and spills
; r25-r30 around the 32-bit shift sequence.
686define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
687; LE-64BIT-LABEL: lshr_32bytes:
688; LE-64BIT:       # %bb.0:
689; LE-64BIT-NEXT:    li 6, 16
690; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
691; LE-64BIT-NEXT:    xxlxor 2, 2, 2
692; LE-64BIT-NEXT:    addi 7, 1, -64
693; LE-64BIT-NEXT:    li 8, 32
694; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
695; LE-64BIT-NEXT:    lwz 3, 0(4)
696; LE-64BIT-NEXT:    li 4, 48
697; LE-64BIT-NEXT:    stxvd2x 2, 7, 8
698; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
699; LE-64BIT-NEXT:    rlwinm 4, 3, 0, 27, 28
700; LE-64BIT-NEXT:    stxvd2x 1, 0, 7
701; LE-64BIT-NEXT:    rlwinm 3, 3, 3, 26, 28
702; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
703; LE-64BIT-NEXT:    ldux 6, 4, 7
704; LE-64BIT-NEXT:    subfic 7, 3, 64
705; LE-64BIT-NEXT:    ld 8, 8(4)
706; LE-64BIT-NEXT:    ld 9, 16(4)
707; LE-64BIT-NEXT:    ld 4, 24(4)
708; LE-64BIT-NEXT:    srd 6, 6, 3
709; LE-64BIT-NEXT:    sld 10, 8, 7
710; LE-64BIT-NEXT:    sld 11, 4, 7
711; LE-64BIT-NEXT:    srd 8, 8, 3
712; LE-64BIT-NEXT:    sld 7, 9, 7
713; LE-64BIT-NEXT:    or 6, 10, 6
714; LE-64BIT-NEXT:    srd 10, 9, 3
715; LE-64BIT-NEXT:    srd 3, 4, 3
716; LE-64BIT-NEXT:    or 7, 7, 8
717; LE-64BIT-NEXT:    std 3, 24(5)
718; LE-64BIT-NEXT:    or 3, 11, 10
719; LE-64BIT-NEXT:    std 7, 8(5)
720; LE-64BIT-NEXT:    std 6, 0(5)
721; LE-64BIT-NEXT:    std 3, 16(5)
722; LE-64BIT-NEXT:    blr
723;
724; BE-LABEL: lshr_32bytes:
725; BE:       # %bb.0:
726; BE-NEXT:    ld 6, 0(3)
727; BE-NEXT:    ld 7, 8(3)
728; BE-NEXT:    ld 8, 16(3)
729; BE-NEXT:    ld 3, 24(3)
730; BE-NEXT:    lwz 4, 28(4)
731; BE-NEXT:    li 9, 0
732; BE-NEXT:    addi 10, 1, -32
733; BE-NEXT:    std 9, -40(1)
734; BE-NEXT:    std 9, -48(1)
735; BE-NEXT:    std 9, -56(1)
736; BE-NEXT:    std 9, -64(1)
737; BE-NEXT:    std 3, -8(1)
738; BE-NEXT:    rlwinm 3, 4, 0, 27, 28
739; BE-NEXT:    neg 3, 3
740; BE-NEXT:    std 8, -16(1)
741; BE-NEXT:    std 7, -24(1)
742; BE-NEXT:    std 6, -32(1)
743; BE-NEXT:    extsw 3, 3
744; BE-NEXT:    ldux 3, 10, 3
745; BE-NEXT:    rlwinm 4, 4, 3, 26, 28
746; BE-NEXT:    subfic 9, 4, 64
747; BE-NEXT:    ld 6, 8(10)
748; BE-NEXT:    ld 7, 24(10)
749; BE-NEXT:    ld 8, 16(10)
750; BE-NEXT:    sld 10, 3, 9
751; BE-NEXT:    srd 3, 3, 4
752; BE-NEXT:    std 3, 0(5)
753; BE-NEXT:    srd 11, 6, 4
754; BE-NEXT:    srd 7, 7, 4
755; BE-NEXT:    sld 6, 6, 9
756; BE-NEXT:    sld 9, 8, 9
757; BE-NEXT:    srd 8, 8, 4
758; BE-NEXT:    or 10, 10, 11
759; BE-NEXT:    or 7, 9, 7
760; BE-NEXT:    or 6, 6, 8
761; BE-NEXT:    std 6, 16(5)
762; BE-NEXT:    std 7, 24(5)
763; BE-NEXT:    std 10, 8(5)
764; BE-NEXT:    blr
765;
766; LE-32BIT-LABEL: lshr_32bytes:
767; LE-32BIT:       # %bb.0:
768; LE-32BIT-NEXT:    stwu 1, -112(1)
769; LE-32BIT-NEXT:    lwz 7, 0(3)
770; LE-32BIT-NEXT:    li 6, 0
771; LE-32BIT-NEXT:    lwz 8, 4(3)
772; LE-32BIT-NEXT:    lwz 9, 8(3)
773; LE-32BIT-NEXT:    lwz 10, 12(3)
774; LE-32BIT-NEXT:    lwz 11, 16(3)
775; LE-32BIT-NEXT:    lwz 12, 20(3)
776; LE-32BIT-NEXT:    lwz 0, 24(3)
777; LE-32BIT-NEXT:    lwz 3, 28(3)
778; LE-32BIT-NEXT:    lwz 4, 28(4)
779; LE-32BIT-NEXT:    stw 6, 44(1)
780; LE-32BIT-NEXT:    stw 6, 40(1)
781; LE-32BIT-NEXT:    stw 6, 36(1)
782; LE-32BIT-NEXT:    stw 6, 32(1)
783; LE-32BIT-NEXT:    stw 6, 28(1)
784; LE-32BIT-NEXT:    stw 6, 24(1)
785; LE-32BIT-NEXT:    stw 6, 20(1)
786; LE-32BIT-NEXT:    stw 6, 16(1)
787; LE-32BIT-NEXT:    rlwinm 6, 4, 0, 27, 29
788; LE-32BIT-NEXT:    stw 3, 76(1)
789; LE-32BIT-NEXT:    addi 3, 1, 48
790; LE-32BIT-NEXT:    stw 25, 84(1) # 4-byte Folded Spill
791; LE-32BIT-NEXT:    sub 3, 3, 6
792; LE-32BIT-NEXT:    stw 26, 88(1) # 4-byte Folded Spill
793; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
794; LE-32BIT-NEXT:    stw 27, 92(1) # 4-byte Folded Spill
795; LE-32BIT-NEXT:    stw 28, 96(1) # 4-byte Folded Spill
796; LE-32BIT-NEXT:    stw 29, 100(1) # 4-byte Folded Spill
797; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
798; LE-32BIT-NEXT:    stw 0, 72(1)
799; LE-32BIT-NEXT:    subfic 0, 4, 32
800; LE-32BIT-NEXT:    stw 12, 68(1)
801; LE-32BIT-NEXT:    stw 11, 64(1)
802; LE-32BIT-NEXT:    stw 10, 60(1)
803; LE-32BIT-NEXT:    stw 9, 56(1)
804; LE-32BIT-NEXT:    stw 8, 52(1)
805; LE-32BIT-NEXT:    stw 7, 48(1)
806; LE-32BIT-NEXT:    lwz 6, 4(3)
807; LE-32BIT-NEXT:    lwz 7, 0(3)
808; LE-32BIT-NEXT:    lwz 8, 12(3)
809; LE-32BIT-NEXT:    srw 30, 6, 4
810; LE-32BIT-NEXT:    lwz 9, 8(3)
811; LE-32BIT-NEXT:    slw 29, 7, 0
812; LE-32BIT-NEXT:    lwz 10, 20(3)
813; LE-32BIT-NEXT:    srw 28, 8, 4
814; LE-32BIT-NEXT:    lwz 11, 16(3)
815; LE-32BIT-NEXT:    slw 27, 9, 0
816; LE-32BIT-NEXT:    lwz 12, 28(3)
817; LE-32BIT-NEXT:    slw 6, 6, 0
818; LE-32BIT-NEXT:    lwz 3, 24(3)
819; LE-32BIT-NEXT:    srw 26, 10, 4
820; LE-32BIT-NEXT:    slw 25, 11, 0
821; LE-32BIT-NEXT:    slw 8, 8, 0
822; LE-32BIT-NEXT:    slw 10, 10, 0
823; LE-32BIT-NEXT:    slw 0, 3, 0
824; LE-32BIT-NEXT:    srw 3, 3, 4
825; LE-32BIT-NEXT:    srw 12, 12, 4
826; LE-32BIT-NEXT:    or 3, 10, 3
827; LE-32BIT-NEXT:    srw 11, 11, 4
828; LE-32BIT-NEXT:    stw 3, 24(5)
829; LE-32BIT-NEXT:    or 3, 0, 12
830; LE-32BIT-NEXT:    stw 3, 28(5)
831; LE-32BIT-NEXT:    or 3, 8, 11
832; LE-32BIT-NEXT:    srw 9, 9, 4
833; LE-32BIT-NEXT:    stw 3, 16(5)
834; LE-32BIT-NEXT:    or 3, 25, 26
835; LE-32BIT-NEXT:    stw 3, 20(5)
836; LE-32BIT-NEXT:    or 3, 6, 9
837; LE-32BIT-NEXT:    stw 3, 8(5)
838; LE-32BIT-NEXT:    or 3, 27, 28
839; LE-32BIT-NEXT:    srw 4, 7, 4
840; LE-32BIT-NEXT:    stw 3, 12(5)
841; LE-32BIT-NEXT:    or 3, 29, 30
842; LE-32BIT-NEXT:    stw 4, 0(5)
843; LE-32BIT-NEXT:    stw 3, 4(5)
844; LE-32BIT-NEXT:    lwz 30, 104(1) # 4-byte Folded Reload
845; LE-32BIT-NEXT:    lwz 29, 100(1) # 4-byte Folded Reload
846; LE-32BIT-NEXT:    lwz 28, 96(1) # 4-byte Folded Reload
847; LE-32BIT-NEXT:    lwz 27, 92(1) # 4-byte Folded Reload
848; LE-32BIT-NEXT:    lwz 26, 88(1) # 4-byte Folded Reload
849; LE-32BIT-NEXT:    lwz 25, 84(1) # 4-byte Folded Reload
850; LE-32BIT-NEXT:    addi 1, 1, 112
851; LE-32BIT-NEXT:    blr
852  %src = load i256, ptr %src.ptr, align 1
853  %byteOff = load i256, ptr %byteOff.ptr, align 1
854  %bitOff = shl i256 %byteOff, 3
855  %res = lshr i256 %src, %bitOff
856  store i256 %res, ptr %dst, align 1
857  ret void
858}
859
; lshr_32bytes_wordOff - logical right shift of an i256 by a count of 32-bit
; words. Same shape as lshr_32bytes, but the loaded offset is scaled with
; shl-by-5 instead of shl-by-3. The 64-bit runs expect the same stack-buffer
; sequence as lshr_32bytes with the two rlwinm masks changed to
; "2, 27, 28" (buffer offset) and "5, 26, 26" (remaining bit shift). The
; ppc32 run expects an 80-byte frame with no callee-saved spills and no shift
; instructions on the data: eight words are copied out of the zero-padded
; stack buffer from an address adjusted by the scaled, masked offset.
860define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
861; LE-64BIT-LABEL: lshr_32bytes_wordOff:
862; LE-64BIT:       # %bb.0:
863; LE-64BIT-NEXT:    li 6, 16
864; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
865; LE-64BIT-NEXT:    xxlxor 2, 2, 2
866; LE-64BIT-NEXT:    addi 7, 1, -64
867; LE-64BIT-NEXT:    li 8, 32
868; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
869; LE-64BIT-NEXT:    lwz 3, 0(4)
870; LE-64BIT-NEXT:    li 4, 48
871; LE-64BIT-NEXT:    stxvd2x 2, 7, 8
872; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
873; LE-64BIT-NEXT:    rlwinm 4, 3, 2, 27, 28
874; LE-64BIT-NEXT:    stxvd2x 1, 0, 7
875; LE-64BIT-NEXT:    rlwinm 3, 3, 5, 26, 26
876; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
877; LE-64BIT-NEXT:    ldux 6, 4, 7
878; LE-64BIT-NEXT:    subfic 7, 3, 64
879; LE-64BIT-NEXT:    ld 8, 8(4)
880; LE-64BIT-NEXT:    ld 9, 16(4)
881; LE-64BIT-NEXT:    ld 4, 24(4)
882; LE-64BIT-NEXT:    srd 6, 6, 3
883; LE-64BIT-NEXT:    sld 10, 8, 7
884; LE-64BIT-NEXT:    sld 11, 4, 7
885; LE-64BIT-NEXT:    srd 8, 8, 3
886; LE-64BIT-NEXT:    sld 7, 9, 7
887; LE-64BIT-NEXT:    or 6, 10, 6
888; LE-64BIT-NEXT:    srd 10, 9, 3
889; LE-64BIT-NEXT:    srd 3, 4, 3
890; LE-64BIT-NEXT:    or 7, 7, 8
891; LE-64BIT-NEXT:    std 3, 24(5)
892; LE-64BIT-NEXT:    or 3, 11, 10
893; LE-64BIT-NEXT:    std 7, 8(5)
894; LE-64BIT-NEXT:    std 6, 0(5)
895; LE-64BIT-NEXT:    std 3, 16(5)
896; LE-64BIT-NEXT:    blr
897;
898; BE-LABEL: lshr_32bytes_wordOff:
899; BE:       # %bb.0:
900; BE-NEXT:    ld 6, 0(3)
901; BE-NEXT:    ld 7, 8(3)
902; BE-NEXT:    ld 8, 16(3)
903; BE-NEXT:    ld 3, 24(3)
904; BE-NEXT:    lwz 4, 28(4)
905; BE-NEXT:    li 9, 0
906; BE-NEXT:    addi 10, 1, -32
907; BE-NEXT:    std 9, -40(1)
908; BE-NEXT:    std 9, -48(1)
909; BE-NEXT:    std 9, -56(1)
910; BE-NEXT:    std 9, -64(1)
911; BE-NEXT:    std 3, -8(1)
912; BE-NEXT:    rlwinm 3, 4, 2, 27, 28
913; BE-NEXT:    neg 3, 3
914; BE-NEXT:    std 8, -16(1)
915; BE-NEXT:    std 7, -24(1)
916; BE-NEXT:    std 6, -32(1)
917; BE-NEXT:    extsw 3, 3
918; BE-NEXT:    ldux 3, 10, 3
919; BE-NEXT:    rlwinm 4, 4, 5, 26, 26
920; BE-NEXT:    subfic 9, 4, 64
921; BE-NEXT:    ld 6, 8(10)
922; BE-NEXT:    ld 7, 24(10)
923; BE-NEXT:    ld 8, 16(10)
924; BE-NEXT:    sld 10, 3, 9
925; BE-NEXT:    srd 3, 3, 4
926; BE-NEXT:    std 3, 0(5)
927; BE-NEXT:    srd 11, 6, 4
928; BE-NEXT:    srd 7, 7, 4
929; BE-NEXT:    sld 6, 6, 9
930; BE-NEXT:    sld 9, 8, 9
931; BE-NEXT:    srd 8, 8, 4
932; BE-NEXT:    or 10, 10, 11
933; BE-NEXT:    or 7, 9, 7
934; BE-NEXT:    or 6, 6, 8
935; BE-NEXT:    std 6, 16(5)
936; BE-NEXT:    std 7, 24(5)
937; BE-NEXT:    std 10, 8(5)
938; BE-NEXT:    blr
939;
940; LE-32BIT-LABEL: lshr_32bytes_wordOff:
941; LE-32BIT:       # %bb.0:
942; LE-32BIT-NEXT:    stwu 1, -80(1)
943; LE-32BIT-NEXT:    lwz 7, 0(3)
944; LE-32BIT-NEXT:    li 6, 0
945; LE-32BIT-NEXT:    lwz 8, 4(3)
946; LE-32BIT-NEXT:    lwz 9, 8(3)
947; LE-32BIT-NEXT:    lwz 10, 12(3)
948; LE-32BIT-NEXT:    lwz 11, 16(3)
949; LE-32BIT-NEXT:    lwz 12, 20(3)
950; LE-32BIT-NEXT:    lwz 0, 24(3)
951; LE-32BIT-NEXT:    lwz 3, 28(3)
952; LE-32BIT-NEXT:    lwz 4, 28(4)
953; LE-32BIT-NEXT:    stw 3, 76(1)
954; LE-32BIT-NEXT:    addi 3, 1, 48
955; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 27, 29
956; LE-32BIT-NEXT:    stw 6, 44(1)
957; LE-32BIT-NEXT:    sub 3, 3, 4
958; LE-32BIT-NEXT:    stw 6, 40(1)
959; LE-32BIT-NEXT:    stw 6, 36(1)
960; LE-32BIT-NEXT:    stw 6, 32(1)
961; LE-32BIT-NEXT:    stw 6, 28(1)
962; LE-32BIT-NEXT:    stw 6, 24(1)
963; LE-32BIT-NEXT:    stw 6, 20(1)
964; LE-32BIT-NEXT:    stw 6, 16(1)
965; LE-32BIT-NEXT:    stw 0, 72(1)
966; LE-32BIT-NEXT:    stw 12, 68(1)
967; LE-32BIT-NEXT:    stw 11, 64(1)
968; LE-32BIT-NEXT:    stw 10, 60(1)
969; LE-32BIT-NEXT:    stw 9, 56(1)
970; LE-32BIT-NEXT:    stw 8, 52(1)
971; LE-32BIT-NEXT:    stw 7, 48(1)
972; LE-32BIT-NEXT:    lwz 4, 4(3)
973; LE-32BIT-NEXT:    lwz 6, 0(3)
974; LE-32BIT-NEXT:    lwz 7, 12(3)
975; LE-32BIT-NEXT:    lwz 8, 8(3)
976; LE-32BIT-NEXT:    lwz 9, 20(3)
977; LE-32BIT-NEXT:    lwz 10, 16(3)
978; LE-32BIT-NEXT:    lwz 11, 24(3)
979; LE-32BIT-NEXT:    lwz 3, 28(3)
980; LE-32BIT-NEXT:    stw 11, 24(5)
981; LE-32BIT-NEXT:    stw 3, 28(5)
982; LE-32BIT-NEXT:    stw 10, 16(5)
983; LE-32BIT-NEXT:    stw 9, 20(5)
984; LE-32BIT-NEXT:    stw 8, 8(5)
985; LE-32BIT-NEXT:    stw 7, 12(5)
986; LE-32BIT-NEXT:    stw 6, 0(5)
987; LE-32BIT-NEXT:    stw 4, 4(5)
988; LE-32BIT-NEXT:    addi 1, 1, 80
989; LE-32BIT-NEXT:    blr
990  %src = load i256, ptr %src.ptr, align 1
991  %wordOff = load i256, ptr %wordOff.ptr, align 1
992  %bitOff = shl i256 %wordOff, 5
993  %res = lshr i256 %src, %bitOff
994  store i256 %res, ptr %dst, align 1
995  ret void
996}
997
; Logical right shift of an i256 by a doubleword-granular amount. The offset
; loaded from %dwordOff.ptr is multiplied by 64 (shl by 6) to form the bit
; count. Source, offset and destination are all accessed with align 1.
; In the expected output of every run the source is written next to a
; zero-filled region on the stack and the result is re-loaded at a computed
; offset; no sld/srd/slw/srw instructions remain.
998define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
999; LE-64BIT-LABEL: lshr_32bytes_dwordOff:
1000; LE-64BIT:       # %bb.0:
1001; LE-64BIT-NEXT:    li 6, 16
1002; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
1003; LE-64BIT-NEXT:    xxlxor 2, 2, 2
1004; LE-64BIT-NEXT:    addi 7, 1, -64
1005; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
1006; LE-64BIT-NEXT:    lwz 3, 0(4)
1007; LE-64BIT-NEXT:    li 4, 48
1008; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
1009; LE-64BIT-NEXT:    li 4, 32
1010; LE-64BIT-NEXT:    rlwinm 3, 3, 3, 27, 28
1011; LE-64BIT-NEXT:    stxvd2x 1, 0, 7
1012; LE-64BIT-NEXT:    stxvd2x 2, 7, 4
1013; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
1014; LE-64BIT-NEXT:    lxvd2x 0, 7, 3
1015; LE-64BIT-NEXT:    add 3, 7, 3
1016; LE-64BIT-NEXT:    lxvd2x 1, 3, 6
1017; LE-64BIT-NEXT:    stxvd2x 0, 0, 5
1018; LE-64BIT-NEXT:    stxvd2x 1, 5, 6
1019; LE-64BIT-NEXT:    blr
1020;
1021; BE-LABEL: lshr_32bytes_dwordOff:
1022; BE:       # %bb.0:
1023; BE-NEXT:    ld 7, 0(3)
1024; BE-NEXT:    ld 8, 8(3)
1025; BE-NEXT:    ld 9, 16(3)
1026; BE-NEXT:    ld 3, 24(3)
1027; BE-NEXT:    lwz 4, 28(4)
1028; BE-NEXT:    li 6, 0
1029; BE-NEXT:    std 6, -40(1)
1030; BE-NEXT:    std 6, -48(1)
1031; BE-NEXT:    std 6, -56(1)
1032; BE-NEXT:    std 6, -64(1)
1033; BE-NEXT:    std 3, -8(1)
1034; BE-NEXT:    rlwinm 3, 4, 3, 27, 28
1035; BE-NEXT:    neg 3, 3
1036; BE-NEXT:    std 9, -16(1)
1037; BE-NEXT:    std 8, -24(1)
1038; BE-NEXT:    std 7, -32(1)
1039; BE-NEXT:    extsw 3, 3
1040; BE-NEXT:    addi 4, 1, -32
1041; BE-NEXT:    ldux 3, 4, 3
1042; BE-NEXT:    ld 6, 8(4)
1043; BE-NEXT:    ld 7, 24(4)
1044; BE-NEXT:    ld 4, 16(4)
1045; BE-NEXT:    std 3, 0(5)
1046; BE-NEXT:    std 4, 16(5)
1047; BE-NEXT:    std 7, 24(5)
1048; BE-NEXT:    std 6, 8(5)
1049; BE-NEXT:    blr
1050;
1051; LE-32BIT-LABEL: lshr_32bytes_dwordOff:
1052; LE-32BIT:       # %bb.0:
1053; LE-32BIT-NEXT:    stwu 1, -80(1)
1054; LE-32BIT-NEXT:    lwz 7, 0(3)
1055; LE-32BIT-NEXT:    li 6, 0
1056; LE-32BIT-NEXT:    lwz 8, 4(3)
1057; LE-32BIT-NEXT:    lwz 9, 8(3)
1058; LE-32BIT-NEXT:    lwz 10, 12(3)
1059; LE-32BIT-NEXT:    lwz 11, 16(3)
1060; LE-32BIT-NEXT:    lwz 12, 20(3)
1061; LE-32BIT-NEXT:    lwz 0, 24(3)
1062; LE-32BIT-NEXT:    lwz 3, 28(3)
1063; LE-32BIT-NEXT:    lwz 4, 28(4)
1064; LE-32BIT-NEXT:    stw 3, 76(1)
1065; LE-32BIT-NEXT:    addi 3, 1, 48
1066; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
1067; LE-32BIT-NEXT:    stw 6, 44(1)
1068; LE-32BIT-NEXT:    sub 3, 3, 4
1069; LE-32BIT-NEXT:    stw 6, 40(1)
1070; LE-32BIT-NEXT:    stw 6, 36(1)
1071; LE-32BIT-NEXT:    stw 6, 32(1)
1072; LE-32BIT-NEXT:    stw 6, 28(1)
1073; LE-32BIT-NEXT:    stw 6, 24(1)
1074; LE-32BIT-NEXT:    stw 6, 20(1)
1075; LE-32BIT-NEXT:    stw 6, 16(1)
1076; LE-32BIT-NEXT:    stw 0, 72(1)
1077; LE-32BIT-NEXT:    stw 12, 68(1)
1078; LE-32BIT-NEXT:    stw 11, 64(1)
1079; LE-32BIT-NEXT:    stw 10, 60(1)
1080; LE-32BIT-NEXT:    stw 9, 56(1)
1081; LE-32BIT-NEXT:    stw 8, 52(1)
1082; LE-32BIT-NEXT:    stw 7, 48(1)
1083; LE-32BIT-NEXT:    lwz 4, 4(3)
1084; LE-32BIT-NEXT:    lwz 6, 0(3)
1085; LE-32BIT-NEXT:    lwz 7, 12(3)
1086; LE-32BIT-NEXT:    lwz 8, 8(3)
1087; LE-32BIT-NEXT:    lwz 9, 20(3)
1088; LE-32BIT-NEXT:    lwz 10, 16(3)
1089; LE-32BIT-NEXT:    lwz 11, 24(3)
1090; LE-32BIT-NEXT:    lwz 3, 28(3)
1091; LE-32BIT-NEXT:    stw 11, 24(5)
1092; LE-32BIT-NEXT:    stw 3, 28(5)
1093; LE-32BIT-NEXT:    stw 10, 16(5)
1094; LE-32BIT-NEXT:    stw 9, 20(5)
1095; LE-32BIT-NEXT:    stw 8, 8(5)
1096; LE-32BIT-NEXT:    stw 7, 12(5)
1097; LE-32BIT-NEXT:    stw 6, 0(5)
1098; LE-32BIT-NEXT:    stw 4, 4(5)
1099; LE-32BIT-NEXT:    addi 1, 1, 80
1100; LE-32BIT-NEXT:    blr
1101  %src = load i256, ptr %src.ptr, align 1
1102  %dwordOff = load i256, ptr %dwordOff.ptr, align 1
; Scale the doubleword count to a bit count (x64).
1103  %bitOff = shl i256 %dwordOff, 6
1104  %res = lshr i256 %src, %bitOff
1105  store i256 %res, ptr %dst, align 1
1106  ret void
1107}
1108
; Left shift of an i256 by a byte-granular amount. The offset loaded from
; %byteOff.ptr is multiplied by 8 (shl by 3) to form the bit count. Source,
; offset and destination are all accessed with align 1.
; The expected output writes the source next to a zero-filled region on the
; stack, re-loads it at an offset derived from the shift amount (the rlwinm
; masks), and then merges neighbouring registers with sld/srd on the two
; 64-bit runs or slw/srw on the ppc32 run to apply the remaining part of the
; shift.
1109define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
1110; LE-64BIT-LABEL: shl_32bytes:
1111; LE-64BIT:       # %bb.0:
1112; LE-64BIT-NEXT:    li 6, 16
1113; LE-64BIT-NEXT:    lwz 4, 0(4)
1114; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
1115; LE-64BIT-NEXT:    addi 7, 1, -64
1116; LE-64BIT-NEXT:    li 8, 48
1117; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
1118; LE-64BIT-NEXT:    rlwinm 3, 4, 0, 27, 28
1119; LE-64BIT-NEXT:    rlwinm 4, 4, 3, 26, 28
1120; LE-64BIT-NEXT:    neg 3, 3
1121; LE-64BIT-NEXT:    stxvd2x 0, 7, 8
1122; LE-64BIT-NEXT:    xxlxor 0, 0, 0
1123; LE-64BIT-NEXT:    li 8, 32
1124; LE-64BIT-NEXT:    extsw 3, 3
1125; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
1126; LE-64BIT-NEXT:    addi 6, 1, -32
1127; LE-64BIT-NEXT:    stxvd2x 0, 0, 7
1128; LE-64BIT-NEXT:    stxvd2x 1, 7, 8
1129; LE-64BIT-NEXT:    subfic 7, 4, 64
1130; LE-64BIT-NEXT:    ldux 3, 6, 3
1131; LE-64BIT-NEXT:    ld 8, 16(6)
1132; LE-64BIT-NEXT:    ld 9, 24(6)
1133; LE-64BIT-NEXT:    ld 6, 8(6)
1134; LE-64BIT-NEXT:    srd 10, 8, 7
1135; LE-64BIT-NEXT:    sld 9, 9, 4
1136; LE-64BIT-NEXT:    sld 8, 8, 4
1137; LE-64BIT-NEXT:    or 9, 9, 10
1138; LE-64BIT-NEXT:    srd 10, 6, 7
1139; LE-64BIT-NEXT:    srd 7, 3, 7
1140; LE-64BIT-NEXT:    sld 6, 6, 4
1141; LE-64BIT-NEXT:    sld 3, 3, 4
1142; LE-64BIT-NEXT:    or 6, 6, 7
1143; LE-64BIT-NEXT:    std 3, 0(5)
1144; LE-64BIT-NEXT:    or 3, 8, 10
1145; LE-64BIT-NEXT:    std 9, 24(5)
1146; LE-64BIT-NEXT:    std 6, 8(5)
1147; LE-64BIT-NEXT:    std 3, 16(5)
1148; LE-64BIT-NEXT:    blr
1149;
1150; BE-LABEL: shl_32bytes:
1151; BE:       # %bb.0:
1152; BE-NEXT:    ld 6, 0(3)
1153; BE-NEXT:    ld 7, 8(3)
1154; BE-NEXT:    ld 8, 16(3)
1155; BE-NEXT:    ld 3, 24(3)
1156; BE-NEXT:    lwz 4, 28(4)
1157; BE-NEXT:    li 9, 0
1158; BE-NEXT:    addi 10, 1, -64
1159; BE-NEXT:    std 9, -8(1)
1160; BE-NEXT:    std 9, -16(1)
1161; BE-NEXT:    std 9, -24(1)
1162; BE-NEXT:    std 9, -32(1)
1163; BE-NEXT:    std 3, -40(1)
1164; BE-NEXT:    std 8, -48(1)
1165; BE-NEXT:    std 7, -56(1)
1166; BE-NEXT:    std 6, -64(1)
1167; BE-NEXT:    rlwinm 3, 4, 0, 27, 28
1168; BE-NEXT:    ldux 6, 3, 10
1169; BE-NEXT:    rlwinm 4, 4, 3, 26, 28
1170; BE-NEXT:    subfic 9, 4, 64
1171; BE-NEXT:    ld 7, 16(3)
1172; BE-NEXT:    ld 8, 8(3)
1173; BE-NEXT:    ld 3, 24(3)
1174; BE-NEXT:    sld 6, 6, 4
1175; BE-NEXT:    srd 10, 7, 9
1176; BE-NEXT:    sld 11, 8, 4
1177; BE-NEXT:    srd 8, 8, 9
1178; BE-NEXT:    srd 9, 3, 9
1179; BE-NEXT:    sld 7, 7, 4
1180; BE-NEXT:    sld 3, 3, 4
1181; BE-NEXT:    or 10, 11, 10
1182; BE-NEXT:    or 6, 6, 8
1183; BE-NEXT:    or 7, 7, 9
1184; BE-NEXT:    std 3, 24(5)
1185; BE-NEXT:    std 7, 16(5)
1186; BE-NEXT:    std 6, 0(5)
1187; BE-NEXT:    std 10, 8(5)
1188; BE-NEXT:    blr
1189;
1190; LE-32BIT-LABEL: shl_32bytes:
1191; LE-32BIT:       # %bb.0:
1192; LE-32BIT-NEXT:    stwu 1, -112(1)
1193; LE-32BIT-NEXT:    lwz 7, 0(3)
1194; LE-32BIT-NEXT:    li 6, 0
1195; LE-32BIT-NEXT:    lwz 8, 4(3)
1196; LE-32BIT-NEXT:    lwz 9, 8(3)
1197; LE-32BIT-NEXT:    lwz 10, 12(3)
1198; LE-32BIT-NEXT:    lwz 11, 16(3)
1199; LE-32BIT-NEXT:    lwz 12, 20(3)
1200; LE-32BIT-NEXT:    lwz 0, 24(3)
1201; LE-32BIT-NEXT:    lwz 3, 28(3)
1202; LE-32BIT-NEXT:    lwz 4, 28(4)
1203; LE-32BIT-NEXT:    stw 25, 84(1) # 4-byte Folded Spill
1204; LE-32BIT-NEXT:    stw 26, 88(1) # 4-byte Folded Spill
1205; LE-32BIT-NEXT:    stw 27, 92(1) # 4-byte Folded Spill
1206; LE-32BIT-NEXT:    stw 28, 96(1) # 4-byte Folded Spill
1207; LE-32BIT-NEXT:    stw 29, 100(1) # 4-byte Folded Spill
1208; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
1209; LE-32BIT-NEXT:    stw 6, 76(1)
1210; LE-32BIT-NEXT:    stw 6, 72(1)
1211; LE-32BIT-NEXT:    stw 6, 68(1)
1212; LE-32BIT-NEXT:    stw 6, 64(1)
1213; LE-32BIT-NEXT:    stw 6, 60(1)
1214; LE-32BIT-NEXT:    stw 6, 56(1)
1215; LE-32BIT-NEXT:    stw 6, 52(1)
1216; LE-32BIT-NEXT:    stw 6, 48(1)
1217; LE-32BIT-NEXT:    rlwinm 6, 4, 0, 27, 29
1218; LE-32BIT-NEXT:    stw 3, 44(1)
1219; LE-32BIT-NEXT:    addi 3, 1, 16
1220; LE-32BIT-NEXT:    stw 0, 40(1)
1221; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
1222; LE-32BIT-NEXT:    stw 12, 36(1)
1223; LE-32BIT-NEXT:    subfic 12, 4, 32
1224; LE-32BIT-NEXT:    stw 11, 32(1)
1225; LE-32BIT-NEXT:    stw 10, 28(1)
1226; LE-32BIT-NEXT:    stw 9, 24(1)
1227; LE-32BIT-NEXT:    stw 8, 20(1)
1228; LE-32BIT-NEXT:    stw 7, 16(1)
1229; LE-32BIT-NEXT:    lwzux 3, 6, 3
1230; LE-32BIT-NEXT:    lwz 7, 8(6)
1231; LE-32BIT-NEXT:    slw 3, 3, 4
1232; LE-32BIT-NEXT:    lwz 8, 4(6)
1233; LE-32BIT-NEXT:    lwz 9, 16(6)
1234; LE-32BIT-NEXT:    srw 30, 7, 12
1235; LE-32BIT-NEXT:    lwz 10, 12(6)
1236; LE-32BIT-NEXT:    slw 29, 8, 4
1237; LE-32BIT-NEXT:    lwz 11, 24(6)
1238; LE-32BIT-NEXT:    srw 8, 8, 12
1239; LE-32BIT-NEXT:    lwz 0, 20(6)
1240; LE-32BIT-NEXT:    srw 28, 9, 12
1241; LE-32BIT-NEXT:    lwz 6, 28(6)
1242; LE-32BIT-NEXT:    slw 27, 10, 4
1243; LE-32BIT-NEXT:    srw 10, 10, 12
1244; LE-32BIT-NEXT:    slw 7, 7, 4
1245; LE-32BIT-NEXT:    srw 26, 11, 12
1246; LE-32BIT-NEXT:    slw 25, 0, 4
1247; LE-32BIT-NEXT:    srw 0, 0, 12
1248; LE-32BIT-NEXT:    slw 9, 9, 4
1249; LE-32BIT-NEXT:    srw 12, 6, 12
1250; LE-32BIT-NEXT:    slw 11, 11, 4
1251; LE-32BIT-NEXT:    slw 4, 6, 4
1252; LE-32BIT-NEXT:    stw 4, 28(5)
1253; LE-32BIT-NEXT:    or 4, 11, 12
1254; LE-32BIT-NEXT:    stw 4, 24(5)
1255; LE-32BIT-NEXT:    or 4, 9, 0
1256; LE-32BIT-NEXT:    stw 4, 16(5)
1257; LE-32BIT-NEXT:    or 4, 25, 26
1258; LE-32BIT-NEXT:    stw 4, 20(5)
1259; LE-32BIT-NEXT:    or 4, 7, 10
1260; LE-32BIT-NEXT:    or 3, 3, 8
1261; LE-32BIT-NEXT:    stw 4, 8(5)
1262; LE-32BIT-NEXT:    or 4, 27, 28
1263; LE-32BIT-NEXT:    stw 3, 0(5)
1264; LE-32BIT-NEXT:    or 3, 29, 30
1265; LE-32BIT-NEXT:    stw 4, 12(5)
1266; LE-32BIT-NEXT:    stw 3, 4(5)
1267; LE-32BIT-NEXT:    lwz 30, 104(1) # 4-byte Folded Reload
1268; LE-32BIT-NEXT:    lwz 29, 100(1) # 4-byte Folded Reload
1269; LE-32BIT-NEXT:    lwz 28, 96(1) # 4-byte Folded Reload
1270; LE-32BIT-NEXT:    lwz 27, 92(1) # 4-byte Folded Reload
1271; LE-32BIT-NEXT:    lwz 26, 88(1) # 4-byte Folded Reload
1272; LE-32BIT-NEXT:    lwz 25, 84(1) # 4-byte Folded Reload
1273; LE-32BIT-NEXT:    addi 1, 1, 112
1274; LE-32BIT-NEXT:    blr
1275  %src = load i256, ptr %src.ptr, align 1
1276  %byteOff = load i256, ptr %byteOff.ptr, align 1
; Scale the byte count to a bit count (x8).
1277  %bitOff = shl i256 %byteOff, 3
1278  %res = shl i256 %src, %bitOff
1279  store i256 %res, ptr %dst, align 1
1280  ret void
1281}
1282
; Left shift of an i256 by a word-granular amount. The offset loaded from
; %wordOff.ptr is multiplied by 32 (shl by 5) to form the bit count. Source,
; offset and destination are all accessed with align 1.
; In the expected output the two 64-bit runs still use sld/srd after the
; stack re-load, while the ppc32 run contains no slw/srw at all and only
; re-loads 4-byte words at the computed offset.
1283define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
1284; LE-64BIT-LABEL: shl_32bytes_wordOff:
1285; LE-64BIT:       # %bb.0:
1286; LE-64BIT-NEXT:    li 6, 16
1287; LE-64BIT-NEXT:    lwz 4, 0(4)
1288; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
1289; LE-64BIT-NEXT:    addi 7, 1, -64
1290; LE-64BIT-NEXT:    li 8, 48
1291; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
1292; LE-64BIT-NEXT:    rlwinm 3, 4, 2, 27, 28
1293; LE-64BIT-NEXT:    rlwinm 4, 4, 5, 26, 26
1294; LE-64BIT-NEXT:    neg 3, 3
1295; LE-64BIT-NEXT:    stxvd2x 0, 7, 8
1296; LE-64BIT-NEXT:    xxlxor 0, 0, 0
1297; LE-64BIT-NEXT:    li 8, 32
1298; LE-64BIT-NEXT:    extsw 3, 3
1299; LE-64BIT-NEXT:    stxvd2x 0, 7, 6
1300; LE-64BIT-NEXT:    addi 6, 1, -32
1301; LE-64BIT-NEXT:    stxvd2x 0, 0, 7
1302; LE-64BIT-NEXT:    stxvd2x 1, 7, 8
1303; LE-64BIT-NEXT:    subfic 7, 4, 64
1304; LE-64BIT-NEXT:    ldux 3, 6, 3
1305; LE-64BIT-NEXT:    ld 8, 16(6)
1306; LE-64BIT-NEXT:    ld 9, 24(6)
1307; LE-64BIT-NEXT:    ld 6, 8(6)
1308; LE-64BIT-NEXT:    srd 10, 8, 7
1309; LE-64BIT-NEXT:    sld 9, 9, 4
1310; LE-64BIT-NEXT:    sld 8, 8, 4
1311; LE-64BIT-NEXT:    or 9, 9, 10
1312; LE-64BIT-NEXT:    srd 10, 6, 7
1313; LE-64BIT-NEXT:    srd 7, 3, 7
1314; LE-64BIT-NEXT:    sld 6, 6, 4
1315; LE-64BIT-NEXT:    sld 3, 3, 4
1316; LE-64BIT-NEXT:    or 6, 6, 7
1317; LE-64BIT-NEXT:    std 3, 0(5)
1318; LE-64BIT-NEXT:    or 3, 8, 10
1319; LE-64BIT-NEXT:    std 9, 24(5)
1320; LE-64BIT-NEXT:    std 6, 8(5)
1321; LE-64BIT-NEXT:    std 3, 16(5)
1322; LE-64BIT-NEXT:    blr
1323;
1324; BE-LABEL: shl_32bytes_wordOff:
1325; BE:       # %bb.0:
1326; BE-NEXT:    ld 6, 0(3)
1327; BE-NEXT:    ld 7, 8(3)
1328; BE-NEXT:    ld 8, 16(3)
1329; BE-NEXT:    ld 3, 24(3)
1330; BE-NEXT:    lwz 4, 28(4)
1331; BE-NEXT:    li 9, 0
1332; BE-NEXT:    addi 10, 1, -64
1333; BE-NEXT:    std 9, -8(1)
1334; BE-NEXT:    std 9, -16(1)
1335; BE-NEXT:    std 9, -24(1)
1336; BE-NEXT:    std 9, -32(1)
1337; BE-NEXT:    std 3, -40(1)
1338; BE-NEXT:    std 8, -48(1)
1339; BE-NEXT:    std 7, -56(1)
1340; BE-NEXT:    std 6, -64(1)
1341; BE-NEXT:    rlwinm 3, 4, 2, 27, 28
1342; BE-NEXT:    ldux 6, 3, 10
1343; BE-NEXT:    rlwinm 4, 4, 5, 26, 26
1344; BE-NEXT:    subfic 9, 4, 64
1345; BE-NEXT:    ld 7, 16(3)
1346; BE-NEXT:    ld 8, 8(3)
1347; BE-NEXT:    ld 3, 24(3)
1348; BE-NEXT:    sld 6, 6, 4
1349; BE-NEXT:    srd 10, 7, 9
1350; BE-NEXT:    sld 11, 8, 4
1351; BE-NEXT:    srd 8, 8, 9
1352; BE-NEXT:    srd 9, 3, 9
1353; BE-NEXT:    sld 7, 7, 4
1354; BE-NEXT:    sld 3, 3, 4
1355; BE-NEXT:    or 10, 11, 10
1356; BE-NEXT:    or 6, 6, 8
1357; BE-NEXT:    or 7, 7, 9
1358; BE-NEXT:    std 3, 24(5)
1359; BE-NEXT:    std 7, 16(5)
1360; BE-NEXT:    std 6, 0(5)
1361; BE-NEXT:    std 10, 8(5)
1362; BE-NEXT:    blr
1363;
1364; LE-32BIT-LABEL: shl_32bytes_wordOff:
1365; LE-32BIT:       # %bb.0:
1366; LE-32BIT-NEXT:    stwu 1, -80(1)
1367; LE-32BIT-NEXT:    lwz 7, 0(3)
1368; LE-32BIT-NEXT:    li 6, 0
1369; LE-32BIT-NEXT:    lwz 8, 4(3)
1370; LE-32BIT-NEXT:    lwz 9, 8(3)
1371; LE-32BIT-NEXT:    lwz 10, 12(3)
1372; LE-32BIT-NEXT:    lwz 11, 16(3)
1373; LE-32BIT-NEXT:    lwz 12, 20(3)
1374; LE-32BIT-NEXT:    lwz 0, 24(3)
1375; LE-32BIT-NEXT:    lwz 3, 28(3)
1376; LE-32BIT-NEXT:    lwz 4, 28(4)
1377; LE-32BIT-NEXT:    stw 6, 76(1)
1378; LE-32BIT-NEXT:    stw 6, 72(1)
1379; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 27, 29
1380; LE-32BIT-NEXT:    stw 6, 68(1)
1381; LE-32BIT-NEXT:    stw 6, 64(1)
1382; LE-32BIT-NEXT:    stw 6, 60(1)
1383; LE-32BIT-NEXT:    stw 6, 56(1)
1384; LE-32BIT-NEXT:    stw 6, 52(1)
1385; LE-32BIT-NEXT:    stw 6, 48(1)
1386; LE-32BIT-NEXT:    stw 3, 44(1)
1387; LE-32BIT-NEXT:    addi 3, 1, 16
1388; LE-32BIT-NEXT:    stw 0, 40(1)
1389; LE-32BIT-NEXT:    stw 12, 36(1)
1390; LE-32BIT-NEXT:    stw 11, 32(1)
1391; LE-32BIT-NEXT:    stw 10, 28(1)
1392; LE-32BIT-NEXT:    stw 9, 24(1)
1393; LE-32BIT-NEXT:    stw 8, 20(1)
1394; LE-32BIT-NEXT:    stw 7, 16(1)
1395; LE-32BIT-NEXT:    lwzux 3, 4, 3
1396; LE-32BIT-NEXT:    lwz 6, 4(4)
1397; LE-32BIT-NEXT:    lwz 7, 12(4)
1398; LE-32BIT-NEXT:    lwz 8, 8(4)
1399; LE-32BIT-NEXT:    lwz 9, 20(4)
1400; LE-32BIT-NEXT:    lwz 10, 16(4)
1401; LE-32BIT-NEXT:    lwz 11, 28(4)
1402; LE-32BIT-NEXT:    lwz 4, 24(4)
1403; LE-32BIT-NEXT:    stw 3, 0(5)
1404; LE-32BIT-NEXT:    stw 4, 24(5)
1405; LE-32BIT-NEXT:    stw 11, 28(5)
1406; LE-32BIT-NEXT:    stw 10, 16(5)
1407; LE-32BIT-NEXT:    stw 9, 20(5)
1408; LE-32BIT-NEXT:    stw 8, 8(5)
1409; LE-32BIT-NEXT:    stw 7, 12(5)
1410; LE-32BIT-NEXT:    stw 6, 4(5)
1411; LE-32BIT-NEXT:    addi 1, 1, 80
1412; LE-32BIT-NEXT:    blr
1413  %src = load i256, ptr %src.ptr, align 1
1414  %wordOff = load i256, ptr %wordOff.ptr, align 1
; Scale the word count to a bit count (x32).
1415  %bitOff = shl i256 %wordOff, 5
1416  %res = shl i256 %src, %bitOff
1417  store i256 %res, ptr %dst, align 1
1418  ret void
1419}
1420
; Left shift of an i256 by a doubleword-granular amount. The offset loaded
; from %dwordOff.ptr is multiplied by 64 (shl by 6) to form the bit count.
; Source, offset and destination are all accessed with align 1.
; In the expected output of every run the source is written next to a
; zero-filled region on the stack and the result is re-loaded at a computed
; offset; no sld/srd/slw/srw instructions remain.
; The ppc32 expectation reaches the second result word with `ori 4, 4, 4`
; followed by `lwz 4, 0(4)`. This is what the generator emitted, so it is kept
; verbatim (presumably the base is known to have bit 2 clear, making the OR
; equivalent to adding 4 - TODO confirm).
1421define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
1422; LE-64BIT-LABEL: shl_32bytes_dwordOff:
1423; LE-64BIT:       # %bb.0:
1424; LE-64BIT-NEXT:    li 6, 16
1425; LE-64BIT-NEXT:    lxvd2x 1, 0, 3
1426; LE-64BIT-NEXT:    li 7, 48
1427; LE-64BIT-NEXT:    xxlxor 2, 2, 2
1428; LE-64BIT-NEXT:    lxvd2x 0, 3, 6
1429; LE-64BIT-NEXT:    lwz 3, 0(4)
1430; LE-64BIT-NEXT:    addi 4, 1, -64
1431; LE-64BIT-NEXT:    stxvd2x 2, 4, 6
1432; LE-64BIT-NEXT:    stxvd2x 2, 0, 4
1433; LE-64BIT-NEXT:    rlwinm 3, 3, 3, 27, 28
1434; LE-64BIT-NEXT:    stxvd2x 0, 4, 7
1435; LE-64BIT-NEXT:    li 7, 32
1436; LE-64BIT-NEXT:    neg 3, 3
1437; LE-64BIT-NEXT:    stxvd2x 1, 4, 7
1438; LE-64BIT-NEXT:    extsw 3, 3
1439; LE-64BIT-NEXT:    addi 4, 1, -32
1440; LE-64BIT-NEXT:    lxvd2x 0, 4, 3
1441; LE-64BIT-NEXT:    add 3, 4, 3
1442; LE-64BIT-NEXT:    lxvd2x 1, 3, 6
1443; LE-64BIT-NEXT:    stxvd2x 0, 0, 5
1444; LE-64BIT-NEXT:    stxvd2x 1, 5, 6
1445; LE-64BIT-NEXT:    blr
1446;
1447; BE-LABEL: shl_32bytes_dwordOff:
1448; BE:       # %bb.0:
1449; BE-NEXT:    ld 7, 0(3)
1450; BE-NEXT:    ld 8, 8(3)
1451; BE-NEXT:    ld 9, 16(3)
1452; BE-NEXT:    ld 3, 24(3)
1453; BE-NEXT:    lwz 4, 28(4)
1454; BE-NEXT:    li 6, 0
1455; BE-NEXT:    std 6, -8(1)
1456; BE-NEXT:    std 6, -16(1)
1457; BE-NEXT:    std 6, -24(1)
1458; BE-NEXT:    std 6, -32(1)
1459; BE-NEXT:    std 3, -40(1)
1460; BE-NEXT:    std 9, -48(1)
1461; BE-NEXT:    std 8, -56(1)
1462; BE-NEXT:    std 7, -64(1)
1463; BE-NEXT:    rlwinm 3, 4, 3, 27, 28
1464; BE-NEXT:    addi 4, 1, -64
1465; BE-NEXT:    ldux 4, 3, 4
1466; BE-NEXT:    ld 6, 8(3)
1467; BE-NEXT:    ld 7, 24(3)
1468; BE-NEXT:    ld 3, 16(3)
1469; BE-NEXT:    std 4, 0(5)
1470; BE-NEXT:    std 3, 16(5)
1471; BE-NEXT:    std 7, 24(5)
1472; BE-NEXT:    std 6, 8(5)
1473; BE-NEXT:    blr
1474;
1475; LE-32BIT-LABEL: shl_32bytes_dwordOff:
1476; LE-32BIT:       # %bb.0:
1477; LE-32BIT-NEXT:    stwu 1, -80(1)
1478; LE-32BIT-NEXT:    lwz 7, 0(3)
1479; LE-32BIT-NEXT:    li 6, 0
1480; LE-32BIT-NEXT:    lwz 8, 4(3)
1481; LE-32BIT-NEXT:    lwz 9, 8(3)
1482; LE-32BIT-NEXT:    lwz 10, 12(3)
1483; LE-32BIT-NEXT:    lwz 11, 16(3)
1484; LE-32BIT-NEXT:    lwz 12, 20(3)
1485; LE-32BIT-NEXT:    lwz 0, 24(3)
1486; LE-32BIT-NEXT:    lwz 3, 28(3)
1487; LE-32BIT-NEXT:    lwz 4, 28(4)
1488; LE-32BIT-NEXT:    stw 6, 76(1)
1489; LE-32BIT-NEXT:    stw 6, 72(1)
1490; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
1491; LE-32BIT-NEXT:    stw 6, 68(1)
1492; LE-32BIT-NEXT:    stw 6, 64(1)
1493; LE-32BIT-NEXT:    stw 6, 60(1)
1494; LE-32BIT-NEXT:    stw 6, 56(1)
1495; LE-32BIT-NEXT:    stw 6, 52(1)
1496; LE-32BIT-NEXT:    stw 6, 48(1)
1497; LE-32BIT-NEXT:    stw 3, 44(1)
1498; LE-32BIT-NEXT:    addi 3, 1, 16
1499; LE-32BIT-NEXT:    stw 0, 40(1)
1500; LE-32BIT-NEXT:    stw 12, 36(1)
1501; LE-32BIT-NEXT:    stw 11, 32(1)
1502; LE-32BIT-NEXT:    stw 10, 28(1)
1503; LE-32BIT-NEXT:    stw 9, 24(1)
1504; LE-32BIT-NEXT:    stw 8, 20(1)
1505; LE-32BIT-NEXT:    stw 7, 16(1)
1506; LE-32BIT-NEXT:    lwzux 3, 4, 3
1507; LE-32BIT-NEXT:    lwz 6, 12(4)
1508; LE-32BIT-NEXT:    lwz 7, 8(4)
1509; LE-32BIT-NEXT:    lwz 8, 20(4)
1510; LE-32BIT-NEXT:    lwz 9, 16(4)
1511; LE-32BIT-NEXT:    lwz 10, 28(4)
1512; LE-32BIT-NEXT:    lwz 11, 24(4)
1513; LE-32BIT-NEXT:    ori 4, 4, 4
1514; LE-32BIT-NEXT:    lwz 4, 0(4)
1515; LE-32BIT-NEXT:    stw 3, 0(5)
1516; LE-32BIT-NEXT:    stw 11, 24(5)
1517; LE-32BIT-NEXT:    stw 10, 28(5)
1518; LE-32BIT-NEXT:    stw 9, 16(5)
1519; LE-32BIT-NEXT:    stw 8, 20(5)
1520; LE-32BIT-NEXT:    stw 7, 8(5)
1521; LE-32BIT-NEXT:    stw 6, 12(5)
1522; LE-32BIT-NEXT:    stw 4, 4(5)
1523; LE-32BIT-NEXT:    addi 1, 1, 80
1524; LE-32BIT-NEXT:    blr
1525  %src = load i256, ptr %src.ptr, align 1
1526  %dwordOff = load i256, ptr %dwordOff.ptr, align 1
; Scale the doubleword count to a bit count (x64).
1527  %bitOff = shl i256 %dwordOff, 6
1528  %res = shl i256 %src, %bitOff
1529  store i256 %res, ptr %dst, align 1
1530  ret void
1531}
1532
1533
; Arithmetic right shift of an i256 by a byte-granular amount. The offset
; loaded from %byteOff.ptr is multiplied by 8 (shl by 3) to form the bit
; count. Source, offset and destination are all accessed with align 1.
; In the expected output the padding region of the stack buffer is filled
; with a sign word derived from the most significant part of the source
; (sradi by 63 on the two 64-bit runs, srawi by 31 on the ppc32 run) rather
; than with zero, and the most significant result register is produced with
; srad/sraw while the remaining ones use sld/srd or slw/srw.
1534define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
1535; LE-64BIT-LABEL: ashr_32bytes:
1536; LE-64BIT:       # %bb.0:
1537; LE-64BIT-NEXT:    ld 6, 24(3)
1538; LE-64BIT-NEXT:    lxvd2x 0, 0, 3
1539; LE-64BIT-NEXT:    lwz 4, 0(4)
1540; LE-64BIT-NEXT:    addi 7, 1, -64
1541; LE-64BIT-NEXT:    ld 3, 16(3)
1542; LE-64BIT-NEXT:    sradi 8, 6, 63
1543; LE-64BIT-NEXT:    rlwinm 9, 4, 0, 27, 28
1544; LE-64BIT-NEXT:    std 6, -40(1)
1545; LE-64BIT-NEXT:    std 3, -48(1)
1546; LE-64BIT-NEXT:    rlwinm 3, 4, 3, 26, 28
1547; LE-64BIT-NEXT:    stxvd2x 0, 0, 7
1548; LE-64BIT-NEXT:    std 8, -8(1)
1549; LE-64BIT-NEXT:    std 8, -16(1)
1550; LE-64BIT-NEXT:    std 8, -24(1)
1551; LE-64BIT-NEXT:    std 8, -32(1)
1552; LE-64BIT-NEXT:    subfic 6, 3, 64
1553; LE-64BIT-NEXT:    ldux 4, 9, 7
1554; LE-64BIT-NEXT:    ld 7, 8(9)
1555; LE-64BIT-NEXT:    ld 8, 16(9)
1556; LE-64BIT-NEXT:    ld 9, 24(9)
1557; LE-64BIT-NEXT:    srd 4, 4, 3
1558; LE-64BIT-NEXT:    sld 10, 7, 6
1559; LE-64BIT-NEXT:    sld 11, 9, 6
1560; LE-64BIT-NEXT:    srd 7, 7, 3
1561; LE-64BIT-NEXT:    sld 6, 8, 6
1562; LE-64BIT-NEXT:    or 4, 10, 4
1563; LE-64BIT-NEXT:    srd 10, 8, 3
1564; LE-64BIT-NEXT:    srad 3, 9, 3
1565; LE-64BIT-NEXT:    or 6, 6, 7
1566; LE-64BIT-NEXT:    std 3, 24(5)
1567; LE-64BIT-NEXT:    or 3, 11, 10
1568; LE-64BIT-NEXT:    std 6, 8(5)
1569; LE-64BIT-NEXT:    std 4, 0(5)
1570; LE-64BIT-NEXT:    std 3, 16(5)
1571; LE-64BIT-NEXT:    blr
1572;
1573; BE-LABEL: ashr_32bytes:
1574; BE:       # %bb.0:
1575; BE-NEXT:    ld 7, 0(3)
1576; BE-NEXT:    ld 8, 8(3)
1577; BE-NEXT:    ld 9, 16(3)
1578; BE-NEXT:    ld 3, 24(3)
1579; BE-NEXT:    lwz 4, 28(4)
1580; BE-NEXT:    addi 6, 1, -32
1581; BE-NEXT:    std 3, -8(1)
1582; BE-NEXT:    std 7, -32(1)
1583; BE-NEXT:    sradi 3, 7, 63
1584; BE-NEXT:    rlwinm 7, 4, 0, 27, 28
1585; BE-NEXT:    std 3, -40(1)
1586; BE-NEXT:    std 3, -48(1)
1587; BE-NEXT:    std 3, -56(1)
1588; BE-NEXT:    std 3, -64(1)
1589; BE-NEXT:    neg 3, 7
1590; BE-NEXT:    std 9, -16(1)
1591; BE-NEXT:    std 8, -24(1)
1592; BE-NEXT:    extsw 3, 3
1593; BE-NEXT:    ldux 3, 6, 3
1594; BE-NEXT:    rlwinm 4, 4, 3, 26, 28
1595; BE-NEXT:    subfic 9, 4, 64
1596; BE-NEXT:    ld 7, 8(6)
1597; BE-NEXT:    ld 8, 24(6)
1598; BE-NEXT:    ld 6, 16(6)
1599; BE-NEXT:    sld 10, 3, 9
1600; BE-NEXT:    srad 3, 3, 4
1601; BE-NEXT:    std 3, 0(5)
1602; BE-NEXT:    srd 11, 7, 4
1603; BE-NEXT:    srd 8, 8, 4
1604; BE-NEXT:    sld 7, 7, 9
1605; BE-NEXT:    sld 9, 6, 9
1606; BE-NEXT:    srd 6, 6, 4
1607; BE-NEXT:    or 10, 10, 11
1608; BE-NEXT:    or 8, 9, 8
1609; BE-NEXT:    or 6, 7, 6
1610; BE-NEXT:    std 6, 16(5)
1611; BE-NEXT:    std 8, 24(5)
1612; BE-NEXT:    std 10, 8(5)
1613; BE-NEXT:    blr
1614;
1615; LE-32BIT-LABEL: ashr_32bytes:
1616; LE-32BIT:       # %bb.0:
1617; LE-32BIT-NEXT:    stwu 1, -112(1)
1618; LE-32BIT-NEXT:    lwz 7, 0(3)
1619; LE-32BIT-NEXT:    addi 6, 1, 48
1620; LE-32BIT-NEXT:    lwz 8, 4(3)
1621; LE-32BIT-NEXT:    lwz 9, 8(3)
1622; LE-32BIT-NEXT:    lwz 10, 12(3)
1623; LE-32BIT-NEXT:    lwz 11, 16(3)
1624; LE-32BIT-NEXT:    lwz 12, 20(3)
1625; LE-32BIT-NEXT:    lwz 0, 24(3)
1626; LE-32BIT-NEXT:    lwz 3, 28(3)
1627; LE-32BIT-NEXT:    lwz 4, 28(4)
1628; LE-32BIT-NEXT:    stw 3, 76(1)
1629; LE-32BIT-NEXT:    srawi 3, 7, 31
1630; LE-32BIT-NEXT:    stw 7, 48(1)
1631; LE-32BIT-NEXT:    rlwinm 7, 4, 0, 27, 29
1632; LE-32BIT-NEXT:    stw 25, 84(1) # 4-byte Folded Spill
1633; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
1634; LE-32BIT-NEXT:    stw 26, 88(1) # 4-byte Folded Spill
1635; LE-32BIT-NEXT:    stw 27, 92(1) # 4-byte Folded Spill
1636; LE-32BIT-NEXT:    stw 28, 96(1) # 4-byte Folded Spill
1637; LE-32BIT-NEXT:    stw 29, 100(1) # 4-byte Folded Spill
1638; LE-32BIT-NEXT:    stw 30, 104(1) # 4-byte Folded Spill
1639; LE-32BIT-NEXT:    stw 0, 72(1)
1640; LE-32BIT-NEXT:    subfic 0, 4, 32
1641; LE-32BIT-NEXT:    stw 12, 68(1)
1642; LE-32BIT-NEXT:    stw 11, 64(1)
1643; LE-32BIT-NEXT:    stw 10, 60(1)
1644; LE-32BIT-NEXT:    stw 9, 56(1)
1645; LE-32BIT-NEXT:    stw 8, 52(1)
1646; LE-32BIT-NEXT:    stw 3, 44(1)
1647; LE-32BIT-NEXT:    stw 3, 40(1)
1648; LE-32BIT-NEXT:    stw 3, 36(1)
1649; LE-32BIT-NEXT:    stw 3, 32(1)
1650; LE-32BIT-NEXT:    stw 3, 28(1)
1651; LE-32BIT-NEXT:    stw 3, 24(1)
1652; LE-32BIT-NEXT:    stw 3, 20(1)
1653; LE-32BIT-NEXT:    stw 3, 16(1)
1654; LE-32BIT-NEXT:    sub 3, 6, 7
1655; LE-32BIT-NEXT:    lwz 6, 4(3)
1656; LE-32BIT-NEXT:    lwz 7, 0(3)
1657; LE-32BIT-NEXT:    lwz 8, 12(3)
1658; LE-32BIT-NEXT:    srw 30, 6, 4
1659; LE-32BIT-NEXT:    lwz 9, 8(3)
1660; LE-32BIT-NEXT:    slw 29, 7, 0
1661; LE-32BIT-NEXT:    lwz 10, 20(3)
1662; LE-32BIT-NEXT:    srw 28, 8, 4
1663; LE-32BIT-NEXT:    lwz 11, 16(3)
1664; LE-32BIT-NEXT:    slw 27, 9, 0
1665; LE-32BIT-NEXT:    lwz 12, 28(3)
1666; LE-32BIT-NEXT:    slw 6, 6, 0
1667; LE-32BIT-NEXT:    lwz 3, 24(3)
1668; LE-32BIT-NEXT:    srw 26, 10, 4
1669; LE-32BIT-NEXT:    slw 25, 11, 0
1670; LE-32BIT-NEXT:    slw 8, 8, 0
1671; LE-32BIT-NEXT:    slw 10, 10, 0
1672; LE-32BIT-NEXT:    slw 0, 3, 0
1673; LE-32BIT-NEXT:    srw 3, 3, 4
1674; LE-32BIT-NEXT:    srw 12, 12, 4
1675; LE-32BIT-NEXT:    or 3, 10, 3
1676; LE-32BIT-NEXT:    srw 11, 11, 4
1677; LE-32BIT-NEXT:    stw 3, 24(5)
1678; LE-32BIT-NEXT:    or 3, 0, 12
1679; LE-32BIT-NEXT:    stw 3, 28(5)
1680; LE-32BIT-NEXT:    or 3, 8, 11
1681; LE-32BIT-NEXT:    srw 9, 9, 4
1682; LE-32BIT-NEXT:    stw 3, 16(5)
1683; LE-32BIT-NEXT:    or 3, 25, 26
1684; LE-32BIT-NEXT:    stw 3, 20(5)
1685; LE-32BIT-NEXT:    or 3, 6, 9
1686; LE-32BIT-NEXT:    stw 3, 8(5)
1687; LE-32BIT-NEXT:    or 3, 27, 28
1688; LE-32BIT-NEXT:    sraw 4, 7, 4
1689; LE-32BIT-NEXT:    stw 3, 12(5)
1690; LE-32BIT-NEXT:    or 3, 29, 30
1691; LE-32BIT-NEXT:    stw 4, 0(5)
1692; LE-32BIT-NEXT:    stw 3, 4(5)
1693; LE-32BIT-NEXT:    lwz 30, 104(1) # 4-byte Folded Reload
1694; LE-32BIT-NEXT:    lwz 29, 100(1) # 4-byte Folded Reload
1695; LE-32BIT-NEXT:    lwz 28, 96(1) # 4-byte Folded Reload
1696; LE-32BIT-NEXT:    lwz 27, 92(1) # 4-byte Folded Reload
1697; LE-32BIT-NEXT:    lwz 26, 88(1) # 4-byte Folded Reload
1698; LE-32BIT-NEXT:    lwz 25, 84(1) # 4-byte Folded Reload
1699; LE-32BIT-NEXT:    addi 1, 1, 112
1700; LE-32BIT-NEXT:    blr
1701  %src = load i256, ptr %src.ptr, align 1
1702  %byteOff = load i256, ptr %byteOff.ptr, align 1
; Scale the byte count to a bit count (x8).
1703  %bitOff = shl i256 %byteOff, 3
1704  %res = ashr i256 %src, %bitOff
1705  store i256 %res, ptr %dst, align 1
1706  ret void
1707}
1708
; Arithmetic right shift of an i256 by a word-granular amount. The offset
; loaded from %wordOff.ptr is multiplied by 32 (shl by 5) to form the bit
; count. Source, offset and destination are all accessed with align 1.
; In the expected output the padding region of the stack buffer is filled
; with a sign word (sradi by 63 on the two 64-bit runs, srawi by 31 on the
; ppc32 run). The 64-bit runs still finish with sld/srd/srad after the stack
; re-load; the ppc32 run contains no slw/srw/sraw and only re-loads 4-byte
; words at the computed offset.
1709define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind {
1710; LE-64BIT-LABEL: ashr_32bytes_wordOff:
1711; LE-64BIT:       # %bb.0:
1712; LE-64BIT-NEXT:    ld 6, 24(3)
1713; LE-64BIT-NEXT:    lxvd2x 0, 0, 3
1714; LE-64BIT-NEXT:    lwz 4, 0(4)
1715; LE-64BIT-NEXT:    addi 7, 1, -64
1716; LE-64BIT-NEXT:    ld 3, 16(3)
1717; LE-64BIT-NEXT:    sradi 8, 6, 63
1718; LE-64BIT-NEXT:    rlwinm 9, 4, 2, 27, 28
1719; LE-64BIT-NEXT:    std 6, -40(1)
1720; LE-64BIT-NEXT:    std 3, -48(1)
1721; LE-64BIT-NEXT:    rlwinm 3, 4, 5, 26, 26
1722; LE-64BIT-NEXT:    stxvd2x 0, 0, 7
1723; LE-64BIT-NEXT:    std 8, -8(1)
1724; LE-64BIT-NEXT:    std 8, -16(1)
1725; LE-64BIT-NEXT:    std 8, -24(1)
1726; LE-64BIT-NEXT:    std 8, -32(1)
1727; LE-64BIT-NEXT:    subfic 6, 3, 64
1728; LE-64BIT-NEXT:    ldux 4, 9, 7
1729; LE-64BIT-NEXT:    ld 7, 8(9)
1730; LE-64BIT-NEXT:    ld 8, 16(9)
1731; LE-64BIT-NEXT:    ld 9, 24(9)
1732; LE-64BIT-NEXT:    srd 4, 4, 3
1733; LE-64BIT-NEXT:    sld 10, 7, 6
1734; LE-64BIT-NEXT:    sld 11, 9, 6
1735; LE-64BIT-NEXT:    srd 7, 7, 3
1736; LE-64BIT-NEXT:    sld 6, 8, 6
1737; LE-64BIT-NEXT:    or 4, 10, 4
1738; LE-64BIT-NEXT:    srd 10, 8, 3
1739; LE-64BIT-NEXT:    srad 3, 9, 3
1740; LE-64BIT-NEXT:    or 6, 6, 7
1741; LE-64BIT-NEXT:    std 3, 24(5)
1742; LE-64BIT-NEXT:    or 3, 11, 10
1743; LE-64BIT-NEXT:    std 6, 8(5)
1744; LE-64BIT-NEXT:    std 4, 0(5)
1745; LE-64BIT-NEXT:    std 3, 16(5)
1746; LE-64BIT-NEXT:    blr
1747;
1748; BE-LABEL: ashr_32bytes_wordOff:
1749; BE:       # %bb.0:
1750; BE-NEXT:    ld 7, 0(3)
1751; BE-NEXT:    ld 8, 8(3)
1752; BE-NEXT:    ld 9, 16(3)
1753; BE-NEXT:    ld 3, 24(3)
1754; BE-NEXT:    lwz 4, 28(4)
1755; BE-NEXT:    addi 6, 1, -32
1756; BE-NEXT:    std 3, -8(1)
1757; BE-NEXT:    std 7, -32(1)
1758; BE-NEXT:    sradi 3, 7, 63
1759; BE-NEXT:    rlwinm 7, 4, 2, 27, 28
1760; BE-NEXT:    std 3, -40(1)
1761; BE-NEXT:    std 3, -48(1)
1762; BE-NEXT:    std 3, -56(1)
1763; BE-NEXT:    std 3, -64(1)
1764; BE-NEXT:    neg 3, 7
1765; BE-NEXT:    std 9, -16(1)
1766; BE-NEXT:    std 8, -24(1)
1767; BE-NEXT:    extsw 3, 3
1768; BE-NEXT:    ldux 3, 6, 3
1769; BE-NEXT:    rlwinm 4, 4, 5, 26, 26
1770; BE-NEXT:    subfic 9, 4, 64
1771; BE-NEXT:    ld 7, 8(6)
1772; BE-NEXT:    ld 8, 24(6)
1773; BE-NEXT:    ld 6, 16(6)
1774; BE-NEXT:    sld 10, 3, 9
1775; BE-NEXT:    srad 3, 3, 4
1776; BE-NEXT:    std 3, 0(5)
1777; BE-NEXT:    srd 11, 7, 4
1778; BE-NEXT:    srd 8, 8, 4
1779; BE-NEXT:    sld 7, 7, 9
1780; BE-NEXT:    sld 9, 6, 9
1781; BE-NEXT:    srd 6, 6, 4
1782; BE-NEXT:    or 10, 10, 11
1783; BE-NEXT:    or 8, 9, 8
1784; BE-NEXT:    or 6, 7, 6
1785; BE-NEXT:    std 6, 16(5)
1786; BE-NEXT:    std 8, 24(5)
1787; BE-NEXT:    std 10, 8(5)
1788; BE-NEXT:    blr
1789;
1790; LE-32BIT-LABEL: ashr_32bytes_wordOff:
1791; LE-32BIT:       # %bb.0:
1792; LE-32BIT-NEXT:    stwu 1, -80(1)
1793; LE-32BIT-NEXT:    lwz 7, 0(3)
1794; LE-32BIT-NEXT:    addi 6, 1, 48
1795; LE-32BIT-NEXT:    lwz 8, 4(3)
1796; LE-32BIT-NEXT:    lwz 9, 8(3)
1797; LE-32BIT-NEXT:    lwz 10, 12(3)
1798; LE-32BIT-NEXT:    lwz 11, 16(3)
1799; LE-32BIT-NEXT:    lwz 12, 20(3)
1800; LE-32BIT-NEXT:    lwz 0, 24(3)
1801; LE-32BIT-NEXT:    lwz 3, 28(3)
1802; LE-32BIT-NEXT:    lwz 4, 28(4)
1803; LE-32BIT-NEXT:    stw 3, 76(1)
1804; LE-32BIT-NEXT:    srawi 3, 7, 31
1805; LE-32BIT-NEXT:    rlwinm 4, 4, 2, 27, 29
1806; LE-32BIT-NEXT:    stw 0, 72(1)
1807; LE-32BIT-NEXT:    stw 12, 68(1)
1808; LE-32BIT-NEXT:    stw 11, 64(1)
1809; LE-32BIT-NEXT:    stw 10, 60(1)
1810; LE-32BIT-NEXT:    stw 9, 56(1)
1811; LE-32BIT-NEXT:    stw 8, 52(1)
1812; LE-32BIT-NEXT:    stw 7, 48(1)
1813; LE-32BIT-NEXT:    stw 3, 44(1)
1814; LE-32BIT-NEXT:    stw 3, 40(1)
1815; LE-32BIT-NEXT:    stw 3, 36(1)
1816; LE-32BIT-NEXT:    stw 3, 32(1)
1817; LE-32BIT-NEXT:    stw 3, 28(1)
1818; LE-32BIT-NEXT:    stw 3, 24(1)
1819; LE-32BIT-NEXT:    stw 3, 20(1)
1820; LE-32BIT-NEXT:    stw 3, 16(1)
1821; LE-32BIT-NEXT:    sub 3, 6, 4
1822; LE-32BIT-NEXT:    lwz 4, 4(3)
1823; LE-32BIT-NEXT:    lwz 6, 0(3)
1824; LE-32BIT-NEXT:    lwz 7, 12(3)
1825; LE-32BIT-NEXT:    lwz 8, 8(3)
1826; LE-32BIT-NEXT:    lwz 9, 20(3)
1827; LE-32BIT-NEXT:    lwz 10, 16(3)
1828; LE-32BIT-NEXT:    lwz 11, 24(3)
1829; LE-32BIT-NEXT:    lwz 3, 28(3)
1830; LE-32BIT-NEXT:    stw 11, 24(5)
1831; LE-32BIT-NEXT:    stw 3, 28(5)
1832; LE-32BIT-NEXT:    stw 10, 16(5)
1833; LE-32BIT-NEXT:    stw 9, 20(5)
1834; LE-32BIT-NEXT:    stw 8, 8(5)
1835; LE-32BIT-NEXT:    stw 7, 12(5)
1836; LE-32BIT-NEXT:    stw 6, 0(5)
1837; LE-32BIT-NEXT:    stw 4, 4(5)
1838; LE-32BIT-NEXT:    addi 1, 1, 80
1839; LE-32BIT-NEXT:    blr
1840  %src = load i256, ptr %src.ptr, align 1
1841  %wordOff = load i256, ptr %wordOff.ptr, align 1
; Scale the word count to a bit count (x32).
1842  %bitOff = shl i256 %wordOff, 5
1843  %res = ashr i256 %src, %bitOff
1844  store i256 %res, ptr %dst, align 1
1845  ret void
1846}
1847
; Test: arithmetic right shift of a 256-bit (32-byte) integer where the shift
; amount is expressed in 64-bit units. The function loads an i256 value from
; %src.ptr and an i256 offset from %dwordOff.ptr (both with align 1), scales
; the offset to a bit count with `shl ..., 6` (multiply by 64), performs the
; `ashr`, and stores the i256 result to %dst.
;
; The autogenerated assertions below pin the code emitted for each RUN line.
; In all three variants the checked sequence writes the source value to a
; stack buffer next to copies of the sign word (produced by sradi/srawi of
; the loaded word holding the sign), derives a byte offset from the low bits
; of the offset operand with `rlwinm ..., 3, 27, 28`, and reloads 32 bytes
; from the buffer at that offset before storing them to the destination.
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.
1848define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nounwind {
1849; LE-64BIT-LABEL: ashr_32bytes_dwordOff:
1850; LE-64BIT:       # %bb.0:
1851; LE-64BIT-NEXT:    ld 6, 16(3)
1852; LE-64BIT-NEXT:    ld 7, 24(3)
1853; LE-64BIT-NEXT:    lxvd2x 0, 0, 3
1854; LE-64BIT-NEXT:    lwz 3, 0(4)
1855; LE-64BIT-NEXT:    addi 4, 1, -64
1856; LE-64BIT-NEXT:    std 6, -48(1)
1857; LE-64BIT-NEXT:    sradi 6, 7, 63
1858; LE-64BIT-NEXT:    rlwinm 3, 3, 3, 27, 28
1859; LE-64BIT-NEXT:    std 7, -40(1)
1860; LE-64BIT-NEXT:    stxvd2x 0, 0, 4
1861; LE-64BIT-NEXT:    std 6, -8(1)
1862; LE-64BIT-NEXT:    std 6, -16(1)
1863; LE-64BIT-NEXT:    std 6, -24(1)
1864; LE-64BIT-NEXT:    std 6, -32(1)
1865; LE-64BIT-NEXT:    lxvd2x 0, 4, 3
1866; LE-64BIT-NEXT:    add 3, 4, 3
1867; LE-64BIT-NEXT:    li 4, 16
1868; LE-64BIT-NEXT:    lxvd2x 1, 3, 4
1869; LE-64BIT-NEXT:    stxvd2x 0, 0, 5
1870; LE-64BIT-NEXT:    stxvd2x 1, 5, 4
1871; LE-64BIT-NEXT:    blr
1872;
1873; BE-LABEL: ashr_32bytes_dwordOff:
1874; BE:       # %bb.0:
1875; BE-NEXT:    ld 7, 0(3)
1876; BE-NEXT:    ld 8, 8(3)
1877; BE-NEXT:    ld 9, 16(3)
1878; BE-NEXT:    ld 3, 24(3)
1879; BE-NEXT:    lwz 4, 28(4)
1880; BE-NEXT:    addi 6, 1, -32
1881; BE-NEXT:    std 3, -8(1)
1882; BE-NEXT:    sradi 3, 7, 63
1883; BE-NEXT:    rlwinm 4, 4, 3, 27, 28
1884; BE-NEXT:    std 3, -40(1)
1885; BE-NEXT:    std 3, -48(1)
1886; BE-NEXT:    std 3, -56(1)
1887; BE-NEXT:    std 3, -64(1)
1888; BE-NEXT:    neg 3, 4
1889; BE-NEXT:    std 9, -16(1)
1890; BE-NEXT:    std 8, -24(1)
1891; BE-NEXT:    std 7, -32(1)
1892; BE-NEXT:    extsw 3, 3
1893; BE-NEXT:    ldux 3, 6, 3
1894; BE-NEXT:    ld 4, 8(6)
1895; BE-NEXT:    ld 7, 24(6)
1896; BE-NEXT:    ld 6, 16(6)
1897; BE-NEXT:    std 3, 0(5)
1898; BE-NEXT:    std 6, 16(5)
1899; BE-NEXT:    std 7, 24(5)
1900; BE-NEXT:    std 4, 8(5)
1901; BE-NEXT:    blr
1902;
1903; LE-32BIT-LABEL: ashr_32bytes_dwordOff:
1904; LE-32BIT:       # %bb.0:
1905; LE-32BIT-NEXT:    stwu 1, -80(1)
1906; LE-32BIT-NEXT:    lwz 7, 0(3)
1907; LE-32BIT-NEXT:    addi 6, 1, 48
1908; LE-32BIT-NEXT:    lwz 8, 4(3)
1909; LE-32BIT-NEXT:    lwz 9, 8(3)
1910; LE-32BIT-NEXT:    lwz 10, 12(3)
1911; LE-32BIT-NEXT:    lwz 11, 16(3)
1912; LE-32BIT-NEXT:    lwz 12, 20(3)
1913; LE-32BIT-NEXT:    lwz 0, 24(3)
1914; LE-32BIT-NEXT:    lwz 3, 28(3)
1915; LE-32BIT-NEXT:    lwz 4, 28(4)
1916; LE-32BIT-NEXT:    stw 3, 76(1)
1917; LE-32BIT-NEXT:    srawi 3, 7, 31
1918; LE-32BIT-NEXT:    rlwinm 4, 4, 3, 27, 28
1919; LE-32BIT-NEXT:    stw 0, 72(1)
1920; LE-32BIT-NEXT:    stw 12, 68(1)
1921; LE-32BIT-NEXT:    stw 11, 64(1)
1922; LE-32BIT-NEXT:    stw 10, 60(1)
1923; LE-32BIT-NEXT:    stw 9, 56(1)
1924; LE-32BIT-NEXT:    stw 8, 52(1)
1925; LE-32BIT-NEXT:    stw 7, 48(1)
1926; LE-32BIT-NEXT:    stw 3, 44(1)
1927; LE-32BIT-NEXT:    stw 3, 40(1)
1928; LE-32BIT-NEXT:    stw 3, 36(1)
1929; LE-32BIT-NEXT:    stw 3, 32(1)
1930; LE-32BIT-NEXT:    stw 3, 28(1)
1931; LE-32BIT-NEXT:    stw 3, 24(1)
1932; LE-32BIT-NEXT:    stw 3, 20(1)
1933; LE-32BIT-NEXT:    stw 3, 16(1)
1934; LE-32BIT-NEXT:    sub 3, 6, 4
1935; LE-32BIT-NEXT:    lwz 4, 4(3)
1936; LE-32BIT-NEXT:    lwz 6, 0(3)
1937; LE-32BIT-NEXT:    lwz 7, 12(3)
1938; LE-32BIT-NEXT:    lwz 8, 8(3)
1939; LE-32BIT-NEXT:    lwz 9, 20(3)
1940; LE-32BIT-NEXT:    lwz 10, 16(3)
1941; LE-32BIT-NEXT:    lwz 11, 24(3)
1942; LE-32BIT-NEXT:    lwz 3, 28(3)
1943; LE-32BIT-NEXT:    stw 11, 24(5)
1944; LE-32BIT-NEXT:    stw 3, 28(5)
1945; LE-32BIT-NEXT:    stw 10, 16(5)
1946; LE-32BIT-NEXT:    stw 9, 20(5)
1947; LE-32BIT-NEXT:    stw 8, 8(5)
1948; LE-32BIT-NEXT:    stw 7, 12(5)
1949; LE-32BIT-NEXT:    stw 6, 0(5)
1950; LE-32BIT-NEXT:    stw 4, 4(5)
1951; LE-32BIT-NEXT:    addi 1, 1, 80
1952; LE-32BIT-NEXT:    blr
; Both operands are loaded as full i256 values with byte alignment (align 1).
1953  %src = load i256, ptr %src.ptr, align 1
1954  %dwordOff = load i256, ptr %dwordOff.ptr, align 1
; Scale the offset from 64-bit units to bits: dwordOff * 64.
1955  %bitOff = shl i256 %dwordOff, 6
; Sign-propagating right shift of the whole 256-bit value by that bit count.
1956  %res = ashr i256 %src, %bitOff
1957  store i256 %res, ptr %dst, align 1
1958  ret void
1959}
1960
1961
1962;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
1963; LE: {{.*}}
1964