xref: /llvm-project/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=RV64I
3; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=RV32I
4
; Logical right shift of a 4-byte value: loads an i32 from %src.ptr and an i32
; shift amount from %bitOff.ptr, computes `lshr`, and stores the i32 result to
; %dst. All three memory accesses are `align 1`; the expected code below reads
; and writes the values one byte at a time (lbu/lb loads, sb stores).
; The riscv64 expectation loads only byte 0 of the shift amount and shifts with
; srlw; the riscv32 expectation assembles all four amount bytes and uses srl.
5define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
6; RV64I-LABEL: lshr_4bytes:
7; RV64I:       # %bb.0:
8; RV64I-NEXT:    lbu a3, 1(a0)
9; RV64I-NEXT:    lbu a4, 0(a0)
10; RV64I-NEXT:    lbu a5, 2(a0)
11; RV64I-NEXT:    lb a0, 3(a0)
12; RV64I-NEXT:    slli a3, a3, 8
13; RV64I-NEXT:    or a3, a3, a4
14; RV64I-NEXT:    lbu a1, 0(a1)
15; RV64I-NEXT:    slli a5, a5, 16
16; RV64I-NEXT:    slli a0, a0, 24
17; RV64I-NEXT:    or a0, a0, a5
18; RV64I-NEXT:    or a0, a0, a3
19; RV64I-NEXT:    srlw a0, a0, a1
20; RV64I-NEXT:    srli a1, a0, 16
21; RV64I-NEXT:    srli a3, a0, 24
22; RV64I-NEXT:    srli a4, a0, 8
23; RV64I-NEXT:    sb a0, 0(a2)
24; RV64I-NEXT:    sb a4, 1(a2)
25; RV64I-NEXT:    sb a1, 2(a2)
26; RV64I-NEXT:    sb a3, 3(a2)
27; RV64I-NEXT:    ret
28;
29; RV32I-LABEL: lshr_4bytes:
30; RV32I:       # %bb.0:
31; RV32I-NEXT:    lbu a3, 1(a0)
32; RV32I-NEXT:    lbu a4, 2(a0)
33; RV32I-NEXT:    lbu a5, 3(a0)
34; RV32I-NEXT:    lbu a0, 0(a0)
35; RV32I-NEXT:    slli a3, a3, 8
36; RV32I-NEXT:    slli a4, a4, 16
37; RV32I-NEXT:    slli a5, a5, 24
38; RV32I-NEXT:    or a0, a3, a0
39; RV32I-NEXT:    lbu a3, 0(a1)
40; RV32I-NEXT:    lbu a6, 1(a1)
41; RV32I-NEXT:    or a4, a5, a4
42; RV32I-NEXT:    lbu a5, 2(a1)
43; RV32I-NEXT:    lbu a1, 3(a1)
44; RV32I-NEXT:    slli a6, a6, 8
45; RV32I-NEXT:    or a3, a6, a3
46; RV32I-NEXT:    slli a5, a5, 16
47; RV32I-NEXT:    slli a1, a1, 24
48; RV32I-NEXT:    or a1, a1, a5
49; RV32I-NEXT:    or a0, a4, a0
50; RV32I-NEXT:    or a1, a1, a3
51; RV32I-NEXT:    srl a0, a0, a1
52; RV32I-NEXT:    srli a1, a0, 16
53; RV32I-NEXT:    srli a3, a0, 24
54; RV32I-NEXT:    srli a4, a0, 8
55; RV32I-NEXT:    sb a0, 0(a2)
56; RV32I-NEXT:    sb a4, 1(a2)
57; RV32I-NEXT:    sb a1, 2(a2)
58; RV32I-NEXT:    sb a3, 3(a2)
59; RV32I-NEXT:    ret
60  %src = load i32, ptr %src.ptr, align 1
61  %bitOff = load i32, ptr %bitOff.ptr, align 1
62  %res = lshr i32 %src, %bitOff
63  store i32 %res, ptr %dst, align 1
64  ret void
65}
; Left shift of a 4-byte value: loads an i32 from %src.ptr and an i32 shift
; amount from %bitOff.ptr, computes `shl`, and stores the i32 result to %dst.
; All three memory accesses are `align 1`; the expected code below reads and
; writes the values one byte at a time (lbu/lb loads, sb stores).
; The riscv64 expectation loads only byte 0 of the shift amount and shifts with
; sllw; the riscv32 expectation assembles all four amount bytes and uses sll.
66define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
67; RV64I-LABEL: shl_4bytes:
68; RV64I:       # %bb.0:
69; RV64I-NEXT:    lbu a3, 1(a0)
70; RV64I-NEXT:    lbu a4, 0(a0)
71; RV64I-NEXT:    lbu a5, 2(a0)
72; RV64I-NEXT:    lb a0, 3(a0)
73; RV64I-NEXT:    slli a3, a3, 8
74; RV64I-NEXT:    or a3, a3, a4
75; RV64I-NEXT:    lbu a1, 0(a1)
76; RV64I-NEXT:    slli a5, a5, 16
77; RV64I-NEXT:    slli a0, a0, 24
78; RV64I-NEXT:    or a0, a0, a5
79; RV64I-NEXT:    or a0, a0, a3
80; RV64I-NEXT:    sllw a0, a0, a1
81; RV64I-NEXT:    srli a1, a0, 16
82; RV64I-NEXT:    srli a3, a0, 24
83; RV64I-NEXT:    srli a4, a0, 8
84; RV64I-NEXT:    sb a0, 0(a2)
85; RV64I-NEXT:    sb a4, 1(a2)
86; RV64I-NEXT:    sb a1, 2(a2)
87; RV64I-NEXT:    sb a3, 3(a2)
88; RV64I-NEXT:    ret
89;
90; RV32I-LABEL: shl_4bytes:
91; RV32I:       # %bb.0:
92; RV32I-NEXT:    lbu a3, 1(a0)
93; RV32I-NEXT:    lbu a4, 2(a0)
94; RV32I-NEXT:    lbu a5, 3(a0)
95; RV32I-NEXT:    lbu a0, 0(a0)
96; RV32I-NEXT:    slli a3, a3, 8
97; RV32I-NEXT:    slli a4, a4, 16
98; RV32I-NEXT:    slli a5, a5, 24
99; RV32I-NEXT:    or a0, a3, a0
100; RV32I-NEXT:    lbu a3, 0(a1)
101; RV32I-NEXT:    lbu a6, 1(a1)
102; RV32I-NEXT:    or a4, a5, a4
103; RV32I-NEXT:    lbu a5, 2(a1)
104; RV32I-NEXT:    lbu a1, 3(a1)
105; RV32I-NEXT:    slli a6, a6, 8
106; RV32I-NEXT:    or a3, a6, a3
107; RV32I-NEXT:    slli a5, a5, 16
108; RV32I-NEXT:    slli a1, a1, 24
109; RV32I-NEXT:    or a1, a1, a5
110; RV32I-NEXT:    or a0, a4, a0
111; RV32I-NEXT:    or a1, a1, a3
112; RV32I-NEXT:    sll a0, a0, a1
113; RV32I-NEXT:    srli a1, a0, 16
114; RV32I-NEXT:    srli a3, a0, 24
115; RV32I-NEXT:    srli a4, a0, 8
116; RV32I-NEXT:    sb a0, 0(a2)
117; RV32I-NEXT:    sb a4, 1(a2)
118; RV32I-NEXT:    sb a1, 2(a2)
119; RV32I-NEXT:    sb a3, 3(a2)
120; RV32I-NEXT:    ret
121  %src = load i32, ptr %src.ptr, align 1
122  %bitOff = load i32, ptr %bitOff.ptr, align 1
123  %res = shl i32 %src, %bitOff
124  store i32 %res, ptr %dst, align 1
125  ret void
126}
; Arithmetic right shift of a 4-byte value: loads an i32 from %src.ptr and an
; i32 shift amount from %bitOff.ptr, computes `ashr`, and stores the i32 result
; to %dst. All three memory accesses are `align 1`; the expected code below
; reads and writes the values one byte at a time (lbu/lb loads, sb stores).
; The riscv64 expectation loads only byte 0 of the shift amount and shifts with
; sraw; the riscv32 expectation assembles all four amount bytes and uses sra.
127define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
128; RV64I-LABEL: ashr_4bytes:
129; RV64I:       # %bb.0:
130; RV64I-NEXT:    lbu a3, 1(a0)
131; RV64I-NEXT:    lbu a4, 0(a0)
132; RV64I-NEXT:    lbu a5, 2(a0)
133; RV64I-NEXT:    lb a0, 3(a0)
134; RV64I-NEXT:    slli a3, a3, 8
135; RV64I-NEXT:    or a3, a3, a4
136; RV64I-NEXT:    lbu a1, 0(a1)
137; RV64I-NEXT:    slli a5, a5, 16
138; RV64I-NEXT:    slli a0, a0, 24
139; RV64I-NEXT:    or a0, a0, a5
140; RV64I-NEXT:    or a0, a0, a3
141; RV64I-NEXT:    sraw a0, a0, a1
142; RV64I-NEXT:    srli a1, a0, 16
143; RV64I-NEXT:    srli a3, a0, 24
144; RV64I-NEXT:    srli a4, a0, 8
145; RV64I-NEXT:    sb a0, 0(a2)
146; RV64I-NEXT:    sb a4, 1(a2)
147; RV64I-NEXT:    sb a1, 2(a2)
148; RV64I-NEXT:    sb a3, 3(a2)
149; RV64I-NEXT:    ret
150;
151; RV32I-LABEL: ashr_4bytes:
152; RV32I:       # %bb.0:
153; RV32I-NEXT:    lbu a3, 1(a0)
154; RV32I-NEXT:    lbu a4, 2(a0)
155; RV32I-NEXT:    lbu a5, 3(a0)
156; RV32I-NEXT:    lbu a0, 0(a0)
157; RV32I-NEXT:    slli a3, a3, 8
158; RV32I-NEXT:    slli a4, a4, 16
159; RV32I-NEXT:    slli a5, a5, 24
160; RV32I-NEXT:    or a0, a3, a0
161; RV32I-NEXT:    lbu a3, 0(a1)
162; RV32I-NEXT:    lbu a6, 1(a1)
163; RV32I-NEXT:    or a4, a5, a4
164; RV32I-NEXT:    lbu a5, 2(a1)
165; RV32I-NEXT:    lbu a1, 3(a1)
166; RV32I-NEXT:    slli a6, a6, 8
167; RV32I-NEXT:    or a3, a6, a3
168; RV32I-NEXT:    slli a5, a5, 16
169; RV32I-NEXT:    slli a1, a1, 24
170; RV32I-NEXT:    or a1, a1, a5
171; RV32I-NEXT:    or a0, a4, a0
172; RV32I-NEXT:    or a1, a1, a3
173; RV32I-NEXT:    sra a0, a0, a1
174; RV32I-NEXT:    srli a1, a0, 16
175; RV32I-NEXT:    srli a3, a0, 24
176; RV32I-NEXT:    srli a4, a0, 8
177; RV32I-NEXT:    sb a0, 0(a2)
178; RV32I-NEXT:    sb a4, 1(a2)
179; RV32I-NEXT:    sb a1, 2(a2)
180; RV32I-NEXT:    sb a3, 3(a2)
181; RV32I-NEXT:    ret
182  %src = load i32, ptr %src.ptr, align 1
183  %bitOff = load i32, ptr %bitOff.ptr, align 1
184  %res = ashr i32 %src, %bitOff
185  store i32 %res, ptr %dst, align 1
186  ret void
187}
188
; Logical right shift of an 8-byte value: loads an i64 from %src.ptr and an i64
; shift amount from %bitOff.ptr, computes `lshr`, and stores the i64 result to
; %dst. All memory accesses are `align 1`, so the expected code uses byte loads
; (lbu) and byte stores (sb) throughout.
; riscv64 expectation: both 64-bit operands are assembled from eight bytes each
; and a single srl produces the result.
; riscv32 expectation: the upper source word (bytes 4-7) and bytes 0-3 of the
; shift amount are assembled first, then the code branches on the sign of
; (amount - 32). Only the .LBB3_2 path loads the lower source word (bytes 0-3)
; and merges in bits shifted out of the upper word; after the join the upper
; result word is masked with the srai/and pair before being stored.
189define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
190; RV64I-LABEL: lshr_8bytes:
191; RV64I:       # %bb.0:
192; RV64I-NEXT:    lbu a3, 0(a0)
193; RV64I-NEXT:    lbu a4, 1(a0)
194; RV64I-NEXT:    lbu a5, 2(a0)
195; RV64I-NEXT:    lbu a6, 3(a0)
196; RV64I-NEXT:    lbu a7, 4(a0)
197; RV64I-NEXT:    lbu t0, 5(a0)
198; RV64I-NEXT:    lbu t1, 6(a0)
199; RV64I-NEXT:    lbu a0, 7(a0)
200; RV64I-NEXT:    slli a4, a4, 8
201; RV64I-NEXT:    slli a5, a5, 16
202; RV64I-NEXT:    slli a6, a6, 24
203; RV64I-NEXT:    slli t0, t0, 8
204; RV64I-NEXT:    or a3, a4, a3
205; RV64I-NEXT:    or a4, a6, a5
206; RV64I-NEXT:    or a5, t0, a7
207; RV64I-NEXT:    lbu a6, 0(a1)
208; RV64I-NEXT:    lbu a7, 1(a1)
209; RV64I-NEXT:    lbu t0, 2(a1)
210; RV64I-NEXT:    lbu t2, 3(a1)
211; RV64I-NEXT:    slli t1, t1, 16
212; RV64I-NEXT:    slli a0, a0, 24
213; RV64I-NEXT:    slli a7, a7, 8
214; RV64I-NEXT:    slli t0, t0, 16
215; RV64I-NEXT:    slli t2, t2, 24
216; RV64I-NEXT:    or a0, a0, t1
217; RV64I-NEXT:    or a6, a7, a6
218; RV64I-NEXT:    lbu a7, 4(a1)
219; RV64I-NEXT:    lbu t1, 5(a1)
220; RV64I-NEXT:    or t0, t2, t0
221; RV64I-NEXT:    lbu t2, 6(a1)
222; RV64I-NEXT:    lbu a1, 7(a1)
223; RV64I-NEXT:    slli t1, t1, 8
224; RV64I-NEXT:    or a7, t1, a7
225; RV64I-NEXT:    slli t2, t2, 16
226; RV64I-NEXT:    slli a1, a1, 24
227; RV64I-NEXT:    or a1, a1, t2
228; RV64I-NEXT:    or a3, a4, a3
229; RV64I-NEXT:    or a0, a0, a5
230; RV64I-NEXT:    or a4, t0, a6
231; RV64I-NEXT:    or a1, a1, a7
232; RV64I-NEXT:    slli a0, a0, 32
233; RV64I-NEXT:    slli a1, a1, 32
234; RV64I-NEXT:    or a0, a0, a3
235; RV64I-NEXT:    or a1, a1, a4
236; RV64I-NEXT:    srl a0, a0, a1
237; RV64I-NEXT:    srli a1, a0, 48
238; RV64I-NEXT:    srli a3, a0, 56
239; RV64I-NEXT:    srli a4, a0, 32
240; RV64I-NEXT:    srli a5, a0, 40
241; RV64I-NEXT:    srli a6, a0, 16
242; RV64I-NEXT:    srli a7, a0, 24
243; RV64I-NEXT:    srli t0, a0, 8
244; RV64I-NEXT:    sb a4, 4(a2)
245; RV64I-NEXT:    sb a5, 5(a2)
246; RV64I-NEXT:    sb a1, 6(a2)
247; RV64I-NEXT:    sb a3, 7(a2)
248; RV64I-NEXT:    sb a0, 0(a2)
249; RV64I-NEXT:    sb t0, 1(a2)
250; RV64I-NEXT:    sb a6, 2(a2)
251; RV64I-NEXT:    sb a7, 3(a2)
252; RV64I-NEXT:    ret
253;
254; RV32I-LABEL: lshr_8bytes:
255; RV32I:       # %bb.0:
256; RV32I-NEXT:    lbu a3, 5(a0)
257; RV32I-NEXT:    lbu a4, 6(a0)
258; RV32I-NEXT:    lbu a5, 7(a0)
259; RV32I-NEXT:    lbu a6, 4(a0)
260; RV32I-NEXT:    slli a3, a3, 8
261; RV32I-NEXT:    slli a4, a4, 16
262; RV32I-NEXT:    slli a5, a5, 24
263; RV32I-NEXT:    or a3, a3, a6
264; RV32I-NEXT:    lbu a6, 0(a1)
265; RV32I-NEXT:    lbu a7, 1(a1)
266; RV32I-NEXT:    or a4, a5, a4
267; RV32I-NEXT:    lbu a5, 2(a1)
268; RV32I-NEXT:    lbu a1, 3(a1)
269; RV32I-NEXT:    slli a7, a7, 8
270; RV32I-NEXT:    or a6, a7, a6
271; RV32I-NEXT:    slli a5, a5, 16
272; RV32I-NEXT:    slli a1, a1, 24
273; RV32I-NEXT:    or a1, a1, a5
274; RV32I-NEXT:    or a5, a4, a3
275; RV32I-NEXT:    or a4, a1, a6
276; RV32I-NEXT:    addi a3, a4, -32
277; RV32I-NEXT:    srl a1, a5, a4
278; RV32I-NEXT:    bltz a3, .LBB3_2
279; RV32I-NEXT:  # %bb.1:
280; RV32I-NEXT:    mv a0, a1
281; RV32I-NEXT:    j .LBB3_3
282; RV32I-NEXT:  .LBB3_2:
283; RV32I-NEXT:    lbu a6, 1(a0)
284; RV32I-NEXT:    lbu a7, 0(a0)
285; RV32I-NEXT:    lbu t0, 2(a0)
286; RV32I-NEXT:    lbu a0, 3(a0)
287; RV32I-NEXT:    slli a6, a6, 8
288; RV32I-NEXT:    or a6, a6, a7
289; RV32I-NEXT:    slli t0, t0, 16
290; RV32I-NEXT:    slli a0, a0, 24
291; RV32I-NEXT:    or a0, a0, t0
292; RV32I-NEXT:    not a7, a4
293; RV32I-NEXT:    slli a5, a5, 1
294; RV32I-NEXT:    or a0, a0, a6
295; RV32I-NEXT:    srl a0, a0, a4
296; RV32I-NEXT:    sll a4, a5, a7
297; RV32I-NEXT:    or a0, a0, a4
298; RV32I-NEXT:  .LBB3_3:
299; RV32I-NEXT:    srai a3, a3, 31
300; RV32I-NEXT:    srli a4, a0, 16
301; RV32I-NEXT:    srli a5, a0, 24
302; RV32I-NEXT:    and a1, a3, a1
303; RV32I-NEXT:    srli a3, a1, 16
304; RV32I-NEXT:    srli a6, a1, 24
305; RV32I-NEXT:    srli a7, a1, 8
306; RV32I-NEXT:    sb a1, 4(a2)
307; RV32I-NEXT:    sb a7, 5(a2)
308; RV32I-NEXT:    sb a3, 6(a2)
309; RV32I-NEXT:    sb a6, 7(a2)
310; RV32I-NEXT:    srli a1, a0, 8
311; RV32I-NEXT:    sb a0, 0(a2)
312; RV32I-NEXT:    sb a1, 1(a2)
313; RV32I-NEXT:    sb a4, 2(a2)
314; RV32I-NEXT:    sb a5, 3(a2)
315; RV32I-NEXT:    ret
316  %src = load i64, ptr %src.ptr, align 1
317  %bitOff = load i64, ptr %bitOff.ptr, align 1
318  %res = lshr i64 %src, %bitOff
319  store i64 %res, ptr %dst, align 1
320  ret void
321}
; Left shift of an 8-byte value: loads an i64 from %src.ptr and an i64 shift
; amount from %bitOff.ptr, computes `shl`, and stores the i64 result to %dst.
; All memory accesses are `align 1`, so the expected code uses byte loads (lbu)
; and byte stores (sb) throughout.
; riscv64 expectation: both 64-bit operands are assembled from eight bytes each
; and a single sll produces the result.
; riscv32 expectation: the lower source word (bytes 0-3) and bytes 0-3 of the
; shift amount are assembled first, then the code branches on the sign of
; (amount - 32). Only the .LBB4_2 path loads the upper source word (bytes 4-7)
; and merges in bits shifted out of the lower word; after the join the lower
; result word is masked with the srai/and pair before being stored.
322define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
323; RV64I-LABEL: shl_8bytes:
324; RV64I:       # %bb.0:
325; RV64I-NEXT:    lbu a3, 0(a0)
326; RV64I-NEXT:    lbu a4, 1(a0)
327; RV64I-NEXT:    lbu a5, 2(a0)
328; RV64I-NEXT:    lbu a6, 3(a0)
329; RV64I-NEXT:    lbu a7, 4(a0)
330; RV64I-NEXT:    lbu t0, 5(a0)
331; RV64I-NEXT:    lbu t1, 6(a0)
332; RV64I-NEXT:    lbu a0, 7(a0)
333; RV64I-NEXT:    slli a4, a4, 8
334; RV64I-NEXT:    slli a5, a5, 16
335; RV64I-NEXT:    slli a6, a6, 24
336; RV64I-NEXT:    slli t0, t0, 8
337; RV64I-NEXT:    or a3, a4, a3
338; RV64I-NEXT:    or a4, a6, a5
339; RV64I-NEXT:    or a5, t0, a7
340; RV64I-NEXT:    lbu a6, 0(a1)
341; RV64I-NEXT:    lbu a7, 1(a1)
342; RV64I-NEXT:    lbu t0, 2(a1)
343; RV64I-NEXT:    lbu t2, 3(a1)
344; RV64I-NEXT:    slli t1, t1, 16
345; RV64I-NEXT:    slli a0, a0, 24
346; RV64I-NEXT:    slli a7, a7, 8
347; RV64I-NEXT:    slli t0, t0, 16
348; RV64I-NEXT:    slli t2, t2, 24
349; RV64I-NEXT:    or a0, a0, t1
350; RV64I-NEXT:    or a6, a7, a6
351; RV64I-NEXT:    lbu a7, 4(a1)
352; RV64I-NEXT:    lbu t1, 5(a1)
353; RV64I-NEXT:    or t0, t2, t0
354; RV64I-NEXT:    lbu t2, 6(a1)
355; RV64I-NEXT:    lbu a1, 7(a1)
356; RV64I-NEXT:    slli t1, t1, 8
357; RV64I-NEXT:    or a7, t1, a7
358; RV64I-NEXT:    slli t2, t2, 16
359; RV64I-NEXT:    slli a1, a1, 24
360; RV64I-NEXT:    or a1, a1, t2
361; RV64I-NEXT:    or a3, a4, a3
362; RV64I-NEXT:    or a0, a0, a5
363; RV64I-NEXT:    or a4, t0, a6
364; RV64I-NEXT:    or a1, a1, a7
365; RV64I-NEXT:    slli a0, a0, 32
366; RV64I-NEXT:    slli a1, a1, 32
367; RV64I-NEXT:    or a0, a0, a3
368; RV64I-NEXT:    or a1, a1, a4
369; RV64I-NEXT:    sll a0, a0, a1
370; RV64I-NEXT:    srli a1, a0, 48
371; RV64I-NEXT:    srli a3, a0, 56
372; RV64I-NEXT:    srli a4, a0, 32
373; RV64I-NEXT:    srli a5, a0, 40
374; RV64I-NEXT:    srli a6, a0, 16
375; RV64I-NEXT:    srli a7, a0, 24
376; RV64I-NEXT:    srli t0, a0, 8
377; RV64I-NEXT:    sb a4, 4(a2)
378; RV64I-NEXT:    sb a5, 5(a2)
379; RV64I-NEXT:    sb a1, 6(a2)
380; RV64I-NEXT:    sb a3, 7(a2)
381; RV64I-NEXT:    sb a0, 0(a2)
382; RV64I-NEXT:    sb t0, 1(a2)
383; RV64I-NEXT:    sb a6, 2(a2)
384; RV64I-NEXT:    sb a7, 3(a2)
385; RV64I-NEXT:    ret
386;
387; RV32I-LABEL: shl_8bytes:
388; RV32I:       # %bb.0:
389; RV32I-NEXT:    lbu a3, 1(a0)
390; RV32I-NEXT:    lbu a4, 2(a0)
391; RV32I-NEXT:    lbu a5, 3(a0)
392; RV32I-NEXT:    lbu a6, 0(a0)
393; RV32I-NEXT:    slli a3, a3, 8
394; RV32I-NEXT:    slli a4, a4, 16
395; RV32I-NEXT:    slli a5, a5, 24
396; RV32I-NEXT:    or a3, a3, a6
397; RV32I-NEXT:    lbu a6, 0(a1)
398; RV32I-NEXT:    lbu a7, 1(a1)
399; RV32I-NEXT:    or a4, a5, a4
400; RV32I-NEXT:    lbu a5, 2(a1)
401; RV32I-NEXT:    lbu a1, 3(a1)
402; RV32I-NEXT:    slli a7, a7, 8
403; RV32I-NEXT:    or a6, a7, a6
404; RV32I-NEXT:    slli a5, a5, 16
405; RV32I-NEXT:    slli a1, a1, 24
406; RV32I-NEXT:    or a1, a1, a5
407; RV32I-NEXT:    or a5, a4, a3
408; RV32I-NEXT:    or a4, a1, a6
409; RV32I-NEXT:    addi a3, a4, -32
410; RV32I-NEXT:    sll a1, a5, a4
411; RV32I-NEXT:    bltz a3, .LBB4_2
412; RV32I-NEXT:  # %bb.1:
413; RV32I-NEXT:    mv a0, a1
414; RV32I-NEXT:    j .LBB4_3
415; RV32I-NEXT:  .LBB4_2:
416; RV32I-NEXT:    lbu a6, 5(a0)
417; RV32I-NEXT:    lbu a7, 4(a0)
418; RV32I-NEXT:    lbu t0, 6(a0)
419; RV32I-NEXT:    lbu a0, 7(a0)
420; RV32I-NEXT:    slli a6, a6, 8
421; RV32I-NEXT:    or a6, a6, a7
422; RV32I-NEXT:    slli t0, t0, 16
423; RV32I-NEXT:    slli a0, a0, 24
424; RV32I-NEXT:    or a0, a0, t0
425; RV32I-NEXT:    not a7, a4
426; RV32I-NEXT:    srli a5, a5, 1
427; RV32I-NEXT:    or a0, a0, a6
428; RV32I-NEXT:    sll a0, a0, a4
429; RV32I-NEXT:    srl a4, a5, a7
430; RV32I-NEXT:    or a0, a0, a4
431; RV32I-NEXT:  .LBB4_3:
432; RV32I-NEXT:    srai a3, a3, 31
433; RV32I-NEXT:    srli a4, a0, 16
434; RV32I-NEXT:    srli a5, a0, 24
435; RV32I-NEXT:    and a1, a3, a1
436; RV32I-NEXT:    srli a3, a1, 16
437; RV32I-NEXT:    srli a6, a1, 24
438; RV32I-NEXT:    srli a7, a1, 8
439; RV32I-NEXT:    sb a1, 0(a2)
440; RV32I-NEXT:    sb a7, 1(a2)
441; RV32I-NEXT:    sb a3, 2(a2)
442; RV32I-NEXT:    sb a6, 3(a2)
443; RV32I-NEXT:    srli a1, a0, 8
444; RV32I-NEXT:    sb a0, 4(a2)
445; RV32I-NEXT:    sb a1, 5(a2)
446; RV32I-NEXT:    sb a4, 6(a2)
447; RV32I-NEXT:    sb a5, 7(a2)
448; RV32I-NEXT:    ret
449  %src = load i64, ptr %src.ptr, align 1
450  %bitOff = load i64, ptr %bitOff.ptr, align 1
451  %res = shl i64 %src, %bitOff
452  store i64 %res, ptr %dst, align 1
453  ret void
454}
; Arithmetic right shift of an 8-byte value: loads an i64 from %src.ptr and an
; i64 shift amount from %bitOff.ptr, computes `ashr`, and stores the i64 result
; to %dst. All memory accesses are `align 1`, so the expected code uses byte
; loads (lbu) and byte stores (sb) throughout.
; riscv64 expectation: both 64-bit operands are assembled from eight bytes each
; and a single sra produces the result.
; riscv32 expectation: the upper source word (bytes 4-7) and bytes 0-3 of the
; shift amount are assembled first, then the code branches on the sign of
; (amount - 32). The fall-through path (%bb.1) uses the sra result as the lower
; word and derives the upper word with `srai ..., 31` from the shifted top
; byte; the .LBB5_2 path loads the lower source word (bytes 0-3) and merges in
; bits shifted out of the upper word.
455define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
456; RV64I-LABEL: ashr_8bytes:
457; RV64I:       # %bb.0:
458; RV64I-NEXT:    lbu a3, 0(a0)
459; RV64I-NEXT:    lbu a4, 1(a0)
460; RV64I-NEXT:    lbu a5, 2(a0)
461; RV64I-NEXT:    lbu a6, 3(a0)
462; RV64I-NEXT:    lbu a7, 4(a0)
463; RV64I-NEXT:    lbu t0, 5(a0)
464; RV64I-NEXT:    lbu t1, 6(a0)
465; RV64I-NEXT:    lbu a0, 7(a0)
466; RV64I-NEXT:    slli a4, a4, 8
467; RV64I-NEXT:    slli a5, a5, 16
468; RV64I-NEXT:    slli a6, a6, 24
469; RV64I-NEXT:    slli t0, t0, 8
470; RV64I-NEXT:    or a3, a4, a3
471; RV64I-NEXT:    or a4, a6, a5
472; RV64I-NEXT:    or a5, t0, a7
473; RV64I-NEXT:    lbu a6, 0(a1)
474; RV64I-NEXT:    lbu a7, 1(a1)
475; RV64I-NEXT:    lbu t0, 2(a1)
476; RV64I-NEXT:    lbu t2, 3(a1)
477; RV64I-NEXT:    slli t1, t1, 16
478; RV64I-NEXT:    slli a0, a0, 24
479; RV64I-NEXT:    slli a7, a7, 8
480; RV64I-NEXT:    slli t0, t0, 16
481; RV64I-NEXT:    slli t2, t2, 24
482; RV64I-NEXT:    or a0, a0, t1
483; RV64I-NEXT:    or a6, a7, a6
484; RV64I-NEXT:    lbu a7, 4(a1)
485; RV64I-NEXT:    lbu t1, 5(a1)
486; RV64I-NEXT:    or t0, t2, t0
487; RV64I-NEXT:    lbu t2, 6(a1)
488; RV64I-NEXT:    lbu a1, 7(a1)
489; RV64I-NEXT:    slli t1, t1, 8
490; RV64I-NEXT:    or a7, t1, a7
491; RV64I-NEXT:    slli t2, t2, 16
492; RV64I-NEXT:    slli a1, a1, 24
493; RV64I-NEXT:    or a1, a1, t2
494; RV64I-NEXT:    or a3, a4, a3
495; RV64I-NEXT:    or a0, a0, a5
496; RV64I-NEXT:    or a4, t0, a6
497; RV64I-NEXT:    or a1, a1, a7
498; RV64I-NEXT:    slli a0, a0, 32
499; RV64I-NEXT:    slli a1, a1, 32
500; RV64I-NEXT:    or a0, a0, a3
501; RV64I-NEXT:    or a1, a1, a4
502; RV64I-NEXT:    sra a0, a0, a1
503; RV64I-NEXT:    srli a1, a0, 48
504; RV64I-NEXT:    srli a3, a0, 56
505; RV64I-NEXT:    srli a4, a0, 32
506; RV64I-NEXT:    srli a5, a0, 40
507; RV64I-NEXT:    srli a6, a0, 16
508; RV64I-NEXT:    srli a7, a0, 24
509; RV64I-NEXT:    srli t0, a0, 8
510; RV64I-NEXT:    sb a4, 4(a2)
511; RV64I-NEXT:    sb a5, 5(a2)
512; RV64I-NEXT:    sb a1, 6(a2)
513; RV64I-NEXT:    sb a3, 7(a2)
514; RV64I-NEXT:    sb a0, 0(a2)
515; RV64I-NEXT:    sb t0, 1(a2)
516; RV64I-NEXT:    sb a6, 2(a2)
517; RV64I-NEXT:    sb a7, 3(a2)
518; RV64I-NEXT:    ret
519;
520; RV32I-LABEL: ashr_8bytes:
521; RV32I:       # %bb.0:
522; RV32I-NEXT:    lbu a3, 5(a0)
523; RV32I-NEXT:    lbu a4, 4(a0)
524; RV32I-NEXT:    lbu a5, 6(a0)
525; RV32I-NEXT:    lbu a6, 7(a0)
526; RV32I-NEXT:    slli a3, a3, 8
527; RV32I-NEXT:    lbu a7, 0(a1)
528; RV32I-NEXT:    lbu t0, 1(a1)
529; RV32I-NEXT:    or a3, a3, a4
530; RV32I-NEXT:    lbu a4, 2(a1)
531; RV32I-NEXT:    lbu a1, 3(a1)
532; RV32I-NEXT:    slli t0, t0, 8
533; RV32I-NEXT:    or a7, t0, a7
534; RV32I-NEXT:    slli a4, a4, 16
535; RV32I-NEXT:    slli a1, a1, 24
536; RV32I-NEXT:    or a1, a1, a4
537; RV32I-NEXT:    slli a4, a5, 16
538; RV32I-NEXT:    slli a5, a6, 24
539; RV32I-NEXT:    or a4, a5, a4
540; RV32I-NEXT:    or a4, a4, a3
541; RV32I-NEXT:    or a3, a1, a7
542; RV32I-NEXT:    addi a6, a3, -32
543; RV32I-NEXT:    sra a1, a4, a3
544; RV32I-NEXT:    bltz a6, .LBB5_2
545; RV32I-NEXT:  # %bb.1:
546; RV32I-NEXT:    srai a5, a5, 31
547; RV32I-NEXT:    mv a0, a1
548; RV32I-NEXT:    mv a1, a5
549; RV32I-NEXT:    j .LBB5_3
550; RV32I-NEXT:  .LBB5_2:
551; RV32I-NEXT:    lbu a5, 1(a0)
552; RV32I-NEXT:    lbu a6, 0(a0)
553; RV32I-NEXT:    lbu a7, 2(a0)
554; RV32I-NEXT:    lbu a0, 3(a0)
555; RV32I-NEXT:    slli a5, a5, 8
556; RV32I-NEXT:    or a5, a5, a6
557; RV32I-NEXT:    slli a7, a7, 16
558; RV32I-NEXT:    slli a0, a0, 24
559; RV32I-NEXT:    or a0, a0, a7
560; RV32I-NEXT:    not a6, a3
561; RV32I-NEXT:    slli a4, a4, 1
562; RV32I-NEXT:    or a0, a0, a5
563; RV32I-NEXT:    srl a0, a0, a3
564; RV32I-NEXT:    sll a3, a4, a6
565; RV32I-NEXT:    or a0, a0, a3
566; RV32I-NEXT:  .LBB5_3:
567; RV32I-NEXT:    srli a3, a1, 16
568; RV32I-NEXT:    srli a4, a1, 24
569; RV32I-NEXT:    srli a5, a1, 8
570; RV32I-NEXT:    srli a6, a0, 16
571; RV32I-NEXT:    srli a7, a0, 24
572; RV32I-NEXT:    sb a1, 4(a2)
573; RV32I-NEXT:    sb a5, 5(a2)
574; RV32I-NEXT:    sb a3, 6(a2)
575; RV32I-NEXT:    sb a4, 7(a2)
576; RV32I-NEXT:    srli a1, a0, 8
577; RV32I-NEXT:    sb a0, 0(a2)
578; RV32I-NEXT:    sb a1, 1(a2)
579; RV32I-NEXT:    sb a6, 2(a2)
580; RV32I-NEXT:    sb a7, 3(a2)
581; RV32I-NEXT:    ret
582  %src = load i64, ptr %src.ptr, align 1
583  %bitOff = load i64, ptr %bitOff.ptr, align 1
584  %res = ashr i64 %src, %bitOff
585  store i64 %res, ptr %dst, align 1
586  ret void
587}
588
; Logical right shift of a 16-byte value: loads an i128 from %src.ptr and an
; i128 shift amount from %bitOff.ptr, computes `lshr`, and stores the i128
; result to %dst. All memory accesses are `align 1`, so the expected code uses
; byte loads (lbu) and byte stores (sb) throughout.
; riscv64 expectation: the upper source half (bytes 8-15) and bytes 0-7 of the
; shift amount are assembled first, then the code branches on the sign of
; (amount - 64). Only the .LBB6_2 path loads the lower source half (bytes 0-7)
; and merges in bits shifted out of the upper half; after the join the upper
; result half is masked with the srai/and pair before being stored.
; riscv32 expectation: no branch. The code reserves 32 bytes of stack, stores
; the four source words at 0-12(sp) and zeros at 16-28(sp), reloads four words
; starting at sp + ((amount >> 3) & 12), and combines neighbouring words with
; srl by the amount and sll by ((amount & 31) ^ 31) of the next word shifted
; left by one. Only bytes 0-3 of the shift amount are loaded.
589define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
590; RV64I-LABEL: lshr_16bytes:
591; RV64I:       # %bb.0:
592; RV64I-NEXT:    lbu a3, 8(a0)
593; RV64I-NEXT:    lbu a4, 9(a0)
594; RV64I-NEXT:    lbu a5, 10(a0)
595; RV64I-NEXT:    lbu a6, 11(a0)
596; RV64I-NEXT:    lbu a7, 12(a0)
597; RV64I-NEXT:    lbu t0, 13(a0)
598; RV64I-NEXT:    lbu t1, 14(a0)
599; RV64I-NEXT:    lbu t2, 15(a0)
600; RV64I-NEXT:    slli a4, a4, 8
601; RV64I-NEXT:    slli a5, a5, 16
602; RV64I-NEXT:    slli a6, a6, 24
603; RV64I-NEXT:    slli t0, t0, 8
604; RV64I-NEXT:    or a3, a4, a3
605; RV64I-NEXT:    or a4, a6, a5
606; RV64I-NEXT:    or a5, t0, a7
607; RV64I-NEXT:    lbu a6, 0(a1)
608; RV64I-NEXT:    lbu a7, 1(a1)
609; RV64I-NEXT:    lbu t0, 2(a1)
610; RV64I-NEXT:    lbu t3, 3(a1)
611; RV64I-NEXT:    slli t1, t1, 16
612; RV64I-NEXT:    slli t2, t2, 24
613; RV64I-NEXT:    slli a7, a7, 8
614; RV64I-NEXT:    slli t0, t0, 16
615; RV64I-NEXT:    slli t3, t3, 24
616; RV64I-NEXT:    or t1, t2, t1
617; RV64I-NEXT:    or a6, a7, a6
618; RV64I-NEXT:    lbu a7, 4(a1)
619; RV64I-NEXT:    lbu t2, 5(a1)
620; RV64I-NEXT:    or t0, t3, t0
621; RV64I-NEXT:    lbu t3, 6(a1)
622; RV64I-NEXT:    lbu a1, 7(a1)
623; RV64I-NEXT:    slli t2, t2, 8
624; RV64I-NEXT:    or a7, t2, a7
625; RV64I-NEXT:    slli t3, t3, 16
626; RV64I-NEXT:    slli a1, a1, 24
627; RV64I-NEXT:    or a1, a1, t3
628; RV64I-NEXT:    or a3, a4, a3
629; RV64I-NEXT:    or a4, t1, a5
630; RV64I-NEXT:    or a6, t0, a6
631; RV64I-NEXT:    or a1, a1, a7
632; RV64I-NEXT:    slli a4, a4, 32
633; RV64I-NEXT:    slli a1, a1, 32
634; RV64I-NEXT:    or a5, a4, a3
635; RV64I-NEXT:    or a4, a1, a6
636; RV64I-NEXT:    addi a3, a4, -64
637; RV64I-NEXT:    srl a1, a5, a4
638; RV64I-NEXT:    bltz a3, .LBB6_2
639; RV64I-NEXT:  # %bb.1:
640; RV64I-NEXT:    mv a0, a1
641; RV64I-NEXT:    j .LBB6_3
642; RV64I-NEXT:  .LBB6_2:
643; RV64I-NEXT:    lbu a6, 1(a0)
644; RV64I-NEXT:    lbu a7, 2(a0)
645; RV64I-NEXT:    lbu t0, 3(a0)
646; RV64I-NEXT:    lbu t1, 0(a0)
647; RV64I-NEXT:    slli a6, a6, 8
648; RV64I-NEXT:    slli a7, a7, 16
649; RV64I-NEXT:    slli t0, t0, 24
650; RV64I-NEXT:    or a6, a6, t1
651; RV64I-NEXT:    lbu t1, 4(a0)
652; RV64I-NEXT:    lbu t2, 5(a0)
653; RV64I-NEXT:    or a7, t0, a7
654; RV64I-NEXT:    lbu t0, 6(a0)
655; RV64I-NEXT:    lbu a0, 7(a0)
656; RV64I-NEXT:    slli t2, t2, 8
657; RV64I-NEXT:    or t1, t2, t1
658; RV64I-NEXT:    slli t0, t0, 16
659; RV64I-NEXT:    slli a0, a0, 24
660; RV64I-NEXT:    or a0, a0, t0
661; RV64I-NEXT:    or a6, a7, a6
662; RV64I-NEXT:    not a7, a4
663; RV64I-NEXT:    slli a5, a5, 1
664; RV64I-NEXT:    or a0, a0, t1
665; RV64I-NEXT:    slli a0, a0, 32
666; RV64I-NEXT:    or a0, a0, a6
667; RV64I-NEXT:    srl a0, a0, a4
668; RV64I-NEXT:    sll a4, a5, a7
669; RV64I-NEXT:    or a0, a0, a4
670; RV64I-NEXT:  .LBB6_3:
671; RV64I-NEXT:    srai a3, a3, 63
672; RV64I-NEXT:    srli a4, a0, 56
673; RV64I-NEXT:    srli a5, a0, 48
674; RV64I-NEXT:    srli a6, a0, 40
675; RV64I-NEXT:    srli a7, a0, 32
676; RV64I-NEXT:    srli t0, a0, 24
677; RV64I-NEXT:    srli t1, a0, 16
678; RV64I-NEXT:    and a1, a3, a1
679; RV64I-NEXT:    sb a7, 4(a2)
680; RV64I-NEXT:    sb a6, 5(a2)
681; RV64I-NEXT:    sb a5, 6(a2)
682; RV64I-NEXT:    sb a4, 7(a2)
683; RV64I-NEXT:    srli a3, a1, 56
684; RV64I-NEXT:    srli a4, a1, 48
685; RV64I-NEXT:    srli a5, a1, 40
686; RV64I-NEXT:    srli a6, a1, 32
687; RV64I-NEXT:    srli a7, a1, 24
688; RV64I-NEXT:    srli t2, a1, 16
689; RV64I-NEXT:    sb a6, 12(a2)
690; RV64I-NEXT:    sb a5, 13(a2)
691; RV64I-NEXT:    sb a4, 14(a2)
692; RV64I-NEXT:    sb a3, 15(a2)
693; RV64I-NEXT:    srli a3, a1, 8
694; RV64I-NEXT:    sb a1, 8(a2)
695; RV64I-NEXT:    sb a3, 9(a2)
696; RV64I-NEXT:    sb t2, 10(a2)
697; RV64I-NEXT:    sb a7, 11(a2)
698; RV64I-NEXT:    srli a1, a0, 8
699; RV64I-NEXT:    sb a0, 0(a2)
700; RV64I-NEXT:    sb a1, 1(a2)
701; RV64I-NEXT:    sb t1, 2(a2)
702; RV64I-NEXT:    sb t0, 3(a2)
703; RV64I-NEXT:    ret
704;
705; RV32I-LABEL: lshr_16bytes:
706; RV32I:       # %bb.0:
707; RV32I-NEXT:    addi sp, sp, -32
708; RV32I-NEXT:    lbu a3, 0(a0)
709; RV32I-NEXT:    lbu a4, 1(a0)
710; RV32I-NEXT:    lbu a5, 2(a0)
711; RV32I-NEXT:    lbu a6, 3(a0)
712; RV32I-NEXT:    lbu a7, 4(a0)
713; RV32I-NEXT:    lbu t0, 5(a0)
714; RV32I-NEXT:    lbu t1, 6(a0)
715; RV32I-NEXT:    lbu t2, 7(a0)
716; RV32I-NEXT:    slli a4, a4, 8
717; RV32I-NEXT:    slli a5, a5, 16
718; RV32I-NEXT:    slli a6, a6, 24
719; RV32I-NEXT:    or a3, a4, a3
720; RV32I-NEXT:    or a4, a6, a5
721; RV32I-NEXT:    lbu a5, 8(a0)
722; RV32I-NEXT:    lbu a6, 9(a0)
723; RV32I-NEXT:    lbu t3, 10(a0)
724; RV32I-NEXT:    lbu t4, 11(a0)
725; RV32I-NEXT:    slli t0, t0, 8
726; RV32I-NEXT:    slli t1, t1, 16
727; RV32I-NEXT:    slli t2, t2, 24
728; RV32I-NEXT:    slli a6, a6, 8
729; RV32I-NEXT:    or a7, t0, a7
730; RV32I-NEXT:    or t0, t2, t1
731; RV32I-NEXT:    or a5, a6, a5
732; RV32I-NEXT:    lbu a6, 12(a0)
733; RV32I-NEXT:    lbu t1, 13(a0)
734; RV32I-NEXT:    lbu t2, 14(a0)
735; RV32I-NEXT:    lbu a0, 15(a0)
736; RV32I-NEXT:    slli t3, t3, 16
737; RV32I-NEXT:    slli t4, t4, 24
738; RV32I-NEXT:    slli t1, t1, 8
739; RV32I-NEXT:    slli t2, t2, 16
740; RV32I-NEXT:    slli a0, a0, 24
741; RV32I-NEXT:    or t3, t4, t3
742; RV32I-NEXT:    or a6, t1, a6
743; RV32I-NEXT:    lbu t1, 0(a1)
744; RV32I-NEXT:    lbu t4, 1(a1)
745; RV32I-NEXT:    or a0, a0, t2
746; RV32I-NEXT:    lbu t2, 2(a1)
747; RV32I-NEXT:    lbu a1, 3(a1)
748; RV32I-NEXT:    slli t4, t4, 8
749; RV32I-NEXT:    or t1, t4, t1
750; RV32I-NEXT:    sw zero, 16(sp)
751; RV32I-NEXT:    sw zero, 20(sp)
752; RV32I-NEXT:    sw zero, 24(sp)
753; RV32I-NEXT:    sw zero, 28(sp)
754; RV32I-NEXT:    slli t2, t2, 16
755; RV32I-NEXT:    slli a1, a1, 24
756; RV32I-NEXT:    or a1, a1, t2
757; RV32I-NEXT:    mv t2, sp
758; RV32I-NEXT:    or a3, a4, a3
759; RV32I-NEXT:    or a4, t0, a7
760; RV32I-NEXT:    or a5, t3, a5
761; RV32I-NEXT:    or a0, a0, a6
762; RV32I-NEXT:    or a1, a1, t1
763; RV32I-NEXT:    sw a3, 0(sp)
764; RV32I-NEXT:    sw a4, 4(sp)
765; RV32I-NEXT:    sw a5, 8(sp)
766; RV32I-NEXT:    sw a0, 12(sp)
767; RV32I-NEXT:    srli a0, a1, 3
768; RV32I-NEXT:    andi a3, a1, 31
769; RV32I-NEXT:    andi a0, a0, 12
770; RV32I-NEXT:    add a0, t2, a0
771; RV32I-NEXT:    lw a4, 0(a0)
772; RV32I-NEXT:    lw a5, 4(a0)
773; RV32I-NEXT:    lw a6, 8(a0)
774; RV32I-NEXT:    xori a3, a3, 31
775; RV32I-NEXT:    lw a0, 12(a0)
776; RV32I-NEXT:    srl a7, a5, a1
777; RV32I-NEXT:    slli t0, a6, 1
778; RV32I-NEXT:    srl a4, a4, a1
779; RV32I-NEXT:    slli a5, a5, 1
780; RV32I-NEXT:    srl a6, a6, a1
781; RV32I-NEXT:    slli t1, a0, 1
782; RV32I-NEXT:    srl a0, a0, a1
783; RV32I-NEXT:    sll a1, t0, a3
784; RV32I-NEXT:    sll a5, a5, a3
785; RV32I-NEXT:    sll a3, t1, a3
786; RV32I-NEXT:    srli t0, a0, 16
787; RV32I-NEXT:    srli t1, a0, 24
788; RV32I-NEXT:    srli t2, a0, 8
789; RV32I-NEXT:    or a1, a7, a1
790; RV32I-NEXT:    or a4, a4, a5
791; RV32I-NEXT:    or a3, a6, a3
792; RV32I-NEXT:    sb a0, 12(a2)
793; RV32I-NEXT:    sb t2, 13(a2)
794; RV32I-NEXT:    sb t0, 14(a2)
795; RV32I-NEXT:    sb t1, 15(a2)
796; RV32I-NEXT:    srli a0, a3, 16
797; RV32I-NEXT:    srli a5, a3, 24
798; RV32I-NEXT:    srli a6, a3, 8
799; RV32I-NEXT:    srli a7, a4, 16
800; RV32I-NEXT:    srli t0, a4, 24
801; RV32I-NEXT:    srli t1, a4, 8
802; RV32I-NEXT:    srli t2, a1, 16
803; RV32I-NEXT:    srli t3, a1, 24
804; RV32I-NEXT:    sb a3, 8(a2)
805; RV32I-NEXT:    sb a6, 9(a2)
806; RV32I-NEXT:    sb a0, 10(a2)
807; RV32I-NEXT:    sb a5, 11(a2)
808; RV32I-NEXT:    srli a0, a1, 8
809; RV32I-NEXT:    sb a4, 0(a2)
810; RV32I-NEXT:    sb t1, 1(a2)
811; RV32I-NEXT:    sb a7, 2(a2)
812; RV32I-NEXT:    sb t0, 3(a2)
813; RV32I-NEXT:    sb a1, 4(a2)
814; RV32I-NEXT:    sb a0, 5(a2)
815; RV32I-NEXT:    sb t2, 6(a2)
816; RV32I-NEXT:    sb t3, 7(a2)
817; RV32I-NEXT:    addi sp, sp, 32
818; RV32I-NEXT:    ret
819  %src = load i128, ptr %src.ptr, align 1
820  %bitOff = load i128, ptr %bitOff.ptr, align 1
821  %res = lshr i128 %src, %bitOff
822  store i128 %res, ptr %dst, align 1
823  ret void
824}
825define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
826; RV64I-LABEL: shl_16bytes:
827; RV64I:       # %bb.0:
828; RV64I-NEXT:    lbu a3, 0(a0)
829; RV64I-NEXT:    lbu a4, 1(a0)
830; RV64I-NEXT:    lbu a5, 2(a0)
831; RV64I-NEXT:    lbu a6, 3(a0)
832; RV64I-NEXT:    lbu a7, 4(a0)
833; RV64I-NEXT:    lbu t0, 5(a0)
834; RV64I-NEXT:    lbu t1, 6(a0)
835; RV64I-NEXT:    lbu t2, 7(a0)
836; RV64I-NEXT:    slli a4, a4, 8
837; RV64I-NEXT:    slli a5, a5, 16
838; RV64I-NEXT:    slli a6, a6, 24
839; RV64I-NEXT:    slli t0, t0, 8
840; RV64I-NEXT:    or a3, a4, a3
841; RV64I-NEXT:    or a4, a6, a5
842; RV64I-NEXT:    or a5, t0, a7
843; RV64I-NEXT:    lbu a6, 0(a1)
844; RV64I-NEXT:    lbu a7, 1(a1)
845; RV64I-NEXT:    lbu t0, 2(a1)
846; RV64I-NEXT:    lbu t3, 3(a1)
847; RV64I-NEXT:    slli t1, t1, 16
848; RV64I-NEXT:    slli t2, t2, 24
849; RV64I-NEXT:    slli a7, a7, 8
850; RV64I-NEXT:    slli t0, t0, 16
851; RV64I-NEXT:    slli t3, t3, 24
852; RV64I-NEXT:    or t1, t2, t1
853; RV64I-NEXT:    or a6, a7, a6
854; RV64I-NEXT:    lbu a7, 4(a1)
855; RV64I-NEXT:    lbu t2, 5(a1)
856; RV64I-NEXT:    or t0, t3, t0
857; RV64I-NEXT:    lbu t3, 6(a1)
858; RV64I-NEXT:    lbu a1, 7(a1)
859; RV64I-NEXT:    slli t2, t2, 8
860; RV64I-NEXT:    or a7, t2, a7
861; RV64I-NEXT:    slli t3, t3, 16
862; RV64I-NEXT:    slli a1, a1, 24
863; RV64I-NEXT:    or a1, a1, t3
864; RV64I-NEXT:    or a3, a4, a3
865; RV64I-NEXT:    or a4, t1, a5
866; RV64I-NEXT:    or a6, t0, a6
867; RV64I-NEXT:    or a1, a1, a7
868; RV64I-NEXT:    slli a4, a4, 32
869; RV64I-NEXT:    slli a1, a1, 32
870; RV64I-NEXT:    or a5, a4, a3
871; RV64I-NEXT:    or a4, a1, a6
872; RV64I-NEXT:    addi a3, a4, -64
873; RV64I-NEXT:    sll a1, a5, a4
874; RV64I-NEXT:    bltz a3, .LBB7_2
875; RV64I-NEXT:  # %bb.1:
876; RV64I-NEXT:    mv a0, a1
877; RV64I-NEXT:    j .LBB7_3
878; RV64I-NEXT:  .LBB7_2:
879; RV64I-NEXT:    lbu a6, 9(a0)
880; RV64I-NEXT:    lbu a7, 10(a0)
881; RV64I-NEXT:    lbu t0, 11(a0)
882; RV64I-NEXT:    lbu t1, 8(a0)
883; RV64I-NEXT:    slli a6, a6, 8
884; RV64I-NEXT:    slli a7, a7, 16
885; RV64I-NEXT:    slli t0, t0, 24
886; RV64I-NEXT:    or a6, a6, t1
887; RV64I-NEXT:    lbu t1, 12(a0)
888; RV64I-NEXT:    lbu t2, 13(a0)
889; RV64I-NEXT:    or a7, t0, a7
890; RV64I-NEXT:    lbu t0, 14(a0)
891; RV64I-NEXT:    lbu a0, 15(a0)
892; RV64I-NEXT:    slli t2, t2, 8
893; RV64I-NEXT:    or t1, t2, t1
894; RV64I-NEXT:    slli t0, t0, 16
895; RV64I-NEXT:    slli a0, a0, 24
896; RV64I-NEXT:    or a0, a0, t0
897; RV64I-NEXT:    or a6, a7, a6
898; RV64I-NEXT:    not a7, a4
899; RV64I-NEXT:    srli a5, a5, 1
900; RV64I-NEXT:    or a0, a0, t1
901; RV64I-NEXT:    slli a0, a0, 32
902; RV64I-NEXT:    or a0, a0, a6
903; RV64I-NEXT:    sll a0, a0, a4
904; RV64I-NEXT:    srl a4, a5, a7
905; RV64I-NEXT:    or a0, a0, a4
906; RV64I-NEXT:  .LBB7_3:
907; RV64I-NEXT:    srai a3, a3, 63
908; RV64I-NEXT:    srli a4, a0, 56
909; RV64I-NEXT:    srli a5, a0, 48
910; RV64I-NEXT:    srli a6, a0, 40
911; RV64I-NEXT:    srli a7, a0, 32
912; RV64I-NEXT:    srli t0, a0, 24
913; RV64I-NEXT:    srli t1, a0, 16
914; RV64I-NEXT:    and a1, a3, a1
915; RV64I-NEXT:    sb a7, 12(a2)
916; RV64I-NEXT:    sb a6, 13(a2)
917; RV64I-NEXT:    sb a5, 14(a2)
918; RV64I-NEXT:    sb a4, 15(a2)
919; RV64I-NEXT:    srli a3, a1, 56
920; RV64I-NEXT:    srli a4, a1, 48
921; RV64I-NEXT:    srli a5, a1, 40
922; RV64I-NEXT:    srli a6, a1, 32
923; RV64I-NEXT:    srli a7, a1, 24
924; RV64I-NEXT:    srli t2, a1, 16
925; RV64I-NEXT:    sb a6, 4(a2)
926; RV64I-NEXT:    sb a5, 5(a2)
927; RV64I-NEXT:    sb a4, 6(a2)
928; RV64I-NEXT:    sb a3, 7(a2)
929; RV64I-NEXT:    srli a3, a1, 8
930; RV64I-NEXT:    sb a1, 0(a2)
931; RV64I-NEXT:    sb a3, 1(a2)
932; RV64I-NEXT:    sb t2, 2(a2)
933; RV64I-NEXT:    sb a7, 3(a2)
934; RV64I-NEXT:    srli a1, a0, 8
935; RV64I-NEXT:    sb a0, 8(a2)
936; RV64I-NEXT:    sb a1, 9(a2)
937; RV64I-NEXT:    sb t1, 10(a2)
938; RV64I-NEXT:    sb t0, 11(a2)
939; RV64I-NEXT:    ret
940;
941; RV32I-LABEL: shl_16bytes:
942; RV32I:       # %bb.0:
943; RV32I-NEXT:    addi sp, sp, -32
944; RV32I-NEXT:    lbu a3, 0(a0)
945; RV32I-NEXT:    lbu a4, 1(a0)
946; RV32I-NEXT:    lbu a5, 2(a0)
947; RV32I-NEXT:    lbu a6, 3(a0)
948; RV32I-NEXT:    lbu a7, 4(a0)
949; RV32I-NEXT:    lbu t0, 5(a0)
950; RV32I-NEXT:    lbu t1, 6(a0)
951; RV32I-NEXT:    lbu t2, 7(a0)
952; RV32I-NEXT:    slli a4, a4, 8
953; RV32I-NEXT:    slli a5, a5, 16
954; RV32I-NEXT:    slli a6, a6, 24
955; RV32I-NEXT:    or a3, a4, a3
956; RV32I-NEXT:    or a4, a6, a5
957; RV32I-NEXT:    lbu a5, 8(a0)
958; RV32I-NEXT:    lbu a6, 9(a0)
959; RV32I-NEXT:    lbu t3, 10(a0)
960; RV32I-NEXT:    lbu t4, 11(a0)
961; RV32I-NEXT:    slli t0, t0, 8
962; RV32I-NEXT:    slli t1, t1, 16
963; RV32I-NEXT:    slli t2, t2, 24
964; RV32I-NEXT:    slli a6, a6, 8
965; RV32I-NEXT:    or a7, t0, a7
966; RV32I-NEXT:    or t0, t2, t1
967; RV32I-NEXT:    or a5, a6, a5
968; RV32I-NEXT:    lbu a6, 12(a0)
969; RV32I-NEXT:    lbu t1, 13(a0)
970; RV32I-NEXT:    lbu t2, 14(a0)
971; RV32I-NEXT:    lbu a0, 15(a0)
972; RV32I-NEXT:    slli t3, t3, 16
973; RV32I-NEXT:    slli t4, t4, 24
974; RV32I-NEXT:    slli t1, t1, 8
975; RV32I-NEXT:    slli t2, t2, 16
976; RV32I-NEXT:    slli a0, a0, 24
977; RV32I-NEXT:    or t3, t4, t3
978; RV32I-NEXT:    or a6, t1, a6
979; RV32I-NEXT:    lbu t1, 0(a1)
980; RV32I-NEXT:    lbu t4, 1(a1)
981; RV32I-NEXT:    or a0, a0, t2
982; RV32I-NEXT:    lbu t2, 2(a1)
983; RV32I-NEXT:    lbu a1, 3(a1)
984; RV32I-NEXT:    slli t4, t4, 8
985; RV32I-NEXT:    or t1, t4, t1
986; RV32I-NEXT:    sw zero, 0(sp)
987; RV32I-NEXT:    sw zero, 4(sp)
988; RV32I-NEXT:    sw zero, 8(sp)
989; RV32I-NEXT:    sw zero, 12(sp)
990; RV32I-NEXT:    slli t2, t2, 16
991; RV32I-NEXT:    slli a1, a1, 24
992; RV32I-NEXT:    or a1, a1, t2
993; RV32I-NEXT:    addi t2, sp, 16
994; RV32I-NEXT:    or a3, a4, a3
995; RV32I-NEXT:    or a4, t0, a7
996; RV32I-NEXT:    or a5, t3, a5
997; RV32I-NEXT:    or a0, a0, a6
998; RV32I-NEXT:    or a1, a1, t1
999; RV32I-NEXT:    sw a3, 16(sp)
1000; RV32I-NEXT:    sw a4, 20(sp)
1001; RV32I-NEXT:    sw a5, 24(sp)
1002; RV32I-NEXT:    sw a0, 28(sp)
1003; RV32I-NEXT:    srli a0, a1, 3
1004; RV32I-NEXT:    andi a3, a1, 31
1005; RV32I-NEXT:    andi a0, a0, 12
1006; RV32I-NEXT:    sub a0, t2, a0
1007; RV32I-NEXT:    lw a4, 0(a0)
1008; RV32I-NEXT:    lw a5, 4(a0)
1009; RV32I-NEXT:    lw a6, 8(a0)
1010; RV32I-NEXT:    lw a0, 12(a0)
1011; RV32I-NEXT:    xori a3, a3, 31
1012; RV32I-NEXT:    sll a7, a5, a1
1013; RV32I-NEXT:    srli t0, a4, 1
1014; RV32I-NEXT:    sll a0, a0, a1
1015; RV32I-NEXT:    srli t1, a6, 1
1016; RV32I-NEXT:    sll a6, a6, a1
1017; RV32I-NEXT:    srli a5, a5, 1
1018; RV32I-NEXT:    sll a1, a4, a1
1019; RV32I-NEXT:    srl a4, t0, a3
1020; RV32I-NEXT:    srl t0, t1, a3
1021; RV32I-NEXT:    srl a3, a5, a3
1022; RV32I-NEXT:    srli a5, a1, 16
1023; RV32I-NEXT:    srli t1, a1, 24
1024; RV32I-NEXT:    srli t2, a1, 8
1025; RV32I-NEXT:    or a4, a7, a4
1026; RV32I-NEXT:    or a0, a0, t0
1027; RV32I-NEXT:    or a3, a6, a3
1028; RV32I-NEXT:    sb a1, 0(a2)
1029; RV32I-NEXT:    sb t2, 1(a2)
1030; RV32I-NEXT:    sb a5, 2(a2)
1031; RV32I-NEXT:    sb t1, 3(a2)
1032; RV32I-NEXT:    srli a1, a3, 16
1033; RV32I-NEXT:    srli a5, a3, 24
1034; RV32I-NEXT:    srli a6, a3, 8
1035; RV32I-NEXT:    srli a7, a0, 16
1036; RV32I-NEXT:    srli t0, a0, 24
1037; RV32I-NEXT:    srli t1, a0, 8
1038; RV32I-NEXT:    srli t2, a4, 16
1039; RV32I-NEXT:    srli t3, a4, 24
1040; RV32I-NEXT:    sb a3, 8(a2)
1041; RV32I-NEXT:    sb a6, 9(a2)
1042; RV32I-NEXT:    sb a1, 10(a2)
1043; RV32I-NEXT:    sb a5, 11(a2)
1044; RV32I-NEXT:    srli a1, a4, 8
1045; RV32I-NEXT:    sb a0, 12(a2)
1046; RV32I-NEXT:    sb t1, 13(a2)
1047; RV32I-NEXT:    sb a7, 14(a2)
1048; RV32I-NEXT:    sb t0, 15(a2)
1049; RV32I-NEXT:    sb a4, 4(a2)
1050; RV32I-NEXT:    sb a1, 5(a2)
1051; RV32I-NEXT:    sb t2, 6(a2)
1052; RV32I-NEXT:    sb t3, 7(a2)
1053; RV32I-NEXT:    addi sp, sp, 32
1054; RV32I-NEXT:    ret
1055  %src = load i128, ptr %src.ptr, align 1
1056  %bitOff = load i128, ptr %bitOff.ptr, align 1
1057  %res = shl i128 %src, %bitOff
1058  store i128 %res, ptr %dst, align 1
1059  ret void
1060}
; ashr_16bytes - arithmetic right shift of a 128-bit scalar held in memory.
;
; Operands:
;   %src.ptr    - address of the i128 value to shift (loaded with align 1).
;   %bitOff.ptr - address of the i128 shift amount in bits (loaded with align 1).
;   %dst        - address the i128 result is stored to (align 1).
;
; The assertion lines below are autogenerated (see the note on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand. What they record:
;   * riscv64 checks: the high 64-bit half is assembled from byte loads and
;     shifted with `sra`; the code then branches on the sign of (amount - 64).
;     On the fall-through path the low result is the shifted high half and the
;     high result is the sign fill (`sraiw ..., 31`). On the branch-taken path
;     the low half is loaded and combined with bits carried down from the high
;     half. Only bytes 0..7 of the shift amount are read. No stack is used.
;   * riscv32 checks: the four value words are stored at 0..12(sp), followed by
;     four words of sign fill at 16..28(sp). The word offset ((amount >> 3) & 12)
;     selects a 4-word window, and each word is shifted by the amount with the
;     neighbouring word's bits ORed in. Only bytes 0..3 of the shift amount are
;     read.
; In both configurations the result is written back one byte at a time (`sb`).
1061define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
1062; RV64I-LABEL: ashr_16bytes:
1063; RV64I:       # %bb.0:
1064; RV64I-NEXT:    lbu a3, 8(a0)
1065; RV64I-NEXT:    lbu a4, 9(a0)
1066; RV64I-NEXT:    lbu a5, 10(a0)
1067; RV64I-NEXT:    lbu a6, 11(a0)
1068; RV64I-NEXT:    lbu a7, 12(a0)
1069; RV64I-NEXT:    lbu t0, 13(a0)
1070; RV64I-NEXT:    lbu t1, 14(a0)
1071; RV64I-NEXT:    lbu t2, 15(a0)
1072; RV64I-NEXT:    slli a4, a4, 8
1073; RV64I-NEXT:    slli a5, a5, 16
1074; RV64I-NEXT:    slli a6, a6, 24
1075; RV64I-NEXT:    slli t0, t0, 8
1076; RV64I-NEXT:    or a3, a4, a3
1077; RV64I-NEXT:    or a4, a6, a5
1078; RV64I-NEXT:    or a5, t0, a7
1079; RV64I-NEXT:    lbu a6, 0(a1)
1080; RV64I-NEXT:    lbu a7, 1(a1)
1081; RV64I-NEXT:    lbu t0, 2(a1)
1082; RV64I-NEXT:    lbu t3, 3(a1)
1083; RV64I-NEXT:    slli t1, t1, 16
1084; RV64I-NEXT:    slli t2, t2, 24
1085; RV64I-NEXT:    slli a7, a7, 8
1086; RV64I-NEXT:    slli t0, t0, 16
1087; RV64I-NEXT:    slli t3, t3, 24
1088; RV64I-NEXT:    or t1, t2, t1
1089; RV64I-NEXT:    or a6, a7, a6
1090; RV64I-NEXT:    lbu a7, 4(a1)
1091; RV64I-NEXT:    lbu t2, 5(a1)
1092; RV64I-NEXT:    or t0, t3, t0
1093; RV64I-NEXT:    lbu t3, 6(a1)
1094; RV64I-NEXT:    lbu a1, 7(a1)
1095; RV64I-NEXT:    slli t2, t2, 8
1096; RV64I-NEXT:    or a7, t2, a7
1097; RV64I-NEXT:    slli t3, t3, 16
1098; RV64I-NEXT:    slli a1, a1, 24
1099; RV64I-NEXT:    or a1, a1, t3
1100; RV64I-NEXT:    or a3, a4, a3
1101; RV64I-NEXT:    or a5, t1, a5
1102; RV64I-NEXT:    or a6, t0, a6
1103; RV64I-NEXT:    or a1, a1, a7
1104; RV64I-NEXT:    slli a4, a5, 32
1105; RV64I-NEXT:    slli a1, a1, 32
1106; RV64I-NEXT:    or a4, a4, a3
1107; RV64I-NEXT:    or a3, a1, a6
1108; RV64I-NEXT:    addi a6, a3, -64
1109; RV64I-NEXT:    sra a1, a4, a3
1110; RV64I-NEXT:    bltz a6, .LBB8_2
1111; RV64I-NEXT:  # %bb.1:
1112; RV64I-NEXT:    sraiw a3, a5, 31
1113; RV64I-NEXT:    mv a0, a1
1114; RV64I-NEXT:    mv a1, a3
1115; RV64I-NEXT:    j .LBB8_3
1116; RV64I-NEXT:  .LBB8_2:
1117; RV64I-NEXT:    lbu a5, 1(a0)
1118; RV64I-NEXT:    lbu a6, 2(a0)
1119; RV64I-NEXT:    lbu a7, 3(a0)
1120; RV64I-NEXT:    lbu t0, 0(a0)
1121; RV64I-NEXT:    slli a5, a5, 8
1122; RV64I-NEXT:    slli a6, a6, 16
1123; RV64I-NEXT:    slli a7, a7, 24
1124; RV64I-NEXT:    or a5, a5, t0
1125; RV64I-NEXT:    lbu t0, 4(a0)
1126; RV64I-NEXT:    lbu t1, 5(a0)
1127; RV64I-NEXT:    or a6, a7, a6
1128; RV64I-NEXT:    lbu a7, 6(a0)
1129; RV64I-NEXT:    lbu a0, 7(a0)
1130; RV64I-NEXT:    slli t1, t1, 8
1131; RV64I-NEXT:    or t0, t1, t0
1132; RV64I-NEXT:    slli a7, a7, 16
1133; RV64I-NEXT:    slli a0, a0, 24
1134; RV64I-NEXT:    or a0, a0, a7
1135; RV64I-NEXT:    or a5, a6, a5
1136; RV64I-NEXT:    not a6, a3
1137; RV64I-NEXT:    slli a4, a4, 1
1138; RV64I-NEXT:    or a0, a0, t0
1139; RV64I-NEXT:    slli a0, a0, 32
1140; RV64I-NEXT:    or a0, a0, a5
1141; RV64I-NEXT:    srl a0, a0, a3
1142; RV64I-NEXT:    sll a3, a4, a6
1143; RV64I-NEXT:    or a0, a0, a3
1144; RV64I-NEXT:  .LBB8_3:
1145; RV64I-NEXT:    srli a3, a1, 56
1146; RV64I-NEXT:    srli a4, a1, 48
1147; RV64I-NEXT:    srli a5, a1, 40
1148; RV64I-NEXT:    srli a6, a1, 32
1149; RV64I-NEXT:    srli a7, a1, 24
1150; RV64I-NEXT:    srli t0, a1, 16
1151; RV64I-NEXT:    srli t1, a1, 8
1152; RV64I-NEXT:    srli t2, a0, 56
1153; RV64I-NEXT:    srli t3, a0, 48
1154; RV64I-NEXT:    srli t4, a0, 40
1155; RV64I-NEXT:    srli t5, a0, 32
1156; RV64I-NEXT:    sb a6, 12(a2)
1157; RV64I-NEXT:    sb a5, 13(a2)
1158; RV64I-NEXT:    sb a4, 14(a2)
1159; RV64I-NEXT:    sb a3, 15(a2)
1160; RV64I-NEXT:    srli a3, a0, 24
1161; RV64I-NEXT:    sb a1, 8(a2)
1162; RV64I-NEXT:    sb t1, 9(a2)
1163; RV64I-NEXT:    sb t0, 10(a2)
1164; RV64I-NEXT:    sb a7, 11(a2)
1165; RV64I-NEXT:    srli a1, a0, 16
1166; RV64I-NEXT:    sb t5, 4(a2)
1167; RV64I-NEXT:    sb t4, 5(a2)
1168; RV64I-NEXT:    sb t3, 6(a2)
1169; RV64I-NEXT:    sb t2, 7(a2)
1170; RV64I-NEXT:    srli a4, a0, 8
1171; RV64I-NEXT:    sb a0, 0(a2)
1172; RV64I-NEXT:    sb a4, 1(a2)
1173; RV64I-NEXT:    sb a1, 2(a2)
1174; RV64I-NEXT:    sb a3, 3(a2)
1175; RV64I-NEXT:    ret
1176;
1177; RV32I-LABEL: ashr_16bytes:
1178; RV32I:       # %bb.0:
1179; RV32I-NEXT:    addi sp, sp, -32
1180; RV32I-NEXT:    lbu a3, 0(a0)
1181; RV32I-NEXT:    lbu a4, 1(a0)
1182; RV32I-NEXT:    lbu a5, 2(a0)
1183; RV32I-NEXT:    lbu a6, 3(a0)
1184; RV32I-NEXT:    lbu a7, 4(a0)
1185; RV32I-NEXT:    lbu t0, 5(a0)
1186; RV32I-NEXT:    lbu t1, 6(a0)
1187; RV32I-NEXT:    lbu t2, 7(a0)
1188; RV32I-NEXT:    slli a4, a4, 8
1189; RV32I-NEXT:    or a3, a4, a3
1190; RV32I-NEXT:    lbu a4, 8(a0)
1191; RV32I-NEXT:    lbu t3, 9(a0)
1192; RV32I-NEXT:    lbu t4, 10(a0)
1193; RV32I-NEXT:    lbu t5, 11(a0)
1194; RV32I-NEXT:    slli a5, a5, 16
1195; RV32I-NEXT:    slli a6, a6, 24
1196; RV32I-NEXT:    slli t0, t0, 8
1197; RV32I-NEXT:    slli t1, t1, 16
1198; RV32I-NEXT:    slli t2, t2, 24
1199; RV32I-NEXT:    or a5, a6, a5
1200; RV32I-NEXT:    or a6, t0, a7
1201; RV32I-NEXT:    or a7, t2, t1
1202; RV32I-NEXT:    lbu t0, 12(a0)
1203; RV32I-NEXT:    lbu t1, 13(a0)
1204; RV32I-NEXT:    lbu t2, 14(a0)
1205; RV32I-NEXT:    lbu a0, 15(a0)
1206; RV32I-NEXT:    slli t3, t3, 8
1207; RV32I-NEXT:    slli t4, t4, 16
1208; RV32I-NEXT:    slli t5, t5, 24
1209; RV32I-NEXT:    slli t1, t1, 8
1210; RV32I-NEXT:    or a4, t3, a4
1211; RV32I-NEXT:    or t3, t5, t4
1212; RV32I-NEXT:    lbu t4, 0(a1)
1213; RV32I-NEXT:    lbu t5, 1(a1)
1214; RV32I-NEXT:    or t0, t1, t0
1215; RV32I-NEXT:    lbu t1, 2(a1)
1216; RV32I-NEXT:    lbu a1, 3(a1)
1217; RV32I-NEXT:    slli t5, t5, 8
1218; RV32I-NEXT:    or t4, t5, t4
1219; RV32I-NEXT:    slli t1, t1, 16
1220; RV32I-NEXT:    slli a1, a1, 24
1221; RV32I-NEXT:    or a1, a1, t1
1222; RV32I-NEXT:    or a3, a5, a3
1223; RV32I-NEXT:    mv a5, sp
1224; RV32I-NEXT:    slli t2, t2, 16
1225; RV32I-NEXT:    slli a0, a0, 24
1226; RV32I-NEXT:    or t1, a0, t2
1227; RV32I-NEXT:    srai a0, a0, 31
1228; RV32I-NEXT:    or a6, a7, a6
1229; RV32I-NEXT:    or a4, t3, a4
1230; RV32I-NEXT:    or a7, t1, t0
1231; RV32I-NEXT:    or a1, a1, t4
1232; RV32I-NEXT:    sw a0, 16(sp)
1233; RV32I-NEXT:    sw a0, 20(sp)
1234; RV32I-NEXT:    sw a0, 24(sp)
1235; RV32I-NEXT:    sw a0, 28(sp)
1236; RV32I-NEXT:    sw a3, 0(sp)
1237; RV32I-NEXT:    sw a6, 4(sp)
1238; RV32I-NEXT:    sw a4, 8(sp)
1239; RV32I-NEXT:    sw a7, 12(sp)
1240; RV32I-NEXT:    srli a0, a1, 3
1241; RV32I-NEXT:    andi a3, a1, 31
1242; RV32I-NEXT:    andi a0, a0, 12
1243; RV32I-NEXT:    add a0, a5, a0
1244; RV32I-NEXT:    lw a4, 0(a0)
1245; RV32I-NEXT:    lw a5, 4(a0)
1246; RV32I-NEXT:    lw a6, 8(a0)
1247; RV32I-NEXT:    xori a3, a3, 31
1248; RV32I-NEXT:    lw a0, 12(a0)
1249; RV32I-NEXT:    srl a7, a5, a1
1250; RV32I-NEXT:    slli t0, a6, 1
1251; RV32I-NEXT:    srl a4, a4, a1
1252; RV32I-NEXT:    slli a5, a5, 1
1253; RV32I-NEXT:    srl a6, a6, a1
1254; RV32I-NEXT:    slli t1, a0, 1
1255; RV32I-NEXT:    sra a0, a0, a1
1256; RV32I-NEXT:    sll a1, t0, a3
1257; RV32I-NEXT:    sll a5, a5, a3
1258; RV32I-NEXT:    sll a3, t1, a3
1259; RV32I-NEXT:    srli t0, a0, 16
1260; RV32I-NEXT:    srli t1, a0, 24
1261; RV32I-NEXT:    srli t2, a0, 8
1262; RV32I-NEXT:    or a1, a7, a1
1263; RV32I-NEXT:    or a4, a4, a5
1264; RV32I-NEXT:    or a3, a6, a3
1265; RV32I-NEXT:    sb a0, 12(a2)
1266; RV32I-NEXT:    sb t2, 13(a2)
1267; RV32I-NEXT:    sb t0, 14(a2)
1268; RV32I-NEXT:    sb t1, 15(a2)
1269; RV32I-NEXT:    srli a0, a3, 16
1270; RV32I-NEXT:    srli a5, a3, 24
1271; RV32I-NEXT:    srli a6, a3, 8
1272; RV32I-NEXT:    srli a7, a4, 16
1273; RV32I-NEXT:    srli t0, a4, 24
1274; RV32I-NEXT:    srli t1, a4, 8
1275; RV32I-NEXT:    srli t2, a1, 16
1276; RV32I-NEXT:    srli t3, a1, 24
1277; RV32I-NEXT:    sb a3, 8(a2)
1278; RV32I-NEXT:    sb a6, 9(a2)
1279; RV32I-NEXT:    sb a0, 10(a2)
1280; RV32I-NEXT:    sb a5, 11(a2)
1281; RV32I-NEXT:    srli a0, a1, 8
1282; RV32I-NEXT:    sb a4, 0(a2)
1283; RV32I-NEXT:    sb t1, 1(a2)
1284; RV32I-NEXT:    sb a7, 2(a2)
1285; RV32I-NEXT:    sb t0, 3(a2)
1286; RV32I-NEXT:    sb a1, 4(a2)
1287; RV32I-NEXT:    sb a0, 5(a2)
1288; RV32I-NEXT:    sb t2, 6(a2)
1289; RV32I-NEXT:    sb t3, 7(a2)
1290; RV32I-NEXT:    addi sp, sp, 32
1291; RV32I-NEXT:    ret
  ; Both operands are loaded with align 1, which is why the expected assembly
  ; above builds every word from individual byte loads.
1292  %src = load i128, ptr %src.ptr, align 1
1293  %bitOff = load i128, ptr %bitOff.ptr, align 1
  ; The operation under test: a 128-bit arithmetic shift right.
1294  %res = ashr i128 %src, %bitOff
  ; The result is stored with align 1 as well, hence the byte stores above.
1295  store i128 %res, ptr %dst, align 1
1296  ret void
1297}
1298
; lshr_32bytes - logical right shift of a 256-bit scalar held in memory.
;
; Operands:
;   %src.ptr    - address of the i256 value to shift (loaded with align 1).
;   %bitOff.ptr - address of the i256 shift amount in bits (loaded with align 1).
;   %dst        - address the i256 result is stored to (align 1).
;
; The assertion lines below are autogenerated (see the note on the first line
; of this file); regenerate them with utils/update_llc_test_checks.py rather
; than editing them by hand. What they record:
;   * riscv64 checks: a 160-byte frame is allocated and s0-s11 are spilled.
;     The four 64-bit value words are assembled from byte loads and stored at
;     0..24(sp), with zero fill at 32..56(sp). The offset ((amount >> 3) & 24)
;     selects a 4-doubleword window, and each doubleword is shifted right by
;     the amount with the next doubleword's bits ORed in. Only bytes 0..7 of
;     the shift amount are read.
;   * riscv32 checks: a 128-byte frame is allocated and ra plus s0-s11 are
;     spilled, with one extra temporary spilled at 4(sp). The eight 32-bit
;     value words are stored at 8..36(sp), with zero fill at 40..68(sp). The
;     offset ((amount >> 3) & 28) selects an 8-word window, and each word is
;     shifted right by the amount with the next word's bits ORed in. Only
;     bytes 0..3 of the shift amount are read.
; In both configurations the result is written back one byte at a time (`sb`).
1299define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
1300; RV64I-LABEL: lshr_32bytes:
1301; RV64I:       # %bb.0:
1302; RV64I-NEXT:    addi sp, sp, -160
1303; RV64I-NEXT:    sd s0, 152(sp) # 8-byte Folded Spill
1304; RV64I-NEXT:    sd s1, 144(sp) # 8-byte Folded Spill
1305; RV64I-NEXT:    sd s2, 136(sp) # 8-byte Folded Spill
1306; RV64I-NEXT:    sd s3, 128(sp) # 8-byte Folded Spill
1307; RV64I-NEXT:    sd s4, 120(sp) # 8-byte Folded Spill
1308; RV64I-NEXT:    sd s5, 112(sp) # 8-byte Folded Spill
1309; RV64I-NEXT:    sd s6, 104(sp) # 8-byte Folded Spill
1310; RV64I-NEXT:    sd s7, 96(sp) # 8-byte Folded Spill
1311; RV64I-NEXT:    sd s8, 88(sp) # 8-byte Folded Spill
1312; RV64I-NEXT:    sd s9, 80(sp) # 8-byte Folded Spill
1313; RV64I-NEXT:    sd s10, 72(sp) # 8-byte Folded Spill
1314; RV64I-NEXT:    sd s11, 64(sp) # 8-byte Folded Spill
1315; RV64I-NEXT:    lbu a3, 0(a0)
1316; RV64I-NEXT:    lbu a4, 1(a0)
1317; RV64I-NEXT:    lbu a5, 2(a0)
1318; RV64I-NEXT:    lbu a6, 3(a0)
1319; RV64I-NEXT:    lbu a7, 4(a0)
1320; RV64I-NEXT:    lbu t0, 5(a0)
1321; RV64I-NEXT:    lbu t1, 6(a0)
1322; RV64I-NEXT:    lbu t2, 7(a0)
1323; RV64I-NEXT:    lbu t3, 8(a0)
1324; RV64I-NEXT:    lbu t4, 9(a0)
1325; RV64I-NEXT:    lbu t5, 10(a0)
1326; RV64I-NEXT:    lbu t6, 11(a0)
1327; RV64I-NEXT:    lbu s0, 12(a0)
1328; RV64I-NEXT:    lbu s1, 13(a0)
1329; RV64I-NEXT:    lbu s2, 14(a0)
1330; RV64I-NEXT:    lbu s3, 15(a0)
1331; RV64I-NEXT:    lbu s4, 16(a0)
1332; RV64I-NEXT:    lbu s5, 17(a0)
1333; RV64I-NEXT:    lbu s6, 18(a0)
1334; RV64I-NEXT:    lbu s7, 19(a0)
1335; RV64I-NEXT:    slli a4, a4, 8
1336; RV64I-NEXT:    slli s8, a5, 16
1337; RV64I-NEXT:    slli a6, a6, 24
1338; RV64I-NEXT:    slli t0, t0, 8
1339; RV64I-NEXT:    slli t1, t1, 16
1340; RV64I-NEXT:    slli t2, t2, 24
1341; RV64I-NEXT:    or a5, a4, a3
1342; RV64I-NEXT:    or a6, a6, s8
1343; RV64I-NEXT:    or a3, t0, a7
1344; RV64I-NEXT:    or a4, t2, t1
1345; RV64I-NEXT:    lbu s8, 20(a0)
1346; RV64I-NEXT:    lbu s9, 21(a0)
1347; RV64I-NEXT:    lbu s10, 22(a0)
1348; RV64I-NEXT:    lbu s11, 23(a0)
1349; RV64I-NEXT:    slli t4, t4, 8
1350; RV64I-NEXT:    slli t5, t5, 16
1351; RV64I-NEXT:    slli t6, t6, 24
1352; RV64I-NEXT:    slli s1, s1, 8
1353; RV64I-NEXT:    slli s2, s2, 16
1354; RV64I-NEXT:    slli s3, s3, 24
1355; RV64I-NEXT:    or a7, t4, t3
1356; RV64I-NEXT:    or t0, t6, t5
1357; RV64I-NEXT:    or t1, s1, s0
1358; RV64I-NEXT:    or t2, s3, s2
1359; RV64I-NEXT:    lbu t6, 24(a0)
1360; RV64I-NEXT:    lbu s0, 25(a0)
1361; RV64I-NEXT:    lbu s1, 26(a0)
1362; RV64I-NEXT:    lbu s2, 27(a0)
1363; RV64I-NEXT:    slli s5, s5, 8
1364; RV64I-NEXT:    slli s6, s6, 16
1365; RV64I-NEXT:    slli s7, s7, 24
1366; RV64I-NEXT:    slli s9, s9, 8
1367; RV64I-NEXT:    or t3, s5, s4
1368; RV64I-NEXT:    or t4, s7, s6
1369; RV64I-NEXT:    or t5, s9, s8
1370; RV64I-NEXT:    lbu s3, 28(a0)
1371; RV64I-NEXT:    lbu s4, 29(a0)
1372; RV64I-NEXT:    lbu s5, 30(a0)
1373; RV64I-NEXT:    lbu s6, 31(a0)
1374; RV64I-NEXT:    slli s10, s10, 16
1375; RV64I-NEXT:    slli s11, s11, 24
1376; RV64I-NEXT:    slli s0, s0, 8
1377; RV64I-NEXT:    slli s1, s1, 16
1378; RV64I-NEXT:    slli s2, s2, 24
1379; RV64I-NEXT:    slli s4, s4, 8
1380; RV64I-NEXT:    or a0, s11, s10
1381; RV64I-NEXT:    or t6, s0, t6
1382; RV64I-NEXT:    or s0, s2, s1
1383; RV64I-NEXT:    or s1, s4, s3
1384; RV64I-NEXT:    lbu s2, 0(a1)
1385; RV64I-NEXT:    lbu s3, 1(a1)
1386; RV64I-NEXT:    lbu s4, 2(a1)
1387; RV64I-NEXT:    lbu s7, 3(a1)
1388; RV64I-NEXT:    slli s5, s5, 16
1389; RV64I-NEXT:    slli s6, s6, 24
1390; RV64I-NEXT:    slli s3, s3, 8
1391; RV64I-NEXT:    slli s4, s4, 16
1392; RV64I-NEXT:    slli s7, s7, 24
1393; RV64I-NEXT:    or s5, s6, s5
1394; RV64I-NEXT:    or s2, s3, s2
1395; RV64I-NEXT:    lbu s3, 4(a1)
1396; RV64I-NEXT:    lbu s6, 5(a1)
1397; RV64I-NEXT:    or s4, s7, s4
1398; RV64I-NEXT:    lbu s7, 6(a1)
1399; RV64I-NEXT:    lbu a1, 7(a1)
1400; RV64I-NEXT:    slli s6, s6, 8
1401; RV64I-NEXT:    or s3, s6, s3
1402; RV64I-NEXT:    slli s7, s7, 16
1403; RV64I-NEXT:    slli a1, a1, 24
1404; RV64I-NEXT:    or a1, a1, s7
1405; RV64I-NEXT:    sd zero, 32(sp)
1406; RV64I-NEXT:    sd zero, 40(sp)
1407; RV64I-NEXT:    sd zero, 48(sp)
1408; RV64I-NEXT:    sd zero, 56(sp)
1409; RV64I-NEXT:    or a5, a6, a5
1410; RV64I-NEXT:    mv a6, sp
1411; RV64I-NEXT:    or a3, a4, a3
1412; RV64I-NEXT:    or a4, t0, a7
1413; RV64I-NEXT:    or a7, t2, t1
1414; RV64I-NEXT:    or t0, t4, t3
1415; RV64I-NEXT:    or a0, a0, t5
1416; RV64I-NEXT:    or t1, s0, t6
1417; RV64I-NEXT:    or t2, s5, s1
1418; RV64I-NEXT:    or t3, s4, s2
1419; RV64I-NEXT:    or a1, a1, s3
1420; RV64I-NEXT:    slli a3, a3, 32
1421; RV64I-NEXT:    slli a7, a7, 32
1422; RV64I-NEXT:    slli a0, a0, 32
1423; RV64I-NEXT:    slli t2, t2, 32
1424; RV64I-NEXT:    slli a1, a1, 32
1425; RV64I-NEXT:    or a3, a3, a5
1426; RV64I-NEXT:    or a4, a7, a4
1427; RV64I-NEXT:    or a0, a0, t0
1428; RV64I-NEXT:    or a5, t2, t1
1429; RV64I-NEXT:    or a1, a1, t3
1430; RV64I-NEXT:    sd a3, 0(sp)
1431; RV64I-NEXT:    sd a4, 8(sp)
1432; RV64I-NEXT:    sd a0, 16(sp)
1433; RV64I-NEXT:    sd a5, 24(sp)
1434; RV64I-NEXT:    srli a0, a1, 3
1435; RV64I-NEXT:    andi a3, a1, 63
1436; RV64I-NEXT:    andi a0, a0, 24
1437; RV64I-NEXT:    add a0, a6, a0
1438; RV64I-NEXT:    ld a4, 0(a0)
1439; RV64I-NEXT:    ld a5, 8(a0)
1440; RV64I-NEXT:    ld a6, 16(a0)
1441; RV64I-NEXT:    xori a3, a3, 63
1442; RV64I-NEXT:    ld a0, 24(a0)
1443; RV64I-NEXT:    srl a7, a5, a1
1444; RV64I-NEXT:    slli t0, a6, 1
1445; RV64I-NEXT:    srl a4, a4, a1
1446; RV64I-NEXT:    slli a5, a5, 1
1447; RV64I-NEXT:    srl a6, a6, a1
1448; RV64I-NEXT:    slli t1, a0, 1
1449; RV64I-NEXT:    srl t2, a0, a1
1450; RV64I-NEXT:    sll a0, t0, a3
1451; RV64I-NEXT:    sll a1, a5, a3
1452; RV64I-NEXT:    sll a3, t1, a3
1453; RV64I-NEXT:    srli a5, t2, 56
1454; RV64I-NEXT:    srli t0, t2, 48
1455; RV64I-NEXT:    srli t1, t2, 40
1456; RV64I-NEXT:    srli t3, t2, 32
1457; RV64I-NEXT:    srli t4, t2, 24
1458; RV64I-NEXT:    srli t5, t2, 16
1459; RV64I-NEXT:    srli t6, t2, 8
1460; RV64I-NEXT:    or a0, a7, a0
1461; RV64I-NEXT:    or a1, a4, a1
1462; RV64I-NEXT:    or a3, a6, a3
1463; RV64I-NEXT:    sb t3, 28(a2)
1464; RV64I-NEXT:    sb t1, 29(a2)
1465; RV64I-NEXT:    sb t0, 30(a2)
1466; RV64I-NEXT:    sb a5, 31(a2)
1467; RV64I-NEXT:    sb t2, 24(a2)
1468; RV64I-NEXT:    sb t6, 25(a2)
1469; RV64I-NEXT:    sb t5, 26(a2)
1470; RV64I-NEXT:    sb t4, 27(a2)
1471; RV64I-NEXT:    srli a4, a3, 56
1472; RV64I-NEXT:    srli a5, a3, 48
1473; RV64I-NEXT:    srli a6, a3, 40
1474; RV64I-NEXT:    srli a7, a3, 32
1475; RV64I-NEXT:    srli t0, a3, 24
1476; RV64I-NEXT:    srli t1, a3, 16
1477; RV64I-NEXT:    srli t2, a3, 8
1478; RV64I-NEXT:    srli t3, a1, 56
1479; RV64I-NEXT:    srli t4, a1, 48
1480; RV64I-NEXT:    srli t5, a1, 40
1481; RV64I-NEXT:    srli t6, a1, 32
1482; RV64I-NEXT:    srli s0, a1, 24
1483; RV64I-NEXT:    srli s1, a1, 16
1484; RV64I-NEXT:    srli s2, a1, 8
1485; RV64I-NEXT:    srli s3, a0, 56
1486; RV64I-NEXT:    srli s4, a0, 48
1487; RV64I-NEXT:    srli s5, a0, 40
1488; RV64I-NEXT:    sb a7, 20(a2)
1489; RV64I-NEXT:    sb a6, 21(a2)
1490; RV64I-NEXT:    sb a5, 22(a2)
1491; RV64I-NEXT:    sb a4, 23(a2)
1492; RV64I-NEXT:    srli a4, a0, 32
1493; RV64I-NEXT:    sb a3, 16(a2)
1494; RV64I-NEXT:    sb t2, 17(a2)
1495; RV64I-NEXT:    sb t1, 18(a2)
1496; RV64I-NEXT:    sb t0, 19(a2)
1497; RV64I-NEXT:    srli a3, a0, 24
1498; RV64I-NEXT:    sb t6, 4(a2)
1499; RV64I-NEXT:    sb t5, 5(a2)
1500; RV64I-NEXT:    sb t4, 6(a2)
1501; RV64I-NEXT:    sb t3, 7(a2)
1502; RV64I-NEXT:    srli a5, a0, 16
1503; RV64I-NEXT:    sb a1, 0(a2)
1504; RV64I-NEXT:    sb s2, 1(a2)
1505; RV64I-NEXT:    sb s1, 2(a2)
1506; RV64I-NEXT:    sb s0, 3(a2)
1507; RV64I-NEXT:    srli a1, a0, 8
1508; RV64I-NEXT:    sb a4, 12(a2)
1509; RV64I-NEXT:    sb s5, 13(a2)
1510; RV64I-NEXT:    sb s4, 14(a2)
1511; RV64I-NEXT:    sb s3, 15(a2)
1512; RV64I-NEXT:    sb a0, 8(a2)
1513; RV64I-NEXT:    sb a1, 9(a2)
1514; RV64I-NEXT:    sb a5, 10(a2)
1515; RV64I-NEXT:    sb a3, 11(a2)
1516; RV64I-NEXT:    ld s0, 152(sp) # 8-byte Folded Reload
1517; RV64I-NEXT:    ld s1, 144(sp) # 8-byte Folded Reload
1518; RV64I-NEXT:    ld s2, 136(sp) # 8-byte Folded Reload
1519; RV64I-NEXT:    ld s3, 128(sp) # 8-byte Folded Reload
1520; RV64I-NEXT:    ld s4, 120(sp) # 8-byte Folded Reload
1521; RV64I-NEXT:    ld s5, 112(sp) # 8-byte Folded Reload
1522; RV64I-NEXT:    ld s6, 104(sp) # 8-byte Folded Reload
1523; RV64I-NEXT:    ld s7, 96(sp) # 8-byte Folded Reload
1524; RV64I-NEXT:    ld s8, 88(sp) # 8-byte Folded Reload
1525; RV64I-NEXT:    ld s9, 80(sp) # 8-byte Folded Reload
1526; RV64I-NEXT:    ld s10, 72(sp) # 8-byte Folded Reload
1527; RV64I-NEXT:    ld s11, 64(sp) # 8-byte Folded Reload
1528; RV64I-NEXT:    addi sp, sp, 160
1529; RV64I-NEXT:    ret
1530;
1531; RV32I-LABEL: lshr_32bytes:
1532; RV32I:       # %bb.0:
1533; RV32I-NEXT:    addi sp, sp, -128
1534; RV32I-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
1535; RV32I-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
1536; RV32I-NEXT:    sw s1, 116(sp) # 4-byte Folded Spill
1537; RV32I-NEXT:    sw s2, 112(sp) # 4-byte Folded Spill
1538; RV32I-NEXT:    sw s3, 108(sp) # 4-byte Folded Spill
1539; RV32I-NEXT:    sw s4, 104(sp) # 4-byte Folded Spill
1540; RV32I-NEXT:    sw s5, 100(sp) # 4-byte Folded Spill
1541; RV32I-NEXT:    sw s6, 96(sp) # 4-byte Folded Spill
1542; RV32I-NEXT:    sw s7, 92(sp) # 4-byte Folded Spill
1543; RV32I-NEXT:    sw s8, 88(sp) # 4-byte Folded Spill
1544; RV32I-NEXT:    sw s9, 84(sp) # 4-byte Folded Spill
1545; RV32I-NEXT:    sw s10, 80(sp) # 4-byte Folded Spill
1546; RV32I-NEXT:    sw s11, 76(sp) # 4-byte Folded Spill
1547; RV32I-NEXT:    lbu a3, 0(a0)
1548; RV32I-NEXT:    lbu a4, 1(a0)
1549; RV32I-NEXT:    lbu a6, 2(a0)
1550; RV32I-NEXT:    lbu a7, 3(a0)
1551; RV32I-NEXT:    lbu a5, 4(a0)
1552; RV32I-NEXT:    lbu t0, 5(a0)
1553; RV32I-NEXT:    lbu t1, 6(a0)
1554; RV32I-NEXT:    lbu t2, 7(a0)
1555; RV32I-NEXT:    lbu t3, 8(a0)
1556; RV32I-NEXT:    lbu t4, 9(a0)
1557; RV32I-NEXT:    lbu t5, 10(a0)
1558; RV32I-NEXT:    lbu t6, 11(a0)
1559; RV32I-NEXT:    lbu s0, 12(a0)
1560; RV32I-NEXT:    lbu s2, 13(a0)
1561; RV32I-NEXT:    lbu s4, 14(a0)
1562; RV32I-NEXT:    lbu s5, 15(a0)
1563; RV32I-NEXT:    lbu s6, 16(a0)
1564; RV32I-NEXT:    lbu s7, 17(a0)
1565; RV32I-NEXT:    lbu s8, 18(a0)
1566; RV32I-NEXT:    lbu s9, 19(a0)
1567; RV32I-NEXT:    slli a4, a4, 8
1568; RV32I-NEXT:    slli a6, a6, 16
1569; RV32I-NEXT:    slli a7, a7, 24
1570; RV32I-NEXT:    or a3, a4, a3
1571; RV32I-NEXT:    sw a3, 4(sp) # 4-byte Folded Spill
1572; RV32I-NEXT:    or a4, a7, a6
1573; RV32I-NEXT:    lbu s10, 20(a0)
1574; RV32I-NEXT:    lbu s11, 21(a0)
1575; RV32I-NEXT:    lbu ra, 22(a0)
1576; RV32I-NEXT:    lbu a3, 23(a0)
1577; RV32I-NEXT:    slli t0, t0, 8
1578; RV32I-NEXT:    slli t1, t1, 16
1579; RV32I-NEXT:    slli t2, t2, 24
1580; RV32I-NEXT:    slli t4, t4, 8
1581; RV32I-NEXT:    slli t5, t5, 16
1582; RV32I-NEXT:    slli t6, t6, 24
1583; RV32I-NEXT:    or a5, t0, a5
1584; RV32I-NEXT:    or a6, t2, t1
1585; RV32I-NEXT:    or a7, t4, t3
1586; RV32I-NEXT:    or t0, t6, t5
1587; RV32I-NEXT:    lbu s1, 24(a0)
1588; RV32I-NEXT:    lbu s3, 25(a0)
1589; RV32I-NEXT:    lbu t4, 26(a0)
1590; RV32I-NEXT:    lbu t5, 27(a0)
1591; RV32I-NEXT:    slli s2, s2, 8
1592; RV32I-NEXT:    slli s4, s4, 16
1593; RV32I-NEXT:    slli s5, s5, 24
1594; RV32I-NEXT:    slli s7, s7, 8
1595; RV32I-NEXT:    or t1, s2, s0
1596; RV32I-NEXT:    or t2, s5, s4
1597; RV32I-NEXT:    or t3, s7, s6
1598; RV32I-NEXT:    lbu t6, 28(a0)
1599; RV32I-NEXT:    lbu s4, 29(a0)
1600; RV32I-NEXT:    lbu s5, 30(a0)
1601; RV32I-NEXT:    lbu s6, 31(a0)
1602; RV32I-NEXT:    slli s8, s8, 16
1603; RV32I-NEXT:    slli s9, s9, 24
1604; RV32I-NEXT:    slli s11, s11, 8
1605; RV32I-NEXT:    slli ra, ra, 16
1606; RV32I-NEXT:    slli a3, a3, 24
1607; RV32I-NEXT:    or a0, s9, s8
1608; RV32I-NEXT:    or s0, s11, s10
1609; RV32I-NEXT:    or s2, a3, ra
1610; RV32I-NEXT:    lbu a3, 0(a1)
1611; RV32I-NEXT:    lbu s7, 1(a1)
1612; RV32I-NEXT:    lbu s8, 2(a1)
1613; RV32I-NEXT:    lbu a1, 3(a1)
1614; RV32I-NEXT:    sw zero, 56(sp)
1615; RV32I-NEXT:    sw zero, 60(sp)
1616; RV32I-NEXT:    sw zero, 64(sp)
1617; RV32I-NEXT:    sw zero, 68(sp)
1618; RV32I-NEXT:    sw zero, 40(sp)
1619; RV32I-NEXT:    sw zero, 44(sp)
1620; RV32I-NEXT:    sw zero, 48(sp)
1621; RV32I-NEXT:    sw zero, 52(sp)
1622; RV32I-NEXT:    slli s3, s3, 8
1623; RV32I-NEXT:    or s1, s3, s1
1624; RV32I-NEXT:    addi s3, sp, 8
1625; RV32I-NEXT:    slli t4, t4, 16
1626; RV32I-NEXT:    slli t5, t5, 24
1627; RV32I-NEXT:    slli s4, s4, 8
1628; RV32I-NEXT:    slli s5, s5, 16
1629; RV32I-NEXT:    slli s6, s6, 24
1630; RV32I-NEXT:    slli s7, s7, 8
1631; RV32I-NEXT:    slli s8, s8, 16
1632; RV32I-NEXT:    slli a1, a1, 24
1633; RV32I-NEXT:    or t4, t5, t4
1634; RV32I-NEXT:    or t5, s4, t6
1635; RV32I-NEXT:    or t6, s6, s5
1636; RV32I-NEXT:    or a3, s7, a3
1637; RV32I-NEXT:    or a1, a1, s8
1638; RV32I-NEXT:    lw s4, 4(sp) # 4-byte Folded Reload
1639; RV32I-NEXT:    or a4, a4, s4
1640; RV32I-NEXT:    or a5, a6, a5
1641; RV32I-NEXT:    or a6, t0, a7
1642; RV32I-NEXT:    or a7, t2, t1
1643; RV32I-NEXT:    or t0, a0, t3
1644; RV32I-NEXT:    or t1, s2, s0
1645; RV32I-NEXT:    or t2, t4, s1
1646; RV32I-NEXT:    or t3, t6, t5
1647; RV32I-NEXT:    or a0, a1, a3
1648; RV32I-NEXT:    sw t0, 24(sp)
1649; RV32I-NEXT:    sw t1, 28(sp)
1650; RV32I-NEXT:    sw t2, 32(sp)
1651; RV32I-NEXT:    sw t3, 36(sp)
1652; RV32I-NEXT:    sw a4, 8(sp)
1653; RV32I-NEXT:    sw a5, 12(sp)
1654; RV32I-NEXT:    sw a6, 16(sp)
1655; RV32I-NEXT:    sw a7, 20(sp)
1656; RV32I-NEXT:    srli a1, a0, 3
1657; RV32I-NEXT:    andi a3, a0, 31
1658; RV32I-NEXT:    andi a4, a1, 28
1659; RV32I-NEXT:    xori a1, a3, 31
1660; RV32I-NEXT:    add a4, s3, a4
1661; RV32I-NEXT:    lw a3, 0(a4)
1662; RV32I-NEXT:    lw a5, 4(a4)
1663; RV32I-NEXT:    lw a6, 8(a4)
1664; RV32I-NEXT:    lw a7, 12(a4)
1665; RV32I-NEXT:    lw t0, 16(a4)
1666; RV32I-NEXT:    lw t1, 20(a4)
1667; RV32I-NEXT:    lw t2, 24(a4)
1668; RV32I-NEXT:    lw a4, 28(a4)
1669; RV32I-NEXT:    srl t3, a5, a0
1670; RV32I-NEXT:    slli t4, a6, 1
1671; RV32I-NEXT:    srl a3, a3, a0
1672; RV32I-NEXT:    slli a5, a5, 1
1673; RV32I-NEXT:    srl t5, a7, a0
1674; RV32I-NEXT:    slli t6, t0, 1
1675; RV32I-NEXT:    srl a6, a6, a0
1676; RV32I-NEXT:    slli a7, a7, 1
1677; RV32I-NEXT:    srl s0, t1, a0
1678; RV32I-NEXT:    slli s1, t2, 1
1679; RV32I-NEXT:    srl t0, t0, a0
1680; RV32I-NEXT:    slli t1, t1, 1
1681; RV32I-NEXT:    srl t2, t2, a0
1682; RV32I-NEXT:    slli s2, a4, 1
1683; RV32I-NEXT:    srl s3, a4, a0
1684; RV32I-NEXT:    sll a0, t4, a1
1685; RV32I-NEXT:    sll a4, a5, a1
1686; RV32I-NEXT:    sll a5, t6, a1
1687; RV32I-NEXT:    sll a7, a7, a1
1688; RV32I-NEXT:    sll t4, s1, a1
1689; RV32I-NEXT:    sll t1, t1, a1
1690; RV32I-NEXT:    sll t6, s2, a1
1691; RV32I-NEXT:    srli s1, s3, 24
1692; RV32I-NEXT:    srli s2, s3, 16
1693; RV32I-NEXT:    srli s4, s3, 8
1694; RV32I-NEXT:    or a0, t3, a0
1695; RV32I-NEXT:    or a1, a3, a4
1696; RV32I-NEXT:    or a3, t5, a5
1697; RV32I-NEXT:    or a4, a6, a7
1698; RV32I-NEXT:    or a5, s0, t4
1699; RV32I-NEXT:    or a6, t0, t1
1700; RV32I-NEXT:    or a7, t2, t6
1701; RV32I-NEXT:    sb s3, 28(a2)
1702; RV32I-NEXT:    sb s4, 29(a2)
1703; RV32I-NEXT:    sb s2, 30(a2)
1704; RV32I-NEXT:    sb s1, 31(a2)
1705; RV32I-NEXT:    srli t0, a7, 24
1706; RV32I-NEXT:    srli t1, a7, 16
1707; RV32I-NEXT:    srli t2, a7, 8
1708; RV32I-NEXT:    srli t3, a6, 24
1709; RV32I-NEXT:    srli t4, a6, 16
1710; RV32I-NEXT:    srli t5, a6, 8
1711; RV32I-NEXT:    srli t6, a5, 24
1712; RV32I-NEXT:    srli s0, a5, 16
1713; RV32I-NEXT:    srli s1, a5, 8
1714; RV32I-NEXT:    srli s2, a4, 24
1715; RV32I-NEXT:    srli s3, a4, 16
1716; RV32I-NEXT:    srli s4, a4, 8
1717; RV32I-NEXT:    srli s5, a3, 24
1718; RV32I-NEXT:    srli s6, a3, 16
1719; RV32I-NEXT:    srli s7, a3, 8
1720; RV32I-NEXT:    srli s8, a1, 24
1721; RV32I-NEXT:    srli s9, a1, 16
1722; RV32I-NEXT:    sb a7, 24(a2)
1723; RV32I-NEXT:    sb t2, 25(a2)
1724; RV32I-NEXT:    sb t1, 26(a2)
1725; RV32I-NEXT:    sb t0, 27(a2)
1726; RV32I-NEXT:    srli a7, a1, 8
1727; RV32I-NEXT:    sb a6, 16(a2)
1728; RV32I-NEXT:    sb t5, 17(a2)
1729; RV32I-NEXT:    sb t4, 18(a2)
1730; RV32I-NEXT:    sb t3, 19(a2)
1731; RV32I-NEXT:    srli a6, a0, 24
1732; RV32I-NEXT:    sb a5, 20(a2)
1733; RV32I-NEXT:    sb s1, 21(a2)
1734; RV32I-NEXT:    sb s0, 22(a2)
1735; RV32I-NEXT:    sb t6, 23(a2)
1736; RV32I-NEXT:    srli a5, a0, 16
1737; RV32I-NEXT:    sb a4, 8(a2)
1738; RV32I-NEXT:    sb s4, 9(a2)
1739; RV32I-NEXT:    sb s3, 10(a2)
1740; RV32I-NEXT:    sb s2, 11(a2)
1741; RV32I-NEXT:    srli a4, a0, 8
1742; RV32I-NEXT:    sb a3, 12(a2)
1743; RV32I-NEXT:    sb s7, 13(a2)
1744; RV32I-NEXT:    sb s6, 14(a2)
1745; RV32I-NEXT:    sb s5, 15(a2)
1746; RV32I-NEXT:    sb a1, 0(a2)
1747; RV32I-NEXT:    sb a7, 1(a2)
1748; RV32I-NEXT:    sb s9, 2(a2)
1749; RV32I-NEXT:    sb s8, 3(a2)
1750; RV32I-NEXT:    sb a0, 4(a2)
1751; RV32I-NEXT:    sb a4, 5(a2)
1752; RV32I-NEXT:    sb a5, 6(a2)
1753; RV32I-NEXT:    sb a6, 7(a2)
1754; RV32I-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
1755; RV32I-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
1756; RV32I-NEXT:    lw s1, 116(sp) # 4-byte Folded Reload
1757; RV32I-NEXT:    lw s2, 112(sp) # 4-byte Folded Reload
1758; RV32I-NEXT:    lw s3, 108(sp) # 4-byte Folded Reload
1759; RV32I-NEXT:    lw s4, 104(sp) # 4-byte Folded Reload
1760; RV32I-NEXT:    lw s5, 100(sp) # 4-byte Folded Reload
1761; RV32I-NEXT:    lw s6, 96(sp) # 4-byte Folded Reload
1762; RV32I-NEXT:    lw s7, 92(sp) # 4-byte Folded Reload
1763; RV32I-NEXT:    lw s8, 88(sp) # 4-byte Folded Reload
1764; RV32I-NEXT:    lw s9, 84(sp) # 4-byte Folded Reload
1765; RV32I-NEXT:    lw s10, 80(sp) # 4-byte Folded Reload
1766; RV32I-NEXT:    lw s11, 76(sp) # 4-byte Folded Reload
1767; RV32I-NEXT:    addi sp, sp, 128
1768; RV32I-NEXT:    ret
  ; Both operands are loaded with align 1, which is why the expected assembly
  ; above builds every word from individual byte loads.
1769  %src = load i256, ptr %src.ptr, align 1
1770  %bitOff = load i256, ptr %bitOff.ptr, align 1
  ; The operation under test: a 256-bit logical shift right.
1771  %res = lshr i256 %src, %bitOff
  ; The result is stored with align 1 as well, hence the byte stores above.
1772  store i256 %res, ptr %dst, align 1
1773  ret void
1774}
; shl_32bytes: left shift of a 256-bit integer by a variable, 256-bit shift
; amount. Both operands are loaded from memory with align 1 and the result is
; stored with align 1 (see the IR body at the end of this function).
;   %src.ptr    - pointer to the i256 value being shifted
;   %bitOff.ptr - pointer to the i256 shift amount, in bits
;   %dst        - pointer the i256 result is stored to
; The check lines below are autogenerated (see the note at the top of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
; Shape of the expected code for both targets: the operands are read one byte
; at a time (lbu) and reassembled with slli/or; a block of zero words is
; stored on the stack directly below the assembled source value; the shift
; amount is split into a word-aligned byte offset (subtracted from the address
; of the source value) and a sub-word bit count; adjacent words loaded from
; that address are then combined with sll/srl pairs and written out with sb.
; Only the low 8 (riscv64) or 4 (riscv32) bytes of the shift amount are loaded.
1775define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
1776; RV64I-LABEL: shl_32bytes:
1777; RV64I:       # %bb.0:
1778; RV64I-NEXT:    addi sp, sp, -160
1779; RV64I-NEXT:    sd s0, 152(sp) # 8-byte Folded Spill
1780; RV64I-NEXT:    sd s1, 144(sp) # 8-byte Folded Spill
1781; RV64I-NEXT:    sd s2, 136(sp) # 8-byte Folded Spill
1782; RV64I-NEXT:    sd s3, 128(sp) # 8-byte Folded Spill
1783; RV64I-NEXT:    sd s4, 120(sp) # 8-byte Folded Spill
1784; RV64I-NEXT:    sd s5, 112(sp) # 8-byte Folded Spill
1785; RV64I-NEXT:    sd s6, 104(sp) # 8-byte Folded Spill
1786; RV64I-NEXT:    sd s7, 96(sp) # 8-byte Folded Spill
1787; RV64I-NEXT:    sd s8, 88(sp) # 8-byte Folded Spill
1788; RV64I-NEXT:    sd s9, 80(sp) # 8-byte Folded Spill
1789; RV64I-NEXT:    sd s10, 72(sp) # 8-byte Folded Spill
1790; RV64I-NEXT:    sd s11, 64(sp) # 8-byte Folded Spill
1791; RV64I-NEXT:    lbu a3, 0(a0)
1792; RV64I-NEXT:    lbu a4, 1(a0)
1793; RV64I-NEXT:    lbu a5, 2(a0)
1794; RV64I-NEXT:    lbu a6, 3(a0)
1795; RV64I-NEXT:    lbu a7, 4(a0)
1796; RV64I-NEXT:    lbu t0, 5(a0)
1797; RV64I-NEXT:    lbu t1, 6(a0)
1798; RV64I-NEXT:    lbu t2, 7(a0)
1799; RV64I-NEXT:    lbu t3, 8(a0)
1800; RV64I-NEXT:    lbu t4, 9(a0)
1801; RV64I-NEXT:    lbu t5, 10(a0)
1802; RV64I-NEXT:    lbu t6, 11(a0)
1803; RV64I-NEXT:    lbu s0, 12(a0)
1804; RV64I-NEXT:    lbu s1, 13(a0)
1805; RV64I-NEXT:    lbu s2, 14(a0)
1806; RV64I-NEXT:    lbu s3, 15(a0)
1807; RV64I-NEXT:    lbu s4, 16(a0)
1808; RV64I-NEXT:    lbu s5, 17(a0)
1809; RV64I-NEXT:    lbu s6, 18(a0)
1810; RV64I-NEXT:    lbu s7, 19(a0)
1811; RV64I-NEXT:    slli a4, a4, 8
1812; RV64I-NEXT:    slli s8, a5, 16
1813; RV64I-NEXT:    slli a6, a6, 24
1814; RV64I-NEXT:    slli t0, t0, 8
1815; RV64I-NEXT:    slli t1, t1, 16
1816; RV64I-NEXT:    slli t2, t2, 24
1817; RV64I-NEXT:    or a5, a4, a3
1818; RV64I-NEXT:    or a6, a6, s8
1819; RV64I-NEXT:    or a3, t0, a7
1820; RV64I-NEXT:    or a4, t2, t1
1821; RV64I-NEXT:    lbu s8, 20(a0)
1822; RV64I-NEXT:    lbu s9, 21(a0)
1823; RV64I-NEXT:    lbu s10, 22(a0)
1824; RV64I-NEXT:    lbu s11, 23(a0)
1825; RV64I-NEXT:    slli t4, t4, 8
1826; RV64I-NEXT:    slli t5, t5, 16
1827; RV64I-NEXT:    slli t6, t6, 24
1828; RV64I-NEXT:    slli s1, s1, 8
1829; RV64I-NEXT:    slli s2, s2, 16
1830; RV64I-NEXT:    slli s3, s3, 24
1831; RV64I-NEXT:    or a7, t4, t3
1832; RV64I-NEXT:    or t0, t6, t5
1833; RV64I-NEXT:    or t1, s1, s0
1834; RV64I-NEXT:    or t2, s3, s2
1835; RV64I-NEXT:    lbu t6, 24(a0)
1836; RV64I-NEXT:    lbu s0, 25(a0)
1837; RV64I-NEXT:    lbu s1, 26(a0)
1838; RV64I-NEXT:    lbu s2, 27(a0)
1839; RV64I-NEXT:    slli s5, s5, 8
1840; RV64I-NEXT:    slli s6, s6, 16
1841; RV64I-NEXT:    slli s7, s7, 24
1842; RV64I-NEXT:    slli s9, s9, 8
1843; RV64I-NEXT:    or t3, s5, s4
1844; RV64I-NEXT:    or t4, s7, s6
1845; RV64I-NEXT:    or t5, s9, s8
1846; RV64I-NEXT:    lbu s3, 28(a0)
1847; RV64I-NEXT:    lbu s4, 29(a0)
1848; RV64I-NEXT:    lbu s5, 30(a0)
1849; RV64I-NEXT:    lbu s6, 31(a0)
1850; RV64I-NEXT:    slli s10, s10, 16
1851; RV64I-NEXT:    slli s11, s11, 24
1852; RV64I-NEXT:    slli s0, s0, 8
1853; RV64I-NEXT:    slli s1, s1, 16
1854; RV64I-NEXT:    slli s2, s2, 24
1855; RV64I-NEXT:    slli s4, s4, 8
1856; RV64I-NEXT:    or a0, s11, s10
1857; RV64I-NEXT:    or t6, s0, t6
1858; RV64I-NEXT:    or s0, s2, s1
1859; RV64I-NEXT:    or s1, s4, s3
1860; RV64I-NEXT:    lbu s2, 0(a1)
1861; RV64I-NEXT:    lbu s3, 1(a1)
1862; RV64I-NEXT:    lbu s4, 2(a1)
1863; RV64I-NEXT:    lbu s7, 3(a1)
1864; RV64I-NEXT:    slli s5, s5, 16
1865; RV64I-NEXT:    slli s6, s6, 24
1866; RV64I-NEXT:    slli s3, s3, 8
1867; RV64I-NEXT:    slli s4, s4, 16
1868; RV64I-NEXT:    slli s7, s7, 24
1869; RV64I-NEXT:    or s5, s6, s5
1870; RV64I-NEXT:    or s2, s3, s2
1871; RV64I-NEXT:    lbu s3, 4(a1)
1872; RV64I-NEXT:    lbu s6, 5(a1)
1873; RV64I-NEXT:    or s4, s7, s4
1874; RV64I-NEXT:    lbu s7, 6(a1)
1875; RV64I-NEXT:    lbu a1, 7(a1)
1876; RV64I-NEXT:    slli s6, s6, 8
1877; RV64I-NEXT:    or s3, s6, s3
1878; RV64I-NEXT:    slli s7, s7, 16
1879; RV64I-NEXT:    slli a1, a1, 24
1880; RV64I-NEXT:    or a1, a1, s7
1881; RV64I-NEXT:    sd zero, 0(sp)
1882; RV64I-NEXT:    sd zero, 8(sp)
1883; RV64I-NEXT:    sd zero, 16(sp)
1884; RV64I-NEXT:    sd zero, 24(sp)
1885; RV64I-NEXT:    or a5, a6, a5
1886; RV64I-NEXT:    addi a6, sp, 32
1887; RV64I-NEXT:    or a3, a4, a3
1888; RV64I-NEXT:    or a4, t0, a7
1889; RV64I-NEXT:    or a7, t2, t1
1890; RV64I-NEXT:    or t0, t4, t3
1891; RV64I-NEXT:    or a0, a0, t5
1892; RV64I-NEXT:    or t1, s0, t6
1893; RV64I-NEXT:    or t2, s5, s1
1894; RV64I-NEXT:    or t3, s4, s2
1895; RV64I-NEXT:    or a1, a1, s3
1896; RV64I-NEXT:    slli a3, a3, 32
1897; RV64I-NEXT:    slli a7, a7, 32
1898; RV64I-NEXT:    slli a0, a0, 32
1899; RV64I-NEXT:    slli t2, t2, 32
1900; RV64I-NEXT:    slli a1, a1, 32
1901; RV64I-NEXT:    or a3, a3, a5
1902; RV64I-NEXT:    or a4, a7, a4
1903; RV64I-NEXT:    or a0, a0, t0
1904; RV64I-NEXT:    or a5, t2, t1
1905; RV64I-NEXT:    or a1, a1, t3
1906; RV64I-NEXT:    sd a3, 32(sp)
1907; RV64I-NEXT:    sd a4, 40(sp)
1908; RV64I-NEXT:    sd a0, 48(sp)
1909; RV64I-NEXT:    sd a5, 56(sp)
1910; RV64I-NEXT:    srli a0, a1, 3
1911; RV64I-NEXT:    andi a3, a1, 63
1912; RV64I-NEXT:    andi a0, a0, 24
1913; RV64I-NEXT:    sub a0, a6, a0
1914; RV64I-NEXT:    ld a4, 0(a0)
1915; RV64I-NEXT:    ld a5, 8(a0)
1916; RV64I-NEXT:    ld a6, 16(a0)
1917; RV64I-NEXT:    ld a0, 24(a0)
1918; RV64I-NEXT:    xori a3, a3, 63
1919; RV64I-NEXT:    sll a7, a5, a1
1920; RV64I-NEXT:    srli t0, a4, 1
1921; RV64I-NEXT:    sll t1, a0, a1
1922; RV64I-NEXT:    srli a0, a6, 1
1923; RV64I-NEXT:    sll a6, a6, a1
1924; RV64I-NEXT:    srli a5, a5, 1
1925; RV64I-NEXT:    sll a4, a4, a1
1926; RV64I-NEXT:    srl a1, t0, a3
1927; RV64I-NEXT:    srl t0, a0, a3
1928; RV64I-NEXT:    srl a3, a5, a3
1929; RV64I-NEXT:    srli a5, a4, 56
1930; RV64I-NEXT:    srli t2, a4, 48
1931; RV64I-NEXT:    srli t3, a4, 40
1932; RV64I-NEXT:    srli t4, a4, 32
1933; RV64I-NEXT:    srli t5, a4, 24
1934; RV64I-NEXT:    srli t6, a4, 16
1935; RV64I-NEXT:    srli s0, a4, 8
1936; RV64I-NEXT:    or a0, a7, a1
1937; RV64I-NEXT:    or a1, t1, t0
1938; RV64I-NEXT:    or a3, a6, a3
1939; RV64I-NEXT:    sb t4, 4(a2)
1940; RV64I-NEXT:    sb t3, 5(a2)
1941; RV64I-NEXT:    sb t2, 6(a2)
1942; RV64I-NEXT:    sb a5, 7(a2)
1943; RV64I-NEXT:    sb a4, 0(a2)
1944; RV64I-NEXT:    sb s0, 1(a2)
1945; RV64I-NEXT:    sb t6, 2(a2)
1946; RV64I-NEXT:    sb t5, 3(a2)
1947; RV64I-NEXT:    srli a4, a3, 56
1948; RV64I-NEXT:    srli a5, a3, 48
1949; RV64I-NEXT:    srli a6, a3, 40
1950; RV64I-NEXT:    srli a7, a3, 32
1951; RV64I-NEXT:    srli t0, a3, 24
1952; RV64I-NEXT:    srli t1, a3, 16
1953; RV64I-NEXT:    srli t2, a3, 8
1954; RV64I-NEXT:    srli t3, a1, 56
1955; RV64I-NEXT:    srli t4, a1, 48
1956; RV64I-NEXT:    srli t5, a1, 40
1957; RV64I-NEXT:    srli t6, a1, 32
1958; RV64I-NEXT:    srli s0, a1, 24
1959; RV64I-NEXT:    srli s1, a1, 16
1960; RV64I-NEXT:    srli s2, a1, 8
1961; RV64I-NEXT:    srli s3, a0, 56
1962; RV64I-NEXT:    srli s4, a0, 48
1963; RV64I-NEXT:    srli s5, a0, 40
1964; RV64I-NEXT:    sb a7, 20(a2)
1965; RV64I-NEXT:    sb a6, 21(a2)
1966; RV64I-NEXT:    sb a5, 22(a2)
1967; RV64I-NEXT:    sb a4, 23(a2)
1968; RV64I-NEXT:    srli a4, a0, 32
1969; RV64I-NEXT:    sb a3, 16(a2)
1970; RV64I-NEXT:    sb t2, 17(a2)
1971; RV64I-NEXT:    sb t1, 18(a2)
1972; RV64I-NEXT:    sb t0, 19(a2)
1973; RV64I-NEXT:    srli a3, a0, 24
1974; RV64I-NEXT:    sb t6, 28(a2)
1975; RV64I-NEXT:    sb t5, 29(a2)
1976; RV64I-NEXT:    sb t4, 30(a2)
1977; RV64I-NEXT:    sb t3, 31(a2)
1978; RV64I-NEXT:    srli a5, a0, 16
1979; RV64I-NEXT:    sb a1, 24(a2)
1980; RV64I-NEXT:    sb s2, 25(a2)
1981; RV64I-NEXT:    sb s1, 26(a2)
1982; RV64I-NEXT:    sb s0, 27(a2)
1983; RV64I-NEXT:    srli a1, a0, 8
1984; RV64I-NEXT:    sb a4, 12(a2)
1985; RV64I-NEXT:    sb s5, 13(a2)
1986; RV64I-NEXT:    sb s4, 14(a2)
1987; RV64I-NEXT:    sb s3, 15(a2)
1988; RV64I-NEXT:    sb a0, 8(a2)
1989; RV64I-NEXT:    sb a1, 9(a2)
1990; RV64I-NEXT:    sb a5, 10(a2)
1991; RV64I-NEXT:    sb a3, 11(a2)
1992; RV64I-NEXT:    ld s0, 152(sp) # 8-byte Folded Reload
1993; RV64I-NEXT:    ld s1, 144(sp) # 8-byte Folded Reload
1994; RV64I-NEXT:    ld s2, 136(sp) # 8-byte Folded Reload
1995; RV64I-NEXT:    ld s3, 128(sp) # 8-byte Folded Reload
1996; RV64I-NEXT:    ld s4, 120(sp) # 8-byte Folded Reload
1997; RV64I-NEXT:    ld s5, 112(sp) # 8-byte Folded Reload
1998; RV64I-NEXT:    ld s6, 104(sp) # 8-byte Folded Reload
1999; RV64I-NEXT:    ld s7, 96(sp) # 8-byte Folded Reload
2000; RV64I-NEXT:    ld s8, 88(sp) # 8-byte Folded Reload
2001; RV64I-NEXT:    ld s9, 80(sp) # 8-byte Folded Reload
2002; RV64I-NEXT:    ld s10, 72(sp) # 8-byte Folded Reload
2003; RV64I-NEXT:    ld s11, 64(sp) # 8-byte Folded Reload
2004; RV64I-NEXT:    addi sp, sp, 160
2005; RV64I-NEXT:    ret
2006;
2007; RV32I-LABEL: shl_32bytes:
2008; RV32I:       # %bb.0:
2009; RV32I-NEXT:    addi sp, sp, -128
2010; RV32I-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
2011; RV32I-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
2012; RV32I-NEXT:    sw s1, 116(sp) # 4-byte Folded Spill
2013; RV32I-NEXT:    sw s2, 112(sp) # 4-byte Folded Spill
2014; RV32I-NEXT:    sw s3, 108(sp) # 4-byte Folded Spill
2015; RV32I-NEXT:    sw s4, 104(sp) # 4-byte Folded Spill
2016; RV32I-NEXT:    sw s5, 100(sp) # 4-byte Folded Spill
2017; RV32I-NEXT:    sw s6, 96(sp) # 4-byte Folded Spill
2018; RV32I-NEXT:    sw s7, 92(sp) # 4-byte Folded Spill
2019; RV32I-NEXT:    sw s8, 88(sp) # 4-byte Folded Spill
2020; RV32I-NEXT:    sw s9, 84(sp) # 4-byte Folded Spill
2021; RV32I-NEXT:    sw s10, 80(sp) # 4-byte Folded Spill
2022; RV32I-NEXT:    sw s11, 76(sp) # 4-byte Folded Spill
2023; RV32I-NEXT:    lbu a3, 0(a0)
2024; RV32I-NEXT:    lbu a4, 1(a0)
2025; RV32I-NEXT:    lbu a6, 2(a0)
2026; RV32I-NEXT:    lbu a7, 3(a0)
2027; RV32I-NEXT:    lbu a5, 4(a0)
2028; RV32I-NEXT:    lbu t0, 5(a0)
2029; RV32I-NEXT:    lbu t1, 6(a0)
2030; RV32I-NEXT:    lbu t2, 7(a0)
2031; RV32I-NEXT:    lbu t3, 8(a0)
2032; RV32I-NEXT:    lbu t4, 9(a0)
2033; RV32I-NEXT:    lbu t5, 10(a0)
2034; RV32I-NEXT:    lbu t6, 11(a0)
2035; RV32I-NEXT:    lbu s0, 12(a0)
2036; RV32I-NEXT:    lbu s2, 13(a0)
2037; RV32I-NEXT:    lbu s4, 14(a0)
2038; RV32I-NEXT:    lbu s5, 15(a0)
2039; RV32I-NEXT:    lbu s6, 16(a0)
2040; RV32I-NEXT:    lbu s7, 17(a0)
2041; RV32I-NEXT:    lbu s8, 18(a0)
2042; RV32I-NEXT:    lbu s9, 19(a0)
2043; RV32I-NEXT:    slli a4, a4, 8
2044; RV32I-NEXT:    slli a6, a6, 16
2045; RV32I-NEXT:    slli a7, a7, 24
2046; RV32I-NEXT:    or a3, a4, a3
2047; RV32I-NEXT:    sw a3, 4(sp) # 4-byte Folded Spill
2048; RV32I-NEXT:    or a4, a7, a6
2049; RV32I-NEXT:    lbu s10, 20(a0)
2050; RV32I-NEXT:    lbu s11, 21(a0)
2051; RV32I-NEXT:    lbu ra, 22(a0)
2052; RV32I-NEXT:    lbu a3, 23(a0)
2053; RV32I-NEXT:    slli t0, t0, 8
2054; RV32I-NEXT:    slli t1, t1, 16
2055; RV32I-NEXT:    slli t2, t2, 24
2056; RV32I-NEXT:    slli t4, t4, 8
2057; RV32I-NEXT:    slli t5, t5, 16
2058; RV32I-NEXT:    slli t6, t6, 24
2059; RV32I-NEXT:    or a5, t0, a5
2060; RV32I-NEXT:    or a6, t2, t1
2061; RV32I-NEXT:    or a7, t4, t3
2062; RV32I-NEXT:    or t0, t6, t5
2063; RV32I-NEXT:    lbu s1, 24(a0)
2064; RV32I-NEXT:    lbu s3, 25(a0)
2065; RV32I-NEXT:    lbu t4, 26(a0)
2066; RV32I-NEXT:    lbu t5, 27(a0)
2067; RV32I-NEXT:    slli s2, s2, 8
2068; RV32I-NEXT:    slli s4, s4, 16
2069; RV32I-NEXT:    slli s5, s5, 24
2070; RV32I-NEXT:    slli s7, s7, 8
2071; RV32I-NEXT:    or t1, s2, s0
2072; RV32I-NEXT:    or t2, s5, s4
2073; RV32I-NEXT:    or t3, s7, s6
2074; RV32I-NEXT:    lbu t6, 28(a0)
2075; RV32I-NEXT:    lbu s4, 29(a0)
2076; RV32I-NEXT:    lbu s5, 30(a0)
2077; RV32I-NEXT:    lbu s6, 31(a0)
2078; RV32I-NEXT:    slli s8, s8, 16
2079; RV32I-NEXT:    slli s9, s9, 24
2080; RV32I-NEXT:    slli s11, s11, 8
2081; RV32I-NEXT:    slli ra, ra, 16
2082; RV32I-NEXT:    slli a3, a3, 24
2083; RV32I-NEXT:    or a0, s9, s8
2084; RV32I-NEXT:    or s0, s11, s10
2085; RV32I-NEXT:    or s2, a3, ra
2086; RV32I-NEXT:    lbu a3, 0(a1)
2087; RV32I-NEXT:    lbu s7, 1(a1)
2088; RV32I-NEXT:    lbu s8, 2(a1)
2089; RV32I-NEXT:    lbu a1, 3(a1)
2090; RV32I-NEXT:    sw zero, 24(sp)
2091; RV32I-NEXT:    sw zero, 28(sp)
2092; RV32I-NEXT:    sw zero, 32(sp)
2093; RV32I-NEXT:    sw zero, 36(sp)
2094; RV32I-NEXT:    sw zero, 8(sp)
2095; RV32I-NEXT:    sw zero, 12(sp)
2096; RV32I-NEXT:    sw zero, 16(sp)
2097; RV32I-NEXT:    sw zero, 20(sp)
2098; RV32I-NEXT:    slli s3, s3, 8
2099; RV32I-NEXT:    or s1, s3, s1
2100; RV32I-NEXT:    addi s3, sp, 40
2101; RV32I-NEXT:    slli t4, t4, 16
2102; RV32I-NEXT:    slli t5, t5, 24
2103; RV32I-NEXT:    slli s4, s4, 8
2104; RV32I-NEXT:    slli s5, s5, 16
2105; RV32I-NEXT:    slli s6, s6, 24
2106; RV32I-NEXT:    slli s7, s7, 8
2107; RV32I-NEXT:    slli s8, s8, 16
2108; RV32I-NEXT:    slli a1, a1, 24
2109; RV32I-NEXT:    or t4, t5, t4
2110; RV32I-NEXT:    or t5, s4, t6
2111; RV32I-NEXT:    or t6, s6, s5
2112; RV32I-NEXT:    or a3, s7, a3
2113; RV32I-NEXT:    or a1, a1, s8
2114; RV32I-NEXT:    lw s4, 4(sp) # 4-byte Folded Reload
2115; RV32I-NEXT:    or a4, a4, s4
2116; RV32I-NEXT:    or a5, a6, a5
2117; RV32I-NEXT:    or a6, t0, a7
2118; RV32I-NEXT:    or a7, t2, t1
2119; RV32I-NEXT:    or t0, a0, t3
2120; RV32I-NEXT:    or t1, s2, s0
2121; RV32I-NEXT:    or t2, t4, s1
2122; RV32I-NEXT:    or t3, t6, t5
2123; RV32I-NEXT:    or a0, a1, a3
2124; RV32I-NEXT:    sw t0, 56(sp)
2125; RV32I-NEXT:    sw t1, 60(sp)
2126; RV32I-NEXT:    sw t2, 64(sp)
2127; RV32I-NEXT:    sw t3, 68(sp)
2128; RV32I-NEXT:    sw a4, 40(sp)
2129; RV32I-NEXT:    sw a5, 44(sp)
2130; RV32I-NEXT:    sw a6, 48(sp)
2131; RV32I-NEXT:    sw a7, 52(sp)
2132; RV32I-NEXT:    srli a1, a0, 3
2133; RV32I-NEXT:    andi a3, a0, 31
2134; RV32I-NEXT:    andi a4, a1, 28
2135; RV32I-NEXT:    xori a1, a3, 31
2136; RV32I-NEXT:    sub a3, s3, a4
2137; RV32I-NEXT:    lw a4, 0(a3)
2138; RV32I-NEXT:    lw a5, 4(a3)
2139; RV32I-NEXT:    lw a6, 8(a3)
2140; RV32I-NEXT:    lw a7, 12(a3)
2141; RV32I-NEXT:    lw t0, 16(a3)
2142; RV32I-NEXT:    lw t1, 20(a3)
2143; RV32I-NEXT:    lw t2, 24(a3)
2144; RV32I-NEXT:    lw a3, 28(a3)
2145; RV32I-NEXT:    sll t3, a5, a0
2146; RV32I-NEXT:    srli t4, a4, 1
2147; RV32I-NEXT:    sll t5, a7, a0
2148; RV32I-NEXT:    srli t6, a6, 1
2149; RV32I-NEXT:    sll a6, a6, a0
2150; RV32I-NEXT:    srli a5, a5, 1
2151; RV32I-NEXT:    sll s0, t1, a0
2152; RV32I-NEXT:    srli s1, t0, 1
2153; RV32I-NEXT:    sll t0, t0, a0
2154; RV32I-NEXT:    srli a7, a7, 1
2155; RV32I-NEXT:    sll s2, a3, a0
2156; RV32I-NEXT:    srli a3, t2, 1
2157; RV32I-NEXT:    sll t2, t2, a0
2158; RV32I-NEXT:    srli t1, t1, 1
2159; RV32I-NEXT:    sll s3, a4, a0
2160; RV32I-NEXT:    srl a0, t4, a1
2161; RV32I-NEXT:    srl a4, t6, a1
2162; RV32I-NEXT:    srl a5, a5, a1
2163; RV32I-NEXT:    srl t4, s1, a1
2164; RV32I-NEXT:    srl a7, a7, a1
2165; RV32I-NEXT:    srl t6, a3, a1
2166; RV32I-NEXT:    srl t1, t1, a1
2167; RV32I-NEXT:    srli s1, s3, 24
2168; RV32I-NEXT:    srli s4, s3, 16
2169; RV32I-NEXT:    srli s5, s3, 8
2170; RV32I-NEXT:    or a0, t3, a0
2171; RV32I-NEXT:    or a1, t5, a4
2172; RV32I-NEXT:    or a3, a6, a5
2173; RV32I-NEXT:    or a4, s0, t4
2174; RV32I-NEXT:    or a5, t0, a7
2175; RV32I-NEXT:    or a6, s2, t6
2176; RV32I-NEXT:    or a7, t2, t1
2177; RV32I-NEXT:    sb s3, 0(a2)
2178; RV32I-NEXT:    sb s5, 1(a2)
2179; RV32I-NEXT:    sb s4, 2(a2)
2180; RV32I-NEXT:    sb s1, 3(a2)
2181; RV32I-NEXT:    srli t0, a7, 24
2182; RV32I-NEXT:    srli t1, a7, 16
2183; RV32I-NEXT:    srli t2, a7, 8
2184; RV32I-NEXT:    srli t3, a6, 24
2185; RV32I-NEXT:    srli t4, a6, 16
2186; RV32I-NEXT:    srli t5, a6, 8
2187; RV32I-NEXT:    srli t6, a5, 24
2188; RV32I-NEXT:    srli s0, a5, 16
2189; RV32I-NEXT:    srli s1, a5, 8
2190; RV32I-NEXT:    srli s2, a4, 24
2191; RV32I-NEXT:    srli s3, a4, 16
2192; RV32I-NEXT:    srli s4, a4, 8
2193; RV32I-NEXT:    srli s5, a3, 24
2194; RV32I-NEXT:    srli s6, a3, 16
2195; RV32I-NEXT:    srli s7, a3, 8
2196; RV32I-NEXT:    srli s8, a1, 24
2197; RV32I-NEXT:    srli s9, a1, 16
2198; RV32I-NEXT:    sb a7, 24(a2)
2199; RV32I-NEXT:    sb t2, 25(a2)
2200; RV32I-NEXT:    sb t1, 26(a2)
2201; RV32I-NEXT:    sb t0, 27(a2)
2202; RV32I-NEXT:    srli a7, a1, 8
2203; RV32I-NEXT:    sb a6, 28(a2)
2204; RV32I-NEXT:    sb t5, 29(a2)
2205; RV32I-NEXT:    sb t4, 30(a2)
2206; RV32I-NEXT:    sb t3, 31(a2)
2207; RV32I-NEXT:    srli a6, a0, 24
2208; RV32I-NEXT:    sb a5, 16(a2)
2209; RV32I-NEXT:    sb s1, 17(a2)
2210; RV32I-NEXT:    sb s0, 18(a2)
2211; RV32I-NEXT:    sb t6, 19(a2)
2212; RV32I-NEXT:    srli a5, a0, 16
2213; RV32I-NEXT:    sb a4, 20(a2)
2214; RV32I-NEXT:    sb s4, 21(a2)
2215; RV32I-NEXT:    sb s3, 22(a2)
2216; RV32I-NEXT:    sb s2, 23(a2)
2217; RV32I-NEXT:    srli a4, a0, 8
2218; RV32I-NEXT:    sb a3, 8(a2)
2219; RV32I-NEXT:    sb s7, 9(a2)
2220; RV32I-NEXT:    sb s6, 10(a2)
2221; RV32I-NEXT:    sb s5, 11(a2)
2222; RV32I-NEXT:    sb a1, 12(a2)
2223; RV32I-NEXT:    sb a7, 13(a2)
2224; RV32I-NEXT:    sb s9, 14(a2)
2225; RV32I-NEXT:    sb s8, 15(a2)
2226; RV32I-NEXT:    sb a0, 4(a2)
2227; RV32I-NEXT:    sb a4, 5(a2)
2228; RV32I-NEXT:    sb a5, 6(a2)
2229; RV32I-NEXT:    sb a6, 7(a2)
2230; RV32I-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
2231; RV32I-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
2232; RV32I-NEXT:    lw s1, 116(sp) # 4-byte Folded Reload
2233; RV32I-NEXT:    lw s2, 112(sp) # 4-byte Folded Reload
2234; RV32I-NEXT:    lw s3, 108(sp) # 4-byte Folded Reload
2235; RV32I-NEXT:    lw s4, 104(sp) # 4-byte Folded Reload
2236; RV32I-NEXT:    lw s5, 100(sp) # 4-byte Folded Reload
2237; RV32I-NEXT:    lw s6, 96(sp) # 4-byte Folded Reload
2238; RV32I-NEXT:    lw s7, 92(sp) # 4-byte Folded Reload
2239; RV32I-NEXT:    lw s8, 88(sp) # 4-byte Folded Reload
2240; RV32I-NEXT:    lw s9, 84(sp) # 4-byte Folded Reload
2241; RV32I-NEXT:    lw s10, 80(sp) # 4-byte Folded Reload
2242; RV32I-NEXT:    lw s11, 76(sp) # 4-byte Folded Reload
2243; RV32I-NEXT:    addi sp, sp, 128
2244; RV32I-NEXT:    ret
  ; Every memory access below is align 1, which is consistent with the
  ; byte-wise lbu/sb sequences in the expected code above.
2245  %src = load i256, ptr %src.ptr, align 1
2246  %bitOff = load i256, ptr %bitOff.ptr, align 1
2247  %res = shl i256 %src, %bitOff
2248  store i256 %res, ptr %dst, align 1
2249  ret void
2250}
2251define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
2252; RV64I-LABEL: ashr_32bytes:
2253; RV64I:       # %bb.0:
2254; RV64I-NEXT:    addi sp, sp, -160
2255; RV64I-NEXT:    sd s0, 152(sp) # 8-byte Folded Spill
2256; RV64I-NEXT:    sd s1, 144(sp) # 8-byte Folded Spill
2257; RV64I-NEXT:    sd s2, 136(sp) # 8-byte Folded Spill
2258; RV64I-NEXT:    sd s3, 128(sp) # 8-byte Folded Spill
2259; RV64I-NEXT:    sd s4, 120(sp) # 8-byte Folded Spill
2260; RV64I-NEXT:    sd s5, 112(sp) # 8-byte Folded Spill
2261; RV64I-NEXT:    sd s6, 104(sp) # 8-byte Folded Spill
2262; RV64I-NEXT:    sd s7, 96(sp) # 8-byte Folded Spill
2263; RV64I-NEXT:    sd s8, 88(sp) # 8-byte Folded Spill
2264; RV64I-NEXT:    sd s9, 80(sp) # 8-byte Folded Spill
2265; RV64I-NEXT:    sd s10, 72(sp) # 8-byte Folded Spill
2266; RV64I-NEXT:    sd s11, 64(sp) # 8-byte Folded Spill
2267; RV64I-NEXT:    lbu a3, 0(a0)
2268; RV64I-NEXT:    lbu a4, 1(a0)
2269; RV64I-NEXT:    lbu a5, 2(a0)
2270; RV64I-NEXT:    lbu a6, 3(a0)
2271; RV64I-NEXT:    lbu a7, 4(a0)
2272; RV64I-NEXT:    lbu t0, 5(a0)
2273; RV64I-NEXT:    lbu t1, 6(a0)
2274; RV64I-NEXT:    lbu t2, 7(a0)
2275; RV64I-NEXT:    lbu t3, 8(a0)
2276; RV64I-NEXT:    lbu t4, 9(a0)
2277; RV64I-NEXT:    lbu t5, 10(a0)
2278; RV64I-NEXT:    lbu t6, 11(a0)
2279; RV64I-NEXT:    lbu s0, 12(a0)
2280; RV64I-NEXT:    lbu s1, 13(a0)
2281; RV64I-NEXT:    lbu s2, 14(a0)
2282; RV64I-NEXT:    lbu s3, 15(a0)
2283; RV64I-NEXT:    lbu s4, 16(a0)
2284; RV64I-NEXT:    lbu s5, 17(a0)
2285; RV64I-NEXT:    lbu s6, 18(a0)
2286; RV64I-NEXT:    lbu s7, 19(a0)
2287; RV64I-NEXT:    slli a4, a4, 8
2288; RV64I-NEXT:    slli a5, a5, 16
2289; RV64I-NEXT:    slli a6, a6, 24
2290; RV64I-NEXT:    slli t0, t0, 8
2291; RV64I-NEXT:    slli t1, t1, 16
2292; RV64I-NEXT:    slli t2, t2, 24
2293; RV64I-NEXT:    or a3, a4, a3
2294; RV64I-NEXT:    or a4, a6, a5
2295; RV64I-NEXT:    or a5, t0, a7
2296; RV64I-NEXT:    or a6, t2, t1
2297; RV64I-NEXT:    lbu s8, 20(a0)
2298; RV64I-NEXT:    lbu s9, 21(a0)
2299; RV64I-NEXT:    lbu s10, 22(a0)
2300; RV64I-NEXT:    lbu s11, 23(a0)
2301; RV64I-NEXT:    slli t4, t4, 8
2302; RV64I-NEXT:    slli t5, t5, 16
2303; RV64I-NEXT:    slli t6, t6, 24
2304; RV64I-NEXT:    slli s1, s1, 8
2305; RV64I-NEXT:    slli s2, s2, 16
2306; RV64I-NEXT:    slli s3, s3, 24
2307; RV64I-NEXT:    or a7, t4, t3
2308; RV64I-NEXT:    or t0, t6, t5
2309; RV64I-NEXT:    or t1, s1, s0
2310; RV64I-NEXT:    or t2, s3, s2
2311; RV64I-NEXT:    lbu t6, 24(a0)
2312; RV64I-NEXT:    lbu s0, 25(a0)
2313; RV64I-NEXT:    lbu s1, 26(a0)
2314; RV64I-NEXT:    lbu s2, 27(a0)
2315; RV64I-NEXT:    slli s5, s5, 8
2316; RV64I-NEXT:    slli s6, s6, 16
2317; RV64I-NEXT:    slli s7, s7, 24
2318; RV64I-NEXT:    slli s9, s9, 8
2319; RV64I-NEXT:    or t3, s5, s4
2320; RV64I-NEXT:    or t4, s7, s6
2321; RV64I-NEXT:    or t5, s9, s8
2322; RV64I-NEXT:    lbu s3, 28(a0)
2323; RV64I-NEXT:    lbu s4, 29(a0)
2324; RV64I-NEXT:    lbu s5, 30(a0)
2325; RV64I-NEXT:    lbu s6, 31(a0)
2326; RV64I-NEXT:    slli s10, s10, 16
2327; RV64I-NEXT:    slli s11, s11, 24
2328; RV64I-NEXT:    slli s0, s0, 8
2329; RV64I-NEXT:    slli s1, s1, 16
2330; RV64I-NEXT:    slli s2, s2, 24
2331; RV64I-NEXT:    slli s4, s4, 8
2332; RV64I-NEXT:    or a0, s11, s10
2333; RV64I-NEXT:    or t6, s0, t6
2334; RV64I-NEXT:    or s0, s2, s1
2335; RV64I-NEXT:    or s1, s4, s3
2336; RV64I-NEXT:    lbu s2, 0(a1)
2337; RV64I-NEXT:    lbu s3, 1(a1)
2338; RV64I-NEXT:    lbu s4, 2(a1)
2339; RV64I-NEXT:    lbu s7, 3(a1)
2340; RV64I-NEXT:    slli s5, s5, 16
2341; RV64I-NEXT:    slli s6, s6, 24
2342; RV64I-NEXT:    slli s3, s3, 8
2343; RV64I-NEXT:    slli s4, s4, 16
2344; RV64I-NEXT:    slli s7, s7, 24
2345; RV64I-NEXT:    or s5, s6, s5
2346; RV64I-NEXT:    or s2, s3, s2
2347; RV64I-NEXT:    lbu s3, 4(a1)
2348; RV64I-NEXT:    lbu s6, 5(a1)
2349; RV64I-NEXT:    or s4, s7, s4
2350; RV64I-NEXT:    lbu s7, 6(a1)
2351; RV64I-NEXT:    lbu a1, 7(a1)
2352; RV64I-NEXT:    slli s6, s6, 8
2353; RV64I-NEXT:    or s3, s6, s3
2354; RV64I-NEXT:    slli s7, s7, 16
2355; RV64I-NEXT:    slli a1, a1, 24
2356; RV64I-NEXT:    or a1, a1, s7
2357; RV64I-NEXT:    mv s6, sp
2358; RV64I-NEXT:    or a3, a4, a3
2359; RV64I-NEXT:    or a4, a6, a5
2360; RV64I-NEXT:    or a5, t0, a7
2361; RV64I-NEXT:    or a6, t2, t1
2362; RV64I-NEXT:    or a7, t4, t3
2363; RV64I-NEXT:    or a0, a0, t5
2364; RV64I-NEXT:    or t0, s0, t6
2365; RV64I-NEXT:    or t1, s5, s1
2366; RV64I-NEXT:    or t2, s4, s2
2367; RV64I-NEXT:    or a1, a1, s3
2368; RV64I-NEXT:    slli a4, a4, 32
2369; RV64I-NEXT:    slli a6, a6, 32
2370; RV64I-NEXT:    slli a0, a0, 32
2371; RV64I-NEXT:    slli t3, t1, 32
2372; RV64I-NEXT:    slli a1, a1, 32
2373; RV64I-NEXT:    sraiw t1, t1, 31
2374; RV64I-NEXT:    or a3, a4, a3
2375; RV64I-NEXT:    or a4, a6, a5
2376; RV64I-NEXT:    or a0, a0, a7
2377; RV64I-NEXT:    or a5, t3, t0
2378; RV64I-NEXT:    or a1, a1, t2
2379; RV64I-NEXT:    sd t1, 32(sp)
2380; RV64I-NEXT:    sd t1, 40(sp)
2381; RV64I-NEXT:    sd t1, 48(sp)
2382; RV64I-NEXT:    sd t1, 56(sp)
2383; RV64I-NEXT:    sd a3, 0(sp)
2384; RV64I-NEXT:    sd a4, 8(sp)
2385; RV64I-NEXT:    sd a0, 16(sp)
2386; RV64I-NEXT:    sd a5, 24(sp)
2387; RV64I-NEXT:    srli a0, a1, 3
2388; RV64I-NEXT:    andi a3, a1, 63
2389; RV64I-NEXT:    andi a0, a0, 24
2390; RV64I-NEXT:    add a0, s6, a0
2391; RV64I-NEXT:    ld a4, 0(a0)
2392; RV64I-NEXT:    ld a5, 8(a0)
2393; RV64I-NEXT:    ld a6, 16(a0)
2394; RV64I-NEXT:    xori a3, a3, 63
2395; RV64I-NEXT:    ld a0, 24(a0)
2396; RV64I-NEXT:    srl a7, a5, a1
2397; RV64I-NEXT:    slli t0, a6, 1
2398; RV64I-NEXT:    srl a4, a4, a1
2399; RV64I-NEXT:    slli a5, a5, 1
2400; RV64I-NEXT:    srl a6, a6, a1
2401; RV64I-NEXT:    slli t1, a0, 1
2402; RV64I-NEXT:    sra t2, a0, a1
2403; RV64I-NEXT:    sll a0, t0, a3
2404; RV64I-NEXT:    sll a1, a5, a3
2405; RV64I-NEXT:    sll a3, t1, a3
2406; RV64I-NEXT:    srli a5, t2, 56
2407; RV64I-NEXT:    srli t0, t2, 48
2408; RV64I-NEXT:    srli t1, t2, 40
2409; RV64I-NEXT:    srli t3, t2, 32
2410; RV64I-NEXT:    srli t4, t2, 24
2411; RV64I-NEXT:    srli t5, t2, 16
2412; RV64I-NEXT:    srli t6, t2, 8
2413; RV64I-NEXT:    or a0, a7, a0
2414; RV64I-NEXT:    or a1, a4, a1
2415; RV64I-NEXT:    or a3, a6, a3
2416; RV64I-NEXT:    sb t3, 28(a2)
2417; RV64I-NEXT:    sb t1, 29(a2)
2418; RV64I-NEXT:    sb t0, 30(a2)
2419; RV64I-NEXT:    sb a5, 31(a2)
2420; RV64I-NEXT:    sb t2, 24(a2)
2421; RV64I-NEXT:    sb t6, 25(a2)
2422; RV64I-NEXT:    sb t5, 26(a2)
2423; RV64I-NEXT:    sb t4, 27(a2)
2424; RV64I-NEXT:    srli a4, a3, 56
2425; RV64I-NEXT:    srli a5, a3, 48
2426; RV64I-NEXT:    srli a6, a3, 40
2427; RV64I-NEXT:    srli a7, a3, 32
2428; RV64I-NEXT:    srli t0, a3, 24
2429; RV64I-NEXT:    srli t1, a3, 16
2430; RV64I-NEXT:    srli t2, a3, 8
2431; RV64I-NEXT:    srli t3, a1, 56
2432; RV64I-NEXT:    srli t4, a1, 48
2433; RV64I-NEXT:    srli t5, a1, 40
2434; RV64I-NEXT:    srli t6, a1, 32
2435; RV64I-NEXT:    srli s0, a1, 24
2436; RV64I-NEXT:    srli s1, a1, 16
2437; RV64I-NEXT:    srli s2, a1, 8
2438; RV64I-NEXT:    srli s3, a0, 56
2439; RV64I-NEXT:    srli s4, a0, 48
2440; RV64I-NEXT:    srli s5, a0, 40
2441; RV64I-NEXT:    srli s6, a0, 32
2442; RV64I-NEXT:    sb a7, 20(a2)
2443; RV64I-NEXT:    sb a6, 21(a2)
2444; RV64I-NEXT:    sb a5, 22(a2)
2445; RV64I-NEXT:    sb a4, 23(a2)
2446; RV64I-NEXT:    srli a4, a0, 24
2447; RV64I-NEXT:    sb a3, 16(a2)
2448; RV64I-NEXT:    sb t2, 17(a2)
2449; RV64I-NEXT:    sb t1, 18(a2)
2450; RV64I-NEXT:    sb t0, 19(a2)
2451; RV64I-NEXT:    srli a3, a0, 16
2452; RV64I-NEXT:    sb t6, 4(a2)
2453; RV64I-NEXT:    sb t5, 5(a2)
2454; RV64I-NEXT:    sb t4, 6(a2)
2455; RV64I-NEXT:    sb t3, 7(a2)
2456; RV64I-NEXT:    srli a5, a0, 8
2457; RV64I-NEXT:    sb a1, 0(a2)
2458; RV64I-NEXT:    sb s2, 1(a2)
2459; RV64I-NEXT:    sb s1, 2(a2)
2460; RV64I-NEXT:    sb s0, 3(a2)
2461; RV64I-NEXT:    sb s6, 12(a2)
2462; RV64I-NEXT:    sb s5, 13(a2)
2463; RV64I-NEXT:    sb s4, 14(a2)
2464; RV64I-NEXT:    sb s3, 15(a2)
2465; RV64I-NEXT:    sb a0, 8(a2)
2466; RV64I-NEXT:    sb a5, 9(a2)
2467; RV64I-NEXT:    sb a3, 10(a2)
2468; RV64I-NEXT:    sb a4, 11(a2)
2469; RV64I-NEXT:    ld s0, 152(sp) # 8-byte Folded Reload
2470; RV64I-NEXT:    ld s1, 144(sp) # 8-byte Folded Reload
2471; RV64I-NEXT:    ld s2, 136(sp) # 8-byte Folded Reload
2472; RV64I-NEXT:    ld s3, 128(sp) # 8-byte Folded Reload
2473; RV64I-NEXT:    ld s4, 120(sp) # 8-byte Folded Reload
2474; RV64I-NEXT:    ld s5, 112(sp) # 8-byte Folded Reload
2475; RV64I-NEXT:    ld s6, 104(sp) # 8-byte Folded Reload
2476; RV64I-NEXT:    ld s7, 96(sp) # 8-byte Folded Reload
2477; RV64I-NEXT:    ld s8, 88(sp) # 8-byte Folded Reload
2478; RV64I-NEXT:    ld s9, 80(sp) # 8-byte Folded Reload
2479; RV64I-NEXT:    ld s10, 72(sp) # 8-byte Folded Reload
2480; RV64I-NEXT:    ld s11, 64(sp) # 8-byte Folded Reload
2481; RV64I-NEXT:    addi sp, sp, 160
2482; RV64I-NEXT:    ret
2483;
2484; RV32I-LABEL: ashr_32bytes:
2485; RV32I:       # %bb.0:
2486; RV32I-NEXT:    addi sp, sp, -128
2487; RV32I-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
2488; RV32I-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
2489; RV32I-NEXT:    sw s1, 116(sp) # 4-byte Folded Spill
2490; RV32I-NEXT:    sw s2, 112(sp) # 4-byte Folded Spill
2491; RV32I-NEXT:    sw s3, 108(sp) # 4-byte Folded Spill
2492; RV32I-NEXT:    sw s4, 104(sp) # 4-byte Folded Spill
2493; RV32I-NEXT:    sw s5, 100(sp) # 4-byte Folded Spill
2494; RV32I-NEXT:    sw s6, 96(sp) # 4-byte Folded Spill
2495; RV32I-NEXT:    sw s7, 92(sp) # 4-byte Folded Spill
2496; RV32I-NEXT:    sw s8, 88(sp) # 4-byte Folded Spill
2497; RV32I-NEXT:    sw s9, 84(sp) # 4-byte Folded Spill
2498; RV32I-NEXT:    sw s10, 80(sp) # 4-byte Folded Spill
2499; RV32I-NEXT:    sw s11, 76(sp) # 4-byte Folded Spill
2500; RV32I-NEXT:    lbu a3, 0(a0)
2501; RV32I-NEXT:    lbu a4, 1(a0)
2502; RV32I-NEXT:    lbu a6, 2(a0)
2503; RV32I-NEXT:    lbu a7, 3(a0)
2504; RV32I-NEXT:    lbu a5, 4(a0)
2505; RV32I-NEXT:    lbu t0, 5(a0)
2506; RV32I-NEXT:    lbu t1, 6(a0)
2507; RV32I-NEXT:    lbu t2, 7(a0)
2508; RV32I-NEXT:    lbu t3, 8(a0)
2509; RV32I-NEXT:    lbu t4, 9(a0)
2510; RV32I-NEXT:    lbu t5, 10(a0)
2511; RV32I-NEXT:    lbu t6, 11(a0)
2512; RV32I-NEXT:    lbu s0, 12(a0)
2513; RV32I-NEXT:    lbu s1, 13(a0)
2514; RV32I-NEXT:    lbu s2, 14(a0)
2515; RV32I-NEXT:    lbu s3, 15(a0)
2516; RV32I-NEXT:    lbu s4, 16(a0)
2517; RV32I-NEXT:    lbu s5, 17(a0)
2518; RV32I-NEXT:    lbu s6, 18(a0)
2519; RV32I-NEXT:    lbu s7, 19(a0)
2520; RV32I-NEXT:    slli a4, a4, 8
2521; RV32I-NEXT:    slli a6, a6, 16
2522; RV32I-NEXT:    slli a7, a7, 24
2523; RV32I-NEXT:    or a3, a4, a3
2524; RV32I-NEXT:    sw a3, 4(sp) # 4-byte Folded Spill
2525; RV32I-NEXT:    or a4, a7, a6
2526; RV32I-NEXT:    lbu s8, 20(a0)
2527; RV32I-NEXT:    lbu s9, 21(a0)
2528; RV32I-NEXT:    lbu s10, 22(a0)
2529; RV32I-NEXT:    lbu s11, 23(a0)
2530; RV32I-NEXT:    slli t0, t0, 8
2531; RV32I-NEXT:    slli t1, t1, 16
2532; RV32I-NEXT:    slli t2, t2, 24
2533; RV32I-NEXT:    slli t4, t4, 8
2534; RV32I-NEXT:    slli t5, t5, 16
2535; RV32I-NEXT:    slli t6, t6, 24
2536; RV32I-NEXT:    or a5, t0, a5
2537; RV32I-NEXT:    or a6, t2, t1
2538; RV32I-NEXT:    or a7, t4, t3
2539; RV32I-NEXT:    or t0, t6, t5
2540; RV32I-NEXT:    lbu ra, 24(a0)
2541; RV32I-NEXT:    lbu a3, 25(a0)
2542; RV32I-NEXT:    lbu t4, 26(a0)
2543; RV32I-NEXT:    lbu t5, 27(a0)
2544; RV32I-NEXT:    slli s1, s1, 8
2545; RV32I-NEXT:    slli s2, s2, 16
2546; RV32I-NEXT:    slli s3, s3, 24
2547; RV32I-NEXT:    slli s5, s5, 8
2548; RV32I-NEXT:    or t1, s1, s0
2549; RV32I-NEXT:    or t2, s3, s2
2550; RV32I-NEXT:    or t3, s5, s4
2551; RV32I-NEXT:    lbu t6, 28(a0)
2552; RV32I-NEXT:    lbu s0, 29(a0)
2553; RV32I-NEXT:    lbu s1, 30(a0)
2554; RV32I-NEXT:    lbu a0, 31(a0)
2555; RV32I-NEXT:    slli s6, s6, 16
2556; RV32I-NEXT:    slli s7, s7, 24
2557; RV32I-NEXT:    slli s9, s9, 8
2558; RV32I-NEXT:    slli s10, s10, 16
2559; RV32I-NEXT:    slli s11, s11, 24
2560; RV32I-NEXT:    or s2, s7, s6
2561; RV32I-NEXT:    or s3, s9, s8
2562; RV32I-NEXT:    or s4, s11, s10
2563; RV32I-NEXT:    lbu s5, 0(a1)
2564; RV32I-NEXT:    lbu s6, 1(a1)
2565; RV32I-NEXT:    lbu s7, 2(a1)
2566; RV32I-NEXT:    lbu a1, 3(a1)
2567; RV32I-NEXT:    slli a3, a3, 8
2568; RV32I-NEXT:    or a3, a3, ra
2569; RV32I-NEXT:    addi s8, sp, 8
2570; RV32I-NEXT:    slli t4, t4, 16
2571; RV32I-NEXT:    slli t5, t5, 24
2572; RV32I-NEXT:    slli s0, s0, 8
2573; RV32I-NEXT:    slli s1, s1, 16
2574; RV32I-NEXT:    slli a0, a0, 24
2575; RV32I-NEXT:    slli s6, s6, 8
2576; RV32I-NEXT:    slli s7, s7, 16
2577; RV32I-NEXT:    slli a1, a1, 24
2578; RV32I-NEXT:    or t4, t5, t4
2579; RV32I-NEXT:    or t5, s0, t6
2580; RV32I-NEXT:    or s1, a0, s1
2581; RV32I-NEXT:    or t6, s6, s5
2582; RV32I-NEXT:    or a1, a1, s7
2583; RV32I-NEXT:    srai s0, a0, 31
2584; RV32I-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
2585; RV32I-NEXT:    or a4, a4, a0
2586; RV32I-NEXT:    or a5, a6, a5
2587; RV32I-NEXT:    or a6, t0, a7
2588; RV32I-NEXT:    or a7, t2, t1
2589; RV32I-NEXT:    or t0, s2, t3
2590; RV32I-NEXT:    or t1, s4, s3
2591; RV32I-NEXT:    or a3, t4, a3
2592; RV32I-NEXT:    or t2, s1, t5
2593; RV32I-NEXT:    or a0, a1, t6
2594; RV32I-NEXT:    sw s0, 56(sp)
2595; RV32I-NEXT:    sw s0, 60(sp)
2596; RV32I-NEXT:    sw s0, 64(sp)
2597; RV32I-NEXT:    sw s0, 68(sp)
2598; RV32I-NEXT:    sw s0, 40(sp)
2599; RV32I-NEXT:    sw s0, 44(sp)
2600; RV32I-NEXT:    sw s0, 48(sp)
2601; RV32I-NEXT:    sw s0, 52(sp)
2602; RV32I-NEXT:    sw t0, 24(sp)
2603; RV32I-NEXT:    sw t1, 28(sp)
2604; RV32I-NEXT:    sw a3, 32(sp)
2605; RV32I-NEXT:    sw t2, 36(sp)
2606; RV32I-NEXT:    sw a4, 8(sp)
2607; RV32I-NEXT:    sw a5, 12(sp)
2608; RV32I-NEXT:    sw a6, 16(sp)
2609; RV32I-NEXT:    sw a7, 20(sp)
2610; RV32I-NEXT:    srli a1, a0, 3
2611; RV32I-NEXT:    andi a3, a0, 31
2612; RV32I-NEXT:    andi a4, a1, 28
2613; RV32I-NEXT:    xori a1, a3, 31
2614; RV32I-NEXT:    add a4, s8, a4
2615; RV32I-NEXT:    lw a3, 0(a4)
2616; RV32I-NEXT:    lw a5, 4(a4)
2617; RV32I-NEXT:    lw a6, 8(a4)
2618; RV32I-NEXT:    lw a7, 12(a4)
2619; RV32I-NEXT:    lw t0, 16(a4)
2620; RV32I-NEXT:    lw t1, 20(a4)
2621; RV32I-NEXT:    lw t2, 24(a4)
2622; RV32I-NEXT:    lw a4, 28(a4)
2623; RV32I-NEXT:    srl t3, a5, a0
2624; RV32I-NEXT:    slli t4, a6, 1
2625; RV32I-NEXT:    srl a3, a3, a0
2626; RV32I-NEXT:    slli a5, a5, 1
2627; RV32I-NEXT:    srl t5, a7, a0
2628; RV32I-NEXT:    slli t6, t0, 1
2629; RV32I-NEXT:    srl a6, a6, a0
2630; RV32I-NEXT:    slli a7, a7, 1
2631; RV32I-NEXT:    srl s0, t1, a0
2632; RV32I-NEXT:    slli s1, t2, 1
2633; RV32I-NEXT:    srl t0, t0, a0
2634; RV32I-NEXT:    slli t1, t1, 1
2635; RV32I-NEXT:    srl t2, t2, a0
2636; RV32I-NEXT:    slli s2, a4, 1
2637; RV32I-NEXT:    sra s3, a4, a0
2638; RV32I-NEXT:    sll a0, t4, a1
2639; RV32I-NEXT:    sll a4, a5, a1
2640; RV32I-NEXT:    sll a5, t6, a1
2641; RV32I-NEXT:    sll a7, a7, a1
2642; RV32I-NEXT:    sll t4, s1, a1
2643; RV32I-NEXT:    sll t1, t1, a1
2644; RV32I-NEXT:    sll t6, s2, a1
2645; RV32I-NEXT:    srli s1, s3, 24
2646; RV32I-NEXT:    srli s2, s3, 16
2647; RV32I-NEXT:    srli s4, s3, 8
2648; RV32I-NEXT:    or a0, t3, a0
2649; RV32I-NEXT:    or a1, a3, a4
2650; RV32I-NEXT:    or a3, t5, a5
2651; RV32I-NEXT:    or a4, a6, a7
2652; RV32I-NEXT:    or a5, s0, t4
2653; RV32I-NEXT:    or a6, t0, t1
2654; RV32I-NEXT:    or a7, t2, t6
2655; RV32I-NEXT:    sb s3, 28(a2)
2656; RV32I-NEXT:    sb s4, 29(a2)
2657; RV32I-NEXT:    sb s2, 30(a2)
2658; RV32I-NEXT:    sb s1, 31(a2)
2659; RV32I-NEXT:    srli t0, a7, 24
2660; RV32I-NEXT:    srli t1, a7, 16
2661; RV32I-NEXT:    srli t2, a7, 8
2662; RV32I-NEXT:    srli t3, a6, 24
2663; RV32I-NEXT:    srli t4, a6, 16
2664; RV32I-NEXT:    srli t5, a6, 8
2665; RV32I-NEXT:    srli t6, a5, 24
2666; RV32I-NEXT:    srli s0, a5, 16
2667; RV32I-NEXT:    srli s1, a5, 8
2668; RV32I-NEXT:    srli s2, a4, 24
2669; RV32I-NEXT:    srli s3, a4, 16
2670; RV32I-NEXT:    srli s4, a4, 8
2671; RV32I-NEXT:    srli s5, a3, 24
2672; RV32I-NEXT:    srli s6, a3, 16
2673; RV32I-NEXT:    srli s7, a3, 8
2674; RV32I-NEXT:    srli s8, a1, 24
2675; RV32I-NEXT:    srli s9, a1, 16
2676; RV32I-NEXT:    sb a7, 24(a2)
2677; RV32I-NEXT:    sb t2, 25(a2)
2678; RV32I-NEXT:    sb t1, 26(a2)
2679; RV32I-NEXT:    sb t0, 27(a2)
2680; RV32I-NEXT:    srli a7, a1, 8
2681; RV32I-NEXT:    sb a6, 16(a2)
2682; RV32I-NEXT:    sb t5, 17(a2)
2683; RV32I-NEXT:    sb t4, 18(a2)
2684; RV32I-NEXT:    sb t3, 19(a2)
2685; RV32I-NEXT:    srli a6, a0, 24
2686; RV32I-NEXT:    sb a5, 20(a2)
2687; RV32I-NEXT:    sb s1, 21(a2)
2688; RV32I-NEXT:    sb s0, 22(a2)
2689; RV32I-NEXT:    sb t6, 23(a2)
2690; RV32I-NEXT:    srli a5, a0, 16
2691; RV32I-NEXT:    sb a4, 8(a2)
2692; RV32I-NEXT:    sb s4, 9(a2)
2693; RV32I-NEXT:    sb s3, 10(a2)
2694; RV32I-NEXT:    sb s2, 11(a2)
2695; RV32I-NEXT:    srli a4, a0, 8
2696; RV32I-NEXT:    sb a3, 12(a2)
2697; RV32I-NEXT:    sb s7, 13(a2)
2698; RV32I-NEXT:    sb s6, 14(a2)
2699; RV32I-NEXT:    sb s5, 15(a2)
2700; RV32I-NEXT:    sb a1, 0(a2)
2701; RV32I-NEXT:    sb a7, 1(a2)
2702; RV32I-NEXT:    sb s9, 2(a2)
2703; RV32I-NEXT:    sb s8, 3(a2)
2704; RV32I-NEXT:    sb a0, 4(a2)
2705; RV32I-NEXT:    sb a4, 5(a2)
2706; RV32I-NEXT:    sb a5, 6(a2)
2707; RV32I-NEXT:    sb a6, 7(a2)
2708; RV32I-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
2709; RV32I-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
2710; RV32I-NEXT:    lw s1, 116(sp) # 4-byte Folded Reload
2711; RV32I-NEXT:    lw s2, 112(sp) # 4-byte Folded Reload
2712; RV32I-NEXT:    lw s3, 108(sp) # 4-byte Folded Reload
2713; RV32I-NEXT:    lw s4, 104(sp) # 4-byte Folded Reload
2714; RV32I-NEXT:    lw s5, 100(sp) # 4-byte Folded Reload
2715; RV32I-NEXT:    lw s6, 96(sp) # 4-byte Folded Reload
2716; RV32I-NEXT:    lw s7, 92(sp) # 4-byte Folded Reload
2717; RV32I-NEXT:    lw s8, 88(sp) # 4-byte Folded Reload
2718; RV32I-NEXT:    lw s9, 84(sp) # 4-byte Folded Reload
2719; RV32I-NEXT:    lw s10, 80(sp) # 4-byte Folded Reload
2720; RV32I-NEXT:    lw s11, 76(sp) # 4-byte Folded Reload
2721; RV32I-NEXT:    addi sp, sp, 128
2722; RV32I-NEXT:    ret
2723  %src = load i256, ptr %src.ptr, align 1
2724  %bitOff = load i256, ptr %bitOff.ptr, align 1
2725  %res = ashr i256 %src, %bitOff
2726  store i256 %res, ptr %dst, align 1
2727  ret void
2728}
2729