xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-shifts-scalar.ll (revision b5b663aac17415625340eb29c8010832bfc4c21c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -O3 -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve %s --verify-machineinstrs -o - | FileCheck %s
3
; sink_shl_i32: codegen test for `shl <4 x i32>` by a loop-invariant scalar.
; %shift is splatted to a vector in %vector.ph (outside the loop) and consumed
; by the `shl` in %vector.body. The CHECK lines expect the loop body to be
; exactly load / `vshl.u32 q0, r2` / store / `le`, i.e. the shift amount is
; taken from a general-purpose register and no vector splat instruction is
; present in the checked output.
; NOTE(review): r2 is presumably %shift (third integer argument) - confirm
; against the calling convention.
4define dso_local arm_aapcs_vfpcc void @sink_shl_i32(ptr nocapture readonly %in, ptr noalias nocapture %out, i32 %shift, i32 %N) {
5; CHECK-LABEL: sink_shl_i32:
6; CHECK:       @ %bb.0: @ %entry
7; CHECK-NEXT:    .save {r7, lr}
8; CHECK-NEXT:    push {r7, lr}
9; CHECK-NEXT:    bic r3, r3, #3
10; CHECK-NEXT:    sub.w r12, r3, #4
11; CHECK-NEXT:    movs r3, #1
12; CHECK-NEXT:    add.w lr, r3, r12, lsr #2
13; CHECK-NEXT:  .LBB0_1: @ %vector.body
14; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
15; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
16; CHECK-NEXT:    vshl.u32 q0, r2
17; CHECK-NEXT:    vstrb.8 q0, [r1], #16
18; CHECK-NEXT:    le lr, .LBB0_1
19; CHECK-NEXT:  @ %bb.2: @ %exit
20; CHECK-NEXT:    pop {r7, pc}
21entry:
22  br label %vector.ph
23
24vector.ph:
  ; Round %N down to a multiple of 4 and broadcast %shift to all 4 lanes
  ; (insertelement into lane 0, then shuffle with an all-zero mask).
25  %n.vec = and i32 %N, -4
26  %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %shift, i32 0
27  %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
28  br label %vector.body
29
30vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Each iteration loads 4 x i32 from %in, shifts left by the splat, and
  ; stores 4 x i32 to %out at the same element index.
31  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
32  %gep.in = getelementptr inbounds i32, ptr %in, i32 %index
33  %wide.load = load <4 x i32>, ptr %gep.in, align 4
34  %res = shl <4 x i32> %wide.load, %broadcast.splat11
35  %gep.out = getelementptr inbounds i32, ptr %out, i32 %index
36  store <4 x i32> %res, ptr %gep.out, align 4
  ; Step by 4 elements; the body always runs at least once because the exit
  ; test is evaluated after the step.
37  %index.next = add i32 %index, 4
38  %cmp = icmp eq i32 %index.next, %n.vec
39  br i1 %cmp, label %exit, label %vector.body
40
41exit:
42  ret void
43}
44
; sink_shl_i16: codegen test for `shl <8 x i16>` by a loop-invariant scalar.
; %shift (i16) is splatted in %vector.ph; the CHECK lines expect the loop body
; to use `vshl.u16 q0, r2` with no vector splat instruction in the checked
; output.
; NOTE(review): %index advances by 4 elements per iteration although each
; load/store covers 8 x i16, so consecutive iterations overlap by half a
; vector. The expected post-increment of #8 bytes in the CHECK lines matches
; the IR as written.
45define dso_local arm_aapcs_vfpcc void @sink_shl_i16(ptr nocapture readonly %in, ptr noalias nocapture %out, i16 %shift, i32 %N) {
46; CHECK-LABEL: sink_shl_i16:
47; CHECK:       @ %bb.0: @ %entry
48; CHECK-NEXT:    .save {r7, lr}
49; CHECK-NEXT:    push {r7, lr}
50; CHECK-NEXT:    bic r3, r3, #3
51; CHECK-NEXT:    sub.w r12, r3, #4
52; CHECK-NEXT:    movs r3, #1
53; CHECK-NEXT:    add.w lr, r3, r12, lsr #2
54; CHECK-NEXT:  .LBB1_1: @ %vector.body
55; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
56; CHECK-NEXT:    vldrw.u32 q0, [r0], #8
57; CHECK-NEXT:    vshl.u16 q0, r2
58; CHECK-NEXT:    vstrb.8 q0, [r1], #8
59; CHECK-NEXT:    le lr, .LBB1_1
60; CHECK-NEXT:  @ %bb.2: @ %exit
61; CHECK-NEXT:    pop {r7, pc}
62entry:
63  br label %vector.ph
64
65vector.ph:
  ; Round %N down to a multiple of 4 and broadcast %shift to all 8 lanes.
66  %n.vec = and i32 %N, -4
67  %broadcast.splatinsert10 = insertelement <8 x i16> undef, i16 %shift, i32 0
68  %broadcast.splat11 = shufflevector <8 x i16> %broadcast.splatinsert10, <8 x i16> undef, <8 x i32> zeroinitializer
69  br label %vector.body
70
71vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Load 8 x i16 from %in, shift left by the splat, store 8 x i16 to %out.
72  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
73  %gep.in = getelementptr inbounds i16, ptr %in, i32 %index
74  %wide.load = load <8 x i16>, ptr %gep.in, align 4
75  %res = shl <8 x i16> %wide.load, %broadcast.splat11
76  %gep.out = getelementptr inbounds i16, ptr %out, i32 %index
77  store <8 x i16> %res, ptr %gep.out, align 4
  ; Step by 4 elements (8 bytes for i16); exit test runs after the step.
78  %index.next = add i32 %index, 4
79  %cmp = icmp eq i32 %index.next, %n.vec
80  br i1 %cmp, label %exit, label %vector.body
81
82exit:
83  ret void
84}
85
; sink_shl_i8: codegen test for `shl <16 x i8>` by a loop-invariant scalar.
; %shift (i8) is splatted in %vector.ph; the CHECK lines expect the loop body
; to use `vshl.u8 q0, r2` with no vector splat instruction in the checked
; output.
; NOTE(review): %index advances by 4 elements per iteration although each
; load/store covers 16 x i8, so consecutive iterations overlap by 12 bytes.
; The expected post-increment of #4 bytes in the CHECK lines matches the IR
; as written.
86define dso_local arm_aapcs_vfpcc void @sink_shl_i8(ptr nocapture readonly %in, ptr noalias nocapture %out, i8 %shift, i32 %N) {
87; CHECK-LABEL: sink_shl_i8:
88; CHECK:       @ %bb.0: @ %entry
89; CHECK-NEXT:    .save {r7, lr}
90; CHECK-NEXT:    push {r7, lr}
91; CHECK-NEXT:    bic r3, r3, #3
92; CHECK-NEXT:    sub.w r12, r3, #4
93; CHECK-NEXT:    movs r3, #1
94; CHECK-NEXT:    add.w lr, r3, r12, lsr #2
95; CHECK-NEXT:  .LBB2_1: @ %vector.body
96; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
97; CHECK-NEXT:    vldrw.u32 q0, [r0], #4
98; CHECK-NEXT:    vshl.u8 q0, r2
99; CHECK-NEXT:    vstrb.8 q0, [r1], #4
100; CHECK-NEXT:    le lr, .LBB2_1
101; CHECK-NEXT:  @ %bb.2: @ %exit
102; CHECK-NEXT:    pop {r7, pc}
103entry:
104  br label %vector.ph
105
106vector.ph:
  ; Round %N down to a multiple of 4 and broadcast %shift to all 16 lanes.
107  %n.vec = and i32 %N, -4
108  %broadcast.splatinsert10 = insertelement <16 x i8> undef, i8 %shift, i32 0
109  %broadcast.splat11 = shufflevector <16 x i8> %broadcast.splatinsert10, <16 x i8> undef, <16 x i32> zeroinitializer
110  br label %vector.body
111
112vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Load 16 x i8 from %in, shift left by the splat, store 16 x i8 to %out.
113  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
114  %gep.in = getelementptr inbounds i8, ptr %in, i32 %index
115  %wide.load = load <16 x i8>, ptr %gep.in, align 4
116  %res = shl <16 x i8> %wide.load, %broadcast.splat11
117  %gep.out = getelementptr inbounds i8, ptr %out, i32 %index
118  store <16 x i8> %res, ptr %gep.out, align 4
  ; Step by 4 elements (4 bytes for i8); exit test runs after the step.
119  %index.next = add i32 %index, 4
120  %cmp = icmp eq i32 %index.next, %n.vec
121  br i1 %cmp, label %exit, label %vector.body
122
123exit:
124  ret void
125}
126
; sink_lshr_i32: codegen test for `lshr <4 x i32>` by a loop-invariant scalar.
; %shift is splatted in %vector.ph. The CHECK lines expect the scalar to be
; negated once before the loop (`rsbs r2, r2, #0`) and the loop body to use
; the unsigned form `vshl.u32 q0, r2`, with no vector splat instruction in the
; checked output.
; NOTE(review): presumably a register-operand vshl with a negative amount
; performs a right shift - confirm against the MVE VSHL description.
127define dso_local arm_aapcs_vfpcc void @sink_lshr_i32(ptr nocapture readonly %in, ptr noalias nocapture %out, i32 %shift, i32 %N) {
128; CHECK-LABEL: sink_lshr_i32:
129; CHECK:       @ %bb.0: @ %entry
130; CHECK-NEXT:    .save {r7, lr}
131; CHECK-NEXT:    push {r7, lr}
132; CHECK-NEXT:    bic r3, r3, #3
133; CHECK-NEXT:    rsbs r2, r2, #0
134; CHECK-NEXT:    sub.w r12, r3, #4
135; CHECK-NEXT:    movs r3, #1
136; CHECK-NEXT:    add.w lr, r3, r12, lsr #2
137; CHECK-NEXT:  .LBB3_1: @ %vector.body
138; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
139; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
140; CHECK-NEXT:    vshl.u32 q0, r2
141; CHECK-NEXT:    vstrb.8 q0, [r1], #16
142; CHECK-NEXT:    le lr, .LBB3_1
143; CHECK-NEXT:  @ %bb.2: @ %exit
144; CHECK-NEXT:    pop {r7, pc}
145entry:
146  br label %vector.ph
147
148vector.ph:
  ; Round %N down to a multiple of 4 and broadcast %shift to all 4 lanes.
149  %n.vec = and i32 %N, -4
150  %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %shift, i32 0
151  %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
152  br label %vector.body
153
154vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Load 4 x i32 from %in, logical-shift right by the splat, store to %out.
155  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
156  %gep.in = getelementptr inbounds i32, ptr %in, i32 %index
157  %wide.load = load <4 x i32>, ptr %gep.in, align 4
158  %res = lshr <4 x i32> %wide.load, %broadcast.splat11
159  %gep.out = getelementptr inbounds i32, ptr %out, i32 %index
160  store <4 x i32> %res, ptr %gep.out, align 4
  ; Step by 4 elements; exit test runs after the step.
161  %index.next = add i32 %index, 4
162  %cmp = icmp eq i32 %index.next, %n.vec
163  br i1 %cmp, label %exit, label %vector.body
164
165exit:
166  ret void
167}
168
; sink_lshr_i16: codegen test for `lshr <8 x i16>` by a loop-invariant scalar.
; %shift (i16) is splatted in %vector.ph. The CHECK lines expect the scalar to
; be negated once before the loop (`rsbs r2, r2, #0`) and the loop body to use
; `vshl.u16 q0, r2`, with no vector splat instruction in the checked output.
; NOTE(review): %index advances by 4 elements per iteration although each
; load/store covers 8 x i16, so consecutive iterations overlap by half a
; vector. The expected post-increment of #8 bytes matches the IR as written.
169define dso_local arm_aapcs_vfpcc void @sink_lshr_i16(ptr nocapture readonly %in, ptr noalias nocapture %out, i16 %shift, i32 %N) {
170; CHECK-LABEL: sink_lshr_i16:
171; CHECK:       @ %bb.0: @ %entry
172; CHECK-NEXT:    .save {r7, lr}
173; CHECK-NEXT:    push {r7, lr}
174; CHECK-NEXT:    bic r3, r3, #3
175; CHECK-NEXT:    rsbs r2, r2, #0
176; CHECK-NEXT:    sub.w r12, r3, #4
177; CHECK-NEXT:    movs r3, #1
178; CHECK-NEXT:    add.w lr, r3, r12, lsr #2
179; CHECK-NEXT:  .LBB4_1: @ %vector.body
180; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
181; CHECK-NEXT:    vldrw.u32 q0, [r0], #8
182; CHECK-NEXT:    vshl.u16 q0, r2
183; CHECK-NEXT:    vstrb.8 q0, [r1], #8
184; CHECK-NEXT:    le lr, .LBB4_1
185; CHECK-NEXT:  @ %bb.2: @ %exit
186; CHECK-NEXT:    pop {r7, pc}
187entry:
188  br label %vector.ph
189
190vector.ph:
  ; Round %N down to a multiple of 4 and broadcast %shift to all 8 lanes.
191  %n.vec = and i32 %N, -4
192  %broadcast.splatinsert10 = insertelement <8 x i16> undef, i16 %shift, i32 0
193  %broadcast.splat11 = shufflevector <8 x i16> %broadcast.splatinsert10, <8 x i16> undef, <8 x i32> zeroinitializer
194  br label %vector.body
195
196vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Load 8 x i16 from %in, logical-shift right by the splat, store to %out.
197  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
198  %gep.in = getelementptr inbounds i16, ptr %in, i32 %index
199  %wide.load = load <8 x i16>, ptr %gep.in, align 4
200  %res = lshr <8 x i16> %wide.load, %broadcast.splat11
201  %gep.out = getelementptr inbounds i16, ptr %out, i32 %index
202  store <8 x i16> %res, ptr %gep.out, align 4
  ; Step by 4 elements (8 bytes for i16); exit test runs after the step.
203  %index.next = add i32 %index, 4
204  %cmp = icmp eq i32 %index.next, %n.vec
205  br i1 %cmp, label %exit, label %vector.body
206
207exit:
208  ret void
209}
210
; sink_lshr_i8: codegen test for `lshr <16 x i8>` by a loop-invariant scalar.
; %shift (i8) is splatted in %vector.ph. The CHECK lines expect the scalar to
; be negated once before the loop (`rsbs r2, r2, #0`) and the loop body to use
; `vshl.u8 q0, r2`, with no vector splat instruction in the checked output.
; NOTE(review): %index advances by 4 elements per iteration although each
; load/store covers 16 x i8, so consecutive iterations overlap by 12 bytes.
; The expected post-increment of #4 bytes matches the IR as written.
211define dso_local arm_aapcs_vfpcc void @sink_lshr_i8(ptr nocapture readonly %in, ptr noalias nocapture %out, i8 %shift, i32 %N) {
212; CHECK-LABEL: sink_lshr_i8:
213; CHECK:       @ %bb.0: @ %entry
214; CHECK-NEXT:    .save {r7, lr}
215; CHECK-NEXT:    push {r7, lr}
216; CHECK-NEXT:    bic r3, r3, #3
217; CHECK-NEXT:    rsbs r2, r2, #0
218; CHECK-NEXT:    sub.w r12, r3, #4
219; CHECK-NEXT:    movs r3, #1
220; CHECK-NEXT:    add.w lr, r3, r12, lsr #2
221; CHECK-NEXT:  .LBB5_1: @ %vector.body
222; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
223; CHECK-NEXT:    vldrw.u32 q0, [r0], #4
224; CHECK-NEXT:    vshl.u8 q0, r2
225; CHECK-NEXT:    vstrb.8 q0, [r1], #4
226; CHECK-NEXT:    le lr, .LBB5_1
227; CHECK-NEXT:  @ %bb.2: @ %exit
228; CHECK-NEXT:    pop {r7, pc}
229entry:
230  br label %vector.ph
231
232vector.ph:
  ; Round %N down to a multiple of 4 and broadcast %shift to all 16 lanes.
233  %n.vec = and i32 %N, -4
234  %broadcast.splatinsert10 = insertelement <16 x i8> undef, i8 %shift, i32 0
235  %broadcast.splat11 = shufflevector <16 x i8> %broadcast.splatinsert10, <16 x i8> undef, <16 x i32> zeroinitializer
236  br label %vector.body
237
238vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Load 16 x i8 from %in, logical-shift right by the splat, store to %out.
239  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
240  %gep.in = getelementptr inbounds i8, ptr %in, i32 %index
241  %wide.load = load <16 x i8>, ptr %gep.in, align 4
242  %res = lshr <16 x i8> %wide.load, %broadcast.splat11
243  %gep.out = getelementptr inbounds i8, ptr %out, i32 %index
244  store <16 x i8> %res, ptr %gep.out, align 4
  ; Step by 4 elements (4 bytes for i8); exit test runs after the step.
245  %index.next = add i32 %index, 4
246  %cmp = icmp eq i32 %index.next, %n.vec
247  br i1 %cmp, label %exit, label %vector.body
248
249exit:
250  ret void
251}
252
; sink_ashr_i32: codegen test for `ashr <4 x i32>` by a loop-invariant scalar.
; %shift is splatted in %vector.ph. The CHECK lines expect the scalar to be
; negated once before the loop (`rsbs r2, r2, #0`) and the loop body to use
; the signed form `vshl.s32 q0, r2`, with no vector splat instruction in the
; checked output.
; NOTE(review): presumably a register-operand vshl with a negative amount
; performs a right shift, with the .s32 suffix selecting an arithmetic shift -
; confirm against the MVE VSHL description.
253define dso_local arm_aapcs_vfpcc void @sink_ashr_i32(ptr nocapture readonly %in, ptr noalias nocapture %out, i32 %shift, i32 %N) {
254; CHECK-LABEL: sink_ashr_i32:
255; CHECK:       @ %bb.0: @ %entry
256; CHECK-NEXT:    .save {r7, lr}
257; CHECK-NEXT:    push {r7, lr}
258; CHECK-NEXT:    bic r3, r3, #3
259; CHECK-NEXT:    rsbs r2, r2, #0
260; CHECK-NEXT:    sub.w r12, r3, #4
261; CHECK-NEXT:    movs r3, #1
262; CHECK-NEXT:    add.w lr, r3, r12, lsr #2
263; CHECK-NEXT:  .LBB6_1: @ %vector.body
264; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
265; CHECK-NEXT:    vldrw.u32 q0, [r0], #16
266; CHECK-NEXT:    vshl.s32 q0, r2
267; CHECK-NEXT:    vstrb.8 q0, [r1], #16
268; CHECK-NEXT:    le lr, .LBB6_1
269; CHECK-NEXT:  @ %bb.2: @ %exit
270; CHECK-NEXT:    pop {r7, pc}
271entry:
272  br label %vector.ph
273
274vector.ph:
  ; Round %N down to a multiple of 4 and broadcast %shift to all 4 lanes.
275  %n.vec = and i32 %N, -4
276  %broadcast.splatinsert10 = insertelement <4 x i32> undef, i32 %shift, i32 0
277  %broadcast.splat11 = shufflevector <4 x i32> %broadcast.splatinsert10, <4 x i32> undef, <4 x i32> zeroinitializer
278  br label %vector.body
279
280vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Load 4 x i32 from %in, arithmetic-shift right by the splat, store to %out.
281  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
282  %gep.in = getelementptr inbounds i32, ptr %in, i32 %index
283  %wide.load = load <4 x i32>, ptr %gep.in, align 4
284  %res = ashr <4 x i32> %wide.load, %broadcast.splat11
285  %gep.out = getelementptr inbounds i32, ptr %out, i32 %index
286  store <4 x i32> %res, ptr %gep.out, align 4
  ; Step by 4 elements; exit test runs after the step.
287  %index.next = add i32 %index, 4
288  %cmp = icmp eq i32 %index.next, %n.vec
289  br i1 %cmp, label %exit, label %vector.body
290
291exit:
292  ret void
293}
294
; sink_ashr_i16: codegen test for `ashr <8 x i16>` by a loop-invariant scalar.
; %shift (i16) is splatted in %vector.ph. The CHECK lines expect the scalar to
; be negated once before the loop (`rsbs r2, r2, #0`) and the loop body to use
; the signed form `vshl.s16 q0, r2`, with no vector splat instruction in the
; checked output.
; NOTE(review): %index advances by 4 elements per iteration although each
; load/store covers 8 x i16, so consecutive iterations overlap by half a
; vector. The expected post-increment of #8 bytes matches the IR as written.
295define dso_local arm_aapcs_vfpcc void @sink_ashr_i16(ptr nocapture readonly %in, ptr noalias nocapture %out, i16 %shift, i32 %N) {
296; CHECK-LABEL: sink_ashr_i16:
297; CHECK:       @ %bb.0: @ %entry
298; CHECK-NEXT:    .save {r7, lr}
299; CHECK-NEXT:    push {r7, lr}
300; CHECK-NEXT:    bic r3, r3, #3
301; CHECK-NEXT:    rsbs r2, r2, #0
302; CHECK-NEXT:    sub.w r12, r3, #4
303; CHECK-NEXT:    movs r3, #1
304; CHECK-NEXT:    add.w lr, r3, r12, lsr #2
305; CHECK-NEXT:  .LBB7_1: @ %vector.body
306; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
307; CHECK-NEXT:    vldrw.u32 q0, [r0], #8
308; CHECK-NEXT:    vshl.s16 q0, r2
309; CHECK-NEXT:    vstrb.8 q0, [r1], #8
310; CHECK-NEXT:    le lr, .LBB7_1
311; CHECK-NEXT:  @ %bb.2: @ %exit
312; CHECK-NEXT:    pop {r7, pc}
313entry:
314  br label %vector.ph
315
316vector.ph:
  ; Round %N down to a multiple of 4 and broadcast %shift to all 8 lanes.
317  %n.vec = and i32 %N, -4
318  %broadcast.splatinsert10 = insertelement <8 x i16> undef, i16 %shift, i32 0
319  %broadcast.splat11 = shufflevector <8 x i16> %broadcast.splatinsert10, <8 x i16> undef, <8 x i32> zeroinitializer
320  br label %vector.body
321
322vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Load 8 x i16 from %in, arithmetic-shift right by the splat, store to %out.
323  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
324  %gep.in = getelementptr inbounds i16, ptr %in, i32 %index
325  %wide.load = load <8 x i16>, ptr %gep.in, align 4
326  %res = ashr <8 x i16> %wide.load, %broadcast.splat11
327  %gep.out = getelementptr inbounds i16, ptr %out, i32 %index
328  store <8 x i16> %res, ptr %gep.out, align 4
  ; Step by 4 elements (8 bytes for i16); exit test runs after the step.
329  %index.next = add i32 %index, 4
330  %cmp = icmp eq i32 %index.next, %n.vec
331  br i1 %cmp, label %exit, label %vector.body
332
333exit:
334  ret void
335}
336
; sink_ashr_i8: codegen test for `ashr <16 x i8>` by a loop-invariant scalar.
; %shift (i8) is splatted in %vector.ph. The CHECK lines expect the scalar to
; be negated once before the loop (`rsbs r2, r2, #0`) and the loop body to use
; the signed form `vshl.s8 q0, r2`, with no vector splat instruction in the
; checked output.
; NOTE(review): %index advances by 4 elements per iteration although each
; load/store covers 16 x i8, so consecutive iterations overlap by 12 bytes.
; The expected post-increment of #4 bytes matches the IR as written.
337define dso_local arm_aapcs_vfpcc void @sink_ashr_i8(ptr nocapture readonly %in, ptr noalias nocapture %out, i8 %shift, i32 %N) {
338; CHECK-LABEL: sink_ashr_i8:
339; CHECK:       @ %bb.0: @ %entry
340; CHECK-NEXT:    .save {r7, lr}
341; CHECK-NEXT:    push {r7, lr}
342; CHECK-NEXT:    bic r3, r3, #3
343; CHECK-NEXT:    rsbs r2, r2, #0
344; CHECK-NEXT:    sub.w r12, r3, #4
345; CHECK-NEXT:    movs r3, #1
346; CHECK-NEXT:    add.w lr, r3, r12, lsr #2
347; CHECK-NEXT:  .LBB8_1: @ %vector.body
348; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
349; CHECK-NEXT:    vldrw.u32 q0, [r0], #4
350; CHECK-NEXT:    vshl.s8 q0, r2
351; CHECK-NEXT:    vstrb.8 q0, [r1], #4
352; CHECK-NEXT:    le lr, .LBB8_1
353; CHECK-NEXT:  @ %bb.2: @ %exit
354; CHECK-NEXT:    pop {r7, pc}
355entry:
356  br label %vector.ph
357
358vector.ph:
  ; Round %N down to a multiple of 4 and broadcast %shift to all 16 lanes.
359  %n.vec = and i32 %N, -4
360  %broadcast.splatinsert10 = insertelement <16 x i8> undef, i8 %shift, i32 0
361  %broadcast.splat11 = shufflevector <16 x i8> %broadcast.splatinsert10, <16 x i8> undef, <16 x i32> zeroinitializer
362  br label %vector.body
363
364vector.body:                                      ; preds = %vector.body, %vector.ph
  ; Load 16 x i8 from %in, arithmetic-shift right by the splat, store to %out.
365  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
366  %gep.in = getelementptr inbounds i8, ptr %in, i32 %index
367  %wide.load = load <16 x i8>, ptr %gep.in, align 4
368  %res = ashr <16 x i8> %wide.load, %broadcast.splat11
369  %gep.out = getelementptr inbounds i8, ptr %out, i32 %index
370  store <16 x i8> %res, ptr %gep.out, align 4
  ; Step by 4 elements (4 bytes for i8); exit test runs after the step.
371  %index.next = add i32 %index, 4
372  %cmp = icmp eq i32 %index.next, %n.vec
373  br i1 %cmp, label %exit, label %vector.body
374
375exit:
376  ret void
377}
378