; xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-widen-narrow.ll (revision f4f8f9f18590d6fdf531bb9d6981a6081a244d33)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE

; Truncating store <4 x i32> -> <4 x i8>: expect a single vldrw/vstrb.32 pair.
define void @foo_int8_int32(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int8_int32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <4 x i32>, ptr %src, align 4
  %0 = trunc <4 x i32> %wide.load to <4 x i8>
  store <4 x i8> %0, ptr %dest, align 1
  ret void
}

; Truncating store <4 x i32> -> <4 x i16>: expect a single vldrw/vstrh.32 pair.
define void @foo_int16_int32(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int16_int32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <4 x i32>, ptr %src, align 4
  %0 = trunc <4 x i32> %wide.load to <4 x i16>
  store <4 x i16> %0, ptr %dest, align 2
  ret void
}

; Truncating store <8 x i16> -> <8 x i8>: expect a single vldrh/vstrb.16 pair.
define void @foo_int8_int16(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int8_int16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <8 x i16>, ptr %src, align 2
  %0 = trunc <8 x i16> %wide.load to <8 x i8>
  store <8 x i8> %0, ptr %dest, align 1
  ret void
}


; Double-width truncating store <8 x i32> -> <8 x i8> (under-aligned load, align 2);
; codegen differs by endianness, hence separate CHECK-LE/CHECK-BE bodies.
define void @foo_int8_int32_double(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LE-LABEL: foo_int8_int32_double:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #16
; CHECK-LE-NEXT:    sub sp, #16
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1, #16]
; CHECK-LE-NEXT:    mov r2, sp
; CHECK-LE-NEXT:    vstrh.32 q0, [r2, #8]
; CHECK-LE-NEXT:    vldrh.u16 q0, [r1]
; CHECK-LE-NEXT:    vstrh.32 q0, [r2]
; CHECK-LE-NEXT:    vldrw.u32 q0, [r2]
; CHECK-LE-NEXT:    vstrb.16 q0, [r0]
; CHECK-LE-NEXT:    add sp, #16
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: foo_int8_int32_double:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #16
; CHECK-BE-NEXT:    sub sp, #16
; CHECK-BE-NEXT:    vldrb.u8 q0, [r1, #16]
; CHECK-BE-NEXT:    mov r2, sp
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrh.32 q0, [r2, #8]
; CHECK-BE-NEXT:    vldrb.u8 q0, [r1]
; CHECK-BE-NEXT:    vrev32.8 q0, q0
; CHECK-BE-NEXT:    vstrh.32 q0, [r2]
; CHECK-BE-NEXT:    vldrh.u16 q0, [r2]
; CHECK-BE-NEXT:    vstrb.16 q0, [r0]
; CHECK-BE-NEXT:    add sp, #16
; CHECK-BE-NEXT:    bx lr
entry:
  %wide.load = load <8 x i32>, ptr %src, align 2
  %0 = trunc <8 x i32> %wide.load to <8 x i8>
  store <8 x i8> %0, ptr %dest, align 1
  ret void
}

; Double-width truncating store <8 x i32> -> <8 x i16>: split into two vldrw/vstrh.32 pairs.
define void @foo_int16_int32_double(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int16_int32_double:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vldrw.u32 q1, [r1, #16]
; CHECK-NEXT:    vstrh.32 q1, [r0, #8]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <8 x i32>, ptr %src, align 4
  %0 = trunc <8 x i32> %wide.load to <8 x i16>
  store <8 x i16> %0, ptr %dest, align 2
  ret void
}

; Double-width truncating store <16 x i16> -> <16 x i8>: split into two vldrh/vstrb.16 pairs.
define void @foo_int8_int16_double(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int8_int16_double:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u16 q0, [r1]
; CHECK-NEXT:    vldrh.u16 q1, [r1, #16]
; CHECK-NEXT:    vstrb.16 q1, [r0, #8]
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <16 x i16>, ptr %src, align 2
  %0 = trunc <16 x i16> %wide.load to <16 x i8>
  store <16 x i8> %0, ptr %dest, align 1
  ret void
}

; Quad-width truncating store <16 x i32> -> <16 x i8>: split into four vldrw/vstrb.32 pairs.
define void @foo_int8_int32_quad(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int8_int32_quad:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    vldrw.u32 q1, [r1, #16]
; CHECK-NEXT:    vldrw.u32 q2, [r1, #32]
; CHECK-NEXT:    vldrw.u32 q3, [r1, #48]
; CHECK-NEXT:    vstrb.32 q1, [r0, #4]
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    vstrb.32 q3, [r0, #12]
; CHECK-NEXT:    vstrb.32 q2, [r0, #8]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <16 x i32>, ptr %src, align 4
  %0 = trunc <16 x i32> %wide.load to <16 x i8>
  store <16 x i8> %0, ptr %dest, align 1
  ret void
}


; Sign-extending load <4 x i8> -> <4 x i32>: expect a single vldrb.s32/vstrw pair.
define void @foo_int32_int8(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int32_int8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <4 x i8>, ptr %src, align 1
  %0 = sext <4 x i8> %wide.load to <4 x i32>
  store <4 x i32> %0, ptr %dest, align 4
  ret void
}

; Sign-extending load <8 x i8> -> <8 x i16>: expect a single vldrb.s16/vstrh pair.
define void @foo_int16_int8(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int16_int8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <8 x i8>, ptr %src, align 1
  %0 = sext <8 x i8> %wide.load to <8 x i16>
  store <8 x i16> %0, ptr %dest, align 2
  ret void
}

; Sign-extending load <4 x i16> -> <4 x i32>: expect a single vldrh.s32/vstrw pair.
define void @foo_int32_int16(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int32_int16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <4 x i16>, ptr %src, align 2
  %0 = sext <4 x i16> %wide.load to <4 x i32>
  store <4 x i32> %0, ptr %dest, align 4
  ret void
}

; Double-width sign-extending load <8 x i8> -> <8 x i32>: two vldrb.s32/vstrw pairs.
define void @foo_int32_int8_double(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int32_int8_double:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r1]
; CHECK-NEXT:    vldrb.s32 q1, [r1, #4]
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <8 x i8>, ptr %src, align 1
  %0 = sext <8 x i8> %wide.load to <8 x i32>
  store <8 x i32> %0, ptr %dest, align 4
  ret void
}

; Double-width sign-extending load <16 x i8> -> <16 x i16>: two vldrb.s16/vstrh pairs.
define void @foo_int16_int8_double(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int16_int8_double:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s16 q0, [r1]
; CHECK-NEXT:    vldrb.s16 q1, [r1, #8]
; CHECK-NEXT:    vstrh.16 q1, [r0, #16]
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <16 x i8>, ptr %src, align 1
  %0 = sext <16 x i8> %wide.load to <16 x i16>
  store <16 x i16> %0, ptr %dest, align 2
  ret void
}

; Double-width sign-extending load <8 x i16> -> <8 x i32>: two vldrh.s32/vstrw pairs.
define void @foo_int32_int16_double(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int32_int16_double:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <8 x i16>, ptr %src, align 2
  %0 = sext <8 x i16> %wide.load to <8 x i32>
  store <8 x i32> %0, ptr %dest, align 4
  ret void
}

; Quad-width sign-extending load <16 x i8> -> <16 x i32>: four vldrb.s32/vstrw pairs.
define void @foo_int32_int8_quad(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int32_int8_quad:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.s32 q0, [r1]
; CHECK-NEXT:    vldrb.s32 q1, [r1, #4]
; CHECK-NEXT:    vldrb.s32 q2, [r1, #8]
; CHECK-NEXT:    vldrb.s32 q3, [r1, #12]
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q3, [r0, #48]
; CHECK-NEXT:    vstrw.32 q2, [r0, #32]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <16 x i8>, ptr %src, align 1
  %0 = sext <16 x i8> %wide.load to <16 x i32>
  store <16 x i32> %0, ptr %dest, align 4
  ret void
}


; Zero-extending load <4 x i8> -> <4 x i32>: expect a single vldrb.u32/vstrw pair.
define void @foo_uint32_uint8(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <4 x i8>, ptr %src, align 1
  %0 = zext <4 x i8> %wide.load to <4 x i32>
  store <4 x i32> %0, ptr %dest, align 4
  ret void
}

; Zero-extending load <8 x i8> -> <8 x i16>: expect a single vldrb.u16/vstrh pair.
define void @foo_uint16_uint8(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint16_uint8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <8 x i8>, ptr %src, align 1
  %0 = zext <8 x i8> %wide.load to <8 x i16>
  store <8 x i16> %0, ptr %dest, align 2
  ret void
}

; Zero-extending load <4 x i16> -> <4 x i32>: expect a single vldrh.u32/vstrw pair.
define void @foo_uint32_uint16(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <4 x i16>, ptr %src, align 2
  %0 = zext <4 x i16> %wide.load to <4 x i32>
  store <4 x i32> %0, ptr %dest, align 4
  ret void
}


; Double-width zero-extending load <8 x i8> -> <8 x i32>: two vldrb.u32/vstrw pairs.
define void @foo_uint32_uint8_double(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint8_double:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vldrb.u32 q1, [r1, #4]
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <8 x i8>, ptr %src, align 1
  %0 = zext <8 x i8> %wide.load to <8 x i32>
  store <8 x i32> %0, ptr %dest, align 4
  ret void
}

; Double-width zero-extending load <16 x i8> -> <16 x i16>: two vldrb.u16/vstrh pairs.
define void @foo_uint16_uint8_double(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint16_uint8_double:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u16 q0, [r1]
; CHECK-NEXT:    vldrb.u16 q1, [r1, #8]
; CHECK-NEXT:    vstrh.16 q1, [r0, #16]
; CHECK-NEXT:    vstrh.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <16 x i8>, ptr %src, align 1
  %0 = zext <16 x i8> %wide.load to <16 x i16>
  store <16 x i16> %0, ptr %dest, align 2
  ret void
}

; Double-width zero-extending load <8 x i16> -> <8 x i32>: two vldrh.u32/vstrw pairs.
define void @foo_uint32_uint16_double(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint16_double:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vldrh.u32 q1, [r1, #8]
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <8 x i16>, ptr %src, align 2
  %0 = zext <8 x i16> %wide.load to <8 x i32>
  store <8 x i32> %0, ptr %dest, align 4
  ret void
}

; Quad-width zero-extending load <16 x i8> -> <16 x i32>: four vldrb.u32/vstrw pairs.
define void @foo_uint32_uint8_quad(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint8_quad:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrb.u32 q0, [r1]
; CHECK-NEXT:    vldrb.u32 q1, [r1, #4]
; CHECK-NEXT:    vldrb.u32 q2, [r1, #8]
; CHECK-NEXT:    vldrb.u32 q3, [r1, #12]
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q3, [r0, #48]
; CHECK-NEXT:    vstrw.32 q2, [r0, #32]
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <16 x i8>, ptr %src, align 1
  %0 = zext <16 x i8> %wide.load to <16 x i32>
  store <16 x i32> %0, ptr %dest, align 4
  ret void
}


; Mixed extension chain: sext <16 x i8> -> <16 x i16>, then zext -> <16 x i32>;
; goes via the stack, with LE/BE differing only in the vstrw vs vstrh spill form.
define void @foo_int32_int8_both(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LE-LABEL: foo_int32_int8_both:
; CHECK-LE:       @ %bb.0: @ %entry
; CHECK-LE-NEXT:    .pad #32
; CHECK-LE-NEXT:    sub sp, #32
; CHECK-LE-NEXT:    vldrb.s16 q0, [r1, #8]
; CHECK-LE-NEXT:    add r2, sp, #16
; CHECK-LE-NEXT:    vstrw.32 q0, [r2]
; CHECK-LE-NEXT:    vldrb.s16 q0, [r1]
; CHECK-LE-NEXT:    mov r1, sp
; CHECK-LE-NEXT:    vstrw.32 q0, [r1]
; CHECK-LE-NEXT:    vldrh.u32 q0, [r2, #8]
; CHECK-LE-NEXT:    vstrw.32 q0, [r0, #48]
; CHECK-LE-NEXT:    vldrh.u32 q0, [r2]
; CHECK-LE-NEXT:    vstrw.32 q0, [r0, #32]
; CHECK-LE-NEXT:    vldrh.u32 q0, [r1, #8]
; CHECK-LE-NEXT:    vstrw.32 q0, [r0, #16]
; CHECK-LE-NEXT:    vldrh.u32 q0, [r1]
; CHECK-LE-NEXT:    vstrw.32 q0, [r0]
; CHECK-LE-NEXT:    add sp, #32
; CHECK-LE-NEXT:    bx lr
;
; CHECK-BE-LABEL: foo_int32_int8_both:
; CHECK-BE:       @ %bb.0: @ %entry
; CHECK-BE-NEXT:    .pad #32
; CHECK-BE-NEXT:    sub sp, #32
; CHECK-BE-NEXT:    vldrb.s16 q0, [r1, #8]
; CHECK-BE-NEXT:    add r2, sp, #16
; CHECK-BE-NEXT:    vstrh.16 q0, [r2]
; CHECK-BE-NEXT:    vldrb.s16 q0, [r1]
; CHECK-BE-NEXT:    mov r1, sp
; CHECK-BE-NEXT:    vstrh.16 q0, [r1]
; CHECK-BE-NEXT:    vldrh.u32 q0, [r2, #8]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #48]
; CHECK-BE-NEXT:    vldrh.u32 q0, [r2]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #32]
; CHECK-BE-NEXT:    vldrh.u32 q0, [r1, #8]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0, #16]
; CHECK-BE-NEXT:    vldrh.u32 q0, [r1]
; CHECK-BE-NEXT:    vstrw.32 q0, [r0]
; CHECK-BE-NEXT:    add sp, #32
; CHECK-BE-NEXT:    bx lr
entry:
  %wide.load = load <16 x i8>, ptr %src, align 1
  %0 = sext <16 x i8> %wide.load to <16 x i16>
  %1 = zext <16 x i16> %0 to <16 x i32>
  store <16 x i32> %1, ptr %dest, align 4
  ret void
}

; Extending load at a constant offset with the pointer returned:
; expect a pre-indexed vldrh.s32 (writeback, [r1, #16]!) to update r1.
define ptr @foo_uint32_uint16_double_offset(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint16_double_offset:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r1, #16]!
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds <8 x i16>, ptr %src, i32 1
  %wide.load = load <8 x i16>, ptr %z, align 2
  %0 = sext <8 x i16> %wide.load to <8 x i32>
  store <8 x i32> %0, ptr %dest, align 4
  ret ptr %z
}

; Quad-width variant of the offset test: pre-indexed vldrh.s32 [r1, #32]! plus
; three offset loads, with the updated pointer returned via mov r0, r1.
define ptr @foo_uint32_uint16_quad_offset(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint16_quad_offset:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldrh.s32 q0, [r1, #32]!
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vldrh.s32 q2, [r1, #24]
; CHECK-NEXT:    vldrh.s32 q3, [r1, #16]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vstrw.32 q2, [r0, #48]
; CHECK-NEXT:    vstrw.32 q1, [r0, #16]
; CHECK-NEXT:    vstrw.32 q3, [r0, #32]
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bx lr
entry:
  %z = getelementptr inbounds <16 x i16>, ptr %src, i32 1
  %wide.load = load <16 x i16>, ptr %z, align 2
  %0 = sext <16 x i16> %wide.load to <16 x i32>
  store <16 x i32> %0, ptr %dest, align 4
  ret ptr %z
}


; Truncating store <4 x i32> -> <4 x i16> with only align 1 on the destination:
; vstrh.32 cannot be used directly, so the value is bounced through the stack.
define void @foo_int16_int32_align1(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int16_int32_align1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    vldrw.u32 q0, [r1]
; CHECK-NEXT:    mov r1, sp
; CHECK-NEXT:    vstrh.32 q0, [r1]
; CHECK-NEXT:    ldrd r1, r2, [sp]
; CHECK-NEXT:    str r1, [r0]
; CHECK-NEXT:    str r2, [r0, #4]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <4 x i32>, ptr %src, align 4
  %0 = trunc <4 x i32> %wide.load to <4 x i16>
  store <4 x i16> %0, ptr %dest, align 1
  ret void
}

; Sign-extending load from an align-1 <4 x i16> source: scalar loads copy the
; bytes to an aligned stack slot before the vldrh.s32 extension.
define void @foo_int32_int16_align1(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_int32_int16_align1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    ldr r2, [r1]
; CHECK-NEXT:    ldr r1, [r1, #4]
; CHECK-NEXT:    strd r2, r1, [sp]
; CHECK-NEXT:    mov r1, sp
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <4 x i16>, ptr %src, align 1
  %0 = sext <4 x i16> %wide.load to <4 x i32>
  store <4 x i32> %0, ptr %dest, align 4
  ret void
}

; Zero-extending load from an align-1 <4 x i16> source: same stack bounce as the
; signed variant, but extending with vldrh.u32.
define void @foo_uint32_uint16_align1(ptr %dest, ptr readonly %src, i32 %n) {
; CHECK-LABEL: foo_uint32_uint16_align1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    ldr r2, [r1]
; CHECK-NEXT:    ldr r1, [r1, #4]
; CHECK-NEXT:    strd r2, r1, [sp]
; CHECK-NEXT:    mov r1, sp
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    bx lr
entry:
  %wide.load = load <4 x i16>, ptr %src, align 1
  %0 = zext <4 x i16> %wide.load to <4 x i32>
  store <4 x i32> %0, ptr %dest, align 4
  ret void
}