xref: /llvm-project/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll (revision 8570685d3b5a71d9a65a8c37a88fb0184d9b131c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-LE
3; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE
4
; test0: loads a single i16 from %i16_ptr and inserts it into element 0 of an
; otherwise undef <2 x i16>. The %inc argument is not used by the body.
; Both check sets expect one lane load, `ld1 { v0.h }[0], [x0]`; the
; big-endian checks additionally expect `rev64 v0.2s, v0.2s` before the return.
5define <2 x i16> @test0(ptr %i16_ptr, i64 %inc) {
6; CHECK-LE-LABEL: test0:
7; CHECK-LE:       // %bb.0:
8; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
9; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
10; CHECK-LE-NEXT:    ret
11;
12; CHECK-BE-LABEL: test0:
13; CHECK-BE:       // %bb.0:
14; CHECK-BE-NEXT:    ld1 { v0.h }[0], [x0]
15; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
16; CHECK-BE-NEXT:    ret
17  %i_0 = load i16, ptr %i16_ptr
18  %v0 = insertelement <2 x i16> undef, i16 %i_0, i32 0
19  ret <2 x i16> %v0
20}
21
; test1: loads a whole <2 x i16> vector from %v2i16_ptr and returns it.
; The checks expect two halfword lane loads: lane h[0] from [x0] and lane h[2]
; from x0 + 2. The big-endian checks additionally expect `rev64 v0.2s, v0.2s`.
22define <2 x i16> @test1(ptr %v2i16_ptr) {
23; CHECK-LE-LABEL: test1:
24; CHECK-LE:       // %bb.0:
25; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
26; CHECK-LE-NEXT:    add x8, x0, #2
27; CHECK-LE-NEXT:    ld1 { v0.h }[2], [x8]
28; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
29; CHECK-LE-NEXT:    ret
30;
31; CHECK-BE-LABEL: test1:
32; CHECK-BE:       // %bb.0:
33; CHECK-BE-NEXT:    ld1 { v0.h }[0], [x0]
34; CHECK-BE-NEXT:    add x8, x0, #2
35; CHECK-BE-NEXT:    ld1 { v0.h }[2], [x8]
36; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
37; CHECK-BE-NEXT:    ret
38  %v2i16 = load <2 x i16>, ptr %v2i16_ptr
39  ret <2 x i16> %v2i16
40}
41
; test2: builds a <2 x i16> from two scalar i16 loads. Element 0 comes from
; %i16_ptr; element 1 comes from %i16_ptr advanced by %inc i16 elements
; (getelementptr i16).
; The checks expect the second address to be formed with
; `add x8, x0, x1, lsl #1` and both values to be loaded with lane loads
; (h[0] and h[2]). The big-endian checks additionally expect a rev64.
42define <2 x i16> @test2(ptr %i16_ptr, i64 %inc) {
43; CHECK-LE-LABEL: test2:
44; CHECK-LE:       // %bb.0:
45; CHECK-LE-NEXT:    ld1 { v0.h }[0], [x0]
46; CHECK-LE-NEXT:    add x8, x0, x1, lsl #1
47; CHECK-LE-NEXT:    ld1 { v0.h }[2], [x8]
48; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
49; CHECK-LE-NEXT:    ret
50;
51; CHECK-BE-LABEL: test2:
52; CHECK-BE:       // %bb.0:
53; CHECK-BE-NEXT:    ld1 { v0.h }[0], [x0]
54; CHECK-BE-NEXT:    add x8, x0, x1, lsl #1
55; CHECK-BE-NEXT:    ld1 { v0.h }[2], [x8]
56; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
57; CHECK-BE-NEXT:    ret
58  %i_0 = load i16, ptr %i16_ptr
59  %i16_ptr_inc = getelementptr i16, ptr %i16_ptr, i64 %inc
60  %i_1 = load i16, ptr %i16_ptr_inc
61  %v0 = insertelement <2 x i16> undef, i16 %i_0, i32 0
62  %v1 = insertelement <2 x i16> %v0, i16 %i_1, i32 1
63  ret <2 x i16> %v1
64}
65
; test3: loads a whole <2 x i8> vector from %v2i8_ptr and returns it.
; The checks expect two byte lane loads: lane b[0] from [x0] and lane b[4]
; from x0 + 1. The big-endian checks additionally expect `rev64 v0.2s, v0.2s`.
66define <2 x i8> @test3(ptr %v2i8_ptr) {
67; CHECK-LE-LABEL: test3:
68; CHECK-LE:       // %bb.0:
69; CHECK-LE-NEXT:    ld1 { v0.b }[0], [x0]
70; CHECK-LE-NEXT:    add x8, x0, #1
71; CHECK-LE-NEXT:    ld1 { v0.b }[4], [x8]
72; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
73; CHECK-LE-NEXT:    ret
74;
75; CHECK-BE-LABEL: test3:
76; CHECK-BE:       // %bb.0:
77; CHECK-BE-NEXT:    ld1 { v0.b }[0], [x0]
78; CHECK-BE-NEXT:    add x8, x0, #1
79; CHECK-BE-NEXT:    ld1 { v0.b }[4], [x8]
80; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
81; CHECK-BE-NEXT:    ret
82  %v2i8 = load <2 x i8>, ptr %v2i8_ptr
83  ret <2 x i8> %v2i8
84}
85
; test4: loads a whole <4 x i8> vector from %v4i8_ptr (no explicit alignment)
; and returns it.
; The checks expect a single 32-bit `ldr s0, [x0]` followed by
; `ushll v0.8h, v0.8b, #0`. The big-endian checks additionally expect a rev32
; on the loaded bytes before the ushll and a `rev64 v0.4h` after it.
86define <4 x i8> @test4(ptr %v4i8_ptr) {
87; CHECK-LE-LABEL: test4:
88; CHECK-LE:       // %bb.0:
89; CHECK-LE-NEXT:    ldr s0, [x0]
90; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
91; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
92; CHECK-LE-NEXT:    ret
93;
94; CHECK-BE-LABEL: test4:
95; CHECK-BE:       // %bb.0:
96; CHECK-BE-NEXT:    ldr s0, [x0]
97; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
98; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
99; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
100; CHECK-BE-NEXT:    ret
101  %v4i8 = load <4 x i8>, ptr %v4i8_ptr
102  ret <4 x i8> %v4i8
103}
104
; fsext_v2i32: loads <2 x i8> from %a and sign-extends it to <2 x i32>.
; The checks expect two scalar sign-extending byte loads (`ldrsb` from [x0]
; and [x0, #1]) whose results are placed into s[0] via fmov and s[1] via mov.
; The big-endian checks additionally expect `rev64 v0.2s, v0.2s`.
105define <2 x i32> @fsext_v2i32(ptr %a) {
106; CHECK-LE-LABEL: fsext_v2i32:
107; CHECK-LE:       // %bb.0:
108; CHECK-LE-NEXT:    ldrsb w8, [x0]
109; CHECK-LE-NEXT:    ldrsb w9, [x0, #1]
110; CHECK-LE-NEXT:    fmov s0, w8
111; CHECK-LE-NEXT:    mov v0.s[1], w9
112; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
113; CHECK-LE-NEXT:    ret
114;
115; CHECK-BE-LABEL: fsext_v2i32:
116; CHECK-BE:       // %bb.0:
117; CHECK-BE-NEXT:    ldrsb w8, [x0]
118; CHECK-BE-NEXT:    ldrsb w9, [x0, #1]
119; CHECK-BE-NEXT:    fmov s0, w8
120; CHECK-BE-NEXT:    mov v0.s[1], w9
121; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
122; CHECK-BE-NEXT:    ret
123  %x = load <2 x i8>, ptr %a
124  %y = sext <2 x i8> %x to <2 x i32>
125  ret <2 x i32> %y
126}
127
; fsext_v3i32: loads <3 x i8> from %a and sign-extends it to <3 x i32>.
; The checks expect a 32-bit `ldr s0`, then zip1 and ushll, with the sign
; extension performed as a shift pair (`shl #24` followed by `sshr #24`) on
; 4s lanes. The big-endian checks additionally expect a rev32 after the load
; and a rev64 + ext #8 pair before the return.
128define <3 x i32> @fsext_v3i32(ptr %a) {
129; CHECK-LE-LABEL: fsext_v3i32:
130; CHECK-LE:       // %bb.0:
131; CHECK-LE-NEXT:    ldr s0, [x0]
132; CHECK-LE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
133; CHECK-LE-NEXT:    ushll v0.4s, v0.4h, #0
134; CHECK-LE-NEXT:    shl v0.4s, v0.4s, #24
135; CHECK-LE-NEXT:    sshr v0.4s, v0.4s, #24
136; CHECK-LE-NEXT:    ret
137;
138; CHECK-BE-LABEL: fsext_v3i32:
139; CHECK-BE:       // %bb.0:
140; CHECK-BE-NEXT:    ldr s0, [x0]
141; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
142; CHECK-BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
143; CHECK-BE-NEXT:    ushll v0.4s, v0.4h, #0
144; CHECK-BE-NEXT:    shl v0.4s, v0.4s, #24
145; CHECK-BE-NEXT:    sshr v0.4s, v0.4s, #24
146; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
147; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
148; CHECK-BE-NEXT:    ret
149  %x = load <3 x i8>, ptr %a
150  %y = sext <3 x i8> %x to <3 x i32>
151  ret <3 x i32> %y
152}
153
; fsext_v4i32: loads <4 x i8> from %a and sign-extends it to <4 x i32>.
; The checks expect a 32-bit `ldr s0` followed by two sshll steps
; (8b -> 8h, then 4h -> 4s). The big-endian checks additionally expect a rev32
; after the load and a rev64 + ext #8 pair before the return.
154define <4 x i32> @fsext_v4i32(ptr %a) {
155; CHECK-LE-LABEL: fsext_v4i32:
156; CHECK-LE:       // %bb.0:
157; CHECK-LE-NEXT:    ldr s0, [x0]
158; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
159; CHECK-LE-NEXT:    sshll v0.4s, v0.4h, #0
160; CHECK-LE-NEXT:    ret
161;
162; CHECK-BE-LABEL: fsext_v4i32:
163; CHECK-BE:       // %bb.0:
164; CHECK-BE-NEXT:    ldr s0, [x0]
165; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
166; CHECK-BE-NEXT:    sshll v0.8h, v0.8b, #0
167; CHECK-BE-NEXT:    sshll v0.4s, v0.4h, #0
168; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
169; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
170; CHECK-BE-NEXT:    ret
171  %x = load <4 x i8>, ptr %a
172  %y = sext <4 x i8> %x to <4 x i32>
173  ret <4 x i32> %y
174}
175
; fsext_v8i32: loads <8 x i8> from %a and sign-extends it to <8 x i32>.
; The checks expect a 64-bit load (`ldr d0` for little-endian,
; `ld1 { v0.8b }` for big-endian), one sshll to 8h, and then sshll / sshll2
; producing the two 4s halves in v0 and v1. The big-endian checks additionally
; expect a rev64 + ext #8 pair applied to each half.
176define <8 x i32> @fsext_v8i32(ptr %a) {
177; CHECK-LE-LABEL: fsext_v8i32:
178; CHECK-LE:       // %bb.0:
179; CHECK-LE-NEXT:    ldr d0, [x0]
180; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
181; CHECK-LE-NEXT:    sshll2 v1.4s, v0.8h, #0
182; CHECK-LE-NEXT:    sshll v0.4s, v0.4h, #0
183; CHECK-LE-NEXT:    ret
184;
185; CHECK-BE-LABEL: fsext_v8i32:
186; CHECK-BE:       // %bb.0:
187; CHECK-BE-NEXT:    ld1 { v0.8b }, [x0]
188; CHECK-BE-NEXT:    sshll v0.8h, v0.8b, #0
189; CHECK-BE-NEXT:    sshll v1.4s, v0.4h, #0
190; CHECK-BE-NEXT:    sshll2 v0.4s, v0.8h, #0
191; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
192; CHECK-BE-NEXT:    rev64 v2.4s, v1.4s
193; CHECK-BE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
194; CHECK-BE-NEXT:    ext v0.16b, v2.16b, v2.16b, #8
195; CHECK-BE-NEXT:    ret
196  %x = load <8 x i8>, ptr %a
197  %y = sext <8 x i8> %x to <8 x i32>
198  ret <8 x i32> %y
199}
200
; fzext_v4i32: loads <4 x i8> from %a and zero-extends it to <4 x i32>.
; The checks expect a 32-bit `ldr s0` followed by two ushll steps
; (8b -> 8h, then 4h -> 4s). The big-endian checks additionally expect a rev32
; after the load and a rev64 + ext #8 pair before the return.
201define <4 x i32> @fzext_v4i32(ptr %a) {
202; CHECK-LE-LABEL: fzext_v4i32:
203; CHECK-LE:       // %bb.0:
204; CHECK-LE-NEXT:    ldr s0, [x0]
205; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
206; CHECK-LE-NEXT:    ushll v0.4s, v0.4h, #0
207; CHECK-LE-NEXT:    ret
208;
209; CHECK-BE-LABEL: fzext_v4i32:
210; CHECK-BE:       // %bb.0:
211; CHECK-BE-NEXT:    ldr s0, [x0]
212; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
213; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
214; CHECK-BE-NEXT:    ushll v0.4s, v0.4h, #0
215; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
216; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
217; CHECK-BE-NEXT:    ret
218  %x = load <4 x i8>, ptr %a
219  %y = zext <4 x i8> %x to <4 x i32>
220  ret <4 x i32> %y
221}
222
223; TODO: This codegen could just be:
224;   ldrb w0, [x0]
225;
; loadExti32: loads <4 x i8> from %ref, extracts element 0 and zero-extends
; that byte to i32.
; The checks currently expect the full vector path: `ldr s0`, ushll to 8h,
; `umov w8, v0.h[0]` and a final `and w0, w8, #0xff` (with an extra rev32 after
; the load in the big-endian checks).
226define i32 @loadExti32(ptr %ref) {
227; CHECK-LE-LABEL: loadExti32:
228; CHECK-LE:       // %bb.0:
229; CHECK-LE-NEXT:    ldr s0, [x0]
230; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
231; CHECK-LE-NEXT:    umov w8, v0.h[0]
232; CHECK-LE-NEXT:    and w0, w8, #0xff
233; CHECK-LE-NEXT:    ret
234;
235; CHECK-BE-LABEL: loadExti32:
236; CHECK-BE:       // %bb.0:
237; CHECK-BE-NEXT:    ldr s0, [x0]
238; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
239; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
240; CHECK-BE-NEXT:    umov w8, v0.h[0]
241; CHECK-BE-NEXT:    and w0, w8, #0xff
242; CHECK-BE-NEXT:    ret
243  %a = load <4 x i8>, ptr %ref
244  %vecext = extractelement <4 x i8> %a, i32 0
245  %conv = zext i8 %vecext to i32
246  ret i32 %conv
247}
248
; fsext_v2i16: loads <2 x i8> from %a and sign-extends it to <2 x i16>.
; The checks expect two scalar sign-extending byte loads (`ldrsb` from [x0]
; and [x0, #1]) placed into s[0] via fmov and s[1] via mov, i.e. the same
; instruction sequence that is expected for fsext_v2i32 in this file.
; The big-endian checks additionally expect `rev64 v0.2s, v0.2s`.
249define <2 x i16> @fsext_v2i16(ptr %a) {
250; CHECK-LE-LABEL: fsext_v2i16:
251; CHECK-LE:       // %bb.0:
252; CHECK-LE-NEXT:    ldrsb w8, [x0]
253; CHECK-LE-NEXT:    ldrsb w9, [x0, #1]
254; CHECK-LE-NEXT:    fmov s0, w8
255; CHECK-LE-NEXT:    mov v0.s[1], w9
256; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
257; CHECK-LE-NEXT:    ret
258;
259; CHECK-BE-LABEL: fsext_v2i16:
260; CHECK-BE:       // %bb.0:
261; CHECK-BE-NEXT:    ldrsb w8, [x0]
262; CHECK-BE-NEXT:    ldrsb w9, [x0, #1]
263; CHECK-BE-NEXT:    fmov s0, w8
264; CHECK-BE-NEXT:    mov v0.s[1], w9
265; CHECK-BE-NEXT:    rev64 v0.2s, v0.2s
266; CHECK-BE-NEXT:    ret
267  %x = load <2 x i8>, ptr %a
268  %y = sext <2 x i8> %x to <2 x i16>
269  ret <2 x i16> %y
270}
271
; fsext_v3i16: loads <3 x i8> from %a and sign-extends it to <3 x i16>.
; The checks expect a 32-bit `ldr s0`, a zip1, and the sign extension done as
; a shift pair (`shl #8` followed by `sshr #8`) on 4h lanes. The big-endian
; checks additionally expect a rev32 after the load and a `rev64 v0.4h` before
; the return.
272define <3 x i16> @fsext_v3i16(ptr %a) {
273; CHECK-LE-LABEL: fsext_v3i16:
274; CHECK-LE:       // %bb.0:
275; CHECK-LE-NEXT:    ldr s0, [x0]
276; CHECK-LE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
277; CHECK-LE-NEXT:    shl v0.4h, v0.4h, #8
278; CHECK-LE-NEXT:    sshr v0.4h, v0.4h, #8
279; CHECK-LE-NEXT:    ret
280;
281; CHECK-BE-LABEL: fsext_v3i16:
282; CHECK-BE:       // %bb.0:
283; CHECK-BE-NEXT:    ldr s0, [x0]
284; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
285; CHECK-BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
286; CHECK-BE-NEXT:    shl v0.4h, v0.4h, #8
287; CHECK-BE-NEXT:    sshr v0.4h, v0.4h, #8
288; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
289; CHECK-BE-NEXT:    ret
290  %x = load <3 x i8>, ptr %a
291  %y = sext <3 x i8> %x to <3 x i16>
292  ret <3 x i16> %y
293}
294
; fsext_v4i16: loads <4 x i8> from %a and sign-extends it to <4 x i16>.
; The checks expect a 32-bit `ldr s0` followed by a single
; `sshll v0.8h, v0.8b, #0`. The big-endian checks additionally expect a rev32
; after the load and a `rev64 v0.4h` before the return.
295define <4 x i16> @fsext_v4i16(ptr %a) {
296; CHECK-LE-LABEL: fsext_v4i16:
297; CHECK-LE:       // %bb.0:
298; CHECK-LE-NEXT:    ldr s0, [x0]
299; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
300; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
301; CHECK-LE-NEXT:    ret
302;
303; CHECK-BE-LABEL: fsext_v4i16:
304; CHECK-BE:       // %bb.0:
305; CHECK-BE-NEXT:    ldr s0, [x0]
306; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
307; CHECK-BE-NEXT:    sshll v0.8h, v0.8b, #0
308; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
309; CHECK-BE-NEXT:    ret
310  %x = load <4 x i8>, ptr %a
311  %y = sext <4 x i8> %x to <4 x i16>
312  ret <4 x i16> %y
313}
314
; fsext_v8i16: loads <8 x i8> from %a and sign-extends it to <8 x i16>.
; The checks expect a 64-bit load (`ldr d0` for little-endian,
; `ld1 { v0.8b }` for big-endian) followed by a single
; `sshll v0.8h, v0.8b, #0`. The big-endian checks additionally expect a
; rev64 + ext #8 pair before the return.
315define <8 x i16> @fsext_v8i16(ptr %a) {
316; CHECK-LE-LABEL: fsext_v8i16:
317; CHECK-LE:       // %bb.0:
318; CHECK-LE-NEXT:    ldr d0, [x0]
319; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
320; CHECK-LE-NEXT:    ret
321;
322; CHECK-BE-LABEL: fsext_v8i16:
323; CHECK-BE:       // %bb.0:
324; CHECK-BE-NEXT:    ld1 { v0.8b }, [x0]
325; CHECK-BE-NEXT:    sshll v0.8h, v0.8b, #0
326; CHECK-BE-NEXT:    rev64 v0.8h, v0.8h
327; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
328; CHECK-BE-NEXT:    ret
329  %x = load <8 x i8>, ptr %a
330  %y = sext <8 x i8> %x to <8 x i16>
331  ret <8 x i16> %y
332}
333
; fsext_v16i16: loads <16 x i8> from %a and sign-extends it to <16 x i16>.
; The checks expect a 128-bit load (`ldr q0` for little-endian,
; `ld1 { v0.16b }` for big-endian) followed by sshll / sshll2 producing the
; two 8h halves in v0 and v1. The big-endian checks additionally expect a
; rev64 + ext #8 pair applied to each half.
334define <16 x i16> @fsext_v16i16(ptr %a) {
335; CHECK-LE-LABEL: fsext_v16i16:
336; CHECK-LE:       // %bb.0:
337; CHECK-LE-NEXT:    ldr q0, [x0]
338; CHECK-LE-NEXT:    sshll2 v1.8h, v0.16b, #0
339; CHECK-LE-NEXT:    sshll v0.8h, v0.8b, #0
340; CHECK-LE-NEXT:    ret
341;
342; CHECK-BE-LABEL: fsext_v16i16:
343; CHECK-BE:       // %bb.0:
344; CHECK-BE-NEXT:    ld1 { v0.16b }, [x0]
345; CHECK-BE-NEXT:    sshll v1.8h, v0.8b, #0
346; CHECK-BE-NEXT:    sshll2 v0.8h, v0.16b, #0
347; CHECK-BE-NEXT:    rev64 v0.8h, v0.8h
348; CHECK-BE-NEXT:    rev64 v2.8h, v1.8h
349; CHECK-BE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
350; CHECK-BE-NEXT:    ext v0.16b, v2.16b, v2.16b, #8
351; CHECK-BE-NEXT:    ret
352  %x = load <16 x i8>, ptr %a
353  %y = sext <16 x i8> %x to <16 x i16>
354  ret <16 x i16> %y
355}
356
; fzext_v4i16: loads <4 x i8> from %a and zero-extends it to <4 x i16>.
; The checks expect a 32-bit `ldr s0` followed by a single
; `ushll v0.8h, v0.8b, #0`. The big-endian checks additionally expect a rev32
; after the load and a `rev64 v0.4h` before the return.
357define <4 x i16> @fzext_v4i16(ptr %a) {
358; CHECK-LE-LABEL: fzext_v4i16:
359; CHECK-LE:       // %bb.0:
360; CHECK-LE-NEXT:    ldr s0, [x0]
361; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
362; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
363; CHECK-LE-NEXT:    ret
364;
365; CHECK-BE-LABEL: fzext_v4i16:
366; CHECK-BE:       // %bb.0:
367; CHECK-BE-NEXT:    ldr s0, [x0]
368; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
369; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
370; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
371; CHECK-BE-NEXT:    ret
372  %x = load <4 x i8>, ptr %a
373  %y = zext <4 x i8> %x to <4 x i16>
374  ret <4 x i16> %y
375}
376
; anyext_v4i16: loads two <4 x i8> vectors (%a and %b, both align 4), adds
; them as <4 x i8>, and sign-extends the sum to <4 x i16>.
; The checks expect two `ldr s` loads feeding a `uaddl` to 8h, with the sign
; extension then performed by a shift pair (`shl #8` / `sshr #8`) on 4h lanes.
; The big-endian checks additionally expect a rev32 on each loaded value and a
; `rev64 v0.4h` before the return.
377define <4 x i16> @anyext_v4i16(ptr %a, ptr %b) {
378; CHECK-LE-LABEL: anyext_v4i16:
379; CHECK-LE:       // %bb.0:
380; CHECK-LE-NEXT:    ldr s0, [x0]
381; CHECK-LE-NEXT:    ldr s1, [x1]
382; CHECK-LE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
383; CHECK-LE-NEXT:    shl v0.4h, v0.4h, #8
384; CHECK-LE-NEXT:    sshr v0.4h, v0.4h, #8
385; CHECK-LE-NEXT:    ret
386;
387; CHECK-BE-LABEL: anyext_v4i16:
388; CHECK-BE:       // %bb.0:
389; CHECK-BE-NEXT:    ldr s0, [x0]
390; CHECK-BE-NEXT:    ldr s1, [x1]
391; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
392; CHECK-BE-NEXT:    rev32 v1.8b, v1.8b
393; CHECK-BE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
394; CHECK-BE-NEXT:    shl v0.4h, v0.4h, #8
395; CHECK-BE-NEXT:    sshr v0.4h, v0.4h, #8
396; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
397; CHECK-BE-NEXT:    ret
398  %x = load <4 x i8>, ptr %a, align 4
399  %y = load <4 x i8>, ptr %b, align 4
400  %z = add <4 x i8> %x, %y
401  %s = sext <4 x i8> %z to <4 x i16>
402  ret <4 x i16> %s
403}
404
; anyext_v4i32: loads two <4 x i8> vectors (%a and %b, both align 4), adds
; them as <4 x i8>, and sign-extends the sum to <4 x i32>.
; The checks expect two `ldr s` loads feeding a `uaddl` to 8h, a ushll to 4s,
; and the sign extension performed by a shift pair (`shl #24` / `sshr #24`).
; The big-endian checks additionally expect a rev32 on each loaded value and a
; rev64 + ext #8 pair before the return.
405define <4 x i32> @anyext_v4i32(ptr %a, ptr %b) {
406; CHECK-LE-LABEL: anyext_v4i32:
407; CHECK-LE:       // %bb.0:
408; CHECK-LE-NEXT:    ldr s0, [x0]
409; CHECK-LE-NEXT:    ldr s1, [x1]
410; CHECK-LE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
411; CHECK-LE-NEXT:    ushll v0.4s, v0.4h, #0
412; CHECK-LE-NEXT:    shl v0.4s, v0.4s, #24
413; CHECK-LE-NEXT:    sshr v0.4s, v0.4s, #24
414; CHECK-LE-NEXT:    ret
415;
416; CHECK-BE-LABEL: anyext_v4i32:
417; CHECK-BE:       // %bb.0:
418; CHECK-BE-NEXT:    ldr s0, [x0]
419; CHECK-BE-NEXT:    ldr s1, [x1]
420; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
421; CHECK-BE-NEXT:    rev32 v1.8b, v1.8b
422; CHECK-BE-NEXT:    uaddl v0.8h, v0.8b, v1.8b
423; CHECK-BE-NEXT:    ushll v0.4s, v0.4h, #0
424; CHECK-BE-NEXT:    shl v0.4s, v0.4s, #24
425; CHECK-BE-NEXT:    sshr v0.4s, v0.4s, #24
426; CHECK-BE-NEXT:    rev64 v0.4s, v0.4s
427; CHECK-BE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
428; CHECK-BE-NEXT:    ret
429  %x = load <4 x i8>, ptr %a, align 4
430  %y = load <4 x i8>, ptr %b, align 4
431  %z = add <4 x i8> %x, %y
432  %s = sext <4 x i8> %z to <4 x i32>
433  ret <4 x i32> %s
434}
435
; bitcast: reinterprets the i32 argument as a <4 x i8> vector and returns it;
; no memory access is involved.
; The checks expect `fmov s0, w0` followed by `zip1 v0.8b, v0.8b, v0.8b`.
; The big-endian checks additionally expect a rev32 before the zip1 and a
; `rev64 v0.4h` after it.
436define <4 x i8> @bitcast(i32 %0) {
437; CHECK-LE-LABEL: bitcast:
438; CHECK-LE:       // %bb.0:
439; CHECK-LE-NEXT:    fmov s0, w0
440; CHECK-LE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
441; CHECK-LE-NEXT:    ret
442;
443; CHECK-BE-LABEL: bitcast:
444; CHECK-BE:       // %bb.0:
445; CHECK-BE-NEXT:    fmov s0, w0
446; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
447; CHECK-BE-NEXT:    zip1 v0.8b, v0.8b, v0.8b
448; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
449; CHECK-BE-NEXT:    ret
450  %2 = bitcast i32 %0 to <4 x i8>
451  ret <4 x i8> %2
452}
453
; strict_align_aligned: same <4 x i8> load as test4, but the function carries
; the "+strict-align" target feature and the load is annotated `align 4`.
; The checks expect the same sequence as for test4: a single 32-bit
; `ldr s0, [x0]` followed by ushll (plus rev32 / rev64 in the big-endian
; checks).
454define <4 x i8> @strict_align_aligned(ptr %v4i8_ptr) "target-features"="+strict-align" {
455; CHECK-LE-LABEL: strict_align_aligned:
456; CHECK-LE:       // %bb.0:
457; CHECK-LE-NEXT:    ldr s0, [x0]
458; CHECK-LE-NEXT:    ushll v0.8h, v0.8b, #0
459; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
460; CHECK-LE-NEXT:    ret
461;
462; CHECK-BE-LABEL: strict_align_aligned:
463; CHECK-BE:       // %bb.0:
464; CHECK-BE-NEXT:    ldr s0, [x0]
465; CHECK-BE-NEXT:    rev32 v0.8b, v0.8b
466; CHECK-BE-NEXT:    ushll v0.8h, v0.8b, #0
467; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
468; CHECK-BE-NEXT:    ret
469  %v4i8 = load <4 x i8>, ptr %v4i8_ptr, align 4
470  ret <4 x i8> %v4i8
471}
472
; strict_align_unaligned: counterpart of strict_align_aligned. The function
; carries the "+strict-align" target feature, but the <4 x i8> load is only
; annotated `align 1`.
; The checks expect no 32-bit load here; instead they expect four separate
; byte lane loads from x0, x0 + 1, x0 + 2 and x0 + 3 into lanes b[0], b[2],
; b[4] and b[6]. The big-endian checks additionally expect `rev64 v0.4h`.
473define <4 x i8> @strict_align_unaligned(ptr %v4i8_ptr) "target-features"="+strict-align" {
474; CHECK-LE-LABEL: strict_align_unaligned:
475; CHECK-LE:       // %bb.0:
476; CHECK-LE-NEXT:    ld1 { v0.b }[0], [x0]
477; CHECK-LE-NEXT:    add x8, x0, #1
478; CHECK-LE-NEXT:    ld1 { v0.b }[2], [x8]
479; CHECK-LE-NEXT:    add x8, x0, #2
480; CHECK-LE-NEXT:    ld1 { v0.b }[4], [x8]
481; CHECK-LE-NEXT:    add x8, x0, #3
482; CHECK-LE-NEXT:    ld1 { v0.b }[6], [x8]
483; CHECK-LE-NEXT:    // kill: def $d0 killed $d0 killed $q0
484; CHECK-LE-NEXT:    ret
485;
486; CHECK-BE-LABEL: strict_align_unaligned:
487; CHECK-BE:       // %bb.0:
488; CHECK-BE-NEXT:    ld1 { v0.b }[0], [x0]
489; CHECK-BE-NEXT:    add x8, x0, #1
490; CHECK-BE-NEXT:    ld1 { v0.b }[2], [x8]
491; CHECK-BE-NEXT:    add x8, x0, #2
492; CHECK-BE-NEXT:    ld1 { v0.b }[4], [x8]
493; CHECK-BE-NEXT:    add x8, x0, #3
494; CHECK-BE-NEXT:    ld1 { v0.b }[6], [x8]
495; CHECK-BE-NEXT:    rev64 v0.4h, v0.4h
496; CHECK-BE-NEXT:    ret
497  %v4i8 = load <4 x i8>, ptr %v4i8_ptr, align 1
498  ret <4 x i8> %v4i8
499}
500