xref: /llvm-project/llvm/test/CodeGen/ARM/aes-erratum-fix.ll (revision 5f058398ab7a6c2cf3555daf190d3d13d68f78f5)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple armv8---eabi -mattr=+aes,+fix-cortex-a57-aes-1742098 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-FIX-NOSCHED
3
4; These CPUs should have the fix enabled by default. They use different
5; FileCheck prefixes because some instructions are scheduled differently.
6;
7; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a57 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX
8; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a72 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX
9
10; This checks that adding `+fix-cortex-a57-aes-1742098` causes `vorr` to be
11; inserted wherever the compiler cannot prove that either input to the first aes
12; instruction in a fused aes pair was set by 64-bit Neon register writes or
13; 128-bit Neon register writes. All other register writes are unsafe, and
14; require a `vorr` to protect the AES input.
15
16declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)
17declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>)
18declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>)
19declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>)
20
21declare arm_aapcs_vfpcc <16 x i8> @get_input() local_unnamed_addr
22declare arm_aapcs_vfpcc <16 x i8> @get_inputf16(half) local_unnamed_addr
23declare arm_aapcs_vfpcc <16 x i8> @get_inputf32(float) local_unnamed_addr
24
25
26
; aese_zero - AESE of an all-zero vector with a block loaded from %0, followed
; by AESMC; the result is stored back through %0. The expected output contains
; no `vorr`: the two AES inputs are produced by `vmov.i32` and `vld1.64`.
27define arm_aapcs_vfpcc void @aese_zero(ptr %0) nounwind {
28; CHECK-FIX-LABEL: aese_zero:
29; CHECK-FIX:       @ %bb.0:
30; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
31; CHECK-FIX-NEXT:    vmov.i32 q9, #0x0
32; CHECK-FIX-NEXT:    aese.8 q9, q8
33; CHECK-FIX-NEXT:    aesmc.8 q8, q9
34; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
35; CHECK-FIX-NEXT:    bx lr
36  %2 = load <16 x i8>, ptr %0, align 8
37  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> zeroinitializer, <16 x i8> %2)
38  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
39  store <16 x i8> %4, ptr %0, align 8
40  ret void
41}
42
; aese_via_call1 - One AESE operand is the value returned by @get_input(), the
; other is loaded from %0. The expected output places `vorr q0, q0, q0` right
; after the call, so the returned value is rewritten before the AESE reads it;
; the operand loaded with `vld1.64` gets no `vorr`.
43define arm_aapcs_vfpcc void @aese_via_call1(ptr %0) nounwind {
44; CHECK-FIX-LABEL: aese_via_call1:
45; CHECK-FIX:       @ %bb.0:
46; CHECK-FIX-NEXT:    .save {r4, lr}
47; CHECK-FIX-NEXT:    push {r4, lr}
48; CHECK-FIX-NEXT:    mov r4, r0
49; CHECK-FIX-NEXT:    bl get_input
50; CHECK-FIX-NEXT:    vorr q0, q0, q0
51; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
52; CHECK-FIX-NEXT:    aese.8 q8, q0
53; CHECK-FIX-NEXT:    aesmc.8 q8, q8
54; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
55; CHECK-FIX-NEXT:    pop {r4, pc}
56  %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
57  %3 = load <16 x i8>, ptr %0, align 8
58  %4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %2, <16 x i8> %3)
59  %5 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %4)
60  store <16 x i8> %5, ptr %0, align 8
61  ret void
62}
63
; aese_via_call2 - Same shape as aese_via_call1, but the callee is
; @get_inputf16 and the incoming `half` argument is forwarded to it unchanged.
; The expected output still places `vorr q0, q0, q0` directly after the call.
64define arm_aapcs_vfpcc void @aese_via_call2(half %0, ptr %1) nounwind {
65; CHECK-FIX-LABEL: aese_via_call2:
66; CHECK-FIX:       @ %bb.0:
67; CHECK-FIX-NEXT:    .save {r4, lr}
68; CHECK-FIX-NEXT:    push {r4, lr}
69; CHECK-FIX-NEXT:    mov r4, r0
70; CHECK-FIX-NEXT:    bl get_inputf16
71; CHECK-FIX-NEXT:    vorr q0, q0, q0
72; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
73; CHECK-FIX-NEXT:    aese.8 q8, q0
74; CHECK-FIX-NEXT:    aesmc.8 q8, q8
75; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
76; CHECK-FIX-NEXT:    pop {r4, pc}
77  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
78  %4 = load <16 x i8>, ptr %1, align 8
79  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
80  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
81  store <16 x i8> %6, ptr %1, align 8
82  ret void
83}
84
; aese_via_call3 - Same shape as aese_via_call1, but the callee is
; @get_inputf32 and the incoming `float` argument is forwarded to it unchanged.
; The expected output still places `vorr q0, q0, q0` directly after the call.
85define arm_aapcs_vfpcc void @aese_via_call3(float %0, ptr %1) nounwind {
86; CHECK-FIX-LABEL: aese_via_call3:
87; CHECK-FIX:       @ %bb.0:
88; CHECK-FIX-NEXT:    .save {r4, lr}
89; CHECK-FIX-NEXT:    push {r4, lr}
90; CHECK-FIX-NEXT:    mov r4, r0
91; CHECK-FIX-NEXT:    bl get_inputf32
92; CHECK-FIX-NEXT:    vorr q0, q0, q0
93; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
94; CHECK-FIX-NEXT:    aese.8 q8, q0
95; CHECK-FIX-NEXT:    aesmc.8 q8, q8
96; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
97; CHECK-FIX-NEXT:    pop {r4, pc}
98  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
99  %4 = load <16 x i8>, ptr %1, align 8
100  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
101  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
102  store <16 x i8> %6, ptr %1, align 8
103  ret void
104}
105
; aese_once_via_ptr - A single AESE/AESMC pair whose two inputs are both loaded
; from memory (%1 and %0); the result is stored to %1. The expected output
; contains no `vorr`: both inputs are written by `vld1.64`.
106define arm_aapcs_vfpcc void @aese_once_via_ptr(ptr %0, ptr %1) nounwind {
107; CHECK-FIX-LABEL: aese_once_via_ptr:
108; CHECK-FIX:       @ %bb.0:
109; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
110; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r1]
111; CHECK-FIX-NEXT:    aese.8 q9, q8
112; CHECK-FIX-NEXT:    aesmc.8 q8, q9
113; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
114; CHECK-FIX-NEXT:    bx lr
115  %3 = load <16 x i8>, ptr %1, align 8
116  %4 = load <16 x i8>, ptr %0, align 8
117  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
118  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
119  store <16 x i8> %6, ptr %1, align 8
120  ret void
121}
122
; aese_once_via_val - A single AESE/AESMC pair whose two inputs are the vector
; arguments themselves (arriving in q0 and q1). The expected output rewrites
; each argument register with a `vorr` before the AESE.
123define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
124; CHECK-FIX-LABEL: aese_once_via_val:
125; CHECK-FIX:       @ %bb.0:
126; CHECK-FIX-NEXT:    vorr q0, q0, q0
127; CHECK-FIX-NEXT:    vorr q1, q1, q1
128; CHECK-FIX-NEXT:    aese.8 q0, q1
129; CHECK-FIX-NEXT:    aesmc.8 q0, q0
130; CHECK-FIX-NEXT:    bx lr
131  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
132  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
133  ret <16 x i8> %4
134}
135
; aese_twice_via_ptr - Two chained AESE/AESMC rounds. Round one reads both
; inputs from memory; round two combines the round-one result with a fresh
; reload of %0. Each round stores its result to %1. The expected output
; contains no `vorr`: every AESE input comes from `vld1.64` or from `aesmc`.
136define arm_aapcs_vfpcc void @aese_twice_via_ptr(ptr %0, ptr %1) nounwind {
137; CHECK-FIX-LABEL: aese_twice_via_ptr:
138; CHECK-FIX:       @ %bb.0:
139; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
140; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r1]
141; CHECK-FIX-NEXT:    aese.8 q9, q8
142; CHECK-FIX-NEXT:    aesmc.8 q8, q9
143; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
144; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r0]
145; CHECK-FIX-NEXT:    aese.8 q9, q8
146; CHECK-FIX-NEXT:    aesmc.8 q8, q9
147; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
148; CHECK-FIX-NEXT:    bx lr
149  %3 = load <16 x i8>, ptr %1, align 8
150  %4 = load <16 x i8>, ptr %0, align 8
151  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
152  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
153  store <16 x i8> %6, ptr %1, align 8
154  %7 = load <16 x i8>, ptr %0, align 8
155  %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
156  %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
157  store <16 x i8> %9, ptr %1, align 8
158  ret void
159}
160
; aese_twice_via_val - Two chained AESE/AESMC rounds on vector arguments; %0 is
; an input to both AESE calls, %1 only to the first. The expected output has
; one `vorr` for q1 and two for q0.
; NOTE(review): the duplicated `vorr q0` presumably corresponds to the two
; AESE instructions that read q0 - confirm against the erratum pass.
161define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
162; CHECK-FIX-LABEL: aese_twice_via_val:
163; CHECK-FIX:       @ %bb.0:
164; CHECK-FIX-NEXT:    vorr q1, q1, q1
165; CHECK-FIX-NEXT:    vorr q0, q0, q0
166; CHECK-FIX-NEXT:    vorr q0, q0, q0
167; CHECK-FIX-NEXT:    aese.8 q1, q0
168; CHECK-FIX-NEXT:    aesmc.8 q8, q1
169; CHECK-FIX-NEXT:    aese.8 q8, q0
170; CHECK-FIX-NEXT:    aesmc.8 q0, q8
171; CHECK-FIX-NEXT:    bx lr
172  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
173  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
174  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %4, <16 x i8> %0)
175  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
176  ret <16 x i8> %6
177}
178
; aese_loop_via_ptr - Returns immediately when %0 is zero; otherwise runs a
; loop of %0 iterations, each of which loads both AES inputs from memory
; (%2 and %1), applies AESE + AESMC and stores the result to %2.
; The two expected-output variants differ only in where `subs` is scheduled
; relative to the second `vld1.64`. Neither contains a `vorr`.
179define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, ptr %1, ptr %2) nounwind {
180; CHECK-FIX-NOSCHED-LABEL: aese_loop_via_ptr:
181; CHECK-FIX-NOSCHED:       @ %bb.0:
182; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
183; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
184; CHECK-FIX-NOSCHED-NEXT:  .LBB8_1: @ =>This Inner Loop Header: Depth=1
185; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
186; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
187; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d18, d19}, [r2]
188; CHECK-FIX-NOSCHED-NEXT:    aese.8 q9, q8
189; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q9
190; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
191; CHECK-FIX-NOSCHED-NEXT:    bne .LBB8_1
192; CHECK-FIX-NOSCHED-NEXT:  @ %bb.2:
193; CHECK-FIX-NOSCHED-NEXT:    bx lr
194;
195; CHECK-CORTEX-FIX-LABEL: aese_loop_via_ptr:
196; CHECK-CORTEX-FIX:       @ %bb.0:
197; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
198; CHECK-CORTEX-FIX-NEXT:    bxeq lr
199; CHECK-CORTEX-FIX-NEXT:  .LBB8_1: @ =>This Inner Loop Header: Depth=1
200; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
201; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d18, d19}, [r2]
202; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
203; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
204; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
205; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
206; CHECK-CORTEX-FIX-NEXT:    bne .LBB8_1
207; CHECK-CORTEX-FIX-NEXT:  @ %bb.2:
208; CHECK-CORTEX-FIX-NEXT:    bx lr
209  %4 = icmp eq i32 %0, 0
210  br i1 %4, label %5, label %6
211
2125:
213  ret void
214
2156:
216  %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
217  %8 = load <16 x i8>, ptr %2, align 8
218  %9 = load <16 x i8>, ptr %1, align 8
219  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %9)
220  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
221  store <16 x i8> %11, ptr %2, align 8
222  %12 = add nuw i32 %7, 1
223  %13 = icmp eq i32 %12, %0
224  br i1 %13, label %5, label %6
225}
226
; aese_loop_via_val - Loop of %0 iterations whose AES state is carried in a
; phi: it starts as argument %2 and is replaced by the AESMC result each
; iteration, while argument %1 is the other AESE input every time. Returns %2
; unchanged when %0 is zero. The expected output rewrites both argument
; registers with `vorr` once, before the loop, and none inside the loop body.
227define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
228; CHECK-FIX-LABEL: aese_loop_via_val:
229; CHECK-FIX:       @ %bb.0:
230; CHECK-FIX-NEXT:    vorr q1, q1, q1
231; CHECK-FIX-NEXT:    vorr q0, q0, q0
232; CHECK-FIX-NEXT:    cmp r0, #0
233; CHECK-FIX-NEXT:    beq .LBB9_2
234; CHECK-FIX-NEXT:  .LBB9_1: @ =>This Inner Loop Header: Depth=1
235; CHECK-FIX-NEXT:    aese.8 q1, q0
236; CHECK-FIX-NEXT:    subs r0, r0, #1
237; CHECK-FIX-NEXT:    aesmc.8 q1, q1
238; CHECK-FIX-NEXT:    bne .LBB9_1
239; CHECK-FIX-NEXT:  .LBB9_2:
240; CHECK-FIX-NEXT:    vorr q0, q1, q1
241; CHECK-FIX-NEXT:    bx lr
242  %4 = icmp eq i32 %0, 0
243  br i1 %4, label %5, label %7
244
2455:
246  %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
247  ret <16 x i8> %6
248
2497:
250  %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
251  %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
252  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %1)
253  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
254  %12 = add nuw i32 %8, 1
255  %13 = icmp eq i32 %12, %0
256  br i1 %13, label %5, label %7
257}
258
; aese_set8_via_ptr - A byte loaded from %0 is inserted into lane 0 of both AES
; inputs: the block loaded from %2 and the vector argument %1. The expected
; output rewrites only the argument register (`vorr q0`) before the 8-bit lane
; moves. The two expected-output variants differ only in the order of `ldrb`
; and `vld1.64`.
259define arm_aapcs_vfpcc void @aese_set8_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
260; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr:
261; CHECK-FIX-NOSCHED:       @ %bb.0:
262; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
263; CHECK-FIX-NOSCHED-NEXT:    ldrb r0, [r0]
264; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
265; CHECK-FIX-NOSCHED-NEXT:    vmov.8 d0[0], r0
266; CHECK-FIX-NOSCHED-NEXT:    vmov.8 d16[0], r0
267; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
268; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
269; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
270; CHECK-FIX-NOSCHED-NEXT:    bx lr
271;
272; CHECK-CORTEX-FIX-LABEL: aese_set8_via_ptr:
273; CHECK-CORTEX-FIX:       @ %bb.0:
274; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
275; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
276; CHECK-CORTEX-FIX-NEXT:    ldrb r0, [r0]
277; CHECK-CORTEX-FIX-NEXT:    vmov.8 d0[0], r0
278; CHECK-CORTEX-FIX-NEXT:    vmov.8 d16[0], r0
279; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
280; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
281; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
282; CHECK-CORTEX-FIX-NEXT:    bx lr
283  %4 = load i8, ptr %0, align 1
284  %5 = load <16 x i8>, ptr %2, align 8
285  %6 = insertelement <16 x i8> %5, i8 %4, i64 0
286  %7 = insertelement <16 x i8> %1, i8 %4, i64 0
287  %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
288  %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
289  store <16 x i8> %9, ptr %2, align 8
290  ret void
291}
292
; aese_set8_via_val - Same as aese_set8_via_ptr, except the byte inserted into
; lane 0 of both AES inputs is passed directly as the `i8 zeroext` argument %0.
; The expected output rewrites only the vector argument (`vorr q0`).
293define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
294; CHECK-FIX-LABEL: aese_set8_via_val:
295; CHECK-FIX:       @ %bb.0:
296; CHECK-FIX-NEXT:    vorr q0, q0, q0
297; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
298; CHECK-FIX-NEXT:    vmov.8 d0[0], r0
299; CHECK-FIX-NEXT:    vmov.8 d16[0], r0
300; CHECK-FIX-NEXT:    aese.8 q8, q0
301; CHECK-FIX-NEXT:    aesmc.8 q8, q8
302; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
303; CHECK-FIX-NEXT:    bx lr
304  %4 = load <16 x i8>, ptr %2, align 8
305  %5 = insertelement <16 x i8> %4, i8 %0, i64 0
306  %6 = insertelement <16 x i8> %1, i8 %0, i64 0
307  %7 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %5, <16 x i8> %6)
308  %8 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %7)
309  store <16 x i8> %8, ptr %2, align 8
310  ret void
311}
312
; aese_set8_cond_via_ptr - Both AES inputs have lane 0 conditionally replaced
; by a byte loaded from %1, using explicit branches on the same flag %0:
;   - the block loaded from %3 (phi %12), and
;   - the vector argument %2 (phi %17).
; The expected output rewrites the argument with `vorr q0` at function entry
; and performs the conditional lane updates with `vld1.8` lane loads.
313define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
314; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
315; CHECK-FIX:       @ %bb.0:
316; CHECK-FIX-NEXT:    vorr q0, q0, q0
317; CHECK-FIX-NEXT:    cmp r0, #0
318; CHECK-FIX-NEXT:    beq .LBB12_2
319; CHECK-FIX-NEXT:  @ %bb.1:
320; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
321; CHECK-FIX-NEXT:    vld1.8 {d16[0]}, [r1]
322; CHECK-FIX-NEXT:    cmp r0, #0
323; CHECK-FIX-NEXT:    bne .LBB12_3
324; CHECK-FIX-NEXT:    b .LBB12_4
325; CHECK-FIX-NEXT:  .LBB12_2:
326; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
327; CHECK-FIX-NEXT:    cmp r0, #0
328; CHECK-FIX-NEXT:    beq .LBB12_4
329; CHECK-FIX-NEXT:  .LBB12_3:
330; CHECK-FIX-NEXT:    vld1.8 {d0[0]}, [r1]
331; CHECK-FIX-NEXT:  .LBB12_4:
332; CHECK-FIX-NEXT:    aese.8 q8, q0
333; CHECK-FIX-NEXT:    aesmc.8 q8, q8
334; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
335; CHECK-FIX-NEXT:    bx lr
336  br i1 %0, label %5, label %9
337
3385:
339  %6 = load i8, ptr %1, align 1
340  %7 = load <16 x i8>, ptr %3, align 8
341  %8 = insertelement <16 x i8> %7, i8 %6, i64 0
342  br label %11
343
3449:
345  %10 = load <16 x i8>, ptr %3, align 8
346  br label %11
347
34811:
349  %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
350  br i1 %0, label %13, label %16
351
35213:
353  %14 = load i8, ptr %1, align 1
354  %15 = insertelement <16 x i8> %2, i8 %14, i64 0
355  br label %16
356
35716:
358  %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
359  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %17)
360  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
361  store <16 x i8> %19, ptr %3, align 8
362  ret void
363}
364
; aese_set8_cond_via_val - Like aese_set8_cond_via_ptr, but the byte is the
; `i8 zeroext` argument %1 and the conditional lane-0 updates are expressed as
; `select` on flag %0 rather than as branches. The expected output rewrites the
; vector argument with `vorr q0` at entry and guards each `vmov.8` lane write
; with its own compare-and-branch.
365define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
366; CHECK-FIX-LABEL: aese_set8_cond_via_val:
367; CHECK-FIX:       @ %bb.0:
368; CHECK-FIX-NEXT:    vorr q0, q0, q0
369; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
370; CHECK-FIX-NEXT:    cmp r0, #0
371; CHECK-FIX-NEXT:    beq .LBB13_2
372; CHECK-FIX-NEXT:  @ %bb.1:
373; CHECK-FIX-NEXT:    vmov.8 d16[0], r1
374; CHECK-FIX-NEXT:  .LBB13_2: @ %select.end
375; CHECK-FIX-NEXT:    cmp r0, #0
376; CHECK-FIX-NEXT:    beq .LBB13_4
377; CHECK-FIX-NEXT:  @ %bb.3:
378; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
379; CHECK-FIX-NEXT:  .LBB13_4: @ %select.end2
380; CHECK-FIX-NEXT:    aese.8 q8, q0
381; CHECK-FIX-NEXT:    aesmc.8 q8, q8
382; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
383; CHECK-FIX-NEXT:    bx lr
384  %5 = load <16 x i8>, ptr %3, align 8
385  %6 = insertelement <16 x i8> %5, i8 %1, i64 0
386  %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
387  %8 = insertelement <16 x i8> %2, i8 %1, i64 0
388  %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
389  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %9)
390  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
391  store <16 x i8> %11, ptr %3, align 8
392  ret void
393}
394
; aese_set8_loop_via_ptr - A byte loaded from %1 is inserted into lane 0 of the
; vector argument %2 and is also stored to the first byte of %3. If %0 is
; non-zero, the block at %3 is then loaded once and run through %0 iterations
; of AESE + AESMC (state carried in phi %14), and the final state is stored to
; %3 on loop exit. The expected output rewrites the argument with `vorr q0` at
; entry; the loop body itself contains only `aese`/`aesmc` and the counter.
395define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
396; CHECK-FIX-LABEL: aese_set8_loop_via_ptr:
397; CHECK-FIX:       @ %bb.0:
398; CHECK-FIX-NEXT:    vorr q0, q0, q0
399; CHECK-FIX-NEXT:    ldrb r1, [r1]
400; CHECK-FIX-NEXT:    cmp r0, #0
401; CHECK-FIX-NEXT:    strb r1, [r2]
402; CHECK-FIX-NEXT:    bxeq lr
403; CHECK-FIX-NEXT:  .LBB14_1:
404; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
405; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
406; CHECK-FIX-NEXT:  .LBB14_2: @ =>This Inner Loop Header: Depth=1
407; CHECK-FIX-NEXT:    aese.8 q8, q0
408; CHECK-FIX-NEXT:    subs r0, r0, #1
409; CHECK-FIX-NEXT:    aesmc.8 q8, q8
410; CHECK-FIX-NEXT:    bne .LBB14_2
411; CHECK-FIX-NEXT:  @ %bb.3:
412; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
413; CHECK-FIX-NEXT:    bx lr
414  %5 = load i8, ptr %1, align 1
415  %6 = insertelement <16 x i8> %2, i8 %5, i64 0
416  %7 = getelementptr inbounds <16 x i8>, ptr %3, i32 0, i32 0
417  store i8 %5, ptr %7, align 8
418  %8 = icmp eq i32 %0, 0
419  br i1 %8, label %12, label %9
420
4219:
422  %10 = load <16 x i8>, ptr %3, align 8
423  br label %13
424
42511:
426  store <16 x i8> %17, ptr %3, align 8
427  br label %12
428
42912:
430  ret void
431
43213:
433  %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
434  %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
435  %16 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %6)
436  %17 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %16)
437  %18 = add nuw i32 %15, 1
438  %19 = icmp eq i32 %18, %0
439  br i1 %19, label %11, label %13
440}
441
; aese_set8_loop_via_val - Returns immediately when %0 is zero. Otherwise the
; byte argument %1 is inserted into lane 0 of the vector argument %2 once, the
; block at %3 is loaded once, and the loop runs %0 iterations; each iteration
; re-inserts %1 into lane 0 of the loop-carried state (phi %12) before
; AESE + AESMC. The final state is stored to %3 on exit. The expected output
; rewrites the argument with `vorr q0` at entry and keeps a `vmov.8` lane write
; of the state register inside the loop.
442define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
443; CHECK-FIX-LABEL: aese_set8_loop_via_val:
444; CHECK-FIX:       @ %bb.0:
445; CHECK-FIX-NEXT:    vorr q0, q0, q0
446; CHECK-FIX-NEXT:    cmp r0, #0
447; CHECK-FIX-NEXT:    bxeq lr
448; CHECK-FIX-NEXT:  .LBB15_1:
449; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
450; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
451; CHECK-FIX-NEXT:  .LBB15_2: @ =>This Inner Loop Header: Depth=1
452; CHECK-FIX-NEXT:    vmov.8 d16[0], r1
453; CHECK-FIX-NEXT:    subs r0, r0, #1
454; CHECK-FIX-NEXT:    aese.8 q8, q0
455; CHECK-FIX-NEXT:    aesmc.8 q8, q8
456; CHECK-FIX-NEXT:    bne .LBB15_2
457; CHECK-FIX-NEXT:  @ %bb.3:
458; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
459; CHECK-FIX-NEXT:    bx lr
460  %5 = icmp eq i32 %0, 0
461  br i1 %5, label %10, label %6
462
4636:
464  %7 = insertelement <16 x i8> %2, i8 %1, i64 0
465  %8 = load <16 x i8>, ptr %3, align 8
466  br label %11
467
4689:
469  store <16 x i8> %16, ptr %3, align 8
470  br label %10
471
47210:
473  ret void
474
47511:
476  %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
477  %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
478  %14 = insertelement <16 x i8> %12, i8 %1, i64 0
479  %15 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %7)
480  %16 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %15)
481  %17 = add nuw i32 %13, 1
482  %18 = icmp eq i32 %17, %0
483  br i1 %18, label %9, label %11
484}
485
; aese_set16_via_ptr - 16-bit variant of aese_set8_via_ptr: an i16 loaded from
; %0 is inserted into element 0 of both AES inputs (viewed as <8 x i16>): the
; block loaded from %2 and the vector argument %1. The expected output rewrites
; only the argument (`vorr q0`) before the `vmov.16` lane writes. The two
; expected-output variants differ only in the order of `ldrh` and `vld1.64`.
486define arm_aapcs_vfpcc void @aese_set16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
487; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr:
488; CHECK-FIX-NOSCHED:       @ %bb.0:
489; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
490; CHECK-FIX-NOSCHED-NEXT:    ldrh r0, [r0]
491; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
492; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d0[0], r0
493; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r0
494; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
495; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
496; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
497; CHECK-FIX-NOSCHED-NEXT:    bx lr
498;
499; CHECK-CORTEX-FIX-LABEL: aese_set16_via_ptr:
500; CHECK-CORTEX-FIX:       @ %bb.0:
501; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
502; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
503; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r0]
504; CHECK-CORTEX-FIX-NEXT:    vmov.16 d0[0], r0
505; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r0
506; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
507; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
508; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
509; CHECK-CORTEX-FIX-NEXT:    bx lr
510  %4 = load i16, ptr %0, align 2
511  %5 = bitcast ptr %2 to ptr
512  %6 = load <8 x i16>, ptr %5, align 8
513  %7 = insertelement <8 x i16> %6, i16 %4, i64 0
514  %8 = bitcast <8 x i16> %7 to <16 x i8>
515  %9 = bitcast <16 x i8> %1 to <8 x i16>
516  %10 = insertelement <8 x i16> %9, i16 %4, i64 0
517  %11 = bitcast <8 x i16> %10 to <16 x i8>
518  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
519  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
520  store <16 x i8> %13, ptr %2, align 8
521  ret void
522}
523
; aese_set16_via_val - Same as aese_set16_via_ptr, except the i16 inserted into
; element 0 of both AES inputs is passed directly as the `i16 zeroext`
; argument %0. The expected output rewrites only the vector argument
; (`vorr q0`).
524define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
525; CHECK-FIX-LABEL: aese_set16_via_val:
526; CHECK-FIX:       @ %bb.0:
527; CHECK-FIX-NEXT:    vorr q0, q0, q0
528; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
529; CHECK-FIX-NEXT:    vmov.16 d0[0], r0
530; CHECK-FIX-NEXT:    vmov.16 d16[0], r0
531; CHECK-FIX-NEXT:    aese.8 q8, q0
532; CHECK-FIX-NEXT:    aesmc.8 q8, q8
533; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
534; CHECK-FIX-NEXT:    bx lr
535  %4 = bitcast ptr %2 to ptr
536  %5 = load <8 x i16>, ptr %4, align 8
537  %6 = insertelement <8 x i16> %5, i16 %0, i64 0
538  %7 = bitcast <8 x i16> %6 to <16 x i8>
539  %8 = bitcast <16 x i8> %1 to <8 x i16>
540  %9 = insertelement <8 x i16> %8, i16 %0, i64 0
541  %10 = bitcast <8 x i16> %9 to <16 x i8>
542  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
543  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
544  store <16 x i8> %12, ptr %2, align 8
545  ret void
546}
547
; aese_set16_cond_via_ptr - 16-bit variant of aese_set8_cond_via_ptr. Both AES
; inputs have element 0 (as <8 x i16>) conditionally replaced by an i16 loaded
; from %1, using explicit branches on the same flag %0:
;   - the block loaded from %3 (phi %14), and
;   - the vector argument %2 (phi %22).
; The expected output rewrites the argument with `vorr q0` at function entry
; and performs the conditional lane updates with `vld1.16` lane loads.
548define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
549; CHECK-FIX-LABEL: aese_set16_cond_via_ptr:
550; CHECK-FIX:       @ %bb.0:
551; CHECK-FIX-NEXT:    vorr q0, q0, q0
552; CHECK-FIX-NEXT:    cmp r0, #0
553; CHECK-FIX-NEXT:    beq .LBB18_2
554; CHECK-FIX-NEXT:  @ %bb.1:
555; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
556; CHECK-FIX-NEXT:    vld1.16 {d16[0]}, [r1:16]
557; CHECK-FIX-NEXT:    cmp r0, #0
558; CHECK-FIX-NEXT:    bne .LBB18_3
559; CHECK-FIX-NEXT:    b .LBB18_4
560; CHECK-FIX-NEXT:  .LBB18_2:
561; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
562; CHECK-FIX-NEXT:    cmp r0, #0
563; CHECK-FIX-NEXT:    beq .LBB18_4
564; CHECK-FIX-NEXT:  .LBB18_3:
565; CHECK-FIX-NEXT:    vld1.16 {d0[0]}, [r1:16]
566; CHECK-FIX-NEXT:  .LBB18_4:
567; CHECK-FIX-NEXT:    aese.8 q8, q0
568; CHECK-FIX-NEXT:    aesmc.8 q8, q8
569; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
570; CHECK-FIX-NEXT:    bx lr
571  br i1 %0, label %5, label %10
572
5735:
574  %6 = load i16, ptr %1, align 2
575  %7 = bitcast ptr %3 to ptr
576  %8 = load <8 x i16>, ptr %7, align 8
577  %9 = insertelement <8 x i16> %8, i16 %6, i64 0
578  br label %13
579
58010:
581  %11 = bitcast ptr %3 to ptr
582  %12 = load <8 x i16>, ptr %11, align 8
583  br label %13
584
58513:
586  %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
587  br i1 %0, label %15, label %19
588
58915:
590  %16 = load i16, ptr %1, align 2
591  %17 = bitcast <16 x i8> %2 to <8 x i16>
592  %18 = insertelement <8 x i16> %17, i16 %16, i64 0
593  br label %21
594
59519:
596  %20 = bitcast <16 x i8> %2 to <8 x i16>
597  br label %21
598
59921:
600  %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
601  %23 = bitcast <8 x i16> %14 to <16 x i8>
602  %24 = bitcast <8 x i16> %22 to <16 x i8>
603  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
604  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
605  store <16 x i8> %26, ptr %3, align 8
606  ret void
607}
608
; aese_set16_cond_via_val - Like aese_set16_cond_via_ptr, but the i16 is the
; `i16 zeroext` argument %1 and the conditional element-0 updates are expressed
; as `select` on flag %0 rather than as branches. The expected output rewrites
; the vector argument with `vorr q0` at entry and guards each `vmov.16` lane
; write with its own compare-and-branch.
609define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
610; CHECK-FIX-LABEL: aese_set16_cond_via_val:
611; CHECK-FIX:       @ %bb.0:
612; CHECK-FIX-NEXT:    vorr q0, q0, q0
613; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
614; CHECK-FIX-NEXT:    cmp r0, #0
615; CHECK-FIX-NEXT:    beq .LBB19_2
616; CHECK-FIX-NEXT:  @ %bb.1:
617; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
618; CHECK-FIX-NEXT:  .LBB19_2: @ %select.end
619; CHECK-FIX-NEXT:    cmp r0, #0
620; CHECK-FIX-NEXT:    beq .LBB19_4
621; CHECK-FIX-NEXT:  @ %bb.3:
622; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
623; CHECK-FIX-NEXT:  .LBB19_4: @ %select.end2
624; CHECK-FIX-NEXT:    aese.8 q8, q0
625; CHECK-FIX-NEXT:    aesmc.8 q8, q8
626; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
627; CHECK-FIX-NEXT:    bx lr
628  %5 = bitcast ptr %3 to ptr
629  %6 = load <8 x i16>, ptr %5, align 8
630  %7 = insertelement <8 x i16> %6, i16 %1, i64 0
631  %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
632  %9 = bitcast <16 x i8> %2 to <8 x i16>
633  %10 = insertelement <8 x i16> %9, i16 %1, i64 0
634  %11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
635  %12 = bitcast <8 x i16> %8 to <16 x i8>
636  %13 = bitcast <8 x i16> %11 to <16 x i8>
637  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
638  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
639  store <16 x i8> %15, ptr %3, align 8
640  ret void
641}
642
; aese_set16_loop_via_ptr - 16-bit variant of aese_set8_loop_via_ptr. An i16
; loaded from %1 is inserted into element 0 of the vector argument %2 and is
; also stored to the first halfword of %3. If %0 is non-zero, the block at %3
; is then loaded once and run through %0 iterations of AESE + AESMC (state
; carried in phi %16); the final state is stored to %3 on loop exit. The
; expected output rewrites the argument with `vorr q0` at entry; the loop body
; itself contains only `aese`/`aesmc` and the counter.
643define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
644; CHECK-FIX-LABEL: aese_set16_loop_via_ptr:
645; CHECK-FIX:       @ %bb.0:
646; CHECK-FIX-NEXT:    vorr q0, q0, q0
647; CHECK-FIX-NEXT:    ldrh r1, [r1]
648; CHECK-FIX-NEXT:    cmp r0, #0
649; CHECK-FIX-NEXT:    strh r1, [r2]
650; CHECK-FIX-NEXT:    bxeq lr
651; CHECK-FIX-NEXT:  .LBB20_1:
652; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
653; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
654; CHECK-FIX-NEXT:  .LBB20_2: @ =>This Inner Loop Header: Depth=1
655; CHECK-FIX-NEXT:    aese.8 q8, q0
656; CHECK-FIX-NEXT:    subs r0, r0, #1
657; CHECK-FIX-NEXT:    aesmc.8 q8, q8
658; CHECK-FIX-NEXT:    bne .LBB20_2
659; CHECK-FIX-NEXT:  @ %bb.3:
660; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
661; CHECK-FIX-NEXT:    bx lr
662  %5 = load i16, ptr %1, align 2
663  %6 = bitcast <16 x i8> %2 to <8 x i16>
664  %7 = insertelement <8 x i16> %6, i16 %5, i64 0
665  %8 = bitcast <8 x i16> %7 to <16 x i8>
666  %9 = bitcast ptr %3 to ptr
667  store i16 %5, ptr %9, align 8
668  %10 = icmp eq i32 %0, 0
669  br i1 %10, label %14, label %11
670
67111:
672  %12 = load <16 x i8>, ptr %3, align 8
673  br label %15
674
67513:
676  store <16 x i8> %19, ptr %3, align 8
677  br label %14
678
67914:
680  ret void
681
68215:
683  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
684  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
685  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
686  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
687  %20 = add nuw i32 %17, 1
688  %21 = icmp eq i32 %20, %0
689  br i1 %21, label %13, label %15
690}
691
; aese_set16_loop_via_val - Returns immediately when %0 is zero. Otherwise the
; i16 argument %1 is inserted into element 0 of the vector argument %2 once,
; and the loop runs %0 iterations. Unlike the 8-bit loop test, the memory
; traffic is inside the loop: each iteration reloads the block from %3,
; inserts %1 into element 0, stores %1 to the first halfword of %3, applies
; AESE + AESMC and stores the full result back to %3. The expected output
; rewrites the argument with `vorr q0` at entry and keeps the `vld1.64`,
; `vmov.16` and `vst1.64` inside the loop body.
692define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
693; CHECK-FIX-LABEL: aese_set16_loop_via_val:
694; CHECK-FIX:       @ %bb.0:
695; CHECK-FIX-NEXT:    vorr q0, q0, q0
696; CHECK-FIX-NEXT:    cmp r0, #0
697; CHECK-FIX-NEXT:    bxeq lr
698; CHECK-FIX-NEXT:  .LBB21_1:
699; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
700; CHECK-FIX-NEXT:  .LBB21_2: @ =>This Inner Loop Header: Depth=1
701; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
702; CHECK-FIX-NEXT:    subs r0, r0, #1
703; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
704; CHECK-FIX-NEXT:    aese.8 q8, q0
705; CHECK-FIX-NEXT:    aesmc.8 q8, q8
706; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
707; CHECK-FIX-NEXT:    bne .LBB21_2
708; CHECK-FIX-NEXT:  @ %bb.3:
709; CHECK-FIX-NEXT:    bx lr
710  %5 = icmp eq i32 %0, 0
711  br i1 %5, label %12, label %6
712
7136:
714  %7 = bitcast <16 x i8> %2 to <8 x i16>
715  %8 = insertelement <8 x i16> %7, i16 %1, i64 0
716  %9 = bitcast <8 x i16> %8 to <16 x i8>
717  %10 = bitcast ptr %3 to ptr
718  %11 = bitcast ptr %3 to ptr
719  br label %13
720
72112:
722  ret void
723
72413:
725  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
726  %15 = load <8 x i16>, ptr %10, align 8
727  %16 = insertelement <8 x i16> %15, i16 %1, i64 0
728  %17 = bitcast <8 x i16> %16 to <16 x i8>
729  store i16 %1, ptr %11, align 8
730  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
731  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
732  store <16 x i8> %19, ptr %3, align 8
733  %20 = add nuw i32 %14, 1
734  %21 = icmp eq i32 %20, %0
735  br i1 %21, label %12, label %13
736}
737
; aese_set32_via_ptr - 32-bit variant of aese_set8_via_ptr: an i32 loaded from
; %0 is inserted into element 0 of both AES inputs (viewed as <4 x i32>): the
; block loaded from %2 and the vector argument %1. The expected output rewrites
; only the argument (`vorr q0`) before the `vmov.32` lane writes. The two
; expected-output variants differ only in the order of `ldr` and `vld1.64`.
738define arm_aapcs_vfpcc void @aese_set32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
739; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr:
740; CHECK-FIX-NOSCHED:       @ %bb.0:
741; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
742; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [r0]
743; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
744; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d0[0], r0
745; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
746; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
747; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
748; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
749; CHECK-FIX-NOSCHED-NEXT:    bx lr
750;
751; CHECK-CORTEX-FIX-LABEL: aese_set32_via_ptr:
752; CHECK-CORTEX-FIX:       @ %bb.0:
753; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
754; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
755; CHECK-CORTEX-FIX-NEXT:    ldr r0, [r0]
756; CHECK-CORTEX-FIX-NEXT:    vmov.32 d0[0], r0
757; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r0
758; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
759; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
760; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
761; CHECK-CORTEX-FIX-NEXT:    bx lr
762  %4 = load i32, ptr %0, align 4
763  %5 = bitcast ptr %2 to ptr
764  %6 = load <4 x i32>, ptr %5, align 8
765  %7 = insertelement <4 x i32> %6, i32 %4, i64 0
766  %8 = bitcast <4 x i32> %7 to <16 x i8>
767  %9 = bitcast <16 x i8> %1 to <4 x i32>
768  %10 = insertelement <4 x i32> %9, i32 %4, i64 0
769  %11 = bitcast <4 x i32> %10 to <16 x i8>
770  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
771  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
772  store <16 x i8> %13, ptr %2, align 8
773  ret void
774}
775
; aese_set32_via_val - Same as aese_set32_via_ptr, except the i32 inserted into
; element 0 of both AES inputs is passed directly as argument %0. The expected
; output rewrites only the vector argument (`vorr q0`).
776define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, ptr %2) nounwind {
777; CHECK-FIX-LABEL: aese_set32_via_val:
778; CHECK-FIX:       @ %bb.0:
779; CHECK-FIX-NEXT:    vorr q0, q0, q0
780; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
781; CHECK-FIX-NEXT:    vmov.32 d0[0], r0
782; CHECK-FIX-NEXT:    vmov.32 d16[0], r0
783; CHECK-FIX-NEXT:    aese.8 q8, q0
784; CHECK-FIX-NEXT:    aesmc.8 q8, q8
785; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
786; CHECK-FIX-NEXT:    bx lr
787  %4 = bitcast ptr %2 to ptr
788  %5 = load <4 x i32>, ptr %4, align 8
789  %6 = insertelement <4 x i32> %5, i32 %0, i64 0
790  %7 = bitcast <4 x i32> %6 to <16 x i8>
791  %8 = bitcast <16 x i8> %1 to <4 x i32>
792  %9 = insertelement <4 x i32> %8, i32 %0, i64 0
793  %10 = bitcast <4 x i32> %9 to <16 x i8>
794  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
795  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
796  store <16 x i8> %12, ptr %2, align 8
797  ret void
798}
799
; aese_set32_cond_via_ptr - 32-bit variant of aese_set8_cond_via_ptr. Both AES
; inputs have element 0 (as <4 x i32>) conditionally replaced by an i32 loaded
; from %1, using explicit branches on the same flag %0:
;   - the block loaded from %3 (phi %14), and
;   - the vector argument %2 (phi %22).
; The expected output rewrites the argument with `vorr q0` at function entry
; and performs the conditional lane updates with `vld1.32` lane loads.
800define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
801; CHECK-FIX-LABEL: aese_set32_cond_via_ptr:
802; CHECK-FIX:       @ %bb.0:
803; CHECK-FIX-NEXT:    vorr q0, q0, q0
804; CHECK-FIX-NEXT:    cmp r0, #0
805; CHECK-FIX-NEXT:    beq .LBB24_2
806; CHECK-FIX-NEXT:  @ %bb.1:
807; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
808; CHECK-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
809; CHECK-FIX-NEXT:    cmp r0, #0
810; CHECK-FIX-NEXT:    bne .LBB24_3
811; CHECK-FIX-NEXT:    b .LBB24_4
812; CHECK-FIX-NEXT:  .LBB24_2:
813; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
814; CHECK-FIX-NEXT:    cmp r0, #0
815; CHECK-FIX-NEXT:    beq .LBB24_4
816; CHECK-FIX-NEXT:  .LBB24_3:
817; CHECK-FIX-NEXT:    vld1.32 {d0[0]}, [r1:32]
818; CHECK-FIX-NEXT:  .LBB24_4:
819; CHECK-FIX-NEXT:    aese.8 q8, q0
820; CHECK-FIX-NEXT:    aesmc.8 q8, q8
821; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
822; CHECK-FIX-NEXT:    bx lr
823  br i1 %0, label %5, label %10
824
8255:
826  %6 = load i32, ptr %1, align 4
827  %7 = bitcast ptr %3 to ptr
828  %8 = load <4 x i32>, ptr %7, align 8
829  %9 = insertelement <4 x i32> %8, i32 %6, i64 0
830  br label %13
831
83210:
833  %11 = bitcast ptr %3 to ptr
834  %12 = load <4 x i32>, ptr %11, align 8
835  br label %13
836
83713:
838  %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
839  br i1 %0, label %15, label %19
840
84115:
842  %16 = load i32, ptr %1, align 4
843  %17 = bitcast <16 x i8> %2 to <4 x i32>
844  %18 = insertelement <4 x i32> %17, i32 %16, i64 0
845  br label %21
846
84719:
848  %20 = bitcast <16 x i8> %2 to <4 x i32>
849  br label %21
850
85121:
852  %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
853  %23 = bitcast <4 x i32> %14 to <16 x i8>
854  %24 = bitcast <4 x i32> %22 to <16 x i8>
855  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
856  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
857  store <16 x i8> %26, ptr %3, align 8
858  ret void
859}
860
; Conditional lane-0 i32 insert with the scalar passed by value: for each aese
; operand (the vector loaded from %3 and the vector argument %2) a select on %0
; chooses between the vector with %1 inserted into lane 0 and the unmodified
; vector. The selected values feed aese and aesmc, and the result is stored to
; %3.
; Expected output: one `vorr q0, q0, q0` at function entry; each select is
; expected to become a compare-and-branch around a `vmov.32` lane write.
861define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
862; CHECK-FIX-LABEL: aese_set32_cond_via_val:
863; CHECK-FIX:       @ %bb.0:
864; CHECK-FIX-NEXT:    vorr q0, q0, q0
865; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
866; CHECK-FIX-NEXT:    cmp r0, #0
867; CHECK-FIX-NEXT:    beq .LBB25_2
868; CHECK-FIX-NEXT:  @ %bb.1:
869; CHECK-FIX-NEXT:    vmov.32 d16[0], r1
870; CHECK-FIX-NEXT:  .LBB25_2: @ %select.end
871; CHECK-FIX-NEXT:    cmp r0, #0
872; CHECK-FIX-NEXT:    beq .LBB25_4
873; CHECK-FIX-NEXT:  @ %bb.3:
874; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
875; CHECK-FIX-NEXT:  .LBB25_4: @ %select.end2
876; CHECK-FIX-NEXT:    aese.8 q8, q0
877; CHECK-FIX-NEXT:    aesmc.8 q8, q8
878; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
879; CHECK-FIX-NEXT:    bx lr
  ; First aese operand: loaded vector, with lane 0 replaced by %1 only if %0.
880  %5 = bitcast ptr %3 to ptr
881  %6 = load <4 x i32>, ptr %5, align 8
882  %7 = insertelement <4 x i32> %6, i32 %1, i64 0
883  %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
  ; Second aese operand: vector argument, with lane 0 replaced by %1 only if %0.
884  %9 = bitcast <16 x i8> %2 to <4 x i32>
885  %10 = insertelement <4 x i32> %9, i32 %1, i64 0
886  %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
887  %12 = bitcast <4 x i32> %8 to <16 x i8>
888  %13 = bitcast <4 x i32> %11 to <16 x i8>
889  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
890  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
891  store <16 x i8> %15, ptr %3, align 8
892  ret void
893}
894
; Lane-0 i32 insert feeding an aese/aesmc loop, with the scalar loaded through
; %1: the loaded i32 is inserted into lane 0 of the vector argument %2 once,
; before the loop, and is also stored to %3. If %0 is zero the function returns;
; otherwise the vector at %3 is loaded, run through aese + aesmc %0 times with
; the modified %2 as the second operand, and stored back to %3.
; Expected output: one `vorr q0, q0, q0` at function entry and none inside the
; loop body.
895define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
896; CHECK-FIX-LABEL: aese_set32_loop_via_ptr:
897; CHECK-FIX:       @ %bb.0:
898; CHECK-FIX-NEXT:    vorr q0, q0, q0
899; CHECK-FIX-NEXT:    ldr r1, [r1]
900; CHECK-FIX-NEXT:    cmp r0, #0
901; CHECK-FIX-NEXT:    str r1, [r2]
902; CHECK-FIX-NEXT:    bxeq lr
903; CHECK-FIX-NEXT:  .LBB26_1:
904; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
905; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
906; CHECK-FIX-NEXT:  .LBB26_2: @ =>This Inner Loop Header: Depth=1
907; CHECK-FIX-NEXT:    aese.8 q8, q0
908; CHECK-FIX-NEXT:    subs r0, r0, #1
909; CHECK-FIX-NEXT:    aesmc.8 q8, q8
910; CHECK-FIX-NEXT:    bne .LBB26_2
911; CHECK-FIX-NEXT:  @ %bb.3:
912; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
913; CHECK-FIX-NEXT:    bx lr
914  %5 = load i32, ptr %1, align 4
915  %6 = bitcast <16 x i8> %2 to <4 x i32>
916  %7 = insertelement <4 x i32> %6, i32 %5, i64 0
917  %8 = bitcast <4 x i32> %7 to <16 x i8>
918  %9 = bitcast ptr %3 to ptr
919  store i32 %5, ptr %9, align 8
920  %10 = icmp eq i32 %0, 0
921  br i1 %10, label %14, label %11
922
; Loop preheader: load the initial AES state from %3.
92311:
924  %12 = load <16 x i8>, ptr %3, align 8
925  br label %15
926
; Loop exit: write the final AES state back to %3.
92713:
928  store <16 x i8> %19, ptr %3, align 8
929  br label %14
930
93114:
932  ret void
933
; Loop body: %16 is the running AES state, %17 counts iterations up to %0.
93415:
935  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
936  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
937  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
938  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
939  %20 = add nuw i32 %17, 1
940  %21 = icmp eq i32 %20, %0
941  br i1 %21, label %13, label %15
942}
943
; Lane-0 i32 insert feeding an aese/aesmc loop, with the scalar passed by value:
; if %0 is zero the function returns immediately. Otherwise %1 is inserted into
; lane 0 of the vector argument %2 once (block 6), and each of the %0 loop
; iterations reloads the vector at %3, inserts %1 into its lane 0, runs aese +
; aesmc, and stores the result back to %3.
; Expected output: one `vorr q0, q0, q0` at function entry and none inside the
; loop body.
944define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
945; CHECK-FIX-LABEL: aese_set32_loop_via_val:
946; CHECK-FIX:       @ %bb.0:
947; CHECK-FIX-NEXT:    vorr q0, q0, q0
948; CHECK-FIX-NEXT:    cmp r0, #0
949; CHECK-FIX-NEXT:    bxeq lr
950; CHECK-FIX-NEXT:  .LBB27_1:
951; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
952; CHECK-FIX-NEXT:  .LBB27_2: @ =>This Inner Loop Header: Depth=1
953; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
954; CHECK-FIX-NEXT:    subs r0, r0, #1
955; CHECK-FIX-NEXT:    vmov.32 d16[0], r1
956; CHECK-FIX-NEXT:    aese.8 q8, q0
957; CHECK-FIX-NEXT:    aesmc.8 q8, q8
958; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
959; CHECK-FIX-NEXT:    bne .LBB27_2
960; CHECK-FIX-NEXT:  @ %bb.3:
961; CHECK-FIX-NEXT:    bx lr
962  %5 = icmp eq i32 %0, 0
963  br i1 %5, label %12, label %6
964
; Loop preheader: build the loop-invariant second aese operand from %2 and %1.
9656:
966  %7 = bitcast <16 x i8> %2 to <4 x i32>
967  %8 = insertelement <4 x i32> %7, i32 %1, i64 0
968  %9 = bitcast <4 x i32> %8 to <16 x i8>
969  %10 = bitcast ptr %3 to ptr
970  %11 = bitcast ptr %3 to ptr
971  br label %13
972
97312:
974  ret void
975
; Loop body: %14 counts iterations up to %0; the state is reloaded from %3 on
; every iteration.
97613:
977  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
978  %15 = load <4 x i32>, ptr %10, align 8
979  %16 = insertelement <4 x i32> %15, i32 %1, i64 0
980  %17 = bitcast <4 x i32> %16 to <16 x i8>
981  store i32 %1, ptr %11, align 8
982  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
983  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
984  store <16 x i8> %19, ptr %3, align 8
985  %20 = add nuw i32 %14, 1
986  %21 = icmp eq i32 %20, %0
987  br i1 %21, label %12, label %13
988}
989
; Lane-0 i64 insert with the scalar loaded through %0: the loaded i64 is
; inserted into lane 0 of both the vector loaded from %2 and the vector argument
; %1. The two results feed aese, then aesmc, and the output is stored back to
; %2.
; Expected output: `vorr q0, q0, q0` at function entry in both sets of
; expectations; the two sets differ only in the order of the `vldr` and
; `vld1.64` loads.
990define arm_aapcs_vfpcc void @aese_set64_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
991; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr:
992; CHECK-FIX-NOSCHED:       @ %bb.0:
993; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
994; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
995; CHECK-FIX-NOSCHED-NEXT:    vldr d0, [r0]
996; CHECK-FIX-NOSCHED-NEXT:    vorr d16, d0, d0
997; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
998; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
999; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
1000; CHECK-FIX-NOSCHED-NEXT:    bx lr
1001;
1002; CHECK-CORTEX-FIX-LABEL: aese_set64_via_ptr:
1003; CHECK-CORTEX-FIX:       @ %bb.0:
1004; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
1005; CHECK-CORTEX-FIX-NEXT:    vldr d0, [r0]
1006; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
1007; CHECK-CORTEX-FIX-NEXT:    vorr d16, d0, d0
1008; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
1009; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
1010; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
1011; CHECK-CORTEX-FIX-NEXT:    bx lr
1012  %4 = load i64, ptr %0, align 8
  ; First aese operand: vector loaded from %2 with the i64 written to lane 0.
1013  %5 = bitcast ptr %2 to ptr
1014  %6 = load <2 x i64>, ptr %5, align 8
1015  %7 = insertelement <2 x i64> %6, i64 %4, i64 0
1016  %8 = bitcast <2 x i64> %7 to <16 x i8>
  ; Second aese operand: vector argument %1 with the i64 written to lane 0.
1017  %9 = bitcast <16 x i8> %1 to <2 x i64>
1018  %10 = insertelement <2 x i64> %9, i64 %4, i64 0
1019  %11 = bitcast <2 x i64> %10 to <16 x i8>
1020  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
1021  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
1022  store <16 x i8> %13, ptr %2, align 8
1023  ret void
1024}
1025
; Lane-0 i64 insert with the scalar passed by value: %0 is inserted into lane 0
; of both the vector loaded from %2 and the vector argument %1. The two results
; feed aese, then aesmc, and the output is stored back to %2.
; Expected output: one `vorr q0, q0, q0` at function entry; the i64 is expected
; to be written as two `vmov.32` lane writes (from r0 and r1) per vector.
1026define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) nounwind {
1027; CHECK-FIX-LABEL: aese_set64_via_val:
1028; CHECK-FIX:       @ %bb.0:
1029; CHECK-FIX-NEXT:    vorr q0, q0, q0
1030; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
1031; CHECK-FIX-NEXT:    vmov.32 d0[0], r0
1032; CHECK-FIX-NEXT:    vmov.32 d16[0], r0
1033; CHECK-FIX-NEXT:    vmov.32 d0[1], r1
1034; CHECK-FIX-NEXT:    vmov.32 d16[1], r1
1035; CHECK-FIX-NEXT:    aese.8 q8, q0
1036; CHECK-FIX-NEXT:    aesmc.8 q8, q8
1037; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
1038; CHECK-FIX-NEXT:    bx lr
  ; First aese operand: vector loaded from %2 with %0 written to lane 0.
1039  %4 = bitcast ptr %2 to ptr
1040  %5 = load <2 x i64>, ptr %4, align 8
1041  %6 = insertelement <2 x i64> %5, i64 %0, i64 0
1042  %7 = bitcast <2 x i64> %6 to <16 x i8>
  ; Second aese operand: vector argument %1 with %0 written to lane 0.
1043  %8 = bitcast <16 x i8> %1 to <2 x i64>
1044  %9 = insertelement <2 x i64> %8, i64 %0, i64 0
1045  %10 = bitcast <2 x i64> %9 to <16 x i8>
1046  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
1047  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
1048  store <16 x i8> %12, ptr %2, align 8
1049  ret void
1050}
1051
; Conditional lane-0 i64 insert with the scalar loaded through %1: when %0 is
; true, the i64 loaded from %1 is inserted into lane 0 of the vector loaded from
; %3 and, separately, into lane 0 of the vector argument %2; otherwise both
; vectors are used unchanged. The merged values (phis %14 and %22) feed aese and
; aesmc, and the result is stored to %3.
; Expected output: unlike the neighbouring tests, `vorr q0, q0, q0` is expected
; immediately before the aese, after the conditional `vldrne d0, [r1]`, rather
; than at function entry. The two sets of expectations differ in %bb.1: one
; loads straight into d16, the other loads into d18 and copies it to d16 with
; `vorr d16, d18, d18`.
1052define arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
1053; CHECK-FIX-NOSCHED-LABEL: aese_set64_cond_via_ptr:
1054; CHECK-FIX-NOSCHED:       @ %bb.0:
1055; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
1056; CHECK-FIX-NOSCHED-NEXT:    beq .LBB30_2
1057; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
1058; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
1059; CHECK-FIX-NOSCHED-NEXT:    vldr d16, [r1]
1060; CHECK-FIX-NOSCHED-NEXT:    b .LBB30_3
1061; CHECK-FIX-NOSCHED-NEXT:  .LBB30_2:
1062; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
1063; CHECK-FIX-NOSCHED-NEXT:  .LBB30_3:
1064; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
1065; CHECK-FIX-NOSCHED-NEXT:    vldrne d0, [r1]
1066; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
1067; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
1068; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
1069; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
1070; CHECK-FIX-NOSCHED-NEXT:    bx lr
1071;
1072; CHECK-CORTEX-FIX-LABEL: aese_set64_cond_via_ptr:
1073; CHECK-CORTEX-FIX:       @ %bb.0:
1074; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
1075; CHECK-CORTEX-FIX-NEXT:    beq .LBB30_2
1076; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
1077; CHECK-CORTEX-FIX-NEXT:    vldr d18, [r1]
1078; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
1079; CHECK-CORTEX-FIX-NEXT:    vorr d16, d18, d18
1080; CHECK-CORTEX-FIX-NEXT:    b .LBB30_3
1081; CHECK-CORTEX-FIX-NEXT:  .LBB30_2:
1082; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
1083; CHECK-CORTEX-FIX-NEXT:  .LBB30_3:
1084; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
1085; CHECK-CORTEX-FIX-NEXT:    vldrne d0, [r1]
1086; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
1087; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
1088; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
1089; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
1090; CHECK-CORTEX-FIX-NEXT:    bx lr
1091  br i1 %0, label %5, label %10
1092
; %0 true: load the vector from %3 and overwrite lane 0 with the i64 from %1.
10935:
1094  %6 = load i64, ptr %1, align 8
1095  %7 = bitcast ptr %3 to ptr
1096  %8 = load <2 x i64>, ptr %7, align 8
1097  %9 = insertelement <2 x i64> %8, i64 %6, i64 0
1098  br label %13
1099
; %0 false: load the vector from %3 unmodified.
110010:
1101  %11 = bitcast ptr %3 to ptr
1102  %12 = load <2 x i64>, ptr %11, align 8
1103  br label %13
1104
110513:
1106  %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
1107  br i1 %0, label %15, label %19
1108
; %0 true: overwrite lane 0 of the vector argument %2 with the i64 from %1.
110915:
1110  %16 = load i64, ptr %1, align 8
1111  %17 = bitcast <16 x i8> %2 to <2 x i64>
1112  %18 = insertelement <2 x i64> %17, i64 %16, i64 0
1113  br label %21
1114
; %0 false: use the vector argument %2 unmodified.
111519:
1116  %20 = bitcast <16 x i8> %2 to <2 x i64>
1117  br label %21
1118
111921:
1120  %22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
1121  %23 = bitcast <2 x i64> %14 to <16 x i8>
1122  %24 = bitcast <2 x i64> %22 to <16 x i8>
1123  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
1124  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
1125  store <16 x i8> %26, ptr %3, align 8
1126  ret void
1127}
1128
; Conditional lane-0 i64 insert with the scalar passed by value: for each aese
; operand (the vector loaded from %3 and the vector argument %2) a select on %0
; chooses between the vector with %1 inserted into lane 0 and the unmodified
; vector. The selected values feed aese and aesmc, and the result is stored to
; %3.
; Expected output: one `vorr q0, q0, q0` at function entry; the pointer %3 is
; expected to be fetched with `ldr r1, [sp]`, and each select is expected to
; become a compare-and-branch around a pair of `vmov.32` lane writes.
1129define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
1130; CHECK-FIX-LABEL: aese_set64_cond_via_val:
1131; CHECK-FIX:       @ %bb.0:
1132; CHECK-FIX-NEXT:    vorr q0, q0, q0
1133; CHECK-FIX-NEXT:    ldr r1, [sp]
1134; CHECK-FIX-NEXT:    cmp r0, #0
1135; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
1136; CHECK-FIX-NEXT:    beq .LBB31_2
1137; CHECK-FIX-NEXT:  @ %bb.1:
1138; CHECK-FIX-NEXT:    vmov.32 d16[0], r2
1139; CHECK-FIX-NEXT:    vmov.32 d16[1], r3
1140; CHECK-FIX-NEXT:  .LBB31_2: @ %select.end
1141; CHECK-FIX-NEXT:    cmp r0, #0
1142; CHECK-FIX-NEXT:    beq .LBB31_4
1143; CHECK-FIX-NEXT:  @ %bb.3:
1144; CHECK-FIX-NEXT:    vmov.32 d0[0], r2
1145; CHECK-FIX-NEXT:    vmov.32 d0[1], r3
1146; CHECK-FIX-NEXT:  .LBB31_4: @ %select.end2
1147; CHECK-FIX-NEXT:    aese.8 q8, q0
1148; CHECK-FIX-NEXT:    aesmc.8 q8, q8
1149; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
1150; CHECK-FIX-NEXT:    bx lr
  ; First aese operand: loaded vector, with lane 0 replaced by %1 only if %0.
1151  %5 = bitcast ptr %3 to ptr
1152  %6 = load <2 x i64>, ptr %5, align 8
1153  %7 = insertelement <2 x i64> %6, i64 %1, i64 0
1154  %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
  ; Second aese operand: vector argument, with lane 0 replaced by %1 only if %0.
1155  %9 = bitcast <16 x i8> %2 to <2 x i64>
1156  %10 = insertelement <2 x i64> %9, i64 %1, i64 0
1157  %11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
1158  %12 = bitcast <2 x i64> %8 to <16 x i8>
1159  %13 = bitcast <2 x i64> %11 to <16 x i8>
1160  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
1161  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
1162  store <16 x i8> %15, ptr %3, align 8
1163  ret void
1164}
1165
; Lane-0 i64 insert feeding an aese/aesmc loop, with the scalar loaded through
; %1: the loaded i64 is inserted into lane 0 of the vector argument %2 once,
; before the loop, and is also stored to %3. If %0 is zero the function returns;
; otherwise the vector at %3 is loaded, run through aese + aesmc %0 times with
; the modified %2 as the second operand, and stored back to %3.
; Expected output: `vorr q0, q0, q0` at function entry in both sets of
; expectations and none inside the loop body. The two sets differ in how the
; zero-count early exit is emitted: a branch to a shared `pop` in one, a
; conditional `popeq` in the other.
1166define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
1167; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr:
1168; CHECK-FIX-NOSCHED:       @ %bb.0:
1169; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
1170; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r11, lr}
1171; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r11, lr}
1172; CHECK-FIX-NOSCHED-NEXT:    ldrd r4, r5, [r1]
1173; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
1174; CHECK-FIX-NOSCHED-NEXT:    strd r4, r5, [r2]
1175; CHECK-FIX-NOSCHED-NEXT:    beq .LBB32_4
1176; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
1177; CHECK-FIX-NOSCHED-NEXT:    vmov d0, r4, r5
1178; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
1179; CHECK-FIX-NOSCHED-NEXT:  .LBB32_2: @ =>This Inner Loop Header: Depth=1
1180; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
1181; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
1182; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
1183; CHECK-FIX-NOSCHED-NEXT:    bne .LBB32_2
1184; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
1185; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
1186; CHECK-FIX-NOSCHED-NEXT:  .LBB32_4:
1187; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r11, pc}
1188;
1189; CHECK-CORTEX-FIX-LABEL: aese_set64_loop_via_ptr:
1190; CHECK-CORTEX-FIX:       @ %bb.0:
1191; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
1192; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r11, lr}
1193; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r11, lr}
1194; CHECK-CORTEX-FIX-NEXT:    ldrd r4, r5, [r1]
1195; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
1196; CHECK-CORTEX-FIX-NEXT:    strd r4, r5, [r2]
1197; CHECK-CORTEX-FIX-NEXT:    popeq {r4, r5, r11, pc}
1198; CHECK-CORTEX-FIX-NEXT:  .LBB32_1:
1199; CHECK-CORTEX-FIX-NEXT:    vmov d0, r4, r5
1200; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
1201; CHECK-CORTEX-FIX-NEXT:  .LBB32_2: @ =>This Inner Loop Header: Depth=1
1202; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
1203; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
1204; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
1205; CHECK-CORTEX-FIX-NEXT:    bne .LBB32_2
1206; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
1207; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
1208; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r11, pc}
1209  %5 = load i64, ptr %1, align 8
1210  %6 = bitcast <16 x i8> %2 to <2 x i64>
1211  %7 = insertelement <2 x i64> %6, i64 %5, i64 0
1212  %8 = bitcast <2 x i64> %7 to <16 x i8>
1213  %9 = bitcast ptr %3 to ptr
1214  store i64 %5, ptr %9, align 8
1215  %10 = icmp eq i32 %0, 0
1216  br i1 %10, label %14, label %11
1217
; Loop preheader: load the initial AES state from %3.
121811:
1219  %12 = load <16 x i8>, ptr %3, align 8
1220  br label %15
1221
; Loop exit: write the final AES state back to %3.
122213:
1223  store <16 x i8> %19, ptr %3, align 8
1224  br label %14
1225
122614:
1227  ret void
1228
; Loop body: %16 is the running AES state, %17 counts iterations up to %0.
122915:
1230  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
1231  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
1232  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
1233  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
1234  %20 = add nuw i32 %17, 1
1235  %21 = icmp eq i32 %20, %0
1236  br i1 %21, label %13, label %15
1237}
1238
; Lane-0 i64 insert feeding an aese/aesmc loop, with the scalar passed by value:
; if %0 is zero the function returns immediately. Otherwise %1 is inserted into
; lane 0 of the vector argument %2 once (block 6), and each of the %0 loop
; iterations reloads the vector at %3, inserts %1 into its lane 0, runs aese +
; aesmc, and stores the result back to %3.
; Expected output: one `vorr q0, q0, q0` at function entry and none inside the
; loop body; the pointer %3 is expected to be fetched with `ldr r1, [sp]`.
1239define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
1240; CHECK-FIX-LABEL: aese_set64_loop_via_val:
1241; CHECK-FIX:       @ %bb.0:
1242; CHECK-FIX-NEXT:    vorr q0, q0, q0
1243; CHECK-FIX-NEXT:    cmp r0, #0
1244; CHECK-FIX-NEXT:    bxeq lr
1245; CHECK-FIX-NEXT:  .LBB33_1:
1246; CHECK-FIX-NEXT:    vmov.32 d0[0], r2
1247; CHECK-FIX-NEXT:    ldr r1, [sp]
1248; CHECK-FIX-NEXT:    vmov.32 d0[1], r3
1249; CHECK-FIX-NEXT:  .LBB33_2: @ =>This Inner Loop Header: Depth=1
1250; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
1251; CHECK-FIX-NEXT:    subs r0, r0, #1
1252; CHECK-FIX-NEXT:    vmov.32 d16[0], r2
1253; CHECK-FIX-NEXT:    vmov.32 d16[1], r3
1254; CHECK-FIX-NEXT:    aese.8 q8, q0
1255; CHECK-FIX-NEXT:    aesmc.8 q8, q8
1256; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
1257; CHECK-FIX-NEXT:    bne .LBB33_2
1258; CHECK-FIX-NEXT:  @ %bb.3:
1259; CHECK-FIX-NEXT:    bx lr
1260  %5 = icmp eq i32 %0, 0
1261  br i1 %5, label %12, label %6
1262
; Loop preheader: build the loop-invariant second aese operand from %2 and %1.
12636:
1264  %7 = bitcast <16 x i8> %2 to <2 x i64>
1265  %8 = insertelement <2 x i64> %7, i64 %1, i64 0
1266  %9 = bitcast <2 x i64> %8 to <16 x i8>
1267  %10 = bitcast ptr %3 to ptr
1268  %11 = bitcast ptr %3 to ptr
1269  br label %13
1270
127112:
1272  ret void
1273
; Loop body: %14 counts iterations up to %0; the state is reloaded from %3 on
; every iteration.
127413:
1275  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
1276  %15 = load <2 x i64>, ptr %10, align 8
1277  %16 = insertelement <2 x i64> %15, i64 %1, i64 0
1278  %17 = bitcast <2 x i64> %16 to <16 x i8>
1279  store i64 %1, ptr %11, align 8
1280  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
1281  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
1282  store <16 x i8> %19, ptr %3, align 8
1283  %20 = add nuw i32 %14, 1
1284  %21 = icmp eq i32 %20, %0
1285  br i1 %21, label %12, label %13
1286}
1287
; Lane-0 16-bit insert with the scalar loaded through %0: the value is loaded as
; an i16 and inserted into lane 0 of both the vector loaded from %2 and the
; vector argument %1 (both viewed as <8 x i16>). The two results feed aese, then
; aesmc, and the output is stored back to %2.
; Expected output: `vorr q0, q0, q0` at function entry in both sets of
; expectations; the two sets differ only in the order of the `ldrh` and
; `vld1.64` loads.
1288define arm_aapcs_vfpcc void @aese_setf16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
1289; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr:
1290; CHECK-FIX-NOSCHED:       @ %bb.0:
1291; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
1292; CHECK-FIX-NOSCHED-NEXT:    ldrh r0, [r0]
1293; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
1294; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d0[0], r0
1295; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r0
1296; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
1297; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
1298; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
1299; CHECK-FIX-NOSCHED-NEXT:    bx lr
1300;
1301; CHECK-CORTEX-FIX-LABEL: aese_setf16_via_ptr:
1302; CHECK-CORTEX-FIX:       @ %bb.0:
1303; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
1304; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
1305; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r0]
1306; CHECK-CORTEX-FIX-NEXT:    vmov.16 d0[0], r0
1307; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r0
1308; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
1309; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
1310; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
1311; CHECK-CORTEX-FIX-NEXT:    bx lr
1312  %4 = bitcast ptr %0 to ptr
1313  %5 = load i16, ptr %4, align 2
  ; First aese operand: vector loaded from %2 with the i16 written to lane 0.
1314  %6 = bitcast ptr %2 to ptr
1315  %7 = load <8 x i16>, ptr %6, align 8
1316  %8 = insertelement <8 x i16> %7, i16 %5, i64 0
1317  %9 = bitcast <8 x i16> %8 to <16 x i8>
  ; Second aese operand: vector argument %1 with the i16 written to lane 0.
1318  %10 = bitcast <16 x i8> %1 to <8 x i16>
1319  %11 = insertelement <8 x i16> %10, i16 %5, i64 0
1320  %12 = bitcast <8 x i16> %11 to <16 x i8>
1321  %13 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %12)
1322  %14 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %13)
1323  store <16 x i8> %14, ptr %2, align 8
1324  ret void
1325}
1326
; Lane-0 16-bit insert with the scalar passed by value as a half: %0 is bitcast
; to i16 and inserted into lane 0 of both the vector loaded from %2 and the
; vector argument %1 (both viewed as <8 x i16>). The two results feed aese, then
; aesmc, and the output is stored back to %2.
; Expected output: one `vorr q1, q1, q1` at function entry (the expectations
; use q1, not q0, for the second aese operand here, and read the half with
; `vmov r1, s0`).
1327define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, ptr %2) nounwind {
1328; CHECK-FIX-LABEL: aese_setf16_via_val:
1329; CHECK-FIX:       @ %bb.0:
1330; CHECK-FIX-NEXT:    vorr q1, q1, q1
1331; CHECK-FIX-NEXT:    vmov r1, s0
1332; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
1333; CHECK-FIX-NEXT:    vmov.16 d2[0], r1
1334; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
1335; CHECK-FIX-NEXT:    aese.8 q8, q1
1336; CHECK-FIX-NEXT:    aesmc.8 q8, q8
1337; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
1338; CHECK-FIX-NEXT:    bx lr
  ; First aese operand: vector loaded from %2 with the half's bits in lane 0.
1339  %4 = bitcast ptr %2 to ptr
1340  %5 = load <8 x i16>, ptr %4, align 8
1341  %6 = bitcast half %0 to i16
1342  %7 = insertelement <8 x i16> %5, i16 %6, i64 0
1343  %8 = bitcast <8 x i16> %7 to <16 x i8>
  ; Second aese operand: vector argument %1 with the half's bits in lane 0.
1344  %9 = bitcast <16 x i8> %1 to <8 x i16>
1345  %10 = insertelement <8 x i16> %9, i16 %6, i64 0
1346  %11 = bitcast <8 x i16> %10 to <16 x i8>
1347  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
1348  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
1349  store <16 x i8> %13, ptr %2, align 8
1350  ret void
1351}
1352
; Conditional lane-0 16-bit insert with the scalar loaded through %1: when %0 is
; true, the i16 loaded from %1 is inserted into lane 0 of the vector loaded from
; %3 and, separately, into lane 0 of the vector argument %2; otherwise both
; vectors are used unchanged. Both merge points (phis %16 and %26) carry
; <8 x half> values, which are bitcast to <16 x i8> for aese + aesmc; the result
; is stored to %3.
; Expected output: no `vorr` appears in either set of expectations. Both aese
; operands are instead rebuilt element by element: `vmov.u16` extracts, `pkhbt`
; packs pairs of halfwords, and `vmov.32` lane writes fill q8 and q9. The two
; sets differ in register assignment, spill-slot layout (12 vs 24 bytes of
; stack) and block structure.
1353define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
1354; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_ptr:
1355; CHECK-FIX-NOSCHED:       @ %bb.0:
1356; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1357; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1358; CHECK-FIX-NOSCHED-NEXT:    .pad #12
1359; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #12
1360; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
1361; CHECK-FIX-NOSCHED-NEXT:    beq .LBB36_3
1362; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
1363; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
1364; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[3]
1365; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r1]
1366; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r9, d17[0]
1367; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r10, d16[3]
1368; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r11, d16[2]
1369; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
1370; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[2]
1371; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
1372; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[1]
1373; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
1374; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d16[1]
1375; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
1376; CHECK-FIX-NOSCHED-NEXT:    bne .LBB36_4
1377; CHECK-FIX-NOSCHED-NEXT:  .LBB36_2:
1378; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r0, d1[3]
1379; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r4, d1[2]
1380; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r5, d1[1]
1381; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r6, d1[0]
1382; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r12, d0[3]
1383; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 lr, d0[2]
1384; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r8, d0[1]
1385; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r1, d0[0]
1386; CHECK-FIX-NOSCHED-NEXT:    b .LBB36_5
1387; CHECK-FIX-NOSCHED-NEXT:  .LBB36_3:
1388; CHECK-FIX-NOSCHED-NEXT:    add r3, r2, #8
1389; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d16[0]}, [r2:32]
1390; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d17[0]}, [r3:32]
1391; CHECK-FIX-NOSCHED-NEXT:    add r3, r2, #4
1392; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d16[1]}, [r3:32]
1393; CHECK-FIX-NOSCHED-NEXT:    add r3, r2, #12
1394; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d17[1]}, [r3:32]
1395; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[3]
1396; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r9, d17[0]
1397; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r10, d16[3]
1398; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r11, d16[2]
1399; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r7, d16[0]
1400; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
1401; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[2]
1402; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
1403; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[1]
1404; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
1405; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d16[1]
1406; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
1407; CHECK-FIX-NOSCHED-NEXT:    beq .LBB36_2
1408; CHECK-FIX-NOSCHED-NEXT:  .LBB36_4:
1409; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r0, d1[3]
1410; CHECK-FIX-NOSCHED-NEXT:    ldrh r1, [r1]
1411; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r4, d1[2]
1412; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r5, d1[1]
1413; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r6, d1[0]
1414; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r12, d0[3]
1415; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 lr, d0[2]
1416; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r8, d0[1]
1417; CHECK-FIX-NOSCHED-NEXT:  .LBB36_5:
1418; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r1, r8, lsl #16
1419; CHECK-FIX-NOSCHED-NEXT:    pkhbt r3, r7, r3, lsl #16
1420; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r4, r0, lsl #16
1421; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r1
1422; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, lr, r12, lsl #16
1423; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r3
1424; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r1
1425; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r11, r10, lsl #16
1426; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r1
1427; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r6, r5, lsl #16
1428; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r1
1429; CHECK-FIX-NOSCHED-NEXT:    ldr r1, [sp] @ 4-byte Reload
1430; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r9, r1, lsl #16
1431; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
1432; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
1433; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r1
1434; CHECK-FIX-NOSCHED-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
1435; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r0, lsl #16
1436; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
1437; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q9
1438; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
1439; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
1440; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #12
1441; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1442;
1443; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr:
1444; CHECK-CORTEX-FIX:       @ %bb.0:
1445; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1446; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1447; CHECK-CORTEX-FIX-NEXT:    .pad #24
1448; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #24
1449; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
1450; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_2
1451; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
1452; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
1453; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[1]
1454; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d17[0]
1455; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[2]
1456; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
1457; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[2]
1458; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
1459; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[3]
1460; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
1461; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[1]
1462; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill
1463; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1]
1464; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
1465; CHECK-CORTEX-FIX-NEXT:    mov r3, r6
1466; CHECK-CORTEX-FIX-NEXT:    b .LBB36_3
1467; CHECK-CORTEX-FIX-NEXT:  .LBB36_2:
1468; CHECK-CORTEX-FIX-NEXT:    add r3, r2, #8
1469; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[0]}, [r2:32]
1470; CHECK-CORTEX-FIX-NEXT:    add r7, r2, #4
1471; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[0]}, [r3:32]
1472; CHECK-CORTEX-FIX-NEXT:    add r3, r2, #12
1473; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[1]}, [r7:32]
1474; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[1]}, [r3:32]
1475; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[0]
1476; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
1477; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[1]
1478; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[1]
1479; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
1480; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[2]
1481; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #12] @ 4-byte Spill
1482; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[2]
1483; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
1484; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[3]
1485; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
1486; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[0]
1487; CHECK-CORTEX-FIX-NEXT:  .LBB36_3:
1488; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d17[3]
1489; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
1490; CHECK-CORTEX-FIX-NEXT:    beq .LBB36_5
1491; CHECK-CORTEX-FIX-NEXT:  @ %bb.4:
1492; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r1]
1493; CHECK-CORTEX-FIX-NEXT:    b .LBB36_6
1494; CHECK-CORTEX-FIX-NEXT:  .LBB36_5:
1495; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r0, d0[0]
1496; CHECK-CORTEX-FIX-NEXT:  .LBB36_6:
1497; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
1498; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
1499; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
1500; CHECK-CORTEX-FIX-NEXT:    pkhbt r9, r7, r4, lsl #16
1501; CHECK-CORTEX-FIX-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
1502; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r10, d0[1]
1503; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d0[2]
1504; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r12, d0[3]
1505; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r11, d1[0]
1506; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r5, d1[1]
1507; CHECK-CORTEX-FIX-NEXT:    vmov.u16 lr, d1[2]
1508; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r8, d1[3]
1509; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r1, lsl #16
1510; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
1511; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16
1512; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r12, lsl #16
1513; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r11, r5, lsl #16
1514; CHECK-CORTEX-FIX-NEXT:    pkhbt r1, r3, r1, lsl #16
1515; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
1516; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r3, r4, lsl #16
1517; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp] @ 4-byte Reload
1518; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r4
1519; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r1
1520; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7
1521; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r9
1522; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r10, lsl #16
1523; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r3
1524; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r5
1525; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r6
1526; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0
1527; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
1528; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
1529; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
1530; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #24
1531; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1532  br i1 %0, label %5, label %12
1533
; %0 true: load the vector from %3 as <8 x i16>, overwrite lane 0 with the i16
; from %1, and reinterpret the result as <8 x half>.
15345:
1535  %6 = bitcast ptr %1 to ptr
1536  %7 = load i16, ptr %6, align 2
1537  %8 = bitcast ptr %3 to ptr
1538  %9 = load <8 x i16>, ptr %8, align 8
1539  %10 = insertelement <8 x i16> %9, i16 %7, i64 0
1540  %11 = bitcast <8 x i16> %10 to <8 x half>
1541  br label %15
1542
; %0 false: load the vector from %3 directly as <8 x half>.
154312:
1544  %13 = bitcast ptr %3 to ptr
1545  %14 = load <8 x half>, ptr %13, align 8
1546  br label %15
1547
154815:
1549  %16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
1550  br i1 %0, label %17, label %23
1551
; %0 true: overwrite lane 0 of the vector argument %2 with the i16 from %1.
155217:
1553  %18 = bitcast ptr %1 to ptr
1554  %19 = load i16, ptr %18, align 2
1555  %20 = bitcast <16 x i8> %2 to <8 x i16>
1556  %21 = insertelement <8 x i16> %20, i16 %19, i64 0
1557  %22 = bitcast <8 x i16> %21 to <8 x half>
1558  br label %25
1559
; %0 false: use the vector argument %2 unmodified, viewed as <8 x half>.
156023:
1561  %24 = bitcast <16 x i8> %2 to <8 x half>
1562  br label %25
1563
156425:
1565  %26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
1566  %27 = bitcast <8 x half> %16 to <16 x i8>
1567  %28 = bitcast <8 x half> %26 to <16 x i8>
1568  %29 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %27, <16 x i8> %28)
1569  %30 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %29)
1570  store <16 x i8> %30, ptr %3, align 8
1571  ret void
1572}
1573
; aese_setf16_cond_via_val: conditional f16 lane insert, value passed in a
; register. When %0 is true, lane 0 of the vector loaded from %3 and lane 0 of
; the vector argument %2 are both overwritten with the bits of the half
; argument %1; when %0 is false both vectors are used unchanged. The two
; results feed aese followed by aesmc, and the result is stored back to %3.
; In the expected assembly for both sets of RUN lines, both AES operands are
; rebuilt with vmov.32 lane writes and no vorr precedes the aese.8.
1574define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, ptr %3) nounwind {
1575; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_val:
1576; CHECK-FIX-NOSCHED:       @ %bb.0:
1577; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1578; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1579; CHECK-FIX-NOSCHED-NEXT:    .pad #12
1580; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #12
1581; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
1582; CHECK-FIX-NOSCHED-NEXT:    beq .LBB37_2
1583; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
1584; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
1585; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s2, s0
1586; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d17[1]
1587; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r7, d17[3]
1588; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 lr, d17[2]
1589; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r11, d16[2]
1590; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r8, d16[1]
1591; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #8] @ 4-byte Spill
1592; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d17[0]
1593; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #4] @ 4-byte Spill
1594; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d16[3]
1595; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp] @ 4-byte Spill
1596; CHECK-FIX-NOSCHED-NEXT:    b .LBB37_3
1597; CHECK-FIX-NOSCHED-NEXT:  .LBB37_2:
1598; CHECK-FIX-NOSCHED-NEXT:    add r2, r1, #8
1599; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d16[0]}, [r1:32]
1600; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d17[0]}, [r2:32]
1601; CHECK-FIX-NOSCHED-NEXT:    add r2, r1, #4
1602; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d16[1]}, [r2:32]
1603; CHECK-FIX-NOSCHED-NEXT:    add r2, r1, #12
1604; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d17[1]}, [r2:32]
1605; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d17[1]
1606; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r7, d17[3]
1607; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 lr, d17[2]
1608; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r11, d16[2]
1609; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r8, d16[1]
1610; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #8] @ 4-byte Spill
1611; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d17[0]
1612; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #4] @ 4-byte Spill
1613; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d16[3]
1614; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp] @ 4-byte Spill
1615; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d16[0]
1616; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r2
1617; CHECK-FIX-NOSCHED-NEXT:  .LBB37_3:
1618; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r9, d3[3]
1619; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
1620; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r10, d3[2]
1621; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d3[1]
1622; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d3[0]
1623; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r4, d2[3]
1624; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r5, d2[2]
1625; CHECK-FIX-NOSCHED-NEXT:    beq .LBB37_5
1626; CHECK-FIX-NOSCHED-NEXT:  @ %bb.4:
1627; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r12, d2[1]
1628; CHECK-FIX-NOSCHED-NEXT:    b .LBB37_6
1629; CHECK-FIX-NOSCHED-NEXT:  .LBB37_5:
1630; CHECK-FIX-NOSCHED-NEXT:    mov r0, lr
1631; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 lr, d2[0]
1632; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r12, d2[1]
1633; CHECK-FIX-NOSCHED-NEXT:    vmov s0, lr
1634; CHECK-FIX-NOSCHED-NEXT:    mov lr, r0
1635; CHECK-FIX-NOSCHED-NEXT:  .LBB37_6:
1636; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s0
1637; CHECK-FIX-NOSCHED-NEXT:    vmov r6, s2
1638; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r12, lsl #16
1639; CHECK-FIX-NOSCHED-NEXT:    pkhbt r6, r6, r8, lsl #16
1640; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
1641; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r5, r4, lsl #16
1642; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r6
1643; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0
1644; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp] @ 4-byte Reload
1645; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r0, lsl #16
1646; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
1647; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r3, r2, lsl #16
1648; CHECK-FIX-NOSCHED-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
1649; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0
1650; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
1651; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
1652; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
1653; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r9, lsl #16
1654; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
1655; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, lr, r7, lsl #16
1656; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
1657; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q9
1658; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
1659; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
1660; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #12
1661; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1662;
1663; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_val:
1664; CHECK-CORTEX-FIX:       @ %bb.0:
1665; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1666; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
1667; CHECK-CORTEX-FIX-NEXT:    .pad #12
1668; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #12
1669; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
1670; CHECK-CORTEX-FIX-NEXT:    beq .LBB37_3
1671; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
1672; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
1673; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s2, s0
1674; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d16[1]
1675; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d16[2]
1676; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r10, d16[3]
1677; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r11, d17[2]
1678; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d17[3]
1679; CHECK-CORTEX-FIX-NEXT:    str r2, [sp, #8] @ 4-byte Spill
1680; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d17[0]
1681; CHECK-CORTEX-FIX-NEXT:    str r2, [sp, #4] @ 4-byte Spill
1682; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d17[1]
1683; CHECK-CORTEX-FIX-NEXT:    str r2, [sp] @ 4-byte Spill
1684; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
1685; CHECK-CORTEX-FIX-NEXT:    bne .LBB37_4
1686; CHECK-CORTEX-FIX-NEXT:  .LBB37_2:
1687; CHECK-CORTEX-FIX-NEXT:    vmov.u16 lr, d2[0]
1688; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r8, d2[1]
1689; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d2[2]
1690; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d2[3]
1691; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r9, d3[0]
1692; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d3[1]
1693; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r5, d3[2]
1694; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r12, d3[3]
1695; CHECK-CORTEX-FIX-NEXT:    vmov s0, lr
1696; CHECK-CORTEX-FIX-NEXT:    b .LBB37_5
1697; CHECK-CORTEX-FIX-NEXT:  .LBB37_3:
1698; CHECK-CORTEX-FIX-NEXT:    add r2, r1, #8
1699; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
1700; CHECK-CORTEX-FIX-NEXT:    add r3, r1, #4
1701; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[0]}, [r2:32]
1702; CHECK-CORTEX-FIX-NEXT:    add r2, r1, #12
1703; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[1]}, [r3:32]
1704; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[1]}, [r2:32]
1705; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[1]
1706; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d16[0]
1707; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d16[2]
1708; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r10, d16[3]
1709; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
1710; CHECK-CORTEX-FIX-NEXT:    vmov s2, r2
1711; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[0]
1712; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r11, d17[2]
1713; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d17[3]
1714; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
1715; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[1]
1716; CHECK-CORTEX-FIX-NEXT:    str r3, [sp] @ 4-byte Spill
1717; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
1718; CHECK-CORTEX-FIX-NEXT:    beq .LBB37_2
1719; CHECK-CORTEX-FIX-NEXT:  .LBB37_4:
1720; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r8, d2[1]
1721; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d2[2]
1722; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d2[3]
1723; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r9, d3[0]
1724; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d3[1]
1725; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r5, d3[2]
1726; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r12, d3[3]
1727; CHECK-CORTEX-FIX-NEXT:  .LBB37_5:
1728; CHECK-CORTEX-FIX-NEXT:    pkhbt lr, r11, r6, lsl #16
1729; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, r7, r10, lsl #16
1730; CHECK-CORTEX-FIX-NEXT:    ldm sp, {r6, r7} @ 8-byte Folded Reload
1731; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r4, lsl #16
1732; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r5, r12, lsl #16
1733; CHECK-CORTEX-FIX-NEXT:    pkhbt r2, r9, r2, lsl #16
1734; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r7, r6, lsl #16
1735; CHECK-CORTEX-FIX-NEXT:    vmov r7, s2
1736; CHECK-CORTEX-FIX-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
1737; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r7, r6, lsl #16
1738; CHECK-CORTEX-FIX-NEXT:    vmov r6, s0
1739; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r7
1740; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r4
1741; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r0
1742; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], lr
1743; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r8, lsl #16
1744; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r6
1745; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r2
1746; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r3
1747; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r5
1748; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
1749; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
1750; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
1751; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #12
1752; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
1753  br i1 %0, label %5, label %11
1754
17555:
1756  %6 = bitcast ptr %3 to ptr
1757  %7 = load <8 x i16>, ptr %6, align 8
1758  %8 = bitcast half %1 to i16
1759  %9 = insertelement <8 x i16> %7, i16 %8, i64 0
1760  %10 = bitcast <8 x i16> %9 to <8 x half>
1761  br label %14
1762
176311:
1764  %12 = bitcast ptr %3 to ptr
1765  %13 = load <8 x half>, ptr %12, align 8
1766  br label %14
1767
176814:
1769  %15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
1770  br i1 %0, label %16, label %21
1771
177216:
1773  %17 = bitcast <16 x i8> %2 to <8 x i16>
1774  %18 = bitcast half %1 to i16
1775  %19 = insertelement <8 x i16> %17, i16 %18, i64 0
1776  %20 = bitcast <8 x i16> %19 to <8 x half>
1777  br label %23
1778
177921:
1780  %22 = bitcast <16 x i8> %2 to <8 x half>
1781  br label %23
1782
178323:
1784  %24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
1785  %25 = bitcast <8 x half> %15 to <16 x i8>
1786  %26 = bitcast <8 x half> %24 to <16 x i8>
1787  %27 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %25, <16 x i8> %26)
1788  %28 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %27)
1789  store <16 x i8> %28, ptr %3, align 8
1790  ret void
1791}
1792
; aese_setf16_loop_via_ptr: f16 lane insert from memory, AES pair inside a loop.
; A 16-bit value is loaded from %1, inserted into lane 0 of the vector argument
; %2, and also stored to %3. If the trip count %0 is zero the function returns;
; otherwise the vector at %3 is loaded once and aese (against the modified %2)
; followed by aesmc is applied %0 times before the result is stored to %3.
; The expected assembly has a single vorr on q0 at function entry, the lane
; insert (vmov.16) before the loop, and no vorr inside the loop body.
1793define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
1794; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr:
1795; CHECK-FIX:       @ %bb.0:
1796; CHECK-FIX-NEXT:    vorr q0, q0, q0
1797; CHECK-FIX-NEXT:    ldrh r1, [r1]
1798; CHECK-FIX-NEXT:    cmp r0, #0
1799; CHECK-FIX-NEXT:    strh r1, [r2]
1800; CHECK-FIX-NEXT:    bxeq lr
1801; CHECK-FIX-NEXT:  .LBB38_1:
1802; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
1803; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
1804; CHECK-FIX-NEXT:  .LBB38_2: @ =>This Inner Loop Header: Depth=1
1805; CHECK-FIX-NEXT:    aese.8 q8, q0
1806; CHECK-FIX-NEXT:    subs r0, r0, #1
1807; CHECK-FIX-NEXT:    aesmc.8 q8, q8
1808; CHECK-FIX-NEXT:    bne .LBB38_2
1809; CHECK-FIX-NEXT:  @ %bb.3:
1810; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
1811; CHECK-FIX-NEXT:    bx lr
1812  %5 = bitcast ptr %1 to ptr
1813  %6 = load i16, ptr %5, align 2
1814  %7 = bitcast <16 x i8> %2 to <8 x i16>
1815  %8 = insertelement <8 x i16> %7, i16 %6, i64 0
1816  %9 = bitcast <8 x i16> %8 to <16 x i8>
1817  %10 = bitcast ptr %3 to ptr
1818  store i16 %6, ptr %10, align 8
1819  %11 = icmp eq i32 %0, 0
1820  br i1 %11, label %15, label %12
1821
182212:
1823  %13 = load <16 x i8>, ptr %3, align 8
1824  br label %16
1825
182614:
1827  store <16 x i8> %20, ptr %3, align 8
1828  br label %15
1829
183015:
1831  ret void
1832
183316:
1834  %17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
1835  %18 = phi i32 [ 0, %12 ], [ %21, %16 ]
1836  %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
1837  %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
1838  %21 = add nuw i32 %18, 1
1839  %22 = icmp eq i32 %21, %0
1840  br i1 %22, label %14, label %16
1841}
1842
; aese_setf16_loop_via_val: f16 lane insert from a register value, AES pair
; inside a loop. If the trip count %0 is zero the function returns. Otherwise
; lane 0 of the vector argument %2 is set to the bits of the half argument %1
; once, before the loop. Each iteration reloads the vector at %3, sets its
; lane 0 to the same bits, stores the half to %3, runs aese then aesmc, and
; stores the 128-bit result back to %3.
; The expected assembly has a single vorr on q1 at function entry; both lane
; inserts are vmov.16 writes and no vorr appears inside the loop body.
1843define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, ptr %3) nounwind {
1844; CHECK-FIX-LABEL: aese_setf16_loop_via_val:
1845; CHECK-FIX:       @ %bb.0:
1846; CHECK-FIX-NEXT:    vorr q1, q1, q1
1847; CHECK-FIX-NEXT:    cmp r0, #0
1848; CHECK-FIX-NEXT:    bxeq lr
1849; CHECK-FIX-NEXT:  .LBB39_1:
1850; CHECK-FIX-NEXT:    vmov r2, s0
1851; CHECK-FIX-NEXT:    vmov.16 d2[0], r2
1852; CHECK-FIX-NEXT:  .LBB39_2: @ =>This Inner Loop Header: Depth=1
1853; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
1854; CHECK-FIX-NEXT:    subs r0, r0, #1
1855; CHECK-FIX-NEXT:    vmov.16 d16[0], r2
1856; CHECK-FIX-NEXT:    aese.8 q8, q1
1857; CHECK-FIX-NEXT:    aesmc.8 q8, q8
1858; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
1859; CHECK-FIX-NEXT:    bne .LBB39_2
1860; CHECK-FIX-NEXT:  @ %bb.3:
1861; CHECK-FIX-NEXT:    bx lr
1862  %5 = icmp eq i32 %0, 0
1863  br i1 %5, label %13, label %6
1864
18656:
1866  %7 = bitcast <16 x i8> %2 to <8 x i16>
1867  %8 = bitcast half %1 to i16
1868  %9 = insertelement <8 x i16> %7, i16 %8, i64 0
1869  %10 = bitcast <8 x i16> %9 to <16 x i8>
1870  %11 = bitcast ptr %3 to ptr
1871  %12 = bitcast ptr %3 to ptr
1872  br label %14
1873
187413:
1875  ret void
1876
187714:
1878  %15 = phi i32 [ 0, %6 ], [ %21, %14 ]
1879  %16 = load <8 x i16>, ptr %11, align 8
1880  %17 = insertelement <8 x i16> %16, i16 %8, i64 0
1881  %18 = bitcast <8 x i16> %17 to <16 x i8>
1882  store half %1, ptr %12, align 8
1883  %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %18, <16 x i8> %10)
1884  %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
1885  store <16 x i8> %20, ptr %3, align 8
1886  %21 = add nuw i32 %15, 1
1887  %22 = icmp eq i32 %21, %0
1888  br i1 %22, label %13, label %14
1889}
1890
; aese_setf32_via_ptr: unconditional f32 lane insert, value loaded from memory.
; A float is loaded from %0 and inserted into lane 0 of both the vector loaded
; from %2 and the vector argument %1. The two results feed aese followed by
; aesmc, and the result is stored to %2.
; In the expected assembly the float arrives through S-register writes
; (vldr s0, vmov.f32 s4) and a vorr is placed on both q1 and q0 immediately
; before the aese.8.
1891define arm_aapcs_vfpcc void @aese_setf32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
1892; CHECK-FIX-LABEL: aese_setf32_via_ptr:
1893; CHECK-FIX:       @ %bb.0:
1894; CHECK-FIX-NEXT:    vldr s0, [r0]
1895; CHECK-FIX-NEXT:    vld1.64 {d2, d3}, [r1]
1896; CHECK-FIX-NEXT:    vmov.f32 s4, s0
1897; CHECK-FIX-NEXT:    vorr q1, q1, q1
1898; CHECK-FIX-NEXT:    vorr q0, q0, q0
1899; CHECK-FIX-NEXT:    aese.8 q1, q0
1900; CHECK-FIX-NEXT:    aesmc.8 q8, q1
1901; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
1902; CHECK-FIX-NEXT:    bx lr
1903  %4 = load float, ptr %0, align 4
1904  %5 = bitcast ptr %2 to ptr
1905  %6 = load <4 x float>, ptr %5, align 8
1906  %7 = insertelement <4 x float> %6, float %4, i64 0
1907  %8 = bitcast <4 x float> %7 to <16 x i8>
1908  %9 = bitcast <16 x i8> %1 to <4 x float>
1909  %10 = insertelement <4 x float> %9, float %4, i64 0
1910  %11 = bitcast <4 x float> %10 to <16 x i8>
1911  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
1912  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
1913  store <16 x i8> %13, ptr %2, align 8
1914  ret void
1915}
1916
; aese_setf32_via_val: unconditional f32 lane insert, value passed in a
; register. The float argument %0 is inserted into lane 0 of both the vector
; loaded from %2 and the vector argument %1. The two results feed aese
; followed by aesmc, and the result is stored to %2.
; In the expected assembly both lane inserts are vmov.f32 S-register moves and
; a vorr is placed on both q0 and q1 immediately before the aese.8.
1917define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, ptr %2) nounwind {
1918; CHECK-FIX-LABEL: aese_setf32_via_val:
1919; CHECK-FIX:       @ %bb.0:
1920; CHECK-FIX-NEXT:    vmov.f32 s4, s0
1921; CHECK-FIX-NEXT:    vld1.64 {d0, d1}, [r0]
1922; CHECK-FIX-NEXT:    vmov.f32 s0, s4
1923; CHECK-FIX-NEXT:    vorr q0, q0, q0
1924; CHECK-FIX-NEXT:    vorr q1, q1, q1
1925; CHECK-FIX-NEXT:    aese.8 q0, q1
1926; CHECK-FIX-NEXT:    aesmc.8 q8, q0
1927; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
1928; CHECK-FIX-NEXT:    bx lr
1929  %4 = bitcast ptr %2 to ptr
1930  %5 = load <4 x float>, ptr %4, align 8
1931  %6 = insertelement <4 x float> %5, float %0, i64 0
1932  %7 = bitcast <4 x float> %6 to <16 x i8>
1933  %8 = bitcast <16 x i8> %1 to <4 x float>
1934  %9 = insertelement <4 x float> %8, float %0, i64 0
1935  %10 = bitcast <4 x float> %9 to <16 x i8>
1936  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
1937  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
1938  store <16 x i8> %12, ptr %2, align 8
1939  ret void
1940}
1941
; aese_setf32_cond_via_ptr: conditional f32 lane insert, value loaded from
; memory. When %0 is true, a float loaded from %1 replaces lane 0 of the vector
; loaded from %3 and lane 0 of the vector argument %2; when %0 is false both
; vectors are used unchanged. The two results feed aese followed by aesmc, and
; the result is stored to %3.
; The expected assembly has a single vorr on q0 at function entry; the lane
; inserts are vld1.32 single-lane loads and no vorr directly precedes aese.8.
1942define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
1943; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr:
1944; CHECK-FIX:       @ %bb.0:
1945; CHECK-FIX-NEXT:    vorr q0, q0, q0
1946; CHECK-FIX-NEXT:    cmp r0, #0
1947; CHECK-FIX-NEXT:    beq .LBB42_2
1948; CHECK-FIX-NEXT:  @ %bb.1:
1949; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
1950; CHECK-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
1951; CHECK-FIX-NEXT:    cmp r0, #0
1952; CHECK-FIX-NEXT:    bne .LBB42_3
1953; CHECK-FIX-NEXT:    b .LBB42_4
1954; CHECK-FIX-NEXT:  .LBB42_2:
1955; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
1956; CHECK-FIX-NEXT:    cmp r0, #0
1957; CHECK-FIX-NEXT:    beq .LBB42_4
1958; CHECK-FIX-NEXT:  .LBB42_3:
1959; CHECK-FIX-NEXT:    vld1.32 {d0[0]}, [r1:32]
1960; CHECK-FIX-NEXT:  .LBB42_4:
1961; CHECK-FIX-NEXT:    aese.8 q8, q0
1962; CHECK-FIX-NEXT:    aesmc.8 q8, q8
1963; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
1964; CHECK-FIX-NEXT:    bx lr
1965  br i1 %0, label %5, label %10
1966
19675:
1968  %6 = load float, ptr %1, align 4
1969  %7 = bitcast ptr %3 to ptr
1970  %8 = load <4 x float>, ptr %7, align 8
1971  %9 = insertelement <4 x float> %8, float %6, i64 0
1972  br label %13
1973
197410:
1975  %11 = bitcast ptr %3 to ptr
1976  %12 = load <4 x float>, ptr %11, align 8
1977  br label %13
1978
197913:
1980  %14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
1981  br i1 %0, label %15, label %19
1982
198315:
1984  %16 = load float, ptr %1, align 4
1985  %17 = bitcast <16 x i8> %2 to <4 x float>
1986  %18 = insertelement <4 x float> %17, float %16, i64 0
1987  br label %21
1988
198919:
1990  %20 = bitcast <16 x i8> %2 to <4 x float>
1991  br label %21
1992
199321:
1994  %22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
1995  %23 = bitcast <4 x float> %14 to <16 x i8>
1996  %24 = bitcast <4 x float> %22 to <16 x i8>
1997  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
1998  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
1999  store <16 x i8> %26, ptr %3, align 8
2000  ret void
2001}
2002
; aese_setf32_cond_via_val: conditional f32 lane insert, value passed in a
; register, written with select instead of branches. For the vector loaded
; from %3 and for the vector argument %2, a copy with lane 0 set to the float
; argument %1 is built, and %0 selects between that copy and the unmodified
; vector. The two selected vectors feed aese followed by aesmc, and the result
; is stored to %3.
; In the expected assembly each lane insert is a predicated vmovne.f32 followed
; by a vorr of the containing Q register. The two sets of expectations differ
; only in the order of the first cmp and the vld1.64.
2003define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, ptr %3) nounwind {
2004; CHECK-FIX-NOSCHED-LABEL: aese_setf32_cond_via_val:
2005; CHECK-FIX-NOSCHED:       @ %bb.0:
2006; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
2007; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
2008; CHECK-FIX-NOSCHED-NEXT:    vmovne.f32 s8, s0
2009; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
2010; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
2011; CHECK-FIX-NOSCHED-NEXT:    vmovne.f32 s4, s0
2012; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
2013; CHECK-FIX-NOSCHED-NEXT:    aese.8 q2, q1
2014; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q2
2015; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
2016; CHECK-FIX-NOSCHED-NEXT:    bx lr
2017;
2018; CHECK-CORTEX-FIX-LABEL: aese_setf32_cond_via_val:
2019; CHECK-CORTEX-FIX:       @ %bb.0:
2020; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
2021; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
2022; CHECK-CORTEX-FIX-NEXT:    vmovne.f32 s8, s0
2023; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
2024; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
2025; CHECK-CORTEX-FIX-NEXT:    vmovne.f32 s4, s0
2026; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
2027; CHECK-CORTEX-FIX-NEXT:    aese.8 q2, q1
2028; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q2
2029; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2030; CHECK-CORTEX-FIX-NEXT:    bx lr
2031  %5 = bitcast ptr %3 to ptr
2032  %6 = load <4 x float>, ptr %5, align 8
2033  %7 = insertelement <4 x float> %6, float %1, i64 0
2034  %8 = select i1 %0, <4 x float> %7, <4 x float> %6
2035  %9 = bitcast <16 x i8> %2 to <4 x float>
2036  %10 = insertelement <4 x float> %9, float %1, i64 0
2037  %11 = select i1 %0, <4 x float> %10, <4 x float> %9
2038  %12 = bitcast <4 x float> %8 to <16 x i8>
2039  %13 = bitcast <4 x float> %11 to <16 x i8>
2040  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
2041  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
2042  store <16 x i8> %15, ptr %3, align 8
2043  ret void
2044}
2045
; aese_setf32_loop_via_ptr: f32 lane insert from memory, AES pair inside a loop.
; A float is loaded from %1, inserted into lane 0 of the vector argument %2,
; and also stored to %3. If the trip count %0 is zero the function returns;
; otherwise the vector at %3 is loaded once and aese (against the modified %2)
; followed by aesmc is applied %0 times before the result is stored to %3.
; In the expected assembly the lane insert is a vmov.f32 before the loop and a
; vorr on q0 appears inside the loop body ahead of every aese.8. The two sets
; of expectations differ only in the order of the vmov.f32 and the vld1.64.
2046define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2047; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_ptr:
2048; CHECK-FIX-NOSCHED:       @ %bb.0:
2049; CHECK-FIX-NOSCHED-NEXT:    vldr s4, [r1]
2050; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
2051; CHECK-FIX-NOSCHED-NEXT:    vstr s4, [r2]
2052; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
2053; CHECK-FIX-NOSCHED-NEXT:  .LBB44_1:
2054; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s0, s4
2055; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
2056; CHECK-FIX-NOSCHED-NEXT:  .LBB44_2: @ =>This Inner Loop Header: Depth=1
2057; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
2058; CHECK-FIX-NOSCHED-NEXT:    aese.8 q8, q0
2059; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
2060; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q8
2061; CHECK-FIX-NOSCHED-NEXT:    bne .LBB44_2
2062; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
2063; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
2064; CHECK-FIX-NOSCHED-NEXT:    bx lr
2065;
2066; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_ptr:
2067; CHECK-CORTEX-FIX:       @ %bb.0:
2068; CHECK-CORTEX-FIX-NEXT:    vldr s4, [r1]
2069; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
2070; CHECK-CORTEX-FIX-NEXT:    vstr s4, [r2]
2071; CHECK-CORTEX-FIX-NEXT:    bxeq lr
2072; CHECK-CORTEX-FIX-NEXT:  .LBB44_1:
2073; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2074; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s0, s4
2075; CHECK-CORTEX-FIX-NEXT:  .LBB44_2: @ =>This Inner Loop Header: Depth=1
2076; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
2077; CHECK-CORTEX-FIX-NEXT:    aese.8 q8, q0
2078; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
2079; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q8
2080; CHECK-CORTEX-FIX-NEXT:    bne .LBB44_2
2081; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
2082; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2083; CHECK-CORTEX-FIX-NEXT:    bx lr
2084  %5 = load float, ptr %1, align 4
2085  %6 = bitcast <16 x i8> %2 to <4 x float>
2086  %7 = insertelement <4 x float> %6, float %5, i64 0
2087  %8 = bitcast <4 x float> %7 to <16 x i8>
2088  %9 = bitcast ptr %3 to ptr
2089  store float %5, ptr %9, align 8
2090  %10 = icmp eq i32 %0, 0
2091  br i1 %10, label %14, label %11
2092
209311:
2094  %12 = load <16 x i8>, ptr %3, align 8
2095  br label %15
2096
209713:
2098  store <16 x i8> %19, ptr %3, align 8
2099  br label %14
2100
210114:
2102  ret void
2103
210415:
2105  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
2106  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
2107  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
2108  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
2109  %20 = add nuw i32 %17, 1
2110  %21 = icmp eq i32 %20, %0
2111  br i1 %21, label %13, label %15
2112}
2113
; aese_setf32_loop_via_val: f32 lane insert from a register value, AES pair
; inside a loop. If the trip count %0 is zero the function returns. Otherwise
; lane 0 of the vector argument %2 is set to the float argument %1 once, before
; the loop. Each iteration reloads the vector at %3, sets its lane 0 to %1,
; stores the float to %3, runs aese then aesmc, and stores the 128-bit result
; back to %3.
; In the expected assembly both lane inserts are vmov.f32 moves, and a vorr on
; q2 and on q1 appears inside the loop body ahead of every aese.8. The two sets
; of expectations differ only in where the subs is placed.
2114define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, ptr %3) nounwind {
2115; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_val:
2116; CHECK-FIX-NOSCHED:       @ %bb.0:
2117; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
2118; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
2119; CHECK-FIX-NOSCHED-NEXT:  .LBB45_1:
2120; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s4, s0
2121; CHECK-FIX-NOSCHED-NEXT:  .LBB45_2: @ =>This Inner Loop Header: Depth=1
2122; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
2123; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
2124; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s8, s0
2125; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
2126; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
2127; CHECK-FIX-NOSCHED-NEXT:    aese.8 q2, q1
2128; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q2
2129; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
2130; CHECK-FIX-NOSCHED-NEXT:    bne .LBB45_2
2131; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
2132; CHECK-FIX-NOSCHED-NEXT:    bx lr
2133;
2134; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_val:
2135; CHECK-CORTEX-FIX:       @ %bb.0:
2136; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
2137; CHECK-CORTEX-FIX-NEXT:    bxeq lr
2138; CHECK-CORTEX-FIX-NEXT:  .LBB45_1:
2139; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s4, s0
2140; CHECK-CORTEX-FIX-NEXT:  .LBB45_2: @ =>This Inner Loop Header: Depth=1
2141; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
2142; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s8, s0
2143; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
2144; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
2145; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
2146; CHECK-CORTEX-FIX-NEXT:    aese.8 q2, q1
2147; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q2
2148; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2149; CHECK-CORTEX-FIX-NEXT:    bne .LBB45_2
2150; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
2151; CHECK-CORTEX-FIX-NEXT:    bx lr
2152  %5 = icmp eq i32 %0, 0
2153  br i1 %5, label %12, label %6
2154
21556:
2156  %7 = bitcast <16 x i8> %2 to <4 x float>
2157  %8 = insertelement <4 x float> %7, float %1, i64 0
2158  %9 = bitcast <4 x float> %8 to <16 x i8>
2159  %10 = bitcast ptr %3 to ptr
2160  %11 = bitcast ptr %3 to ptr
2161  br label %13
2162
216312:
2164  ret void
2165
216613:
2167  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
2168  %15 = load <4 x float>, ptr %10, align 8
2169  %16 = insertelement <4 x float> %15, float %1, i64 0
2170  %17 = bitcast <4 x float> %16 to <16 x i8>
2171  store float %1, ptr %11, align 8
2172  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
2173  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
2174  store <16 x i8> %19, ptr %3, align 8
2175  %20 = add nuw i32 %14, 1
2176  %21 = icmp eq i32 %20, %0
2177  br i1 %21, label %12, label %13
2178}
2179
; aesd_zero: aesd with an all-zero first operand. The second operand is loaded
; from %0; the aesd result goes through aesimc and is stored back to %0.
; In the expected assembly the zero vector is materialised with vmov.i32 and
; the other operand with vld1.64; no vorr is emitted.
2180define arm_aapcs_vfpcc void @aesd_zero(ptr %0) nounwind {
2181; CHECK-FIX-LABEL: aesd_zero:
2182; CHECK-FIX:       @ %bb.0:
2183; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
2184; CHECK-FIX-NEXT:    vmov.i32 q9, #0x0
2185; CHECK-FIX-NEXT:    aesd.8 q9, q8
2186; CHECK-FIX-NEXT:    aesimc.8 q8, q9
2187; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
2188; CHECK-FIX-NEXT:    bx lr
2189  %2 = load <16 x i8>, ptr %0, align 8
2190  %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> zeroinitializer, <16 x i8> %2)
2191  %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
2192  store <16 x i8> %4, ptr %0, align 8
2193  ret void
2194}
2195
; aesd_via_call1: one AES operand is the return value of a call. The result of
; @get_input() and a vector loaded from %0 feed aesd followed by aesimc, and
; the result is stored back to %0.
; In the expected assembly a vorr on q0 follows the bl, and the loaded operand
; comes from vld1.64 with no vorr of its own.
2196define arm_aapcs_vfpcc void @aesd_via_call1(ptr %0) nounwind {
2197; CHECK-FIX-LABEL: aesd_via_call1:
2198; CHECK-FIX:       @ %bb.0:
2199; CHECK-FIX-NEXT:    .save {r4, lr}
2200; CHECK-FIX-NEXT:    push {r4, lr}
2201; CHECK-FIX-NEXT:    mov r4, r0
2202; CHECK-FIX-NEXT:    bl get_input
2203; CHECK-FIX-NEXT:    vorr q0, q0, q0
2204; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
2205; CHECK-FIX-NEXT:    aesd.8 q8, q0
2206; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2207; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
2208; CHECK-FIX-NEXT:    pop {r4, pc}
2209  %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
2210  %3 = load <16 x i8>, ptr %0, align 8
2211  %4 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %2, <16 x i8> %3)
2212  %5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %4)
2213  store <16 x i8> %5, ptr %0, align 8
2214  ret void
2215}
2216
; aesd_via_call2: like aesd_via_call1, but the callee @get_inputf16 takes the
; half argument %0. Its return value and a vector loaded from %1 feed aesd
; followed by aesimc, and the result is stored back to %1.
; In the expected assembly a vorr on q0 follows the bl.
2217define arm_aapcs_vfpcc void @aesd_via_call2(half %0, ptr %1) nounwind {
2218; CHECK-FIX-LABEL: aesd_via_call2:
2219; CHECK-FIX:       @ %bb.0:
2220; CHECK-FIX-NEXT:    .save {r4, lr}
2221; CHECK-FIX-NEXT:    push {r4, lr}
2222; CHECK-FIX-NEXT:    mov r4, r0
2223; CHECK-FIX-NEXT:    bl get_inputf16
2224; CHECK-FIX-NEXT:    vorr q0, q0, q0
2225; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
2226; CHECK-FIX-NEXT:    aesd.8 q8, q0
2227; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2228; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
2229; CHECK-FIX-NEXT:    pop {r4, pc}
2230  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
2231  %4 = load <16 x i8>, ptr %1, align 8
2232  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2233  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2234  store <16 x i8> %6, ptr %1, align 8
2235  ret void
2236}
2237
; aesd_via_call3: like aesd_via_call1, but the callee @get_inputf32 takes the
; float argument %0. Its return value and a vector loaded from %1 feed aesd
; followed by aesimc, and the result is stored back to %1.
; In the expected assembly a vorr on q0 follows the bl.
2238define arm_aapcs_vfpcc void @aesd_via_call3(float %0, ptr %1) nounwind {
2239; CHECK-FIX-LABEL: aesd_via_call3:
2240; CHECK-FIX:       @ %bb.0:
2241; CHECK-FIX-NEXT:    .save {r4, lr}
2242; CHECK-FIX-NEXT:    push {r4, lr}
2243; CHECK-FIX-NEXT:    mov r4, r0
2244; CHECK-FIX-NEXT:    bl get_inputf32
2245; CHECK-FIX-NEXT:    vorr q0, q0, q0
2246; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r4]
2247; CHECK-FIX-NEXT:    aesd.8 q8, q0
2248; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2249; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r4]
2250; CHECK-FIX-NEXT:    pop {r4, pc}
2251  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
2252  %4 = load <16 x i8>, ptr %1, align 8
2253  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2254  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2255  store <16 x i8> %6, ptr %1, align 8
2256  ret void
2257}
2258
; aesd_once_via_ptr: a single aesd/aesimc pair with both operands loaded from
; memory (%1 and %0); the result is stored to %1.
; In the expected assembly both operands come from vld1.64 loads and no vorr
; is emitted.
2259define arm_aapcs_vfpcc void @aesd_once_via_ptr(ptr %0, ptr %1) nounwind {
2260; CHECK-FIX-LABEL: aesd_once_via_ptr:
2261; CHECK-FIX:       @ %bb.0:
2262; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
2263; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r1]
2264; CHECK-FIX-NEXT:    aesd.8 q9, q8
2265; CHECK-FIX-NEXT:    aesimc.8 q8, q9
2266; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2267; CHECK-FIX-NEXT:    bx lr
2268  %3 = load <16 x i8>, ptr %1, align 8
2269  %4 = load <16 x i8>, ptr %0, align 8
2270  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2271  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2272  store <16 x i8> %6, ptr %1, align 8
2273  ret void
2274}
2275
; aesd_once_via_val: a single aesd/aesimc pair whose operands are the two
; vector arguments; the aesimc result is returned.
; In the expected assembly a vorr is placed on both q0 and q1 before the
; aesd.8.
2276define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
2277; CHECK-FIX-LABEL: aesd_once_via_val:
2278; CHECK-FIX:       @ %bb.0:
2279; CHECK-FIX-NEXT:    vorr q0, q0, q0
2280; CHECK-FIX-NEXT:    vorr q1, q1, q1
2281; CHECK-FIX-NEXT:    aesd.8 q0, q1
2282; CHECK-FIX-NEXT:    aesimc.8 q0, q0
2283; CHECK-FIX-NEXT:    bx lr
2284  %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
2285  %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
2286  ret <16 x i8> %4
2287}
2288
; aesd_twice_via_ptr: two consecutive aesd/aesimc pairs with memory operands.
; The first pair uses vectors loaded from %1 and %0 and stores its result to
; %1. The second pair uses that result together with a fresh load from %0 and
; stores again to %1.
; In the expected assembly every loaded operand comes from vld1.64, the second
; aesd.8 consumes the first aesimc.8 result directly, and no vorr is emitted.
2289define arm_aapcs_vfpcc void @aesd_twice_via_ptr(ptr %0, ptr %1) nounwind {
2290; CHECK-FIX-LABEL: aesd_twice_via_ptr:
2291; CHECK-FIX:       @ %bb.0:
2292; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
2293; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r1]
2294; CHECK-FIX-NEXT:    aesd.8 q9, q8
2295; CHECK-FIX-NEXT:    aesimc.8 q8, q9
2296; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2297; CHECK-FIX-NEXT:    vld1.64 {d18, d19}, [r0]
2298; CHECK-FIX-NEXT:    aesd.8 q9, q8
2299; CHECK-FIX-NEXT:    aesimc.8 q8, q9
2300; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2301; CHECK-FIX-NEXT:    bx lr
2302  %3 = load <16 x i8>, ptr %1, align 8
2303  %4 = load <16 x i8>, ptr %0, align 8
2304  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
2305  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2306  store <16 x i8> %6, ptr %1, align 8
2307  %7 = load <16 x i8>, ptr %0, align 8
2308  %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
2309  %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
2310  store <16 x i8> %9, ptr %1, align 8
2311  ret void
2312}
2313
; aesd_twice_via_val: two consecutive aesd/aesimc pairs with register operands.
; The first pair uses the vector arguments %1 and %0; the second pair uses the
; first aesimc result together with %0 again. The final aesimc result is
; returned.
; In the expected assembly one vorr on q1 and two vorr on q0 all appear before
; the first aesd.8; none is emitted between the first aesimc.8 and the second
; aesd.8.
2314define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
2315; CHECK-FIX-LABEL: aesd_twice_via_val:
2316; CHECK-FIX:       @ %bb.0:
2317; CHECK-FIX-NEXT:    vorr q1, q1, q1
2318; CHECK-FIX-NEXT:    vorr q0, q0, q0
2319; CHECK-FIX-NEXT:    vorr q0, q0, q0
2320; CHECK-FIX-NEXT:    aesd.8 q1, q0
2321; CHECK-FIX-NEXT:    aesimc.8 q8, q1
2322; CHECK-FIX-NEXT:    aesd.8 q8, q0
2323; CHECK-FIX-NEXT:    aesimc.8 q0, q8
2324; CHECK-FIX-NEXT:    bx lr
2325  %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
2326  %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
2327  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %4, <16 x i8> %0)
2328  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
2329  ret <16 x i8> %6
2330}
2331
; Loop form of the pointer case: when %0 is non-zero, the loop body loads both
; operands from memory, applies aesd/aesimc and stores the result back to %2,
; until the counter reaches %0.
; The expected output contains no `vorr`; both AES operands come from vld1.64
; loads inside the loop. The two prefix groups differ only in where
; `subs r0, r0, #1` is placed relative to the second load.
2332define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, ptr %1, ptr %2) nounwind {
2333; CHECK-FIX-NOSCHED-LABEL: aesd_loop_via_ptr:
2334; CHECK-FIX-NOSCHED:       @ %bb.0:
2335; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
2336; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
2337; CHECK-FIX-NOSCHED-NEXT:  .LBB54_1: @ =>This Inner Loop Header: Depth=1
2338; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
2339; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
2340; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d18, d19}, [r2]
2341; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q9, q8
2342; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q9
2343; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
2344; CHECK-FIX-NOSCHED-NEXT:    bne .LBB54_1
2345; CHECK-FIX-NOSCHED-NEXT:  @ %bb.2:
2346; CHECK-FIX-NOSCHED-NEXT:    bx lr
2347;
2348; CHECK-CORTEX-FIX-LABEL: aesd_loop_via_ptr:
2349; CHECK-CORTEX-FIX:       @ %bb.0:
2350; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
2351; CHECK-CORTEX-FIX-NEXT:    bxeq lr
2352; CHECK-CORTEX-FIX-NEXT:  .LBB54_1: @ =>This Inner Loop Header: Depth=1
2353; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
2354; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d18, d19}, [r2]
2355; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
2356; CHECK-CORTEX-FIX-NEXT:    aesd.8 q9, q8
2357; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q9
2358; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2359; CHECK-CORTEX-FIX-NEXT:    bne .LBB54_1
2360; CHECK-CORTEX-FIX-NEXT:  @ %bb.2:
2361; CHECK-CORTEX-FIX-NEXT:    bx lr
; Skip the loop entirely when the trip count is zero.
2362  %4 = icmp eq i32 %0, 0
2363  br i1 %4, label %5, label %6
2364
23655:
2366  ret void
2367
; Loop body: %7 is the iteration counter.
23686:
2369  %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
2370  %8 = load <16 x i8>, ptr %2, align 8
2371  %9 = load <16 x i8>, ptr %1, align 8
2372  %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %9)
2373  %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
2374  store <16 x i8> %11, ptr %2, align 8
2375  %12 = add nuw i32 %7, 1
2376  %13 = icmp eq i32 %12, %0
2377  br i1 %13, label %5, label %6
2378}
2379
; Loop form of the by-value case: starting from %2, aesd (with %1 as the other
; operand) followed by aesimc is applied until the counter reaches %0. When %0
; is zero the loop is skipped and %2 is returned unchanged.
; The expected output protects the incoming q1 and q0 with `vorr` once, before
; the loop; the loop body itself contains no `vorr`. The final
; `vorr q0, q1, q1` has different source and destination registers, i.e. it
; copies q1 into q0.
2380define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
2381; CHECK-FIX-LABEL: aesd_loop_via_val:
2382; CHECK-FIX:       @ %bb.0:
2383; CHECK-FIX-NEXT:    vorr q1, q1, q1
2384; CHECK-FIX-NEXT:    vorr q0, q0, q0
2385; CHECK-FIX-NEXT:    cmp r0, #0
2386; CHECK-FIX-NEXT:    beq .LBB55_2
2387; CHECK-FIX-NEXT:  .LBB55_1: @ =>This Inner Loop Header: Depth=1
2388; CHECK-FIX-NEXT:    aesd.8 q1, q0
2389; CHECK-FIX-NEXT:    subs r0, r0, #1
2390; CHECK-FIX-NEXT:    aesimc.8 q1, q1
2391; CHECK-FIX-NEXT:    bne .LBB55_1
2392; CHECK-FIX-NEXT:  .LBB55_2:
2393; CHECK-FIX-NEXT:    vorr q0, q1, q1
2394; CHECK-FIX-NEXT:    bx lr
2395  %4 = icmp eq i32 %0, 0
2396  br i1 %4, label %5, label %7
2397
; Exit block: returns either the untouched argument or the last loop result.
23985:
2399  %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
2400  ret <16 x i8> %6
2401
; Loop body: %8 is the iteration counter, %9 the running AES state.
24027:
2403  %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
2404  %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
2405  %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %1)
2406  %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
2407  %12 = add nuw i32 %8, 1
2408  %13 = icmp eq i32 %12, %0
2409  br i1 %13, label %5, label %7
2410}
2411
; An i8 loaded from %0 is inserted into element 0 of both AES operands: the
; vector loaded from %2 and the vector argument %1. The aesd/aesimc result is
; stored back to %2.
; The expected output has a single `vorr q0, q0, q0` at function entry, and the
; lane inserts appear as `vmov.8 dN[0], r0`. The two prefix groups differ only
; in the order of the `ldrb` and the `vld1.64`.
2412define arm_aapcs_vfpcc void @aesd_set8_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
2413; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr:
2414; CHECK-FIX-NOSCHED:       @ %bb.0:
2415; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
2416; CHECK-FIX-NOSCHED-NEXT:    ldrb r0, [r0]
2417; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
2418; CHECK-FIX-NOSCHED-NEXT:    vmov.8 d0[0], r0
2419; CHECK-FIX-NOSCHED-NEXT:    vmov.8 d16[0], r0
2420; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
2421; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
2422; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
2423; CHECK-FIX-NOSCHED-NEXT:    bx lr
2424;
2425; CHECK-CORTEX-FIX-LABEL: aesd_set8_via_ptr:
2426; CHECK-CORTEX-FIX:       @ %bb.0:
2427; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
2428; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
2429; CHECK-CORTEX-FIX-NEXT:    ldrb r0, [r0]
2430; CHECK-CORTEX-FIX-NEXT:    vmov.8 d0[0], r0
2431; CHECK-CORTEX-FIX-NEXT:    vmov.8 d16[0], r0
2432; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
2433; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
2434; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2435; CHECK-CORTEX-FIX-NEXT:    bx lr
2436  %4 = load i8, ptr %0, align 1
2437  %5 = load <16 x i8>, ptr %2, align 8
2438  %6 = insertelement <16 x i8> %5, i8 %4, i64 0
2439  %7 = insertelement <16 x i8> %1, i8 %4, i64 0
2440  %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
2441  %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
2442  store <16 x i8> %9, ptr %2, align 8
2443  ret void
2444}
2445
; Same shape as the pointer variant, but the i8 is passed directly as an
; argument: it is inserted into element 0 of the vector loaded from %2 and of
; the vector argument %1 before aesd/aesimc; the result is stored to %2.
; The expected output has a single `vorr q0, q0, q0` at function entry followed
; by `vmov.8` lane inserts into d0 and d16.
2446define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
2447; CHECK-FIX-LABEL: aesd_set8_via_val:
2448; CHECK-FIX:       @ %bb.0:
2449; CHECK-FIX-NEXT:    vorr q0, q0, q0
2450; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
2451; CHECK-FIX-NEXT:    vmov.8 d0[0], r0
2452; CHECK-FIX-NEXT:    vmov.8 d16[0], r0
2453; CHECK-FIX-NEXT:    aesd.8 q8, q0
2454; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2455; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2456; CHECK-FIX-NEXT:    bx lr
2457  %4 = load <16 x i8>, ptr %2, align 8
2458  %5 = insertelement <16 x i8> %4, i8 %0, i64 0
2459  %6 = insertelement <16 x i8> %1, i8 %0, i64 0
2460  %7 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %5, <16 x i8> %6)
2461  %8 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %7)
2462  store <16 x i8> %8, ptr %2, align 8
2463  ret void
2464}
2465
; Conditional lane insert through a pointer: only when %0 is true is an i8
; loaded from %1 and inserted into element 0 of each AES operand (the vector
; loaded from %3 and the vector argument %2). Both operands therefore reach the
; aesd through a phi of a modified and an unmodified value.
; The expected output has a single `vorr q0, q0, q0` at function entry; the
; byte load and lane insert appear as `vld1.8 {dN[0]}, [r1]`.
2466define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2467; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr:
2468; CHECK-FIX:       @ %bb.0:
2469; CHECK-FIX-NEXT:    vorr q0, q0, q0
2470; CHECK-FIX-NEXT:    cmp r0, #0
2471; CHECK-FIX-NEXT:    beq .LBB58_2
2472; CHECK-FIX-NEXT:  @ %bb.1:
2473; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2474; CHECK-FIX-NEXT:    vld1.8 {d16[0]}, [r1]
2475; CHECK-FIX-NEXT:    cmp r0, #0
2476; CHECK-FIX-NEXT:    bne .LBB58_3
2477; CHECK-FIX-NEXT:    b .LBB58_4
2478; CHECK-FIX-NEXT:  .LBB58_2:
2479; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2480; CHECK-FIX-NEXT:    cmp r0, #0
2481; CHECK-FIX-NEXT:    beq .LBB58_4
2482; CHECK-FIX-NEXT:  .LBB58_3:
2483; CHECK-FIX-NEXT:    vld1.8 {d0[0]}, [r1]
2484; CHECK-FIX-NEXT:  .LBB58_4:
2485; CHECK-FIX-NEXT:    aesd.8 q8, q0
2486; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2487; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2488; CHECK-FIX-NEXT:    bx lr
; First operand: vector from %3, with element 0 replaced only if %0 is true.
2489  br i1 %0, label %5, label %9
2490
24915:
2492  %6 = load i8, ptr %1, align 1
2493  %7 = load <16 x i8>, ptr %3, align 8
2494  %8 = insertelement <16 x i8> %7, i8 %6, i64 0
2495  br label %11
2496
24979:
2498  %10 = load <16 x i8>, ptr %3, align 8
2499  br label %11
2500
; Second operand: argument %2, with element 0 replaced only if %0 is true.
250111:
2502  %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
2503  br i1 %0, label %13, label %16
2504
250513:
2506  %14 = load i8, ptr %1, align 1
2507  %15 = insertelement <16 x i8> %2, i8 %14, i64 0
2508  br label %16
2509
251016:
2511  %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
2512  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %17)
2513  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
2514  store <16 x i8> %19, ptr %3, align 8
2515  ret void
2516}
2517
; Conditional lane insert by value: for each AES operand (the vector loaded
; from %3 and the vector argument %2) a `select` on %0 chooses between the
; vector with the i8 argument %1 inserted into element 0 and the unmodified
; vector.
; The expected output has a single `vorr q0, q0, q0` at function entry; each
; select becomes a branch around a `vmov.8 dN[0], r1`.
2518define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
2519; CHECK-FIX-LABEL: aesd_set8_cond_via_val:
2520; CHECK-FIX:       @ %bb.0:
2521; CHECK-FIX-NEXT:    vorr q0, q0, q0
2522; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2523; CHECK-FIX-NEXT:    cmp r0, #0
2524; CHECK-FIX-NEXT:    beq .LBB59_2
2525; CHECK-FIX-NEXT:  @ %bb.1:
2526; CHECK-FIX-NEXT:    vmov.8 d16[0], r1
2527; CHECK-FIX-NEXT:  .LBB59_2: @ %select.end
2528; CHECK-FIX-NEXT:    cmp r0, #0
2529; CHECK-FIX-NEXT:    beq .LBB59_4
2530; CHECK-FIX-NEXT:  @ %bb.3:
2531; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
2532; CHECK-FIX-NEXT:  .LBB59_4: @ %select.end2
2533; CHECK-FIX-NEXT:    aesd.8 q8, q0
2534; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2535; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2536; CHECK-FIX-NEXT:    bx lr
2537  %5 = load <16 x i8>, ptr %3, align 8
2538  %6 = insertelement <16 x i8> %5, i8 %1, i64 0
2539  %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
2540  %8 = insertelement <16 x i8> %2, i8 %1, i64 0
2541  %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
2542  %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %9)
2543  %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
2544  store <16 x i8> %11, ptr %3, align 8
2545  ret void
2546}
2547
; Lane insert hoisted above a loop, byte read through a pointer: an i8 loaded
; from %1 is inserted into element 0 of the vector argument %2 and also stored
; to the first byte at %3. When %0 is non-zero the vector at %3 is then loaded
; and run through aesd (against the modified %2) and aesimc until the counter
; reaches %0; the final value is stored back to %3.
; The expected output has a single `vorr q0, q0, q0` at function entry; the
; `vmov.8 d0[0], r1` and the vld1.64 sit before the loop, and the loop body
; contains only the AES pair and the counter update.
2548define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2549; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr:
2550; CHECK-FIX:       @ %bb.0:
2551; CHECK-FIX-NEXT:    vorr q0, q0, q0
2552; CHECK-FIX-NEXT:    ldrb r1, [r1]
2553; CHECK-FIX-NEXT:    cmp r0, #0
2554; CHECK-FIX-NEXT:    strb r1, [r2]
2555; CHECK-FIX-NEXT:    bxeq lr
2556; CHECK-FIX-NEXT:  .LBB60_1:
2557; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
2558; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2559; CHECK-FIX-NEXT:  .LBB60_2: @ =>This Inner Loop Header: Depth=1
2560; CHECK-FIX-NEXT:    aesd.8 q8, q0
2561; CHECK-FIX-NEXT:    subs r0, r0, #1
2562; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2563; CHECK-FIX-NEXT:    bne .LBB60_2
2564; CHECK-FIX-NEXT:  @ %bb.3:
2565; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2566; CHECK-FIX-NEXT:    bx lr
2567  %5 = load i8, ptr %1, align 1
2568  %6 = insertelement <16 x i8> %2, i8 %5, i64 0
2569  %7 = getelementptr inbounds <16 x i8>, ptr %3, i32 0, i32 0
2570  store i8 %5, ptr %7, align 8
2571  %8 = icmp eq i32 %0, 0
2572  br i1 %8, label %12, label %9
2573
; Loop preheader: load the initial AES state once.
25749:
2575  %10 = load <16 x i8>, ptr %3, align 8
2576  br label %13
2577
; Loop exit: write the final state back.
257811:
2579  store <16 x i8> %17, ptr %3, align 8
2580  br label %12
2581
258212:
2583  ret void
2584
; Loop body: %14 is the running AES state, %15 the iteration counter.
258513:
2586  %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
2587  %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
2588  %16 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %6)
2589  %17 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %16)
2590  %18 = add nuw i32 %15, 1
2591  %19 = icmp eq i32 %18, %0
2592  br i1 %19, label %11, label %13
2593}
2594
; Lane insert inside a loop, byte passed by value: when %0 is non-zero, the i8
; argument %1 is inserted into element 0 of the vector argument %2 once, and
; the vector at %3 is loaded once. Each iteration then inserts %1 into element
; 0 of the running state before aesd/aesimc. The final value is stored to %3
; after the loop.
; The expected output has a single `vorr q0, q0, q0` at function entry;
; `vmov.8 d0[0], r1` sits before the loop while `vmov.8 d16[0], r1` is inside
; it.
2595define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
2596; CHECK-FIX-LABEL: aesd_set8_loop_via_val:
2597; CHECK-FIX:       @ %bb.0:
2598; CHECK-FIX-NEXT:    vorr q0, q0, q0
2599; CHECK-FIX-NEXT:    cmp r0, #0
2600; CHECK-FIX-NEXT:    bxeq lr
2601; CHECK-FIX-NEXT:  .LBB61_1:
2602; CHECK-FIX-NEXT:    vmov.8 d0[0], r1
2603; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2604; CHECK-FIX-NEXT:  .LBB61_2: @ =>This Inner Loop Header: Depth=1
2605; CHECK-FIX-NEXT:    vmov.8 d16[0], r1
2606; CHECK-FIX-NEXT:    subs r0, r0, #1
2607; CHECK-FIX-NEXT:    aesd.8 q8, q0
2608; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2609; CHECK-FIX-NEXT:    bne .LBB61_2
2610; CHECK-FIX-NEXT:  @ %bb.3:
2611; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2612; CHECK-FIX-NEXT:    bx lr
2613  %5 = icmp eq i32 %0, 0
2614  br i1 %5, label %10, label %6
2615
; Loop preheader: build the loop-invariant operand and load the initial state.
26166:
2617  %7 = insertelement <16 x i8> %2, i8 %1, i64 0
2618  %8 = load <16 x i8>, ptr %3, align 8
2619  br label %11
2620
; Loop exit: write the final state back.
26219:
2622  store <16 x i8> %16, ptr %3, align 8
2623  br label %10
2624
262510:
2626  ret void
2627
; Loop body: %12 is the running AES state, %13 the iteration counter.
262811:
2629  %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
2630  %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
2631  %14 = insertelement <16 x i8> %12, i8 %1, i64 0
2632  %15 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %7)
2633  %16 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %15)
2634  %17 = add nuw i32 %13, 1
2635  %18 = icmp eq i32 %17, %0
2636  br i1 %18, label %9, label %11
2637}
2638
; 16-bit lane insert through a pointer: an i16 loaded from %0 is inserted into
; element 0 of both AES operands, viewed as <8 x i16> (the vector loaded from
; %2 and the vector argument %1). The aesd/aesimc result is stored to %2.
; The expected output has a single `vorr q0, q0, q0` at function entry, and the
; lane inserts appear as `vmov.16 dN[0], r0`. The two prefix groups differ only
; in the order of the `ldrh` and the `vld1.64`.
2639define arm_aapcs_vfpcc void @aesd_set16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
2640; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr:
2641; CHECK-FIX-NOSCHED:       @ %bb.0:
2642; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
2643; CHECK-FIX-NOSCHED-NEXT:    ldrh r0, [r0]
2644; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
2645; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d0[0], r0
2646; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r0
2647; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
2648; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
2649; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
2650; CHECK-FIX-NOSCHED-NEXT:    bx lr
2651;
2652; CHECK-CORTEX-FIX-LABEL: aesd_set16_via_ptr:
2653; CHECK-CORTEX-FIX:       @ %bb.0:
2654; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
2655; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
2656; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r0]
2657; CHECK-CORTEX-FIX-NEXT:    vmov.16 d0[0], r0
2658; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r0
2659; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
2660; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
2661; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2662; CHECK-CORTEX-FIX-NEXT:    bx lr
2663  %4 = load i16, ptr %0, align 2
; %5 is a ptr-to-ptr bitcast of %2 (same pointer type on both sides).
2664  %5 = bitcast ptr %2 to ptr
2665  %6 = load <8 x i16>, ptr %5, align 8
2666  %7 = insertelement <8 x i16> %6, i16 %4, i64 0
2667  %8 = bitcast <8 x i16> %7 to <16 x i8>
2668  %9 = bitcast <16 x i8> %1 to <8 x i16>
2669  %10 = insertelement <8 x i16> %9, i16 %4, i64 0
2670  %11 = bitcast <8 x i16> %10 to <16 x i8>
2671  %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
2672  %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
2673  store <16 x i8> %13, ptr %2, align 8
2674  ret void
2675}
2676
; 16-bit lane insert by value: the i16 argument %0 is inserted into element 0
; of both AES operands, viewed as <8 x i16> (the vector loaded from %2 and the
; vector argument %1). The aesd/aesimc result is stored to %2.
; The expected output has a single `vorr q0, q0, q0` at function entry followed
; by `vmov.16` lane inserts into d0 and d16.
2677define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
2678; CHECK-FIX-LABEL: aesd_set16_via_val:
2679; CHECK-FIX:       @ %bb.0:
2680; CHECK-FIX-NEXT:    vorr q0, q0, q0
2681; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
2682; CHECK-FIX-NEXT:    vmov.16 d0[0], r0
2683; CHECK-FIX-NEXT:    vmov.16 d16[0], r0
2684; CHECK-FIX-NEXT:    aesd.8 q8, q0
2685; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2686; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2687; CHECK-FIX-NEXT:    bx lr
; %4 is a ptr-to-ptr bitcast of %2 (same pointer type on both sides).
2688  %4 = bitcast ptr %2 to ptr
2689  %5 = load <8 x i16>, ptr %4, align 8
2690  %6 = insertelement <8 x i16> %5, i16 %0, i64 0
2691  %7 = bitcast <8 x i16> %6 to <16 x i8>
2692  %8 = bitcast <16 x i8> %1 to <8 x i16>
2693  %9 = insertelement <8 x i16> %8, i16 %0, i64 0
2694  %10 = bitcast <8 x i16> %9 to <16 x i8>
2695  %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
2696  %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
2697  store <16 x i8> %12, ptr %2, align 8
2698  ret void
2699}
2700
; Conditional 16-bit lane insert through a pointer: only when %0 is true is an
; i16 loaded from %1 and inserted into element 0 of each AES operand, viewed as
; <8 x i16> (the vector loaded from %3 and the vector argument %2). Both
; operands reach the aesd through a phi of a modified and an unmodified value.
; The expected output has a single `vorr q0, q0, q0` at function entry; the
; halfword load and lane insert appear as `vld1.16 {dN[0]}, [r1:16]`.
2701define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2702; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr:
2703; CHECK-FIX:       @ %bb.0:
2704; CHECK-FIX-NEXT:    vorr q0, q0, q0
2705; CHECK-FIX-NEXT:    cmp r0, #0
2706; CHECK-FIX-NEXT:    beq .LBB64_2
2707; CHECK-FIX-NEXT:  @ %bb.1:
2708; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2709; CHECK-FIX-NEXT:    vld1.16 {d16[0]}, [r1:16]
2710; CHECK-FIX-NEXT:    cmp r0, #0
2711; CHECK-FIX-NEXT:    bne .LBB64_3
2712; CHECK-FIX-NEXT:    b .LBB64_4
2713; CHECK-FIX-NEXT:  .LBB64_2:
2714; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2715; CHECK-FIX-NEXT:    cmp r0, #0
2716; CHECK-FIX-NEXT:    beq .LBB64_4
2717; CHECK-FIX-NEXT:  .LBB64_3:
2718; CHECK-FIX-NEXT:    vld1.16 {d0[0]}, [r1:16]
2719; CHECK-FIX-NEXT:  .LBB64_4:
2720; CHECK-FIX-NEXT:    aesd.8 q8, q0
2721; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2722; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2723; CHECK-FIX-NEXT:    bx lr
; First operand: vector from %3, with element 0 replaced only if %0 is true.
2724  br i1 %0, label %5, label %10
2725
27265:
2727  %6 = load i16, ptr %1, align 2
2728  %7 = bitcast ptr %3 to ptr
2729  %8 = load <8 x i16>, ptr %7, align 8
2730  %9 = insertelement <8 x i16> %8, i16 %6, i64 0
2731  br label %13
2732
273310:
2734  %11 = bitcast ptr %3 to ptr
2735  %12 = load <8 x i16>, ptr %11, align 8
2736  br label %13
2737
; Second operand: argument %2, with element 0 replaced only if %0 is true.
273813:
2739  %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
2740  br i1 %0, label %15, label %19
2741
274215:
2743  %16 = load i16, ptr %1, align 2
2744  %17 = bitcast <16 x i8> %2 to <8 x i16>
2745  %18 = insertelement <8 x i16> %17, i16 %16, i64 0
2746  br label %21
2747
274819:
2749  %20 = bitcast <16 x i8> %2 to <8 x i16>
2750  br label %21
2751
275221:
2753  %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
2754  %23 = bitcast <8 x i16> %14 to <16 x i8>
2755  %24 = bitcast <8 x i16> %22 to <16 x i8>
2756  %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
2757  %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
2758  store <16 x i8> %26, ptr %3, align 8
2759  ret void
2760}
2761
; Conditional 16-bit lane insert by value: for each AES operand, viewed as
; <8 x i16> (the vector loaded from %3 and the vector argument %2), a `select`
; on %0 chooses between the vector with the i16 argument %1 inserted into
; element 0 and the unmodified vector.
; The expected output has a single `vorr q0, q0, q0` at function entry; each
; select becomes a branch around a `vmov.16 dN[0], r1`.
2762define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
2763; CHECK-FIX-LABEL: aesd_set16_cond_via_val:
2764; CHECK-FIX:       @ %bb.0:
2765; CHECK-FIX-NEXT:    vorr q0, q0, q0
2766; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2767; CHECK-FIX-NEXT:    cmp r0, #0
2768; CHECK-FIX-NEXT:    beq .LBB65_2
2769; CHECK-FIX-NEXT:  @ %bb.1:
2770; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
2771; CHECK-FIX-NEXT:  .LBB65_2: @ %select.end
2772; CHECK-FIX-NEXT:    cmp r0, #0
2773; CHECK-FIX-NEXT:    beq .LBB65_4
2774; CHECK-FIX-NEXT:  @ %bb.3:
2775; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
2776; CHECK-FIX-NEXT:  .LBB65_4: @ %select.end2
2777; CHECK-FIX-NEXT:    aesd.8 q8, q0
2778; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2779; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2780; CHECK-FIX-NEXT:    bx lr
; %5 is a ptr-to-ptr bitcast of %3 (same pointer type on both sides).
2781  %5 = bitcast ptr %3 to ptr
2782  %6 = load <8 x i16>, ptr %5, align 8
2783  %7 = insertelement <8 x i16> %6, i16 %1, i64 0
2784  %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
2785  %9 = bitcast <16 x i8> %2 to <8 x i16>
2786  %10 = insertelement <8 x i16> %9, i16 %1, i64 0
2787  %11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
2788  %12 = bitcast <8 x i16> %8 to <16 x i8>
2789  %13 = bitcast <8 x i16> %11 to <16 x i8>
2790  %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
2791  %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
2792  store <16 x i8> %15, ptr %3, align 8
2793  ret void
2794}
2795
; 16-bit lane insert hoisted above a loop, halfword read through a pointer: an
; i16 loaded from %1 is inserted into element 0 of the vector argument %2
; (viewed as <8 x i16>) and also stored to the start of the memory at %3. When
; %0 is non-zero the vector at %3 is then loaded and run through aesd (against
; the modified %2) and aesimc until the counter reaches %0; the final value is
; stored back to %3.
; The expected output has a single `vorr q0, q0, q0` at function entry; the
; `vmov.16 d0[0], r1` and the vld1.64 sit before the loop, and the loop body
; contains only the AES pair and the counter update.
2796define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2797; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr:
2798; CHECK-FIX:       @ %bb.0:
2799; CHECK-FIX-NEXT:    vorr q0, q0, q0
2800; CHECK-FIX-NEXT:    ldrh r1, [r1]
2801; CHECK-FIX-NEXT:    cmp r0, #0
2802; CHECK-FIX-NEXT:    strh r1, [r2]
2803; CHECK-FIX-NEXT:    bxeq lr
2804; CHECK-FIX-NEXT:  .LBB66_1:
2805; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
2806; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2807; CHECK-FIX-NEXT:  .LBB66_2: @ =>This Inner Loop Header: Depth=1
2808; CHECK-FIX-NEXT:    aesd.8 q8, q0
2809; CHECK-FIX-NEXT:    subs r0, r0, #1
2810; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2811; CHECK-FIX-NEXT:    bne .LBB66_2
2812; CHECK-FIX-NEXT:  @ %bb.3:
2813; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2814; CHECK-FIX-NEXT:    bx lr
2815  %5 = load i16, ptr %1, align 2
2816  %6 = bitcast <16 x i8> %2 to <8 x i16>
2817  %7 = insertelement <8 x i16> %6, i16 %5, i64 0
2818  %8 = bitcast <8 x i16> %7 to <16 x i8>
2819  %9 = bitcast ptr %3 to ptr
2820  store i16 %5, ptr %9, align 8
2821  %10 = icmp eq i32 %0, 0
2822  br i1 %10, label %14, label %11
2823
; Loop preheader: load the initial AES state once.
282411:
2825  %12 = load <16 x i8>, ptr %3, align 8
2826  br label %15
2827
; Loop exit: write the final state back.
282813:
2829  store <16 x i8> %19, ptr %3, align 8
2830  br label %14
2831
283214:
2833  ret void
2834
; Loop body: %16 is the running AES state, %17 the iteration counter.
283515:
2836  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
2837  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
2838  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
2839  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
2840  %20 = add nuw i32 %17, 1
2841  %21 = icmp eq i32 %20, %0
2842  br i1 %21, label %13, label %15
2843}
2844
; 16-bit lane insert inside a loop, halfword passed by value: when %0 is
; non-zero, the i16 argument %1 is inserted into element 0 of the vector
; argument %2 (viewed as <8 x i16>) once. Each iteration then loads the vector
; at %3, inserts %1 into element 0, stores %1 as an i16 to %3, applies
; aesd/aesimc and stores the result to %3.
; The expected output has a single `vorr q0, q0, q0` at function entry;
; `vmov.16 d0[0], r1` sits before the loop, while the vld1.64,
; `vmov.16 d16[0], r1` and vst1.64 are inside it.
2845define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
2846; CHECK-FIX-LABEL: aesd_set16_loop_via_val:
2847; CHECK-FIX:       @ %bb.0:
2848; CHECK-FIX-NEXT:    vorr q0, q0, q0
2849; CHECK-FIX-NEXT:    cmp r0, #0
2850; CHECK-FIX-NEXT:    bxeq lr
2851; CHECK-FIX-NEXT:  .LBB67_1:
2852; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
2853; CHECK-FIX-NEXT:  .LBB67_2: @ =>This Inner Loop Header: Depth=1
2854; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2855; CHECK-FIX-NEXT:    subs r0, r0, #1
2856; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
2857; CHECK-FIX-NEXT:    aesd.8 q8, q0
2858; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2859; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2860; CHECK-FIX-NEXT:    bne .LBB67_2
2861; CHECK-FIX-NEXT:  @ %bb.3:
2862; CHECK-FIX-NEXT:    bx lr
2863  %5 = icmp eq i32 %0, 0
2864  br i1 %5, label %12, label %6
2865
; Loop preheader: build the loop-invariant operand; %10 and %11 are both
; ptr-to-ptr bitcasts of %3.
28666:
2867  %7 = bitcast <16 x i8> %2 to <8 x i16>
2868  %8 = insertelement <8 x i16> %7, i16 %1, i64 0
2869  %9 = bitcast <8 x i16> %8 to <16 x i8>
2870  %10 = bitcast ptr %3 to ptr
2871  %11 = bitcast ptr %3 to ptr
2872  br label %13
2873
287412:
2875  ret void
2876
; Loop body: %14 is the iteration counter; the AES state lives in memory at %3.
287713:
2878  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
2879  %15 = load <8 x i16>, ptr %10, align 8
2880  %16 = insertelement <8 x i16> %15, i16 %1, i64 0
2881  %17 = bitcast <8 x i16> %16 to <16 x i8>
2882  store i16 %1, ptr %11, align 8
2883  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
2884  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
2885  store <16 x i8> %19, ptr %3, align 8
2886  %20 = add nuw i32 %14, 1
2887  %21 = icmp eq i32 %20, %0
2888  br i1 %21, label %12, label %13
2889}
2890
; 32-bit lane insert through a pointer: an i32 loaded from %0 is inserted into
; element 0 of both AES operands, viewed as <4 x i32> (the vector loaded from
; %2 and the vector argument %1). The aesd/aesimc result is stored to %2.
; The expected output has a single `vorr q0, q0, q0` at function entry, and the
; lane inserts appear as `vmov.32 dN[0], r0`. The two prefix groups differ only
; in the order of the `ldr` and the `vld1.64`.
2891define arm_aapcs_vfpcc void @aesd_set32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
2892; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr:
2893; CHECK-FIX-NOSCHED:       @ %bb.0:
2894; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
2895; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [r0]
2896; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
2897; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d0[0], r0
2898; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r0
2899; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
2900; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
2901; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
2902; CHECK-FIX-NOSCHED-NEXT:    bx lr
2903;
2904; CHECK-CORTEX-FIX-LABEL: aesd_set32_via_ptr:
2905; CHECK-CORTEX-FIX:       @ %bb.0:
2906; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
2907; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
2908; CHECK-CORTEX-FIX-NEXT:    ldr r0, [r0]
2909; CHECK-CORTEX-FIX-NEXT:    vmov.32 d0[0], r0
2910; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r0
2911; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
2912; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
2913; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2914; CHECK-CORTEX-FIX-NEXT:    bx lr
2915  %4 = load i32, ptr %0, align 4
; %5 is a ptr-to-ptr bitcast of %2 (same pointer type on both sides).
2916  %5 = bitcast ptr %2 to ptr
2917  %6 = load <4 x i32>, ptr %5, align 8
2918  %7 = insertelement <4 x i32> %6, i32 %4, i64 0
2919  %8 = bitcast <4 x i32> %7 to <16 x i8>
2920  %9 = bitcast <16 x i8> %1 to <4 x i32>
2921  %10 = insertelement <4 x i32> %9, i32 %4, i64 0
2922  %11 = bitcast <4 x i32> %10 to <16 x i8>
2923  %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
2924  %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
2925  store <16 x i8> %13, ptr %2, align 8
2926  ret void
2927}
2928
; 32-bit lane insert by value: the i32 argument %0 is inserted into element 0
; of both AES operands, viewed as <4 x i32> (the vector loaded from %2 and the
; vector argument %1). The aesd/aesimc result is stored to %2.
; The expected output has a single `vorr q0, q0, q0` at function entry followed
; by `vmov.32` lane inserts into d0 and d16.
2929define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, ptr %2) nounwind {
2930; CHECK-FIX-LABEL: aesd_set32_via_val:
2931; CHECK-FIX:       @ %bb.0:
2932; CHECK-FIX-NEXT:    vorr q0, q0, q0
2933; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
2934; CHECK-FIX-NEXT:    vmov.32 d0[0], r0
2935; CHECK-FIX-NEXT:    vmov.32 d16[0], r0
2936; CHECK-FIX-NEXT:    aesd.8 q8, q0
2937; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2938; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
2939; CHECK-FIX-NEXT:    bx lr
; %4 is a ptr-to-ptr bitcast of %2 (same pointer type on both sides).
2940  %4 = bitcast ptr %2 to ptr
2941  %5 = load <4 x i32>, ptr %4, align 8
2942  %6 = insertelement <4 x i32> %5, i32 %0, i64 0
2943  %7 = bitcast <4 x i32> %6 to <16 x i8>
2944  %8 = bitcast <16 x i8> %1 to <4 x i32>
2945  %9 = insertelement <4 x i32> %8, i32 %0, i64 0
2946  %10 = bitcast <4 x i32> %9 to <16 x i8>
2947  %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
2948  %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
2949  store <16 x i8> %12, ptr %2, align 8
2950  ret void
2951}
2952
; Conditional 32-bit lane insert through a pointer: only when %0 is true is an
; i32 loaded from %1 and inserted into element 0 of each AES operand, viewed as
; <4 x i32> (the vector loaded from %3 and the vector argument %2). Both
; operands reach the aesd through a phi of a modified and an unmodified value.
; The expected output has a single `vorr q0, q0, q0` at function entry; the
; word load and lane insert appear as `vld1.32 {dN[0]}, [r1:32]`.
2953define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
2954; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr:
2955; CHECK-FIX:       @ %bb.0:
2956; CHECK-FIX-NEXT:    vorr q0, q0, q0
2957; CHECK-FIX-NEXT:    cmp r0, #0
2958; CHECK-FIX-NEXT:    beq .LBB70_2
2959; CHECK-FIX-NEXT:  @ %bb.1:
2960; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2961; CHECK-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
2962; CHECK-FIX-NEXT:    cmp r0, #0
2963; CHECK-FIX-NEXT:    bne .LBB70_3
2964; CHECK-FIX-NEXT:    b .LBB70_4
2965; CHECK-FIX-NEXT:  .LBB70_2:
2966; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
2967; CHECK-FIX-NEXT:    cmp r0, #0
2968; CHECK-FIX-NEXT:    beq .LBB70_4
2969; CHECK-FIX-NEXT:  .LBB70_3:
2970; CHECK-FIX-NEXT:    vld1.32 {d0[0]}, [r1:32]
2971; CHECK-FIX-NEXT:  .LBB70_4:
2972; CHECK-FIX-NEXT:    aesd.8 q8, q0
2973; CHECK-FIX-NEXT:    aesimc.8 q8, q8
2974; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
2975; CHECK-FIX-NEXT:    bx lr
; First operand: vector from %3, with element 0 replaced only if %0 is true.
2976  br i1 %0, label %5, label %10
2977
29785:
2979  %6 = load i32, ptr %1, align 4
2980  %7 = bitcast ptr %3 to ptr
2981  %8 = load <4 x i32>, ptr %7, align 8
2982  %9 = insertelement <4 x i32> %8, i32 %6, i64 0
2983  br label %13
2984
298510:
2986  %11 = bitcast ptr %3 to ptr
2987  %12 = load <4 x i32>, ptr %11, align 8
2988  br label %13
2989
; Second operand: argument %2, with element 0 replaced only if %0 is true.
299013:
2991  %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ]
2992  br i1 %0, label %15, label %19
2993
299415:
2995  %16 = load i32, ptr %1, align 4
2996  %17 = bitcast <16 x i8> %2 to <4 x i32>
2997  %18 = insertelement <4 x i32> %17, i32 %16, i64 0
2998  br label %21
2999
300019:
3001  %20 = bitcast <16 x i8> %2 to <4 x i32>
3002  br label %21
3003
300421:
3005  %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ]
3006  %23 = bitcast <4 x i32> %14 to <16 x i8>
3007  %24 = bitcast <4 x i32> %22 to <16 x i8>
3008  %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
3009  %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
3010  store <16 x i8> %26, ptr %3, align 8
3011  ret void
3012}
3013
; Conditional 32-bit lane insert by value: for each AES operand, viewed as
; <4 x i32> (the vector loaded from %3 and the vector argument %2), a `select`
; on %0 chooses between the vector with the i32 argument %1 inserted into
; element 0 and the unmodified vector.
; The expected output has a single `vorr q0, q0, q0` at function entry; each
; select becomes a branch around a `vmov.32 dN[0], r1`.
3014define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
3015; CHECK-FIX-LABEL: aesd_set32_cond_via_val:
3016; CHECK-FIX:       @ %bb.0:
3017; CHECK-FIX-NEXT:    vorr q0, q0, q0
3018; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
3019; CHECK-FIX-NEXT:    cmp r0, #0
3020; CHECK-FIX-NEXT:    beq .LBB71_2
3021; CHECK-FIX-NEXT:  @ %bb.1:
3022; CHECK-FIX-NEXT:    vmov.32 d16[0], r1
3023; CHECK-FIX-NEXT:  .LBB71_2: @ %select.end
3024; CHECK-FIX-NEXT:    cmp r0, #0
3025; CHECK-FIX-NEXT:    beq .LBB71_4
3026; CHECK-FIX-NEXT:  @ %bb.3:
3027; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
3028; CHECK-FIX-NEXT:  .LBB71_4: @ %select.end2
3029; CHECK-FIX-NEXT:    aesd.8 q8, q0
3030; CHECK-FIX-NEXT:    aesimc.8 q8, q8
3031; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
3032; CHECK-FIX-NEXT:    bx lr
; %5 is a ptr-to-ptr bitcast of %3 (same pointer type on both sides).
3033  %5 = bitcast ptr %3 to ptr
3034  %6 = load <4 x i32>, ptr %5, align 8
3035  %7 = insertelement <4 x i32> %6, i32 %1, i64 0
3036  %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6
3037  %9 = bitcast <16 x i8> %2 to <4 x i32>
3038  %10 = insertelement <4 x i32> %9, i32 %1, i64 0
3039  %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9
3040  %12 = bitcast <4 x i32> %8 to <16 x i8>
3041  %13 = bitcast <4 x i32> %11 to <16 x i8>
3042  %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
3043  %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
3044  store <16 x i8> %15, ptr %3, align 8
3045  ret void
3046}
3047
; 32-bit lane insert hoisted above a loop, word read through a pointer: an i32
; loaded from %1 is inserted into element 0 of the vector argument %2 (viewed
; as <4 x i32>) and also stored to the start of the memory at %3. When %0 is
; non-zero the vector at %3 is then loaded and run through aesd (against the
; modified %2) and aesimc until the counter reaches %0; the final value is
; stored back to %3.
; The expected output has a single `vorr q0, q0, q0` at function entry; the
; `vmov.32 d0[0], r1` and the vld1.64 sit before the loop, and the loop body
; contains only the AES pair and the counter update.
3048define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
3049; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr:
3050; CHECK-FIX:       @ %bb.0:
3051; CHECK-FIX-NEXT:    vorr q0, q0, q0
3052; CHECK-FIX-NEXT:    ldr r1, [r1]
3053; CHECK-FIX-NEXT:    cmp r0, #0
3054; CHECK-FIX-NEXT:    str r1, [r2]
3055; CHECK-FIX-NEXT:    bxeq lr
3056; CHECK-FIX-NEXT:  .LBB72_1:
3057; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
3058; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
3059; CHECK-FIX-NEXT:  .LBB72_2: @ =>This Inner Loop Header: Depth=1
3060; CHECK-FIX-NEXT:    aesd.8 q8, q0
3061; CHECK-FIX-NEXT:    subs r0, r0, #1
3062; CHECK-FIX-NEXT:    aesimc.8 q8, q8
3063; CHECK-FIX-NEXT:    bne .LBB72_2
3064; CHECK-FIX-NEXT:  @ %bb.3:
3065; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
3066; CHECK-FIX-NEXT:    bx lr
3067  %5 = load i32, ptr %1, align 4
3068  %6 = bitcast <16 x i8> %2 to <4 x i32>
3069  %7 = insertelement <4 x i32> %6, i32 %5, i64 0
3070  %8 = bitcast <4 x i32> %7 to <16 x i8>
3071  %9 = bitcast ptr %3 to ptr
3072  store i32 %5, ptr %9, align 8
3073  %10 = icmp eq i32 %0, 0
3074  br i1 %10, label %14, label %11
3075
; Loop preheader: load the initial AES state once.
307611:
3077  %12 = load <16 x i8>, ptr %3, align 8
3078  br label %15
3079
; Loop exit: write the final state back.
308013:
3081  store <16 x i8> %19, ptr %3, align 8
3082  br label %14
3083
308414:
3085  ret void
3086
; Loop body: %16 is the running AES state, %17 the iteration counter.
308715:
3088  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
3089  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
3090  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
3091  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3092  %20 = add nuw i32 %17, 1
3093  %21 = icmp eq i32 %20, %0
3094  br i1 %21, label %13, label %15
3095}
3096
; 32-bit lane insert inside a loop, word passed by value: when %0 is non-zero,
; the i32 argument %1 is inserted into element 0 of the vector argument %2
; (viewed as <4 x i32>) once. Each iteration then loads the vector at %3,
; inserts %1 into element 0, stores %1 as an i32 to %3, applies aesd/aesimc and
; stores the result to %3.
; The expected output has a single `vorr q0, q0, q0` at function entry;
; `vmov.32 d0[0], r1` sits before the loop, while the vld1.64,
; `vmov.32 d16[0], r1` and vst1.64 are inside it.
3097define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, ptr %3) nounwind {
3098; CHECK-FIX-LABEL: aesd_set32_loop_via_val:
3099; CHECK-FIX:       @ %bb.0:
3100; CHECK-FIX-NEXT:    vorr q0, q0, q0
3101; CHECK-FIX-NEXT:    cmp r0, #0
3102; CHECK-FIX-NEXT:    bxeq lr
3103; CHECK-FIX-NEXT:  .LBB73_1:
3104; CHECK-FIX-NEXT:    vmov.32 d0[0], r1
3105; CHECK-FIX-NEXT:  .LBB73_2: @ =>This Inner Loop Header: Depth=1
3106; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
3107; CHECK-FIX-NEXT:    subs r0, r0, #1
3108; CHECK-FIX-NEXT:    vmov.32 d16[0], r1
3109; CHECK-FIX-NEXT:    aesd.8 q8, q0
3110; CHECK-FIX-NEXT:    aesimc.8 q8, q8
3111; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
3112; CHECK-FIX-NEXT:    bne .LBB73_2
3113; CHECK-FIX-NEXT:  @ %bb.3:
3114; CHECK-FIX-NEXT:    bx lr
3115  %5 = icmp eq i32 %0, 0
3116  br i1 %5, label %12, label %6
3117
; Loop preheader: build the loop-invariant operand; %10 and %11 are both
; ptr-to-ptr bitcasts of %3.
31186:
3119  %7 = bitcast <16 x i8> %2 to <4 x i32>
3120  %8 = insertelement <4 x i32> %7, i32 %1, i64 0
3121  %9 = bitcast <4 x i32> %8 to <16 x i8>
3122  %10 = bitcast ptr %3 to ptr
3123  %11 = bitcast ptr %3 to ptr
3124  br label %13
3125
312612:
3127  ret void
3128
; Loop body: %14 is the iteration counter; the AES state lives in memory at %3.
312913:
3130  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
3131  %15 = load <4 x i32>, ptr %10, align 8
3132  %16 = insertelement <4 x i32> %15, i32 %1, i64 0
3133  %17 = bitcast <4 x i32> %16 to <16 x i8>
3134  store i32 %1, ptr %11, align 8
3135  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
3136  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3137  store <16 x i8> %19, ptr %3, align 8
3138  %20 = add nuw i32 %14, 1
3139  %21 = icmp eq i32 %20, %0
3140  br i1 %21, label %12, label %13
3141}
3142
; 64-bit lane insert through a pointer: an i64 loaded from %0 is inserted into
; element 0 of both AES operands, viewed as <2 x i64> (the vector loaded from
; %2 and the vector argument %1). The aesd/aesimc result is stored to %2.
; The expected output has a single `vorr q0, q0, q0` at function entry. The
; i64 is loaded straight into d0 with `vldr` and copied into d16 with
; `vorr d16, d0, d0`; no `vmov` lane insert is used. The two prefix groups
; differ only in the order of the `vldr` and the `vld1.64`.
3143define arm_aapcs_vfpcc void @aesd_set64_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
3144; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr:
3145; CHECK-FIX-NOSCHED:       @ %bb.0:
3146; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
3147; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
3148; CHECK-FIX-NOSCHED-NEXT:    vldr d0, [r0]
3149; CHECK-FIX-NOSCHED-NEXT:    vorr d16, d0, d0
3150; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
3151; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
3152; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
3153; CHECK-FIX-NOSCHED-NEXT:    bx lr
3154;
3155; CHECK-CORTEX-FIX-LABEL: aesd_set64_via_ptr:
3156; CHECK-CORTEX-FIX:       @ %bb.0:
3157; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
3158; CHECK-CORTEX-FIX-NEXT:    vldr d0, [r0]
3159; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
3160; CHECK-CORTEX-FIX-NEXT:    vorr d16, d0, d0
3161; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
3162; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
3163; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
3164; CHECK-CORTEX-FIX-NEXT:    bx lr
3165  %4 = load i64, ptr %0, align 8
; %5 is a ptr-to-ptr bitcast of %2 (same pointer type on both sides).
3166  %5 = bitcast ptr %2 to ptr
3167  %6 = load <2 x i64>, ptr %5, align 8
3168  %7 = insertelement <2 x i64> %6, i64 %4, i64 0
3169  %8 = bitcast <2 x i64> %7 to <16 x i8>
3170  %9 = bitcast <16 x i8> %1 to <2 x i64>
3171  %10 = insertelement <2 x i64> %9, i64 %4, i64 0
3172  %11 = bitcast <2 x i64> %10 to <16 x i8>
3173  %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
3174  %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
3175  store <16 x i8> %13, ptr %2, align 8
3176  ret void
3177}
3178
; aesd_set64_via_val: same shape as the via_ptr variant, but the i64 written
; into lane 0 of both AESD operands is passed by value as %0.
; Expected code (identical for every RUN line, hence the shared prefix): a
; `vorr q0, q0, q0` at function entry, then the i64 is written as two 32-bit
; halves (r0, r1) into d0 and d16 with `vmov.32` lane moves directly before
; the aesd/aesimc pair.
3179define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) nounwind {
3180; CHECK-FIX-LABEL: aesd_set64_via_val:
3181; CHECK-FIX:       @ %bb.0:
3182; CHECK-FIX-NEXT:    vorr q0, q0, q0
3183; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
3184; CHECK-FIX-NEXT:    vmov.32 d0[0], r0
3185; CHECK-FIX-NEXT:    vmov.32 d16[0], r0
3186; CHECK-FIX-NEXT:    vmov.32 d0[1], r1
3187; CHECK-FIX-NEXT:    vmov.32 d16[1], r1
3188; CHECK-FIX-NEXT:    aesd.8 q8, q0
3189; CHECK-FIX-NEXT:    aesimc.8 q8, q8
3190; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
3191; CHECK-FIX-NEXT:    bx lr
  ; ptr-to-ptr bitcast: a no-op cast of %2.
3192  %4 = bitcast ptr %2 to ptr
  ; First AESD operand: vector from memory with lane 0 replaced by %0.
3193  %5 = load <2 x i64>, ptr %4, align 8
3194  %6 = insertelement <2 x i64> %5, i64 %0, i64 0
3195  %7 = bitcast <2 x i64> %6 to <16 x i8>
  ; Second AESD operand: argument vector with lane 0 replaced by %0.
3196  %8 = bitcast <16 x i8> %1 to <2 x i64>
3197  %9 = insertelement <2 x i64> %8, i64 %0, i64 0
3198  %10 = bitcast <2 x i64> %9 to <16 x i8>
3199  %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
3200  %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
3201  store <16 x i8> %12, ptr %2, align 8
3202  ret void
3203}
3204
; aesd_set64_cond_via_ptr: lane 0 of each AESD operand is replaced by an i64
; loaded through %1 only when %0 is true. The IR expresses this as two
; branch diamonds, each merged by a phi.
; Expected code: the first diamond stays a real branch; the second becomes a
; predicated `vldrne d0, [r1]` followed by `vorr q0, q0, q0` directly before
; the aesd/aesimc pair. The two prefixes differ in the taken arm of the first
; diamond: one loads straight into d16 with `vldr`, the other loads into d18
; and copies with `vorr d16, d18, d18`.
3205define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
3206; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr:
3207; CHECK-FIX-NOSCHED:       @ %bb.0:
3208; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
3209; CHECK-FIX-NOSCHED-NEXT:    beq .LBB76_2
3210; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
3211; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
3212; CHECK-FIX-NOSCHED-NEXT:    vldr d16, [r1]
3213; CHECK-FIX-NOSCHED-NEXT:    b .LBB76_3
3214; CHECK-FIX-NOSCHED-NEXT:  .LBB76_2:
3215; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
3216; CHECK-FIX-NOSCHED-NEXT:  .LBB76_3:
3217; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
3218; CHECK-FIX-NOSCHED-NEXT:    vldrne d0, [r1]
3219; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
3220; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
3221; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
3222; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
3223; CHECK-FIX-NOSCHED-NEXT:    bx lr
3224;
3225; CHECK-CORTEX-FIX-LABEL: aesd_set64_cond_via_ptr:
3226; CHECK-CORTEX-FIX:       @ %bb.0:
3227; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
3228; CHECK-CORTEX-FIX-NEXT:    beq .LBB76_2
3229; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
3230; CHECK-CORTEX-FIX-NEXT:    vldr d18, [r1]
3231; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
3232; CHECK-CORTEX-FIX-NEXT:    vorr d16, d18, d18
3233; CHECK-CORTEX-FIX-NEXT:    b .LBB76_3
3234; CHECK-CORTEX-FIX-NEXT:  .LBB76_2:
3235; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
3236; CHECK-CORTEX-FIX-NEXT:  .LBB76_3:
3237; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
3238; CHECK-CORTEX-FIX-NEXT:    vldrne d0, [r1]
3239; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
3240; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
3241; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
3242; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
3243; CHECK-CORTEX-FIX-NEXT:    bx lr
  ; First diamond: selects the first AESD operand.
3244  br i1 %0, label %5, label %10
3245
; %0 true: vector from memory with lane 0 replaced by the i64 loaded via %1.
32465:
3247  %6 = load i64, ptr %1, align 8
3248  %7 = bitcast ptr %3 to ptr
3249  %8 = load <2 x i64>, ptr %7, align 8
3250  %9 = insertelement <2 x i64> %8, i64 %6, i64 0
3251  br label %13
3252
; %0 false: vector from memory, unmodified.
325310:
3254  %11 = bitcast ptr %3 to ptr
3255  %12 = load <2 x i64>, ptr %11, align 8
3256  br label %13
3257
; Merge of the first diamond, then the second diamond on the same condition
; selects the second AESD operand.
325813:
3259  %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ]
3260  br i1 %0, label %15, label %19
3261
; %0 true: argument vector with lane 0 replaced by the i64 loaded via %1.
326215:
3263  %16 = load i64, ptr %1, align 8
3264  %17 = bitcast <16 x i8> %2 to <2 x i64>
3265  %18 = insertelement <2 x i64> %17, i64 %16, i64 0
3266  br label %21
3267
; %0 false: argument vector, unmodified.
326819:
3269  %20 = bitcast <16 x i8> %2 to <2 x i64>
3270  br label %21
3271
; Merge of the second diamond; run the aesd/aesimc pair and store the result.
327221:
3272  %22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ]
3273  %23 = bitcast <2 x i64> %14 to <16 x i8>
3274  %24 = bitcast <2 x i64> %22 to <16 x i8>
3275  %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
3276  %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
3277  store <16 x i8> %26, ptr %3, align 8
3278  ret void
3279}
3281
; aesd_set64_cond_via_val: lane 0 of each AESD operand is replaced by the
; by-value i64 %1 only when %0 is true. Unlike the via_ptr variant, the IR
; uses `select` instead of explicit branches.
; Expected code (shared by every RUN line): a `vorr q0, q0, q0` at function
; entry, the load/store address is read from `[sp]` into r1, and each select
; appears as a compare-and-branch around a pair of `vmov.32` lane writes
; (blocks labelled %select.end and %select.end2).
3282define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
3283; CHECK-FIX-LABEL: aesd_set64_cond_via_val:
3284; CHECK-FIX:       @ %bb.0:
3285; CHECK-FIX-NEXT:    vorr q0, q0, q0
3286; CHECK-FIX-NEXT:    ldr r1, [sp]
3287; CHECK-FIX-NEXT:    cmp r0, #0
3288; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
3289; CHECK-FIX-NEXT:    beq .LBB77_2
3290; CHECK-FIX-NEXT:  @ %bb.1:
3291; CHECK-FIX-NEXT:    vmov.32 d16[0], r2
3292; CHECK-FIX-NEXT:    vmov.32 d16[1], r3
3293; CHECK-FIX-NEXT:  .LBB77_2: @ %select.end
3294; CHECK-FIX-NEXT:    cmp r0, #0
3295; CHECK-FIX-NEXT:    beq .LBB77_4
3296; CHECK-FIX-NEXT:  @ %bb.3:
3297; CHECK-FIX-NEXT:    vmov.32 d0[0], r2
3298; CHECK-FIX-NEXT:    vmov.32 d0[1], r3
3299; CHECK-FIX-NEXT:  .LBB77_4: @ %select.end2
3300; CHECK-FIX-NEXT:    aesd.8 q8, q0
3301; CHECK-FIX-NEXT:    aesimc.8 q8, q8
3302; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
3303; CHECK-FIX-NEXT:    bx lr
  ; ptr-to-ptr bitcast: a no-op cast of %3.
3304  %5 = bitcast ptr %3 to ptr
  ; First AESD operand: memory vector, with lane 0 replaced only if %0 is true.
3305  %6 = load <2 x i64>, ptr %5, align 8
3306  %7 = insertelement <2 x i64> %6, i64 %1, i64 0
3307  %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6
  ; Second AESD operand: argument vector, with lane 0 replaced only if %0 is true.
3308  %9 = bitcast <16 x i8> %2 to <2 x i64>
3309  %10 = insertelement <2 x i64> %9, i64 %1, i64 0
3310  %11 = select i1 %0, <2 x i64> %10, <2 x i64> %9
3311  %12 = bitcast <2 x i64> %8 to <16 x i8>
3312  %13 = bitcast <2 x i64> %11 to <16 x i8>
3313  %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
3314  %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
3315  store <16 x i8> %15, ptr %3, align 8
3316  ret void
3317}
3318
; aesd_set64_loop_via_ptr: loads an i64 through %1, inserts it into lane 0 of
; the argument vector %2 (the loop-invariant second AESD operand) and stores
; the same i64 through %3. If %0 is non-zero, the vector at %3 is loaded once
; and the aesd/aesimc pair is applied %0 times, feeding each result into the
; next iteration; the final value is stored through %3 after the loop.
; Expected code: a `vorr q0, q0, q0` at function entry and a `vmov d0, r4, r5`
; before the loop; the loop body contains only aesd, the counter update,
; aesimc and the back-edge branch. The two prefixes differ in the zero-trip
; exit (`beq` to a shared `pop` versus a predicated `popeq`).
3319define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
3320; CHECK-FIX-NOSCHED-LABEL: aesd_set64_loop_via_ptr:
3321; CHECK-FIX-NOSCHED:       @ %bb.0:
3322; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
3323; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r11, lr}
3324; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r11, lr}
3325; CHECK-FIX-NOSCHED-NEXT:    ldrd r4, r5, [r1]
3326; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
3327; CHECK-FIX-NOSCHED-NEXT:    strd r4, r5, [r2]
3328; CHECK-FIX-NOSCHED-NEXT:    beq .LBB78_4
3329; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
3330; CHECK-FIX-NOSCHED-NEXT:    vmov d0, r4, r5
3331; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
3332; CHECK-FIX-NOSCHED-NEXT:  .LBB78_2: @ =>This Inner Loop Header: Depth=1
3333; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
3334; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
3335; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
3336; CHECK-FIX-NOSCHED-NEXT:    bne .LBB78_2
3337; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
3338; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
3339; CHECK-FIX-NOSCHED-NEXT:  .LBB78_4:
3340; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r11, pc}
3341;
3342; CHECK-CORTEX-FIX-LABEL: aesd_set64_loop_via_ptr:
3343; CHECK-CORTEX-FIX:       @ %bb.0:
3344; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
3345; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r11, lr}
3346; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r11, lr}
3347; CHECK-CORTEX-FIX-NEXT:    ldrd r4, r5, [r1]
3348; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
3349; CHECK-CORTEX-FIX-NEXT:    strd r4, r5, [r2]
3350; CHECK-CORTEX-FIX-NEXT:    popeq {r4, r5, r11, pc}
3351; CHECK-CORTEX-FIX-NEXT:  .LBB78_1:
3352; CHECK-CORTEX-FIX-NEXT:    vmov d0, r4, r5
3353; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
3354; CHECK-CORTEX-FIX-NEXT:  .LBB78_2: @ =>This Inner Loop Header: Depth=1
3355; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
3356; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
3357; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
3358; CHECK-CORTEX-FIX-NEXT:    bne .LBB78_2
3359; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
3360; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
3361; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r11, pc}
  ; Loop-invariant second AESD operand: %2 with lane 0 replaced by the loaded i64.
3362  %5 = load i64, ptr %1, align 8
3363  %6 = bitcast <16 x i8> %2 to <2 x i64>
3364  %7 = insertelement <2 x i64> %6, i64 %5, i64 0
3365  %8 = bitcast <2 x i64> %7 to <16 x i8>
  ; The same i64 is also written to the first 8 bytes at %3 before the loop.
3366  %9 = bitcast ptr %3 to ptr
3367  store i64 %5, ptr %9, align 8
  ; Skip the loop entirely when the trip count %0 is zero.
3368  %10 = icmp eq i32 %0, 0
3369  br i1 %10, label %14, label %11
3370
; Loop preheader: initial value of the first AESD operand.
337111:
3372  %12 = load <16 x i8>, ptr %3, align 8
3373  br label %15
3374
; Loop exit: store the final aesimc result.
337513:
3376  store <16 x i8> %19, ptr %3, align 8
3377  br label %14
3378
337914:
3380  ret void
3381
; Loop body: %16 carries the previous iteration's result, %17 counts iterations.
338215:
3383  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
3384  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
3385  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
3386  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3387  %20 = add nuw i32 %17, 1
3388  %21 = icmp eq i32 %20, %0
3389  br i1 %21, label %13, label %15
3390}
3391
; aesd_set64_loop_via_val: the second AESD operand is the argument vector %2
; with lane 0 replaced by the by-value i64 %1, built once before the loop.
; Each of the %0 iterations reloads the vector at %3, replaces its lane 0 with
; %1, runs the aesd/aesimc pair and stores the result back through %3.
; Expected code (shared by every RUN line): a `vorr q0, q0, q0` at function
; entry, `vmov.32` writes into d0 before the loop, and `vmov.32` writes into
; d16 inside the loop between the `vld1.64` and the aesd.
3392define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, ptr %3) nounwind {
3393; CHECK-FIX-LABEL: aesd_set64_loop_via_val:
3394; CHECK-FIX:       @ %bb.0:
3395; CHECK-FIX-NEXT:    vorr q0, q0, q0
3396; CHECK-FIX-NEXT:    cmp r0, #0
3397; CHECK-FIX-NEXT:    bxeq lr
3398; CHECK-FIX-NEXT:  .LBB79_1:
3399; CHECK-FIX-NEXT:    vmov.32 d0[0], r2
3400; CHECK-FIX-NEXT:    ldr r1, [sp]
3401; CHECK-FIX-NEXT:    vmov.32 d0[1], r3
3402; CHECK-FIX-NEXT:  .LBB79_2: @ =>This Inner Loop Header: Depth=1
3403; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
3404; CHECK-FIX-NEXT:    subs r0, r0, #1
3405; CHECK-FIX-NEXT:    vmov.32 d16[0], r2
3406; CHECK-FIX-NEXT:    vmov.32 d16[1], r3
3407; CHECK-FIX-NEXT:    aesd.8 q8, q0
3408; CHECK-FIX-NEXT:    aesimc.8 q8, q8
3409; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
3410; CHECK-FIX-NEXT:    bne .LBB79_2
3411; CHECK-FIX-NEXT:  @ %bb.3:
3412; CHECK-FIX-NEXT:    bx lr
  ; Return immediately when the trip count %0 is zero.
3413  %5 = icmp eq i32 %0, 0
3414  br i1 %5, label %12, label %6
3415
; Loop preheader: build the loop-invariant second AESD operand.
34166:
3417  %7 = bitcast <16 x i8> %2 to <2 x i64>
3418  %8 = insertelement <2 x i64> %7, i64 %1, i64 0
3419  %9 = bitcast <2 x i64> %8 to <16 x i8>
  ; Both casts are ptr-to-ptr no-ops on %3.
3420  %10 = bitcast ptr %3 to ptr
3421  %11 = bitcast ptr %3 to ptr
3422  br label %13
3423
342412:
3425  ret void
3426
; Loop body: %14 counts iterations.
342713:
3428  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  ; First AESD operand: vector reloaded from %3 with lane 0 replaced by %1.
3429  %15 = load <2 x i64>, ptr %10, align 8
3430  %16 = insertelement <2 x i64> %15, i64 %1, i64 0
3431  %17 = bitcast <2 x i64> %16 to <16 x i8>
  ; No separate instruction for this i64 store appears in the expected loop
  ; body; presumably it is removed because the 16-byte store below
  ; overwrites it.
3432  store i64 %1, ptr %11, align 8
3433  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
3434  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
3435  store <16 x i8> %19, ptr %3, align 8
3436  %20 = add nuw i32 %14, 1
3437  %21 = icmp eq i32 %20, %0
3438  br i1 %21, label %12, label %13
3439}
3440
; aesd_setf16_via_ptr: loads a 16-bit value through %0 (as i16 in this IR) and
; inserts it into lane 0 of both AESD operands, viewed as <8 x i16>: the
; vector loaded through %2 and the argument %1.
; Expected code: a `vorr q0, q0, q0` at function entry, then `ldrh` and two
; `vmov.16` lane writes (d0[0] and d16[0]) before the aesd/aesimc pair. The
; two prefixes differ only in the order of the `ldrh` and `vld1.64` loads.
3441define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
3442; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr:
3443; CHECK-FIX-NOSCHED:       @ %bb.0:
3444; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
3445; CHECK-FIX-NOSCHED-NEXT:    ldrh r0, [r0]
3446; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
3447; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d0[0], r0
3448; CHECK-FIX-NOSCHED-NEXT:    vmov.16 d16[0], r0
3449; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
3450; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
3451; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
3452; CHECK-FIX-NOSCHED-NEXT:    bx lr
3453;
3454; CHECK-CORTEX-FIX-LABEL: aesd_setf16_via_ptr:
3455; CHECK-CORTEX-FIX:       @ %bb.0:
3456; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
3457; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
3458; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r0]
3459; CHECK-CORTEX-FIX-NEXT:    vmov.16 d0[0], r0
3460; CHECK-CORTEX-FIX-NEXT:    vmov.16 d16[0], r0
3461; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
3462; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
3463; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
3464; CHECK-CORTEX-FIX-NEXT:    bx lr
  ; The 16-bit value that is written into lane 0 of both operands.
3465  %4 = bitcast ptr %0 to ptr
3466  %5 = load i16, ptr %4, align 2
  ; First AESD operand: vector from memory with lane 0 replaced.
3467  %6 = bitcast ptr %2 to ptr
3468  %7 = load <8 x i16>, ptr %6, align 8
3469  %8 = insertelement <8 x i16> %7, i16 %5, i64 0
3470  %9 = bitcast <8 x i16> %8 to <16 x i8>
  ; Second AESD operand: argument vector with lane 0 replaced.
3471  %10 = bitcast <16 x i8> %1 to <8 x i16>
3472  %11 = insertelement <8 x i16> %10, i16 %5, i64 0
3473  %12 = bitcast <8 x i16> %11 to <16 x i8>
3474  %13 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %12)
3475  %14 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %13)
3476  store <16 x i8> %14, ptr %2, align 8
3477  ret void
3478}
3479
; aesd_setf16_via_val: the by-value half %0 is bitcast to i16 and inserted
; into lane 0 of both AESD operands (the vector loaded through %2 and the
; argument %1).
; Expected code (shared by every RUN line): the vector operand is in q1 here,
; so the entry instruction is `vorr q1, q1, q1`; the half is moved out of s0
; with `vmov r1, s0` and written with `vmov.16` into d2[0] and d16[0] before
; the aesd/aesimc pair.
3480define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, ptr %2) nounwind {
3481; CHECK-FIX-LABEL: aesd_setf16_via_val:
3482; CHECK-FIX:       @ %bb.0:
3483; CHECK-FIX-NEXT:    vorr q1, q1, q1
3484; CHECK-FIX-NEXT:    vmov r1, s0
3485; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
3486; CHECK-FIX-NEXT:    vmov.16 d2[0], r1
3487; CHECK-FIX-NEXT:    vmov.16 d16[0], r1
3488; CHECK-FIX-NEXT:    aesd.8 q8, q1
3489; CHECK-FIX-NEXT:    aesimc.8 q8, q8
3490; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
3491; CHECK-FIX-NEXT:    bx lr
  ; ptr-to-ptr bitcast: a no-op cast of %2.
3492  %4 = bitcast ptr %2 to ptr
3493  %5 = load <8 x i16>, ptr %4, align 8
  ; Reinterpret the half argument as its 16-bit pattern.
3494  %6 = bitcast half %0 to i16
  ; First AESD operand: vector from memory with lane 0 replaced.
3495  %7 = insertelement <8 x i16> %5, i16 %6, i64 0
3496  %8 = bitcast <8 x i16> %7 to <16 x i8>
  ; Second AESD operand: argument vector with lane 0 replaced.
3497  %9 = bitcast <16 x i8> %1 to <8 x i16>
3498  %10 = insertelement <8 x i16> %9, i16 %6, i64 0
3499  %11 = bitcast <8 x i16> %10 to <16 x i8>
3500  %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
3501  %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
3502  store <16 x i8> %13, ptr %2, align 8
3503  ret void
3504}
3505
; aesd_setf16_cond_via_ptr: lane 0 of each AESD operand is replaced by a
; 16-bit value loaded through %1 only when %0 is true. The IR uses two branch
; diamonds whose phis have type <8 x half>.
; Expected code: both operands are taken apart lane by lane with `vmov.u16`,
; repacked in pairs with `pkhbt`, and rebuilt with `vmov.32` lane writes
; directly before the aesd/aesimc pair; no `vorr` appears in either expected
; sequence. The two prefixes differ in register allocation, spill area size
; (12 versus 24 bytes) and block layout.
3506define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
3507; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_ptr:
3508; CHECK-FIX-NOSCHED:       @ %bb.0:
3509; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3510; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3511; CHECK-FIX-NOSCHED-NEXT:    .pad #12
3512; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #12
3513; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
3514; CHECK-FIX-NOSCHED-NEXT:    beq .LBB82_3
3515; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
3516; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
3517; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[3]
3518; CHECK-FIX-NOSCHED-NEXT:    ldrh r7, [r1]
3519; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r9, d17[0]
3520; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r10, d16[3]
3521; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r11, d16[2]
3522; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
3523; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[2]
3524; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
3525; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[1]
3526; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
3527; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d16[1]
3528; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
3529; CHECK-FIX-NOSCHED-NEXT:    bne .LBB82_4
3530; CHECK-FIX-NOSCHED-NEXT:  .LBB82_2:
3531; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r0, d1[3]
3532; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r4, d1[2]
3533; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r5, d1[1]
3534; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r6, d1[0]
3535; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r12, d0[3]
3536; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 lr, d0[2]
3537; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r8, d0[1]
3538; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r1, d0[0]
3539; CHECK-FIX-NOSCHED-NEXT:    b .LBB82_5
3540; CHECK-FIX-NOSCHED-NEXT:  .LBB82_3:
3541; CHECK-FIX-NOSCHED-NEXT:    add r3, r2, #8
3542; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d16[0]}, [r2:32]
3543; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d17[0]}, [r3:32]
3544; CHECK-FIX-NOSCHED-NEXT:    add r3, r2, #4
3545; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d16[1]}, [r3:32]
3546; CHECK-FIX-NOSCHED-NEXT:    add r3, r2, #12
3547; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d17[1]}, [r3:32]
3548; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[3]
3549; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r9, d17[0]
3550; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r10, d16[3]
3551; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r11, d16[2]
3552; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r7, d16[0]
3553; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #8] @ 4-byte Spill
3554; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[2]
3555; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp, #4] @ 4-byte Spill
3556; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d17[1]
3557; CHECK-FIX-NOSCHED-NEXT:    str r3, [sp] @ 4-byte Spill
3558; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d16[1]
3559; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
3560; CHECK-FIX-NOSCHED-NEXT:    beq .LBB82_2
3561; CHECK-FIX-NOSCHED-NEXT:  .LBB82_4:
3562; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r0, d1[3]
3563; CHECK-FIX-NOSCHED-NEXT:    ldrh r1, [r1]
3564; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r4, d1[2]
3565; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r5, d1[1]
3566; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r6, d1[0]
3567; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r12, d0[3]
3568; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 lr, d0[2]
3569; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r8, d0[1]
3570; CHECK-FIX-NOSCHED-NEXT:  .LBB82_5:
3571; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r1, r8, lsl #16
3572; CHECK-FIX-NOSCHED-NEXT:    pkhbt r3, r7, r3, lsl #16
3573; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r4, r0, lsl #16
3574; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r1
3575; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, lr, r12, lsl #16
3576; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r3
3577; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r1
3578; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r11, r10, lsl #16
3579; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r1
3580; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r6, r5, lsl #16
3581; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r1
3582; CHECK-FIX-NOSCHED-NEXT:    ldr r1, [sp] @ 4-byte Reload
3583; CHECK-FIX-NOSCHED-NEXT:    pkhbt r1, r9, r1, lsl #16
3584; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
3585; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
3586; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r1
3587; CHECK-FIX-NOSCHED-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
3588; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r1, r0, lsl #16
3589; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
3590; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q9
3591; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
3592; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
3593; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #12
3594; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
3595;
3596; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_ptr:
3597; CHECK-CORTEX-FIX:       @ %bb.0:
3598; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3599; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3600; CHECK-CORTEX-FIX-NEXT:    .pad #24
3601; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #24
3602; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
3603; CHECK-CORTEX-FIX-NEXT:    beq .LBB82_2
3604; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
3605; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
3606; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[1]
3607; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d17[0]
3608; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[2]
3609; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
3610; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[2]
3611; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
3612; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[3]
3613; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
3614; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[1]
3615; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #12] @ 4-byte Spill
3616; CHECK-CORTEX-FIX-NEXT:    ldrh r3, [r1]
3617; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
3618; CHECK-CORTEX-FIX-NEXT:    mov r3, r6
3619; CHECK-CORTEX-FIX-NEXT:    b .LBB82_3
3620; CHECK-CORTEX-FIX-NEXT:  .LBB82_2:
3621; CHECK-CORTEX-FIX-NEXT:    add r3, r2, #8
3622; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[0]}, [r2:32]
3623; CHECK-CORTEX-FIX-NEXT:    add r7, r2, #4
3624; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[0]}, [r3:32]
3625; CHECK-CORTEX-FIX-NEXT:    add r3, r2, #12
3626; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[1]}, [r7:32]
3627; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[1]}, [r3:32]
3628; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[0]
3629; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #16] @ 4-byte Spill
3630; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[1]
3631; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[1]
3632; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #20] @ 4-byte Spill
3633; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[2]
3634; CHECK-CORTEX-FIX-NEXT:    str r7, [sp, #12] @ 4-byte Spill
3635; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d17[2]
3636; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
3637; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[3]
3638; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
3639; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[0]
3640; CHECK-CORTEX-FIX-NEXT:  .LBB82_3:
3641; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d17[3]
3642; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
3643; CHECK-CORTEX-FIX-NEXT:    beq .LBB82_5
3644; CHECK-CORTEX-FIX-NEXT:  @ %bb.4:
3645; CHECK-CORTEX-FIX-NEXT:    ldrh r0, [r1]
3646; CHECK-CORTEX-FIX-NEXT:    b .LBB82_6
3647; CHECK-CORTEX-FIX-NEXT:  .LBB82_5:
3648; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r0, d0[0]
3649; CHECK-CORTEX-FIX-NEXT:  .LBB82_6:
3650; CHECK-CORTEX-FIX-NEXT:    str r0, [sp] @ 4-byte Spill
3651; CHECK-CORTEX-FIX-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
3652; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
3653; CHECK-CORTEX-FIX-NEXT:    pkhbt r9, r7, r4, lsl #16
3654; CHECK-CORTEX-FIX-NEXT:    ldr r4, [sp, #20] @ 4-byte Reload
3655; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r10, d0[1]
3656; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d0[2]
3657; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r12, d0[3]
3658; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r11, d1[0]
3659; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r5, d1[1]
3660; CHECK-CORTEX-FIX-NEXT:    vmov.u16 lr, d1[2]
3661; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r8, d1[3]
3662; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r0, r1, lsl #16
3663; CHECK-CORTEX-FIX-NEXT:    ldr r1, [sp, #12] @ 4-byte Reload
3664; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, lr, r8, lsl #16
3665; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r12, lsl #16
3666; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r11, r5, lsl #16
3667; CHECK-CORTEX-FIX-NEXT:    pkhbt r1, r3, r1, lsl #16
3668; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp, #16] @ 4-byte Reload
3669; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r3, r4, lsl #16
3670; CHECK-CORTEX-FIX-NEXT:    ldr r3, [sp] @ 4-byte Reload
3671; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r4
3672; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r1
3673; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r7
3674; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], r9
3675; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r10, lsl #16
3676; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r3
3677; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r5
3678; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r6
3679; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r0
3680; CHECK-CORTEX-FIX-NEXT:    aesd.8 q9, q8
3681; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q9
3682; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
3683; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #24
3684; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
  ; First diamond: selects the first AESD operand.
3685  br i1 %0, label %5, label %12
3686
; %0 true: vector from memory (as <8 x i16>) with lane 0 replaced by the
; i16 loaded via %1, then reinterpreted as <8 x half>.
36875:
3688  %6 = bitcast ptr %1 to ptr
3689  %7 = load i16, ptr %6, align 2
3690  %8 = bitcast ptr %3 to ptr
3691  %9 = load <8 x i16>, ptr %8, align 8
3692  %10 = insertelement <8 x i16> %9, i16 %7, i64 0
3693  %11 = bitcast <8 x i16> %10 to <8 x half>
3694  br label %15
3695
; %0 false: vector from memory, loaded directly as <8 x half>.
369612:
3697  %13 = bitcast ptr %3 to ptr
3698  %14 = load <8 x half>, ptr %13, align 8
3699  br label %15
3700
; Merge of the first diamond, then the second diamond on the same condition
; selects the second AESD operand.
370115:
3702  %16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ]
3703  br i1 %0, label %17, label %23
3704
; %0 true: argument vector with lane 0 replaced by the i16 loaded via %1.
370517:
3706  %18 = bitcast ptr %1 to ptr
3707  %19 = load i16, ptr %18, align 2
3708  %20 = bitcast <16 x i8> %2 to <8 x i16>
3709  %21 = insertelement <8 x i16> %20, i16 %19, i64 0
3710  %22 = bitcast <8 x i16> %21 to <8 x half>
3711  br label %25
3712
; %0 false: argument vector, only reinterpreted as <8 x half>.
371323:
3714  %24 = bitcast <16 x i8> %2 to <8 x half>
3715  br label %25
3716
; Merge of the second diamond; run the aesd/aesimc pair and store the result.
371725:
3718  %26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ]
3719  %27 = bitcast <8 x half> %16 to <16 x i8>
3720  %28 = bitcast <8 x half> %26 to <16 x i8>
3721  %29 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %27, <16 x i8> %28)
3722  %30 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %29)
3723  store <16 x i8> %30, ptr %3, align 8
3724  ret void
3725}
3726
3727define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, ptr %3) nounwind {
3728; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_val:
3729; CHECK-FIX-NOSCHED:       @ %bb.0:
3730; CHECK-FIX-NOSCHED-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3731; CHECK-FIX-NOSCHED-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3732; CHECK-FIX-NOSCHED-NEXT:    .pad #12
3733; CHECK-FIX-NOSCHED-NEXT:    sub sp, sp, #12
3734; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
3735; CHECK-FIX-NOSCHED-NEXT:    beq .LBB83_2
3736; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
3737; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r1]
3738; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s2, s0
3739; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d17[1]
3740; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r7, d17[3]
3741; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 lr, d17[2]
3742; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r11, d16[2]
3743; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r8, d16[1]
3744; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #8] @ 4-byte Spill
3745; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d17[0]
3746; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #4] @ 4-byte Spill
3747; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d16[3]
3748; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp] @ 4-byte Spill
3749; CHECK-FIX-NOSCHED-NEXT:    b .LBB83_3
3750; CHECK-FIX-NOSCHED-NEXT:  .LBB83_2:
3751; CHECK-FIX-NOSCHED-NEXT:    add r2, r1, #8
3752; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d16[0]}, [r1:32]
3753; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d17[0]}, [r2:32]
3754; CHECK-FIX-NOSCHED-NEXT:    add r2, r1, #4
3755; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d16[1]}, [r2:32]
3756; CHECK-FIX-NOSCHED-NEXT:    add r2, r1, #12
3757; CHECK-FIX-NOSCHED-NEXT:    vld1.32 {d17[1]}, [r2:32]
3758; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d17[1]
3759; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r7, d17[3]
3760; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 lr, d17[2]
3761; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r11, d16[2]
3762; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r8, d16[1]
3763; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #8] @ 4-byte Spill
3764; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d17[0]
3765; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp, #4] @ 4-byte Spill
3766; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d16[3]
3767; CHECK-FIX-NOSCHED-NEXT:    str r2, [sp] @ 4-byte Spill
3768; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d16[0]
3769; CHECK-FIX-NOSCHED-NEXT:    vmov s2, r2
3770; CHECK-FIX-NOSCHED-NEXT:  .LBB83_3:
3771; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r9, d3[3]
3772; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
3773; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r10, d3[2]
3774; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r2, d3[1]
3775; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r3, d3[0]
3776; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r4, d2[3]
3777; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r5, d2[2]
3778; CHECK-FIX-NOSCHED-NEXT:    beq .LBB83_5
3779; CHECK-FIX-NOSCHED-NEXT:  @ %bb.4:
3780; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r12, d2[1]
3781; CHECK-FIX-NOSCHED-NEXT:    b .LBB83_6
3782; CHECK-FIX-NOSCHED-NEXT:  .LBB83_5:
3783; CHECK-FIX-NOSCHED-NEXT:    mov r0, lr
3784; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 lr, d2[0]
3785; CHECK-FIX-NOSCHED-NEXT:    vmov.u16 r12, d2[1]
3786; CHECK-FIX-NOSCHED-NEXT:    vmov s0, lr
3787; CHECK-FIX-NOSCHED-NEXT:    mov lr, r0
3788; CHECK-FIX-NOSCHED-NEXT:  .LBB83_6:
3789; CHECK-FIX-NOSCHED-NEXT:    vmov r0, s0
3790; CHECK-FIX-NOSCHED-NEXT:    vmov r6, s2
3791; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r0, r12, lsl #16
3792; CHECK-FIX-NOSCHED-NEXT:    pkhbt r6, r6, r8, lsl #16
3793; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[0], r0
3794; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r5, r4, lsl #16
3795; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[0], r6
3796; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d18[1], r0
3797; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp] @ 4-byte Reload
3798; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r11, r0, lsl #16
3799; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d16[1], r0
3800; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r3, r2, lsl #16
3801; CHECK-FIX-NOSCHED-NEXT:    ldr r2, [sp, #4] @ 4-byte Reload
3802; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[0], r0
3803; CHECK-FIX-NOSCHED-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
3804; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r2, r0, lsl #16
3805; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[0], r0
3806; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, r10, r9, lsl #16
3807; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d19[1], r0
3808; CHECK-FIX-NOSCHED-NEXT:    pkhbt r0, lr, r7, lsl #16
3809; CHECK-FIX-NOSCHED-NEXT:    vmov.32 d17[1], r0
3810; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q9
3811; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
3812; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
3813; CHECK-FIX-NOSCHED-NEXT:    add sp, sp, #12
3814; CHECK-FIX-NOSCHED-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
3815;
3816; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_val:
3817; CHECK-CORTEX-FIX:       @ %bb.0:
3818; CHECK-CORTEX-FIX-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3819; CHECK-CORTEX-FIX-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
3820; CHECK-CORTEX-FIX-NEXT:    .pad #12
3821; CHECK-CORTEX-FIX-NEXT:    sub sp, sp, #12
3822; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
3823; CHECK-CORTEX-FIX-NEXT:    beq .LBB83_3
3824; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
3825; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
3826; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s2, s0
3827; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d16[1]
3828; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d16[2]
3829; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r10, d16[3]
3830; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r11, d17[2]
3831; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d17[3]
3832; CHECK-CORTEX-FIX-NEXT:    str r2, [sp, #8] @ 4-byte Spill
3833; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d17[0]
3834; CHECK-CORTEX-FIX-NEXT:    str r2, [sp, #4] @ 4-byte Spill
3835; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d17[1]
3836; CHECK-CORTEX-FIX-NEXT:    str r2, [sp] @ 4-byte Spill
3837; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
3838; CHECK-CORTEX-FIX-NEXT:    bne .LBB83_4
3839; CHECK-CORTEX-FIX-NEXT:  .LBB83_2:
3840; CHECK-CORTEX-FIX-NEXT:    vmov.u16 lr, d2[0]
3841; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r8, d2[1]
3842; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d2[2]
3843; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d2[3]
3844; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r9, d3[0]
3845; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d3[1]
3846; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r5, d3[2]
3847; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r12, d3[3]
3848; CHECK-CORTEX-FIX-NEXT:    vmov s0, lr
3849; CHECK-CORTEX-FIX-NEXT:    b .LBB83_5
3850; CHECK-CORTEX-FIX-NEXT:  .LBB83_3:
3851; CHECK-CORTEX-FIX-NEXT:    add r2, r1, #8
3852; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
3853; CHECK-CORTEX-FIX-NEXT:    add r3, r1, #4
3854; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[0]}, [r2:32]
3855; CHECK-CORTEX-FIX-NEXT:    add r2, r1, #12
3856; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d16[1]}, [r3:32]
3857; CHECK-CORTEX-FIX-NEXT:    vld1.32 {d17[1]}, [r2:32]
3858; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d16[1]
3859; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d16[0]
3860; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r7, d16[2]
3861; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r10, d16[3]
3862; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #8] @ 4-byte Spill
3863; CHECK-CORTEX-FIX-NEXT:    vmov s2, r2
3864; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[0]
3865; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r11, d17[2]
3866; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r6, d17[3]
3867; CHECK-CORTEX-FIX-NEXT:    str r3, [sp, #4] @ 4-byte Spill
3868; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d17[1]
3869; CHECK-CORTEX-FIX-NEXT:    str r3, [sp] @ 4-byte Spill
3870; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
3871; CHECK-CORTEX-FIX-NEXT:    beq .LBB83_2
3872; CHECK-CORTEX-FIX-NEXT:  .LBB83_4:
3873; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r8, d2[1]
3874; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r3, d2[2]
3875; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r4, d2[3]
3876; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r9, d3[0]
3877; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r2, d3[1]
3878; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r5, d3[2]
3879; CHECK-CORTEX-FIX-NEXT:    vmov.u16 r12, d3[3]
3880; CHECK-CORTEX-FIX-NEXT:  .LBB83_5:
3881; CHECK-CORTEX-FIX-NEXT:    pkhbt lr, r11, r6, lsl #16
3882; CHECK-CORTEX-FIX-NEXT:    pkhbt r0, r7, r10, lsl #16
3883; CHECK-CORTEX-FIX-NEXT:    ldm sp, {r6, r7} @ 8-byte Folded Reload
3884; CHECK-CORTEX-FIX-NEXT:    pkhbt r3, r3, r4, lsl #16
3885; CHECK-CORTEX-FIX-NEXT:    pkhbt r5, r5, r12, lsl #16
3886; CHECK-CORTEX-FIX-NEXT:    pkhbt r2, r9, r2, lsl #16
3887; CHECK-CORTEX-FIX-NEXT:    pkhbt r4, r7, r6, lsl #16
3888; CHECK-CORTEX-FIX-NEXT:    vmov r7, s2
3889; CHECK-CORTEX-FIX-NEXT:    ldr r6, [sp, #8] @ 4-byte Reload
3890; CHECK-CORTEX-FIX-NEXT:    pkhbt r7, r7, r6, lsl #16
3891; CHECK-CORTEX-FIX-NEXT:    vmov r6, s0
3892; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[0], r7
3893; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[0], r4
3894; CHECK-CORTEX-FIX-NEXT:    vmov.32 d18[1], r0
3895; CHECK-CORTEX-FIX-NEXT:    vmov.32 d19[1], lr
3896; CHECK-CORTEX-FIX-NEXT:    pkhbt r6, r6, r8, lsl #16
3897; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[0], r6
3898; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[0], r2
3899; CHECK-CORTEX-FIX-NEXT:    vmov.32 d16[1], r3
3900; CHECK-CORTEX-FIX-NEXT:    vmov.32 d17[1], r5
3901; CHECK-CORTEX-FIX-NEXT:    aesd.8 q9, q8
3902; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q9
3903; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
3904; CHECK-CORTEX-FIX-NEXT:    add sp, sp, #12
3905; CHECK-CORTEX-FIX-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
3906  br i1 %0, label %5, label %11
3907
39085:
3909  %6 = bitcast ptr %3 to ptr
3910  %7 = load <8 x i16>, ptr %6, align 8
3911  %8 = bitcast half %1 to i16
3912  %9 = insertelement <8 x i16> %7, i16 %8, i64 0
3913  %10 = bitcast <8 x i16> %9 to <8 x half>
3914  br label %14
3915
391611:
3917  %12 = bitcast ptr %3 to ptr
3918  %13 = load <8 x half>, ptr %12, align 8
3919  br label %14
3920
392114:
3922  %15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
3923  br i1 %0, label %16, label %21
3924
392516:
3926  %17 = bitcast <16 x i8> %2 to <8 x i16>
3927  %18 = bitcast half %1 to i16
3928  %19 = insertelement <8 x i16> %17, i16 %18, i64 0
3929  %20 = bitcast <8 x i16> %19 to <8 x half>
3930  br label %23
3931
393221:
3933  %22 = bitcast <16 x i8> %2 to <8 x half>
3934  br label %23
3935
393623:
3937  %24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
3938  %25 = bitcast <8 x half> %15 to <16 x i8>
3939  %26 = bitcast <8 x half> %24 to <16 x i8>
3940  %27 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %25, <16 x i8> %26)
3941  %28 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %27)
3942  store <16 x i8> %28, ptr %3, align 8
3943  ret void
3944}
3945
; aesd/aesimc loop in which a 16-bit value, loaded through a pointer, is
; inserted into lane 0 of the second aesd operand before the loop starts.
;   %0 - trip count (the loop is skipped entirely when it is 0)
;   %1 - points at the i16 that is inserted
;   %2 - vector that receives the i16 in lane 0
;   %3 - receives the i16, supplies the initial first aesd operand, and is
;        where the final aesimc result is stored
; The expected output contains a single `vorr q0, q0, q0` in the entry block
; and none inside the loop body.
3946define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
3947; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr:
3948; CHECK-FIX:       @ %bb.0:
3949; CHECK-FIX-NEXT:    vorr q0, q0, q0
3950; CHECK-FIX-NEXT:    ldrh r1, [r1]
3951; CHECK-FIX-NEXT:    cmp r0, #0
3952; CHECK-FIX-NEXT:    strh r1, [r2]
3953; CHECK-FIX-NEXT:    bxeq lr
3954; CHECK-FIX-NEXT:  .LBB84_1:
3955; CHECK-FIX-NEXT:    vmov.16 d0[0], r1
3956; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
3957; CHECK-FIX-NEXT:  .LBB84_2: @ =>This Inner Loop Header: Depth=1
3958; CHECK-FIX-NEXT:    aesd.8 q8, q0
3959; CHECK-FIX-NEXT:    subs r0, r0, #1
3960; CHECK-FIX-NEXT:    aesimc.8 q8, q8
3961; CHECK-FIX-NEXT:    bne .LBB84_2
3962; CHECK-FIX-NEXT:  @ %bb.3:
3963; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
3964; CHECK-FIX-NEXT:    bx lr
3965  %5 = bitcast ptr %1 to ptr
3966  %6 = load i16, ptr %5, align 2
3967  %7 = bitcast <16 x i8> %2 to <8 x i16>
3968  %8 = insertelement <8 x i16> %7, i16 %6, i64 0
3969  %9 = bitcast <8 x i16> %8 to <16 x i8>
3970  %10 = bitcast ptr %3 to ptr
3971  store i16 %6, ptr %10, align 8
3972  %11 = icmp eq i32 %0, 0
3973  br i1 %11, label %15, label %12
3974
; Preheader: the initial first aesd operand is read back from %3.
397512:
3976  %13 = load <16 x i8>, ptr %3, align 8
3977  br label %16
3978
; Loop exit: write the last aesimc result to %3.
397914:
3980  store <16 x i8> %20, ptr %3, align 8
3981  br label %15
3982
398315:
3984  ret void
3985
; Loop body: one aesd + aesimc pair per iteration; %9 is loop-invariant.
398616:
3987  %17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
3988  %18 = phi i32 [ 0, %12 ], [ %21, %16 ]
3989  %19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
3990  %20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
3991  %21 = add nuw i32 %18, 1
3992  %22 = icmp eq i32 %21, %0
3993  br i1 %22, label %14, label %16
3994}
3995
; aesd/aesimc loop in which a `half` argument is inserted into lane 0 of both
; aesd operands: once into %2 before the loop, and into the vector reloaded
; from %3 on every iteration.
;   %0 - trip count (the loop is skipped entirely when it is 0)
;   %1 - the half whose bits are inserted
;   %2 - vector used as the second aesd operand after the insertion
;   %3 - reloaded as the first aesd operand and rewritten with the aesimc
;        result on every iteration
; The expected output has one `vorr q1, q1, q1` in the entry block and no
; `vorr` inside the loop.
3996define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, ptr %3) nounwind {
3997; CHECK-FIX-LABEL: aesd_setf16_loop_via_val:
3998; CHECK-FIX:       @ %bb.0:
3999; CHECK-FIX-NEXT:    vorr q1, q1, q1
4000; CHECK-FIX-NEXT:    cmp r0, #0
4001; CHECK-FIX-NEXT:    bxeq lr
4002; CHECK-FIX-NEXT:  .LBB85_1:
4003; CHECK-FIX-NEXT:    vmov r2, s0
4004; CHECK-FIX-NEXT:    vmov.16 d2[0], r2
4005; CHECK-FIX-NEXT:  .LBB85_2: @ =>This Inner Loop Header: Depth=1
4006; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r1]
4007; CHECK-FIX-NEXT:    subs r0, r0, #1
4008; CHECK-FIX-NEXT:    vmov.16 d16[0], r2
4009; CHECK-FIX-NEXT:    aesd.8 q8, q1
4010; CHECK-FIX-NEXT:    aesimc.8 q8, q8
4011; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
4012; CHECK-FIX-NEXT:    bne .LBB85_2
4013; CHECK-FIX-NEXT:  @ %bb.3:
4014; CHECK-FIX-NEXT:    bx lr
4015  %5 = icmp eq i32 %0, 0
4016  br i1 %5, label %13, label %6
4017
; Preheader: build the loop-invariant second aesd operand (%10).
40186:
4019  %7 = bitcast <16 x i8> %2 to <8 x i16>
4020  %8 = bitcast half %1 to i16
4021  %9 = insertelement <8 x i16> %7, i16 %8, i64 0
4022  %10 = bitcast <8 x i16> %9 to <16 x i8>
4023  %11 = bitcast ptr %3 to ptr
4024  %12 = bitcast ptr %3 to ptr
4025  br label %14
4026
402713:
4028  ret void
4029
; Loop body: reload from %3, insert lane 0, run aesd + aesimc, store back.
; NOTE(review): the `store half` below has no counterpart in the expected
; assembly; presumably it is dropped because the 16-byte store of %20 to the
; same pointer overwrites it - confirm.
403014:
4031  %15 = phi i32 [ 0, %6 ], [ %21, %14 ]
4032  %16 = load <8 x i16>, ptr %11, align 8
4033  %17 = insertelement <8 x i16> %16, i16 %8, i64 0
4034  %18 = bitcast <8 x i16> %17 to <16 x i8>
4035  store half %1, ptr %12, align 8
4036  %19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %18, <16 x i8> %10)
4037  %20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19)
4038  store <16 x i8> %20, ptr %3, align 8
4039  %21 = add nuw i32 %15, 1
4040  %22 = icmp eq i32 %21, %0
4041  br i1 %22, label %13, label %14
4042}
4043
; Inserts a float, loaded through %0, into lane 0 of both aesd operands: the
; vector loaded from %2 (first operand) and the vector argument %1 (second
; operand). The aesimc result is stored back to %2.
; In the expected output lane 0 of each operand is written by a 32-bit access
; (`vldr s0` and `vmov.f32 s4, s0`), and a `vorr` on both q1 and q0 sits
; directly before the aesd.
4044define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
4045; CHECK-FIX-LABEL: aesd_setf32_via_ptr:
4046; CHECK-FIX:       @ %bb.0:
4047; CHECK-FIX-NEXT:    vldr s0, [r0]
4048; CHECK-FIX-NEXT:    vld1.64 {d2, d3}, [r1]
4049; CHECK-FIX-NEXT:    vmov.f32 s4, s0
4050; CHECK-FIX-NEXT:    vorr q1, q1, q1
4051; CHECK-FIX-NEXT:    vorr q0, q0, q0
4052; CHECK-FIX-NEXT:    aesd.8 q1, q0
4053; CHECK-FIX-NEXT:    aesimc.8 q8, q1
4054; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
4055; CHECK-FIX-NEXT:    bx lr
4056  %4 = load float, ptr %0, align 4
4057  %5 = bitcast ptr %2 to ptr
4058  %6 = load <4 x float>, ptr %5, align 8
4059  %7 = insertelement <4 x float> %6, float %4, i64 0
4060  %8 = bitcast <4 x float> %7 to <16 x i8>
4061  %9 = bitcast <16 x i8> %1 to <4 x float>
4062  %10 = insertelement <4 x float> %9, float %4, i64 0
4063  %11 = bitcast <4 x float> %10 to <16 x i8>
4064  %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11)
4065  %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12)
4066  store <16 x i8> %13, ptr %2, align 8
4067  ret void
4068}
4069
; Inserts the float argument %0 into lane 0 of both aesd operands: the vector
; loaded from %2 (first operand) and the vector argument %1 (second operand).
; The aesimc result is stored back to %2.
; In the expected output both insertions are `vmov.f32` writes to an s
; register, and a `vorr` on both q0 and q1 sits directly before the aesd.
4070define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, ptr %2) nounwind {
4071; CHECK-FIX-LABEL: aesd_setf32_via_val:
4072; CHECK-FIX:       @ %bb.0:
4073; CHECK-FIX-NEXT:    vmov.f32 s4, s0
4074; CHECK-FIX-NEXT:    vld1.64 {d0, d1}, [r0]
4075; CHECK-FIX-NEXT:    vmov.f32 s0, s4
4076; CHECK-FIX-NEXT:    vorr q0, q0, q0
4077; CHECK-FIX-NEXT:    vorr q1, q1, q1
4078; CHECK-FIX-NEXT:    aesd.8 q0, q1
4079; CHECK-FIX-NEXT:    aesimc.8 q8, q0
4080; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
4081; CHECK-FIX-NEXT:    bx lr
4082  %4 = bitcast ptr %2 to ptr
4083  %5 = load <4 x float>, ptr %4, align 8
4084  %6 = insertelement <4 x float> %5, float %0, i64 0
4085  %7 = bitcast <4 x float> %6 to <16 x i8>
4086  %8 = bitcast <16 x i8> %1 to <4 x float>
4087  %9 = insertelement <4 x float> %8, float %0, i64 0
4088  %10 = bitcast <4 x float> %9 to <16 x i8>
4089  %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10)
4090  %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11)
4091  store <16 x i8> %12, ptr %2, align 8
4092  ret void
4093}
4094
; Conditionally (on %0) inserts a float, loaded through %1, into lane 0 of
; each aesd operand, using two separate branch diamonds that test the same
; condition. The first operand is always loaded from %3; the second is the
; vector argument %2. The aesimc result is stored back to %3.
; The expected output performs both insertions with lane loads
; (`vld1.32 {d16[0]}` and `vld1.32 {d0[0]}`) and has a single
; `vorr q0, q0, q0` in the entry block; no `vorr` appears directly before the
; aesd.
4095define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
4096; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr:
4097; CHECK-FIX:       @ %bb.0:
4098; CHECK-FIX-NEXT:    vorr q0, q0, q0
4099; CHECK-FIX-NEXT:    cmp r0, #0
4100; CHECK-FIX-NEXT:    beq .LBB88_2
4101; CHECK-FIX-NEXT:  @ %bb.1:
4102; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
4103; CHECK-FIX-NEXT:    vld1.32 {d16[0]}, [r1:32]
4104; CHECK-FIX-NEXT:    cmp r0, #0
4105; CHECK-FIX-NEXT:    bne .LBB88_3
4106; CHECK-FIX-NEXT:    b .LBB88_4
4107; CHECK-FIX-NEXT:  .LBB88_2:
4108; CHECK-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
4109; CHECK-FIX-NEXT:    cmp r0, #0
4110; CHECK-FIX-NEXT:    beq .LBB88_4
4111; CHECK-FIX-NEXT:  .LBB88_3:
4112; CHECK-FIX-NEXT:    vld1.32 {d0[0]}, [r1:32]
4113; CHECK-FIX-NEXT:  .LBB88_4:
4114; CHECK-FIX-NEXT:    aesd.8 q8, q0
4115; CHECK-FIX-NEXT:    aesimc.8 q8, q8
4116; CHECK-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
4117; CHECK-FIX-NEXT:    bx lr
4118  br i1 %0, label %5, label %10
4119
; First diamond, taken side: load the vector from %3 and overwrite lane 0.
41205:
4121  %6 = load float, ptr %1, align 4
4122  %7 = bitcast ptr %3 to ptr
4123  %8 = load <4 x float>, ptr %7, align 8
4124  %9 = insertelement <4 x float> %8, float %6, i64 0
4125  br label %13
4126
; First diamond, other side: load the vector from %3 unchanged.
412710:
4128  %11 = bitcast ptr %3 to ptr
4129  %12 = load <4 x float>, ptr %11, align 8
4130  br label %13
4131
413213:
4133  %14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
4134  br i1 %0, label %15, label %19
4135
; Second diamond, taken side: overwrite lane 0 of the vector argument %2.
413615:
4137  %16 = load float, ptr %1, align 4
4138  %17 = bitcast <16 x i8> %2 to <4 x float>
4139  %18 = insertelement <4 x float> %17, float %16, i64 0
4140  br label %21
4141
; Second diamond, other side: use %2 unchanged.
414219:
4143  %20 = bitcast <16 x i8> %2 to <4 x float>
4144  br label %21
4145
414621:
4147  %22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
4148  %23 = bitcast <4 x float> %14 to <16 x i8>
4149  %24 = bitcast <4 x float> %22 to <16 x i8>
4150  %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24)
4151  %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25)
4152  store <16 x i8> %26, ptr %3, align 8
4153  ret void
4154}
4155
; Conditionally (on %0) inserts the float argument %1 into lane 0 of each
; aesd operand, expressed with `select` instead of branches. The first operand
; is loaded from %3; the second is the vector argument %2. The aesimc result
; is stored back to %3.
; Both expected outputs perform each insertion with a predicated
; `vmovne.f32`, each followed by a `vorr` of the enclosing q register. The two
; check blocks differ only in the order of the leading `cmp` and `vld1.64`.
4156define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, ptr %3) nounwind {
4157; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_cond_via_val:
4158; CHECK-FIX-NOSCHED:       @ %bb.0:
4159; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
4160; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
4161; CHECK-FIX-NOSCHED-NEXT:    vmovne.f32 s8, s0
4162; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
4163; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
4164; CHECK-FIX-NOSCHED-NEXT:    vmovne.f32 s4, s0
4165; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
4166; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q2, q1
4167; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q2
4168; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
4169; CHECK-FIX-NOSCHED-NEXT:    bx lr
4170;
4171; CHECK-CORTEX-FIX-LABEL: aesd_setf32_cond_via_val:
4172; CHECK-CORTEX-FIX:       @ %bb.0:
4173; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
4174; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
4175; CHECK-CORTEX-FIX-NEXT:    vmovne.f32 s8, s0
4176; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
4177; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
4178; CHECK-CORTEX-FIX-NEXT:    vmovne.f32 s4, s0
4179; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
4180; CHECK-CORTEX-FIX-NEXT:    aesd.8 q2, q1
4181; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q2
4182; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
4183; CHECK-CORTEX-FIX-NEXT:    bx lr
4184  %5 = bitcast ptr %3 to ptr
4185  %6 = load <4 x float>, ptr %5, align 8
4186  %7 = insertelement <4 x float> %6, float %1, i64 0
4187  %8 = select i1 %0, <4 x float> %7, <4 x float> %6
4188  %9 = bitcast <16 x i8> %2 to <4 x float>
4189  %10 = insertelement <4 x float> %9, float %1, i64 0
4190  %11 = select i1 %0, <4 x float> %10, <4 x float> %9
4191  %12 = bitcast <4 x float> %8 to <16 x i8>
4192  %13 = bitcast <4 x float> %11 to <16 x i8>
4193  %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13)
4194  %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14)
4195  store <16 x i8> %15, ptr %3, align 8
4196  ret void
4197}
4198
; aesd/aesimc loop in which a float, loaded through a pointer, is inserted
; into lane 0 of the second aesd operand before the loop starts.
;   %0 - trip count (the loop is skipped entirely when it is 0)
;   %1 - points at the float that is inserted
;   %2 - vector that receives the float in lane 0
;   %3 - receives the float, supplies the initial first aesd operand, and is
;        where the final aesimc result is stored
; Both expected outputs keep `vorr q0, q0, q0` inside the loop, directly
; before the aesd. They differ only in the order of `vmov.f32 s0, s4` and
; `vld1.64` in the block that precedes the loop.
4199define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
4200; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_ptr:
4201; CHECK-FIX-NOSCHED:       @ %bb.0:
4202; CHECK-FIX-NOSCHED-NEXT:    vldr s4, [r1]
4203; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
4204; CHECK-FIX-NOSCHED-NEXT:    vstr s4, [r2]
4205; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
4206; CHECK-FIX-NOSCHED-NEXT:  .LBB90_1:
4207; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s0, s4
4208; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r2]
4209; CHECK-FIX-NOSCHED-NEXT:  .LBB90_2: @ =>This Inner Loop Header: Depth=1
4210; CHECK-FIX-NOSCHED-NEXT:    vorr q0, q0, q0
4211; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q8, q0
4212; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
4213; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q8
4214; CHECK-FIX-NOSCHED-NEXT:    bne .LBB90_2
4215; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
4216; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r2]
4217; CHECK-FIX-NOSCHED-NEXT:    bx lr
4218;
4219; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_ptr:
4220; CHECK-CORTEX-FIX:       @ %bb.0:
4221; CHECK-CORTEX-FIX-NEXT:    vldr s4, [r1]
4222; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
4223; CHECK-CORTEX-FIX-NEXT:    vstr s4, [r2]
4224; CHECK-CORTEX-FIX-NEXT:    bxeq lr
4225; CHECK-CORTEX-FIX-NEXT:  .LBB90_1:
4226; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r2]
4227; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s0, s4
4228; CHECK-CORTEX-FIX-NEXT:  .LBB90_2: @ =>This Inner Loop Header: Depth=1
4229; CHECK-CORTEX-FIX-NEXT:    vorr q0, q0, q0
4230; CHECK-CORTEX-FIX-NEXT:    aesd.8 q8, q0
4231; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
4232; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q8
4233; CHECK-CORTEX-FIX-NEXT:    bne .LBB90_2
4234; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
4235; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r2]
4236; CHECK-CORTEX-FIX-NEXT:    bx lr
4237  %5 = load float, ptr %1, align 4
4238  %6 = bitcast <16 x i8> %2 to <4 x float>
4239  %7 = insertelement <4 x float> %6, float %5, i64 0
4240  %8 = bitcast <4 x float> %7 to <16 x i8>
4241  %9 = bitcast ptr %3 to ptr
4242  store float %5, ptr %9, align 8
4243  %10 = icmp eq i32 %0, 0
4244  br i1 %10, label %14, label %11
4245
; Preheader: the initial first aesd operand is read back from %3.
424611:
4247  %12 = load <16 x i8>, ptr %3, align 8
4248  br label %15
4249
; Loop exit: write the last aesimc result to %3.
425013:
4251  store <16 x i8> %19, ptr %3, align 8
4252  br label %14
4253
425414:
4255  ret void
4256
; Loop body: one aesd + aesimc pair per iteration; %8 is loop-invariant.
425715:
4258  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
4259  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
4260  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8)
4261  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
4262  %20 = add nuw i32 %17, 1
4263  %21 = icmp eq i32 %20, %0
4264  br i1 %21, label %13, label %15
4265}
4266
; aesd/aesimc loop in which the float argument %1 is inserted into lane 0 of
; both aesd operands: once into %2 before the loop, and into the vector
; reloaded from %3 on every iteration.
;   %0 - trip count (the loop is skipped entirely when it is 0)
;   %1 - the float that is inserted
;   %2 - vector used as the second aesd operand after the insertion
;   %3 - reloaded as the first aesd operand and rewritten with the aesimc
;        result on every iteration
; Both expected outputs have `vorr q2, q2, q2` and `vorr q1, q1, q1` inside
; the loop, before the aesd. They differ only in where `subs r0, r0, #1` is
; placed within the loop.
4267define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, ptr %3) nounwind {
4268; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_val:
4269; CHECK-FIX-NOSCHED:       @ %bb.0:
4270; CHECK-FIX-NOSCHED-NEXT:    cmp r0, #0
4271; CHECK-FIX-NOSCHED-NEXT:    bxeq lr
4272; CHECK-FIX-NOSCHED-NEXT:  .LBB91_1:
4273; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s4, s0
4274; CHECK-FIX-NOSCHED-NEXT:  .LBB91_2: @ =>This Inner Loop Header: Depth=1
4275; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d4, d5}, [r1]
4276; CHECK-FIX-NOSCHED-NEXT:    subs r0, r0, #1
4277; CHECK-FIX-NOSCHED-NEXT:    vmov.f32 s8, s0
4278; CHECK-FIX-NOSCHED-NEXT:    vorr q2, q2, q2
4279; CHECK-FIX-NOSCHED-NEXT:    vorr q1, q1, q1
4280; CHECK-FIX-NOSCHED-NEXT:    aesd.8 q2, q1
4281; CHECK-FIX-NOSCHED-NEXT:    aesimc.8 q8, q2
4282; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r1]
4283; CHECK-FIX-NOSCHED-NEXT:    bne .LBB91_2
4284; CHECK-FIX-NOSCHED-NEXT:  @ %bb.3:
4285; CHECK-FIX-NOSCHED-NEXT:    bx lr
4286;
4287; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_val:
4288; CHECK-CORTEX-FIX:       @ %bb.0:
4289; CHECK-CORTEX-FIX-NEXT:    cmp r0, #0
4290; CHECK-CORTEX-FIX-NEXT:    bxeq lr
4291; CHECK-CORTEX-FIX-NEXT:  .LBB91_1:
4292; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s4, s0
4293; CHECK-CORTEX-FIX-NEXT:  .LBB91_2: @ =>This Inner Loop Header: Depth=1
4294; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d4, d5}, [r1]
4295; CHECK-CORTEX-FIX-NEXT:    vmov.f32 s8, s0
4296; CHECK-CORTEX-FIX-NEXT:    vorr q2, q2, q2
4297; CHECK-CORTEX-FIX-NEXT:    subs r0, r0, #1
4298; CHECK-CORTEX-FIX-NEXT:    vorr q1, q1, q1
4299; CHECK-CORTEX-FIX-NEXT:    aesd.8 q2, q1
4300; CHECK-CORTEX-FIX-NEXT:    aesimc.8 q8, q2
4301; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r1]
4302; CHECK-CORTEX-FIX-NEXT:    bne .LBB91_2
4303; CHECK-CORTEX-FIX-NEXT:  @ %bb.3:
4304; CHECK-CORTEX-FIX-NEXT:    bx lr
4305  %5 = icmp eq i32 %0, 0
4306  br i1 %5, label %12, label %6
4307
; Preheader: build the loop-invariant second aesd operand (%9).
43086:
4309  %7 = bitcast <16 x i8> %2 to <4 x float>
4310  %8 = insertelement <4 x float> %7, float %1, i64 0
4311  %9 = bitcast <4 x float> %8 to <16 x i8>
4312  %10 = bitcast ptr %3 to ptr
4313  %11 = bitcast ptr %3 to ptr
4314  br label %13
4315
431612:
4317  ret void
4318
; Loop body: reload from %3, insert lane 0, run aesd + aesimc, store back.
; NOTE(review): the `store float` below has no counterpart in the expected
; assembly; presumably it is dropped because the 16-byte store of %19 to the
; same pointer overwrites it - confirm.
431913:
4320  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
4321  %15 = load <4 x float>, ptr %10, align 8
4322  %16 = insertelement <4 x float> %15, float %1, i64 0
4323  %17 = bitcast <4 x float> %16 to <16 x i8>
4324  store float %1, ptr %11, align 8
4325  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9)
4326  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
4327  store <16 x i8> %19, ptr %3, align 8
4328  %20 = add nuw i32 %14, 1
4329  %21 = icmp eq i32 %20, %0
4330  br i1 %21, label %12, label %13
4331}
4332
; aese whose first operand is the constant vector <0, 1, ..., 15> and whose
; second operand is loaded from %0; the aesmc result is stored back to %0.
; In both expected outputs the constant is emitted as a 16-byte literal
; (.LCPI92_0) after the function body and is fetched with `adr` followed by
; `vld1.64 {d18, d19}, [r1:128]`; no `vorr` is expected. The two check blocks
; differ only in the order of the `adr` and the first `vld1.64`.
4333define arm_aapcs_vfpcc void @aese_constantisland(ptr %0) nounwind {
4334; CHECK-FIX-NOSCHED-LABEL: aese_constantisland:
4335; CHECK-FIX-NOSCHED:       @ %bb.0:
4336; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d16, d17}, [r0]
4337; CHECK-FIX-NOSCHED-NEXT:    adr r1, .LCPI92_0
4338; CHECK-FIX-NOSCHED-NEXT:    vld1.64 {d18, d19}, [r1:128]
4339; CHECK-FIX-NOSCHED-NEXT:    aese.8 q9, q8
4340; CHECK-FIX-NOSCHED-NEXT:    aesmc.8 q8, q9
4341; CHECK-FIX-NOSCHED-NEXT:    vst1.64 {d16, d17}, [r0]
4342; CHECK-FIX-NOSCHED-NEXT:    bx lr
4343; CHECK-FIX-NOSCHED-NEXT:    .p2align 4
4344; CHECK-FIX-NOSCHED-NEXT:  @ %bb.1:
4345; CHECK-FIX-NOSCHED-NEXT:  .LCPI92_0:
4346; CHECK-FIX-NOSCHED-NEXT:    .byte 0 @ 0x0
4347; CHECK-FIX-NOSCHED-NEXT:    .byte 1 @ 0x1
4348; CHECK-FIX-NOSCHED-NEXT:    .byte 2 @ 0x2
4349; CHECK-FIX-NOSCHED-NEXT:    .byte 3 @ 0x3
4350; CHECK-FIX-NOSCHED-NEXT:    .byte 4 @ 0x4
4351; CHECK-FIX-NOSCHED-NEXT:    .byte 5 @ 0x5
4352; CHECK-FIX-NOSCHED-NEXT:    .byte 6 @ 0x6
4353; CHECK-FIX-NOSCHED-NEXT:    .byte 7 @ 0x7
4354; CHECK-FIX-NOSCHED-NEXT:    .byte 8 @ 0x8
4355; CHECK-FIX-NOSCHED-NEXT:    .byte 9 @ 0x9
4356; CHECK-FIX-NOSCHED-NEXT:    .byte 10 @ 0xa
4357; CHECK-FIX-NOSCHED-NEXT:    .byte 11 @ 0xb
4358; CHECK-FIX-NOSCHED-NEXT:    .byte 12 @ 0xc
4359; CHECK-FIX-NOSCHED-NEXT:    .byte 13 @ 0xd
4360; CHECK-FIX-NOSCHED-NEXT:    .byte 14 @ 0xe
4361; CHECK-FIX-NOSCHED-NEXT:    .byte 15 @ 0xf
4362;
4363; CHECK-CORTEX-FIX-LABEL: aese_constantisland:
4364; CHECK-CORTEX-FIX:       @ %bb.0:
4365; CHECK-CORTEX-FIX-NEXT:    adr r1, .LCPI92_0
4366; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d16, d17}, [r0]
4367; CHECK-CORTEX-FIX-NEXT:    vld1.64 {d18, d19}, [r1:128]
4368; CHECK-CORTEX-FIX-NEXT:    aese.8 q9, q8
4369; CHECK-CORTEX-FIX-NEXT:    aesmc.8 q8, q9
4370; CHECK-CORTEX-FIX-NEXT:    vst1.64 {d16, d17}, [r0]
4371; CHECK-CORTEX-FIX-NEXT:    bx lr
4372; CHECK-CORTEX-FIX-NEXT:    .p2align 4
4373; CHECK-CORTEX-FIX-NEXT:  @ %bb.1:
4374; CHECK-CORTEX-FIX-NEXT:  .LCPI92_0:
4375; CHECK-CORTEX-FIX-NEXT:    .byte 0 @ 0x0
4376; CHECK-CORTEX-FIX-NEXT:    .byte 1 @ 0x1
4377; CHECK-CORTEX-FIX-NEXT:    .byte 2 @ 0x2
4378; CHECK-CORTEX-FIX-NEXT:    .byte 3 @ 0x3
4379; CHECK-CORTEX-FIX-NEXT:    .byte 4 @ 0x4
4380; CHECK-CORTEX-FIX-NEXT:    .byte 5 @ 0x5
4381; CHECK-CORTEX-FIX-NEXT:    .byte 6 @ 0x6
4382; CHECK-CORTEX-FIX-NEXT:    .byte 7 @ 0x7
4383; CHECK-CORTEX-FIX-NEXT:    .byte 8 @ 0x8
4384; CHECK-CORTEX-FIX-NEXT:    .byte 9 @ 0x9
4385; CHECK-CORTEX-FIX-NEXT:    .byte 10 @ 0xa
4386; CHECK-CORTEX-FIX-NEXT:    .byte 11 @ 0xb
4387; CHECK-CORTEX-FIX-NEXT:    .byte 12 @ 0xc
4388; CHECK-CORTEX-FIX-NEXT:    .byte 13 @ 0xd
4389; CHECK-CORTEX-FIX-NEXT:    .byte 14 @ 0xe
4390; CHECK-CORTEX-FIX-NEXT:    .byte 15 @ 0xf
4391  %2 = load <16 x i8>, ptr %0, align 8
4392  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %2)
4393  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
4394  store <16 x i8> %4, ptr %0, align 8
4395  ret void
4396}
4397