; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple armv8---eabi -mattr=+aes,+fix-cortex-a57-aes-1742098 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-FIX-NOSCHED

; These CPUs should have the fix enabled by default. They use different
; FileCheck prefixes because some instructions are scheduled differently.
;
; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a57 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX
; RUN: llc -mtriple armv8---eabi -mcpu=cortex-a72 -verify-machineinstrs -o - %s | FileCheck %s --check-prefixes=CHECK-FIX,CHECK-CORTEX-FIX

; This checks that adding `+fix-cortex-a57-aes-1742098` causes `vorr` to be
; inserted wherever the compiler cannot prove that either input to the first aes
; instruction in a fused aes pair was set by 64-bit Neon register writes or
; 128-bit Neon register writes. All other register writes are unsafe, and
; require a `vorr` to protect the AES input.

; AES intrinsics used by the tests below.
declare <16 x i8> @llvm.arm.neon.aese(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesmc(<16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesd(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.arm.neon.aesimc(<16 x i8>)

; External helpers that return a vector, so an AES operand can come from a call.
declare arm_aapcs_vfpcc <16 x i8> @get_input() local_unnamed_addr
declare arm_aapcs_vfpcc <16 x i8> @get_inputf16(half) local_unnamed_addr
declare arm_aapcs_vfpcc <16 x i8> @get_inputf32(float) local_unnamed_addr



; First aese operand is an all-zero constant; the second is loaded from %0.
define arm_aapcs_vfpcc void @aese_zero(ptr %0) nounwind {
; CHECK-FIX-LABEL: aese_zero:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vmov.i32 q9, #0x0
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
  %2 = load <16 x i8>, ptr %0, align 8
  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> zeroinitializer, <16 x i8> %2)
  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
  store <16 x i8> %4, ptr %0, align 8
  ret void
}

; First aese operand is the return value of a call to @get_input.
define arm_aapcs_vfpcc void @aese_via_call1(ptr %0) nounwind {
; CHECK-FIX-LABEL: aese_via_call1:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_input
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
  %3 = load <16 x i8>, ptr %0, align 8
  %4 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %2, <16 x i8> %3)
  %5 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %4)
  store <16 x i8> %5, ptr %0, align 8
  ret void
}

; Same shape as aese_via_call1, but the callee takes a half argument.
define arm_aapcs_vfpcc void @aese_via_call2(half %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aese_via_call2:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf16
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
  %4 = load <16 x i8>, ptr %1, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, ptr %1, align 8
  ret void
}

; Same shape as aese_via_call1, but the callee takes a float argument.
define arm_aapcs_vfpcc void @aese_via_call3(float %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aese_via_call3:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf32
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
  %4 = load <16 x i8>, ptr %1, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, ptr %1, align 8
  ret void
}

; Both aese operands are loaded from memory.
define arm_aapcs_vfpcc void @aese_once_via_ptr(ptr %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %3 = load <16 x i8>, ptr %1, align 8
  %4 = load <16 x i8>, ptr %0, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, ptr %1, align 8
  ret void
}

; Both aese operands are vector function arguments.
define arm_aapcs_vfpcc <16 x i8> @aese_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_once_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: aese.8 q0, q1
; CHECK-FIX-NEXT: aesmc.8 q0, q0
; CHECK-FIX-NEXT: bx lr
  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
  ret <16 x i8> %4
}

; Two aese/aesmc pairs; the second consumes the first pair's result and a
; fresh load from %0.
define arm_aapcs_vfpcc void @aese_twice_via_ptr(ptr %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aese_twice_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-FIX-NEXT: aese.8 q9, q8
; CHECK-FIX-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %3 = load <16 x i8>, ptr %1, align 8
  %4 = load <16 x i8>, ptr %0, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  store <16 x i8> %6, ptr %1, align 8
  %7 = load <16 x i8>, ptr %0, align 8
  %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
  %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
  store <16 x i8> %9, ptr %1, align 8
  ret void
}

; Two aese/aesmc pairs on vector arguments; %0 feeds both aese calls.
define arm_aapcs_vfpcc <16 x i8> @aese_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aese_twice_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q0, q8
; CHECK-FIX-NEXT: bx lr
  %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %1, <16 x i8> %0)
  %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3)
  %5 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %4, <16 x i8> %0)
  %6 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %5)
  ret <16 x i8> %6
}

; aese/aesmc pair inside a loop; both operands are reloaded from memory on
; every iteration.
define arm_aapcs_vfpcc void @aese_loop_via_ptr(i32 %0, ptr %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
; CHECK-FIX-NOSCHED-NEXT: .LBB8_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r2]
; CHECK-FIX-NOSCHED-NEXT: aese.8 q9, q8
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bne .LBB8_1
; CHECK-FIX-NOSCHED-NEXT: @ %bb.2:
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: bxeq lr
; CHECK-CORTEX-FIX-NEXT: .LBB8_1: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r2]
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bne .LBB8_1
; CHECK-CORTEX-FIX-NEXT: @ %bb.2:
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = icmp eq i32 %0, 0
  br i1 %4, label %5, label %6

5:
  ret void

6:
  %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
  %8 = load <16 x i8>, ptr %2, align 8
  %9 = load <16 x i8>, ptr %1, align 8
  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %9)
  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
  store <16 x i8> %11, ptr %2, align 8
  %12 = add nuw i32 %7, 1
  %13 = icmp eq i32 %12, %0
  br i1 %13, label %5, label %6
}

; aese/aesmc pair inside a loop; the first operand is carried through a phi
; seeded from vector argument %2, the second is vector argument %1.
define arm_aapcs_vfpcc <16 x i8> @aese_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
; CHECK-FIX-LABEL: aese_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB9_2
; CHECK-FIX-NEXT: .LBB9_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q1, q1
; CHECK-FIX-NEXT: bne .LBB9_1
; CHECK-FIX-NEXT: .LBB9_2:
; CHECK-FIX-NEXT: vorr q0, q1, q1
; CHECK-FIX-NEXT: bx lr
  %4 = icmp eq i32 %0, 0
  br i1 %4, label %5, label %7

5:
  %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
  ret <16 x i8> %6

7:
  %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
  %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %1)
  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
  %12 = add nuw i32 %8, 1
  %13 = icmp eq i32 %12, %0
  br i1 %13, label %5, label %7
}

; Lane 0 of both aese operands is overwritten with an i8 loaded from %0.
define arm_aapcs_vfpcc void @aese_set8_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set8_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set8_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.8 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i8, ptr %0, align 1
  %5 = load <16 x i8>, ptr %2, align 8
  %6 = insertelement <16 x i8> %5, i8 %4, i64 0
  %7 = insertelement <16 x i8> %1, i8 %4, i64 0
  %8 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %6, <16 x i8> %7)
  %9 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %8)
  store <16 x i8> %9, ptr %2, align 8
  ret void
}

; Lane 0 of both aese operands is overwritten with i8 argument %0.
define arm_aapcs_vfpcc void @aese_set8_via_val(i8 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_set8_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.8 d0[0], r0
; CHECK-FIX-NEXT: vmov.8 d16[0], r0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = load <16 x i8>, ptr %2, align 8
  %5 = insertelement <16 x i8> %4, i8 %0, i64 0
  %6 = insertelement <16 x i8> %1, i8 %0, i64 0
  %7 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %5, <16 x i8> %6)
  %8 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %7)
  store <16 x i8> %8, ptr %2, align 8
  ret void
}

; Branches on %0 decide whether an i8 loaded from %1 is inserted into lane 0
; of each aese operand.
define arm_aapcs_vfpcc void @aese_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB12_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB12_3
; CHECK-FIX-NEXT: b .LBB12_4
; CHECK-FIX-NEXT: .LBB12_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB12_4
; CHECK-FIX-NEXT: .LBB12_3:
; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1]
; CHECK-FIX-NEXT: .LBB12_4:
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %9

5:
  %6 = load i8, ptr %1, align 1
  %7 = load <16 x i8>, ptr %3, align 8
  %8 = insertelement <16 x i8> %7, i8 %6, i64 0
  br label %11

9:
  %10 = load <16 x i8>, ptr %3, align 8
  br label %11

11:
  %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
  br i1 %0, label %13, label %16

13:
  %14 = load i8, ptr %1, align 1
  %15 = insertelement <16 x i8> %2, i8 %14, i64 0
  br label %16

16:
  %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %17)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, ptr %3, align 8
  ret void
}

; Selects on %0 choose between each aese operand with and without i8 argument
; %1 inserted into lane 0.
define arm_aapcs_vfpcc void @aese_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB13_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: .LBB13_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB13_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: .LBB13_4: @ %select.end2
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load <16 x i8>, ptr %3, align 8
  %6 = insertelement <16 x i8> %5, i8 %1, i64 0
  %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
  %8 = insertelement <16 x i8> %2, i8 %1, i64 0
  %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
  %10 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %9)
  %11 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %10)
  store <16 x i8> %11, ptr %3, align 8
  ret void
}

; Before the loop, an i8 loaded from %1 is inserted into lane 0 of %2 and also
; stored through %3; the loop repeats aese/aesmc on a value first loaded from %3.
define arm_aapcs_vfpcc void @aese_set8_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrb r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strb r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB14_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB14_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB14_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i8, ptr %1, align 1
  %6 = insertelement <16 x i8> %2, i8 %5, i64 0
  %7 = getelementptr inbounds <16 x i8>, ptr %3, i32 0, i32 0
  store i8 %5, ptr %7, align 8
  %8 = icmp eq i32 %0, 0
  br i1 %8, label %12, label %9

9:
  %10 = load <16 x i8>, ptr %3, align 8
  br label %13

11:
  store <16 x i8> %17, ptr %3, align 8
  br label %12

12:
  ret void

13:
  %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
  %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
  %16 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %6)
  %17 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %16)
  %18 = add nuw i32 %15, 1
  %19 = icmp eq i32 %18, %0
  br i1 %19, label %11, label %13
}

; i8 argument %1 is inserted into lane 0 of %2 once before the loop and into
; lane 0 of the loop-carried value on every iteration.
define arm_aapcs_vfpcc void @aese_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set8_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB15_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB15_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB15_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %10, label %6

6:
  %7 = insertelement <16 x i8> %2, i8 %1, i64 0
  %8 = load <16 x i8>, ptr %3, align 8
  br label %11

9:
  store <16 x i8> %16, ptr %3, align 8
  br label %10

10:
  ret void

11:
  %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
  %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
  %14 = insertelement <16 x i8> %12, i8 %1, i64 0
  %15 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %14, <16 x i8> %7)
  %16 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %15)
  %17 = add nuw i32 %13, 1
  %18 = icmp eq i32 %17, %0
  br i1 %18, label %9, label %11
}

; Lane 0 (viewed as <8 x i16>) of both aese operands is overwritten with an
; i16 loaded from %0.
define arm_aapcs_vfpcc void @aese_set16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set16_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set16_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i16, ptr %0, align 2
  %5 = bitcast ptr %2 to ptr
  %6 = load <8 x i16>, ptr %5, align 8
  %7 = insertelement <8 x i16> %6, i16 %4, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %4, i64 0
  %11 = bitcast <8 x i16> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, ptr %2, align 8
  ret void
}

; Lane 0 (viewed as <8 x i16>) of both aese operands is overwritten with i16
; argument %0.
define arm_aapcs_vfpcc void @aese_set16_via_val(i16 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_set16_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.16 d0[0], r0
; CHECK-FIX-NEXT: vmov.16 d16[0], r0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast ptr %2 to ptr
  %5 = load <8 x i16>, ptr %4, align 8
  %6 = insertelement <8 x i16> %5, i16 %0, i64 0
  %7 = bitcast <8 x i16> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <8 x i16>
  %9 = insertelement <8 x i16> %8, i16 %0, i64 0
  %10 = bitcast <8 x i16> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
  store <16 x i8> %12, ptr %2, align 8
  ret void
}

; Branches on %0 decide whether an i16 loaded from %1 is inserted into lane 0
; of each aese operand.
define arm_aapcs_vfpcc void @aese_set16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB18_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.16 {d16[0]}, [r1:16]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB18_3
; CHECK-FIX-NEXT: b .LBB18_4
; CHECK-FIX-NEXT: .LBB18_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB18_4
; CHECK-FIX-NEXT: .LBB18_3:
; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16]
; CHECK-FIX-NEXT: .LBB18_4:
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10

5:
  %6 = load i16, ptr %1, align 2
  %7 = bitcast ptr %3 to ptr
  %8 = load <8 x i16>, ptr %7, align 8
  %9 = insertelement <8 x i16> %8, i16 %6, i64 0
  br label %13

10:
  %11 = bitcast ptr %3 to ptr
  %12 = load <8 x i16>, ptr %11, align 8
  br label %13

13:
  %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19

15:
  %16 = load i16, ptr %1, align 2
  %17 = bitcast <16 x i8> %2 to <8 x i16>
  %18 = insertelement <8 x i16> %17, i16 %16, i64 0
  br label %21

19:
  %20 = bitcast <16 x i8> %2 to <8 x i16>
  br label %21

21:
  %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <8 x i16> %14 to <16 x i8>
  %24 = bitcast <8 x i16> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
  store <16 x i8> %26, ptr %3, align 8
  ret void
}

; Selects on %0 choose between each aese operand with and without i16 argument
; %1 inserted into lane 0.
define arm_aapcs_vfpcc void @aese_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB19_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: .LBB19_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB19_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: .LBB19_4: @ %select.end2
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = bitcast ptr %3 to ptr
  %6 = load <8 x i16>, ptr %5, align 8
  %7 = insertelement <8 x i16> %6, i16 %1, i64 0
  %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6
  %9 = bitcast <16 x i8> %2 to <8 x i16>
  %10 = insertelement <8 x i16> %9, i16 %1, i64 0
  %11 = select i1 %0, <8 x i16> %10, <8 x i16> %9
  %12 = bitcast <8 x i16> %8 to <16 x i8>
  %13 = bitcast <8 x i16> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
  store <16 x i8> %15, ptr %3, align 8
  ret void
}

; Before the loop, an i16 loaded from %1 is inserted into lane 0 of %2 and also
; stored through %3; the loop repeats aese/aesmc on a value first loaded from %3.
define arm_aapcs_vfpcc void @aese_set16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB20_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB20_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB20_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i16, ptr %1, align 2
  %6 = bitcast <16 x i8> %2 to <8 x i16>
  %7 = insertelement <8 x i16> %6, i16 %5, i64 0
  %8 = bitcast <8 x i16> %7 to <16 x i8>
  %9 = bitcast ptr %3 to ptr
  store i16 %5, ptr %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11

11:
  %12 = load <16 x i8>, ptr %3, align 8
  br label %15

13:
  store <16 x i8> %19, ptr %3, align 8
  br label %14

14:
  ret void

15:
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}

; i16 argument %1 is inserted into lane 0 of %2 once before the loop. Each
; iteration reloads the vector from %3, inserts %1 into its lane 0, stores %1
; through %3, runs aese/aesmc and stores the result back to %3.
define arm_aapcs_vfpcc void @aese_set16_loop_via_val(i32 %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB21_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: .LBB21_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.16 d16[0], r1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bne .LBB21_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6

6:
  %7 = bitcast <16 x i8> %2 to <8 x i16>
  %8 = insertelement <8 x i16> %7, i16 %1, i64 0
  %9 = bitcast <8 x i16> %8 to <16 x i8>
  %10 = bitcast ptr %3 to ptr
  %11 = bitcast ptr %3 to ptr
  br label %13

12:
  ret void

13:
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <8 x i16>, ptr %10, align 8
  %16 = insertelement <8 x i16> %15, i16 %1, i64 0
  %17 = bitcast <8 x i16> %16 to <16 x i8>
  store i16 %1, ptr %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, ptr %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}

; Lane 0 (viewed as <4 x i32>) of both aese operands is overwritten with an
; i32 loaded from %0.
define arm_aapcs_vfpcc void @aese_set32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_set32_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_set32_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i32, ptr %0, align 4
  %5 = bitcast ptr %2 to ptr
  %6 = load <4 x i32>, ptr %5, align 8
  %7 = insertelement <4 x i32> %6, i32 %4, i64 0
  %8 = bitcast <4 x i32> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <4 x i32>
  %10 = insertelement <4 x i32> %9, i32 %4, i64 0
  %11 = bitcast <4 x i32> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, ptr %2, align 8
  ret void
}

; Lane 0 (viewed as <4 x i32>) of both aese operands is overwritten with i32
; argument %0.
define arm_aapcs_vfpcc void @aese_set32_via_val(i32 %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_set32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.32 d0[0], r0
; CHECK-FIX-NEXT: vmov.32 d16[0], r0
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast ptr %2 to ptr
  %5 = load <4 x i32>, ptr %4, align 8
  %6 = insertelement <4 x i32> %5, i32 %0, i64 0
  %7 = bitcast <4 x i32> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <4 x i32>
  %9 = insertelement <4 x i32> %8, i32 %0, i64 0
  %10 = bitcast <4 x i32> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
  store <16 x i8> %12, ptr %2, align 8
  ret void
}

define arm_aapcs_vfpcc void @aese_set32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_set32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB24_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB24_3
; CHECK-FIX-NEXT: b .LBB24_4
; CHECK-FIX-NEXT: .LBB24_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB24_4
; CHECK-FIX-NEXT: .LBB24_3:
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT: .LBB24_4:
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10

5:
  %6 = load i32, ptr %1, align 4
  %7 = bitcast ptr %3 to ptr
  %8 = load <4 x i32>, ptr %7, align 8
  %9 =
insertelement <4 x i32> %8, i32 %6, i64 0 830 br label %13 831 83210: 833 %11 = bitcast ptr %3 to ptr 834 %12 = load <4 x i32>, ptr %11, align 8 835 br label %13 836 83713: 838 %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ] 839 br i1 %0, label %15, label %19 840 84115: 842 %16 = load i32, ptr %1, align 4 843 %17 = bitcast <16 x i8> %2 to <4 x i32> 844 %18 = insertelement <4 x i32> %17, i32 %16, i64 0 845 br label %21 846 84719: 848 %20 = bitcast <16 x i8> %2 to <4 x i32> 849 br label %21 850 85121: 852 %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ] 853 %23 = bitcast <4 x i32> %14 to <16 x i8> 854 %24 = bitcast <4 x i32> %22 to <16 x i8> 855 %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24) 856 %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25) 857 store <16 x i8> %26, ptr %3, align 8 858 ret void 859} 860 861define arm_aapcs_vfpcc void @aese_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, ptr %3) nounwind { 862; CHECK-FIX-LABEL: aese_set32_cond_via_val: 863; CHECK-FIX: @ %bb.0: 864; CHECK-FIX-NEXT: vorr q0, q0, q0 865; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 866; CHECK-FIX-NEXT: cmp r0, #0 867; CHECK-FIX-NEXT: beq .LBB25_2 868; CHECK-FIX-NEXT: @ %bb.1: 869; CHECK-FIX-NEXT: vmov.32 d16[0], r1 870; CHECK-FIX-NEXT: .LBB25_2: @ %select.end 871; CHECK-FIX-NEXT: cmp r0, #0 872; CHECK-FIX-NEXT: beq .LBB25_4 873; CHECK-FIX-NEXT: @ %bb.3: 874; CHECK-FIX-NEXT: vmov.32 d0[0], r1 875; CHECK-FIX-NEXT: .LBB25_4: @ %select.end2 876; CHECK-FIX-NEXT: aese.8 q8, q0 877; CHECK-FIX-NEXT: aesmc.8 q8, q8 878; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 879; CHECK-FIX-NEXT: bx lr 880 %5 = bitcast ptr %3 to ptr 881 %6 = load <4 x i32>, ptr %5, align 8 882 %7 = insertelement <4 x i32> %6, i32 %1, i64 0 883 %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6 884 %9 = bitcast <16 x i8> %2 to <4 x i32> 885 %10 = insertelement <4 x i32> %9, i32 %1, i64 0 886 %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9 887 %12 = bitcast <4 x i32> %8 to <16 x i8> 888 %13 = bitcast <4 
x i32> %11 to <16 x i8> 889 %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13) 890 %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14) 891 store <16 x i8> %15, ptr %3, align 8 892 ret void 893} 894 895define arm_aapcs_vfpcc void @aese_set32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 896; CHECK-FIX-LABEL: aese_set32_loop_via_ptr: 897; CHECK-FIX: @ %bb.0: 898; CHECK-FIX-NEXT: vorr q0, q0, q0 899; CHECK-FIX-NEXT: ldr r1, [r1] 900; CHECK-FIX-NEXT: cmp r0, #0 901; CHECK-FIX-NEXT: str r1, [r2] 902; CHECK-FIX-NEXT: bxeq lr 903; CHECK-FIX-NEXT: .LBB26_1: 904; CHECK-FIX-NEXT: vmov.32 d0[0], r1 905; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 906; CHECK-FIX-NEXT: .LBB26_2: @ =>This Inner Loop Header: Depth=1 907; CHECK-FIX-NEXT: aese.8 q8, q0 908; CHECK-FIX-NEXT: subs r0, r0, #1 909; CHECK-FIX-NEXT: aesmc.8 q8, q8 910; CHECK-FIX-NEXT: bne .LBB26_2 911; CHECK-FIX-NEXT: @ %bb.3: 912; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 913; CHECK-FIX-NEXT: bx lr 914 %5 = load i32, ptr %1, align 4 915 %6 = bitcast <16 x i8> %2 to <4 x i32> 916 %7 = insertelement <4 x i32> %6, i32 %5, i64 0 917 %8 = bitcast <4 x i32> %7 to <16 x i8> 918 %9 = bitcast ptr %3 to ptr 919 store i32 %5, ptr %9, align 8 920 %10 = icmp eq i32 %0, 0 921 br i1 %10, label %14, label %11 922 92311: 924 %12 = load <16 x i8>, ptr %3, align 8 925 br label %15 926 92713: 928 store <16 x i8> %19, ptr %3, align 8 929 br label %14 930 93114: 932 ret void 933 93415: 935 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ] 936 %17 = phi i32 [ 0, %11 ], [ %20, %15 ] 937 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8) 938 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18) 939 %20 = add nuw i32 %17, 1 940 %21 = icmp eq i32 %20, %0 941 br i1 %21, label %13, label %15 942} 943 944define arm_aapcs_vfpcc void @aese_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, ptr %3) nounwind { 945; CHECK-FIX-LABEL: aese_set32_loop_via_val: 946; CHECK-FIX: @ %bb.0: 947; CHECK-FIX-NEXT: 
vorr q0, q0, q0 948; CHECK-FIX-NEXT: cmp r0, #0 949; CHECK-FIX-NEXT: bxeq lr 950; CHECK-FIX-NEXT: .LBB27_1: 951; CHECK-FIX-NEXT: vmov.32 d0[0], r1 952; CHECK-FIX-NEXT: .LBB27_2: @ =>This Inner Loop Header: Depth=1 953; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 954; CHECK-FIX-NEXT: subs r0, r0, #1 955; CHECK-FIX-NEXT: vmov.32 d16[0], r1 956; CHECK-FIX-NEXT: aese.8 q8, q0 957; CHECK-FIX-NEXT: aesmc.8 q8, q8 958; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 959; CHECK-FIX-NEXT: bne .LBB27_2 960; CHECK-FIX-NEXT: @ %bb.3: 961; CHECK-FIX-NEXT: bx lr 962 %5 = icmp eq i32 %0, 0 963 br i1 %5, label %12, label %6 964 9656: 966 %7 = bitcast <16 x i8> %2 to <4 x i32> 967 %8 = insertelement <4 x i32> %7, i32 %1, i64 0 968 %9 = bitcast <4 x i32> %8 to <16 x i8> 969 %10 = bitcast ptr %3 to ptr 970 %11 = bitcast ptr %3 to ptr 971 br label %13 972 97312: 974 ret void 975 97613: 977 %14 = phi i32 [ 0, %6 ], [ %20, %13 ] 978 %15 = load <4 x i32>, ptr %10, align 8 979 %16 = insertelement <4 x i32> %15, i32 %1, i64 0 980 %17 = bitcast <4 x i32> %16 to <16 x i8> 981 store i32 %1, ptr %11, align 8 982 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9) 983 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18) 984 store <16 x i8> %19, ptr %3, align 8 985 %20 = add nuw i32 %14, 1 986 %21 = icmp eq i32 %20, %0 987 br i1 %21, label %12, label %13 988} 989 990define arm_aapcs_vfpcc void @aese_set64_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { 991; CHECK-FIX-NOSCHED-LABEL: aese_set64_via_ptr: 992; CHECK-FIX-NOSCHED: @ %bb.0: 993; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 994; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] 995; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0] 996; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0 997; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0 998; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8 999; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] 1000; CHECK-FIX-NOSCHED-NEXT: bx lr 1001; 1002; CHECK-CORTEX-FIX-LABEL: aese_set64_via_ptr: 1003; CHECK-CORTEX-FIX: @ %bb.0: 
1004; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 1005; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0] 1006; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] 1007; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0 1008; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0 1009; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 1010; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] 1011; CHECK-CORTEX-FIX-NEXT: bx lr 1012 %4 = load i64, ptr %0, align 8 1013 %5 = bitcast ptr %2 to ptr 1014 %6 = load <2 x i64>, ptr %5, align 8 1015 %7 = insertelement <2 x i64> %6, i64 %4, i64 0 1016 %8 = bitcast <2 x i64> %7 to <16 x i8> 1017 %9 = bitcast <16 x i8> %1 to <2 x i64> 1018 %10 = insertelement <2 x i64> %9, i64 %4, i64 0 1019 %11 = bitcast <2 x i64> %10 to <16 x i8> 1020 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11) 1021 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12) 1022 store <16 x i8> %13, ptr %2, align 8 1023 ret void 1024} 1025 1026define arm_aapcs_vfpcc void @aese_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) nounwind { 1027; CHECK-FIX-LABEL: aese_set64_via_val: 1028; CHECK-FIX: @ %bb.0: 1029; CHECK-FIX-NEXT: vorr q0, q0, q0 1030; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 1031; CHECK-FIX-NEXT: vmov.32 d0[0], r0 1032; CHECK-FIX-NEXT: vmov.32 d16[0], r0 1033; CHECK-FIX-NEXT: vmov.32 d0[1], r1 1034; CHECK-FIX-NEXT: vmov.32 d16[1], r1 1035; CHECK-FIX-NEXT: aese.8 q8, q0 1036; CHECK-FIX-NEXT: aesmc.8 q8, q8 1037; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 1038; CHECK-FIX-NEXT: bx lr 1039 %4 = bitcast ptr %2 to ptr 1040 %5 = load <2 x i64>, ptr %4, align 8 1041 %6 = insertelement <2 x i64> %5, i64 %0, i64 0 1042 %7 = bitcast <2 x i64> %6 to <16 x i8> 1043 %8 = bitcast <16 x i8> %1 to <2 x i64> 1044 %9 = insertelement <2 x i64> %8, i64 %0, i64 0 1045 %10 = bitcast <2 x i64> %9 to <16 x i8> 1046 %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10) 1047 %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11) 1048 store <16 x i8> %12, ptr %2, align 8 1049 ret void 1050} 1051 1052define 
arm_aapcs_vfpcc void @aese_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 1053; CHECK-FIX-NOSCHED-LABEL: aese_set64_cond_via_ptr: 1054; CHECK-FIX-NOSCHED: @ %bb.0: 1055; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 1056; CHECK-FIX-NOSCHED-NEXT: beq .LBB30_2 1057; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: 1058; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] 1059; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1] 1060; CHECK-FIX-NOSCHED-NEXT: b .LBB30_3 1061; CHECK-FIX-NOSCHED-NEXT: .LBB30_2: 1062; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] 1063; CHECK-FIX-NOSCHED-NEXT: .LBB30_3: 1064; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 1065; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1] 1066; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 1067; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0 1068; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8 1069; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] 1070; CHECK-FIX-NOSCHED-NEXT: bx lr 1071; 1072; CHECK-CORTEX-FIX-LABEL: aese_set64_cond_via_ptr: 1073; CHECK-CORTEX-FIX: @ %bb.0: 1074; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 1075; CHECK-CORTEX-FIX-NEXT: beq .LBB30_2 1076; CHECK-CORTEX-FIX-NEXT: @ %bb.1: 1077; CHECK-CORTEX-FIX-NEXT: vldr d18, [r1] 1078; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] 1079; CHECK-CORTEX-FIX-NEXT: vorr d16, d18, d18 1080; CHECK-CORTEX-FIX-NEXT: b .LBB30_3 1081; CHECK-CORTEX-FIX-NEXT: .LBB30_2: 1082; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] 1083; CHECK-CORTEX-FIX-NEXT: .LBB30_3: 1084; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 1085; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1] 1086; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 1087; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0 1088; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 1089; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] 1090; CHECK-CORTEX-FIX-NEXT: bx lr 1091 br i1 %0, label %5, label %10 1092 10935: 1094 %6 = load i64, ptr %1, align 8 1095 %7 = bitcast ptr %3 to ptr 1096 %8 = load <2 x i64>, ptr %7, align 8 1097 %9 = insertelement <2 x i64> %8, i64 %6, i64 0 1098 br label %13 1099 110010: 1101 %11 = 
bitcast ptr %3 to ptr 1102 %12 = load <2 x i64>, ptr %11, align 8 1103 br label %13 1104 110513: 1106 %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ] 1107 br i1 %0, label %15, label %19 1108 110915: 1110 %16 = load i64, ptr %1, align 8 1111 %17 = bitcast <16 x i8> %2 to <2 x i64> 1112 %18 = insertelement <2 x i64> %17, i64 %16, i64 0 1113 br label %21 1114 111519: 1116 %20 = bitcast <16 x i8> %2 to <2 x i64> 1117 br label %21 1118 111921: 1120 %22 = phi <2 x i64> [ %18, %15 ], [ %20, %19 ] 1121 %23 = bitcast <2 x i64> %14 to <16 x i8> 1122 %24 = bitcast <2 x i64> %22 to <16 x i8> 1123 %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24) 1124 %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25) 1125 store <16 x i8> %26, ptr %3, align 8 1126 ret void 1127} 1128 1129define arm_aapcs_vfpcc void @aese_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, ptr %3) nounwind { 1130; CHECK-FIX-LABEL: aese_set64_cond_via_val: 1131; CHECK-FIX: @ %bb.0: 1132; CHECK-FIX-NEXT: vorr q0, q0, q0 1133; CHECK-FIX-NEXT: ldr r1, [sp] 1134; CHECK-FIX-NEXT: cmp r0, #0 1135; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] 1136; CHECK-FIX-NEXT: beq .LBB31_2 1137; CHECK-FIX-NEXT: @ %bb.1: 1138; CHECK-FIX-NEXT: vmov.32 d16[0], r2 1139; CHECK-FIX-NEXT: vmov.32 d16[1], r3 1140; CHECK-FIX-NEXT: .LBB31_2: @ %select.end 1141; CHECK-FIX-NEXT: cmp r0, #0 1142; CHECK-FIX-NEXT: beq .LBB31_4 1143; CHECK-FIX-NEXT: @ %bb.3: 1144; CHECK-FIX-NEXT: vmov.32 d0[0], r2 1145; CHECK-FIX-NEXT: vmov.32 d0[1], r3 1146; CHECK-FIX-NEXT: .LBB31_4: @ %select.end2 1147; CHECK-FIX-NEXT: aese.8 q8, q0 1148; CHECK-FIX-NEXT: aesmc.8 q8, q8 1149; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] 1150; CHECK-FIX-NEXT: bx lr 1151 %5 = bitcast ptr %3 to ptr 1152 %6 = load <2 x i64>, ptr %5, align 8 1153 %7 = insertelement <2 x i64> %6, i64 %1, i64 0 1154 %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6 1155 %9 = bitcast <16 x i8> %2 to <2 x i64> 1156 %10 = insertelement <2 x i64> %9, i64 %1, i64 0 1157 %11 = select i1 %0, <2 
x i64> %10, <2 x i64> %9 1158 %12 = bitcast <2 x i64> %8 to <16 x i8> 1159 %13 = bitcast <2 x i64> %11 to <16 x i8> 1160 %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13) 1161 %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14) 1162 store <16 x i8> %15, ptr %3, align 8 1163 ret void 1164} 1165 1166define arm_aapcs_vfpcc void @aese_set64_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 1167; CHECK-FIX-NOSCHED-LABEL: aese_set64_loop_via_ptr: 1168; CHECK-FIX-NOSCHED: @ %bb.0: 1169; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 1170; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr} 1171; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr} 1172; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1] 1173; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 1174; CHECK-FIX-NOSCHED-NEXT: strd r4, r5, [r2] 1175; CHECK-FIX-NOSCHED-NEXT: beq .LBB32_4 1176; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: 1177; CHECK-FIX-NOSCHED-NEXT: vmov d0, r4, r5 1178; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] 1179; CHECK-FIX-NOSCHED-NEXT: .LBB32_2: @ =>This Inner Loop Header: Depth=1 1180; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0 1181; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1 1182; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8 1183; CHECK-FIX-NOSCHED-NEXT: bne .LBB32_2 1184; CHECK-FIX-NOSCHED-NEXT: @ %bb.3: 1185; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] 1186; CHECK-FIX-NOSCHED-NEXT: .LBB32_4: 1187; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r11, pc} 1188; 1189; CHECK-CORTEX-FIX-LABEL: aese_set64_loop_via_ptr: 1190; CHECK-CORTEX-FIX: @ %bb.0: 1191; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 1192; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr} 1193; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr} 1194; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1] 1195; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 1196; CHECK-CORTEX-FIX-NEXT: strd r4, r5, [r2] 1197; CHECK-CORTEX-FIX-NEXT: popeq {r4, r5, r11, pc} 1198; CHECK-CORTEX-FIX-NEXT: .LBB32_1: 1199; CHECK-CORTEX-FIX-NEXT: vmov d0, r4, r5 1200; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] 
1201; CHECK-CORTEX-FIX-NEXT: .LBB32_2: @ =>This Inner Loop Header: Depth=1 1202; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0 1203; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1 1204; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 1205; CHECK-CORTEX-FIX-NEXT: bne .LBB32_2 1206; CHECK-CORTEX-FIX-NEXT: @ %bb.3: 1207; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] 1208; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc} 1209 %5 = load i64, ptr %1, align 8 1210 %6 = bitcast <16 x i8> %2 to <2 x i64> 1211 %7 = insertelement <2 x i64> %6, i64 %5, i64 0 1212 %8 = bitcast <2 x i64> %7 to <16 x i8> 1213 %9 = bitcast ptr %3 to ptr 1214 store i64 %5, ptr %9, align 8 1215 %10 = icmp eq i32 %0, 0 1216 br i1 %10, label %14, label %11 1217 121811: 1219 %12 = load <16 x i8>, ptr %3, align 8 1220 br label %15 1221 122213: 1223 store <16 x i8> %19, ptr %3, align 8 1224 br label %14 1225 122614: 1227 ret void 1228 122915: 1230 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ] 1231 %17 = phi i32 [ 0, %11 ], [ %20, %15 ] 1232 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8) 1233 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18) 1234 %20 = add nuw i32 %17, 1 1235 %21 = icmp eq i32 %20, %0 1236 br i1 %21, label %13, label %15 1237} 1238 1239define arm_aapcs_vfpcc void @aese_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, ptr %3) nounwind { 1240; CHECK-FIX-LABEL: aese_set64_loop_via_val: 1241; CHECK-FIX: @ %bb.0: 1242; CHECK-FIX-NEXT: vorr q0, q0, q0 1243; CHECK-FIX-NEXT: cmp r0, #0 1244; CHECK-FIX-NEXT: bxeq lr 1245; CHECK-FIX-NEXT: .LBB33_1: 1246; CHECK-FIX-NEXT: vmov.32 d0[0], r2 1247; CHECK-FIX-NEXT: ldr r1, [sp] 1248; CHECK-FIX-NEXT: vmov.32 d0[1], r3 1249; CHECK-FIX-NEXT: .LBB33_2: @ =>This Inner Loop Header: Depth=1 1250; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] 1251; CHECK-FIX-NEXT: subs r0, r0, #1 1252; CHECK-FIX-NEXT: vmov.32 d16[0], r2 1253; CHECK-FIX-NEXT: vmov.32 d16[1], r3 1254; CHECK-FIX-NEXT: aese.8 q8, q0 1255; CHECK-FIX-NEXT: aesmc.8 q8, q8 1256; CHECK-FIX-NEXT: vst1.64 
{d16, d17}, [r1] 1257; CHECK-FIX-NEXT: bne .LBB33_2 1258; CHECK-FIX-NEXT: @ %bb.3: 1259; CHECK-FIX-NEXT: bx lr 1260 %5 = icmp eq i32 %0, 0 1261 br i1 %5, label %12, label %6 1262 12636: 1264 %7 = bitcast <16 x i8> %2 to <2 x i64> 1265 %8 = insertelement <2 x i64> %7, i64 %1, i64 0 1266 %9 = bitcast <2 x i64> %8 to <16 x i8> 1267 %10 = bitcast ptr %3 to ptr 1268 %11 = bitcast ptr %3 to ptr 1269 br label %13 1270 127112: 1272 ret void 1273 127413: 1275 %14 = phi i32 [ 0, %6 ], [ %20, %13 ] 1276 %15 = load <2 x i64>, ptr %10, align 8 1277 %16 = insertelement <2 x i64> %15, i64 %1, i64 0 1278 %17 = bitcast <2 x i64> %16 to <16 x i8> 1279 store i64 %1, ptr %11, align 8 1280 %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9) 1281 %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18) 1282 store <16 x i8> %19, ptr %3, align 8 1283 %20 = add nuw i32 %14, 1 1284 %21 = icmp eq i32 %20, %0 1285 br i1 %21, label %12, label %13 1286} 1287 1288define arm_aapcs_vfpcc void @aese_setf16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { 1289; CHECK-FIX-NOSCHED-LABEL: aese_setf16_via_ptr: 1290; CHECK-FIX-NOSCHED: @ %bb.0: 1291; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 1292; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0] 1293; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] 1294; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0 1295; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0 1296; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0 1297; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8 1298; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] 1299; CHECK-FIX-NOSCHED-NEXT: bx lr 1300; 1301; CHECK-CORTEX-FIX-LABEL: aese_setf16_via_ptr: 1302; CHECK-CORTEX-FIX: @ %bb.0: 1303; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 1304; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] 1305; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0] 1306; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0 1307; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0 1308; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0 1309; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8 1310; 
CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] 1311; CHECK-CORTEX-FIX-NEXT: bx lr 1312 %4 = bitcast ptr %0 to ptr 1313 %5 = load i16, ptr %4, align 2 1314 %6 = bitcast ptr %2 to ptr 1315 %7 = load <8 x i16>, ptr %6, align 8 1316 %8 = insertelement <8 x i16> %7, i16 %5, i64 0 1317 %9 = bitcast <8 x i16> %8 to <16 x i8> 1318 %10 = bitcast <16 x i8> %1 to <8 x i16> 1319 %11 = insertelement <8 x i16> %10, i16 %5, i64 0 1320 %12 = bitcast <8 x i16> %11 to <16 x i8> 1321 %13 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %9, <16 x i8> %12) 1322 %14 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %13) 1323 store <16 x i8> %14, ptr %2, align 8 1324 ret void 1325} 1326 1327define arm_aapcs_vfpcc void @aese_setf16_via_val(half %0, <16 x i8> %1, ptr %2) nounwind { 1328; CHECK-FIX-LABEL: aese_setf16_via_val: 1329; CHECK-FIX: @ %bb.0: 1330; CHECK-FIX-NEXT: vorr q1, q1, q1 1331; CHECK-FIX-NEXT: vmov r1, s0 1332; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] 1333; CHECK-FIX-NEXT: vmov.16 d2[0], r1 1334; CHECK-FIX-NEXT: vmov.16 d16[0], r1 1335; CHECK-FIX-NEXT: aese.8 q8, q1 1336; CHECK-FIX-NEXT: aesmc.8 q8, q8 1337; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] 1338; CHECK-FIX-NEXT: bx lr 1339 %4 = bitcast ptr %2 to ptr 1340 %5 = load <8 x i16>, ptr %4, align 8 1341 %6 = bitcast half %0 to i16 1342 %7 = insertelement <8 x i16> %5, i16 %6, i64 0 1343 %8 = bitcast <8 x i16> %7 to <16 x i8> 1344 %9 = bitcast <16 x i8> %1 to <8 x i16> 1345 %10 = insertelement <8 x i16> %9, i16 %6, i64 0 1346 %11 = bitcast <8 x i16> %10 to <16 x i8> 1347 %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11) 1348 %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12) 1349 store <16 x i8> %13, ptr %2, align 8 1350 ret void 1351} 1352 1353define arm_aapcs_vfpcc void @aese_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 1354; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_ptr: 1355; CHECK-FIX-NOSCHED: @ %bb.0: 1356; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, 
r10, r11, lr} 1357; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} 1358; CHECK-FIX-NOSCHED-NEXT: .pad #12 1359; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12 1360; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 1361; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_3 1362; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: 1363; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] 1364; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3] 1365; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1] 1366; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0] 1367; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3] 1368; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] 1369; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill 1370; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2] 1371; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill 1372; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1] 1373; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill 1374; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1] 1375; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 1376; CHECK-FIX-NOSCHED-NEXT: bne .LBB36_4 1377; CHECK-FIX-NOSCHED-NEXT: .LBB36_2: 1378; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3] 1379; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2] 1380; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1] 1381; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0] 1382; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3] 1383; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2] 1384; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1] 1385; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r1, d0[0] 1386; CHECK-FIX-NOSCHED-NEXT: b .LBB36_5 1387; CHECK-FIX-NOSCHED-NEXT: .LBB36_3: 1388; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #8 1389; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r2:32] 1390; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r3:32] 1391; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #4 1392; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r3:32] 1393; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #12 1394; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r3:32] 1395; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3] 1396; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0] 1397; 
CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3] 1398; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] 1399; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d16[0] 1400; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill 1401; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2] 1402; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill 1403; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1] 1404; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill 1405; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1] 1406; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 1407; CHECK-FIX-NOSCHED-NEXT: beq .LBB36_2 1408; CHECK-FIX-NOSCHED-NEXT: .LBB36_4: 1409; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3] 1410; CHECK-FIX-NOSCHED-NEXT: ldrh r1, [r1] 1411; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2] 1412; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1] 1413; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0] 1414; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3] 1415; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2] 1416; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1] 1417; CHECK-FIX-NOSCHED-NEXT: .LBB36_5: 1418; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r8, lsl #16 1419; CHECK-FIX-NOSCHED-NEXT: pkhbt r3, r7, r3, lsl #16 1420; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r4, r0, lsl #16 1421; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r1 1422; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, lr, r12, lsl #16 1423; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r3 1424; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1 1425; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r11, r10, lsl #16 1426; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r1 1427; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r6, r5, lsl #16 1428; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r1 1429; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp] @ 4-byte Reload 1430; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r9, r1, lsl #16 1431; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0 1432; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload 1433; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r1 1434; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #4] @ 4-byte Reload 1435; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, 
r0, lsl #16 1436; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0 1437; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9 1438; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8 1439; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] 1440; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12 1441; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} 1442; 1443; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_ptr: 1444; CHECK-CORTEX-FIX: @ %bb.0: 1445; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} 1446; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} 1447; CHECK-CORTEX-FIX-NEXT: .pad #24 1448; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24 1449; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 1450; CHECK-CORTEX-FIX-NEXT: beq .LBB36_2 1451; CHECK-CORTEX-FIX-NEXT: @ %bb.1: 1452; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] 1453; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] 1454; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[0] 1455; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2] 1456; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill 1457; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2] 1458; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill 1459; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3] 1460; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill 1461; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1] 1462; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill 1463; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1] 1464; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill 1465; CHECK-CORTEX-FIX-NEXT: mov r3, r6 1466; CHECK-CORTEX-FIX-NEXT: b .LBB36_3 1467; CHECK-CORTEX-FIX-NEXT: .LBB36_2: 1468; CHECK-CORTEX-FIX-NEXT: add r3, r2, #8 1469; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r2:32] 1470; CHECK-CORTEX-FIX-NEXT: add r7, r2, #4 1471; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r3:32] 1472; CHECK-CORTEX-FIX-NEXT: add r3, r2, #12 1473; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r7:32] 1474; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r3:32] 1475; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[0] 1476; 
CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill 1477; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] 1478; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[1] 1479; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill 1480; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2] 1481; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill 1482; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2] 1483; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill 1484; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3] 1485; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill 1486; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0] 1487; CHECK-CORTEX-FIX-NEXT: .LBB36_3: 1488; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d17[3] 1489; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 1490; CHECK-CORTEX-FIX-NEXT: beq .LBB36_5 1491; CHECK-CORTEX-FIX-NEXT: @ %bb.4: 1492; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r1] 1493; CHECK-CORTEX-FIX-NEXT: b .LBB36_6 1494; CHECK-CORTEX-FIX-NEXT: .LBB36_5: 1495; CHECK-CORTEX-FIX-NEXT: vmov.u16 r0, d0[0] 1496; CHECK-CORTEX-FIX-NEXT: .LBB36_6: 1497; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill 1498; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload 1499; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload 1500; CHECK-CORTEX-FIX-NEXT: pkhbt r9, r7, r4, lsl #16 1501; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #20] @ 4-byte Reload 1502; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d0[1] 1503; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d0[2] 1504; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d0[3] 1505; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d1[0] 1506; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d1[1] 1507; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d1[2] 1508; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d1[3] 1509; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16 1510; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload 1511; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16 1512; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16 1513; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r11, r5, lsl #16 1514; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r3, r1, lsl #16 1515; 
CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #16] @ 4-byte Reload 1516; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16 1517; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload 1518; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4 1519; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1 1520; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7 1521; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r9 1522; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r10, lsl #16 1523; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3 1524; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5 1525; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6 1526; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0 1527; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8 1528; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9 1529; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] 1530; CHECK-CORTEX-FIX-NEXT: add sp, sp, #24 1531; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} 1532 br i1 %0, label %5, label %12 1533 15345: 1535 %6 = bitcast ptr %1 to ptr 1536 %7 = load i16, ptr %6, align 2 1537 %8 = bitcast ptr %3 to ptr 1538 %9 = load <8 x i16>, ptr %8, align 8 1539 %10 = insertelement <8 x i16> %9, i16 %7, i64 0 1540 %11 = bitcast <8 x i16> %10 to <8 x half> 1541 br label %15 1542 154312: 1544 %13 = bitcast ptr %3 to ptr 1545 %14 = load <8 x half>, ptr %13, align 8 1546 br label %15 1547 154815: 1549 %16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ] 1550 br i1 %0, label %17, label %23 1551 155217: 1553 %18 = bitcast ptr %1 to ptr 1554 %19 = load i16, ptr %18, align 2 1555 %20 = bitcast <16 x i8> %2 to <8 x i16> 1556 %21 = insertelement <8 x i16> %20, i16 %19, i64 0 1557 %22 = bitcast <8 x i16> %21 to <8 x half> 1558 br label %25 1559 156023: 1561 %24 = bitcast <16 x i8> %2 to <8 x half> 1562 br label %25 1563 156425: 1565 %26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ] 1566 %27 = bitcast <8 x half> %16 to <16 x i8> 1567 %28 = bitcast <8 x half> %26 to <16 x i8> 1568 %29 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %27, <16 x i8> %28) 1569 %30 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x 
i8> %29)
  store <16 x i8> %30, ptr %3, align 8
  ret void
}

; Conditional f16 lane insert, scalar passed by value.
; Each AESE operand is a phi over two paths selected by %0: on the true path
; lane 0 of the vector (viewed as <8 x i16>) is replaced with the bits of
; half %1, on the false path the vector is used unchanged. The first operand
; is loaded from %3, the second is argument %2. The AESE+AESMC result is
; stored back through %3. The two RUN configurations produce different
; instruction sequences here, so each has its own set of check lines.
define arm_aapcs_vfpcc void @aese_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf16_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-FIX-NOSCHED-NEXT: .pad #12
; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.1:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s2, s0
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: b .LBB37_3
; CHECK-FIX-NOSCHED-NEXT: .LBB37_2:
; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #8
; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r2:32]
; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #4
; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r2:32]
; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #12
; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r2:32]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1]
; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0]
; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3]
; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[0]
; CHECK-FIX-NOSCHED-NEXT: vmov s2, r2
; CHECK-FIX-NOSCHED-NEXT: .LBB37_3:
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d3[3]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d3[2]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d3[1]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d3[0]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d2[3]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d2[2]
; CHECK-FIX-NOSCHED-NEXT: beq .LBB37_5
; CHECK-FIX-NOSCHED-NEXT: @ %bb.4:
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1]
; CHECK-FIX-NOSCHED-NEXT: b .LBB37_6
; CHECK-FIX-NOSCHED-NEXT: .LBB37_5:
; CHECK-FIX-NOSCHED-NEXT: mov r0, lr
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d2[0]
; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1]
; CHECK-FIX-NOSCHED-NEXT: vmov s0, lr
; CHECK-FIX-NOSCHED-NEXT: mov lr, r0
; CHECK-FIX-NOSCHED-NEXT: .LBB37_6:
; CHECK-FIX-NOSCHED-NEXT: vmov r0, s0
; CHECK-FIX-NOSCHED-NEXT: vmov r6, s2
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r12, lsl #16
; CHECK-FIX-NOSCHED-NEXT: pkhbt r6, r6, r8, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r4, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r6
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r2, lsl #16
; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #4] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0
; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r9, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0
; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, lr, r7, lsl #16
; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12
; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
;
; CHECK-CORTEX-FIX-LABEL: aese_setf16_cond_via_val:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-CORTEX-FIX-NEXT: .pad #12
; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #12
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB37_3
; CHECK-CORTEX-FIX-NEXT: @ %bb.1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s2, s0
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[1]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3]
; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[0]
; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[1]
; CHECK-CORTEX-FIX-NEXT: str r2, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: bne .LBB37_4
; CHECK-CORTEX-FIX-NEXT: .LBB37_2:
; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d2[0]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3]
; CHECK-CORTEX-FIX-NEXT: vmov s0, lr
; CHECK-CORTEX-FIX-NEXT: b .LBB37_5
; CHECK-CORTEX-FIX-NEXT: .LBB37_3:
; CHECK-CORTEX-FIX-NEXT: add r2, r1, #8
; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-CORTEX-FIX-NEXT: add r3, r1, #4
; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r2:32]
; CHECK-CORTEX-FIX-NEXT: add r2, r1, #12
; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r3:32]
; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r2:32]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[0]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: vmov s2, r2
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1]
; CHECK-CORTEX-FIX-NEXT: str r3, [sp] @ 4-byte Spill
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: beq .LBB37_2
; CHECK-CORTEX-FIX-NEXT: .LBB37_4:
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2]
; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3]
; CHECK-CORTEX-FIX-NEXT: .LBB37_5:
; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r11, r6, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r7, r10, lsl #16
; CHECK-CORTEX-FIX-NEXT: ldm sp, {r6, r7} @ 8-byte Folded Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r4, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r9, r2, lsl #16
; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r7, r6, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov r7, s2
; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov r6, s0
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r7
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r4
; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r0
; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], lr
; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r8, lsl #16
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r2
; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r3
; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r5
; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: add sp, sp, #12
; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
  br i1 %0, label %5, label %11

5:
  %6 = bitcast ptr %3 to ptr
  %7 = load <8 x i16>, ptr %6, align 8
  %8 = bitcast half %1 to i16
  %9 = insertelement <8 x i16> %7, i16 %8, i64 0
  %10 = bitcast <8 x i16> %9 to <8 x half>
  br label %14

11:
  %12 = bitcast ptr %3 to ptr
  %13 = load <8 x half>, ptr %12, align 8
  br label %14

14:
  %15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ]
  br i1 %0, label %16, label %21

16:
  %17 = bitcast <16 x i8> %2 to <8 x i16>
  %18 = bitcast half %1 to i16
  %19 = insertelement <8 x i16> %17, i16 %18, i64 0
  %20 = bitcast <8 x i16> %19 to <8 x half>
  br label %23

21:
  %22 = bitcast <16 x i8> %2 to <8 x half>
  br label %23

23:
  %24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ]
  %25 = bitcast <8 x half> %15 to <16 x i8>
  %26 = bitcast <8 x half> %24 to <16 x i8>
  %27 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %25, <16 x i8> %26)
  %28 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %27)
  store <16 x i8> %28, ptr %3, align 8
  ret void
}

; f16 lane insert ahead of a loop, scalar loaded through a pointer.
; An i16 is loaded from %1, inserted into lane 0 of argument %2 (viewed as
; <8 x i16>) and also stored to %3. When %0 is non-zero, the loop applies
; AESE+AESMC to a loop-carried vector initially loaded from %3, always with
; the modified %2 as the second operand, and the final value is stored to %3.
define arm_aapcs_vfpcc void @aese_setf16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_setf16_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrh r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strh r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB38_1:
; CHECK-FIX-NEXT: vmov.16 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB38_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB38_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = bitcast ptr %1 to ptr
  %6 = load i16, ptr %5, align 2
  %7 = bitcast <16 x i8> %2 to <8 x i16>
  %8 = insertelement <8 x i16> %7, i16 %6, i64 0
  %9 = bitcast <8 x i16> %8 to <16 x i8>
  %10 = bitcast ptr %3 to ptr
  store i16 %6, ptr %10, align 8
  %11 = icmp eq i32 %0, 0
  br i1 %11, label %15, label %12

12:
  %13 = load <16 x i8>, ptr %3, align 8
  br label %16

14:
  store <16 x i8> %20, ptr %3, align 8
  br label %15

15:
  ret void

16:
  %17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ]
  %18 = phi i32 [ 0, %12 ], [ %21, %16 ]
  %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
  %21 = add nuw i32 %18, 1
  %22 = icmp eq i32 %21, %0
  br i1 %22, label %14, label %16
}

; f16 lane insert inside a loop, scalar passed by value.
; The second AESE operand (%2 with lane 0 replaced by the bits of half %1) is
; built once before the loop. Each iteration reloads the vector from %3,
; replaces its lane 0 with the same bits, stores half %1 to %3, runs
; AESE+AESMC and stores the result back to %3.
define arm_aapcs_vfpcc void @aese_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_setf16_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB39_1:
; CHECK-FIX-NEXT: vmov r2, s0
; CHECK-FIX-NEXT: vmov.16 d2[0], r2
; CHECK-FIX-NEXT: .LBB39_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: vmov.16 d16[0], r2
; CHECK-FIX-NEXT: aese.8 q8, q1
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bne .LBB39_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %13, label %6

6:
  %7 = bitcast <16 x i8> %2 to <8 x i16>
  %8 = bitcast half %1 to i16
  %9 = insertelement <8 x i16> %7, i16 %8, i64 0
  %10 = bitcast <8 x i16> %9 to <16 x i8>
  %11 = bitcast ptr %3 to ptr
  %12 = bitcast ptr %3 to ptr
  br label %14

13:
  ret void

14:
  %15 = phi i32 [ 0, %6 ], [ %21, %14 ]
  %16 = load <8 x i16>, ptr %11, align 8
  %17 = insertelement <8 x i16> %16, i16 %8, i64 0
  %18 = bitcast <8 x i16> %17 to <16 x i8>
  store half %1, ptr %12, align 8
  %19 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %18, <16 x i8> %10)
  %20 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %19)
  store <16 x i8> %20, ptr %3, align 8
  %21 = add nuw i32 %15, 1
  %22 = icmp eq i32 %21, %0
  br i1 %22, label %13, label %14
}

; f32 lane insert, scalar loaded through a pointer.
; A float loaded from %0 replaces lane 0 of both AESE operands: the vector
; loaded from %2 and argument %1. The AESE+AESMC result is stored to %2.
; The expected output has a `vorr` on each operand register before the aese.
define arm_aapcs_vfpcc void @aese_setf32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_setf32_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vldr s0, [r0]
; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1]
; CHECK-FIX-NEXT: vmov.f32 s4, s0
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aese.8 q1, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q1
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = load float, ptr %0, align 4
  %5 = bitcast ptr %2 to ptr
  %6 = load <4 x float>, ptr %5, align 8
  %7 = insertelement <4 x float> %6, float %4, i64 0
  %8 = bitcast <4 x float> %7 to <16 x i8>
  %9 = bitcast <16 x i8> %1 to <4 x float>
  %10 = insertelement <4 x float> %9, float %4, i64 0
  %11 = bitcast <4 x float> %10 to <16 x i8>
  %12 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %8, <16 x i8> %11)
  %13 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %12)
  store <16 x i8> %13, ptr %2, align 8
  ret void
}

; f32 lane insert, scalar passed by value.
; Float argument %0 replaces lane 0 of both AESE operands: the vector loaded
; from %2 and argument %1. The AESE+AESMC result is stored to %2.
; The expected output has a `vorr` on each operand register before the aese.
define arm_aapcs_vfpcc void @aese_setf32_via_val(float %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aese_setf32_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vmov.f32 s4, s0
; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0]
; CHECK-FIX-NEXT: vmov.f32 s0, s4
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: aese.8 q0, q1
; CHECK-FIX-NEXT: aesmc.8 q8, q0
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
  %4 = bitcast ptr %2 to ptr
  %5 = load <4 x float>, ptr %4, align 8
  %6 = insertelement <4 x float> %5, float %0, i64 0
  %7 = bitcast <4 x float> %6 to <16 x i8>
  %8 = bitcast <16 x i8> %1 to <4 x float>
  %9 = insertelement <4 x float> %8, float %0, i64 0
  %10 = bitcast <4 x float> %9 to <16 x i8>
  %11 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %7, <16 x i8> %10)
  %12 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %11)
  store <16 x i8> %12, ptr %2, align 8
  ret void
}

; Conditional f32 lane insert, scalar loaded through a pointer.
; Each AESE operand is a phi selected by %0: on the true path a float loaded
; from %1 replaces lane 0 of the vector, on the false path the vector is used
; unchanged. The first operand is loaded from %3, the second is argument %2.
; The AESE+AESMC result is stored to %3.
define arm_aapcs_vfpcc void @aese_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aese_setf32_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB42_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB42_3
; CHECK-FIX-NEXT: b .LBB42_4
; CHECK-FIX-NEXT: .LBB42_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB42_4
; CHECK-FIX-NEXT: .LBB42_3:
; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32]
; CHECK-FIX-NEXT: .LBB42_4:
; CHECK-FIX-NEXT: aese.8 q8, q0
; CHECK-FIX-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %10

5:
  %6 = load float, ptr %1, align 4
  %7 = bitcast ptr %3 to ptr
  %8 = load <4 x float>, ptr %7, align 8
  %9 = insertelement <4 x float> %8, float %6, i64 0
  br label %13

10:
  %11 = bitcast ptr %3 to ptr
  %12 = load <4 x float>, ptr %11, align 8
  br label %13

13:
  %14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ]
  br i1 %0, label %15, label %19

15:
  %16 = load float, ptr %1, align 4
  %17 = bitcast <16 x i8> %2 to <4 x float>
  %18 = insertelement <4 x float> %17, float %16, i64 0
  br label %21

19:
  %20 = bitcast <16 x i8> %2 to <4 x float>
  br label %21

21:
  %22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ]
  %23 = bitcast <4 x float> %14 to <16 x i8>
  %24 = bitcast <4 x float> %22 to <16 x i8>
  %25 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %23, <16 x i8> %24)
  %26 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %25)
  store <16 x i8> %26, ptr %3, align 8
  ret void
}

; Conditional f32 lane insert, scalar passed by value.
; Uses `select` on %0 rather than branches: each AESE operand is either the
; vector with lane 0 replaced by float %1, or the unmodified vector. The first
; operand is loaded from %3, the second is argument %2. The AESE+AESMC result
; is stored to %3. The two RUN configurations order the first instructions
; differently, so each has its own set of check lines.
define arm_aapcs_vfpcc void @aese_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_cond_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf32_cond_via_val:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %5 = bitcast ptr %3 to ptr
  %6 = load <4 x float>, ptr %5, align 8
  %7 = insertelement <4 x float> %6, float %1, i64 0
  %8 = select i1 %0, <4 x float> %7, <4 x float> %6
  %9 = bitcast <16 x i8> %2 to <4 x float>
  %10 = insertelement <4 x float> %9, float %1, i64 0
  %11 = select i1 %0, <4 x float> %10, <4 x float> %9
  %12 = bitcast <4 x float> %8 to <16 x i8>
  %13 = bitcast <4 x float> %11 to <16 x i8>
  %14 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %12, <16 x i8> %13)
  %15 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %14)
  store <16 x i8> %15, ptr %3, align 8
  ret void
}

; f32 lane insert ahead of a loop, scalar loaded through a pointer.
; A float is loaded from %1, inserted into lane 0 of argument %2 and also
; stored to %3. When %0 is non-zero, the loop applies AESE+AESMC to a
; loop-carried vector initially loaded from %3, always with the modified %2 as
; the second operand, and the final value is stored to %3. In the expected
; output the `vorr` on q0 sits inside the loop body.
define arm_aapcs_vfpcc void @aese_setf32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1]
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: vstr s4, [r2]
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
; CHECK-FIX-NOSCHED-NEXT: .LBB44_1:
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: aese.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: bne .LBB44_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vldr s4, [r1]
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: vstr s4, [r2]
; CHECK-CORTEX-FIX-NEXT: bxeq lr
; CHECK-CORTEX-FIX-NEXT: .LBB44_1:
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4
; CHECK-CORTEX-FIX-NEXT: .LBB44_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: aese.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: bne .LBB44_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %5 = load float, ptr %1, align 4
  %6 = bitcast <16 x i8> %2 to <4 x float>
  %7 = insertelement <4 x float> %6, float %5, i64 0
  %8 = bitcast <4 x float> %7 to <16 x i8>
  %9 = bitcast ptr %3 to ptr
  store float %5, ptr %9, align 8
  %10 = icmp eq i32 %0, 0
  br i1 %10, label %14, label %11

11:
  %12 = load <16 x i8>, ptr %3, align 8
  br label %15

13:
  store <16 x i8> %19, ptr %3, align 8
  br label %14

14:
  ret void

15:
  %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ]
  %17 = phi i32 [ 0, %11 ], [ %20, %15 ]
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %16, <16 x i8> %8)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  %20 = add nuw i32 %17, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %13, label %15
}

; f32 lane insert inside a loop, scalar passed by value.
; The second AESE operand (%2 with lane 0 replaced by float %1) is built once
; before the loop. Each iteration reloads the vector from %3, replaces its
; lane 0 with %1, stores float %1 to %3, runs AESE+AESMC and stores the result
; back to %3. In the expected output both `vorr` instructions sit inside the
; loop body.
define arm_aapcs_vfpcc void @aese_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aese_setf32_loop_via_val:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
; CHECK-FIX-NOSCHED-NEXT: .LBB45_1:
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s4, s0
; CHECK-FIX-NOSCHED-NEXT: .LBB45_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0
; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2
; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1
; CHECK-FIX-NOSCHED-NEXT: aese.8 q2, q1
; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q2
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bne .LBB45_2
; CHECK-FIX-NOSCHED-NEXT: @ %bb.3:
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aese_setf32_loop_via_val:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: bxeq lr
; CHECK-CORTEX-FIX-NEXT: .LBB45_1:
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s4, s0
; CHECK-CORTEX-FIX-NEXT: .LBB45_2: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1]
; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0
; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1
; CHECK-CORTEX-FIX-NEXT: aese.8 q2, q1
; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q2
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bne .LBB45_2
; CHECK-CORTEX-FIX-NEXT: @ %bb.3:
; CHECK-CORTEX-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %12, label %6

6:
  %7 = bitcast <16 x i8> %2 to <4 x float>
  %8 = insertelement <4 x float> %7, float %1, i64 0
  %9 = bitcast <4 x float> %8 to <16 x i8>
  %10 = bitcast ptr %3 to ptr
  %11 = bitcast ptr %3 to ptr
  br label %13

12:
  ret void

13:
  %14 = phi i32 [ 0, %6 ], [ %20, %13 ]
  %15 = load <4 x float>, ptr %10, align 8
  %16 = insertelement <4 x float> %15, float %1, i64 0
  %17 = bitcast <4 x float> %16 to <16 x i8>
  store float %1, ptr %11, align 8
  %18 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> %17, <16 x i8> %9)
  %19 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %18)
  store <16 x i8> %19, ptr %3, align 8
  %20 = add nuw i32 %14, 1
  %21 = icmp eq i32 %20, %0
  br i1 %21, label %12, label %13
}

; AESD with an all-zero first operand.
; The second operand is loaded from %0 and the AESD+AESIMC result is stored
; back to %0. No `vorr` appears in the expected output.
define arm_aapcs_vfpcc void @aesd_zero(ptr %0) nounwind {
; CHECK-FIX-LABEL: aesd_zero:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vmov.i32 q9, #0x0
; CHECK-FIX-NEXT: aesd.8 q9, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: bx lr
  %2 = load <16 x i8>, ptr %0, align 8
  %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> zeroinitializer, <16 x i8> %2)
  %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
  store <16 x i8> %4, ptr %0, align 8
  ret void
}

; AESD whose first operand is the return value of @get_input().
; The second operand is loaded from %0 and the AESD+AESIMC result is stored
; back to %0. The expected output has `vorr q0, q0, q0` right after the call.
define arm_aapcs_vfpcc void @aesd_via_call1(ptr %0) nounwind {
; CHECK-FIX-LABEL: aesd_via_call1:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_input
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %2 = call arm_aapcs_vfpcc <16 x i8> @get_input()
  %3 = load <16 x i8>, ptr %0, align 8
  %4 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %2, <16 x i8> %3)
  %5 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %4)
  store <16 x i8> %5, ptr %0, align 8
  ret void
}

; Same shape as aesd_via_call1, but the callee is @get_inputf16 and takes the
; half argument %0. The second operand is loaded from, and the result stored
; to, %1. The expected output has `vorr q0, q0, q0` right after the call.
define arm_aapcs_vfpcc void @aesd_via_call2(half %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aesd_via_call2:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf16
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf16(half %0)
  %4 = load <16 x i8>, ptr %1, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
  store <16 x i8> %6, ptr %1, align 8
  ret void
}

; Same shape as aesd_via_call1, but the callee is @get_inputf32 and takes the
; float argument %0. The second operand is loaded from, and the result stored
; to, %1. The expected output has `vorr q0, q0, q0` right after the call.
define arm_aapcs_vfpcc void @aesd_via_call3(float %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aesd_via_call3:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: .save {r4, lr}
; CHECK-FIX-NEXT: push {r4, lr}
; CHECK-FIX-NEXT: mov r4, r0
; CHECK-FIX-NEXT: bl get_inputf32
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r4]
; CHECK-FIX-NEXT: pop {r4, pc}
  %3 = call arm_aapcs_vfpcc <16 x i8> @get_inputf32(float %0)
  %4 = load <16 x i8>, ptr %1, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
  store <16 x i8> %6, ptr %1, align 8
  ret void
}

; Single AESD+AESIMC round with both operands loaded from memory (%1 and %0);
; the result is stored to %1. No `vorr` appears in the expected output.
define arm_aapcs_vfpcc void @aesd_once_via_ptr(ptr %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aesd_once_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT: aesd.8 q9, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %3 = load <16 x i8>, ptr %1, align 8
  %4 = load <16 x i8>, ptr %0, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
  store <16 x i8> %6, ptr %1, align 8
  ret void
}

; Single AESD+AESIMC round with both operands passed as vector arguments; the
; result is returned. The expected output has a `vorr` on both q0 and q1.
define arm_aapcs_vfpcc <16 x i8> @aesd_once_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aesd_once_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: aesd.8 q0, q1
; CHECK-FIX-NEXT: aesimc.8 q0, q0
; CHECK-FIX-NEXT: bx lr
  %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
  %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
  ret <16 x i8> %4
}

; Two chained AESD+AESIMC rounds with memory operands.
; Round one uses the vectors loaded from %1 and %0 and stores its result to
; %1. Round two uses that result together with a fresh load from %0 and stores
; to %1 again. No `vorr` appears in the expected output.
define arm_aapcs_vfpcc void @aesd_twice_via_ptr(ptr %0, ptr %1) nounwind {
; CHECK-FIX-LABEL: aesd_twice_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r1]
; CHECK-FIX-NEXT: aesd.8 q9, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vld1.64 {d18, d19}, [r0]
; CHECK-FIX-NEXT: aesd.8 q9, q8
; CHECK-FIX-NEXT: aesimc.8 q8, q9
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %3 = load <16 x i8>, ptr %1, align 8
  %4 = load <16 x i8>, ptr %0, align 8
  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %3, <16 x i8> %4)
  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
  store <16 x i8> %6, ptr %1, align 8
  %7 = load <16 x i8>, ptr %0, align 8
  %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
  %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
  store <16 x i8> %9, ptr %1, align 8
  ret void
}

; Two chained AESD+AESIMC rounds with vector arguments.
; Round one is aesd(%1, %0); round two feeds the first result back in, again
; paired with %0. The final value is returned.
define arm_aapcs_vfpcc <16 x i8> @aesd_twice_via_val(<16 x i8> %0, <16 x i8> %1) nounwind {
; CHECK-FIX-LABEL: aesd_twice_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: aesd.8 q1, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q0, q8
; CHECK-FIX-NEXT: bx lr
  %3 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %1, <16 x i8> %0)
  %4 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %3)
  %5 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %4, <16 x i8> %0)
  %6 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %5)
  ret <16 x i8> %6
}

; AESD+AESIMC in a loop with memory operands.
; Returns immediately when %0 is zero. Otherwise each iteration loads both
; operands (from %2 and %1), runs AESD+AESIMC and stores the result to %2. No
; `vorr` appears in the expected output. The two RUN configurations order the
; loop body differently, so each has its own set of check lines.
define arm_aapcs_vfpcc void @aesd_loop_via_ptr(i32 %0, ptr %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_loop_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0
; CHECK-FIX-NOSCHED-NEXT: bxeq lr
; CHECK-FIX-NOSCHED-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r2]
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q9, q8
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q9
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NOSCHED-NEXT: bne .LBB54_1
; CHECK-FIX-NOSCHED-NEXT: @ %bb.2:
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_loop_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: cmp r0, #0
; CHECK-CORTEX-FIX-NEXT: bxeq lr
; CHECK-CORTEX-FIX-NEXT: .LBB54_1: @ =>This Inner Loop Header: Depth=1
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r2]
; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1
; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-CORTEX-FIX-NEXT: bne .LBB54_1
; CHECK-CORTEX-FIX-NEXT: @ %bb.2:
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = icmp eq i32 %0, 0
  br i1 %4, label %5, label %6

5:
  ret void

6:
  %7 = phi i32 [ %12, %6 ], [ 0, %3 ]
  %8 = load <16 x i8>, ptr %2, align 8
  %9 = load <16 x i8>, ptr %1, align 8
  %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %9)
  %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
  store <16 x i8> %11, ptr %2, align 8
  %12 = add nuw i32 %7, 1
  %13 = icmp eq i32 %12, %0
  br i1 %13, label %5, label %6
}

; AESD+AESIMC in a loop with vector arguments.
; The loop-carried vector starts as %2 and is repeatedly combined with %1.
; The final value is returned; when %0 is zero the loop is skipped and %2 is
; returned unchanged. In the expected output both `vorr` instructions are
; placed before the loop.
define arm_aapcs_vfpcc <16 x i8> @aesd_loop_via_val(i32 %0, <16 x i8> %1, <16 x i8> %2) nounwind {
; CHECK-FIX-LABEL: aesd_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q1, q1, q1
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB55_2
; CHECK-FIX-NEXT: .LBB55_1: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aesd.8 q1, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesimc.8 q1, q1
; CHECK-FIX-NEXT: bne .LBB55_1
; CHECK-FIX-NEXT: .LBB55_2:
; CHECK-FIX-NEXT: vorr q0, q1, q1
; CHECK-FIX-NEXT: bx lr
  %4 = icmp eq i32 %0, 0
  br i1 %4, label %5, label %7

5:
  %6 = phi <16 x i8> [ %2, %3 ], [ %11, %7 ]
  ret <16 x i8> %6

7:
  %8 = phi i32 [ %12, %7 ], [ 0, %3 ]
  %9 = phi <16 x i8> [ %11, %7 ], [ %2, %3 ]
  %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %1)
  %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
  %12 = add nuw i32 %8, 1
  %13 = icmp eq i32 %12, %0
  br i1 %13, label %5, label %7
}

; i8 lane insert, scalar loaded through a pointer.
; An i8 loaded from %0 replaces lane 0 of both AESD operands: the vector
; loaded from %2 and argument %1. The AESD+AESIMC result is stored to %2.
; The expected output has a single `vorr q0, q0, q0` ahead of the lane
; inserts. The two RUN configurations order the loads differently, so each
; has its own set of check lines.
define arm_aapcs_vfpcc void @aesd_set8_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-NOSCHED-LABEL: aesd_set8_via_ptr:
; CHECK-FIX-NOSCHED: @ %bb.0:
; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0
; CHECK-FIX-NOSCHED-NEXT: ldrb r0, [r0]
; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d0[0], r0
; CHECK-FIX-NOSCHED-NEXT: vmov.8 d16[0], r0
; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0
; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NOSCHED-NEXT: bx lr
;
; CHECK-CORTEX-FIX-LABEL: aesd_set8_via_ptr:
; CHECK-CORTEX-FIX: @ %bb.0:
; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0
; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: ldrb r0, [r0]
; CHECK-CORTEX-FIX-NEXT: vmov.8 d0[0], r0
; CHECK-CORTEX-FIX-NEXT: vmov.8 d16[0], r0
; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0
; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8
; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-CORTEX-FIX-NEXT: bx lr
  %4 = load i8, ptr %0, align 1
  %5 = load <16 x i8>, ptr %2, align 8
  %6 = insertelement <16 x i8> %5, i8 %4, i64 0
  %7 = insertelement <16 x i8> %1, i8 %4, i64 0
  %8 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %6, <16 x i8> %7)
  %9 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %8)
  store <16 x i8> %9, ptr %2, align 8
  ret void
}

; i8 lane insert, scalar passed by value.
; i8 argument %0 replaces lane 0 of both AESD operands: the vector loaded from
; %2 and argument %1. The AESD+AESIMC result is stored to %2. The expected
; output has a single `vorr q0, q0, q0` ahead of the lane inserts.
define arm_aapcs_vfpcc void @aesd_set8_via_val(i8 zeroext %0, <16 x i8> %1, ptr %2) nounwind {
; CHECK-FIX-LABEL: aesd_set8_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: vmov.8 d0[0], r0
; CHECK-FIX-NEXT: vmov.8 d16[0], r0
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1]
; CHECK-FIX-NEXT: bx lr
  %4 = load <16 x i8>, ptr %2, align 8
  %5 = insertelement <16 x i8> %4, i8 %0, i64 0
  %6 = insertelement <16 x i8> %1, i8 %0, i64 0
  %7 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %5, <16 x i8> %6)
  %8 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %7)
  store <16 x i8> %8, ptr %2, align 8
  ret void
}

; Conditional i8 lane insert, scalar loaded through a pointer.
; Each AESD operand is a phi selected by %0: on the true path an i8 loaded
; from %1 replaces lane 0 of the vector, on the false path the vector is used
; unchanged. The first operand is loaded from %3, the second is argument %2.
; The AESD+AESIMC result is stored to %3.
define arm_aapcs_vfpcc void @aesd_set8_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB58_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: vld1.8 {d16[0]}, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bne .LBB58_3
; CHECK-FIX-NEXT: b .LBB58_4
; CHECK-FIX-NEXT: .LBB58_2:
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB58_4
; CHECK-FIX-NEXT: .LBB58_3:
; CHECK-FIX-NEXT: vld1.8 {d0[0]}, [r1]
; CHECK-FIX-NEXT: .LBB58_4:
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  br i1 %0, label %5, label %9

5:
  %6 = load i8, ptr %1, align 1
  %7 = load <16 x i8>, ptr %3, align 8
  %8 = insertelement <16 x i8> %7, i8 %6, i64 0
  br label %11

9:
  %10 = load <16 x i8>, ptr %3, align 8
  br label %11

11:
  %12 = phi <16 x i8> [ %8, %5 ], [ %10, %9 ]
  br i1 %0, label %13, label %16

13:
  %14 = load i8, ptr %1, align 1
  %15 = insertelement <16 x i8> %2, i8 %14, i64 0
  br label %16

16:
  %17 = phi <16 x i8> [ %15, %13 ], [ %2, %11 ]
  %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %17)
  %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18)
  store <16 x i8> %19, ptr %3, align 8
  ret void
}

; Conditional i8 lane insert, scalar passed by value.
; Uses `select` on %0 rather than branches: each AESD operand is either the
; vector with lane 0 replaced by i8 %1, or the unmodified vector. The first
; operand is loaded from %3, the second is argument %2. The AESD+AESIMC
; result is stored to %3.
define arm_aapcs_vfpcc void @aesd_set8_cond_via_val(i1 zeroext %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_cond_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB59_2
; CHECK-FIX-NEXT: @ %bb.1:
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: .LBB59_2: @ %select.end
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: beq .LBB59_4
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: .LBB59_4: @ %select.end2
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load <16 x i8>, ptr %3, align 8
  %6 = insertelement <16 x i8> %5, i8 %1, i64 0
  %7 = select i1 %0, <16 x i8> %6, <16 x i8> %5
  %8 = insertelement <16 x i8> %2, i8 %1, i64 0
  %9 = select i1 %0, <16 x i8> %8, <16 x i8> %2
  %10 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %9)
  %11 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %10)
  store <16 x i8> %11, ptr %3, align 8
  ret void
}

; i8 lane insert ahead of a loop, scalar loaded through a pointer.
; An i8 is loaded from %1, inserted into lane 0 of argument %2 and also stored
; to the first byte of %3 (the getelementptr uses indices 0, 0). When %0 is
; non-zero, the loop applies AESD+AESIMC to a loop-carried vector initially
; loaded from %3, always with the modified %2 as the second operand, and the
; final value is stored to %3.
define arm_aapcs_vfpcc void @aesd_set8_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_ptr:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: ldrb r1, [r1]
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: strb r1, [r2]
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB60_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB60_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB60_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = load i8, ptr %1, align 1
  %6 = insertelement <16 x i8> %2, i8 %5, i64 0
  %7 = getelementptr inbounds <16 x i8>, ptr %3, i32 0, i32 0
  store i8 %5, ptr %7, align 8
  %8 = icmp eq i32 %0, 0
  br i1 %8, label %12, label %9

9:
  %10 = load <16 x i8>, ptr %3, align 8
  br label %13

11:
  store <16 x i8> %17, ptr %3, align 8
  br label %12

12:
  ret void

13:
  %14 = phi <16 x i8> [ %10, %9 ], [ %17, %13 ]
  %15 = phi i32 [ 0, %9 ], [ %18, %13 ]
  %16 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %6)
  %17 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %16)
  %18 = add nuw i32 %15, 1
  %19 = icmp eq i32 %18, %0
  br i1 %19, label %11, label %13
}

define arm_aapcs_vfpcc void @aesd_set8_loop_via_val(i32 %0, i8 zeroext %1, <16 x i8> %2, ptr %3) nounwind {
; CHECK-FIX-LABEL: aesd_set8_loop_via_val:
; CHECK-FIX: @ %bb.0:
; CHECK-FIX-NEXT: vorr q0, q0, q0
; CHECK-FIX-NEXT: cmp r0, #0
; CHECK-FIX-NEXT: bxeq lr
; CHECK-FIX-NEXT: .LBB61_1:
; CHECK-FIX-NEXT: vmov.8 d0[0], r1
; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: .LBB61_2: @ =>This Inner Loop Header: Depth=1
; CHECK-FIX-NEXT: vmov.8 d16[0], r1
; CHECK-FIX-NEXT: subs r0, r0, #1
; CHECK-FIX-NEXT: aesd.8 q8, q0
; CHECK-FIX-NEXT: aesimc.8 q8, q8
; CHECK-FIX-NEXT: bne .LBB61_2
; CHECK-FIX-NEXT: @ %bb.3:
; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2]
; CHECK-FIX-NEXT: bx lr
  %5 = icmp eq i32 %0, 0
  br i1 %5, label %10, label %6

6:
  %7 = insertelement <16 x i8> %2, i8 %1, i64 0
  %8 = load <16 x i8>, ptr %3, align 8
  br label %11

9:
  store <16 x i8> %16, ptr %3, align 8
  br label %10

10:
  ret void

11:
  %12 = phi <16 x i8> [ %8, %6 ], [ %16, %11 ]
  %13 = phi i32 [ 0, %6 ], [ %17, %11 ]
  %14 = insertelement <16 x i8> %12, i8 %1,
i64 0 2632 %15 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %14, <16 x i8> %7) 2633 %16 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %15) 2634 %17 = add nuw i32 %13, 1 2635 %18 = icmp eq i32 %17, %0 2636 br i1 %18, label %9, label %11 2637} 2638 2639define arm_aapcs_vfpcc void @aesd_set16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { 2640; CHECK-FIX-NOSCHED-LABEL: aesd_set16_via_ptr: 2641; CHECK-FIX-NOSCHED: @ %bb.0: 2642; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 2643; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0] 2644; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] 2645; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0 2646; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0 2647; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0 2648; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 2649; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] 2650; CHECK-FIX-NOSCHED-NEXT: bx lr 2651; 2652; CHECK-CORTEX-FIX-LABEL: aesd_set16_via_ptr: 2653; CHECK-CORTEX-FIX: @ %bb.0: 2654; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 2655; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] 2656; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0] 2657; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0 2658; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0 2659; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0 2660; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 2661; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] 2662; CHECK-CORTEX-FIX-NEXT: bx lr 2663 %4 = load i16, ptr %0, align 2 2664 %5 = bitcast ptr %2 to ptr 2665 %6 = load <8 x i16>, ptr %5, align 8 2666 %7 = insertelement <8 x i16> %6, i16 %4, i64 0 2667 %8 = bitcast <8 x i16> %7 to <16 x i8> 2668 %9 = bitcast <16 x i8> %1 to <8 x i16> 2669 %10 = insertelement <8 x i16> %9, i16 %4, i64 0 2670 %11 = bitcast <8 x i16> %10 to <16 x i8> 2671 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11) 2672 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12) 2673 store <16 x i8> %13, ptr %2, align 8 2674 ret void 2675} 2676 2677define arm_aapcs_vfpcc void @aesd_set16_via_val(i16 zeroext %0, <16 x i8> %1, ptr %2) 
nounwind { 2678; CHECK-FIX-LABEL: aesd_set16_via_val: 2679; CHECK-FIX: @ %bb.0: 2680; CHECK-FIX-NEXT: vorr q0, q0, q0 2681; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] 2682; CHECK-FIX-NEXT: vmov.16 d0[0], r0 2683; CHECK-FIX-NEXT: vmov.16 d16[0], r0 2684; CHECK-FIX-NEXT: aesd.8 q8, q0 2685; CHECK-FIX-NEXT: aesimc.8 q8, q8 2686; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] 2687; CHECK-FIX-NEXT: bx lr 2688 %4 = bitcast ptr %2 to ptr 2689 %5 = load <8 x i16>, ptr %4, align 8 2690 %6 = insertelement <8 x i16> %5, i16 %0, i64 0 2691 %7 = bitcast <8 x i16> %6 to <16 x i8> 2692 %8 = bitcast <16 x i8> %1 to <8 x i16> 2693 %9 = insertelement <8 x i16> %8, i16 %0, i64 0 2694 %10 = bitcast <8 x i16> %9 to <16 x i8> 2695 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10) 2696 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11) 2697 store <16 x i8> %12, ptr %2, align 8 2698 ret void 2699} 2700 2701define arm_aapcs_vfpcc void @aesd_set16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 2702; CHECK-FIX-LABEL: aesd_set16_cond_via_ptr: 2703; CHECK-FIX: @ %bb.0: 2704; CHECK-FIX-NEXT: vorr q0, q0, q0 2705; CHECK-FIX-NEXT: cmp r0, #0 2706; CHECK-FIX-NEXT: beq .LBB64_2 2707; CHECK-FIX-NEXT: @ %bb.1: 2708; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 2709; CHECK-FIX-NEXT: vld1.16 {d16[0]}, [r1:16] 2710; CHECK-FIX-NEXT: cmp r0, #0 2711; CHECK-FIX-NEXT: bne .LBB64_3 2712; CHECK-FIX-NEXT: b .LBB64_4 2713; CHECK-FIX-NEXT: .LBB64_2: 2714; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 2715; CHECK-FIX-NEXT: cmp r0, #0 2716; CHECK-FIX-NEXT: beq .LBB64_4 2717; CHECK-FIX-NEXT: .LBB64_3: 2718; CHECK-FIX-NEXT: vld1.16 {d0[0]}, [r1:16] 2719; CHECK-FIX-NEXT: .LBB64_4: 2720; CHECK-FIX-NEXT: aesd.8 q8, q0 2721; CHECK-FIX-NEXT: aesimc.8 q8, q8 2722; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 2723; CHECK-FIX-NEXT: bx lr 2724 br i1 %0, label %5, label %10 2725 27265: 2727 %6 = load i16, ptr %1, align 2 2728 %7 = bitcast ptr %3 to ptr 2729 %8 = load <8 x i16>, ptr %7, align 8 2730 
%9 = insertelement <8 x i16> %8, i16 %6, i64 0 2731 br label %13 2732 273310: 2734 %11 = bitcast ptr %3 to ptr 2735 %12 = load <8 x i16>, ptr %11, align 8 2736 br label %13 2737 273813: 2739 %14 = phi <8 x i16> [ %9, %5 ], [ %12, %10 ] 2740 br i1 %0, label %15, label %19 2741 274215: 2743 %16 = load i16, ptr %1, align 2 2744 %17 = bitcast <16 x i8> %2 to <8 x i16> 2745 %18 = insertelement <8 x i16> %17, i16 %16, i64 0 2746 br label %21 2747 274819: 2749 %20 = bitcast <16 x i8> %2 to <8 x i16> 2750 br label %21 2751 275221: 2753 %22 = phi <8 x i16> [ %18, %15 ], [ %20, %19 ] 2754 %23 = bitcast <8 x i16> %14 to <16 x i8> 2755 %24 = bitcast <8 x i16> %22 to <16 x i8> 2756 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24) 2757 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25) 2758 store <16 x i8> %26, ptr %3, align 8 2759 ret void 2760} 2761 2762define arm_aapcs_vfpcc void @aesd_set16_cond_via_val(i1 zeroext %0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind { 2763; CHECK-FIX-LABEL: aesd_set16_cond_via_val: 2764; CHECK-FIX: @ %bb.0: 2765; CHECK-FIX-NEXT: vorr q0, q0, q0 2766; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 2767; CHECK-FIX-NEXT: cmp r0, #0 2768; CHECK-FIX-NEXT: beq .LBB65_2 2769; CHECK-FIX-NEXT: @ %bb.1: 2770; CHECK-FIX-NEXT: vmov.16 d16[0], r1 2771; CHECK-FIX-NEXT: .LBB65_2: @ %select.end 2772; CHECK-FIX-NEXT: cmp r0, #0 2773; CHECK-FIX-NEXT: beq .LBB65_4 2774; CHECK-FIX-NEXT: @ %bb.3: 2775; CHECK-FIX-NEXT: vmov.16 d0[0], r1 2776; CHECK-FIX-NEXT: .LBB65_4: @ %select.end2 2777; CHECK-FIX-NEXT: aesd.8 q8, q0 2778; CHECK-FIX-NEXT: aesimc.8 q8, q8 2779; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 2780; CHECK-FIX-NEXT: bx lr 2781 %5 = bitcast ptr %3 to ptr 2782 %6 = load <8 x i16>, ptr %5, align 8 2783 %7 = insertelement <8 x i16> %6, i16 %1, i64 0 2784 %8 = select i1 %0, <8 x i16> %7, <8 x i16> %6 2785 %9 = bitcast <16 x i8> %2 to <8 x i16> 2786 %10 = insertelement <8 x i16> %9, i16 %1, i64 0 2787 %11 = select i1 %0, <8 x i16> %10, <8 x 
i16> %9 2788 %12 = bitcast <8 x i16> %8 to <16 x i8> 2789 %13 = bitcast <8 x i16> %11 to <16 x i8> 2790 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13) 2791 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14) 2792 store <16 x i8> %15, ptr %3, align 8 2793 ret void 2794} 2795 2796define arm_aapcs_vfpcc void @aesd_set16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 2797; CHECK-FIX-LABEL: aesd_set16_loop_via_ptr: 2798; CHECK-FIX: @ %bb.0: 2799; CHECK-FIX-NEXT: vorr q0, q0, q0 2800; CHECK-FIX-NEXT: ldrh r1, [r1] 2801; CHECK-FIX-NEXT: cmp r0, #0 2802; CHECK-FIX-NEXT: strh r1, [r2] 2803; CHECK-FIX-NEXT: bxeq lr 2804; CHECK-FIX-NEXT: .LBB66_1: 2805; CHECK-FIX-NEXT: vmov.16 d0[0], r1 2806; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 2807; CHECK-FIX-NEXT: .LBB66_2: @ =>This Inner Loop Header: Depth=1 2808; CHECK-FIX-NEXT: aesd.8 q8, q0 2809; CHECK-FIX-NEXT: subs r0, r0, #1 2810; CHECK-FIX-NEXT: aesimc.8 q8, q8 2811; CHECK-FIX-NEXT: bne .LBB66_2 2812; CHECK-FIX-NEXT: @ %bb.3: 2813; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 2814; CHECK-FIX-NEXT: bx lr 2815 %5 = load i16, ptr %1, align 2 2816 %6 = bitcast <16 x i8> %2 to <8 x i16> 2817 %7 = insertelement <8 x i16> %6, i16 %5, i64 0 2818 %8 = bitcast <8 x i16> %7 to <16 x i8> 2819 %9 = bitcast ptr %3 to ptr 2820 store i16 %5, ptr %9, align 8 2821 %10 = icmp eq i32 %0, 0 2822 br i1 %10, label %14, label %11 2823 282411: 2825 %12 = load <16 x i8>, ptr %3, align 8 2826 br label %15 2827 282813: 2829 store <16 x i8> %19, ptr %3, align 8 2830 br label %14 2831 283214: 2833 ret void 2834 283515: 2836 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ] 2837 %17 = phi i32 [ 0, %11 ], [ %20, %15 ] 2838 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8) 2839 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) 2840 %20 = add nuw i32 %17, 1 2841 %21 = icmp eq i32 %20, %0 2842 br i1 %21, label %13, label %15 2843} 2844 2845define arm_aapcs_vfpcc void @aesd_set16_loop_via_val(i32 
%0, i16 zeroext %1, <16 x i8> %2, ptr %3) nounwind { 2846; CHECK-FIX-LABEL: aesd_set16_loop_via_val: 2847; CHECK-FIX: @ %bb.0: 2848; CHECK-FIX-NEXT: vorr q0, q0, q0 2849; CHECK-FIX-NEXT: cmp r0, #0 2850; CHECK-FIX-NEXT: bxeq lr 2851; CHECK-FIX-NEXT: .LBB67_1: 2852; CHECK-FIX-NEXT: vmov.16 d0[0], r1 2853; CHECK-FIX-NEXT: .LBB67_2: @ =>This Inner Loop Header: Depth=1 2854; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 2855; CHECK-FIX-NEXT: subs r0, r0, #1 2856; CHECK-FIX-NEXT: vmov.16 d16[0], r1 2857; CHECK-FIX-NEXT: aesd.8 q8, q0 2858; CHECK-FIX-NEXT: aesimc.8 q8, q8 2859; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 2860; CHECK-FIX-NEXT: bne .LBB67_2 2861; CHECK-FIX-NEXT: @ %bb.3: 2862; CHECK-FIX-NEXT: bx lr 2863 %5 = icmp eq i32 %0, 0 2864 br i1 %5, label %12, label %6 2865 28666: 2867 %7 = bitcast <16 x i8> %2 to <8 x i16> 2868 %8 = insertelement <8 x i16> %7, i16 %1, i64 0 2869 %9 = bitcast <8 x i16> %8 to <16 x i8> 2870 %10 = bitcast ptr %3 to ptr 2871 %11 = bitcast ptr %3 to ptr 2872 br label %13 2873 287412: 2875 ret void 2876 287713: 2878 %14 = phi i32 [ 0, %6 ], [ %20, %13 ] 2879 %15 = load <8 x i16>, ptr %10, align 8 2880 %16 = insertelement <8 x i16> %15, i16 %1, i64 0 2881 %17 = bitcast <8 x i16> %16 to <16 x i8> 2882 store i16 %1, ptr %11, align 8 2883 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9) 2884 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) 2885 store <16 x i8> %19, ptr %3, align 8 2886 %20 = add nuw i32 %14, 1 2887 %21 = icmp eq i32 %20, %0 2888 br i1 %21, label %12, label %13 2889} 2890 2891define arm_aapcs_vfpcc void @aesd_set32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { 2892; CHECK-FIX-NOSCHED-LABEL: aesd_set32_via_ptr: 2893; CHECK-FIX-NOSCHED: @ %bb.0: 2894; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 2895; CHECK-FIX-NOSCHED-NEXT: ldr r0, [r0] 2896; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] 2897; CHECK-FIX-NOSCHED-NEXT: vmov.32 d0[0], r0 2898; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r0 2899; 
CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0 2900; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 2901; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] 2902; CHECK-FIX-NOSCHED-NEXT: bx lr 2903; 2904; CHECK-CORTEX-FIX-LABEL: aesd_set32_via_ptr: 2905; CHECK-CORTEX-FIX: @ %bb.0: 2906; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 2907; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] 2908; CHECK-CORTEX-FIX-NEXT: ldr r0, [r0] 2909; CHECK-CORTEX-FIX-NEXT: vmov.32 d0[0], r0 2910; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r0 2911; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0 2912; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 2913; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] 2914; CHECK-CORTEX-FIX-NEXT: bx lr 2915 %4 = load i32, ptr %0, align 4 2916 %5 = bitcast ptr %2 to ptr 2917 %6 = load <4 x i32>, ptr %5, align 8 2918 %7 = insertelement <4 x i32> %6, i32 %4, i64 0 2919 %8 = bitcast <4 x i32> %7 to <16 x i8> 2920 %9 = bitcast <16 x i8> %1 to <4 x i32> 2921 %10 = insertelement <4 x i32> %9, i32 %4, i64 0 2922 %11 = bitcast <4 x i32> %10 to <16 x i8> 2923 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11) 2924 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12) 2925 store <16 x i8> %13, ptr %2, align 8 2926 ret void 2927} 2928 2929define arm_aapcs_vfpcc void @aesd_set32_via_val(i32 %0, <16 x i8> %1, ptr %2) nounwind { 2930; CHECK-FIX-LABEL: aesd_set32_via_val: 2931; CHECK-FIX: @ %bb.0: 2932; CHECK-FIX-NEXT: vorr q0, q0, q0 2933; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] 2934; CHECK-FIX-NEXT: vmov.32 d0[0], r0 2935; CHECK-FIX-NEXT: vmov.32 d16[0], r0 2936; CHECK-FIX-NEXT: aesd.8 q8, q0 2937; CHECK-FIX-NEXT: aesimc.8 q8, q8 2938; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] 2939; CHECK-FIX-NEXT: bx lr 2940 %4 = bitcast ptr %2 to ptr 2941 %5 = load <4 x i32>, ptr %4, align 8 2942 %6 = insertelement <4 x i32> %5, i32 %0, i64 0 2943 %7 = bitcast <4 x i32> %6 to <16 x i8> 2944 %8 = bitcast <16 x i8> %1 to <4 x i32> 2945 %9 = insertelement <4 x i32> %8, i32 %0, i64 0 2946 %10 = bitcast <4 x 
i32> %9 to <16 x i8> 2947 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10) 2948 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11) 2949 store <16 x i8> %12, ptr %2, align 8 2950 ret void 2951} 2952 2953define arm_aapcs_vfpcc void @aesd_set32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 2954; CHECK-FIX-LABEL: aesd_set32_cond_via_ptr: 2955; CHECK-FIX: @ %bb.0: 2956; CHECK-FIX-NEXT: vorr q0, q0, q0 2957; CHECK-FIX-NEXT: cmp r0, #0 2958; CHECK-FIX-NEXT: beq .LBB70_2 2959; CHECK-FIX-NEXT: @ %bb.1: 2960; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 2961; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32] 2962; CHECK-FIX-NEXT: cmp r0, #0 2963; CHECK-FIX-NEXT: bne .LBB70_3 2964; CHECK-FIX-NEXT: b .LBB70_4 2965; CHECK-FIX-NEXT: .LBB70_2: 2966; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 2967; CHECK-FIX-NEXT: cmp r0, #0 2968; CHECK-FIX-NEXT: beq .LBB70_4 2969; CHECK-FIX-NEXT: .LBB70_3: 2970; CHECK-FIX-NEXT: vld1.32 {d0[0]}, [r1:32] 2971; CHECK-FIX-NEXT: .LBB70_4: 2972; CHECK-FIX-NEXT: aesd.8 q8, q0 2973; CHECK-FIX-NEXT: aesimc.8 q8, q8 2974; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 2975; CHECK-FIX-NEXT: bx lr 2976 br i1 %0, label %5, label %10 2977 29785: 2979 %6 = load i32, ptr %1, align 4 2980 %7 = bitcast ptr %3 to ptr 2981 %8 = load <4 x i32>, ptr %7, align 8 2982 %9 = insertelement <4 x i32> %8, i32 %6, i64 0 2983 br label %13 2984 298510: 2986 %11 = bitcast ptr %3 to ptr 2987 %12 = load <4 x i32>, ptr %11, align 8 2988 br label %13 2989 299013: 2991 %14 = phi <4 x i32> [ %9, %5 ], [ %12, %10 ] 2992 br i1 %0, label %15, label %19 2993 299415: 2995 %16 = load i32, ptr %1, align 4 2996 %17 = bitcast <16 x i8> %2 to <4 x i32> 2997 %18 = insertelement <4 x i32> %17, i32 %16, i64 0 2998 br label %21 2999 300019: 3001 %20 = bitcast <16 x i8> %2 to <4 x i32> 3002 br label %21 3003 300421: 3005 %22 = phi <4 x i32> [ %18, %15 ], [ %20, %19 ] 3006 %23 = bitcast <4 x i32> %14 to <16 x i8> 3007 %24 = bitcast <4 x i32> %22 to <16 x i8> 3008 %25 = 
call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24) 3009 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25) 3010 store <16 x i8> %26, ptr %3, align 8 3011 ret void 3012} 3013 3014define arm_aapcs_vfpcc void @aesd_set32_cond_via_val(i1 zeroext %0, i32 %1, <16 x i8> %2, ptr %3) nounwind { 3015; CHECK-FIX-LABEL: aesd_set32_cond_via_val: 3016; CHECK-FIX: @ %bb.0: 3017; CHECK-FIX-NEXT: vorr q0, q0, q0 3018; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 3019; CHECK-FIX-NEXT: cmp r0, #0 3020; CHECK-FIX-NEXT: beq .LBB71_2 3021; CHECK-FIX-NEXT: @ %bb.1: 3022; CHECK-FIX-NEXT: vmov.32 d16[0], r1 3023; CHECK-FIX-NEXT: .LBB71_2: @ %select.end 3024; CHECK-FIX-NEXT: cmp r0, #0 3025; CHECK-FIX-NEXT: beq .LBB71_4 3026; CHECK-FIX-NEXT: @ %bb.3: 3027; CHECK-FIX-NEXT: vmov.32 d0[0], r1 3028; CHECK-FIX-NEXT: .LBB71_4: @ %select.end2 3029; CHECK-FIX-NEXT: aesd.8 q8, q0 3030; CHECK-FIX-NEXT: aesimc.8 q8, q8 3031; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 3032; CHECK-FIX-NEXT: bx lr 3033 %5 = bitcast ptr %3 to ptr 3034 %6 = load <4 x i32>, ptr %5, align 8 3035 %7 = insertelement <4 x i32> %6, i32 %1, i64 0 3036 %8 = select i1 %0, <4 x i32> %7, <4 x i32> %6 3037 %9 = bitcast <16 x i8> %2 to <4 x i32> 3038 %10 = insertelement <4 x i32> %9, i32 %1, i64 0 3039 %11 = select i1 %0, <4 x i32> %10, <4 x i32> %9 3040 %12 = bitcast <4 x i32> %8 to <16 x i8> 3041 %13 = bitcast <4 x i32> %11 to <16 x i8> 3042 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13) 3043 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14) 3044 store <16 x i8> %15, ptr %3, align 8 3045 ret void 3046} 3047 3048define arm_aapcs_vfpcc void @aesd_set32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 3049; CHECK-FIX-LABEL: aesd_set32_loop_via_ptr: 3050; CHECK-FIX: @ %bb.0: 3051; CHECK-FIX-NEXT: vorr q0, q0, q0 3052; CHECK-FIX-NEXT: ldr r1, [r1] 3053; CHECK-FIX-NEXT: cmp r0, #0 3054; CHECK-FIX-NEXT: str r1, [r2] 3055; CHECK-FIX-NEXT: bxeq lr 3056; CHECK-FIX-NEXT: .LBB72_1: 
3057; CHECK-FIX-NEXT: vmov.32 d0[0], r1 3058; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 3059; CHECK-FIX-NEXT: .LBB72_2: @ =>This Inner Loop Header: Depth=1 3060; CHECK-FIX-NEXT: aesd.8 q8, q0 3061; CHECK-FIX-NEXT: subs r0, r0, #1 3062; CHECK-FIX-NEXT: aesimc.8 q8, q8 3063; CHECK-FIX-NEXT: bne .LBB72_2 3064; CHECK-FIX-NEXT: @ %bb.3: 3065; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 3066; CHECK-FIX-NEXT: bx lr 3067 %5 = load i32, ptr %1, align 4 3068 %6 = bitcast <16 x i8> %2 to <4 x i32> 3069 %7 = insertelement <4 x i32> %6, i32 %5, i64 0 3070 %8 = bitcast <4 x i32> %7 to <16 x i8> 3071 %9 = bitcast ptr %3 to ptr 3072 store i32 %5, ptr %9, align 8 3073 %10 = icmp eq i32 %0, 0 3074 br i1 %10, label %14, label %11 3075 307611: 3077 %12 = load <16 x i8>, ptr %3, align 8 3078 br label %15 3079 308013: 3081 store <16 x i8> %19, ptr %3, align 8 3082 br label %14 3083 308414: 3085 ret void 3086 308715: 3088 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ] 3089 %17 = phi i32 [ 0, %11 ], [ %20, %15 ] 3090 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8) 3091 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) 3092 %20 = add nuw i32 %17, 1 3093 %21 = icmp eq i32 %20, %0 3094 br i1 %21, label %13, label %15 3095} 3096 3097define arm_aapcs_vfpcc void @aesd_set32_loop_via_val(i32 %0, i32 %1, <16 x i8> %2, ptr %3) nounwind { 3098; CHECK-FIX-LABEL: aesd_set32_loop_via_val: 3099; CHECK-FIX: @ %bb.0: 3100; CHECK-FIX-NEXT: vorr q0, q0, q0 3101; CHECK-FIX-NEXT: cmp r0, #0 3102; CHECK-FIX-NEXT: bxeq lr 3103; CHECK-FIX-NEXT: .LBB73_1: 3104; CHECK-FIX-NEXT: vmov.32 d0[0], r1 3105; CHECK-FIX-NEXT: .LBB73_2: @ =>This Inner Loop Header: Depth=1 3106; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 3107; CHECK-FIX-NEXT: subs r0, r0, #1 3108; CHECK-FIX-NEXT: vmov.32 d16[0], r1 3109; CHECK-FIX-NEXT: aesd.8 q8, q0 3110; CHECK-FIX-NEXT: aesimc.8 q8, q8 3111; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 3112; CHECK-FIX-NEXT: bne .LBB73_2 3113; CHECK-FIX-NEXT: @ %bb.3: 3114; 
CHECK-FIX-NEXT: bx lr 3115 %5 = icmp eq i32 %0, 0 3116 br i1 %5, label %12, label %6 3117 31186: 3119 %7 = bitcast <16 x i8> %2 to <4 x i32> 3120 %8 = insertelement <4 x i32> %7, i32 %1, i64 0 3121 %9 = bitcast <4 x i32> %8 to <16 x i8> 3122 %10 = bitcast ptr %3 to ptr 3123 %11 = bitcast ptr %3 to ptr 3124 br label %13 3125 312612: 3127 ret void 3128 312913: 3130 %14 = phi i32 [ 0, %6 ], [ %20, %13 ] 3131 %15 = load <4 x i32>, ptr %10, align 8 3132 %16 = insertelement <4 x i32> %15, i32 %1, i64 0 3133 %17 = bitcast <4 x i32> %16 to <16 x i8> 3134 store i32 %1, ptr %11, align 8 3135 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9) 3136 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) 3137 store <16 x i8> %19, ptr %3, align 8 3138 %20 = add nuw i32 %14, 1 3139 %21 = icmp eq i32 %20, %0 3140 br i1 %21, label %12, label %13 3141} 3142 3143define arm_aapcs_vfpcc void @aesd_set64_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { 3144; CHECK-FIX-NOSCHED-LABEL: aesd_set64_via_ptr: 3145; CHECK-FIX-NOSCHED: @ %bb.0: 3146; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 3147; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] 3148; CHECK-FIX-NOSCHED-NEXT: vldr d0, [r0] 3149; CHECK-FIX-NOSCHED-NEXT: vorr d16, d0, d0 3150; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0 3151; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 3152; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] 3153; CHECK-FIX-NOSCHED-NEXT: bx lr 3154; 3155; CHECK-CORTEX-FIX-LABEL: aesd_set64_via_ptr: 3156; CHECK-CORTEX-FIX: @ %bb.0: 3157; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 3158; CHECK-CORTEX-FIX-NEXT: vldr d0, [r0] 3159; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] 3160; CHECK-CORTEX-FIX-NEXT: vorr d16, d0, d0 3161; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0 3162; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 3163; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] 3164; CHECK-CORTEX-FIX-NEXT: bx lr 3165 %4 = load i64, ptr %0, align 8 3166 %5 = bitcast ptr %2 to ptr 3167 %6 = load <2 x i64>, ptr %5, align 8 3168 %7 = 
insertelement <2 x i64> %6, i64 %4, i64 0 3169 %8 = bitcast <2 x i64> %7 to <16 x i8> 3170 %9 = bitcast <16 x i8> %1 to <2 x i64> 3171 %10 = insertelement <2 x i64> %9, i64 %4, i64 0 3172 %11 = bitcast <2 x i64> %10 to <16 x i8> 3173 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11) 3174 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12) 3175 store <16 x i8> %13, ptr %2, align 8 3176 ret void 3177} 3178 3179define arm_aapcs_vfpcc void @aesd_set64_via_val(i64 %0, <16 x i8> %1, ptr %2) nounwind { 3180; CHECK-FIX-LABEL: aesd_set64_via_val: 3181; CHECK-FIX: @ %bb.0: 3182; CHECK-FIX-NEXT: vorr q0, q0, q0 3183; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 3184; CHECK-FIX-NEXT: vmov.32 d0[0], r0 3185; CHECK-FIX-NEXT: vmov.32 d16[0], r0 3186; CHECK-FIX-NEXT: vmov.32 d0[1], r1 3187; CHECK-FIX-NEXT: vmov.32 d16[1], r1 3188; CHECK-FIX-NEXT: aesd.8 q8, q0 3189; CHECK-FIX-NEXT: aesimc.8 q8, q8 3190; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 3191; CHECK-FIX-NEXT: bx lr 3192 %4 = bitcast ptr %2 to ptr 3193 %5 = load <2 x i64>, ptr %4, align 8 3194 %6 = insertelement <2 x i64> %5, i64 %0, i64 0 3195 %7 = bitcast <2 x i64> %6 to <16 x i8> 3196 %8 = bitcast <16 x i8> %1 to <2 x i64> 3197 %9 = insertelement <2 x i64> %8, i64 %0, i64 0 3198 %10 = bitcast <2 x i64> %9 to <16 x i8> 3199 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10) 3200 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11) 3201 store <16 x i8> %12, ptr %2, align 8 3202 ret void 3203} 3204 3205define arm_aapcs_vfpcc void @aesd_set64_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 3206; CHECK-FIX-NOSCHED-LABEL: aesd_set64_cond_via_ptr: 3207; CHECK-FIX-NOSCHED: @ %bb.0: 3208; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 3209; CHECK-FIX-NOSCHED-NEXT: beq .LBB76_2 3210; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: 3211; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] 3212; CHECK-FIX-NOSCHED-NEXT: vldr d16, [r1] 3213; CHECK-FIX-NOSCHED-NEXT: b .LBB76_3 3214; 
CHECK-FIX-NOSCHED-NEXT: .LBB76_2: 3215; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] 3216; CHECK-FIX-NOSCHED-NEXT: .LBB76_3: 3217; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 3218; CHECK-FIX-NOSCHED-NEXT: vldrne d0, [r1] 3219; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 3220; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0 3221; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 3222; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] 3223; CHECK-FIX-NOSCHED-NEXT: bx lr 3224; 3225; CHECK-CORTEX-FIX-LABEL: aesd_set64_cond_via_ptr: 3226; CHECK-CORTEX-FIX: @ %bb.0: 3227; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 3228; CHECK-CORTEX-FIX-NEXT: beq .LBB76_2 3229; CHECK-CORTEX-FIX-NEXT: @ %bb.1: 3230; CHECK-CORTEX-FIX-NEXT: vldr d18, [r1] 3231; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] 3232; CHECK-CORTEX-FIX-NEXT: vorr d16, d18, d18 3233; CHECK-CORTEX-FIX-NEXT: b .LBB76_3 3234; CHECK-CORTEX-FIX-NEXT: .LBB76_2: 3235; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] 3236; CHECK-CORTEX-FIX-NEXT: .LBB76_3: 3237; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 3238; CHECK-CORTEX-FIX-NEXT: vldrne d0, [r1] 3239; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 3240; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0 3241; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 3242; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] 3243; CHECK-CORTEX-FIX-NEXT: bx lr 3244 br i1 %0, label %5, label %10 3245 32465: 3247 %6 = load i64, ptr %1, align 8 3248 %7 = bitcast ptr %3 to ptr 3249 %8 = load <2 x i64>, ptr %7, align 8 3250 %9 = insertelement <2 x i64> %8, i64 %6, i64 0 3251 br label %13 3252 325310: 3254 %11 = bitcast ptr %3 to ptr 3255 %12 = load <2 x i64>, ptr %11, align 8 3256 br label %13 3257 325813: 3259 %14 = phi <2 x i64> [ %9, %5 ], [ %12, %10 ] 3260 br i1 %0, label %15, label %19 3261 326215: 3263 %16 = load i64, ptr %1, align 8 3264 %17 = bitcast <16 x i8> %2 to <2 x i64> 3265 %18 = insertelement <2 x i64> %17, i64 %16, i64 0 3266 br label %21 3267 326819: 3269 %20 = bitcast <16 x i8> %2 to <2 x i64> 3270 br label %21 3271 327221: 3273 %22 = phi <2 x i64> [ 
%18, %15 ], [ %20, %19 ] 3274 %23 = bitcast <2 x i64> %14 to <16 x i8> 3275 %24 = bitcast <2 x i64> %22 to <16 x i8> 3276 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24) 3277 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25) 3278 store <16 x i8> %26, ptr %3, align 8 3279 ret void 3280} 3281 3282define arm_aapcs_vfpcc void @aesd_set64_cond_via_val(i1 zeroext %0, i64 %1, <16 x i8> %2, ptr %3) nounwind { 3283; CHECK-FIX-LABEL: aesd_set64_cond_via_val: 3284; CHECK-FIX: @ %bb.0: 3285; CHECK-FIX-NEXT: vorr q0, q0, q0 3286; CHECK-FIX-NEXT: ldr r1, [sp] 3287; CHECK-FIX-NEXT: cmp r0, #0 3288; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] 3289; CHECK-FIX-NEXT: beq .LBB77_2 3290; CHECK-FIX-NEXT: @ %bb.1: 3291; CHECK-FIX-NEXT: vmov.32 d16[0], r2 3292; CHECK-FIX-NEXT: vmov.32 d16[1], r3 3293; CHECK-FIX-NEXT: .LBB77_2: @ %select.end 3294; CHECK-FIX-NEXT: cmp r0, #0 3295; CHECK-FIX-NEXT: beq .LBB77_4 3296; CHECK-FIX-NEXT: @ %bb.3: 3297; CHECK-FIX-NEXT: vmov.32 d0[0], r2 3298; CHECK-FIX-NEXT: vmov.32 d0[1], r3 3299; CHECK-FIX-NEXT: .LBB77_4: @ %select.end2 3300; CHECK-FIX-NEXT: aesd.8 q8, q0 3301; CHECK-FIX-NEXT: aesimc.8 q8, q8 3302; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] 3303; CHECK-FIX-NEXT: bx lr 3304 %5 = bitcast ptr %3 to ptr 3305 %6 = load <2 x i64>, ptr %5, align 8 3306 %7 = insertelement <2 x i64> %6, i64 %1, i64 0 3307 %8 = select i1 %0, <2 x i64> %7, <2 x i64> %6 3308 %9 = bitcast <16 x i8> %2 to <2 x i64> 3309 %10 = insertelement <2 x i64> %9, i64 %1, i64 0 3310 %11 = select i1 %0, <2 x i64> %10, <2 x i64> %9 3311 %12 = bitcast <2 x i64> %8 to <16 x i8> 3312 %13 = bitcast <2 x i64> %11 to <16 x i8> 3313 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13) 3314 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14) 3315 store <16 x i8> %15, ptr %3, align 8 3316 ret void 3317} 3318 3319define arm_aapcs_vfpcc void @aesd_set64_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 3320; CHECK-FIX-NOSCHED-LABEL: 
aesd_set64_loop_via_ptr: 3321; CHECK-FIX-NOSCHED: @ %bb.0: 3322; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 3323; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r11, lr} 3324; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r11, lr} 3325; CHECK-FIX-NOSCHED-NEXT: ldrd r4, r5, [r1] 3326; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 3327; CHECK-FIX-NOSCHED-NEXT: strd r4, r5, [r2] 3328; CHECK-FIX-NOSCHED-NEXT: beq .LBB78_4 3329; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: 3330; CHECK-FIX-NOSCHED-NEXT: vmov d0, r4, r5 3331; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] 3332; CHECK-FIX-NOSCHED-NEXT: .LBB78_2: @ =>This Inner Loop Header: Depth=1 3333; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0 3334; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1 3335; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 3336; CHECK-FIX-NOSCHED-NEXT: bne .LBB78_2 3337; CHECK-FIX-NOSCHED-NEXT: @ %bb.3: 3338; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] 3339; CHECK-FIX-NOSCHED-NEXT: .LBB78_4: 3340; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r11, pc} 3341; 3342; CHECK-CORTEX-FIX-LABEL: aesd_set64_loop_via_ptr: 3343; CHECK-CORTEX-FIX: @ %bb.0: 3344; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 3345; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r11, lr} 3346; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r11, lr} 3347; CHECK-CORTEX-FIX-NEXT: ldrd r4, r5, [r1] 3348; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 3349; CHECK-CORTEX-FIX-NEXT: strd r4, r5, [r2] 3350; CHECK-CORTEX-FIX-NEXT: popeq {r4, r5, r11, pc} 3351; CHECK-CORTEX-FIX-NEXT: .LBB78_1: 3352; CHECK-CORTEX-FIX-NEXT: vmov d0, r4, r5 3353; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] 3354; CHECK-CORTEX-FIX-NEXT: .LBB78_2: @ =>This Inner Loop Header: Depth=1 3355; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0 3356; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1 3357; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 3358; CHECK-CORTEX-FIX-NEXT: bne .LBB78_2 3359; CHECK-CORTEX-FIX-NEXT: @ %bb.3: 3360; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] 3361; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r11, pc} 3362 %5 = load i64, ptr %1, align 8 3363 %6 = bitcast <16 x i8> 
%2 to <2 x i64> 3364 %7 = insertelement <2 x i64> %6, i64 %5, i64 0 3365 %8 = bitcast <2 x i64> %7 to <16 x i8> 3366 %9 = bitcast ptr %3 to ptr 3367 store i64 %5, ptr %9, align 8 3368 %10 = icmp eq i32 %0, 0 3369 br i1 %10, label %14, label %11 3370 337111: 3372 %12 = load <16 x i8>, ptr %3, align 8 3373 br label %15 3374 337513: 3376 store <16 x i8> %19, ptr %3, align 8 3377 br label %14 3378 337914: 3380 ret void 3381 338215: 3383 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ] 3384 %17 = phi i32 [ 0, %11 ], [ %20, %15 ] 3385 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8) 3386 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) 3387 %20 = add nuw i32 %17, 1 3388 %21 = icmp eq i32 %20, %0 3389 br i1 %21, label %13, label %15 3390} 3391 3392define arm_aapcs_vfpcc void @aesd_set64_loop_via_val(i32 %0, i64 %1, <16 x i8> %2, ptr %3) nounwind { 3393; CHECK-FIX-LABEL: aesd_set64_loop_via_val: 3394; CHECK-FIX: @ %bb.0: 3395; CHECK-FIX-NEXT: vorr q0, q0, q0 3396; CHECK-FIX-NEXT: cmp r0, #0 3397; CHECK-FIX-NEXT: bxeq lr 3398; CHECK-FIX-NEXT: .LBB79_1: 3399; CHECK-FIX-NEXT: vmov.32 d0[0], r2 3400; CHECK-FIX-NEXT: ldr r1, [sp] 3401; CHECK-FIX-NEXT: vmov.32 d0[1], r3 3402; CHECK-FIX-NEXT: .LBB79_2: @ =>This Inner Loop Header: Depth=1 3403; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] 3404; CHECK-FIX-NEXT: subs r0, r0, #1 3405; CHECK-FIX-NEXT: vmov.32 d16[0], r2 3406; CHECK-FIX-NEXT: vmov.32 d16[1], r3 3407; CHECK-FIX-NEXT: aesd.8 q8, q0 3408; CHECK-FIX-NEXT: aesimc.8 q8, q8 3409; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] 3410; CHECK-FIX-NEXT: bne .LBB79_2 3411; CHECK-FIX-NEXT: @ %bb.3: 3412; CHECK-FIX-NEXT: bx lr 3413 %5 = icmp eq i32 %0, 0 3414 br i1 %5, label %12, label %6 3415 34166: 3417 %7 = bitcast <16 x i8> %2 to <2 x i64> 3418 %8 = insertelement <2 x i64> %7, i64 %1, i64 0 3419 %9 = bitcast <2 x i64> %8 to <16 x i8> 3420 %10 = bitcast ptr %3 to ptr 3421 %11 = bitcast ptr %3 to ptr 3422 br label %13 3423 342412: 3425 ret void 3426 342713: 3428 
%14 = phi i32 [ 0, %6 ], [ %20, %13 ] 3429 %15 = load <2 x i64>, ptr %10, align 8 3430 %16 = insertelement <2 x i64> %15, i64 %1, i64 0 3431 %17 = bitcast <2 x i64> %16 to <16 x i8> 3432 store i64 %1, ptr %11, align 8 3433 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9) 3434 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) 3435 store <16 x i8> %19, ptr %3, align 8 3436 %20 = add nuw i32 %14, 1 3437 %21 = icmp eq i32 %20, %0 3438 br i1 %21, label %12, label %13 3439} 3440 
; A 16-bit value loaded through %0 is inserted into lane 0 of both aesd
; operands: the vector loaded from %2 and the register argument %1. The
; aesd + aesimc result is stored back through %2.
3441define arm_aapcs_vfpcc void @aesd_setf16_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { 3442; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_via_ptr: 3443; CHECK-FIX-NOSCHED: @ %bb.0: 3444; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 3445; CHECK-FIX-NOSCHED-NEXT: ldrh r0, [r0] 3446; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] 3447; CHECK-FIX-NOSCHED-NEXT: vmov.16 d0[0], r0 3448; CHECK-FIX-NOSCHED-NEXT: vmov.16 d16[0], r0 3449; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0 3450; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 3451; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] 3452; CHECK-FIX-NOSCHED-NEXT: bx lr 3453; 3454; CHECK-CORTEX-FIX-LABEL: aesd_setf16_via_ptr: 3455; CHECK-CORTEX-FIX: @ %bb.0: 3456; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 3457; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] 3458; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r0] 3459; CHECK-CORTEX-FIX-NEXT: vmov.16 d0[0], r0 3460; CHECK-CORTEX-FIX-NEXT: vmov.16 d16[0], r0 3461; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0 3462; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 3463; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] 3464; CHECK-CORTEX-FIX-NEXT: bx lr 3465 %4 = bitcast ptr %0 to ptr 3466 %5 = load i16, ptr %4, align 2 3467 %6 = bitcast ptr %2 to ptr 3468 %7 = load <8 x i16>, ptr %6, align 8 3469 %8 = insertelement <8 x i16> %7, i16 %5, i64 0 3470 %9 = bitcast <8 x i16> %8 to <16 x i8> 3471 %10 = bitcast <16 x i8> %1 to <8 x i16> 3472 %11 = insertelement <8 x i16> %10, i16 %5, i64 0 3473 %12 = bitcast <8 x i16> %11 to <16 x i8> 
3474 %13 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %9, <16 x i8> %12) 3475 %14 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %13) 3476 store <16 x i8> %14, ptr %2, align 8 3477 ret void 3478} 3479 
; half %0 is bitcast to i16 and inserted into lane 0 of both aesd operands:
; the vector loaded from %2 and the register argument %1. The aesd + aesimc
; result is stored back through %2.
3480define arm_aapcs_vfpcc void @aesd_setf16_via_val(half %0, <16 x i8> %1, ptr %2) nounwind { 3481; CHECK-FIX-LABEL: aesd_setf16_via_val: 3482; CHECK-FIX: @ %bb.0: 3483; CHECK-FIX-NEXT: vorr q1, q1, q1 3484; CHECK-FIX-NEXT: vmov r1, s0 3485; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r0] 3486; CHECK-FIX-NEXT: vmov.16 d2[0], r1 3487; CHECK-FIX-NEXT: vmov.16 d16[0], r1 3488; CHECK-FIX-NEXT: aesd.8 q8, q1 3489; CHECK-FIX-NEXT: aesimc.8 q8, q8 3490; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] 3491; CHECK-FIX-NEXT: bx lr 3492 %4 = bitcast ptr %2 to ptr 3493 %5 = load <8 x i16>, ptr %4, align 8 3494 %6 = bitcast half %0 to i16 3495 %7 = insertelement <8 x i16> %5, i16 %6, i64 0 3496 %8 = bitcast <8 x i16> %7 to <16 x i8> 3497 %9 = bitcast <16 x i8> %1 to <8 x i16> 3498 %10 = insertelement <8 x i16> %9, i16 %6, i64 0 3499 %11 = bitcast <8 x i16> %10 to <16 x i8> 3500 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11) 3501 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12) 3502 store <16 x i8> %13, ptr %2, align 8 3503 ret void 3504} 3505 
; Conditional lane-0 update of both aesd operands, written as two branch
; diamonds on i1 %0 that are joined by <8 x half> phis. When %0 is true, an
; i16 loaded through %1 is inserted into lane 0 of the vector loaded from %3
; and into lane 0 of %2; otherwise both vectors are used unchanged. The
; aesd + aesimc result is stored to %3.
3506define arm_aapcs_vfpcc void @aesd_setf16_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 3507; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_ptr: 3508; CHECK-FIX-NOSCHED: @ %bb.0: 3509; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} 3510; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} 3511; CHECK-FIX-NOSCHED-NEXT: .pad #12 3512; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12 3513; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 3514; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_3 3515; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: 3516; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] 3517; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3] 3518; CHECK-FIX-NOSCHED-NEXT: ldrh r7, [r1] 3519; 
CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0] 3520; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3] 3521; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] 3522; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill 3523; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2] 3524; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill 3525; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1] 3526; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill 3527; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d16[1] 3528; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 3529; CHECK-FIX-NOSCHED-NEXT: bne .LBB82_4 3530; CHECK-FIX-NOSCHED-NEXT: .LBB82_2: 3531; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3] 3532; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2] 3533; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1] 3534; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0] 3535; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3] 3536; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2] 3537; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1] 3538; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r1, d0[0] 3539; CHECK-FIX-NOSCHED-NEXT: b .LBB82_5 3540; CHECK-FIX-NOSCHED-NEXT: .LBB82_3: 3541; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #8 3542; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r2:32] 3543; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r3:32] 3544; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #4 3545; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r3:32] 3546; CHECK-FIX-NOSCHED-NEXT: add r3, r2, #12 3547; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r3:32] 3548; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[3] 3549; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, d17[0] 3550; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d16[3] 3551; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] 3552; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d16[0] 3553; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #8] @ 4-byte Spill 3554; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[2] 3555; CHECK-FIX-NOSCHED-NEXT: str r3, [sp, #4] @ 4-byte Spill 3556; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d17[1] 3557; CHECK-FIX-NOSCHED-NEXT: str r3, [sp] @ 4-byte Spill 3558; CHECK-FIX-NOSCHED-NEXT: 
vmov.u16 r3, d16[1] 3559; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 3560; CHECK-FIX-NOSCHED-NEXT: beq .LBB82_2 3561; CHECK-FIX-NOSCHED-NEXT: .LBB82_4: 3562; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r0, d1[3] 3563; CHECK-FIX-NOSCHED-NEXT: ldrh r1, [r1] 3564; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d1[2] 3565; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d1[1] 3566; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r6, d1[0] 3567; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d0[3] 3568; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d0[2] 3569; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d0[1] 3570; CHECK-FIX-NOSCHED-NEXT: .LBB82_5: 3571; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r1, r8, lsl #16 3572; CHECK-FIX-NOSCHED-NEXT: pkhbt r3, r7, r3, lsl #16 3573; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r4, r0, lsl #16 3574; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r1 3575; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, lr, r12, lsl #16 3576; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r3 3577; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r1 3578; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r11, r10, lsl #16 3579; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r1 3580; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r6, r5, lsl #16 3581; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r1 3582; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp] @ 4-byte Reload 3583; CHECK-FIX-NOSCHED-NEXT: pkhbt r1, r9, r1, lsl #16 3584; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0 3585; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload 3586; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r1 3587; CHECK-FIX-NOSCHED-NEXT: ldr r1, [sp, #4] @ 4-byte Reload 3588; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r1, r0, lsl #16 3589; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0 3590; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9 3591; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 3592; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] 3593; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12 3594; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} 3595; 3596; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_ptr: 3597; CHECK-CORTEX-FIX: @ %bb.0: 3598; CHECK-CORTEX-FIX-NEXT: .save {r4, 
r5, r6, r7, r8, r9, r10, r11, lr} 3599; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} 3600; CHECK-CORTEX-FIX-NEXT: .pad #24 3601; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #24 3602; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 3603; CHECK-CORTEX-FIX-NEXT: beq .LBB82_2 3604; CHECK-CORTEX-FIX-NEXT: @ %bb.1: 3605; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] 3606; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] 3607; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[0] 3608; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2] 3609; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill 3610; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2] 3611; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill 3612; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3] 3613; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill 3614; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1] 3615; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #12] @ 4-byte Spill 3616; CHECK-CORTEX-FIX-NEXT: ldrh r3, [r1] 3617; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill 3618; CHECK-CORTEX-FIX-NEXT: mov r3, r6 3619; CHECK-CORTEX-FIX-NEXT: b .LBB82_3 3620; CHECK-CORTEX-FIX-NEXT: .LBB82_2: 3621; CHECK-CORTEX-FIX-NEXT: add r3, r2, #8 3622; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r2:32] 3623; CHECK-CORTEX-FIX-NEXT: add r7, r2, #4 3624; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r3:32] 3625; CHECK-CORTEX-FIX-NEXT: add r3, r2, #12 3626; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r7:32] 3627; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r3:32] 3628; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[0] 3629; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #16] @ 4-byte Spill 3630; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] 3631; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[1] 3632; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #20] @ 4-byte Spill 3633; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[2] 3634; CHECK-CORTEX-FIX-NEXT: str r7, [sp, #12] @ 4-byte Spill 3635; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d17[2] 3636; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill 3637; 
CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[3] 3638; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill 3639; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0] 3640; CHECK-CORTEX-FIX-NEXT: .LBB82_3: 3641; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d17[3] 3642; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 3643; CHECK-CORTEX-FIX-NEXT: beq .LBB82_5 3644; CHECK-CORTEX-FIX-NEXT: @ %bb.4: 3645; CHECK-CORTEX-FIX-NEXT: ldrh r0, [r1] 3646; CHECK-CORTEX-FIX-NEXT: b .LBB82_6 3647; CHECK-CORTEX-FIX-NEXT: .LBB82_5: 3648; CHECK-CORTEX-FIX-NEXT: vmov.u16 r0, d0[0] 3649; CHECK-CORTEX-FIX-NEXT: .LBB82_6: 3650; CHECK-CORTEX-FIX-NEXT: str r0, [sp] @ 4-byte Spill 3651; CHECK-CORTEX-FIX-NEXT: ldr r0, [sp, #8] @ 4-byte Reload 3652; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #4] @ 4-byte Reload 3653; CHECK-CORTEX-FIX-NEXT: pkhbt r9, r7, r4, lsl #16 3654; CHECK-CORTEX-FIX-NEXT: ldr r4, [sp, #20] @ 4-byte Reload 3655; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d0[1] 3656; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d0[2] 3657; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d0[3] 3658; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d1[0] 3659; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d1[1] 3660; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d1[2] 3661; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d1[3] 3662; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r0, r1, lsl #16 3663; CHECK-CORTEX-FIX-NEXT: ldr r1, [sp, #12] @ 4-byte Reload 3664; CHECK-CORTEX-FIX-NEXT: pkhbt r0, lr, r8, lsl #16 3665; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r12, lsl #16 3666; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r11, r5, lsl #16 3667; CHECK-CORTEX-FIX-NEXT: pkhbt r1, r3, r1, lsl #16 3668; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp, #16] @ 4-byte Reload 3669; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r3, r4, lsl #16 3670; CHECK-CORTEX-FIX-NEXT: ldr r3, [sp] @ 4-byte Reload 3671; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r4 3672; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r1 3673; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r7 3674; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], r9 3675; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r10, lsl #16 3676; 
CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r3 3677; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r5 3678; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r6 3679; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r0 3680; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8 3681; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9 3682; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] 3683; CHECK-CORTEX-FIX-NEXT: add sp, sp, #24 3684; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} 3685 br i1 %0, label %5, label %12 3686 36875: 3688 %6 = bitcast ptr %1 to ptr 3689 %7 = load i16, ptr %6, align 2 3690 %8 = bitcast ptr %3 to ptr 3691 %9 = load <8 x i16>, ptr %8, align 8 3692 %10 = insertelement <8 x i16> %9, i16 %7, i64 0 3693 %11 = bitcast <8 x i16> %10 to <8 x half> 3694 br label %15 3695 369612: 3697 %13 = bitcast ptr %3 to ptr 3698 %14 = load <8 x half>, ptr %13, align 8 3699 br label %15 3700 370115: 3702 %16 = phi <8 x half> [ %11, %5 ], [ %14, %12 ] 3703 br i1 %0, label %17, label %23 3704 370517: 3706 %18 = bitcast ptr %1 to ptr 3707 %19 = load i16, ptr %18, align 2 3708 %20 = bitcast <16 x i8> %2 to <8 x i16> 3709 %21 = insertelement <8 x i16> %20, i16 %19, i64 0 3710 %22 = bitcast <8 x i16> %21 to <8 x half> 3711 br label %25 3712 371323: 3714 %24 = bitcast <16 x i8> %2 to <8 x half> 3715 br label %25 3716 371725: 3718 %26 = phi <8 x half> [ %22, %17 ], [ %24, %23 ] 3719 %27 = bitcast <8 x half> %16 to <16 x i8> 3720 %28 = bitcast <8 x half> %26 to <16 x i8> 3721 %29 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %27, <16 x i8> %28) 3722 %30 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %29) 3723 store <16 x i8> %30, ptr %3, align 8 3724 ret void 3725} 3726 
; Conditional lane-0 update of both aesd operands, written as two branch
; diamonds on i1 %0 that are joined by <8 x half> phis. When %0 is true,
; half %1 (bitcast to i16) is inserted into lane 0 of the vector loaded from
; %3 and into lane 0 of %2; otherwise both vectors are used unchanged. The
; aesd + aesimc result is stored to %3.
3727define arm_aapcs_vfpcc void @aesd_setf16_cond_via_val(i1 zeroext %0, half %1, <16 x i8> %2, ptr %3) nounwind { 3728; CHECK-FIX-NOSCHED-LABEL: aesd_setf16_cond_via_val: 3729; CHECK-FIX-NOSCHED: @ %bb.0: 3730; CHECK-FIX-NOSCHED-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} 3731; CHECK-FIX-NOSCHED-NEXT: push {r4, r5, r6, r7, r8, r9, r10, 
r11, lr} 3732; CHECK-FIX-NOSCHED-NEXT: .pad #12 3733; CHECK-FIX-NOSCHED-NEXT: sub sp, sp, #12 3734; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 3735; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_2 3736; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: 3737; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r1] 3738; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s2, s0 3739; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1] 3740; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3] 3741; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2] 3742; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] 3743; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1] 3744; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill 3745; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0] 3746; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill 3747; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3] 3748; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill 3749; CHECK-FIX-NOSCHED-NEXT: b .LBB83_3 3750; CHECK-FIX-NOSCHED-NEXT: .LBB83_2: 3751; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #8 3752; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[0]}, [r1:32] 3753; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[0]}, [r2:32] 3754; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #4 3755; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d16[1]}, [r2:32] 3756; CHECK-FIX-NOSCHED-NEXT: add r2, r1, #12 3757; CHECK-FIX-NOSCHED-NEXT: vld1.32 {d17[1]}, [r2:32] 3758; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[1] 3759; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r7, d17[3] 3760; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d17[2] 3761; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r11, d16[2] 3762; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r8, d16[1] 3763; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #8] @ 4-byte Spill 3764; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d17[0] 3765; CHECK-FIX-NOSCHED-NEXT: str r2, [sp, #4] @ 4-byte Spill 3766; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[3] 3767; CHECK-FIX-NOSCHED-NEXT: str r2, [sp] @ 4-byte Spill 3768; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d16[0] 3769; CHECK-FIX-NOSCHED-NEXT: vmov s2, r2 3770; CHECK-FIX-NOSCHED-NEXT: .LBB83_3: 3771; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r9, 
d3[3] 3772; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 3773; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r10, d3[2] 3774; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r2, d3[1] 3775; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r3, d3[0] 3776; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r4, d2[3] 3777; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r5, d2[2] 3778; CHECK-FIX-NOSCHED-NEXT: beq .LBB83_5 3779; CHECK-FIX-NOSCHED-NEXT: @ %bb.4: 3780; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1] 3781; CHECK-FIX-NOSCHED-NEXT: b .LBB83_6 3782; CHECK-FIX-NOSCHED-NEXT: .LBB83_5: 3783; CHECK-FIX-NOSCHED-NEXT: mov r0, lr 3784; CHECK-FIX-NOSCHED-NEXT: vmov.u16 lr, d2[0] 3785; CHECK-FIX-NOSCHED-NEXT: vmov.u16 r12, d2[1] 3786; CHECK-FIX-NOSCHED-NEXT: vmov s0, lr 3787; CHECK-FIX-NOSCHED-NEXT: mov lr, r0 3788; CHECK-FIX-NOSCHED-NEXT: .LBB83_6: 3789; CHECK-FIX-NOSCHED-NEXT: vmov r0, s0 3790; CHECK-FIX-NOSCHED-NEXT: vmov r6, s2 3791; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r0, r12, lsl #16 3792; CHECK-FIX-NOSCHED-NEXT: pkhbt r6, r6, r8, lsl #16 3793; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[0], r0 3794; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r5, r4, lsl #16 3795; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[0], r6 3796; CHECK-FIX-NOSCHED-NEXT: vmov.32 d18[1], r0 3797; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp] @ 4-byte Reload 3798; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r11, r0, lsl #16 3799; CHECK-FIX-NOSCHED-NEXT: vmov.32 d16[1], r0 3800; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r3, r2, lsl #16 3801; CHECK-FIX-NOSCHED-NEXT: ldr r2, [sp, #4] @ 4-byte Reload 3802; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[0], r0 3803; CHECK-FIX-NOSCHED-NEXT: ldr r0, [sp, #8] @ 4-byte Reload 3804; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r2, r0, lsl #16 3805; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[0], r0 3806; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, r10, r9, lsl #16 3807; CHECK-FIX-NOSCHED-NEXT: vmov.32 d19[1], r0 3808; CHECK-FIX-NOSCHED-NEXT: pkhbt r0, lr, r7, lsl #16 3809; CHECK-FIX-NOSCHED-NEXT: vmov.32 d17[1], r0 3810; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q9 3811; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 3812; 
CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] 3813; CHECK-FIX-NOSCHED-NEXT: add sp, sp, #12 3814; CHECK-FIX-NOSCHED-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} 3815; 3816; CHECK-CORTEX-FIX-LABEL: aesd_setf16_cond_via_val: 3817; CHECK-CORTEX-FIX: @ %bb.0: 3818; CHECK-CORTEX-FIX-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} 3819; CHECK-CORTEX-FIX-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} 3820; CHECK-CORTEX-FIX-NEXT: .pad #12 3821; CHECK-CORTEX-FIX-NEXT: sub sp, sp, #12 3822; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 3823; CHECK-CORTEX-FIX-NEXT: beq .LBB83_3 3824; CHECK-CORTEX-FIX-NEXT: @ %bb.1: 3825; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r1] 3826; CHECK-CORTEX-FIX-NEXT: vmov.f32 s2, s0 3827; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[1] 3828; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2] 3829; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3] 3830; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2] 3831; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3] 3832; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #8] @ 4-byte Spill 3833; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[0] 3834; CHECK-CORTEX-FIX-NEXT: str r2, [sp, #4] @ 4-byte Spill 3835; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d17[1] 3836; CHECK-CORTEX-FIX-NEXT: str r2, [sp] @ 4-byte Spill 3837; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 3838; CHECK-CORTEX-FIX-NEXT: bne .LBB83_4 3839; CHECK-CORTEX-FIX-NEXT: .LBB83_2: 3840; CHECK-CORTEX-FIX-NEXT: vmov.u16 lr, d2[0] 3841; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1] 3842; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2] 3843; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3] 3844; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0] 3845; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1] 3846; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2] 3847; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3] 3848; CHECK-CORTEX-FIX-NEXT: vmov s0, lr 3849; CHECK-CORTEX-FIX-NEXT: b .LBB83_5 3850; CHECK-CORTEX-FIX-NEXT: .LBB83_3: 3851; CHECK-CORTEX-FIX-NEXT: add r2, r1, #8 3852; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[0]}, [r1:32] 3853; CHECK-CORTEX-FIX-NEXT: add r3, 
r1, #4 3854; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[0]}, [r2:32] 3855; CHECK-CORTEX-FIX-NEXT: add r2, r1, #12 3856; CHECK-CORTEX-FIX-NEXT: vld1.32 {d16[1]}, [r3:32] 3857; CHECK-CORTEX-FIX-NEXT: vld1.32 {d17[1]}, [r2:32] 3858; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d16[1] 3859; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d16[0] 3860; CHECK-CORTEX-FIX-NEXT: vmov.u16 r7, d16[2] 3861; CHECK-CORTEX-FIX-NEXT: vmov.u16 r10, d16[3] 3862; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #8] @ 4-byte Spill 3863; CHECK-CORTEX-FIX-NEXT: vmov s2, r2 3864; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[0] 3865; CHECK-CORTEX-FIX-NEXT: vmov.u16 r11, d17[2] 3866; CHECK-CORTEX-FIX-NEXT: vmov.u16 r6, d17[3] 3867; CHECK-CORTEX-FIX-NEXT: str r3, [sp, #4] @ 4-byte Spill 3868; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d17[1] 3869; CHECK-CORTEX-FIX-NEXT: str r3, [sp] @ 4-byte Spill 3870; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 3871; CHECK-CORTEX-FIX-NEXT: beq .LBB83_2 3872; CHECK-CORTEX-FIX-NEXT: .LBB83_4: 3873; CHECK-CORTEX-FIX-NEXT: vmov.u16 r8, d2[1] 3874; CHECK-CORTEX-FIX-NEXT: vmov.u16 r3, d2[2] 3875; CHECK-CORTEX-FIX-NEXT: vmov.u16 r4, d2[3] 3876; CHECK-CORTEX-FIX-NEXT: vmov.u16 r9, d3[0] 3877; CHECK-CORTEX-FIX-NEXT: vmov.u16 r2, d3[1] 3878; CHECK-CORTEX-FIX-NEXT: vmov.u16 r5, d3[2] 3879; CHECK-CORTEX-FIX-NEXT: vmov.u16 r12, d3[3] 3880; CHECK-CORTEX-FIX-NEXT: .LBB83_5: 3881; CHECK-CORTEX-FIX-NEXT: pkhbt lr, r11, r6, lsl #16 3882; CHECK-CORTEX-FIX-NEXT: pkhbt r0, r7, r10, lsl #16 3883; CHECK-CORTEX-FIX-NEXT: ldm sp, {r6, r7} @ 8-byte Folded Reload 3884; CHECK-CORTEX-FIX-NEXT: pkhbt r3, r3, r4, lsl #16 3885; CHECK-CORTEX-FIX-NEXT: pkhbt r5, r5, r12, lsl #16 3886; CHECK-CORTEX-FIX-NEXT: pkhbt r2, r9, r2, lsl #16 3887; CHECK-CORTEX-FIX-NEXT: pkhbt r4, r7, r6, lsl #16 3888; CHECK-CORTEX-FIX-NEXT: vmov r7, s2 3889; CHECK-CORTEX-FIX-NEXT: ldr r6, [sp, #8] @ 4-byte Reload 3890; CHECK-CORTEX-FIX-NEXT: pkhbt r7, r7, r6, lsl #16 3891; CHECK-CORTEX-FIX-NEXT: vmov r6, s0 3892; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[0], r7 3893; 
CHECK-CORTEX-FIX-NEXT: vmov.32 d19[0], r4 3894; CHECK-CORTEX-FIX-NEXT: vmov.32 d18[1], r0 3895; CHECK-CORTEX-FIX-NEXT: vmov.32 d19[1], lr 3896; CHECK-CORTEX-FIX-NEXT: pkhbt r6, r6, r8, lsl #16 3897; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[0], r6 3898; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[0], r2 3899; CHECK-CORTEX-FIX-NEXT: vmov.32 d16[1], r3 3900; CHECK-CORTEX-FIX-NEXT: vmov.32 d17[1], r5 3901; CHECK-CORTEX-FIX-NEXT: aesd.8 q9, q8 3902; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q9 3903; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] 3904; CHECK-CORTEX-FIX-NEXT: add sp, sp, #12 3905; CHECK-CORTEX-FIX-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} 3906 br i1 %0, label %5, label %11 3907 39085: 3909 %6 = bitcast ptr %3 to ptr 3910 %7 = load <8 x i16>, ptr %6, align 8 3911 %8 = bitcast half %1 to i16 3912 %9 = insertelement <8 x i16> %7, i16 %8, i64 0 3913 %10 = bitcast <8 x i16> %9 to <8 x half> 3914 br label %14 3915 391611: 3917 %12 = bitcast ptr %3 to ptr 3918 %13 = load <8 x half>, ptr %12, align 8 3919 br label %14 3920 392114: 3922 %15 = phi <8 x half> [ %10, %5 ], [ %13, %11 ] 3923 br i1 %0, label %16, label %21 3924 392516: 3926 %17 = bitcast <16 x i8> %2 to <8 x i16> 3927 %18 = bitcast half %1 to i16 3928 %19 = insertelement <8 x i16> %17, i16 %18, i64 0 3929 %20 = bitcast <8 x i16> %19 to <8 x half> 3930 br label %23 3931 393221: 3933 %22 = bitcast <16 x i8> %2 to <8 x half> 3934 br label %23 3935 393623: 3937 %24 = phi <8 x half> [ %20, %16 ], [ %22, %21 ] 3938 %25 = bitcast <8 x half> %15 to <16 x i8> 3939 %26 = bitcast <8 x half> %24 to <16 x i8> 3940 %27 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %25, <16 x i8> %26) 3941 %28 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %27) 3942 store <16 x i8> %28, ptr %3, align 8 3943 ret void 3944} 3945 
; Loop test with the 16-bit value loaded through %1 before the loop. The
; value is inserted into lane 0 of %2 (the second aesd operand) and is also
; stored to %3. When %0 is non-zero, the vector at %3 is loaded once,
; aesd + aesimc are applied to it on every loop iteration, and the final
; value is stored back to %3.
3946define arm_aapcs_vfpcc void @aesd_setf16_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 3947; CHECK-FIX-LABEL: aesd_setf16_loop_via_ptr: 3948; CHECK-FIX: @ %bb.0: 3949; CHECK-FIX-NEXT: vorr q0, q0, q0 
3950; CHECK-FIX-NEXT: ldrh r1, [r1] 3951; CHECK-FIX-NEXT: cmp r0, #0 3952; CHECK-FIX-NEXT: strh r1, [r2] 3953; CHECK-FIX-NEXT: bxeq lr 3954; CHECK-FIX-NEXT: .LBB84_1: 3955; CHECK-FIX-NEXT: vmov.16 d0[0], r1 3956; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 3957; CHECK-FIX-NEXT: .LBB84_2: @ =>This Inner Loop Header: Depth=1 3958; CHECK-FIX-NEXT: aesd.8 q8, q0 3959; CHECK-FIX-NEXT: subs r0, r0, #1 3960; CHECK-FIX-NEXT: aesimc.8 q8, q8 3961; CHECK-FIX-NEXT: bne .LBB84_2 3962; CHECK-FIX-NEXT: @ %bb.3: 3963; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 3964; CHECK-FIX-NEXT: bx lr 3965 %5 = bitcast ptr %1 to ptr 3966 %6 = load i16, ptr %5, align 2 3967 %7 = bitcast <16 x i8> %2 to <8 x i16> 3968 %8 = insertelement <8 x i16> %7, i16 %6, i64 0 3969 %9 = bitcast <8 x i16> %8 to <16 x i8> 3970 %10 = bitcast ptr %3 to ptr 3971 store i16 %6, ptr %10, align 8 3972 %11 = icmp eq i32 %0, 0 3973 br i1 %11, label %15, label %12 3974 397512: 3976 %13 = load <16 x i8>, ptr %3, align 8 3977 br label %16 3978 397914: 3980 store <16 x i8> %20, ptr %3, align 8 3981 br label %15 3982 398315: 3984 ret void 3985 398616: 3987 %17 = phi <16 x i8> [ %13, %12 ], [ %20, %16 ] 3988 %18 = phi i32 [ 0, %12 ], [ %21, %16 ] 3989 %19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9) 3990 %20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19) 3991 %21 = add nuw i32 %18, 1 3992 %22 = icmp eq i32 %21, %0 3993 br i1 %22, label %14, label %16 3994} 3995 
; Loop test with the half value passed directly as %1. When %0 is non-zero,
; %1 (bitcast to i16) is inserted into lane 0 of %2 once before the loop (the
; second aesd operand); every iteration reloads the vector at %3, overwrites
; its lane 0 with the same i16, stores half %1 to %3, runs aesd + aesimc and
; stores the result back through %3.
3996define arm_aapcs_vfpcc void @aesd_setf16_loop_via_val(i32 %0, half %1, <16 x i8> %2, ptr %3) nounwind { 3997; CHECK-FIX-LABEL: aesd_setf16_loop_via_val: 3998; CHECK-FIX: @ %bb.0: 3999; CHECK-FIX-NEXT: vorr q1, q1, q1 4000; CHECK-FIX-NEXT: cmp r0, #0 4001; CHECK-FIX-NEXT: bxeq lr 4002; CHECK-FIX-NEXT: .LBB85_1: 4003; CHECK-FIX-NEXT: vmov r2, s0 4004; CHECK-FIX-NEXT: vmov.16 d2[0], r2 4005; CHECK-FIX-NEXT: .LBB85_2: @ =>This Inner Loop Header: Depth=1 4006; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r1] 4007; CHECK-FIX-NEXT: subs r0, r0, #1 4008; 
CHECK-FIX-NEXT: vmov.16 d16[0], r2 4009; CHECK-FIX-NEXT: aesd.8 q8, q1 4010; CHECK-FIX-NEXT: aesimc.8 q8, q8 4011; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] 4012; CHECK-FIX-NEXT: bne .LBB85_2 4013; CHECK-FIX-NEXT: @ %bb.3: 4014; CHECK-FIX-NEXT: bx lr 4015 %5 = icmp eq i32 %0, 0 4016 br i1 %5, label %13, label %6 4017 40186: 4019 %7 = bitcast <16 x i8> %2 to <8 x i16> 4020 %8 = bitcast half %1 to i16 4021 %9 = insertelement <8 x i16> %7, i16 %8, i64 0 4022 %10 = bitcast <8 x i16> %9 to <16 x i8> 4023 %11 = bitcast ptr %3 to ptr 4024 %12 = bitcast ptr %3 to ptr 4025 br label %14 4026 402713: 4028 ret void 4029 403014: 4031 %15 = phi i32 [ 0, %6 ], [ %21, %14 ] 4032 %16 = load <8 x i16>, ptr %11, align 8 4033 %17 = insertelement <8 x i16> %16, i16 %8, i64 0 4034 %18 = bitcast <8 x i16> %17 to <16 x i8> 4035 store half %1, ptr %12, align 8 4036 %19 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %18, <16 x i8> %10) 4037 %20 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %19) 4038 store <16 x i8> %20, ptr %3, align 8 4039 %21 = add nuw i32 %15, 1 4040 %22 = icmp eq i32 %21, %0 4041 br i1 %22, label %13, label %14 4042} 4043 
; A float loaded through %0 is inserted into lane 0 of both aesd operands:
; the vector loaded from %2 and the register argument %1. The aesd + aesimc
; result is stored back through %2. The expected output has a vorr on each
; operand register directly before aesd.8.
4044define arm_aapcs_vfpcc void @aesd_setf32_via_ptr(ptr %0, <16 x i8> %1, ptr %2) nounwind { 4045; CHECK-FIX-LABEL: aesd_setf32_via_ptr: 4046; CHECK-FIX: @ %bb.0: 4047; CHECK-FIX-NEXT: vldr s0, [r0] 4048; CHECK-FIX-NEXT: vld1.64 {d2, d3}, [r1] 4049; CHECK-FIX-NEXT: vmov.f32 s4, s0 4050; CHECK-FIX-NEXT: vorr q1, q1, q1 4051; CHECK-FIX-NEXT: vorr q0, q0, q0 4052; CHECK-FIX-NEXT: aesd.8 q1, q0 4053; CHECK-FIX-NEXT: aesimc.8 q8, q1 4054; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r1] 4055; CHECK-FIX-NEXT: bx lr 4056 %4 = load float, ptr %0, align 4 4057 %5 = bitcast ptr %2 to ptr 4058 %6 = load <4 x float>, ptr %5, align 8 4059 %7 = insertelement <4 x float> %6, float %4, i64 0 4060 %8 = bitcast <4 x float> %7 to <16 x i8> 4061 %9 = bitcast <16 x i8> %1 to <4 x float> 4062 %10 = insertelement <4 x float> %9, float %4, i64 0 4063 %11 = bitcast <4 x float> %10 
to <16 x i8> 4064 %12 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %8, <16 x i8> %11) 4065 %13 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %12) 4066 store <16 x i8> %13, ptr %2, align 8 4067 ret void 4068} 4069 
; float %0 is inserted into lane 0 of both aesd operands: the vector loaded
; from %2 and the register argument %1. The aesd + aesimc result is stored
; back through %2. The expected output has a vorr on each operand register
; directly before aesd.8.
4070define arm_aapcs_vfpcc void @aesd_setf32_via_val(float %0, <16 x i8> %1, ptr %2) nounwind { 4071; CHECK-FIX-LABEL: aesd_setf32_via_val: 4072; CHECK-FIX: @ %bb.0: 4073; CHECK-FIX-NEXT: vmov.f32 s4, s0 4074; CHECK-FIX-NEXT: vld1.64 {d0, d1}, [r0] 4075; CHECK-FIX-NEXT: vmov.f32 s0, s4 4076; CHECK-FIX-NEXT: vorr q0, q0, q0 4077; CHECK-FIX-NEXT: vorr q1, q1, q1 4078; CHECK-FIX-NEXT: aesd.8 q0, q1 4079; CHECK-FIX-NEXT: aesimc.8 q8, q0 4080; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r0] 4081; CHECK-FIX-NEXT: bx lr 4082 %4 = bitcast ptr %2 to ptr 4083 %5 = load <4 x float>, ptr %4, align 8 4084 %6 = insertelement <4 x float> %5, float %0, i64 0 4085 %7 = bitcast <4 x float> %6 to <16 x i8> 4086 %8 = bitcast <16 x i8> %1 to <4 x float> 4087 %9 = insertelement <4 x float> %8, float %0, i64 0 4088 %10 = bitcast <4 x float> %9 to <16 x i8> 4089 %11 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %7, <16 x i8> %10) 4090 %12 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %11) 4091 store <16 x i8> %12, ptr %2, align 8 4092 ret void 4093} 4094 
; Conditional lane-0 update of both aesd operands, written as two branch
; diamonds on i1 %0 that are joined by <4 x float> phis. When %0 is true, a
; float loaded through %1 is inserted into lane 0 of the vector loaded from
; %3 and into lane 0 of %2; otherwise both vectors are used unchanged. The
; aesd + aesimc result is stored to %3.
4095define arm_aapcs_vfpcc void @aesd_setf32_cond_via_ptr(i1 zeroext %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 4096; CHECK-FIX-LABEL: aesd_setf32_cond_via_ptr: 4097; CHECK-FIX: @ %bb.0: 4098; CHECK-FIX-NEXT: vorr q0, q0, q0 4099; CHECK-FIX-NEXT: cmp r0, #0 4100; CHECK-FIX-NEXT: beq .LBB88_2 4101; CHECK-FIX-NEXT: @ %bb.1: 4102; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 4103; CHECK-FIX-NEXT: vld1.32 {d16[0]}, [r1:32] 4104; CHECK-FIX-NEXT: cmp r0, #0 4105; CHECK-FIX-NEXT: bne .LBB88_3 4106; CHECK-FIX-NEXT: b .LBB88_4 4107; CHECK-FIX-NEXT: .LBB88_2: 4108; CHECK-FIX-NEXT: vld1.64 {d16, d17}, [r2] 4109; CHECK-FIX-NEXT: cmp r0, #0 4110; CHECK-FIX-NEXT: beq .LBB88_4 4111; CHECK-FIX-NEXT: .LBB88_3: 4112; CHECK-FIX-NEXT: vld1.32 
{d0[0]}, [r1:32] 4113; CHECK-FIX-NEXT: .LBB88_4: 4114; CHECK-FIX-NEXT: aesd.8 q8, q0 4115; CHECK-FIX-NEXT: aesimc.8 q8, q8 4116; CHECK-FIX-NEXT: vst1.64 {d16, d17}, [r2] 4117; CHECK-FIX-NEXT: bx lr 4118 br i1 %0, label %5, label %10 4119 41205: 4121 %6 = load float, ptr %1, align 4 4122 %7 = bitcast ptr %3 to ptr 4123 %8 = load <4 x float>, ptr %7, align 8 4124 %9 = insertelement <4 x float> %8, float %6, i64 0 4125 br label %13 4126 412710: 4128 %11 = bitcast ptr %3 to ptr 4129 %12 = load <4 x float>, ptr %11, align 8 4130 br label %13 4131 413213: 4133 %14 = phi <4 x float> [ %9, %5 ], [ %12, %10 ] 4134 br i1 %0, label %15, label %19 4135 413615: 4137 %16 = load float, ptr %1, align 4 4138 %17 = bitcast <16 x i8> %2 to <4 x float> 4139 %18 = insertelement <4 x float> %17, float %16, i64 0 4140 br label %21 4141 414219: 4143 %20 = bitcast <16 x i8> %2 to <4 x float> 4144 br label %21 4145 414621: 4147 %22 = phi <4 x float> [ %18, %15 ], [ %20, %19 ] 4148 %23 = bitcast <4 x float> %14 to <16 x i8> 4149 %24 = bitcast <4 x float> %22 to <16 x i8> 4150 %25 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %23, <16 x i8> %24) 4151 %26 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %25) 4152 store <16 x i8> %26, ptr %3, align 8 4153 ret void 4154} 4155 
; Conditional lane-0 update expressed with selects instead of branches: for
; each aesd operand (the vector loaded from %3 and the register argument %2),
; i1 %0 selects between the vector with float %1 inserted into lane 0 and the
; unmodified vector. The aesd + aesimc result is stored to %3.
4156define arm_aapcs_vfpcc void @aesd_setf32_cond_via_val(i1 zeroext %0, float %1, <16 x i8> %2, ptr %3) nounwind { 4157; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_cond_via_val: 4158; CHECK-FIX-NOSCHED: @ %bb.0: 4159; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1] 4160; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 4161; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s8, s0 4162; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2 4163; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 4164; CHECK-FIX-NOSCHED-NEXT: vmovne.f32 s4, s0 4165; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1 4166; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1 4167; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2 4168; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] 4169; CHECK-FIX-NOSCHED-NEXT: bx lr 4170; 4171; 
CHECK-CORTEX-FIX-LABEL: aesd_setf32_cond_via_val: 4172; CHECK-CORTEX-FIX: @ %bb.0: 4173; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 4174; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1] 4175; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s8, s0 4176; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2 4177; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 4178; CHECK-CORTEX-FIX-NEXT: vmovne.f32 s4, s0 4179; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1 4180; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1 4181; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2 4182; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] 4183; CHECK-CORTEX-FIX-NEXT: bx lr 4184 %5 = bitcast ptr %3 to ptr 4185 %6 = load <4 x float>, ptr %5, align 8 4186 %7 = insertelement <4 x float> %6, float %1, i64 0 4187 %8 = select i1 %0, <4 x float> %7, <4 x float> %6 4188 %9 = bitcast <16 x i8> %2 to <4 x float> 4189 %10 = insertelement <4 x float> %9, float %1, i64 0 4190 %11 = select i1 %0, <4 x float> %10, <4 x float> %9 4191 %12 = bitcast <4 x float> %8 to <16 x i8> 4192 %13 = bitcast <4 x float> %11 to <16 x i8> 4193 %14 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %12, <16 x i8> %13) 4194 %15 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %14) 4195 store <16 x i8> %15, ptr %3, align 8 4196 ret void 4197} 4198 
; Loop test with the float loaded through %1 before the loop. The value is
; inserted into lane 0 of %2 (the second aesd operand) and is also stored to
; %3. When %0 is non-zero, the vector at %3 is loaded once, aesd + aesimc are
; applied to it on every loop iteration, and the final value is stored back
; to %3. In the expected output the vorr on q0 sits inside the loop body.
4199define arm_aapcs_vfpcc void @aesd_setf32_loop_via_ptr(i32 %0, ptr %1, <16 x i8> %2, ptr %3) nounwind { 4200; CHECK-FIX-NOSCHED-LABEL: aesd_setf32_loop_via_ptr: 4201; CHECK-FIX-NOSCHED: @ %bb.0: 4202; CHECK-FIX-NOSCHED-NEXT: vldr s4, [r1] 4203; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 4204; CHECK-FIX-NOSCHED-NEXT: vstr s4, [r2] 4205; CHECK-FIX-NOSCHED-NEXT: bxeq lr 4206; CHECK-FIX-NOSCHED-NEXT: .LBB90_1: 4207; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s0, s4 4208; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r2] 4209; CHECK-FIX-NOSCHED-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1 4210; CHECK-FIX-NOSCHED-NEXT: vorr q0, q0, q0 4211; CHECK-FIX-NOSCHED-NEXT: aesd.8 q8, q0 4212; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1 4213; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q8 4214; 
CHECK-FIX-NOSCHED-NEXT: bne .LBB90_2 4215; CHECK-FIX-NOSCHED-NEXT: @ %bb.3: 4216; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r2] 4217; CHECK-FIX-NOSCHED-NEXT: bx lr 4218; 4219; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_ptr: 4220; CHECK-CORTEX-FIX: @ %bb.0: 4221; CHECK-CORTEX-FIX-NEXT: vldr s4, [r1] 4222; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 4223; CHECK-CORTEX-FIX-NEXT: vstr s4, [r2] 4224; CHECK-CORTEX-FIX-NEXT: bxeq lr 4225; CHECK-CORTEX-FIX-NEXT: .LBB90_1: 4226; CHECK-CORTEX-FIX-NEXT: vld1.64 {d16, d17}, [r2] 4227; CHECK-CORTEX-FIX-NEXT: vmov.f32 s0, s4 4228; CHECK-CORTEX-FIX-NEXT: .LBB90_2: @ =>This Inner Loop Header: Depth=1 4229; CHECK-CORTEX-FIX-NEXT: vorr q0, q0, q0 4230; CHECK-CORTEX-FIX-NEXT: aesd.8 q8, q0 4231; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1 4232; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q8 4233; CHECK-CORTEX-FIX-NEXT: bne .LBB90_2 4234; CHECK-CORTEX-FIX-NEXT: @ %bb.3: 4235; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r2] 4236; CHECK-CORTEX-FIX-NEXT: bx lr 4237 %5 = load float, ptr %1, align 4 4238 %6 = bitcast <16 x i8> %2 to <4 x float> 4239 %7 = insertelement <4 x float> %6, float %5, i64 0 4240 %8 = bitcast <4 x float> %7 to <16 x i8> 4241 %9 = bitcast ptr %3 to ptr 4242 store float %5, ptr %9, align 8 4243 %10 = icmp eq i32 %0, 0 4244 br i1 %10, label %14, label %11 4245 424611: 4247 %12 = load <16 x i8>, ptr %3, align 8 4248 br label %15 4249 425013: 4251 store <16 x i8> %19, ptr %3, align 8 4252 br label %14 4253 425414: 4255 ret void 4256 425715: 4258 %16 = phi <16 x i8> [ %12, %11 ], [ %19, %15 ] 4259 %17 = phi i32 [ 0, %11 ], [ %20, %15 ] 4260 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %16, <16 x i8> %8) 4261 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) 4262 %20 = add nuw i32 %17, 1 4263 %21 = icmp eq i32 %20, %0 4264 br i1 %21, label %13, label %15 4265} 4266 4267define arm_aapcs_vfpcc void @aesd_setf32_loop_via_val(i32 %0, float %1, <16 x i8> %2, ptr %3) nounwind { 4268; CHECK-FIX-NOSCHED-LABEL: 
aesd_setf32_loop_via_val: 4269; CHECK-FIX-NOSCHED: @ %bb.0: 4270; CHECK-FIX-NOSCHED-NEXT: cmp r0, #0 4271; CHECK-FIX-NOSCHED-NEXT: bxeq lr 4272; CHECK-FIX-NOSCHED-NEXT: .LBB91_1: 4273; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s4, s0 4274; CHECK-FIX-NOSCHED-NEXT: .LBB91_2: @ =>This Inner Loop Header: Depth=1 4275; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d4, d5}, [r1] 4276; CHECK-FIX-NOSCHED-NEXT: subs r0, r0, #1 4277; CHECK-FIX-NOSCHED-NEXT: vmov.f32 s8, s0 4278; CHECK-FIX-NOSCHED-NEXT: vorr q2, q2, q2 4279; CHECK-FIX-NOSCHED-NEXT: vorr q1, q1, q1 4280; CHECK-FIX-NOSCHED-NEXT: aesd.8 q2, q1 4281; CHECK-FIX-NOSCHED-NEXT: aesimc.8 q8, q2 4282; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r1] 4283; CHECK-FIX-NOSCHED-NEXT: bne .LBB91_2 4284; CHECK-FIX-NOSCHED-NEXT: @ %bb.3: 4285; CHECK-FIX-NOSCHED-NEXT: bx lr 4286; 4287; CHECK-CORTEX-FIX-LABEL: aesd_setf32_loop_via_val: 4288; CHECK-CORTEX-FIX: @ %bb.0: 4289; CHECK-CORTEX-FIX-NEXT: cmp r0, #0 4290; CHECK-CORTEX-FIX-NEXT: bxeq lr 4291; CHECK-CORTEX-FIX-NEXT: .LBB91_1: 4292; CHECK-CORTEX-FIX-NEXT: vmov.f32 s4, s0 4293; CHECK-CORTEX-FIX-NEXT: .LBB91_2: @ =>This Inner Loop Header: Depth=1 4294; CHECK-CORTEX-FIX-NEXT: vld1.64 {d4, d5}, [r1] 4295; CHECK-CORTEX-FIX-NEXT: vmov.f32 s8, s0 4296; CHECK-CORTEX-FIX-NEXT: vorr q2, q2, q2 4297; CHECK-CORTEX-FIX-NEXT: subs r0, r0, #1 4298; CHECK-CORTEX-FIX-NEXT: vorr q1, q1, q1 4299; CHECK-CORTEX-FIX-NEXT: aesd.8 q2, q1 4300; CHECK-CORTEX-FIX-NEXT: aesimc.8 q8, q2 4301; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r1] 4302; CHECK-CORTEX-FIX-NEXT: bne .LBB91_2 4303; CHECK-CORTEX-FIX-NEXT: @ %bb.3: 4304; CHECK-CORTEX-FIX-NEXT: bx lr 4305 %5 = icmp eq i32 %0, 0 4306 br i1 %5, label %12, label %6 4307 43086: 4309 %7 = bitcast <16 x i8> %2 to <4 x float> 4310 %8 = insertelement <4 x float> %7, float %1, i64 0 4311 %9 = bitcast <4 x float> %8 to <16 x i8> 4312 %10 = bitcast ptr %3 to ptr 4313 %11 = bitcast ptr %3 to ptr 4314 br label %13 4315 431612: 4317 ret void 4318 431913: 4320 %14 = phi i32 [ 0, %6 ], [ 
%20, %13 ] 4321 %15 = load <4 x float>, ptr %10, align 8 4322 %16 = insertelement <4 x float> %15, float %1, i64 0 4323 %17 = bitcast <4 x float> %16 to <16 x i8> 4324 store float %1, ptr %11, align 8 4325 %18 = call <16 x i8> @llvm.arm.neon.aesd(<16 x i8> %17, <16 x i8> %9) 4326 %19 = call <16 x i8> @llvm.arm.neon.aesimc(<16 x i8> %18) 4327 store <16 x i8> %19, ptr %3, align 8 4328 %20 = add nuw i32 %14, 1 4329 %21 = icmp eq i32 %20, %0 4330 br i1 %21, label %12, label %13 4331} 4332 4333define arm_aapcs_vfpcc void @aese_constantisland(ptr %0) nounwind { 4334; CHECK-FIX-NOSCHED-LABEL: aese_constantisland: 4335; CHECK-FIX-NOSCHED: @ %bb.0: 4336; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d16, d17}, [r0] 4337; CHECK-FIX-NOSCHED-NEXT: adr r1, .LCPI92_0 4338; CHECK-FIX-NOSCHED-NEXT: vld1.64 {d18, d19}, [r1:128] 4339; CHECK-FIX-NOSCHED-NEXT: aese.8 q9, q8 4340; CHECK-FIX-NOSCHED-NEXT: aesmc.8 q8, q9 4341; CHECK-FIX-NOSCHED-NEXT: vst1.64 {d16, d17}, [r0] 4342; CHECK-FIX-NOSCHED-NEXT: bx lr 4343; CHECK-FIX-NOSCHED-NEXT: .p2align 4 4344; CHECK-FIX-NOSCHED-NEXT: @ %bb.1: 4345; CHECK-FIX-NOSCHED-NEXT: .LCPI92_0: 4346; CHECK-FIX-NOSCHED-NEXT: .byte 0 @ 0x0 4347; CHECK-FIX-NOSCHED-NEXT: .byte 1 @ 0x1 4348; CHECK-FIX-NOSCHED-NEXT: .byte 2 @ 0x2 4349; CHECK-FIX-NOSCHED-NEXT: .byte 3 @ 0x3 4350; CHECK-FIX-NOSCHED-NEXT: .byte 4 @ 0x4 4351; CHECK-FIX-NOSCHED-NEXT: .byte 5 @ 0x5 4352; CHECK-FIX-NOSCHED-NEXT: .byte 6 @ 0x6 4353; CHECK-FIX-NOSCHED-NEXT: .byte 7 @ 0x7 4354; CHECK-FIX-NOSCHED-NEXT: .byte 8 @ 0x8 4355; CHECK-FIX-NOSCHED-NEXT: .byte 9 @ 0x9 4356; CHECK-FIX-NOSCHED-NEXT: .byte 10 @ 0xa 4357; CHECK-FIX-NOSCHED-NEXT: .byte 11 @ 0xb 4358; CHECK-FIX-NOSCHED-NEXT: .byte 12 @ 0xc 4359; CHECK-FIX-NOSCHED-NEXT: .byte 13 @ 0xd 4360; CHECK-FIX-NOSCHED-NEXT: .byte 14 @ 0xe 4361; CHECK-FIX-NOSCHED-NEXT: .byte 15 @ 0xf 4362; 4363; CHECK-CORTEX-FIX-LABEL: aese_constantisland: 4364; CHECK-CORTEX-FIX: @ %bb.0: 4365; CHECK-CORTEX-FIX-NEXT: adr r1, .LCPI92_0 4366; CHECK-CORTEX-FIX-NEXT: vld1.64 
{d16, d17}, [r0] 4367; CHECK-CORTEX-FIX-NEXT: vld1.64 {d18, d19}, [r1:128] 4368; CHECK-CORTEX-FIX-NEXT: aese.8 q9, q8 4369; CHECK-CORTEX-FIX-NEXT: aesmc.8 q8, q9 4370; CHECK-CORTEX-FIX-NEXT: vst1.64 {d16, d17}, [r0] 4371; CHECK-CORTEX-FIX-NEXT: bx lr 4372; CHECK-CORTEX-FIX-NEXT: .p2align 4 4373; CHECK-CORTEX-FIX-NEXT: @ %bb.1: 4374; CHECK-CORTEX-FIX-NEXT: .LCPI92_0: 4375; CHECK-CORTEX-FIX-NEXT: .byte 0 @ 0x0 4376; CHECK-CORTEX-FIX-NEXT: .byte 1 @ 0x1 4377; CHECK-CORTEX-FIX-NEXT: .byte 2 @ 0x2 4378; CHECK-CORTEX-FIX-NEXT: .byte 3 @ 0x3 4379; CHECK-CORTEX-FIX-NEXT: .byte 4 @ 0x4 4380; CHECK-CORTEX-FIX-NEXT: .byte 5 @ 0x5 4381; CHECK-CORTEX-FIX-NEXT: .byte 6 @ 0x6 4382; CHECK-CORTEX-FIX-NEXT: .byte 7 @ 0x7 4383; CHECK-CORTEX-FIX-NEXT: .byte 8 @ 0x8 4384; CHECK-CORTEX-FIX-NEXT: .byte 9 @ 0x9 4385; CHECK-CORTEX-FIX-NEXT: .byte 10 @ 0xa 4386; CHECK-CORTEX-FIX-NEXT: .byte 11 @ 0xb 4387; CHECK-CORTEX-FIX-NEXT: .byte 12 @ 0xc 4388; CHECK-CORTEX-FIX-NEXT: .byte 13 @ 0xd 4389; CHECK-CORTEX-FIX-NEXT: .byte 14 @ 0xe 4390; CHECK-CORTEX-FIX-NEXT: .byte 15 @ 0xf 4391 %2 = load <16 x i8>, ptr %0, align 8 4392 %3 = call <16 x i8> @llvm.arm.neon.aese(<16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %2) 4393 %4 = call <16 x i8> @llvm.arm.neon.aesmc(<16 x i8> %3) 4394 store <16 x i8> %4, ptr %0, align 8 4395 ret void 4396} 4397