; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-aes,+crypto | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a76 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a77 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78c | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-e1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-n1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-v1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=neoverse-512tvb | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1 | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1a | FileCheck %s
; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=ampere1b | FileCheck %s

declare <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d, <16 x i8> %k)
declare <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %d)
declare <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d, <16 x i8> %k)
declare <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %d)
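
; The functions below chain several rounds of AES intrinsics so that the
; scheduler sees many aese/aesmc (and aesd/aesimc) pairs. For every CPU and
; feature combination in the RUN lines above, the CHECK-NEXT patterns require
; the later pairs to be emitted back-to-back, i.e. the scheduler is expected
; to keep the fuseable aese+aesmc / aesd+aesimc sequences adjacent.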

define void @aesea(ptr %a0, ptr %b0, ptr %c0, <16 x i8> %d, <16 x i8> %e) {
  %d0 = load <16 x i8>, ptr %a0
  %a1 = getelementptr inbounds <16 x i8>, ptr %a0, i64 1
  %d1 = load <16 x i8>, ptr %a1
  %a2 = getelementptr inbounds <16 x i8>, ptr %a0, i64 2
  %d2 = load <16 x i8>, ptr %a2
  %a3 = getelementptr inbounds <16 x i8>, ptr %a0, i64 3
  %d3 = load <16 x i8>, ptr %a3
  %k0 = load <16 x i8>, ptr %b0
  %e00 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d0, <16 x i8> %k0)
  %f00 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e00)
  %e01 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d1, <16 x i8> %k0)
  %f01 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e01)
  %e02 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d2, <16 x i8> %k0)
  %f02 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e02)
  %e03 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d3, <16 x i8> %k0)
  %f03 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e03)
  %b1 = getelementptr inbounds <16 x i8>, ptr %b0, i64 1
  %k1 = load <16 x i8>, ptr %b1
  %e10 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f00, <16 x i8> %k1)
  %f10 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e00)
  %e11 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f01, <16 x i8> %k1)
  %f11 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e01)
  %e12 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f02, <16 x i8> %k1)
  %f12 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e02)
  %e13 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f03, <16 x i8> %k1)
  %f13 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e03)
  %b2 = getelementptr inbounds <16 x i8>, ptr %b0, i64 2
  %k2 = load <16 x i8>, ptr %b2
  %e20 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f10, <16 x i8> %k2)
  %f20 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e10)
  %e21 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f11, <16 x i8> %k2)
  %f21 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e11)
  %e22 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f12, <16 x i8> %k2)
  %f22 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e12)
  %e23 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f13, <16 x i8> %k2)
  %f23 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e13)
  %b3 = getelementptr inbounds <16 x i8>, ptr %b0, i64 3
  %k3 = load <16 x i8>, ptr %b3
  %e30 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f20, <16 x i8> %k3)
  %f30 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e20)
  %e31 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f21, <16 x i8> %k3)
  %f31 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e21)
  %e32 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f22, <16 x i8> %k3)
  %f32 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e22)
  %e33 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f23, <16 x i8> %k3)
  %f33 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %e23)
  %g0 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f30, <16 x i8> %d)
  %h0 = xor <16 x i8> %g0, %e
  %g1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f31, <16 x i8> %d)
  %h1 = xor <16 x i8> %g1, %e
  %g2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f32, <16 x i8> %d)
  %h2 = xor <16 x i8> %g2, %e
  %g3 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %f33, <16 x i8> %d)
  %h3 = xor <16 x i8> %g3, %e
  store <16 x i8> %h0, ptr %c0
  %c1 = getelementptr inbounds <16 x i8>, ptr %c0, i64 1
  store <16 x i8> %h1, ptr %c1
  %c2 = getelementptr inbounds <16 x i8>, ptr %c0, i64 2
  store <16 x i8> %h2, ptr %c2
  %c3 = getelementptr inbounds <16 x i8>, ptr %c0, i64 3
  store <16 x i8> %h3, ptr %c3
  ret void

; CHECK-LABEL: aesea:
; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECK: aesmc [[VA]], [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VB]], [[VB]]
; CHECK: aese [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VC]], [[VC]]
; CHECK: aese [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VD]], [[VD]]
; CHECK: aese [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VE]], [[VE]]
; CHECK: aese [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VF]], [[VF]]
; CHECK: aese [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VG]], [[VG]]
; CHECK: aese [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VH]], [[VH]]
; CHECK-NOT: aesmc
}
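
; Same structure as @aesea, but using the AES decryption intrinsics; here the
; pair that is expected to stay adjacent is aesd followed by aesimc.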

define void @aesda(ptr %a0, ptr %b0, ptr %c0, <16 x i8> %d, <16 x i8> %e) {
  %d0 = load <16 x i8>, ptr %a0
  %a1 = getelementptr inbounds <16 x i8>, ptr %a0, i64 1
  %d1 = load <16 x i8>, ptr %a1
  %a2 = getelementptr inbounds <16 x i8>, ptr %a0, i64 2
  %d2 = load <16 x i8>, ptr %a2
  %a3 = getelementptr inbounds <16 x i8>, ptr %a0, i64 3
  %d3 = load <16 x i8>, ptr %a3
  %k0 = load <16 x i8>, ptr %b0
  %e00 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d0, <16 x i8> %k0)
  %f00 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e00)
  %e01 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d1, <16 x i8> %k0)
  %f01 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e01)
  %e02 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d2, <16 x i8> %k0)
  %f02 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e02)
  %e03 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %d3, <16 x i8> %k0)
  %f03 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e03)
  %b1 = getelementptr inbounds <16 x i8>, ptr %b0, i64 1
  %k1 = load <16 x i8>, ptr %b1
  %e10 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f00, <16 x i8> %k1)
  %f10 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e00)
  %e11 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f01, <16 x i8> %k1)
  %f11 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e01)
  %e12 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f02, <16 x i8> %k1)
  %f12 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e02)
  %e13 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f03, <16 x i8> %k1)
  %f13 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e03)
  %b2 = getelementptr inbounds <16 x i8>, ptr %b0, i64 2
  %k2 = load <16 x i8>, ptr %b2
  %e20 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f10, <16 x i8> %k2)
  %f20 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e10)
  %e21 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f11, <16 x i8> %k2)
  %f21 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e11)
  %e22 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f12, <16 x i8> %k2)
  %f22 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e12)
  %e23 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f13, <16 x i8> %k2)
  %f23 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e13)
  %b3 = getelementptr inbounds <16 x i8>, ptr %b0, i64 3
  %k3 = load <16 x i8>, ptr %b3
  %e30 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f20, <16 x i8> %k3)
  %f30 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e20)
  %e31 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f21, <16 x i8> %k3)
  %f31 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e21)
  %e32 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f22, <16 x i8> %k3)
  %f32 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e22)
  %e33 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f23, <16 x i8> %k3)
  %f33 = call <16 x i8> @llvm.aarch64.crypto.aesimc(<16 x i8> %e23)
  %g0 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f30, <16 x i8> %d)
  %h0 = xor <16 x i8> %g0, %e
  %g1 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f31, <16 x i8> %d)
  %h1 = xor <16 x i8> %g1, %e
  %g2 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f32, <16 x i8> %d)
  %h2 = xor <16 x i8> %g2, %e
  %g3 = call <16 x i8> @llvm.aarch64.crypto.aesd(<16 x i8> %f33, <16 x i8> %d)
  %h3 = xor <16 x i8> %g3, %e
  store <16 x i8> %h0, ptr %c0
  %c1 = getelementptr inbounds <16 x i8>, ptr %c0, i64 1
  store <16 x i8> %h1, ptr %c1
  %c2 = getelementptr inbounds <16 x i8>, ptr %c0, i64 2
  store <16 x i8> %h2, ptr %c2
  %c3 = getelementptr inbounds <16 x i8>, ptr %c0, i64 3
  store <16 x i8> %h3, ptr %c3
  ret void

; CHECK-LABEL: aesda:
; CHECK: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}}
; CHECK: aesimc [[VA]], [[VA]]
; CHECK: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VB]], [[VB]]
; CHECK: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VC]], [[VC]]
; CHECK: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VD]], [[VD]]
; CHECK: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VE]], [[VE]]
; CHECK: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VF]], [[VF]]
; CHECK: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VG]], [[VG]]
; CHECK: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesimc [[VH]], [[VH]]
; CHECK-NOT: aesimc
}
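
; A smaller case with loads and stores interleaved between the AES intrinsics,
; so the scheduler also has unrelated memory instructions available to place
; around the pairs.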

define void @aes_load_store(ptr %p1, ptr %p2, ptr %p3) {
entry:
  %x1 = alloca <16 x i8>, align 16
  %x2 = alloca <16 x i8>, align 16
  %x3 = alloca <16 x i8>, align 16
  %x4 = alloca <16 x i8>, align 16
  %x5 = alloca <16 x i8>, align 16
  %in1 = load <16 x i8>, ptr %p1, align 16
  store <16 x i8> %in1, ptr %x1, align 16
  %aese1 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %in1, <16 x i8> %in1) #2
  %in2 = load <16 x i8>, ptr %p2, align 16
  %aesmc1 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese1) #2
  %aese2 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %aesmc1, <16 x i8> %in2) #2
  store <16 x i8> %aesmc1, ptr %x3, align 16
  %in3 = load <16 x i8>, ptr %p3, align 16
  %aesmc2 = call <16 x i8> @llvm.aarch64.crypto.aesmc(<16 x i8> %aese2) #2
  %aese3 = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %aesmc2, <16 x i8> %in3) #2
  store <16 x i8> %aese3, ptr %x5, align 16
  ret void

; CHECK-LABEL: aes_load_store:
; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
; aese and aesmc are described to share a unit, hence they won't be scheduled
; in the same cycle and the scheduler can find another instruction to place
; in between
; CHECK: aesmc [[VA]], [[VA]]
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
; CHECK-NEXT: aesmc [[VB]], [[VB]]
; CHECK-NOT: aesmc
}