; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+kl,+widekl | FileCheck %s

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/X86/keylocker-builtins.c

; Calls llvm.x86.loadiwkey with the three vector arguments followed by %ctl and
; returns nothing. The expected assembly copies %edi into %eax before the
; loadiwkey instruction.
define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) {
; CHECK-LABEL: test_loadiwkey:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    loadiwkey %xmm2, %xmm1
; CHECK-NEXT:    retq
entry:
  tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl)
  ret void
}

; Calls llvm.x86.encodekey128, stores result elements 1..6 to %h at 16-byte
; strides (align 1, hence the unaligned movups stores), and returns element 0.
; Note that the expected stores read xmm0-xmm2 and xmm4-xmm6; xmm3 is not used.
define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, ptr nocapture %h) {
; CHECK-LABEL: test_encodekey128_u32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    encodekey128 %edi, %eax
; CHECK-NEXT:    movups %xmm0, (%rsi)
; CHECK-NEXT:    movups %xmm1, 16(%rsi)
; CHECK-NEXT:    movups %xmm2, 32(%rsi)
; CHECK-NEXT:    movups %xmm4, 48(%rsi)
; CHECK-NEXT:    movups %xmm5, 64(%rsi)
; CHECK-NEXT:    movups %xmm6, 80(%rsi)
; CHECK-NEXT:    retq
entry:
  %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key)
  %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %h, align 1
  %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  %3 = getelementptr i8, ptr %h, i64 16
  store <2 x i64> %2, ptr %3, align 1
  %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  %5 = getelementptr i8, ptr %h, i64 32
  store <2 x i64> %4, ptr %5, align 1
  %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  %7 = getelementptr i8, ptr %h, i64 48
  store <2 x i64> %6, ptr %7, align 1
  %8 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  %9 = getelementptr i8, ptr %h, i64 64
  store <2 x i64> %8, ptr %9, align 1
  %10 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  %11 = getelementptr i8, ptr %h, i64 80
  store <2 x i64> %10, ptr %11, align 1
  %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i32 %12
}

; Calls llvm.x86.encodekey256, stores result elements 1..7 to %h at 16-byte
; strides (align 1), and returns element 0.
define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, ptr nocapture %h) {
; CHECK-LABEL: test_encodekey256_u32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    encodekey256 %edi, %eax
; CHECK-NEXT:    movups %xmm0, (%rsi)
; CHECK-NEXT:    movups %xmm1, 16(%rsi)
; CHECK-NEXT:    movups %xmm2, 32(%rsi)
; CHECK-NEXT:    movups %xmm3, 48(%rsi)
; CHECK-NEXT:    movups %xmm4, 64(%rsi)
; CHECK-NEXT:    movups %xmm5, 80(%rsi)
; CHECK-NEXT:    movups %xmm6, 96(%rsi)
; CHECK-NEXT:    retq
entry:
  %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi)
  %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %h, align 1
  %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  %3 = getelementptr i8, ptr %h, i64 16
  store <2 x i64> %2, ptr %3, align 1
  %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  %5 = getelementptr i8, ptr %h, i64 32
  store <2 x i64> %4, ptr %5, align 1
  %6 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  %7 = getelementptr i8, ptr %h, i64 48
  store <2 x i64> %6, ptr %7, align 1
  %8 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  %9 = getelementptr i8, ptr %h, i64 64
  store <2 x i64> %8, ptr %9, align 1
  %10 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  %11 = getelementptr i8, ptr %h, i64 80
  store <2 x i64> %10, ptr %11, align 1
  %12 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  %13 = getelementptr i8, ptr %h, i64 96
  store <2 x i64> %12, ptr %13, align 1
  %14 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i32 %14
}

; Calls llvm.x86.aesenc256kl on %idata with the handle pointer %h, stores the
; vector result (element 1) to %odata with 16-byte alignment, and returns the
; i8 result (element 0), which the expected assembly materializes with sete.
define zeroext i8 @test_mm_aesenc256kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
; CHECK-LABEL: test_mm_aesenc256kl_u8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    aesenc256kl (%rsi), %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movaps %xmm0, (%rdi)
; CHECK-NEXT:    retq
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %idata, ptr %h) #1
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %odata, align 16
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

; Same shape as test_mm_aesenc256kl_u8, using llvm.x86.aesdec256kl.
define zeroext i8 @test_mm_aesdec256kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
; CHECK-LABEL: test_mm_aesdec256kl_u8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    aesdec256kl (%rsi), %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movaps %xmm0, (%rdi)
; CHECK-NEXT:    retq
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %idata, ptr %h) #1
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %odata, align 16
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

; Same shape as test_mm_aesenc256kl_u8, using llvm.x86.aesenc128kl.
define zeroext i8 @test_mm_aesenc128kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
; CHECK-LABEL: test_mm_aesenc128kl_u8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    aesenc128kl (%rsi), %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movaps %xmm0, (%rdi)
; CHECK-NEXT:    retq
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %idata, ptr %h) #1
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %odata, align 16
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

; Same shape as test_mm_aesenc256kl_u8, using llvm.x86.aesdec128kl.
define zeroext i8 @test_mm_aesdec128kl_u8(ptr %odata, <2 x i64> %idata, ptr %h) {
; CHECK-LABEL: test_mm_aesdec128kl_u8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    aesdec128kl (%rsi), %xmm0
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movaps %xmm0, (%rdi)
; CHECK-NEXT:    retq
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %idata, ptr %h) #1
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %odata, align 16
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

; Loads eight consecutive <2 x i64> vectors from %idata, passes them together
; with the handle pointer %h to llvm.x86.aesencwide128kl, stores the eight
; vector results (elements 1..8) to consecutive <2 x i64> slots of %odata, and
; returns the i8 result (element 0).
define zeroext i8 @test__mm_aesencwide128kl_u8(ptr %odata, ptr %idata, ptr %h) {
; CHECK-LABEL: test__mm_aesencwide128kl_u8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movaps (%rsi), %xmm0
; CHECK-NEXT:    movaps 16(%rsi), %xmm1
; CHECK-NEXT:    movaps 32(%rsi), %xmm2
; CHECK-NEXT:    movaps 48(%rsi), %xmm3
; CHECK-NEXT:    movaps 64(%rsi), %xmm4
; CHECK-NEXT:    movaps 80(%rsi), %xmm5
; CHECK-NEXT:    movaps 96(%rsi), %xmm6
; CHECK-NEXT:    movaps 112(%rsi), %xmm7
; CHECK-NEXT:    aesencwide128kl (%rdx)
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movaps %xmm0, (%rdi)
; CHECK-NEXT:    movaps %xmm1, 16(%rdi)
; CHECK-NEXT:    movaps %xmm2, 32(%rdi)
; CHECK-NEXT:    movaps %xmm3, 48(%rdi)
; CHECK-NEXT:    movaps %xmm4, 64(%rdi)
; CHECK-NEXT:    movaps %xmm5, 80(%rdi)
; CHECK-NEXT:    movaps %xmm6, 96(%rdi)
; CHECK-NEXT:    movaps %xmm7, 112(%rdi)
; CHECK-NEXT:    retq
entry:
  %0 = load <2 x i64>, ptr %idata, align 16
  %1 = getelementptr <2 x i64>, ptr %idata, i64 1
  %2 = load <2 x i64>, ptr %1, align 16
  %3 = getelementptr <2 x i64>, ptr %idata, i64 2
  %4 = load <2 x i64>, ptr %3, align 16
  %5 = getelementptr <2 x i64>, ptr %idata, i64 3
  %6 = load <2 x i64>, ptr %5, align 16
  %7 = getelementptr <2 x i64>, ptr %idata, i64 4
  %8 = load <2 x i64>, ptr %7, align 16
  %9 = getelementptr <2 x i64>, ptr %idata, i64 5
  %10 = load <2 x i64>, ptr %9, align 16
  %11 = getelementptr <2 x i64>, ptr %idata, i64 6
  %12 = load <2 x i64>, ptr %11, align 16
  %13 = getelementptr <2 x i64>, ptr %idata, i64 7
  %14 = load <2 x i64>, ptr %13, align 16
  %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
  %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
  store <2 x i64> %16, ptr %odata, align 16
  %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
  %18 = getelementptr <2 x i64>, ptr %odata, i64 1
  store <2 x i64> %17, ptr %18, align 16
  %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
  %20 = getelementptr <2 x i64>, ptr %odata, i64 2
  store <2 x i64> %19, ptr %20, align 16
  %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
  %22 = getelementptr <2 x i64>, ptr %odata, i64 3
  store <2 x i64> %21, ptr %22, align 16
  %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
  %24 = getelementptr <2 x i64>, ptr %odata, i64 4
  store <2 x i64> %23, ptr %24, align 16
  %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
  %26 = getelementptr <2 x i64>, ptr %odata, i64 5
  store <2 x i64> %25, ptr %26, align 16
  %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
  %28 = getelementptr <2 x i64>, ptr %odata, i64 6
  store <2 x i64> %27, ptr %28, align 16
  %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
  %30 = getelementptr <2 x i64>, ptr %odata, i64 7
  store <2 x i64> %29, ptr %30, align 16
  %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
  ret i8 %31
}

; Same shape as test__mm_aesencwide128kl_u8, using llvm.x86.aesdecwide128kl.
define zeroext i8 @test__mm_aesdecwide128kl_u8(ptr %odata, ptr %idata, ptr %h) {
; CHECK-LABEL: test__mm_aesdecwide128kl_u8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movaps (%rsi), %xmm0
; CHECK-NEXT:    movaps 16(%rsi), %xmm1
; CHECK-NEXT:    movaps 32(%rsi), %xmm2
; CHECK-NEXT:    movaps 48(%rsi), %xmm3
; CHECK-NEXT:    movaps 64(%rsi), %xmm4
; CHECK-NEXT:    movaps 80(%rsi), %xmm5
; CHECK-NEXT:    movaps 96(%rsi), %xmm6
; CHECK-NEXT:    movaps 112(%rsi), %xmm7
; CHECK-NEXT:    aesdecwide128kl (%rdx)
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movaps %xmm0, (%rdi)
; CHECK-NEXT:    movaps %xmm1, 16(%rdi)
; CHECK-NEXT:    movaps %xmm2, 32(%rdi)
; CHECK-NEXT:    movaps %xmm3, 48(%rdi)
; CHECK-NEXT:    movaps %xmm4, 64(%rdi)
; CHECK-NEXT:    movaps %xmm5, 80(%rdi)
; CHECK-NEXT:    movaps %xmm6, 96(%rdi)
; CHECK-NEXT:    movaps %xmm7, 112(%rdi)
; CHECK-NEXT:    retq
entry:
  %0 = load <2 x i64>, ptr %idata, align 16
  %1 = getelementptr <2 x i64>, ptr %idata, i64 1
  %2 = load <2 x i64>, ptr %1, align 16
  %3 = getelementptr <2 x i64>, ptr %idata, i64 2
  %4 = load <2 x i64>, ptr %3, align 16
  %5 = getelementptr <2 x i64>, ptr %idata, i64 3
  %6 = load <2 x i64>, ptr %5, align 16
  %7 = getelementptr <2 x i64>, ptr %idata, i64 4
  %8 = load <2 x i64>, ptr %7, align 16
  %9 = getelementptr <2 x i64>, ptr %idata, i64 5
  %10 = load <2 x i64>, ptr %9, align 16
  %11 = getelementptr <2 x i64>, ptr %idata, i64 6
  %12 = load <2 x i64>, ptr %11, align 16
  %13 = getelementptr <2 x i64>, ptr %idata, i64 7
  %14 = load <2 x i64>, ptr %13, align 16
  %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
  %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
  store <2 x i64> %16, ptr %odata, align 16
  %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
  %18 = getelementptr <2 x i64>, ptr %odata, i64 1
  store <2 x i64> %17, ptr %18, align 16
  %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
  %20 = getelementptr <2 x i64>, ptr %odata, i64 2
  store <2 x i64> %19, ptr %20, align 16
  %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
  %22 = getelementptr <2 x i64>, ptr %odata, i64 3
  store <2 x i64> %21, ptr %22, align 16
  %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
  %24 = getelementptr <2 x i64>, ptr %odata, i64 4
  store <2 x i64> %23, ptr %24, align 16
  %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
  %26 = getelementptr <2 x i64>, ptr %odata, i64 5
  store <2 x i64> %25, ptr %26, align 16
  %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
  %28 = getelementptr <2 x i64>, ptr %odata, i64 6
  store <2 x i64> %27, ptr %28, align 16
  %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
  %30 = getelementptr <2 x i64>, ptr %odata, i64 7
  store <2 x i64> %29, ptr %30, align 16
  %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
  ret i8 %31
}

; Same shape as test__mm_aesencwide128kl_u8, using llvm.x86.aesencwide256kl.
define zeroext i8 @test__mm_aesencwide256kl_u8(ptr %odata, ptr %idata, ptr %h) {
; CHECK-LABEL: test__mm_aesencwide256kl_u8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movaps (%rsi), %xmm0
; CHECK-NEXT:    movaps 16(%rsi), %xmm1
; CHECK-NEXT:    movaps 32(%rsi), %xmm2
; CHECK-NEXT:    movaps 48(%rsi), %xmm3
; CHECK-NEXT:    movaps 64(%rsi), %xmm4
; CHECK-NEXT:    movaps 80(%rsi), %xmm5
; CHECK-NEXT:    movaps 96(%rsi), %xmm6
; CHECK-NEXT:    movaps 112(%rsi), %xmm7
; CHECK-NEXT:    aesencwide256kl (%rdx)
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movaps %xmm0, (%rdi)
; CHECK-NEXT:    movaps %xmm1, 16(%rdi)
; CHECK-NEXT:    movaps %xmm2, 32(%rdi)
; CHECK-NEXT:    movaps %xmm3, 48(%rdi)
; CHECK-NEXT:    movaps %xmm4, 64(%rdi)
; CHECK-NEXT:    movaps %xmm5, 80(%rdi)
; CHECK-NEXT:    movaps %xmm6, 96(%rdi)
; CHECK-NEXT:    movaps %xmm7, 112(%rdi)
; CHECK-NEXT:    retq
entry:
  %0 = load <2 x i64>, ptr %idata, align 16
  %1 = getelementptr <2 x i64>, ptr %idata, i64 1
  %2 = load <2 x i64>, ptr %1, align 16
  %3 = getelementptr <2 x i64>, ptr %idata, i64 2
  %4 = load <2 x i64>, ptr %3, align 16
  %5 = getelementptr <2 x i64>, ptr %idata, i64 3
  %6 = load <2 x i64>, ptr %5, align 16
  %7 = getelementptr <2 x i64>, ptr %idata, i64 4
  %8 = load <2 x i64>, ptr %7, align 16
  %9 = getelementptr <2 x i64>, ptr %idata, i64 5
  %10 = load <2 x i64>, ptr %9, align 16
  %11 = getelementptr <2 x i64>, ptr %idata, i64 6
  %12 = load <2 x i64>, ptr %11, align 16
  %13 = getelementptr <2 x i64>, ptr %idata, i64 7
  %14 = load <2 x i64>, ptr %13, align 16
  %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
  %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
  store <2 x i64> %16, ptr %odata, align 16
  %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
  %18 = getelementptr <2 x i64>, ptr %odata, i64 1
  store <2 x i64> %17, ptr %18, align 16
  %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
  %20 = getelementptr <2 x i64>, ptr %odata, i64 2
  store <2 x i64> %19, ptr %20, align 16
  %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
  %22 = getelementptr <2 x i64>, ptr %odata, i64 3
  store <2 x i64> %21, ptr %22, align 16
  %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
  %24 = getelementptr <2 x i64>, ptr %odata, i64 4
  store <2 x i64> %23, ptr %24, align 16
  %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
  %26 = getelementptr <2 x i64>, ptr %odata, i64 5
  store <2 x i64> %25, ptr %26, align 16
  %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
  %28 = getelementptr <2 x i64>, ptr %odata, i64 6
  store <2 x i64> %27, ptr %28, align 16
  %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
  %30 = getelementptr <2 x i64>, ptr %odata, i64 7
  store <2 x i64> %29, ptr %30, align 16
  %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
  ret i8 %31
}

; Same shape as test__mm_aesencwide128kl_u8, using llvm.x86.aesdecwide256kl.
define zeroext i8 @test__mm_aesdecwide256kl_u8(ptr %odata, ptr %idata, ptr %h) {
; CHECK-LABEL: test__mm_aesdecwide256kl_u8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movaps (%rsi), %xmm0
; CHECK-NEXT:    movaps 16(%rsi), %xmm1
; CHECK-NEXT:    movaps 32(%rsi), %xmm2
; CHECK-NEXT:    movaps 48(%rsi), %xmm3
; CHECK-NEXT:    movaps 64(%rsi), %xmm4
; CHECK-NEXT:    movaps 80(%rsi), %xmm5
; CHECK-NEXT:    movaps 96(%rsi), %xmm6
; CHECK-NEXT:    movaps 112(%rsi), %xmm7
; CHECK-NEXT:    aesdecwide256kl (%rdx)
; CHECK-NEXT:    sete %al
; CHECK-NEXT:    movaps %xmm0, (%rdi)
; CHECK-NEXT:    movaps %xmm1, 16(%rdi)
; CHECK-NEXT:    movaps %xmm2, 32(%rdi)
; CHECK-NEXT:    movaps %xmm3, 48(%rdi)
; CHECK-NEXT:    movaps %xmm4, 64(%rdi)
; CHECK-NEXT:    movaps %xmm5, 80(%rdi)
; CHECK-NEXT:    movaps %xmm6, 96(%rdi)
; CHECK-NEXT:    movaps %xmm7, 112(%rdi)
; CHECK-NEXT:    retq
entry:
  %0 = load <2 x i64>, ptr %idata, align 16
  %1 = getelementptr <2 x i64>, ptr %idata, i64 1
  %2 = load <2 x i64>, ptr %1, align 16
  %3 = getelementptr <2 x i64>, ptr %idata, i64 2
  %4 = load <2 x i64>, ptr %3, align 16
  %5 = getelementptr <2 x i64>, ptr %idata, i64 3
  %6 = load <2 x i64>, ptr %5, align 16
  %7 = getelementptr <2 x i64>, ptr %idata, i64 4
  %8 = load <2 x i64>, ptr %7, align 16
  %9 = getelementptr <2 x i64>, ptr %idata, i64 5
  %10 = load <2 x i64>, ptr %9, align 16
  %11 = getelementptr <2 x i64>, ptr %idata, i64 6
  %12 = load <2 x i64>, ptr %11, align 16
  %13 = getelementptr <2 x i64>, ptr %idata, i64 7
  %14 = load <2 x i64>, ptr %13, align 16
  %15 = tail call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr %h, <2 x i64> %0, <2 x i64> %2, <2 x i64> %4, <2 x i64> %6, <2 x i64> %8, <2 x i64> %10, <2 x i64> %12, <2 x i64> %14) #1
  %16 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 1
  store <2 x i64> %16, ptr %odata, align 16
  %17 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 2
  %18 = getelementptr <2 x i64>, ptr %odata, i64 1
  store <2 x i64> %17, ptr %18, align 16
  %19 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 3
  %20 = getelementptr <2 x i64>, ptr %odata, i64 2
  store <2 x i64> %19, ptr %20, align 16
  %21 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 4
  %22 = getelementptr <2 x i64>, ptr %odata, i64 3
  store <2 x i64> %21, ptr %22, align 16
  %23 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 5
  %24 = getelementptr <2 x i64>, ptr %odata, i64 4
  store <2 x i64> %23, ptr %24, align 16
  %25 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 6
  %26 = getelementptr <2 x i64>, ptr %odata, i64 5
  store <2 x i64> %25, ptr %26, align 16
  %27 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 7
  %28 = getelementptr <2 x i64>, ptr %odata, i64 6
  store <2 x i64> %27, ptr %28, align 16
  %29 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 8
  %30 = getelementptr <2 x i64>, ptr %odata, i64 7
  store <2 x i64> %29, ptr %30, align 16
  %31 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %15, 0
  ret i8 %31
}

; Declarations of the Key Locker intrinsics exercised above.
declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32)
declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, ptr)
declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, ptr)
declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, ptr)
declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, ptr)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)