; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+kl,widekl | FileCheck %s --check-prefix=X64
; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unknown-unknown -mattr=+kl,widekl -mattr=+avx2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown -mattr=+widekl | FileCheck %s --check-prefix=X64
; RUN: llc < %s -verify-machineinstrs -mtriple=i386-unknown-unknown -mattr=+widekl -mattr=+avx2 | FileCheck %s --check-prefix=X86

declare void @llvm.x86.loadiwkey(<2 x i64>, <2 x i64>, <2 x i64>, i32)
declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64>, ptr)
declare { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64>, ptr)
declare { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64>, ptr)
declare { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64>, ptr)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)
declare { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>)

define void @test_loadiwkey(i32 %ctl, <2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi) {
; X64-LABEL: test_loadiwkey:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edi, %eax
; X64-NEXT: loadiwkey %xmm2, %xmm1
; X64-NEXT: retq
;
; X86-LABEL: test_loadiwkey:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: loadiwkey %xmm2, %xmm1
; X86-NEXT: retl
entry:
  tail call void @llvm.x86.loadiwkey(<2 x i64> %intkey, <2 x i64> %enkey_lo, <2 x i64> %enkey_hi, i32 %ctl)
  ret void
}

define i32 @test_encodekey128_u32(i32 %htype, <2 x i64> %key, ptr nocapture %h0, ptr nocapture %h1, ptr nocapture %h2, ptr nocapture %h3, ptr nocapture %h4, ptr nocapture %h5) nounwind {
; X64-LABEL: test_encodekey128_u32:
; X64: # %bb.0: # %entry
; X64-NEXT: encodekey128 %edi, %eax
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm2, (%rcx)
; X64-NEXT: retq
;
; X86-LABEL: test_encodekey128_u32:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: encodekey128 %eax, %eax
; X86-NEXT: vmovaps %xmm0, (%esi)
; X86-NEXT: vmovaps %xmm1, (%edx)
; X86-NEXT: vmovaps %xmm2, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
  %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32 %htype, <2 x i64> %key)
  %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %h0, align 16
  %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %h1, align 16
  %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %3, ptr %h2, align 16
  %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i32 %4
}

define i32 @test_encodekey256_u32(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi, ptr nocapture %h0, ptr nocapture %h1, ptr nocapture %h2, ptr nocapture %h3, ptr nocapture %h4, ptr nocapture %h5, ptr nocapture readnone %h6) nounwind {
; X64-LABEL: test_encodekey256_u32:
; X64: # %bb.0: # %entry
; X64-NEXT: encodekey256 %edi, %eax
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm2, (%rcx)
; X64-NEXT: movaps %xmm3, (%r8)
; X64-NEXT: retq
;
; X86-LABEL: test_encodekey256_u32:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: encodekey256 %eax, %eax
; X86-NEXT: vmovaps %xmm0, (%edi)
; X86-NEXT: vmovaps %xmm1, (%esi)
; X86-NEXT: vmovaps %xmm2, (%edx)
; X86-NEXT: vmovaps %xmm3, (%ecx)
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
entry:
  %0 = tail call { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey256(i32 %htype, <2 x i64> %key_lo, <2 x i64> %key_hi)
  %1 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %h0, align 16
  %2 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %h1, align 16
  %3 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %3, ptr %h2, align 16
  %4 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %4, ptr %h3, align 16
  %5 = extractvalue { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i32 %5
}

define i8 @test_mm_aesenc128kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
; X64-LABEL: test_mm_aesenc128kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: aesenc128kl (%rdi), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesenc128kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: aesenc128kl (%eax), %xmm0
; X86-NEXT: sete %al
; X86-NEXT: vmovaps %xmm0, (%ecx)
; X86-NEXT: retl
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc128kl(<2 x i64> %data, ptr %h)
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

define i8 @test_mm_aesdec128kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
; X64-LABEL: test_mm_aesdec128kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: aesdec128kl (%rdi), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesdec128kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: aesdec128kl (%eax), %xmm0
; X86-NEXT: sete %al
; X86-NEXT: vmovaps %xmm0, (%ecx)
; X86-NEXT: retl
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec128kl(<2 x i64> %data, ptr %h)
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

define i8 @test_mm_aesenc256kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
; X64-LABEL: test_mm_aesenc256kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: aesenc256kl (%rdi), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesenc256kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: aesenc256kl (%eax), %xmm0
; X86-NEXT: sete %al
; X86-NEXT: vmovaps %xmm0, (%ecx)
; X86-NEXT: retl
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, ptr %h)
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

define i8 @test_mm_aesdec256kl_u8(<2 x i64> %data, ptr %h, ptr %out) {
; X64-LABEL: test_mm_aesdec256kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: aesdec256kl (%rdi), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesdec256kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: aesdec256kl (%eax), %xmm0
; X86-NEXT: sete %al
; X86-NEXT: vmovaps %xmm0, (%ecx)
; X86-NEXT: retl
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesdec256kl(<2 x i64> %data, ptr %h)
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

define i8 @test_mm_aesencwide128kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
; X64-LABEL: test_mm_aesencwide128kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: aesencwide128kl (%rdi)
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm1, (%rcx)
; X64-NEXT: movaps %xmm1, (%r8)
; X64-NEXT: movaps %xmm1, (%r9)
; X64-NEXT: movaps %xmm1, (%rbx)
; X64-NEXT: movaps %xmm1, (%r11)
; X64-NEXT: movaps %xmm1, (%r10)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesencwide128kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: vmovaps 24(%ebp), %xmm3
; X86-NEXT: vmovaps 40(%ebp), %xmm4
; X86-NEXT: vmovaps 56(%ebp), %xmm5
; X86-NEXT: vmovaps 72(%ebp), %xmm6
; X86-NEXT: vmovaps 88(%ebp), %xmm7
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: aesencwide128kl (%eax)
; X86-NEXT: movl 104(%ebp), %eax
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: movl 108(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 112(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 116(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 120(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 124(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 128(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 132(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: sete %al
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide128kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out0
  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %out1
  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %2, ptr %out2
  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %2, ptr %out3
  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  store <2 x i64> %2, ptr %out4
  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  store <2 x i64> %2, ptr %out5
  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  store <2 x i64> %2, ptr %out6
  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
  store <2 x i64> %2, ptr %out7
  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i8 %9
}

define i8 @test_mm_aesdecwide128kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
; X64-LABEL: test_mm_aesdecwide128kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: aesdecwide128kl (%rdi)
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm1, (%rcx)
; X64-NEXT: movaps %xmm1, (%r8)
; X64-NEXT: movaps %xmm1, (%r9)
; X64-NEXT: movaps %xmm1, (%rbx)
; X64-NEXT: movaps %xmm1, (%r11)
; X64-NEXT: movaps %xmm1, (%r10)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesdecwide128kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: vmovaps 24(%ebp), %xmm3
; X86-NEXT: vmovaps 40(%ebp), %xmm4
; X86-NEXT: vmovaps 56(%ebp), %xmm5
; X86-NEXT: vmovaps 72(%ebp), %xmm6
; X86-NEXT: vmovaps 88(%ebp), %xmm7
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: aesdecwide128kl (%eax)
; X86-NEXT: movl 104(%ebp), %eax
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: movl 108(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 112(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 116(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 120(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 124(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 128(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 132(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: sete %al
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide128kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out0
  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %out1
  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %2, ptr %out2
  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %2, ptr %out3
  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  store <2 x i64> %2, ptr %out4
  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  store <2 x i64> %2, ptr %out5
  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  store <2 x i64> %2, ptr %out6
  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
  store <2 x i64> %2, ptr %out7
  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i8 %9
}

define i8 @test_mm_aesencwide256kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
; X64-LABEL: test_mm_aesencwide256kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: aesencwide256kl (%rdi)
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm1, (%rcx)
; X64-NEXT: movaps %xmm1, (%r8)
; X64-NEXT: movaps %xmm1, (%r9)
; X64-NEXT: movaps %xmm1, (%rbx)
; X64-NEXT: movaps %xmm1, (%r11)
; X64-NEXT: movaps %xmm1, (%r10)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesencwide256kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: vmovaps 24(%ebp), %xmm3
; X86-NEXT: vmovaps 40(%ebp), %xmm4
; X86-NEXT: vmovaps 56(%ebp), %xmm5
; X86-NEXT: vmovaps 72(%ebp), %xmm6
; X86-NEXT: vmovaps 88(%ebp), %xmm7
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: aesencwide256kl (%eax)
; X86-NEXT: movl 104(%ebp), %eax
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: movl 108(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 112(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 116(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 120(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 124(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 128(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 132(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: sete %al
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesencwide256kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out0
  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %out1
  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %2, ptr %out2
  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %2, ptr %out3
  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  store <2 x i64> %2, ptr %out4
  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  store <2 x i64> %2, ptr %out5
  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  store <2 x i64> %2, ptr %out6
  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
  store <2 x i64> %2, ptr %out7
  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i8 %9
}

define i8 @test_mm_aesdecwide256kl_u8(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
; X64-LABEL: test_mm_aesdecwide256kl_u8:
; X64: # %bb.0: # %entry
; X64-NEXT: pushq %rbx
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx
; X64-NEXT: aesdecwide256kl (%rdi)
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm1, (%rcx)
; X64-NEXT: movaps %xmm1, (%r8)
; X64-NEXT: movaps %xmm1, (%r9)
; X64-NEXT: movaps %xmm1, (%rbx)
; X64-NEXT: movaps %xmm1, (%r11)
; X64-NEXT: movaps %xmm1, (%r10)
; X64-NEXT: popq %rbx
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesdecwide256kl_u8:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: vmovaps 24(%ebp), %xmm3
; X86-NEXT: vmovaps 40(%ebp), %xmm4
; X86-NEXT: vmovaps 56(%ebp), %xmm5
; X86-NEXT: vmovaps 72(%ebp), %xmm6
; X86-NEXT: vmovaps 88(%ebp), %xmm7
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: aesdecwide256kl (%eax)
; X86-NEXT: movl 104(%ebp), %eax
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: movl 108(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 112(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 116(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 120(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 124(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 128(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 132(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: sete %al
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr %p, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out0
  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %out1
  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %2, ptr %out2
  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %2, ptr %out3
  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  store <2 x i64> %2, ptr %out4
  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  store <2 x i64> %2, ptr %out5
  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  store <2 x i64> %2, ptr %out6
  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
  store <2 x i64> %2, ptr %out7
  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i8 %9
}

; Tests to make sure we can select an appropriate addressing mode for a global.
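; On x86-64 the global handle operand is expected to fold as a RIP-relative
; memory operand (e.g. "aesenc256kl foo(%rip)"), while on i386 it should fold
; as an absolute address ("aesenc256kl foo"); the two functions below check
; both forms.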

@foo = external dso_local global [64 x i8]

define i8 @test_mm_aesenc256kl_u8_global(<2 x i64> %data, ptr %out) {
; X64-LABEL: test_mm_aesenc256kl_u8_global:
; X64: # %bb.0: # %entry
; X64-NEXT: aesenc256kl foo(%rip), %xmm0
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesenc256kl_u8_global:
; X86: # %bb.0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: aesenc256kl foo, %xmm0
; X86-NEXT: sete %al
; X86-NEXT: vmovaps %xmm0, (%ecx)
; X86-NEXT: retl
entry:
  %0 = tail call { i8, <2 x i64> } @llvm.x86.aesenc256kl(<2 x i64> %data, ptr @foo)
  %1 = extractvalue { i8, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out
  %2 = extractvalue { i8, <2 x i64> } %0, 0
  ret i8 %2
}

define i8 @test_mm_aesdecwide256kl_u8_global(<2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7, ptr %out0, ptr %out1, ptr %out2, ptr %out3, ptr %out4, ptr %out5, ptr %out6, ptr %out7) nounwind {
; X64-LABEL: test_mm_aesdecwide256kl_u8_global:
; X64: # %bb.0: # %entry
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10
; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11
; X64-NEXT: aesdecwide256kl foo(%rip)
; X64-NEXT: sete %al
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: movaps %xmm1, (%rsi)
; X64-NEXT: movaps %xmm1, (%rdx)
; X64-NEXT: movaps %xmm1, (%rcx)
; X64-NEXT: movaps %xmm1, (%r8)
; X64-NEXT: movaps %xmm1, (%r9)
; X64-NEXT: movaps %xmm1, (%r11)
; X64-NEXT: movaps %xmm1, (%r10)
; X64-NEXT: retq
;
; X86-LABEL: test_mm_aesdecwide256kl_u8_global:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebp
; X86-NEXT: movl %esp, %ebp
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
; X86-NEXT: movl 88(%ebp), %eax
; X86-NEXT: vmovaps 8(%ebp), %xmm3
; X86-NEXT: vmovaps 24(%ebp), %xmm4
; X86-NEXT: vmovaps 40(%ebp), %xmm5
; X86-NEXT: vmovaps 56(%ebp), %xmm6
; X86-NEXT: vmovaps 72(%ebp), %xmm7
; X86-NEXT: aesdecwide256kl foo
; X86-NEXT: vmovaps %xmm0, (%eax)
; X86-NEXT: movl 92(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 96(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 100(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 104(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 108(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 112(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: movl 116(%ebp), %eax
; X86-NEXT: vmovaps %xmm1, (%eax)
; X86-NEXT: sete %al
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
entry:
  %0 = call { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.aesdecwide256kl(ptr @foo, <2 x i64> %v0, <2 x i64> %v1, <2 x i64> %v2, <2 x i64> %v3, <2 x i64> %v4, <2 x i64> %v5, <2 x i64> %v6, <2 x i64> %v7)
  %1 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 1
  store <2 x i64> %1, ptr %out0
  %2 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 2
  store <2 x i64> %2, ptr %out1
  %3 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 3
  store <2 x i64> %2, ptr %out2
  %4 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 4
  store <2 x i64> %2, ptr %out3
  %5 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 5
  store <2 x i64> %2, ptr %out4
  %6 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 6
  store <2 x i64> %2, ptr %out5
  %7 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 7
  store <2 x i64> %2, ptr %out6
  %8 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 8
  store <2 x i64> %2, ptr %out7
  %9 = extractvalue { i8, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %0, 0
  ret i8 %9
}