; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests - we use the 'big vectors' pattern to guarantee spilling to stack.
;
; Many of these tests are primarily to check memory folding with specific instructions. Using a basic
; load/cvt/store pattern to test for this would mean that it wouldn't be the memory folding code that's
; being tested - the load-execute version of the instruction from the tables would be matched instead.
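;
; As a purely illustrative, hypothetical sketch of that problem (not part of
; the test itself), a minimal pattern such as:
;   %v = load <8 x i32>* %in
;   %f = sitofp <8 x i32> %v to <8 x float>
;   store <8 x float> %f, <8 x float>* %out
; would normally be selected straight to the load-execute form of the
; instruction (e.g. vcvtdq2ps (%rdi), %ymm0), so the stack-slot folding code
; would never run. The oversized vectors below create enough register pressure
; that operands are spilled and must be folded back in as 'Folded Reload's.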

define void @stack_fold_vmulpd(<64 x double>* %a, <64 x double>* %b, <64 x double>* %c) {
  ;CHECK-LABEL: stack_fold_vmulpd
  ;CHECK: vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <64 x double>* %a
  %2 = load <64 x double>* %b
  %3 = fadd <64 x double> %1, %2
  %4 = fsub <64 x double> %1, %2
  %5 = fmul <64 x double> %3, %4
  store <64 x double> %5, <64 x double>* %c
  ret void
}

define void @stack_fold_cvtdq2ps(<128 x i32>* %a, <128 x i32>* %b, <128 x float>* %c) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK: vcvtdq2ps {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x i32>* %a
  %2 = load <128 x i32>* %b
  %3 = and <128 x i32> %1, %2
  %4 = xor <128 x i32> %1, %2
  %5 = sitofp <128 x i32> %3 to <128 x float>
  %6 = sitofp <128 x i32> %4 to <128 x float>
  %7 = fadd <128 x float> %5, %6
  store <128 x float> %7, <128 x float>* %c
  ret void
}

define void @stack_fold_cvtpd2ps(<128 x double>* %a, <128 x double>* %b, <128 x float>* %c) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK: vcvtpd2psy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x double>* %a
  %2 = load <128 x double>* %b
  %3 = fadd <128 x double> %1, %2
  %4 = fsub <128 x double> %1, %2
  %5 = fptrunc <128 x double> %3 to <128 x float>
  %6 = fptrunc <128 x double> %4 to <128 x float>
  %7 = fadd <128 x float> %5, %6
  store <128 x float> %7, <128 x float>* %c
  ret void
}

define void @stack_fold_cvttpd2dq(<64 x double>* %a, <64 x double>* %b, <64 x i32>* %c) #0 {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK: vcvttpd2dqy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <64 x double>* %a
  %2 = load <64 x double>* %b
  %3 = fadd <64 x double> %1, %2
  %4 = fsub <64 x double> %1, %2
  %5 = fptosi <64 x double> %3 to <64 x i32>
  %6 = fptosi <64 x double> %4 to <64 x i32>
  %7 = or <64 x i32> %5, %6
  store <64 x i32> %7, <64 x i32>* %c
  ret void
}

define void @stack_fold_cvttps2dq(<128 x float>* %a, <128 x float>* %b, <128 x i32>* %c) #0 {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK: vcvttps2dq {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x float>* %a
  %2 = load <128 x float>* %b
  %3 = fadd <128 x float> %1, %2
  %4 = fsub <128 x float> %1, %2
  %5 = fptosi <128 x float> %3 to <128 x i32>
  %6 = fptosi <128 x float> %4 to <128 x i32>
  %7 = or <128 x i32> %5, %6
  store <128 x i32> %7, <128 x i32>* %c
  ret void
}