; RUN: llc -O3 -disable-peephole -mcpu=corei7-avx -mattr=+avx < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests - we use the 'big vectors' pattern to guarantee spilling to stack.
;
; Many of these tests are primarily to check memory folding with specific instructions. Using a basic
; load/cvt/store pattern to test for this would mean that it wouldn't be the memory folding code that's
; being tested - the load-execute version of the instruction from the tables would be matched instead.

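; For reference, a minimal sketch of the basic load/cvt/store pattern this file deliberately
; avoids (illustrative only - not part of the checked tests; the function name and vector
; widths below are assumptions). With operands this small nothing spills, so instruction
; selection simply matches the load-execute form of the conversion instead of exercising
; the stack-reload folding code:
;
;   define void @load_execute_cvtdq2ps(<8 x i32>* %a, <8 x float>* %c) {
;     %1 = load <8 x i32>* %a
;     %2 = sitofp <8 x i32> %1 to <8 x float>
;     store <8 x float> %2, <8 x float>* %c
;     ret void
;   }
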
define void @stack_fold_vmulpd(<64 x double>* %a, <64 x double>* %b, <64 x double>* %c) {
  ;CHECK-LABEL: stack_fold_vmulpd
  ;CHECK:       vmulpd {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <64 x double>* %a
  %2 = load <64 x double>* %b
  %3 = fadd <64 x double> %1, %2
  %4 = fsub <64 x double> %1, %2
  %5 = fmul <64 x double> %3, %4
  store <64 x double> %5, <64 x double>* %c
  ret void
}

define void @stack_fold_cvtdq2ps(<128 x i32>* %a, <128 x i32>* %b, <128 x float>* %c) {
  ;CHECK-LABEL: stack_fold_cvtdq2ps
  ;CHECK:   vcvtdq2ps {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x i32>* %a
  %2 = load <128 x i32>* %b
  %3 = and <128 x i32> %1, %2
  %4 = xor <128 x i32> %1, %2
  %5 = sitofp <128 x i32> %3 to <128 x float>
  %6 = sitofp <128 x i32> %4 to <128 x float>
  %7 = fadd <128 x float> %5, %6
  store <128 x float> %7, <128 x float>* %c
  ret void
}

define void @stack_fold_cvtpd2ps(<128 x double>* %a, <128 x double>* %b, <128 x float>* %c) {
  ;CHECK-LABEL: stack_fold_cvtpd2ps
  ;CHECK:   vcvtpd2psy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x double>* %a
  %2 = load <128 x double>* %b
  %3 = fadd <128 x double> %1, %2
  %4 = fsub <128 x double> %1, %2
  %5 = fptrunc <128 x double> %3 to <128 x float>
  %6 = fptrunc <128 x double> %4 to <128 x float>
  %7 = fadd <128 x float> %5, %6
  store <128 x float> %7, <128 x float>* %c
  ret void
}

define void @stack_fold_cvttpd2dq(<64 x double>* %a, <64 x double>* %b, <64 x i32>* %c) #0 {
  ;CHECK-LABEL: stack_fold_cvttpd2dq
  ;CHECK:  vcvttpd2dqy {{[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <64 x double>* %a
  %2 = load <64 x double>* %b
  %3 = fadd <64 x double> %1, %2
  %4 = fsub <64 x double> %1, %2
  %5 = fptosi <64 x double> %3 to <64 x i32>
  %6 = fptosi <64 x double> %4 to <64 x i32>
  %7 = or <64 x i32> %5, %6
  store <64 x i32> %7, <64 x i32>* %c
  ret void
}

define void @stack_fold_cvttps2dq(<128 x float>* %a, <128 x float>* %b, <128 x i32>* %c) #0 {
  ;CHECK-LABEL: stack_fold_cvttps2dq
  ;CHECK:   vcvttps2dq {{[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload

  %1 = load <128 x float>* %a
  %2 = load <128 x float>* %b
  %3 = fadd <128 x float> %1, %2
  %4 = fsub <128 x float> %1, %2
  %5 = fptosi <128 x float> %3 to <128 x i32>
  %6 = fptosi <128 x float> %4 to <128 x i32>
  %7 = or <128 x i32> %5, %6
  store <128 x i32> %7, <128 x i32>* %c
  ret void
}