xref: /llvm-project/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll (revision 87750c9de4b7bd71539bfadd61c10317235da138)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
3; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX
4
5; Fold "shuffle (binop (shuffle, shuffle)), undef" --> "binop (shuffle), (shuffle)"
6
; External function with no body in this file; the multi-use tests below call
; it to give an intermediate <4 x double> value a second user.
7declare void @use_v4f64(<4 x double>)
8
; Basic positive test: both fadd operands are single-source shuffles and the
; fadd result is permuted again. The expected output composes the outer mask
; <1,0,3,2> into each operand shuffle (%a: <3,2,1,0> becomes <2,3,0,1>,
; %b: <1,0,1,0> becomes <0,1,0,1>) and leaves a single fadd.
9define <4 x double> @fadd_v4f64(<4 x double> %a, <4 x double> %b) {
10; CHECK-LABEL: define <4 x double> @fadd_v4f64(
11; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
12; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
13; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
14; CHECK-NEXT:    [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
15; CHECK-NEXT:    ret <4 x double> [[POST]]
16;
17  %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
18  %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
19  %op = fadd <4 x double> %a1, %b1
20  %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
21  ret <4 x double> %post
22}
23
; Same pattern as @fadd_v4f64, but the last index of the outer mask is 4, which
; selects from the second (poison) shuffle operand. The expected output still
; folds, with that lane written as a poison mask element in both new operand
; shuffles.
24define <4 x double> @fadd_v4f64_poison_idx(<4 x double> %a, <4 x double> %b) {
25; CHECK-LABEL: define <4 x double> @fadd_v4f64_poison_idx(
26; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
27; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 poison>
28; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 poison>
29; CHECK-NEXT:    [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
30; CHECK-NEXT:    ret <4 x double> [[POST]]
31;
32  %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
33  %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
34  %op = fadd <4 x double> %a1, %b1
35  %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 4>
36  ret <4 x double> %post
37}
38
; The two inner shuffles have different source types: %a is <4 x double> while
; %b is <2 x double> and is widened to four lanes by its shuffle. The expected
; output still folds the outer permute into both operand shuffles, keeping the
; <2 x double> source for %b.
39define <4 x double> @fadd_v4f64_mixed_types(<4 x double> %a, <2 x double> %b) {
40; CHECK-LABEL: define <4 x double> @fadd_v4f64_mixed_types(
41; CHECK-SAME: <4 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
42; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
43; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
44; CHECK-NEXT:    [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
45; CHECK-NEXT:    ret <4 x double> [[POST]]
46;
47  %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
48  %b1 = shufflevector <2 x double> %b, <2 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
49  %op = fadd <4 x double> %a1, %b1
50  %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
51  ret <4 x double> %post
52}
53
; Length-changing case with a constant operand: the inner shuffle narrows
; <4 x float> to <2 x float>, the binop multiplies by zeroinitializer, and the
; outer shuffle widens back to <4 x float> with two poison lanes. The expected
; output is one 4-wide shuffle feeding a 4-wide fmul whose constant operand has
; undef in the two widened lanes.
; NOTE(review): despite the "fadd" in its name, this test exercises fmul.
54define <4 x float> @fadd_v4f32_mixed_types(<4 x float> %a0) {
55; CHECK-LABEL: define <4 x float> @fadd_v4f32_mixed_types(
56; CHECK-SAME: <4 x float> [[A0:%.*]]) #[[ATTR0]] {
57; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> zeroinitializer, <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
58; CHECK-NEXT:    [[POST:%.*]] = fmul <4 x float> [[TMP1]], <float 0.000000e+00, float 0.000000e+00, float undef, float undef>
59; CHECK-NEXT:    ret <4 x float> [[POST]]
60;
61  %pre = shufflevector <4 x float> %a0, <4 x float> zeroinitializer, <2 x i32> <i32 1, i32 5>
62  %op = fmul <2 x float> %pre, zeroinitializer
63  %post = shufflevector <2 x float> %op, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
64  ret <4 x float> %post
65}
66
67; Negative test - multiple use of fadd
; %op is both permuted and passed to @use_v4f64, so the expected output keeps
; all four instructions exactly as written.
68define <4 x double> @fadd_v4f64_multiuse_op(<4 x double> %a, <4 x double> %b) {
69; CHECK-LABEL: define <4 x double> @fadd_v4f64_multiuse_op(
70; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
71; CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
72; CHECK-NEXT:    [[B1:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
73; CHECK-NEXT:    [[OP:%.*]] = fadd <4 x double> [[A1]], [[B1]]
74; CHECK-NEXT:    [[POST:%.*]] = shufflevector <4 x double> [[OP]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
75; CHECK-NEXT:    call void @use_v4f64(<4 x double> [[OP]])
76; CHECK-NEXT:    ret <4 x double> [[POST]]
77;
78  %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
79  %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
80  %op = fadd <4 x double> %a1, %b1
81  %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
82  call void @use_v4f64(<4 x double> %op)
83  ret <4 x double> %post
84}
85
86; Multiple use of inner shuffle (only fold if the moved shuffle is cheaper).
; %a1 is also passed to @use_v4f64, so it has to stay alive either way. The
; expectations differ per target feature: under the SSE prefix (-mattr=sse2)
; the fold still happens and %a1 is kept only for the call, while under the
; AVX prefix (-mattr=avx2) the IR is left unchanged, i.e. it is a negative
; test for avx2 only.
87define <4 x double> @fadd_v4f64_multiuse_shuffle(<4 x double> %a, <4 x double> %b) {
88; SSE-LABEL: define <4 x double> @fadd_v4f64_multiuse_shuffle(
89; SSE-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
90; SSE-NEXT:    [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
91; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
92; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
93; SSE-NEXT:    [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
94; SSE-NEXT:    call void @use_v4f64(<4 x double> [[A1]])
95; SSE-NEXT:    ret <4 x double> [[POST]]
96;
97; AVX-LABEL: define <4 x double> @fadd_v4f64_multiuse_shuffle(
98; AVX-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
99; AVX-NEXT:    [[A1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
100; AVX-NEXT:    [[B1:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
101; AVX-NEXT:    [[OP:%.*]] = fadd <4 x double> [[A1]], [[B1]]
102; AVX-NEXT:    [[POST:%.*]] = shufflevector <4 x double> [[OP]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
103; AVX-NEXT:    call void @use_v4f64(<4 x double> [[A1]])
104; AVX-NEXT:    ret <4 x double> [[POST]]
105;
106  %a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
107  %b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
108  %op = fadd <4 x double> %a1, %b1
109  %post = shufflevector <4 x double> %op, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
110  call void @use_v4f64(<4 x double> %a1)
111  ret <4 x double> %post
112}
113
; Positive sdiv test: every index of the outer mask <1,0,3,0> selects a defined
; lane of the sdiv result, so the expected output folds it into the operand
; shuffles (%a: <2,3,0,3>, %b: <0,1,0,1>) with no poison mask elements, then
; performs a single sdiv.
114define <4 x i32> @sdiv_v4i32(<4 x i32> %a, <4 x i32> %b) {
115; CHECK-LABEL: define <4 x i32> @sdiv_v4i32(
116; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
117; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 3>
118; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
119; CHECK-NEXT:    [[POST:%.*]] = sdiv <4 x i32> [[TMP1]], [[TMP2]]
120; CHECK-NEXT:    ret <4 x i32> [[POST]]
121;
122  %a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
123  %b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
124  %op = sdiv <4 x i32> %a1, %b1
125  %post = shufflevector <4 x i32> %op, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 0>
126  ret <4 x i32> %post
127}
128
129; Negative test - don't introduce poison element into div/rem instruction
; The outer mask has an explicit poison element in its last lane; folding it
; into the operand shuffles would hand the sdiv a poison lane, so the expected
; output is the input unchanged.
130define <4 x i32> @sdiv_v4i32_poison(<4 x i32> %a, <4 x i32> %b) {
131; CHECK-LABEL: define <4 x i32> @sdiv_v4i32_poison(
132; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
133; CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
134; CHECK-NEXT:    [[B1:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
135; CHECK-NEXT:    [[OP:%.*]] = sdiv <4 x i32> [[A1]], [[B1]]
136; CHECK-NEXT:    [[POST:%.*]] = shufflevector <4 x i32> [[OP]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 poison>
137; CHECK-NEXT:    ret <4 x i32> [[POST]]
138;
139  %a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
140  %b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
141  %op = sdiv <4 x i32> %a1, %b1
142  %post = shufflevector <4 x i32> %op, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 poison>
143  ret <4 x i32> %post
144}
145
146; Negative test - don't introduce poison element into div/rem instruction
; Variant of @sdiv_v4i32_poison where the poison lane is implicit: the last
; outer mask index is 4, which selects from the second (poison) shuffle
; operand. The expected output is the input unchanged.
147define <4 x i32> @sdiv_v4i32_poison_idx(<4 x i32> %a, <4 x i32> %b) {
148; CHECK-LABEL: define <4 x i32> @sdiv_v4i32_poison_idx(
149; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
150; CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
151; CHECK-NEXT:    [[B1:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
152; CHECK-NEXT:    [[OP:%.*]] = sdiv <4 x i32> [[A1]], [[B1]]
153; CHECK-NEXT:    [[POST:%.*]] = shufflevector <4 x i32> [[OP]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 4>
154; CHECK-NEXT:    ret <4 x i32> [[POST]]
155;
156  %a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
157  %b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
158  %op = sdiv <4 x i32> %a1, %b1
159  %post = shufflevector <4 x i32> %op, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 4>
160  ret <4 x i32> %post
161}
162