xref: /llvm-project/llvm/test/CodeGen/X86/sad_variations.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx  | FileCheck %s --check-prefix=AVX
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f  | FileCheck %s --check-prefix=AVX
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX
7
; sad8_32bit_icmp_sge
; Sums |cur[i] - ref[i]| over eight byte pairs, working in i32 lanes, and
; returns the i32 total. The absolute value is spelled as
; "diff > -1 ? diff : 0 - diff", i.e. the signed "diff >= 0" form.
; The SSE2 and AVX assertions below expect the whole sequence to be selected
; as two 8-byte loads feeding a single psadbw (vpsadbw with AVX).
; NOTE(review): attribute group #0 is referenced but no definition of it is
; visible in this part of the file - confirm that is intentional.
8define i32 @sad8_32bit_icmp_sge(ptr nocapture readonly %cur, ptr nocapture readonly %ref, i32 %stride) local_unnamed_addr #0 {
9; SSE2-LABEL: sad8_32bit_icmp_sge:
10; SSE2:       # %bb.0: # %entry
11; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
12; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
13; SSE2-NEXT:    psadbw %xmm0, %xmm1
14; SSE2-NEXT:    movd %xmm1, %eax
15; SSE2-NEXT:    retq
16;
17; AVX-LABEL: sad8_32bit_icmp_sge:
18; AVX:       # %bb.0: # %entry
19; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
20; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
21; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
22; AVX-NEXT:    vmovd %xmm0, %eax
23; AVX-NEXT:    retq
24
25entry:
  ; %idx.ext is the only use of %stride and is itself never used below.
26  %idx.ext = zext i32 %stride to i64
27  br label %for.body
28
  ; Despite its name, for.body is straight-line code: it is entered once
  ; from entry and ends in ret.
29for.body:                                         ; preds = %entry
  ; Load eight bytes from each pointer and zero-extend them to i32 lanes.
30  %0 = load <8 x i8>, ptr %cur, align 1
31  %1 = zext <8 x i8> %0 to <8 x i32>
32  %2 = load <8 x i8>, ptr %ref, align 1
33  %3 = zext <8 x i8> %2 to <8 x i32>
  ; Per-lane difference, then absolute value: keep the difference where it
  ; is greater than -1 (non-negative), otherwise take its negation.
34  %4 = sub nsw <8 x i32> %1, %3
35  %5 = icmp sgt <8 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
36  %6 = sub nsw <8 x i32> zeroinitializer, %4
37  %7 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> %6
  ; Shuffle/add reduction: fold lanes 4-7 onto 0-3, then lanes 2-3 onto 0-1,
  ; then lane 1 onto lane 0.
38  %rdx.shuf = shufflevector <8 x i32> %7, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
39  %bin.rdx = add <8 x i32> %7, %rdx.shuf
40  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
41  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
42  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
43  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  ; Lane 0 now holds the sum of all eight absolute differences.
44  %8 = extractelement <8 x i32> %bin.rdx232, i32 0
45  ret i32 %8
46}
47
; sad8_32bit_icmp_sgt
; Sums |cur[i] - ref[i]| over eight byte pairs in i32 lanes and returns the
; i32 total. The absolute value is spelled as "diff > 0 ? diff : 0 - diff";
; a zero difference takes the negated arm, which is still zero.
; The SSE2 and AVX assertions below expect two 8-byte loads feeding a single
; psadbw (vpsadbw with AVX).
; NOTE(review): attribute group #1 is referenced but no definition of it is
; visible in this part of the file - confirm that is intentional.
48define i32 @sad8_32bit_icmp_sgt(ptr nocapture readonly %cur, ptr nocapture readonly %ref, i32 %stride) local_unnamed_addr #1 {
49; SSE2-LABEL: sad8_32bit_icmp_sgt:
50; SSE2:       # %bb.0: # %entry
51; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
52; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
53; SSE2-NEXT:    psadbw %xmm0, %xmm1
54; SSE2-NEXT:    movd %xmm1, %eax
55; SSE2-NEXT:    retq
56;
57; AVX-LABEL: sad8_32bit_icmp_sgt:
58; AVX:       # %bb.0: # %entry
59; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
60; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
61; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
62; AVX-NEXT:    vmovd %xmm0, %eax
63; AVX-NEXT:    retq
64entry:
  ; %idx.ext is the only use of %stride and is itself never used below.
65  %idx.ext = zext i32 %stride to i64
66  br label %for.body
67
  ; for.body is entered once from entry and ends in ret; there is no loop.
68for.body:                                         ; preds = %entry
  ; Load eight bytes from each pointer and zero-extend them to i32 lanes.
69  %0 = load <8 x i8>, ptr %cur, align 1
70  %1 = zext <8 x i8> %0 to <8 x i32>
71  %2 = load <8 x i8>, ptr %ref, align 1
72  %3 = zext <8 x i8> %2 to <8 x i32>
  ; Per-lane difference, then absolute value: keep the difference where it
  ; is strictly positive, otherwise take its negation.
73  %4 = sub nsw <8 x i32> %1, %3
74  %5 = icmp sgt <8 x i32> %4, zeroinitializer
75  %6 = sub nsw <8 x i32> zeroinitializer, %4
76  %7 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> %6
  ; Shuffle/add reduction: fold lanes 4-7 onto 0-3, then lanes 2-3 onto 0-1,
  ; then lane 1 onto lane 0.
77  %rdx.shuf = shufflevector <8 x i32> %7, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
78  %bin.rdx = add <8 x i32> %7, %rdx.shuf
79  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
80  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
81  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
82  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  ; Lane 0 now holds the sum of all eight absolute differences.
83  %8 = extractelement <8 x i32> %bin.rdx232, i32 0
84  ret i32 %8
85}
86
; sad8_32bit_icmp_sle
; Sums |cur[i] - ref[i]| over eight byte pairs in i32 lanes and returns the
; i32 total. The absolute value is spelled as "diff < 1 ? 0 - diff : diff",
; i.e. the signed "diff <= 0" form with the select arms swapped relative to
; the greater-than variants.
; The SSE2 and AVX assertions below expect two 8-byte loads feeding a single
; psadbw (vpsadbw with AVX).
; NOTE(review): attribute group #2 is referenced but no definition of it is
; visible in this part of the file - confirm that is intentional.
87define i32 @sad8_32bit_icmp_sle(ptr nocapture readonly %cur, ptr nocapture readonly %ref, i32 %stride) local_unnamed_addr #2 {
88; SSE2-LABEL: sad8_32bit_icmp_sle:
89; SSE2:       # %bb.0: # %entry
90; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
91; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
92; SSE2-NEXT:    psadbw %xmm0, %xmm1
93; SSE2-NEXT:    movd %xmm1, %eax
94; SSE2-NEXT:    retq
95;
96; AVX-LABEL: sad8_32bit_icmp_sle:
97; AVX:       # %bb.0: # %entry
98; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
99; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
100; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
101; AVX-NEXT:    vmovd %xmm0, %eax
102; AVX-NEXT:    retq
103entry:
  ; %idx.ext is the only use of %stride and is itself never used below.
104  %idx.ext = zext i32 %stride to i64
105  br label %for.body
106
  ; for.body is entered once from entry and ends in ret; there is no loop.
107for.body:                                         ; preds = %entry
  ; Load eight bytes from each pointer and zero-extend them to i32 lanes.
108  %0 = load <8 x i8>, ptr %cur, align 1
109  %1 = zext <8 x i8> %0 to <8 x i32>
110  %2 = load <8 x i8>, ptr %ref, align 1
111  %3 = zext <8 x i8> %2 to <8 x i32>
  ; Per-lane difference, then absolute value: where the difference is less
  ; than 1 (zero or negative) take its negation, otherwise keep it.
112  %4 = sub nsw <8 x i32> %1, %3
113  %5 = icmp slt <8 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
114  %6 = sub nsw <8 x i32> zeroinitializer, %4
115  %7 = select <8 x i1> %5, <8 x i32> %6, <8 x i32> %4
  ; Shuffle/add reduction: fold lanes 4-7 onto 0-3, then lanes 2-3 onto 0-1,
  ; then lane 1 onto lane 0.
116  %rdx.shuf = shufflevector <8 x i32> %7, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
117  %bin.rdx = add <8 x i32> %7, %rdx.shuf
118  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
119  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
120  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
121  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  ; Lane 0 now holds the sum of all eight absolute differences.
122  %8 = extractelement <8 x i32> %bin.rdx232, i32 0
123  ret i32 %8
124}
125
; sad8_32bit_icmp_slt
; Sums |cur[i] - ref[i]| over eight byte pairs in i32 lanes and returns the
; i32 total. The absolute value is spelled as "diff < 0 ? 0 - diff : diff".
; The SSE2 and AVX assertions below expect two 8-byte loads feeding a single
; psadbw (vpsadbw with AVX).
; NOTE(review): attribute group #3 is referenced but no definition of it is
; visible in this part of the file - confirm that is intentional.
126define i32 @sad8_32bit_icmp_slt(ptr nocapture readonly %cur, ptr nocapture readonly %ref, i32 %stride) local_unnamed_addr #3 {
127; SSE2-LABEL: sad8_32bit_icmp_slt:
128; SSE2:       # %bb.0: # %entry
129; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
130; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
131; SSE2-NEXT:    psadbw %xmm0, %xmm1
132; SSE2-NEXT:    movd %xmm1, %eax
133; SSE2-NEXT:    retq
134;
135; AVX-LABEL: sad8_32bit_icmp_slt:
136; AVX:       # %bb.0: # %entry
137; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
138; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
139; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
140; AVX-NEXT:    vmovd %xmm0, %eax
141; AVX-NEXT:    retq
142entry:
  ; %idx.ext is the only use of %stride and is itself never used below.
143  %idx.ext = zext i32 %stride to i64
144  br label %for.body
145
  ; for.body is entered once from entry and ends in ret; there is no loop.
146for.body:                                         ; preds = %entry
  ; Load eight bytes from each pointer and zero-extend them to i32 lanes.
147  %0 = load <8 x i8>, ptr %cur, align 1
148  %1 = zext <8 x i8> %0 to <8 x i32>
149  %2 = load <8 x i8>, ptr %ref, align 1
150  %3 = zext <8 x i8> %2 to <8 x i32>
  ; Per-lane difference, then absolute value: where the difference is
  ; negative take its negation, otherwise keep it.
151  %4 = sub nsw <8 x i32> %1, %3
152  %5 = icmp slt <8 x i32> %4, zeroinitializer
153  %6 = sub nsw <8 x i32> zeroinitializer, %4
154  %7 = select <8 x i1> %5, <8 x i32> %6, <8 x i32> %4
  ; Shuffle/add reduction: fold lanes 4-7 onto 0-3, then lanes 2-3 onto 0-1,
  ; then lane 1 onto lane 0.
155  %rdx.shuf = shufflevector <8 x i32> %7, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
156  %bin.rdx = add <8 x i32> %7, %rdx.shuf
157  %rdx.shuf229 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
158  %bin.rdx230 = add <8 x i32> %bin.rdx, %rdx.shuf229
159  %rdx.shuf231 = shufflevector <8 x i32> %bin.rdx230, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
160  %bin.rdx232 = add <8 x i32> %bin.rdx230, %rdx.shuf231
  ; Lane 0 now holds the sum of all eight absolute differences.
161  %8 = extractelement <8 x i32> %bin.rdx232, i32 0
162  ret i32 %8
163}
164
; sad8_64bit_icmp_sext_slt
; Computes |cur[i] - ref[i]| for eight byte pairs in i32 lanes
; ("diff < 0 ? 0 - diff : diff"), sign-extends each result to i64, and then
; sums the eight i64 lanes, returning the i64 total.
; The SSE2 and AVX assertions below expect two 8-byte loads feeding a single
; psadbw (vpsadbw with AVX), with the result moved out as a 64-bit value.
; %stride is not referenced anywhere in the body.
; NOTE(review): attribute group #4 is referenced but no definition of it is
; visible in this part of the file - confirm that is intentional.
165define i64 @sad8_64bit_icmp_sext_slt(ptr nocapture readonly %cur, ptr nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
166; SSE2-LABEL: sad8_64bit_icmp_sext_slt:
167; SSE2:       # %bb.0: # %entry
168; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
169; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
170; SSE2-NEXT:    psadbw %xmm0, %xmm1
171; SSE2-NEXT:    movq %xmm1, %rax
172; SSE2-NEXT:    retq
173;
174; AVX-LABEL: sad8_64bit_icmp_sext_slt:
175; AVX:       # %bb.0: # %entry
176; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
177; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
178; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
179; AVX-NEXT:    vmovq %xmm0, %rax
180; AVX-NEXT:    retq
181entry:
182  br label %for.body
183
  ; for.body is entered once from entry and ends in ret; there is no loop.
184for.body:                                         ; preds = %entry
  ; Load eight bytes from each pointer and zero-extend them to i32 lanes.
185  %0 = load <8 x i8>, ptr %cur, align 1
186  %1 = zext <8 x i8> %0 to <8 x i32>
187  %2 = load <8 x i8>, ptr %ref, align 1
188  %3 = zext <8 x i8> %2 to <8 x i32>
  ; Per-lane difference and absolute value, still in i32.
189  %4 = sub nsw <8 x i32> %1, %3
190  %5 = icmp slt <8 x i32> %4, zeroinitializer
191  %6 = sub nsw <8 x i32> zeroinitializer, %4
192  %7 = select <8 x i1> %5, <8 x i32> %6, <8 x i32> %4
  ; Widen the absolute differences to i64 with a sign extension before
  ; reducing.
193  %8 = sext <8 x i32> %7 to <8 x i64>
  ; Shuffle/add reduction in i64: fold lanes 4-7 onto 0-3, then lanes 2-3
  ; onto 0-1, then lane 1 onto lane 0. The first add takes the shuffled
  ; vector as its left operand.
194  %rdx.shuf = shufflevector <8 x i64> %8, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
195  %bin.rdx = add <8 x i64> %rdx.shuf, %8
196  %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
197  %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
198  %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
199  %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
  ; Lane 0 now holds the sum of all eight widened absolute differences.
200  %9 = extractelement <8 x i64> %bin.rdx239, i32 0
201  ret i64 %9
202}
203
; sad8_64bit_icmp_zext_slt
; Computes |cur[i] - ref[i]| for eight byte pairs in i32 lanes
; ("diff < 0 ? 0 - diff : diff"), zero-extends each result to i64, and then
; sums the eight i64 lanes, returning the i64 total.
; The SSE2 and AVX assertions below expect two 8-byte loads feeding a single
; psadbw (vpsadbw with AVX), with the result moved out as a 64-bit value.
; %stride is not referenced anywhere in the body.
; NOTE(review): attribute group #4 is referenced but no definition of it is
; visible in this part of the file - confirm that is intentional.
204define i64 @sad8_64bit_icmp_zext_slt(ptr nocapture readonly %cur, ptr nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
205; SSE2-LABEL: sad8_64bit_icmp_zext_slt:
206; SSE2:       # %bb.0: # %entry
207; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
208; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
209; SSE2-NEXT:    psadbw %xmm0, %xmm1
210; SSE2-NEXT:    movq %xmm1, %rax
211; SSE2-NEXT:    retq
212;
213; AVX-LABEL: sad8_64bit_icmp_zext_slt:
214; AVX:       # %bb.0: # %entry
215; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
216; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
217; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
218; AVX-NEXT:    vmovq %xmm0, %rax
219; AVX-NEXT:    retq
220entry:
221  br label %for.body
222
  ; for.body is entered once from entry and ends in ret; there is no loop.
223for.body:                                         ; preds = %entry
  ; Load eight bytes from each pointer and zero-extend them to i32 lanes.
224  %0 = load <8 x i8>, ptr %cur, align 1
225  %1 = zext <8 x i8> %0 to <8 x i32>
226  %2 = load <8 x i8>, ptr %ref, align 1
227  %3 = zext <8 x i8> %2 to <8 x i32>
  ; Per-lane difference and absolute value, still in i32.
228  %4 = sub nsw <8 x i32> %1, %3
229  %5 = icmp slt <8 x i32> %4, zeroinitializer
230  %6 = sub nsw <8 x i32> zeroinitializer, %4
231  %7 = select <8 x i1> %5, <8 x i32> %6, <8 x i32> %4
  ; Widen the absolute differences to i64 with a zero extension before
  ; reducing.
232  %8 = zext <8 x i32> %7 to <8 x i64>
  ; Shuffle/add reduction in i64: fold lanes 4-7 onto 0-3, then lanes 2-3
  ; onto 0-1, then lane 1 onto lane 0. The first add takes the shuffled
  ; vector as its left operand.
233  %rdx.shuf = shufflevector <8 x i64> %8, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
234  %bin.rdx = add <8 x i64> %rdx.shuf, %8
235  %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
236  %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
237  %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
238  %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
  ; Lane 0 now holds the sum of all eight widened absolute differences.
239  %9 = extractelement <8 x i64> %bin.rdx239, i32 0
240  ret i64 %9
241}
242
; sad8_early_64bit_icmp_zext_slt
; "Early" widening variant: the loaded bytes are zero-extended straight to
; i64, so the subtraction, the "diff < 0 ? 0 - diff : diff" absolute value
; and the reduction are all carried out in i64 lanes. Returns the i64 sum of
; the eight absolute differences.
; The SSE2 and AVX assertions below expect two 8-byte loads feeding a single
; psadbw (vpsadbw with AVX), with the result moved out as a 64-bit value.
; %stride is not referenced anywhere in the body.
; NOTE(review): attribute group #4 is referenced but no definition of it is
; visible in this part of the file - confirm that is intentional.
243define i64 @sad8_early_64bit_icmp_zext_slt(ptr nocapture readonly %cur, ptr nocapture readonly %ref, i64 %stride) local_unnamed_addr #4 {
244; SSE2-LABEL: sad8_early_64bit_icmp_zext_slt:
245; SSE2:       # %bb.0: # %entry
246; SSE2-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
247; SSE2-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
248; SSE2-NEXT:    psadbw %xmm0, %xmm1
249; SSE2-NEXT:    movq %xmm1, %rax
250; SSE2-NEXT:    retq
251;
252; AVX-LABEL: sad8_early_64bit_icmp_zext_slt:
253; AVX:       # %bb.0: # %entry
254; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
255; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
256; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
257; AVX-NEXT:    vmovq %xmm0, %rax
258; AVX-NEXT:    retq
259entry:
260  br label %for.body
261
  ; for.body is entered once from entry and ends in ret; there is no loop.
262for.body:                                         ; preds = %entry
  ; Load eight bytes from each pointer and zero-extend them directly to
  ; i64 lanes.
263  %0 = load <8 x i8>, ptr %cur, align 1
264  %1 = zext <8 x i8> %0 to <8 x i64>
265  %2 = load <8 x i8>, ptr %ref, align 1
266  %3 = zext <8 x i8> %2 to <8 x i64>
  ; Per-lane difference and absolute value, performed in i64.
267  %4 = sub nsw <8 x i64> %1, %3
268  %5 = icmp slt <8 x i64> %4, zeroinitializer
269  %6 = sub nsw <8 x i64> zeroinitializer, %4
270  %7 = select <8 x i1> %5, <8 x i64> %6, <8 x i64> %4
  ; Shuffle/add reduction in i64: fold lanes 4-7 onto 0-3, then lanes 2-3
  ; onto 0-1, then lane 1 onto lane 0. The first add takes the shuffled
  ; vector as its left operand.
271  %rdx.shuf = shufflevector <8 x i64> %7, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
272  %bin.rdx = add <8 x i64> %rdx.shuf, %7
273  %rdx.shuf236 = shufflevector <8 x i64> %bin.rdx, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
274  %bin.rdx237 = add <8 x i64> %bin.rdx, %rdx.shuf236
275  %rdx.shuf238 = shufflevector <8 x i64> %bin.rdx237, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
276  %bin.rdx239 = add <8 x i64> %bin.rdx237, %rdx.shuf238
  ; Lane 0 now holds the sum of all eight absolute differences.
277  %8 = extractelement <8 x i64> %bin.rdx239, i32 0
278  ret i64 %8
279}
280