xref: /llvm-project/llvm/test/CodeGen/X86/combine-pavg.ll (revision d650fcd6bf1323513213dd69eacbb2b08c870618)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
5
; Declarations of the x86 SSE2 PAVG intrinsics called by the test functions in
; this file: the <16 x i8> (byte) variant and the <8 x i16> (word) variant.
6declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
7declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
8
; Calls the byte PAVG intrinsic with %a0 as both operands and returns the result.
; The expected output for every RUN configuration is a bare `retq`: no pavgb
; instruction is emitted, i.e. the call is expected to fold to its operand.
9define <16 x i8> @combine_pavgb_self(<16 x i8> %a0) {
10; SSE-LABEL: combine_pavgb_self:
11; SSE:       # %bb.0:
12; SSE-NEXT:    retq
13;
14; AVX-LABEL: combine_pavgb_self:
15; AVX:       # %bb.0:
16; AVX-NEXT:    retq
17  %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a0)
18  ret <16 x i8> %1
19}
20
; Calls the byte PAVG intrinsic with an all-zero first operand and %a0 as the
; second operand, and returns the result.
; The expected output still zeroes a register (pxor/vpxor) and issues
; pavgb/vpavgb, so these checks record that the call is NOT folded away.
21define <16 x i8> @combine_pavgb_zero(<16 x i8> %a0) {
22; SSE-LABEL: combine_pavgb_zero:
23; SSE:       # %bb.0:
24; SSE-NEXT:    pxor %xmm1, %xmm1
25; SSE-NEXT:    pavgb %xmm1, %xmm0
26; SSE-NEXT:    retq
27;
28; AVX-LABEL: combine_pavgb_zero:
29; AVX:       # %bb.0:
30; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
31; AVX-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
32; AVX-NEXT:    retq
33  %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> zeroinitializer, <16 x i8> %a0)
34  ret <16 x i8> %1
35}
36
; Masks each of the four <8 x i16> inputs with 31, averages them pairwise with
; the word PAVG intrinsic, concatenates the two results into <16 x i16> and
; truncates that to <16 x i8>.
; The expected output performs the concatenate+truncate with a single
; packuswb/vpackuswb directly after the two pavgw instructions; no additional
; masking of the averaged values appears before the pack.
; NOTE(review): presumably this relies on the upper bits of every averaged lane
; being known zero (inputs are limited to 0..31), so the unsigned-saturating
; pack behaves as a plain truncation - confirm against the combine under test.
37define <16 x i8> @combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
38; SSE-LABEL: combine_pavgw_knownbits:
39; SSE:       # %bb.0:
40; SSE-NEXT:    pmovsxbw {{.*#+}} xmm4 = [31,31,31,31,31,31,31,31]
41; SSE-NEXT:    pand %xmm4, %xmm0
42; SSE-NEXT:    pand %xmm4, %xmm1
43; SSE-NEXT:    pavgw %xmm1, %xmm0
44; SSE-NEXT:    pand %xmm4, %xmm2
45; SSE-NEXT:    pand %xmm4, %xmm3
46; SSE-NEXT:    pavgw %xmm2, %xmm3
47; SSE-NEXT:    packuswb %xmm3, %xmm0
48; SSE-NEXT:    retq
49;
50; AVX1-LABEL: combine_pavgw_knownbits:
51; AVX1:       # %bb.0:
52; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm4 = [31,31,31,31,31,31,31,31]
53; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
54; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
55; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
56; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm1
57; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm2
58; AVX1-NEXT:    vpavgw %xmm2, %xmm1, %xmm1
59; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
60; AVX1-NEXT:    retq
61;
62; AVX2-LABEL: combine_pavgw_knownbits:
63; AVX2:       # %bb.0:
64; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm4 = [31,31,31,31,31,31,31,31]
65; AVX2-NEXT:    vpand %xmm4, %xmm0, %xmm0
66; AVX2-NEXT:    vpand %xmm4, %xmm1, %xmm1
67; AVX2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
68; AVX2-NEXT:    vpand %xmm4, %xmm2, %xmm1
69; AVX2-NEXT:    vpand %xmm4, %xmm3, %xmm2
70; AVX2-NEXT:    vpavgw %xmm2, %xmm1, %xmm1
71; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
72; AVX2-NEXT:    retq
  ; Keep only the low five bits of every lane of each input.
73  %m0 = and <8 x i16> %a0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
74  %m1 = and <8 x i16> %a1, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
75  %m2 = and <8 x i16> %a2, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
76  %m3 = and <8 x i16> %a3, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
  ; Average the masked inputs pairwise: (%m0, %m1) and (%m2, %m3).
77  %avg01 = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %m0, <8 x i16> %m1)
78  %avg23 = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %m2, <8 x i16> %m3)
  ; Identity mask 0..15: concatenates %avg01 (low half) and %avg23 (high half).
79  %shuffle = shufflevector <8 x i16> %avg01, <8 x i16> %avg23, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ; Narrow every 16-bit lane to 8 bits.
80  %trunc = trunc <16 x i16> %shuffle to <16 x i8>
81  ret <16 x i8> %trunc
82}
83
; Shuffles %a0 so each even lane is duplicated into the following odd lane,
; averages the result with %a1 using the word PAVG intrinsic, then splats lane 0
; of the average across the whole vector.
; Only lane 0 of the average is used, and lane 0 of %s0 is lane 0 of %a0, so the
; first shuffle does not affect the returned value. The expected output applies
; pavgw/vpavgw directly to the two incoming registers and then broadcasts lane 0
; (pshuflw+pshufd on SSE/AVX1, vpbroadcastw on AVX2); the first shuffle is absent.
84define <8 x i16> @combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
85; SSE-LABEL: combine_pavgw_demandedelts:
86; SSE:       # %bb.0:
87; SSE-NEXT:    pavgw %xmm1, %xmm0
88; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
89; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
90; SSE-NEXT:    retq
91;
92; AVX1-LABEL: combine_pavgw_demandedelts:
93; AVX1:       # %bb.0:
94; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
95; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
96; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
97; AVX1-NEXT:    retq
98;
99; AVX2-LABEL: combine_pavgw_demandedelts:
100; AVX2:       # %bb.0:
101; AVX2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
102; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
103; AVX2-NEXT:    retq
  ; Duplicate lanes 0, 2, 4 and 6 of %a0 into lanes 1, 3, 5 and 7.
104  %s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
105  %avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1)
  ; An all-zero mask selects lane 0 of %avg for every result lane (splat).
106  %shuffle = shufflevector <8 x i16> %avg, <8 x i16> poison, <8 x i32> zeroinitializer
107  ret <8 x i16> %shuffle
108}
109
110