; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2

declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone

; pavgb(x,x) averages to x, so the intrinsic folds away completely.
define <16 x i8> @combine_pavgb_self(<16 x i8> %a0) {
; SSE-LABEL: combine_pavgb_self:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pavgb_self:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a0)
  ret <16 x i8> %1
}

; pavgb(0,x) = (x+1)>>1, which cannot fold to x, so the zero operand is still materialized.
define <16 x i8> @combine_pavgb_zero(<16 x i8> %a0) {
; SSE-LABEL: combine_pavgb_zero:
; SSE:       # %bb.0:
; SSE-NEXT:    pxor %xmm1, %xmm1
; SSE-NEXT:    pavgb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_pavgb_zero:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpavgb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> zeroinitializer, <16 x i8> %a0)
  ret <16 x i8> %1
}

; Masking the inputs to 5 bits keeps each average within 8 bits, so the truncation lowers straight to packuswb.
define <16 x i8> @combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE-LABEL: combine_pavgw_knownbits:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxbw {{.*#+}} xmm4 = [31,31,31,31,31,31,31,31]
; SSE-NEXT:    pand %xmm4, %xmm0
; SSE-NEXT:    pand %xmm4, %xmm1
; SSE-NEXT:    pavgw %xmm1, %xmm0
; SSE-NEXT:    pand %xmm4, %xmm2
; SSE-NEXT:    pand %xmm4, %xmm3
; SSE-NEXT:    pavgw %xmm2, %xmm3
; SSE-NEXT:    packuswb %xmm3, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_pavgw_knownbits:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm4 = [31,31,31,31,31,31,31,31]
; AVX1-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpand %xmm4, %xmm2, %xmm1
; AVX1-NEXT:    vpand %xmm4, %xmm3, %xmm2
; AVX1-NEXT:    vpavgw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_pavgw_knownbits:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm4 = [31,31,31,31,31,31,31,31]
; AVX2-NEXT:    vpand %xmm4, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm4, %xmm1, %xmm1
; AVX2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand %xmm4, %xmm2, %xmm1
; AVX2-NEXT:    vpand %xmm4, %xmm3, %xmm2
; AVX2-NEXT:    vpavgw %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %m0 = and <8 x i16> %a0, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
  %m1 = and <8 x i16> %a1, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
  %m2 = and <8 x i16> %a2, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
  %m3 = and <8 x i16> %a3, <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
  %avg01 = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %m0, <8 x i16> %m1)
  %avg23 = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %m2, <8 x i16> %m3)
  %shuffle = shufflevector <8 x i16> %avg01, <8 x i16> %avg23, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %trunc = trunc <16 x i16> %shuffle to <16 x i8>
  ret <16 x i8> %trunc
}

; Only lane 0 of the result is demanded (it is broadcast), so the shuffle feeding %a0 is dropped.
define <8 x i16> @combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: combine_pavgw_demandedelts:
; SSE:       # %bb.0:
; SSE-NEXT:    pavgw %xmm1, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_pavgw_demandedelts:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_pavgw_demandedelts:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpavgw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1)
  %shuffle = shufflevector <8 x i16> %avg, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %shuffle
}