; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 | FileCheck %s

; Verify that the backend correctly combines AVX2 builtin intrinsics.

;
; VPBLEND Identities
;
; Each blend below is expected to disappear entirely or degrade to a plain
; register move; no vpblendw/vpblendd instruction should be emitted.
;

; Both blend operands are the same value (%a0), so the expected assembly is a
; bare return.
define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0) {
; CHECK-LABEL: test_x86_avx2_pblendw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a0, i32 7)
  ret <16 x i16> %res
}

; 128-bit dword variant of the same-operand blend; expected to be a bare return.
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pblendd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a0, i32 7)
  ret <4 x i32> %res
}

; 256-bit dword variant of the same-operand blend; expected to be a bare return.
define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0) {
; CHECK-LABEL: test_x86_avx2_pblendd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a0, i32 7)
  ret <8 x i32> %res
}

; Blend mask of 0 with two distinct operands: the expected assembly is a bare
; return, i.e. the first argument is passed through untouched.
define <16 x i16> @test2_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test2_x86_avx2_pblendw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 0)
  ret <16 x i16> %res
}

; 128-bit dword variant of the mask-0 blend; expected to be a bare return.
define <4 x i32> @test2_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test2_x86_avx2_pblendd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 0)
  ret <4 x i32> %res
}

; 256-bit dword variant of the mask-0 blend; expected to be a bare return.
define <8 x i32> @test2_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test2_x86_avx2_pblendd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 0)
  ret <8 x i32> %res
}

; Blend mask of -1 (all bits set): the expected assembly is a single move of
; the second argument's register into the result register.
define <16 x i16> @test3_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test3_x86_avx2_pblendw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i32 -1)
  ret <16 x i16> %res
}

; 128-bit dword variant of the mask -1 blend; expected to be one xmm move.
define <4 x i32> @test3_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test3_x86_avx2_pblendd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 -1)
  ret <4 x i32> %res
}

; 256-bit dword variant of the mask -1 blend; expected to be one ymm move.
define <8 x i32> @test3_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test3_x86_avx2_pblendd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 -1)
  ret <8 x i32> %res
}

;
; Demanded Elts
;
; In each test only element 0 of the variable shift is used afterwards (the
; result is splatted from element 0), so the shuffle feeding the shift is
; expected to vanish from the generated code.
;

; NOTE: despite the "vpsllvd" in its name, this test calls the 64-bit element
; intrinsic (psllv.q) and expects vpsllvq.
; The first shift operand is a splat of element 0 of %a0; the expected
; assembly shifts the original registers and then broadcasts.
define <2 x i64> @demandedelts_vpsllvd(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: demandedelts_vpsllvd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> zeroinitializer
  %shift = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %shuffle, <2 x i64> %a1)
  %res = shufflevector <2 x i64> %shift, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %res
}

; The first shift operand is %a0 shuffled with mask <0,1,1,1>, which leaves
; element 0 in place; the expected assembly contains no shuffle before the
; shift.
define <4 x i32> @demandedelts_vpsravd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: demandedelts_vpsravd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
  %shift = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %shuffle, <4 x i32> %a1)
  %res = shufflevector <4 x i32> %shift, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; Here the splat is applied to the shift-amount operand (%a1) of a 256-bit
; shift. The expected assembly performs the shift on xmm registers and then
; broadcasts element 0 into the full ymm result.
define <4 x i64> @demandedelts_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: demandedelts_vpsrlvq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> zeroinitializer
  %shift = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %shuffle)
  %result = shufflevector <4 x i64> %shift, <4 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %result
}

;
; isBinOp Handling
;
; Both shift operands are permuted with the same mask and the shift result is
; permuted with that mask again. The expected assembly is a single variable
; shift with no shuffle instructions.
;

; 128-bit logical left shift; the mask <3,2,1,0> reverses the four elements.
define <4 x i32> @binop_shuffle_vpsllvd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: binop_shuffle_vpsllvd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %shuffle0 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %shuffle1 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %shift = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %shuffle0, <4 x i32> %shuffle1)
  %res = shufflevector <4 x i32> %shift, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %res
}

; 256-bit arithmetic right shift; the mask <3,2,1,0,7,6,5,4> reverses each
; group of four elements independently.
define <8 x i32> @binop_shuffle_vpsravd(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: binop_shuffle_vpsravd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsravd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %shuffle0 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %shuffle1 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %shift = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %shuffle0, <8 x i32> %shuffle1)
  %res = shufflevector <8 x i32> %shift, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  ret <8 x i32> %res
}

; 256-bit logical right shift on 64-bit elements; the mask <3,2,1,0> reverses
; all four elements.
define <4 x i64> @binop_shuffle_vpsrlvq(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: binop_shuffle_vpsrlvq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %shuffle0 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %shuffle1 = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  %shift = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %shuffle0, <4 x i64> %shuffle1)
  %res = shufflevector <4 x i64> %shift, <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %res
}

; Blend intrinsics used by the VPBLEND identity tests.
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i32)
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32)
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32)

; 128-bit variable-shift intrinsics. Not every declaration in this group and
; the next is called by the tests above.
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone

; 256-bit variable-shift intrinsics.
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone