; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X86 --check-prefix=X86-AVX512VL
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X64 --check-prefix=X64-AVX512VL

define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_packssdw:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x6b,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packssdw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_packssdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packssdw_fold:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X86-AVX-NEXT:    # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packssdw_fold:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X64-AVX-NEXT:    # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}
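; A sanity check on the folded constant above, assuming packssdw's signed
; saturation of each i32 to [-32768, 32767]: 255 -> 255, 32767 -> 32767,
; 65535 -> 32767, -1 -> -1 (printed unsigned as 65535), -32767 -> -32767
; (32769), -65535 -> -32768 (32768), 0 -> 0, -256 -> -256 (65280).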


define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_packsswb:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x63,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packsswb:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packsswb_fold() {
; X86-AVX-LABEL: test_x86_avx2_packsswb_fold:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packsswb_fold:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}
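; Note on the codegen above: the same eight i16 inputs feed both 128-bit
; lanes and the second operand is zero, so the two result lanes are
; identical and the folded constant can be materialized with a 16-byte
; vbroadcastf128 instead of a full 32-byte load.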


define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_packuswb:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x67,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packuswb:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packuswb_fold() {
; X86-AVX-LABEL: test_x86_avx2_packuswb_fold:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packuswb_fold:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vbroadcastf128 {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}


define <32 x i8> @test_x86_avx2_pavg_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pavg_b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpavgb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe0,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pavg_b:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpavgb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8>, <32 x i8>) nounwind readnone
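; The two pavg tests (byte above, word below) exercise the unsigned
; rounding average, (a + b + 1) >> 1, computed at full precision before
; truncating back to the lane width.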


define <16 x i16> @test_x86_avx2_pavg_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pavg_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpavgw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe3,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pavg_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpavgw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmadd_wd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf5,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmadd_wd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone


define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
; CHECK-LABEL: test_x86_avx2_pmovmskb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovmskb %ymm0, %eax # encoding: [0xc5,0xfd,0xd7,0xc0]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmulh_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe5,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmulh_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmulhu_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe4,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmulhu_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_psad_bw:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf6,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psad_bw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf6,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone
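; vpsadbw sums the absolute differences of the eight unsigned-byte pairs
; in each 64-bit group and zero-extends the sum into that group, one i64
; per group, hence the <4 x i64> result type.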


define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psll_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf2,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psll_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslld %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psll_q:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf3,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psll_q:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psll_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xf1,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psll_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_pslli_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpslld $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pslli_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpslld $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
; AVX2-LABEL: test_x86_avx2_pslli_q:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllq $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pslli_q:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllq $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone
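; For the vpsll{w,d,q} register forms above, the shift count is taken
; from the low 64 bits of the xmm operand and applied to every lane; a
; count at or above the element width zeroes the whole result.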


define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_pslli_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pslli_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psra_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe2,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psra_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psra_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xe1,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psra_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_psrai_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrai_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrad $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_psrai_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsraw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrai_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsraw $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone
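; Unlike the logical shifts, the arithmetic right shifts above saturate
; the count: an out-of-range count fills every lane with copies of its
; sign bit rather than zeroing it.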


define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrl_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd2,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrl_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psrl_q:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd3,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrl_q:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_psrl_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd1,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrl_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, ptr %p) {
; X86-AVX-LABEL: test_x86_avx2_psrl_w_load:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vpsrlw (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd1,0x00]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrl_w_load:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vpsrlw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0x00]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrl_w_load:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpsrlw (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xd1,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrl_w_load:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpsrlw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a1 = load <8 x i16>, ptr %p
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
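; In test_x86_avx2_psrl_w_load the shift count is a full 128-bit operand,
; so the load folds straight into vpsrlw's memory operand; no separate
; vmovdqa is needed.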


define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
; AVX2-LABEL: test_x86_avx2_psrli_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrli_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrld $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
; AVX2-LABEL: test_x86_avx2_psrli_q:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlq $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrli_q:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
; AVX2-LABEL: test_x86_avx2_psrli_w:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrlw $7, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrli_w:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone


define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_phadd_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x02,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phadd_sw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x03,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phadd_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphaddw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x01,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
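; The horizontal add/sub tests use the shared CHECK prefix: vphadd*/vphsub*
; have no EVEX form, so the AVX2 and AVX512VL runs emit the same VEX
; encoding (note there is no "EVEX TO VEX" comment on them).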


define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_phsub_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x06,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phsub_sw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x07,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_phsub_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vphsubw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x05,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pmadd_ub_sw:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone

; Make sure we don't commute this operation.
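; (vpmaddubsw treats its first source as unsigned bytes and its second as
; signed bytes, so swapping the operands to fold the load would change the
; result; the operand is loaded with a separate vmovdqa instead.)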
define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovdqa (%eax), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovdqa (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX512VL-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa (%rdi), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX512VL-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a0 = load <32 x i8>, ptr %ptr
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}

define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
; AVX2-LABEL: test_x86_avx2_pmul_hr_sw:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pmul_hr_sw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx2_pshuf_b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_pshuf_b:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
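; The vpsign{b,d,w} tests below negate each lane of the first operand
; where the matching lane of the second is negative, keep it where
; positive, and zero it where the second lane is zero.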


define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_psign_b:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x08,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_psign_d:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x0a,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_psign_w:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x09,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; CHECK-LABEL: test_x86_avx2_mpsadbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x42,0xc1,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone

; We shouldn't commute this operation to fold the load.
define <16 x i16> @test_x86_avx2_mpsadbw_load_op0(ptr %ptr, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_mpsadbw_load_op0:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovdqa (%eax), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_mpsadbw_load_op0:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovdqa (%eax), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX512VL-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_mpsadbw_load_op0:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa (%rdi), %ymm1 # encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_mpsadbw_load_op0:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX512VL-NEXT:    vmpsadbw $7, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x42,0xc0,0x07]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %a0 = load <32 x i8>, ptr %ptr
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
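; Rationale for the test above: mpsadbw's immediate selects a byte block
; from each source through separate fields, so commuting the operands to
; fold the load would compare different blocks.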

define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_packusdw:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_packusdw:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_packusdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    # encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}


define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; CHECK-LABEL: test_x86_avx2_pblendvb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x4c,0xc1,0x20]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpblendw $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0e,0xc1,0x07]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
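; Decoding vpblendw's immediate in the test above: $7 = 0b00000111 takes
; words 0-2 of each 128-bit lane from the second source and the rest from
; the first, matching the shuffle mask in the assembler comment.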


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; CHECK-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[3]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_x86_avx2_pblendd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vblendps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
; CHECK-NEXT:    # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone


; Check that the arguments are swapped between the intrinsic definition
; and its lowering. Indeed, the offsets are the first source in
; the instruction.
define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_permd:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_permd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
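; Concretely, vpermps %ymm0, %ymm1, %ymm0 above computes
; result[i] = a0[a1[i] & 7]: the index vector %a1 is the first source in
; Intel operand order, hence the swap relative to the intrinsic.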


; Check that the arguments are swapped between the intrinsic definition
; and its lowering. Indeed, the offsets are the first source in
; the instruction.
define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_permps:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_permps:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly


define <2 x i64> @test_x86_avx2_maskload_q(ptr %a0, <2 x i64> %a1) {
; X86-LABEL: test_x86_avx2_maskload_q:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x8c,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_q:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x8c,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(ptr %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.maskload.q(ptr, <2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_maskload_q_256(ptr %a0, <4 x i64> %a1) {
; X86-LABEL: test_x86_avx2_maskload_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x8c,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x8c,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr, <4 x i64>) nounwind readonly


define <4 x i32> @test_x86_avx2_maskload_d(ptr %a0, <4 x i32> %a1) {
; X86-LABEL: test_x86_avx2_maskload_d:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd (%eax), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x8c,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_d:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x8c,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(ptr %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.maskload.d(ptr, <4 x i32>) nounwind readonly
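; For the maskload tests: lanes whose mask element has the sign bit clear
; read as zero, and masked-off lanes are guaranteed not to fault, so these
; cannot be lowered to plain vector loads.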


define <8 x i32> @test_x86_avx2_maskload_d_256(ptr %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_maskload_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd (%eax), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x8c,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x8c,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr, <8 x i32>) nounwind readonly


define void @test_x86_avx2_maskstore_q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_q:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0xf9,0x8e,0x08]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_q:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0xf9,0x8e,0x0f]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.q(ptr %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q(ptr, <2 x i64>, <2 x i64>) nounwind


define void @test_x86_avx2_maskstore_q_256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0xfd,0x8e,0x08]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovq %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0xfd,0x8e,0x0f]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.q.256(ptr %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q.256(ptr, <4 x i64>, <4 x i64>) nounwind


define void @test_x86_avx2_maskstore_d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_d:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd %xmm1, %xmm0, (%eax) # encoding: [0xc4,0xe2,0x79,0x8e,0x08]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_d:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi) # encoding: [0xc4,0xe2,0x79,0x8e,0x0f]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.d(ptr %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d(ptr, <4 x i32>, <4 x i32>) nounwind


define void @test_x86_avx2_maskstore_d_256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd %ymm1, %ymm0, (%eax) # encoding: [0xc4,0xe2,0x7d,0x8e,0x08]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmaskmovd %ymm1, %ymm0, (%rdi) # encoding: [0xc4,0xe2,0x7d,0x8e,0x0f]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.d.256(ptr %a0, <8 x i32> %a1, <8 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d.256(ptr, <8 x i32>, <8 x i32>) nounwind
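; The 256-bit maskstore tests above end in vzeroupper: the function
; dirtied the upper ymm state and returns no vector value. The vpsllv*
; tests below differ from vpsll*: every lane is shifted by its own count,
; and a count at or above the element width zeroes just that lane.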


define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_d:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_d:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}

define <4 x i32> @test_x86_avx2_psllv_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_d_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,1,1,4294967295]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X86-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,1,1,4294967295]
; X86-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X86-AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_d_const:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,1,1,4294967295]
; X64-AVX-NEXT:    # encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X64-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X64-AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX-NEXT:    retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
; X64-AVX512VL:       # %bb.0:
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [1,1,1,4294967295]
; X64-AVX512VL-NEXT:    # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
; X64-AVX512VL-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
  %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
  %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> <i32 1, i32 1, i32 1, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <4 x i32> %res0, %res1
  ret <4 x i32> %res2
}
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_d_256:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; AVX2-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_d_256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}

define <8 x i32> @test_x86_avx2_psllv_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX-NEXT:    # encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X86-AVX-NEXT:    # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX-NEXT:    retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
; X86-AVX512VL:       # %bb.0:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
  %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <8 x i32> %res0, %res1
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_q:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_q:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
define <2 x i64> @test_x86_avx2_psllv_q_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_q_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpmovsxbq {{.*#+}} xmm0 = [4,18446744073709551615]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpmovsxbq {{.*#+}} xmm0 = [4,18446744073709551615]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_q_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmovsxbq {{.*#+}} xmm0 = [4,18446744073709551615]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpmovsxbq {{.*#+}} xmm0 = [4,18446744073709551615]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> <i64 4, i64 -1>, <2 x i64> <i64 1, i64 -1>)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psllv_q_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psllv_q_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsllvq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}

define <4 x i64> @test_x86_avx2_psllv_q_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psllv_q_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpmovsxbq {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_q_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmovsxbq {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsllvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 -1>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}

define <4 x i32> @test_x86_avx2_psrlv_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_d_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,4,4,4294967295]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,4,4,4294967295]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_d_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,4,4,4294967295]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpmovsxbd {{.*#+}} xmm0 = [2,9,0,4294967295]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpmovsxbd {{.*#+}} xmm1 = [4,4,4,4294967295]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 2, i32 9, i32 0, i32 -1>, <4 x i32> <i32 1, i32 0, i32 33, i32 -1>)
  %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> <i32 4, i32 4, i32 4, i32 -1>, <4 x i32> <i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <4 x i32> %res0, %res1
  ret <4 x i32> %res2
}
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_d_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_d_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlvd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}

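; Note: unlike the legacy uniform shifts, the AVX2 per-element shifts do not
; clamp out-of-range counts - vpsllvd/vpsrlvd write 0 to any element whose
; count is >= 32 (e.g. the 33/-1/-2 count lanes used by these const tests).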
define <8 x i32> @test_x86_avx2_psrlv_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpmovsxbd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0>, <8 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2>)
  %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
  %res2 = add <8 x i32> %res0, %res1
  ret <8 x i32> %res2
}
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_q:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_q:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlvq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}

define <2 x i64> @test_x86_avx2_psrlv_q_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_q_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpmovsxbq {{.*#+}} xmm0 = [4,4]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpmovsxbq {{.*#+}} xmm0 = [4,4]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_q_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmovsxbq {{.*#+}} xmm0 = [4,4]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x22,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpbroadcastq {{.*#+}} xmm0 = [4,4]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> <i64 4, i64 4>, <2 x i64> <i64 1, i64 -1>)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; AVX2-LABEL: test_x86_avx2_psrlv_q_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrlv_q_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrlvq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}


define <4 x i64> @test_x86_avx2_psrlv_q_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psrlv_q_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpmovsxbq {{.*#+}} ymm0 = [4,4,4,4]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpmovsxbq {{.*#+}} ymm0 = [4,4,4,4]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_q_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpbroadcastq {{.*#+}} ymm0 = [4,4,4,4]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm0 = [4,4,4,4]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsrlvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> <i64 4, i64 4, i64 4, i64 4>, <4 x i64> <i64 1, i64 1, i64 1, i64 -1>)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


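; Note: the arithmetic variant handles out-of-range counts differently -
; for vpsravd a count >= 32 fills the element with copies of the sign bit,
; so negative lanes come out as -1 rather than 0.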
define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrav_d:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrav_d:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsravd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}

define <4 x i32> @test_x86_avx2_psrav_d_const() {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpmovsxbd {{.*#+}} xmm0 = [2,9,4294967284,23]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpmovsxbd {{.*#+}} xmm0 = [2,9,4294967284,23]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmovsxbd {{.*#+}} xmm0 = [2,9,4294967284,23]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpmovsxbd {{.*#+}} xmm0 = [2,9,4294967284,23]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; AVX2-LABEL: test_x86_avx2_psrav_d_256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; AVX2-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
; AVX512VL-LABEL: test_x86_avx2_psrav_d_256:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsravd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; AVX512VL-NEXT: ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}

define <8 x i32> @test_x86_avx2_psrav_d_256_const() {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}, kind: FK_Data_4
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpmovsxbd {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-AVX512VL-NEXT: # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone

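; The gather tests below cover each index/element width combination. A lane
; is only loaded from memory when the sign bit of the corresponding mask
; element is set (otherwise it is passed through from the source operand),
; and the instruction zeroes the mask register on completion.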
define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, ptr %a1, <4 x i32> %idx, <2 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_pd:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_pd:
; X64: # %bb.0:
; X64-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
                     ptr %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, ptr,
                     <4 x i32>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, ptr %a1, <4 x i32> %idx, <4 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x92,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_pd_256:
; X64: # %bb.0:
; X64-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
                     ptr %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, ptr,
                     <4 x i32>, <4 x double>, i8) nounwind readonly

define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, ptr %a1, <2 x i64> %idx, <2 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_pd:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x93,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_pd:
; X64: # %bb.0:
; X64-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x93,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
                     ptr %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, ptr,
                     <2 x i64>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, ptr %a1, <4 x i64> %idx, <4 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_pd_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x93,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_pd_256:
; X64: # %bb.0:
; X64-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x93,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
                     ptr %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, ptr,
                     <4 x i64>, <4 x double>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, ptr %a1, <4 x i32> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_ps:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x92,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_ps:
; X64: # %bb.0:
; X64-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x92,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
                     ptr %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, ptr,
                     <4 x i32>, <4 x float>, i8) nounwind readonly

define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, ptr %a1, <8 x i32> %idx, <8 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x92,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_ps_256:
; X64: # %bb.0:
; X64-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x92,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                     ptr %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, ptr,
                     <8 x i32>, <8 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, ptr %a1, <2 x i64> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_ps:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x93,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_ps:
; X64: # %bb.0:
; X64-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x93,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
                     ptr %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, ptr,
                     <2 x i64>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, ptr %a1, <4 x i64> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_ps_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x93,0x04,0x48]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_ps_256:
; X64: # %bb.0:
; X64-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x93,0x04,0x4f]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
                     ptr %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, ptr,
                     <4 x i64>, <4 x float>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, ptr %a1, <4 x i32> %idx, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_q:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_q:
; X64: # %bb.0:
; X64-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
                     ptr %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, ptr,
                     <4 x i32>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, ptr %a1, <4 x i32> %idx, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x90,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_q_256:
; X64: # %bb.0:
; X64-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
                     ptr %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, ptr,
                     <4 x i32>, <4 x i64>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, ptr %a1, <2 x i64> %idx, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_q:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_q:
; X64: # %bb.0:
; X64-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
                     ptr %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, ptr,
                     <2 x i64>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, ptr %a1, <4 x i64> %idx, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_q_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x91,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_q_256:
; X64: # %bb.0:
; X64-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0xed,0x91,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
                     ptr %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, ptr,
                     <4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, ptr %a1, <4 x i32> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_d:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x90,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_d:
; X64: # %bb.0:
; X64-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x90,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
                     ptr %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, ptr,
                     <4 x i32>, <4 x i32>, i8) nounwind readonly

define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, ptr %a1, <8 x i32> %idx, <8 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x90,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_d_256:
; X64: # %bb.0:
; X64-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # encoding: [0xc4,0xe2,0x6d,0x90,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
                     ptr %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, ptr,
                     <8 x i32>, <8 x i32>, i8) nounwind readonly

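; With 64-bit indices and 32-bit elements the result is only 128 bits wide,
; so the 256-bit-index forms below return an %xmm value and insert
; vzeroupper before returning.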
define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, ptr %a1, <2 x i64> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_d:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x91,0x04,0x48]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_d:
; X64: # %bb.0:
; X64-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0x69,0x91,0x04,0x4f]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
                     ptr %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, ptr,
                     <2 x i64>, <4 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, ptr %a1, <4 x i64> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_d_256:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT: vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x91,0x04,0x48]
; X86-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_d_256:
; X64: # %bb.0:
; X64-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # encoding: [0xc4,0xe2,0x6d,0x91,0x04,0x4f]
; X64-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT: retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
                     ptr %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, ptr,
                     <4 x i64>, <4 x i32>, i8) nounwind readonly

; PR13298
define <8 x float> @test_gather_mask(<8 x float> %a0, ptr %a, <8 x i32> %idx, <8 x float> %mask, ptr nocapture %out) {
;; gather with mask
; X86-AVX-LABEL: test_gather_mask:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX-NEXT: vmovaps %ymm2, %ymm3 # encoding: [0xc5,0xfc,0x28,0xda]
; X86-AVX-NEXT: vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; X86-AVX-NEXT: vmovups %ymm2, (%eax) # encoding: [0xc5,0xfc,0x11,0x10]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_gather_mask:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX512VL-NEXT: vmovaps %ymm2, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; X86-AVX512VL-NEXT: vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; X86-AVX512VL-NEXT: vmovups %ymm2, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_gather_mask:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps %ymm2, %ymm3 # encoding: [0xc5,0xfc,0x28,0xda]
; X64-AVX-NEXT: vgatherdps %ymm3, (%rdi,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x8f]
; X64-AVX-NEXT: vmovups %ymm2, (%rsi) # encoding: [0xc5,0xfc,0x11,0x16]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_gather_mask:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vmovaps %ymm2, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; X64-AVX512VL-NEXT: vgatherdps %ymm3, (%rdi,%ymm1,4), %ymm0 # encoding: [0xc4,0xe2,0x65,0x92,0x04,0x8f]
; X64-AVX512VL-NEXT: vmovups %ymm2, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x16]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                     ptr %a, <8 x i32> %idx, <8 x float> %mask, i8 4) ;

;; for debugging, we'll just dump out the mask
  store <8 x float> %mask, ptr %out, align 4

  ret <8 x float> %res
}

define <2 x i64> @test_mask_demanded_bits(<2 x i64> %a0, ptr %a1, <2 x i64> %idx, <2 x i1> %mask) {
; X86-AVX-LABEL: test_mask_demanded_bits:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT: vpsllq $63, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X86-AVX-NEXT: vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-AVX-NEXT: retl # encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_mask_demanded_bits:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT: vpsllq $63, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X86-AVX512VL-NEXT: vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
;
; X64-AVX-LABEL: test_mask_demanded_bits:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vpsllq $63, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X64-AVX-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-AVX-NEXT: retq # encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_mask_demanded_bits:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpsllq $63, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xf2,0x3f]
; X64-AVX512VL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
  %mask1 = sext <2 x i1> %mask to <2 x i64>
  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
                     ptr %a1, <2 x i64> %idx, <2 x i64> %mask1, i8 2) ;
  ret <2 x i64> %res
}