; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64

; Code generation tests for vector integer add, sub and mul with AVX2 enabled,
; run for both a 32-bit (i686) and a 64-bit (x86_64) triple. Tests whose
; expected assembly differs between the two triples (constant-pool addressing,
; return mnemonic) carry separate per-triple assertion blocks.

; 256-bit add of each element width is expected to select one vpadd{q,d,w,b}.
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: test_vpaddq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: test_vpaddd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: test_vpaddw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: test_vpaddb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

; 256-bit sub of each element width is expected to select one vpsub{q,d,w,b}.
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: test_vpsubq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: test_vpsubd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: test_vpsubw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: test_vpsubb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

; 256-bit mul of i32 / i16 elements is expected to select vpmulld / vpmullw.
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: test_vpmulld:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-LABEL: test_vpmullw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

; 128-bit i8 multiply: the expected sequence zero-extends both operands to
; 16-bit lanes, multiplies with vpmullw, masks, and packs back to bytes. The
; two triples differ only in how the mask constant is addressed and in the
; return mnemonic.
define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
; X86-LABEL: mul_v16i8:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; X86-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X86-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: mul_v16i8:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; X64-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %x = mul <16 x i8> %i, %j
  ret <16 x i8> %x
}

; 256-bit i8 multiply: the expected sequence uses two vpmaddubsw products
; (one on the masked multiplier, one on its complement under the mask) that
; are combined with a shift and an or.
define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
; CHECK-LABEL: mul_v32i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; CHECK-NEXT:    vpand %ymm2, %ymm1, %ymm3
; CHECK-NEXT:    vpmaddubsw %ymm3, %ymm0, %ymm3
; CHECK-NEXT:    vpand %ymm2, %ymm3, %ymm3
; CHECK-NEXT:    vpandn %ymm1, %ymm2, %ymm1
; CHECK-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpsllw $8, %ymm0, %ymm0
; CHECK-NEXT:    vpor %ymm0, %ymm3, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = mul <32 x i8> %i, %j
  ret <32 x i8> %x
}

; 256-bit i64 multiply: the expected sequence is built from three vpmuludq
; products plus shifts and adds.
define <4 x i64> @mul_v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: mul_v4i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlq $32, %ymm0, %ymm2
; CHECK-NEXT:    vpmuludq %ymm1, %ymm2, %ymm2
; CHECK-NEXT:    vpsrlq $32, %ymm1, %ymm3
; CHECK-NEXT:    vpmuludq %ymm3, %ymm0, %ymm3
; CHECK-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; CHECK-NEXT:    vpsllq $32, %ymm2, %ymm2
; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}

; x * splat(2): expected to become an add of the value with itself.
define <8 x i32> @mul_const1(<8 x i32> %x) {
; CHECK-LABEL: mul_const1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %y
}

; x * splat(4): expected to become a left shift by 2.
define <4 x i64> @mul_const2(<4 x i64> %x) {
; CHECK-LABEL: mul_const2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllq $2, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
  ret <4 x i64> %y
}

; x * splat(8) on i16 lanes: expected to become a left shift by 3.
define <16 x i16> @mul_const3(<16 x i16> %x) {
; CHECK-LABEL: mul_const3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $3, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <16 x i16> %y
}

; x * splat(-1): expected to become a subtraction from a zeroed register.
define <4 x i64> @mul_const4(<4 x i64> %x) {
; CHECK-LABEL: mul_const4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vpsubq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %y
}

; x * splat(0): expected to fold to a zeroing xor with no multiply.
define <8 x i32> @mul_const5(<8 x i32> %x) {
; CHECK-LABEL: mul_const5:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; Non-uniform multiplier mixing 0 and 2: expected to stay a vpmulld whose
; second operand is loaded from the constant pool (addressing differs per
; triple).
define <8 x i32> @mul_const6(<8 x i32> %x) {
; X86-LABEL: mul_const6:
; X86:       # %bb.0:
; X86-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: mul_const6:
; X64:       # %bb.0:
; X64-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    retq
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %y
}

; <8 x i64> x * splat(2): expected to become two 256-bit self-adds, one per
; ymm register holding half of the vector.
define <8 x i64> @mul_const7(<8 x i64> %x) {
; CHECK-LABEL: mul_const7:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm0
; CHECK-NEXT:    vpaddq %ymm1, %ymm1, %ymm1
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %y
}

; 128-bit variant of mul_const3: x * splat(8) becomes a left shift by 3.
define <8 x i16> @mul_const8(<8 x i16> %x) {
; CHECK-LABEL: mul_const8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $3, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i32> %y
}

; Multiplier is 2 in element 0 and 0 elsewhere: expected to materialize the
; constant into a register and multiply with vpmulld.
define <8 x i32> @mul_const9(<8 x i32> %x) {
; CHECK-LABEL: mul_const9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbq {{.*#+}} xmm1 = [2,0]
; CHECK-NEXT:    vpmulld %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}

; x * 0x01010101 (16843009): expected to broadcast the constant and use vpmulld.
define <4 x i32> @mul_const10(<4 x i32> %x) {
; CHECK-LABEL: mul_const10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [16843009,16843009,16843009,16843009]
; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
  ret <4 x i32> %m
}

; x * 0x80808080 (2155905152): expected to broadcast the constant and use vpmulld.
define <4 x i32> @mul_const11(<4 x i32> %x) {
; CHECK-LABEL: mul_const11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2155905152,2155905152,2155905152,2155905152]
; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
  ret <4 x i32> %m
}