; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX

; Test multiplies of various narrow types.

; v2i8: no native byte multiply on x86; the checks show the elements being
; widened to 16-bit lanes (punpcklbw / pmovzxbw), multiplied with pmullw,
; then truncated back (pand+packuswb on SSE2, pshufb on SSE4.1/AVX).
define <2 x i8> @mul_v2i8(<2 x i8> %x, <2 x i8> %y) {
; SSE2-LABEL: mul_v2i8:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pmullw %xmm1, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: mul_v2i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pmullw %xmm1, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: retq
;
; AVX-LABEL: mul_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
  %res = mul <2 x i8> %x, %y
  ret <2 x i8> %res
}

; v4i8: same widen-to-i16 / pmullw / truncate pattern as v2i8, keeping four lanes.
define <4 x i8> @mul_v4i8(<4 x i8> %x, <4 x i8> %y) {
; SSE2-LABEL: mul_v4i8:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pmullw %xmm1, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: mul_v4i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pmullw %xmm1, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: retq
;
; AVX-LABEL: mul_v4i8:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
  %res = mul <4 x i8> %x, %y
  ret <4 x i8> %res
}

; v8i8: same widen / pmullw / truncate pattern, keeping all eight low-byte lanes.
define <8 x i8> @mul_v8i8(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: mul_v8i8:
; SSE2: # %bb.0:
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pmullw %xmm1, %xmm0
; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: packuswb %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: mul_v8i8:
; SSE41: # %bb.0:
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: pmullw %xmm1, %xmm0
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; SSE41-NEXT: retq
;
; AVX-LABEL: mul_v8i8:
; AVX: # %bb.0:
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; AVX-NEXT: retq
  %res = mul <8 x i8> %x, %y
  ret <8 x i8> %res
}

; v2i16: i16 lanes map directly onto pmullw — a single instruction on all targets.
define <2 x i16> @mul_v2i16(<2 x i16> %x, <2 x i16> %y) {
; SSE-LABEL: mul_v2i16:
; SSE: # %bb.0:
; SSE-NEXT: pmullw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %res = mul <2 x i16> %x, %y
  ret <2 x i16> %res
}

; v4i16: same single-pmullw lowering as v2i16.
define <4 x i16> @mul_v4i16(<4 x i16> %x, <4 x i16> %y) {
; SSE-LABEL: mul_v4i16:
; SSE: # %bb.0:
; SSE-NEXT: pmullw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: mul_v4i16:
; AVX: # %bb.0:
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %res = mul <4 x i16> %x, %y
  ret <4 x i16> %res
}

; v2i32: SSE2 has no 32-bit element multiply, so the checks show it built from
; two pmuludq (even/odd lanes via pshufd) plus a punpckldq recombine; SSE4.1
; and AVX use the native pmulld/vpmulld.
define <2 x i32> @mul_v2i32(<2 x i32> %x, <2 x i32> %y) {
; SSE2-LABEL: mul_v2i32:
; SSE2: # %bb.0:
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSE41-LABEL: mul_v2i32:
; SSE41: # %bb.0:
; SSE41-NEXT: pmulld %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: mul_v2i32:
; AVX: # %bb.0:
; AVX-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %res = mul <2 x i32> %x, %y
  ret <2 x i32> %res
}