; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefixes=CHECK,CHECK-SSE
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX1
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512

;; These tests cover code generation for `and (mul x, C1), C2`, where the
;; multiply may be emitted as a negate followed by a left shift.
;; Arithmetic behind the positive cases: 56 = 64 - 8, so x * 56 and -(x << 3)
;; agree in their low six bits; a mask below 64 cannot observe the difference.

;; Scalar case, multiplier 56 and mask 56: the expected code negates, shifts
;; left by 3 (through an lea with scale 8) and then applies the mask.
define i32 @mul_and_to_neg_shl_and(i32 %x) {
; CHECK-LABEL: mul_and_to_neg_shl_and:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    negl %edi
; CHECK-NEXT:    leal (,%rdi,8), %eax
; CHECK-NEXT:    andl $56, %eax
; CHECK-NEXT:    retq
  %mul = mul i32 %x, 56
  %and = and i32 %mul, 56
  ret i32 %and
}

;; Same multiplier with mask 51 (0b110011). The low three bits are zero after
;; the shift, so the expected immediate is reduced to 48.
define i32 @mul_and_to_neg_shl_and2(i32 %x) {
; CHECK-LABEL: mul_and_to_neg_shl_and2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    negl %edi
; CHECK-NEXT:    leal (,%rdi,8), %eax
; CHECK-NEXT:    andl $48, %eax
; CHECK-NEXT:    retq
  %mul = mul i32 %x, 56
  %and = and i32 %mul, 51
  ret i32 %and
}

;; Vector case with splat multiplier 56 and splat mask 48: the expected code is
;; a subtract from zero, a shift left by 3 and an and with a constant-pool mask.
define <4 x i32> @mul_and_to_neg_shl_and_vec(<4 x i32> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE-NEXT:    psubd %xmm0, %xmm1
; CHECK-SSE-NEXT:    pslld $3, %xmm1
; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX1-NEXT:    vpslld $3, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec:
; CHECK-AVX512:       # %bb.0:
; CHECK-AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX512-NEXT:    vpslld $3, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    retq
  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 56>
  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
  ret <4 x i32> %and
}

;; Negative test: the multiplier is not a splat (last lane is 64, a plain power
;; of two), and the expected code keeps the vector multiply.
define <4 x i32> @mul_and_to_neg_shl_and_vec_fail_no_splat(<4 x i32> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_fail_no_splat:
; CHECK-AVX512:       # %bb.0:
; CHECK-AVX512-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    retq
  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 64>
  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
  ret <4 x i32> %and
}

;; todo_no_splat ones have the correct invariants for all elements.
;; Non-splat multiplier <56, 56, 56, 48> with splat mask 48. Every lane is a
;; difference of two powers of two (56 = 64 - 8, 48 = 64 - 16), but the
;; expected code currently still performs the vector multiply.
define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat1(<4 x i32> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat1:
; CHECK-AVX512:       # %bb.0:
; CHECK-AVX512-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    retq
  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 48>
  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
  ret <4 x i32> %and
}

;; Splat multiplier 56 with non-splat mask <48, 48, 48, 32>. The expected code
;; currently still performs the vector multiply.
define <4 x i32> @mul_and_to_neg_shl_and_vec_todo_no_splat2(<4 x i32> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    movdqa {{.*#+}} xmm1 = [56,56,56,56]
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; CHECK-SSE-NEXT:    pmuludq %xmm1, %xmm0
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; CHECK-SSE-NEXT:    pmuludq %xmm1, %xmm2
; CHECK-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; CHECK-SSE-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_todo_no_splat2:
; CHECK-AVX512:       # %bb.0:
; CHECK-AVX512-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX512-NEXT:    retq
  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 56>
  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 32>
  ret <4 x i32> %and
}

;; Multiplier has one undef lane (the others are 56); the expected code still
;; uses subtract-from-zero, shift left by 3 and a mask.
define <4 x i32> @mul_and_to_neg_shl_and_vec_with_undef_mul(<4 x i32> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE-NEXT:    psubd %xmm0, %xmm1
; CHECK-SSE-NEXT:    pslld $3, %xmm1
; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX1-NEXT:    vpslld $3, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul:
; CHECK-AVX512:       # %bb.0:
; CHECK-AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX512-NEXT:    vpslld $3, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    retq
  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 undef>
  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 48>
  ret <4 x i32> %and
}

;; Mask has one undef lane (the others are 48); the expected code still uses
;; subtract-from-zero, shift left by 3 and a mask.
define <4 x i32> @mul_and_to_neg_shl_and_vec_with_undef_and(<4 x i32> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_with_undef_and:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE-NEXT:    psubd %xmm0, %xmm1
; CHECK-SSE-NEXT:    pslld $3, %xmm1
; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX1-LABEL: mul_and_to_neg_shl_and_vec_with_undef_and:
; CHECK-AVX1:       # %bb.0:
; CHECK-AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX1-NEXT:    vpslld $3, %xmm0, %xmm0
; CHECK-AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT:    retq
;
; CHECK-AVX512-LABEL: mul_and_to_neg_shl_and_vec_with_undef_and:
; CHECK-AVX512:       # %bb.0:
; CHECK-AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; CHECK-AVX512-NEXT:    vpslld $3, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512-NEXT:    retq
  %mul = mul <4 x i32> %x, <i32 56, i32 56, i32 56, i32 56>
  %and = and <4 x i32> %mul, <i32 48, i32 48, i32 48, i32 undef>
  ret <4 x i32> %and
}

;; <16 x i8> variant with an undef lane in both the multiplier (12 = 16 - 4)
;; and the mask (11). The expected code subtracts from zero, shifts left by 2
;; with a 16-bit-element shift, and then applies a constant-pool mask.
define <16 x i8> @mul_and_to_neg_shl_and_vec_with_undef_mul_and(<16 x i8> %x) {
; CHECK-SSE-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul_and:
; CHECK-SSE:       # %bb.0:
; CHECK-SSE-NEXT:    pxor %xmm1, %xmm1
; CHECK-SSE-NEXT:    psubb %xmm0, %xmm1
; CHECK-SSE-NEXT:    psllw $2, %xmm1
; CHECK-SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE-NEXT:    movdqa %xmm1, %xmm0
; CHECK-SSE-NEXT:    retq
;
; CHECK-AVX-LABEL: mul_and_to_neg_shl_and_vec_with_undef_mul_and:
; CHECK-AVX:       # %bb.0:
; CHECK-AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX-NEXT:    vpsubb %xmm0, %xmm1, %xmm0
; CHECK-AVX-NEXT:    vpsllw $2, %xmm0, %xmm0
; CHECK-AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-AVX-NEXT:    retq
  %mul = mul <16 x i8> %x, <i8 12, i8 12, i8 12, i8 12, i8 undef, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12>
  %and = and <16 x i8> %mul, <i8 11, i8 undef, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11>
  ret <16 x i8> %and
}

;; Negative test: 57 is odd and 58 is not a power of two, so 57 is not a
;; difference of two powers of two. The expected code keeps the imul.
define i32 @mul_and_to_neg_shl_and_fail_invalid_mul(i32 %x) {
; CHECK-LABEL: mul_and_to_neg_shl_and_fail_invalid_mul:
; CHECK:       # %bb.0:
; CHECK-NEXT:    imull $57, %edi, %eax
; CHECK-NEXT:    andl $56, %eax
; CHECK-NEXT:    retq
  %mul = mul i32 %x, 57
  %and = and i32 %mul, 56
  ret i32 %and
}

;; Negative test: the multiplier 64 is itself a power of two, so the expected
;; code is a plain shift left by 6 with no negate.
define i32 @mul_and_to_neg_shl_and_fail_mul_p2(i32 %x) {
; CHECK-LABEL: mul_and_to_neg_shl_and_fail_mul_p2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    shll $6, %eax
; CHECK-NEXT:    andl $64, %eax
; CHECK-NEXT:    retq
  %mul = mul i32 %x, 64
  %and = and i32 %mul, 64
  ret i32 %and
}

;; Negative test: mask 120 includes bit 6 (value 64), where x * 56 and
;; -(x << 3) can differ. The expected code keeps the imul.
define i32 @mul_and_to_neg_shl_and_fail_mask_to_large(i32 %x) {
; CHECK-LABEL: mul_and_to_neg_shl_and_fail_mask_to_large:
; CHECK:       # %bb.0:
; CHECK-NEXT:    imull $56, %edi, %eax
; CHECK-NEXT:    andl $120, %eax
; CHECK-NEXT:    retq
  %mul = mul i32 %x, 56
  %and = and i32 %mul, 120
  ret i32 %and
}