; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vpopcntdq,+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK

; Every function below follows the same pattern on a <16 x i32> input %v0:
;   %shuf0_0 - %v0 with each adjacent pair of lanes swapped.
;   %shuf0_1 - even lanes taken from %v0, odd lanes taken from %shuf0_0.
;   %op0_0   - the operation under test applied to the shuffled values.
;   %op1_0   - llvm.ctpop of the unshuffled %v0.
;   %r       - lanes 0-3 and 8-11 from %op0_0, lanes 4-7 and 12-15 from %op1_0.
; The expected assembly folds the final lane select into a write-masked
; vpopcntd (mask 0xF0F0) that merges into the register holding %op0_0.
;
; Observed in the expected output: or/mul/shl/ashr/lshr build %shuf0_1 with
; blend mask 0x0A0A (the odd lanes inside the groups that %r actually reads
; from %op0_0), whereas the umin/umax/smin/smax intrinsic variants use the
; full odd-lane mask 0xAAAA.

; Operation under test: or.
define <16 x i32> @combine_mask_with_or(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_or:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $2570, %ax # imm = 0xA0A
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpord %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = or <16 x i32> %shuf0_0, %shuf0_1
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Operation under test: mul.
define <16 x i32> @combine_mask_with_mul(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_mul:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $2570, %ax # imm = 0xA0A
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpmulld %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = mul <16 x i32> %shuf0_0, %shuf0_1
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Operation under test: llvm.abs (unary, int-min-is-poison flag set to true).
; Both operands of the %op0_0 shuffle are %op0_0_tmp0, so %op0_0_tmp1 (and
; with it %shuf0_1) is dead; the expected assembly accordingly contains a
; single vpabsd and no blend.
; NOTE(review): presumably a by-product of how the unary case was generated
; from the binary-op template - confirm before "fixing" the second operand,
; since that would change the expected assembly.
define <16 x i32> @combine_mask_with_abs(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_abs:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    vpabsd %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0_tmp0 = tail call <16 x i32> @llvm.abs.v16i32(<16 x i32> %shuf0_0, i1 true)
  %op0_0_tmp1 = tail call <16 x i32> @llvm.abs.v16i32(<16 x i32> %shuf0_1, i1 true)
  %op0_0 = shufflevector <16 x i32> %op0_0_tmp0, <16 x i32> %op0_0_tmp0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Operation under test: llvm.umin.
define <16 x i32> @combine_mask_with_umin(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_umin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpminud %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = tail call <16 x i32> @llvm.umin.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Operation under test: llvm.umax.
define <16 x i32> @combine_mask_with_umax(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_umax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpmaxud %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = tail call <16 x i32> @llvm.umax.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Operation under test: llvm.smin.
define <16 x i32> @combine_mask_with_smin(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_smin:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpminsd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Operation under test: llvm.smax.
define <16 x i32> @combine_mask_with_smax(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_smax:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $-21846, %ax # imm = 0xAAAA
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpmaxsd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %shuf0_0, <16 x i32> %shuf0_1)
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Operation under test: shl (per-lane variable shift amount).
define <16 x i32> @combine_mask_with_shl(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_shl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $2570, %ax # imm = 0xA0A
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpsllvd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = shl <16 x i32> %shuf0_0, %shuf0_1
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Operation under test: ashr (per-lane variable shift amount).
define <16 x i32> @combine_mask_with_ashr(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_ashr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $2570, %ax # imm = 0xA0A
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpsravd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = ashr <16 x i32> %shuf0_0, %shuf0_1
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Operation under test: lshr (per-lane variable shift amount).
define <16 x i32> @combine_mask_with_lshr(<16 x i32> %v0) {
; CHECK-LABEL: combine_mask_with_lshr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
; CHECK-NEXT:    movw $2570, %ax # imm = 0xA0A
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpblendmd %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vpsrlvd %zmm2, %zmm1, %zmm1
; CHECK-NEXT:    movw $-3856, %ax # imm = 0xF0F0
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vpopcntd %zmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf0_0 = shufflevector <16 x i32> %v0, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  %shuf0_1 = shufflevector <16 x i32> %v0, <16 x i32> %shuf0_0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %op0_0 = lshr <16 x i32> %shuf0_0, %shuf0_1
  %op1_0 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %v0)
  %r = shufflevector <16 x i32> %op0_0, <16 x i32> %op1_0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %r
}

; Intrinsics used by the functions above.
declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.umin.v16i32(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.umax.v16i32(<16 x i32>, <16 x i32>)
declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1)