; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle %s -o - | FileCheck %s
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq,+fast-variable-perlane-shuffle %s -o - | FileCheck %s

; Sub-vector broadcast shuffles of i32 elements on AVX-512 (F + VL + DQ).
;
; Both llc invocations above feed the same default FileCheck prefix, so the
; expected assembly must be identical with and without the
; fast-variable-crosslane-shuffle attribute.
;
; Naming scheme used by the functions in this part of the file:
;   test_<N>xi32_to_<M>xi32[_mem]             - plain broadcast shuffle.
;   test_masked_<...>_mask<K>                 - merge masking: lanes where
;                                               %mask == 0 take the shuffle,
;                                               the others keep %default.
;   test_masked_z_<...>_mask<K>               - zero masking: lanes where
;                                               %mask == 0 take the shuffle,
;                                               the others become zero.
; Within each group the _mask0 .. _mask3 variants visible here have identical
; IR bodies and identical expected output.
;
; Every shuffle index selects from the first shufflevector operand only, so
; the second operand is a pure placeholder and is spelled as poison.

; --- 2 x i32 pattern broadcast to <4 x i32>, register source ---------------
define <4 x i32> @test_2xi32_to_4xi32(<4 x i32> %vec) {
; CHECK-LABEL: test_2xi32_to_4xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; CHECK-NEXT:    retq
  %res = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %res
}
define <4 x i32> @test_masked_2xi32_to_4xi32_mask0(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
  ret <4 x i32> %res
}

define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask0(<4 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}
define <4 x i32> @test_masked_2xi32_to_4xi32_mask1(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
  ret <4 x i32> %res
}

define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask1(<4 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}
define <4 x i32> @test_masked_2xi32_to_4xi32_mask2(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
  ret <4 x i32> %res
}

define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask2(<4 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}
define <4 x i32> @test_masked_2xi32_to_4xi32_mask3(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm2, %xmm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
  ret <4 x i32> %res
}

define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask3(<4 x i32> %vec, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; --- 2 x i32 pattern broadcast to <8 x i32>, register source ---------------
define <8 x i32> @test_2xi32_to_8xi32(<8 x i32> %vec) {
; CHECK-LABEL: test_2xi32_to_8xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %res
}
define <8 x i32> @test_masked_2xi32_to_8xi32_mask0(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
  ret <8 x i32> %res
}

define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask0(<8 x i32> %vec, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}
define <8 x i32> @test_masked_2xi32_to_8xi32_mask1(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
  ret <8 x i32> %res
}

define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask1(<8 x i32> %vec, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}
define <8 x i32> @test_masked_2xi32_to_8xi32_mask2(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
  ret <8 x i32> %res
}

define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask2(<8 x i32> %vec, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}
define <8 x i32> @test_masked_2xi32_to_8xi32_mask3(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm2, %ymm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
  ret <8 x i32> %res
}

define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask3(<8 x i32> %vec, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <8 x i32> %vec, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

; --- 2 x i32 pattern broadcast to <16 x i32>, register source --------------
define <16 x i32> @test_2xi32_to_16xi32(<16 x i32> %vec) {
; CHECK-LABEL: test_2xi32_to_16xi32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
; CHECK-NEXT:    retq
  %res = shufflevector <16 x i32> %vec, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i32> %res
}
define <16 x i32> @test_masked_2xi32_to_16xi32_mask0(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i32> %vec, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
  ret <16 x i32> %res
}

define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask0(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i32> %vec, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}
define <16 x i32> @test_masked_2xi32_to_16xi32_mask1(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i32> %vec, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
  ret <16 x i32> %res
}

define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask1(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i32> %vec, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}
define <16 x i32> @test_masked_2xi32_to_16xi32_mask2(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i32> %vec, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
  ret <16 x i32> %res
}

define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask2(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i32> %vec, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}
define <16 x i32> @test_masked_2xi32_to_16xi32_mask3(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i32> %vec, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
  ret <16 x i32> %res
}

define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask3(<16 x i32> %vec, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %shuf = shufflevector <16 x i32> %vec, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

; --- <2 x i32> loaded from memory, broadcast to <4 x i32> ------------------
define <4 x i32> @test_2xi32_to_4xi32_mem(ptr %vp) {
; CHECK-LABEL: test_2xi32_to_4xi32_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %res
}
define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask0(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
  ret <4 x i32> %res
}

define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask0(ptr %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}
define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask1(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
  ret <4 x i32> %res
}

define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask1(ptr %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}
define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask2(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
  ret <4 x i32> %res
}

define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask2(ptr %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}
define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask3(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
  ret <4 x i32> %res
}

define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask3(ptr %vp, <4 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <4 x i32> %mask, zeroinitializer
  %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
  ret <4 x i32> %res
}

; --- <2 x i32> loaded from memory, broadcast to <8 x i32> ------------------
define <8 x i32> @test_2xi32_to_8xi32_mem(ptr %vp) {
; CHECK-LABEL: test_2xi32_to_8xi32_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %res
}
define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
  ret <8 x i32> %res
}

define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}
define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
  ret <8 x i32> %res
}

define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}
define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
  ret <8 x i32> %res
}

define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}
define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
  ret <8 x i32> %res
}

define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

; --- <2 x i32> loaded from memory, broadcast to <16 x i32> -----------------
define <16 x i32> @test_2xi32_to_16xi32_mem(ptr %vp) {
; CHECK-LABEL: test_2xi32_to_16xi32_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i32> %res
}
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
  ret <16 x i32> %res
}

define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
  ret <16 x i32> %res
}

define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
  ret <16 x i32> %res
}

define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}
define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
  ret <16 x i32> %res
}

define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %mask) {
; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %shuf = shufflevector <2 x i32> %vec, <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %cmp = icmp eq <16 x i32> %mask, zeroinitializer
  %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
  ret <16 x i32> %res
}

; --- <4 x i32> loaded from memory, broadcast to <8 x i32> ------------------
define <8 x i32> @test_4xi32_to_8xi32_mem(ptr %vp) {
; CHECK-LABEL: test_4xi32_to_8xi32_mem:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; CHECK-NEXT:    retq
  %vec = load <4 x i32>, ptr %vp
  %res = shufflevector <4 x i32> %vec, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i32> %res
}
define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i32>, ptr %vp
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
  ret <8 x i32> %res
}

define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask0:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i32>, ptr %vp
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}
define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; CHECK-NEXT:    retq
  %vec = load <4 x i32>, ptr %vp
  %shuf = shufflevector <4 x i32> %vec, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
  ret <8 x i32> %res
}

define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %mask) {
; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
695; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 696; CHECK-NEXT: retq 697 %vec = load <4 x i32>, ptr %vp 698 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 699 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 700 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 701 ret <8 x i32> %res 702} 703define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %default, <8 x i32> %mask) { 704; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask2: 705; CHECK: # %bb.0: 706; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 707; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 708; CHECK-NEXT: retq 709 %vec = load <4 x i32>, ptr %vp 710 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 711 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 712 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default 713 ret <8 x i32> %res 714} 715 716define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %mask) { 717; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask2: 718; CHECK: # %bb.0: 719; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 720; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 721; CHECK-NEXT: retq 722 %vec = load <4 x i32>, ptr %vp 723 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 724 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 725 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 726 ret <8 x i32> %res 727} 728define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %default, <8 x i32> %mask) { 729; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask3: 730; CHECK: # %bb.0: 731; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1 732; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] 733; 
CHECK-NEXT: retq 734 %vec = load <4 x i32>, ptr %vp 735 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 736 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 737 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default 738 ret <8 x i32> %res 739} 740 741define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %mask) { 742; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask3: 743; CHECK: # %bb.0: 744; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1 745; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 746; CHECK-NEXT: retq 747 %vec = load <4 x i32>, ptr %vp 748 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 749 %cmp = icmp eq <8 x i32> %mask, zeroinitializer 750 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer 751 ret <8 x i32> %res 752} 753define <16 x i32> @test_4xi32_to_16xi32_mem(ptr %vp) { 754; CHECK-LABEL: test_4xi32_to_16xi32_mem: 755; CHECK: # %bb.0: 756; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 757; CHECK-NEXT: retq 758 %vec = load <4 x i32>, ptr %vp 759 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 760 ret <16 x i32> %res 761} 762define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %default, <16 x i32> %mask) { 763; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask0: 764; CHECK: # %bb.0: 765; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 766; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 767; CHECK-NEXT: retq 768 %vec = load <4 x i32>, ptr %vp 769 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, 
i32 3> 770 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 771 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 772 ret <16 x i32> %res 773} 774 775define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %mask) { 776; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask0: 777; CHECK: # %bb.0: 778; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 779; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 780; CHECK-NEXT: retq 781 %vec = load <4 x i32>, ptr %vp 782 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 783 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 784 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 785 ret <16 x i32> %res 786} 787define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %default, <16 x i32> %mask) { 788; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask1: 789; CHECK: # %bb.0: 790; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 791; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 792; CHECK-NEXT: retq 793 %vec = load <4 x i32>, ptr %vp 794 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 795 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 796 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 797 ret <16 x i32> %res 798} 799 800define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %mask) { 801; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask1: 802; CHECK: # %bb.0: 803; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 804; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 805; CHECK-NEXT: retq 806 %vec = load <4 x i32>, ptr %vp 807 %shuf = shufflevector <4 
x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 808 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 809 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 810 ret <16 x i32> %res 811} 812define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %default, <16 x i32> %mask) { 813; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask2: 814; CHECK: # %bb.0: 815; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 816; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 817; CHECK-NEXT: retq 818 %vec = load <4 x i32>, ptr %vp 819 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 820 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 821 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 822 ret <16 x i32> %res 823} 824 825define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %mask) { 826; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask2: 827; CHECK: # %bb.0: 828; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 829; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 830; CHECK-NEXT: retq 831 %vec = load <4 x i32>, ptr %vp 832 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 833 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 834 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 835 ret <16 x i32> %res 836} 837define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %default, <16 x i32> %mask) { 838; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask3: 839; CHECK: # %bb.0: 840; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 841; 
CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 842; CHECK-NEXT: retq 843 %vec = load <4 x i32>, ptr %vp 844 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 845 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 846 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 847 ret <16 x i32> %res 848} 849 850define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %mask) { 851; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask3: 852; CHECK: # %bb.0: 853; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 854; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 855; CHECK-NEXT: retq 856 %vec = load <4 x i32>, ptr %vp 857 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 858 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 859 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 860 ret <16 x i32> %res 861} 862define <4 x i64> @test_2xi64_to_4xi64_mem(ptr %vp) { 863; CHECK-LABEL: test_2xi64_to_4xi64_mem: 864; CHECK: # %bb.0: 865; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1] 866; CHECK-NEXT: retq 867 %vec = load <2 x i64>, ptr %vp 868 %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 869 ret <4 x i64> %res 870} 871define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask0(ptr %vp, <4 x i64> %default, <4 x i64> %mask) { 872; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask0: 873; CHECK: # %bb.0: 874; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 875; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] 876; CHECK-NEXT: retq 877 %vec = load <2 x i64>, ptr %vp 878 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, 
i32 0, i32 1> 879 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 880 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default 881 ret <4 x i64> %res 882} 883 884define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask0(ptr %vp, <4 x i64> %mask) { 885; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask0: 886; CHECK: # %bb.0: 887; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 888; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] 889; CHECK-NEXT: retq 890 %vec = load <2 x i64>, ptr %vp 891 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 892 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 893 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 894 ret <4 x i64> %res 895} 896define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask1(ptr %vp, <4 x i64> %default, <4 x i64> %mask) { 897; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask1: 898; CHECK: # %bb.0: 899; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 900; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] 901; CHECK-NEXT: retq 902 %vec = load <2 x i64>, ptr %vp 903 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 904 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 905 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default 906 ret <4 x i64> %res 907} 908 909define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask1(ptr %vp, <4 x i64> %mask) { 910; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask1: 911; CHECK: # %bb.0: 912; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 913; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] 914; CHECK-NEXT: retq 915 %vec = load <2 x i64>, ptr %vp 916 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 917 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 918 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 919 ret <4 x i64> %res 920} 921define <4 x i64> 
@test_masked_2xi64_to_4xi64_mem_mask2(ptr %vp, <4 x i64> %default, <4 x i64> %mask) { 922; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask2: 923; CHECK: # %bb.0: 924; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 925; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] 926; CHECK-NEXT: retq 927 %vec = load <2 x i64>, ptr %vp 928 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 929 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 930 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default 931 ret <4 x i64> %res 932} 933 934define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask2(ptr %vp, <4 x i64> %mask) { 935; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask2: 936; CHECK: # %bb.0: 937; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 938; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] 939; CHECK-NEXT: retq 940 %vec = load <2 x i64>, ptr %vp 941 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 942 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 943 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 944 ret <4 x i64> %res 945} 946define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask3(ptr %vp, <4 x i64> %default, <4 x i64> %mask) { 947; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask3: 948; CHECK: # %bb.0: 949; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1 950; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] 951; CHECK-NEXT: retq 952 %vec = load <2 x i64>, ptr %vp 953 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 954 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 955 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default 956 ret <4 x i64> %res 957} 958 959define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask3(ptr %vp, <4 x i64> %mask) { 960; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask3: 961; CHECK: # %bb.0: 962; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1 963; 
CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] 964; CHECK-NEXT: retq 965 %vec = load <2 x i64>, ptr %vp 966 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> 967 %cmp = icmp eq <4 x i64> %mask, zeroinitializer 968 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer 969 ret <4 x i64> %res 970} 971define <8 x i64> @test_2xi64_to_8xi64_mem(ptr %vp) { 972; CHECK-LABEL: test_2xi64_to_8xi64_mem: 973; CHECK: # %bb.0: 974; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] 975; CHECK-NEXT: retq 976 %vec = load <2 x i64>, ptr %vp 977 %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 978 ret <8 x i64> %res 979} 980define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %default, <8 x i64> %mask) { 981; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask0: 982; CHECK: # %bb.0: 983; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 984; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] 985; CHECK-NEXT: retq 986 %vec = load <2 x i64>, ptr %vp 987 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 988 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 989 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 990 ret <8 x i64> %res 991} 992 993define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %mask) { 994; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask0: 995; CHECK: # %bb.0: 996; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 997; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] 998; CHECK-NEXT: retq 999 %vec = load <2 x i64>, ptr %vp 1000 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1001 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1002 %res = select <8 x i1> %cmp, <8 
x i64> %shuf, <8 x i64> zeroinitializer 1003 ret <8 x i64> %res 1004} 1005define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %default, <8 x i64> %mask) { 1006; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask1: 1007; CHECK: # %bb.0: 1008; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1009; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] 1010; CHECK-NEXT: retq 1011 %vec = load <2 x i64>, ptr %vp 1012 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1013 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1014 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1015 ret <8 x i64> %res 1016} 1017 1018define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %mask) { 1019; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask1: 1020; CHECK: # %bb.0: 1021; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1022; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] 1023; CHECK-NEXT: retq 1024 %vec = load <2 x i64>, ptr %vp 1025 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1026 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1027 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1028 ret <8 x i64> %res 1029} 1030define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %default, <8 x i64> %mask) { 1031; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask2: 1032; CHECK: # %bb.0: 1033; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1034; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] 1035; CHECK-NEXT: retq 1036 %vec = load <2 x i64>, ptr %vp 1037 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1038 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1039 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1040 ret <8 x 
i64> %res 1041} 1042 1043define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %mask) { 1044; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask2: 1045; CHECK: # %bb.0: 1046; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1047; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] 1048; CHECK-NEXT: retq 1049 %vec = load <2 x i64>, ptr %vp 1050 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1051 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1052 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1053 ret <8 x i64> %res 1054} 1055define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %default, <8 x i64> %mask) { 1056; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask3: 1057; CHECK: # %bb.0: 1058; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1059; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] 1060; CHECK-NEXT: retq 1061 %vec = load <2 x i64>, ptr %vp 1062 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1063 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1064 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1065 ret <8 x i64> %res 1066} 1067 1068define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %mask) { 1069; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask3: 1070; CHECK: # %bb.0: 1071; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1072; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] 1073; CHECK-NEXT: retq 1074 %vec = load <2 x i64>, ptr %vp 1075 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> 1076 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1077 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1078 ret <8 x i64> %res 1079} 1080define <16 x i32> 
@test_8xi32_to_16xi32_mem(ptr %vp) { 1081; CHECK-LABEL: test_8xi32_to_16xi32_mem: 1082; CHECK: # %bb.0: 1083; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3] 1084; CHECK-NEXT: retq 1085 %vec = load <8 x i32>, ptr %vp 1086 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1087 ret <16 x i32> %res 1088} 1089define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %default, <16 x i32> %mask) { 1090; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask0: 1091; CHECK: # %bb.0: 1092; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1093; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1094; CHECK-NEXT: retq 1095 %vec = load <8 x i32>, ptr %vp 1096 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1097 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1098 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 1099 ret <16 x i32> %res 1100} 1101 1102define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %mask) { 1103; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask0: 1104; CHECK: # %bb.0: 1105; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 1106; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1107; CHECK-NEXT: retq 1108 %vec = load <8 x i32>, ptr %vp 1109 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1110 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1111 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1112 ret <16 x i32> %res 1113} 1114define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %default, 
<16 x i32> %mask) { 1115; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask1: 1116; CHECK: # %bb.0: 1117; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1118; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1119; CHECK-NEXT: retq 1120 %vec = load <8 x i32>, ptr %vp 1121 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1122 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1123 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 1124 ret <16 x i32> %res 1125} 1126 1127define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %mask) { 1128; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask1: 1129; CHECK: # %bb.0: 1130; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 1131; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1132; CHECK-NEXT: retq 1133 %vec = load <8 x i32>, ptr %vp 1134 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1135 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1136 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1137 ret <16 x i32> %res 1138} 1139define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %default, <16 x i32> %mask) { 1140; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask2: 1141; CHECK: # %bb.0: 1142; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1143; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1144; CHECK-NEXT: retq 1145 %vec = load <8 x i32>, ptr %vp 1146 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1147 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 
1148 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 1149 ret <16 x i32> %res 1150} 1151 1152define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %mask) { 1153; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask2: 1154; CHECK: # %bb.0: 1155; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 1156; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1157; CHECK-NEXT: retq 1158 %vec = load <8 x i32>, ptr %vp 1159 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1160 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1161 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1162 ret <16 x i32> %res 1163} 1164define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %default, <16 x i32> %mask) { 1165; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask3: 1166; CHECK: # %bb.0: 1167; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1 1168; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1169; CHECK-NEXT: retq 1170 %vec = load <8 x i32>, ptr %vp 1171 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1172 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1173 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default 1174 ret <16 x i32> %res 1175} 1176 1177define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %mask) { 1178; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask3: 1179; CHECK: # %bb.0: 1180; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1 1181; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] 1182; CHECK-NEXT: retq 1183 %vec = load <8 x i32>, ptr %vp 1184 %shuf = shufflevector <8 x i32> %vec, <8 x i32> 
undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1185 %cmp = icmp eq <16 x i32> %mask, zeroinitializer 1186 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer 1187 ret <16 x i32> %res 1188} 1189define <8 x i64> @test_4xi64_to_8xi64_mem(ptr %vp) { 1190; CHECK-LABEL: test_4xi64_to_8xi64_mem: 1191; CHECK: # %bb.0: 1192; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3] 1193; CHECK-NEXT: retq 1194 %vec = load <4 x i64>, ptr %vp 1195 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1196 ret <8 x i64> %res 1197} 1198define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %default, <8 x i64> %mask) { 1199; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask0: 1200; CHECK: # %bb.0: 1201; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1202; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] 1203; CHECK-NEXT: retq 1204 %vec = load <4 x i64>, ptr %vp 1205 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1206 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1207 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1208 ret <8 x i64> %res 1209} 1210 1211define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %mask) { 1212; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask0: 1213; CHECK: # %bb.0: 1214; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1215; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 1216; CHECK-NEXT: retq 1217 %vec = load <4 x i64>, ptr %vp 1218 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1219 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1220 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1221 ret <8 x i64> %res 
1222} 1223define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %default, <8 x i64> %mask) { 1224; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask1: 1225; CHECK: # %bb.0: 1226; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1227; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] 1228; CHECK-NEXT: retq 1229 %vec = load <4 x i64>, ptr %vp 1230 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1231 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1232 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1233 ret <8 x i64> %res 1234} 1235 1236define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %mask) { 1237; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask1: 1238; CHECK: # %bb.0: 1239; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1240; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 1241; CHECK-NEXT: retq 1242 %vec = load <4 x i64>, ptr %vp 1243 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1244 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1245 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1246 ret <8 x i64> %res 1247} 1248define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %default, <8 x i64> %mask) { 1249; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask2: 1250; CHECK: # %bb.0: 1251; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1252; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] 1253; CHECK-NEXT: retq 1254 %vec = load <4 x i64>, ptr %vp 1255 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1256 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1257 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1258 ret <8 x i64> %res 1259} 1260 1261define <8 x i64> 
@test_masked_z_4xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %mask) { 1262; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask2: 1263; CHECK: # %bb.0: 1264; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1265; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 1266; CHECK-NEXT: retq 1267 %vec = load <4 x i64>, ptr %vp 1268 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1269 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1270 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1271 ret <8 x i64> %res 1272} 1273define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %default, <8 x i64> %mask) { 1274; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask3: 1275; CHECK: # %bb.0: 1276; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1 1277; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] 1278; CHECK-NEXT: retq 1279 %vec = load <4 x i64>, ptr %vp 1280 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1281 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1282 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default 1283 ret <8 x i64> %res 1284} 1285 1286define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %mask) { 1287; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask3: 1288; CHECK: # %bb.0: 1289; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1 1290; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] 1291; CHECK-NEXT: retq 1292 %vec = load <4 x i64>, ptr %vp 1293 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> 1294 %cmp = icmp eq <8 x i64> %mask, zeroinitializer 1295 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer 1296 ret <8 x i64> %res 1297} 1298