; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -disable-peephole -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX

; Every test below follows the same shape:
;   1. bitcast the i64-vector arguments to the element type under test,
;   2. compare them with `icmp eq`,
;   3. (masked variants) AND the i1 vector with a mask bitcast from %__u,
;   4. widen the i1 vector with a shufflevector whose second operand is
;      zeroinitializer, so the added upper lanes come from the zero vector,
;   5. bitcast the widened i1 vector to the integer that is returned.
; The `_mem` variants load the second compare operand through %__b instead of
; receiving it by value. The assertions are generated for the two llc
; configurations given in the run lines at the top of the file.

; --- <16 x i8> equality, 16 result bits widened to a 32-bit mask ---

define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; --- <16 x i8> equality, 16 result bits widened to a 64-bit mask ---
; Mask indices 16..31 all refer to lanes of the zeroinitializer operand.

define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; --- <32 x i8> equality, 32 result bits widened to a 64-bit mask ---

define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    shll $16, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %1 = bitcast <4 x i64> %__b to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rdi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    shll $16, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %1 = bitcast <4 x i64> %__b to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqb (%rsi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    shrl $16, %edi
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %ecx
; NoVLX-NEXT:    andl %edi, %ecx
; NoVLX-NEXT:    shll $16, %ecx
; NoVLX-NEXT:    movzwl %ax, %eax
; NoVLX-NEXT:    orl %ecx, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <32 x i8>
  %2 = icmp eq <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; --- <8 x i16> equality, 8 result bits widened to a 16-bit mask ---

define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; --- <8 x i16> equality, 8 result bits widened to a 32-bit mask ---
; Mask indices 8..15 all refer to lanes of the zeroinitializer operand.

define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; --- <8 x i16> equality, 8 result bits widened to a 64-bit mask ---

define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <8 x i16>
  %2 = icmp eq <8 x i16> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; --- <16 x i16> equality, 16 result bits widened to a 32-bit mask ---

define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andl %edi, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp eq <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32
5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 795 %6 = bitcast <32 x i1> %5 to i32 796 ret i32 %6 797} 798 799 800define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 801; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask: 802; VLX: # %bb.0: # %entry 803; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 804; VLX-NEXT: kmovq %k0, %rax 805; VLX-NEXT: vzeroupper 806; VLX-NEXT: retq 807; 808; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask: 809; NoVLX: # %bb.0: # %entry 810; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 811; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 812; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 813; NoVLX-NEXT: kmovw %k0, %eax 814; NoVLX-NEXT: vzeroupper 815; NoVLX-NEXT: retq 816entry: 817 %0 = bitcast <4 x i64> %__a to <16 x i16> 818 %1 = bitcast <4 x i64> %__b to <16 x i16> 819 %2 = icmp eq <16 x i16> %0, %1 820 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 821 %4 = bitcast <64 x i1> %3 to i64 822 ret i64 %4 823} 824 825define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 826; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem: 827; VLX: # %bb.0: # %entry 828; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 829; VLX-NEXT: kmovq %k0, %rax 830; VLX-NEXT: vzeroupper 831; VLX-NEXT: retq 832; 833; NoVLX-LABEL: 
test_vpcmpeqw_v16i1_v64i1_mask_mem: 834; NoVLX: # %bb.0: # %entry 835; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 836; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 837; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 838; NoVLX-NEXT: kmovw %k0, %eax 839; NoVLX-NEXT: vzeroupper 840; NoVLX-NEXT: retq 841entry: 842 %0 = bitcast <4 x i64> %__a to <16 x i16> 843 %load = load <4 x i64>, ptr %__b 844 %1 = bitcast <4 x i64> %load to <16 x i16> 845 %2 = icmp eq <16 x i16> %0, %1 846 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 847 %4 = bitcast <64 x i1> %3 to i64 848 ret i64 %4 849} 850 851define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 852; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask: 853; VLX: # %bb.0: # %entry 854; VLX-NEXT: kmovd %edi, %k1 855; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} 856; VLX-NEXT: kmovq %k0, %rax 857; VLX-NEXT: vzeroupper 858; VLX-NEXT: retq 859; 860; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask: 861; NoVLX: # %bb.0: # %entry 862; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 863; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 864; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 865; NoVLX-NEXT: kmovw %k0, %eax 866; NoVLX-NEXT: andl %edi, %eax 867; NoVLX-NEXT: vzeroupper 868; NoVLX-NEXT: retq 869entry: 870 %0 = bitcast <4 x i64> %__a to <16 x i16> 871 %1 = bitcast <4 x i64> %__b to <16 x i16> 872 %2 = icmp eq <16 x i16> %0, %1 873 %3 = bitcast i16 %__u to <16 x i1> 874 %4 = and <16 x 
i1> %2, %3 875 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 876 %6 = bitcast <64 x i1> %5 to i64 877 ret i64 %6 878} 879 880define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 881; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem: 882; VLX: # %bb.0: # %entry 883; VLX-NEXT: kmovd %edi, %k1 884; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1} 885; VLX-NEXT: kmovq %k0, %rax 886; VLX-NEXT: vzeroupper 887; VLX-NEXT: retq 888; 889; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem: 890; NoVLX: # %bb.0: # %entry 891; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 892; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 893; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 894; NoVLX-NEXT: kmovw %k0, %eax 895; NoVLX-NEXT: andl %edi, %eax 896; NoVLX-NEXT: vzeroupper 897; NoVLX-NEXT: retq 898entry: 899 %0 = bitcast <4 x i64> %__a to <16 x i16> 900 %load = load <4 x i64>, ptr %__b 901 %1 = bitcast <4 x i64> %load to <16 x i16> 902 %2 = icmp eq <16 x i16> %0, %1 903 %3 = bitcast i16 %__u to <16 x i1> 904 %4 = and <16 x i1> %2, %3 905 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, 
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 906 %6 = bitcast <64 x i1> %5 to i64 907 ret i64 %6 908} 909 910 911define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 912; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask: 913; VLX: # %bb.0: # %entry 914; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 915; VLX-NEXT: kmovq %k0, %rax 916; VLX-NEXT: vzeroupper 917; VLX-NEXT: retq 918; 919; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask: 920; NoVLX: # %bb.0: # %entry 921; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2 922; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 923; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 924; NoVLX-NEXT: kmovw %k0, %ecx 925; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 926; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 927; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 928; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 929; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 930; NoVLX-NEXT: kmovw %k0, %eax 931; NoVLX-NEXT: shll $16, %eax 932; NoVLX-NEXT: orl %ecx, %eax 933; NoVLX-NEXT: vzeroupper 934; NoVLX-NEXT: retq 935entry: 936 %0 = bitcast <8 x i64> %__a to <32 x i16> 937 %1 = bitcast <8 x i64> %__b to <32 x i16> 938 %2 = icmp eq <32 x i16> %0, %1 939 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 940 %4 = bitcast <64 x i1> %3 to i64 941 ret i64 %4 942} 943 944define zeroext i64 
@test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 945; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem: 946; VLX: # %bb.0: # %entry 947; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0 948; VLX-NEXT: kmovq %k0, %rax 949; VLX-NEXT: vzeroupper 950; VLX-NEXT: retq 951; 952; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem: 953; NoVLX: # %bb.0: # %entry 954; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm1 955; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 956; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 957; NoVLX-NEXT: kmovw %k0, %ecx 958; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 959; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm0, %ymm0 960; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 961; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 962; NoVLX-NEXT: kmovw %k0, %eax 963; NoVLX-NEXT: shll $16, %eax 964; NoVLX-NEXT: orl %ecx, %eax 965; NoVLX-NEXT: vzeroupper 966; NoVLX-NEXT: retq 967entry: 968 %0 = bitcast <8 x i64> %__a to <32 x i16> 969 %load = load <8 x i64>, ptr %__b 970 %1 = bitcast <8 x i64> %load to <32 x i16> 971 %2 = icmp eq <32 x i16> %0, %1 972 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 973 %4 = bitcast <64 x i1> %3 to i64 974 ret i64 %4 975} 976 977define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 978; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask: 979; VLX: # %bb.0: # %entry 980; VLX-NEXT: kmovd %edi, %k1 981; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} 982; VLX-NEXT: kmovq %k0, 
%rax 983; VLX-NEXT: vzeroupper 984; VLX-NEXT: retq 985; 986; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask: 987; NoVLX: # %bb.0: # %entry 988; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm2 989; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 990; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 991; NoVLX-NEXT: kmovw %k0, %eax 992; NoVLX-NEXT: andl %edi, %eax 993; NoVLX-NEXT: shrl $16, %edi 994; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 995; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 996; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 997; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 998; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 999; NoVLX-NEXT: kmovw %k0, %ecx 1000; NoVLX-NEXT: andl %edi, %ecx 1001; NoVLX-NEXT: shll $16, %ecx 1002; NoVLX-NEXT: movzwl %ax, %eax 1003; NoVLX-NEXT: orl %ecx, %eax 1004; NoVLX-NEXT: vzeroupper 1005; NoVLX-NEXT: retq 1006entry: 1007 %0 = bitcast <8 x i64> %__a to <32 x i16> 1008 %1 = bitcast <8 x i64> %__b to <32 x i16> 1009 %2 = icmp eq <32 x i16> %0, %1 1010 %3 = bitcast i32 %__u to <32 x i1> 1011 %4 = and <32 x i1> %2, %3 1012 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 1013 %6 = bitcast <64 x i1> %5 to i64 1014 ret i64 %6 1015} 1016 1017define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 1018; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem: 1019; VLX: # %bb.0: # %entry 1020; VLX-NEXT: kmovd %edi, %k1 1021; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1} 1022; VLX-NEXT: kmovq 
%k0, %rax 1023; VLX-NEXT: vzeroupper 1024; VLX-NEXT: retq 1025; 1026; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem: 1027; NoVLX: # %bb.0: # %entry 1028; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm1 1029; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 1030; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 1031; NoVLX-NEXT: kmovw %k0, %eax 1032; NoVLX-NEXT: andl %edi, %eax 1033; NoVLX-NEXT: shrl $16, %edi 1034; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 1035; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm0, %ymm0 1036; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 1037; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 1038; NoVLX-NEXT: kmovw %k0, %ecx 1039; NoVLX-NEXT: andl %edi, %ecx 1040; NoVLX-NEXT: shll $16, %ecx 1041; NoVLX-NEXT: movzwl %ax, %eax 1042; NoVLX-NEXT: orl %ecx, %eax 1043; NoVLX-NEXT: vzeroupper 1044; NoVLX-NEXT: retq 1045entry: 1046 %0 = bitcast <8 x i64> %__a to <32 x i16> 1047 %load = load <8 x i64>, ptr %__b 1048 %1 = bitcast <8 x i64> %load to <32 x i16> 1049 %2 = icmp eq <32 x i16> %0, %1 1050 %3 = bitcast i32 %__u to <32 x i1> 1051 %4 = and <32 x i1> %2, %3 1052 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 1053 %6 = bitcast <64 x i1> %5 to i64 1054 ret i64 %6 1055} 1056 1057 1058define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1059; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask: 1060; VLX: # %bb.0: # %entry 1061; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 1062; VLX-NEXT: kmovd %k0, %eax 1063; VLX-NEXT: # kill: def $al killed $al 
killed $eax 1064; VLX-NEXT: retq 1065; 1066; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask: 1067; NoVLX: # %bb.0: # %entry 1068; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1069; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1070; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1071; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1072; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1073; NoVLX-NEXT: kmovw %k0, %eax 1074; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1075; NoVLX-NEXT: vzeroupper 1076; NoVLX-NEXT: retq 1077entry: 1078 %0 = bitcast <2 x i64> %__a to <4 x i32> 1079 %1 = bitcast <2 x i64> %__b to <4 x i32> 1080 %2 = icmp eq <4 x i32> %0, %1 1081 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1082 %4 = bitcast <8 x i1> %3 to i8 1083 ret i8 %4 1084} 1085 1086define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 1087; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem: 1088; VLX: # %bb.0: # %entry 1089; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 1090; VLX-NEXT: kmovd %k0, %eax 1091; VLX-NEXT: # kill: def $al killed $al killed $eax 1092; VLX-NEXT: retq 1093; 1094; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem: 1095; NoVLX: # %bb.0: # %entry 1096; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1097; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 1098; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1099; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1100; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1101; NoVLX-NEXT: kmovw %k0, %eax 1102; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1103; NoVLX-NEXT: vzeroupper 1104; NoVLX-NEXT: retq 1105entry: 1106 %0 = bitcast <2 x i64> %__a to <4 x i32> 1107 %load = load <2 x i64>, ptr %__b 1108 %1 = bitcast <2 x i64> %load to <4 x i32> 1109 %2 = icmp eq <4 x i32> %0, %1 1110 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1111 %4 = bitcast <8 x i1> %3 to i8 1112 ret i8 %4 1113} 1114 
1115define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1116; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask: 1117; VLX: # %bb.0: # %entry 1118; VLX-NEXT: kmovd %edi, %k1 1119; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} 1120; VLX-NEXT: kmovd %k0, %eax 1121; VLX-NEXT: # kill: def $al killed $al killed $eax 1122; VLX-NEXT: retq 1123; 1124; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask: 1125; NoVLX: # %bb.0: # %entry 1126; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1127; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1128; NoVLX-NEXT: kmovw %edi, %k1 1129; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1130; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1131; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1132; NoVLX-NEXT: kmovw %k0, %eax 1133; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1134; NoVLX-NEXT: vzeroupper 1135; NoVLX-NEXT: retq 1136entry: 1137 %0 = bitcast <2 x i64> %__a to <4 x i32> 1138 %1 = bitcast <2 x i64> %__b to <4 x i32> 1139 %2 = icmp eq <4 x i32> %0, %1 1140 %3 = bitcast i8 %__u to <8 x i1> 1141 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1142 %4 = and <4 x i1> %2, %extract.i 1143 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1144 %6 = bitcast <8 x i1> %5 to i8 1145 ret i8 %6 1146} 1147 1148define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 1149; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: 1150; VLX: # %bb.0: # %entry 1151; VLX-NEXT: kmovd %edi, %k1 1152; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} 1153; VLX-NEXT: kmovd %k0, %eax 1154; VLX-NEXT: # kill: def $al killed $al killed $eax 1155; VLX-NEXT: retq 1156; 1157; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: 1158; NoVLX: # %bb.0: # %entry 1159; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1160; 
NoVLX-NEXT: vmovdqa (%rsi), %xmm1 1161; NoVLX-NEXT: kmovw %edi, %k1 1162; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1163; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1164; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1165; NoVLX-NEXT: kmovw %k0, %eax 1166; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1167; NoVLX-NEXT: vzeroupper 1168; NoVLX-NEXT: retq 1169entry: 1170 %0 = bitcast <2 x i64> %__a to <4 x i32> 1171 %load = load <2 x i64>, ptr %__b 1172 %1 = bitcast <2 x i64> %load to <4 x i32> 1173 %2 = icmp eq <4 x i32> %0, %1 1174 %3 = bitcast i8 %__u to <8 x i1> 1175 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1176 %4 = and <4 x i1> %2, %extract.i 1177 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1178 %6 = bitcast <8 x i1> %5 to i8 1179 ret i8 %6 1180} 1181 1182 1183define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 1184; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: 1185; VLX: # %bb.0: # %entry 1186; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 1187; VLX-NEXT: kmovd %k0, %eax 1188; VLX-NEXT: # kill: def $al killed $al killed $eax 1189; VLX-NEXT: retq 1190; 1191; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: 1192; NoVLX: # %bb.0: # %entry 1193; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1194; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 1195; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1196; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1197; NoVLX-NEXT: kmovw %k0, %eax 1198; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1199; NoVLX-NEXT: vzeroupper 1200; NoVLX-NEXT: retq 1201entry: 1202 %0 = bitcast <2 x i64> %__a to <4 x i32> 1203 %load = load i32, ptr %__b 1204 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1205 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1206 %2 = icmp eq <4 x i32> %0, %1 1207 %3 = shufflevector <4 x i1> %2, <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1208 %4 = bitcast <8 x i1> %3 to i8 1209 ret i8 %4 1210} 1211 1212define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 1213; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: 1214; VLX: # %bb.0: # %entry 1215; VLX-NEXT: kmovd %edi, %k1 1216; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} 1217; VLX-NEXT: kmovd %k0, %eax 1218; VLX-NEXT: # kill: def $al killed $al killed $eax 1219; VLX-NEXT: retq 1220; 1221; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: 1222; NoVLX: # %bb.0: # %entry 1223; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1224; NoVLX-NEXT: kmovw %edi, %k1 1225; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 1226; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1227; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1228; NoVLX-NEXT: kmovw %k0, %eax 1229; NoVLX-NEXT: # kill: def $al killed $al killed $eax 1230; NoVLX-NEXT: vzeroupper 1231; NoVLX-NEXT: retq 1232entry: 1233 %0 = bitcast <2 x i64> %__a to <4 x i32> 1234 %load = load i32, ptr %__b 1235 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1236 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1237 %2 = icmp eq <4 x i32> %0, %1 1238 %3 = bitcast i8 %__u to <8 x i1> 1239 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1240 %4 = and <4 x i1> %extract.i, %2 1241 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1242 %6 = bitcast <8 x i1> %5 to i8 1243 ret i8 %6 1244} 1245 1246 1247define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1248; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: 1249; VLX: # %bb.0: # %entry 1250; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 1251; VLX-NEXT: kmovd %k0, %eax 1252; VLX-NEXT: # kill: def $ax killed $ax killed 
$eax 1253; VLX-NEXT: retq 1254; 1255; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: 1256; NoVLX: # %bb.0: # %entry 1257; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1258; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1259; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1260; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1261; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1262; NoVLX-NEXT: kmovw %k0, %eax 1263; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1264; NoVLX-NEXT: vzeroupper 1265; NoVLX-NEXT: retq 1266entry: 1267 %0 = bitcast <2 x i64> %__a to <4 x i32> 1268 %1 = bitcast <2 x i64> %__b to <4 x i32> 1269 %2 = icmp eq <4 x i32> %0, %1 1270 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1271 %4 = bitcast <16 x i1> %3 to i16 1272 ret i16 %4 1273} 1274 1275define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 1276; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: 1277; VLX: # %bb.0: # %entry 1278; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 1279; VLX-NEXT: kmovd %k0, %eax 1280; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1281; VLX-NEXT: retq 1282; 1283; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: 1284; NoVLX: # %bb.0: # %entry 1285; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1286; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 1287; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1288; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1289; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1290; NoVLX-NEXT: kmovw %k0, %eax 1291; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1292; NoVLX-NEXT: vzeroupper 1293; NoVLX-NEXT: retq 1294entry: 1295 %0 = bitcast <2 x i64> %__a to <4 x i32> 1296 %load = load <2 x i64>, ptr %__b 1297 %1 = bitcast <2 x i64> %load to <4 x i32> 1298 %2 = icmp eq <4 x i32> %0, %1 1299 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 
i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1300 %4 = bitcast <16 x i1> %3 to i16 1301 ret i16 %4 1302} 1303 1304define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1305; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: 1306; VLX: # %bb.0: # %entry 1307; VLX-NEXT: kmovd %edi, %k1 1308; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} 1309; VLX-NEXT: kmovd %k0, %eax 1310; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1311; VLX-NEXT: retq 1312; 1313; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: 1314; NoVLX: # %bb.0: # %entry 1315; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1316; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1317; NoVLX-NEXT: kmovw %edi, %k1 1318; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1319; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1320; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1321; NoVLX-NEXT: kmovw %k0, %eax 1322; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1323; NoVLX-NEXT: vzeroupper 1324; NoVLX-NEXT: retq 1325entry: 1326 %0 = bitcast <2 x i64> %__a to <4 x i32> 1327 %1 = bitcast <2 x i64> %__b to <4 x i32> 1328 %2 = icmp eq <4 x i32> %0, %1 1329 %3 = bitcast i8 %__u to <8 x i1> 1330 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1331 %4 = and <4 x i1> %2, %extract.i 1332 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1333 %6 = bitcast <16 x i1> %5 to i16 1334 ret i16 %6 1335} 1336 1337define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 1338; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: 1339; VLX: # %bb.0: # %entry 1340; VLX-NEXT: kmovd %edi, %k1 1341; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} 1342; VLX-NEXT: kmovd %k0, %eax 1343; VLX-NEXT: # kill: def $ax killed $ax killed $eax 
1344; VLX-NEXT: retq 1345; 1346; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: 1347; NoVLX: # %bb.0: # %entry 1348; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1349; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 1350; NoVLX-NEXT: kmovw %edi, %k1 1351; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1352; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1353; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1354; NoVLX-NEXT: kmovw %k0, %eax 1355; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1356; NoVLX-NEXT: vzeroupper 1357; NoVLX-NEXT: retq 1358entry: 1359 %0 = bitcast <2 x i64> %__a to <4 x i32> 1360 %load = load <2 x i64>, ptr %__b 1361 %1 = bitcast <2 x i64> %load to <4 x i32> 1362 %2 = icmp eq <4 x i32> %0, %1 1363 %3 = bitcast i8 %__u to <8 x i1> 1364 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1365 %4 = and <4 x i1> %2, %extract.i 1366 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1367 %6 = bitcast <16 x i1> %5 to i16 1368 ret i16 %6 1369} 1370 1371 1372define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 1373; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: 1374; VLX: # %bb.0: # %entry 1375; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 1376; VLX-NEXT: kmovd %k0, %eax 1377; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1378; VLX-NEXT: retq 1379; 1380; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: 1381; NoVLX: # %bb.0: # %entry 1382; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1383; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 1384; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1385; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1386; NoVLX-NEXT: kmovw %k0, %eax 1387; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1388; NoVLX-NEXT: vzeroupper 1389; NoVLX-NEXT: retq 1390entry: 1391 %0 = bitcast <2 x i64> %__a to <4 x i32> 1392 %load = load i32, ptr 
%__b 1393 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1394 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1395 %2 = icmp eq <4 x i32> %0, %1 1396 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1397 %4 = bitcast <16 x i1> %3 to i16 1398 ret i16 %4 1399} 1400 1401define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 1402; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: 1403; VLX: # %bb.0: # %entry 1404; VLX-NEXT: kmovd %edi, %k1 1405; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} 1406; VLX-NEXT: kmovd %k0, %eax 1407; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1408; VLX-NEXT: retq 1409; 1410; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: 1411; NoVLX: # %bb.0: # %entry 1412; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1413; NoVLX-NEXT: kmovw %edi, %k1 1414; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 1415; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1416; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1417; NoVLX-NEXT: kmovw %k0, %eax 1418; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1419; NoVLX-NEXT: vzeroupper 1420; NoVLX-NEXT: retq 1421entry: 1422 %0 = bitcast <2 x i64> %__a to <4 x i32> 1423 %load = load i32, ptr %__b 1424 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1425 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1426 %2 = icmp eq <4 x i32> %0, %1 1427 %3 = bitcast i8 %__u to <8 x i1> 1428 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1429 %4 = and <4 x i1> %extract.i, %2 1430 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1431 %6 = 
bitcast <16 x i1> %5 to i16 1432 ret i16 %6 1433} 1434 1435 1436define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1437; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: 1438; VLX: # %bb.0: # %entry 1439; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 1440; VLX-NEXT: kmovd %k0, %eax 1441; VLX-NEXT: retq 1442; 1443; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: 1444; NoVLX: # %bb.0: # %entry 1445; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1446; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1447; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1448; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1449; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1450; NoVLX-NEXT: kmovw %k0, %eax 1451; NoVLX-NEXT: vzeroupper 1452; NoVLX-NEXT: retq 1453entry: 1454 %0 = bitcast <2 x i64> %__a to <4 x i32> 1455 %1 = bitcast <2 x i64> %__b to <4 x i32> 1456 %2 = icmp eq <4 x i32> %0, %1 1457 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1458 %4 = bitcast <32 x i1> %3 to i32 1459 ret i32 %4 1460} 1461 1462define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 1463; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem: 1464; VLX: # %bb.0: # %entry 1465; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 1466; VLX-NEXT: kmovd %k0, %eax 1467; VLX-NEXT: retq 1468; 1469; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem: 1470; NoVLX: # %bb.0: # %entry 1471; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1472; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 1473; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1474; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1475; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1476; NoVLX-NEXT: kmovw %k0, %eax 1477; NoVLX-NEXT: vzeroupper 1478; NoVLX-NEXT: retq 1479entry: 1480 %0 = bitcast <2 x i64> %__a to <4 x 
i32> 1481 %load = load <2 x i64>, ptr %__b 1482 %1 = bitcast <2 x i64> %load to <4 x i32> 1483 %2 = icmp eq <4 x i32> %0, %1 1484 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1485 %4 = bitcast <32 x i1> %3 to i32 1486 ret i32 %4 1487} 1488 1489define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1490; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask: 1491; VLX: # %bb.0: # %entry 1492; VLX-NEXT: kmovd %edi, %k1 1493; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} 1494; VLX-NEXT: kmovd %k0, %eax 1495; VLX-NEXT: retq 1496; 1497; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask: 1498; NoVLX: # %bb.0: # %entry 1499; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1500; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1501; NoVLX-NEXT: kmovw %edi, %k1 1502; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1503; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1504; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1505; NoVLX-NEXT: kmovw %k0, %eax 1506; NoVLX-NEXT: vzeroupper 1507; NoVLX-NEXT: retq 1508entry: 1509 %0 = bitcast <2 x i64> %__a to <4 x i32> 1510 %1 = bitcast <2 x i64> %__b to <4 x i32> 1511 %2 = icmp eq <4 x i32> %0, %1 1512 %3 = bitcast i8 %__u to <8 x i1> 1513 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1514 %4 = and <4 x i1> %2, %extract.i 1515 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1516 %6 = bitcast <32 x i1> %5 to i32 1517 ret i32 %6 1518} 1519 1520define zeroext 
i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 1521; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem: 1522; VLX: # %bb.0: # %entry 1523; VLX-NEXT: kmovd %edi, %k1 1524; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} 1525; VLX-NEXT: kmovd %k0, %eax 1526; VLX-NEXT: retq 1527; 1528; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem: 1529; NoVLX: # %bb.0: # %entry 1530; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1531; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 1532; NoVLX-NEXT: kmovw %edi, %k1 1533; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1534; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1535; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1536; NoVLX-NEXT: kmovw %k0, %eax 1537; NoVLX-NEXT: vzeroupper 1538; NoVLX-NEXT: retq 1539entry: 1540 %0 = bitcast <2 x i64> %__a to <4 x i32> 1541 %load = load <2 x i64>, ptr %__b 1542 %1 = bitcast <2 x i64> %load to <4 x i32> 1543 %2 = icmp eq <4 x i32> %0, %1 1544 %3 = bitcast i8 %__u to <8 x i1> 1545 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1546 %4 = and <4 x i1> %2, %extract.i 1547 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1548 %6 = bitcast <32 x i1> %5 to i32 1549 ret i32 %6 1550} 1551 1552 1553define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 1554; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: 1555; VLX: # %bb.0: # %entry 1556; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 1557; VLX-NEXT: kmovd %k0, %eax 1558; VLX-NEXT: retq 1559; 1560; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: 1561; NoVLX: # %bb.0: # %entry 1562; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1563; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, 
%k0 1564; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1565; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1566; NoVLX-NEXT: kmovw %k0, %eax 1567; NoVLX-NEXT: vzeroupper 1568; NoVLX-NEXT: retq 1569entry: 1570 %0 = bitcast <2 x i64> %__a to <4 x i32> 1571 %load = load i32, ptr %__b 1572 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1573 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1574 %2 = icmp eq <4 x i32> %0, %1 1575 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1576 %4 = bitcast <32 x i1> %3 to i32 1577 ret i32 %4 1578} 1579 1580define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 1581; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: 1582; VLX: # %bb.0: # %entry 1583; VLX-NEXT: kmovd %edi, %k1 1584; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} 1585; VLX-NEXT: kmovd %k0, %eax 1586; VLX-NEXT: retq 1587; 1588; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: 1589; NoVLX: # %bb.0: # %entry 1590; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1591; NoVLX-NEXT: kmovw %edi, %k1 1592; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 1593; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1594; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1595; NoVLX-NEXT: kmovw %k0, %eax 1596; NoVLX-NEXT: vzeroupper 1597; NoVLX-NEXT: retq 1598entry: 1599 %0 = bitcast <2 x i64> %__a to <4 x i32> 1600 %load = load i32, ptr %__b 1601 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1602 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1603 %2 = icmp eq <4 x i32> %0, %1 1604 %3 = bitcast i8 %__u to <8 x i1> 1605 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 
1, i32 2, i32 3> 1606 %4 = and <4 x i1> %extract.i, %2 1607 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1608 %6 = bitcast <32 x i1> %5 to i32 1609 ret i32 %6 1610} 1611 1612 1613define i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_i32(i32 %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1614; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_i32: 1615; VLX: # %bb.0: # %entry 1616; VLX-NEXT: kmovd %edi, %k1 1617; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} 1618; VLX-NEXT: kmovb %k0, %eax 1619; VLX-NEXT: retq 1620; 1621; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_i32: 1622; NoVLX: # %bb.0: # %entry 1623; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1624; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1625; NoVLX-NEXT: kmovw %edi, %k1 1626; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1627; NoVLX-NEXT: kmovw %k0, %eax 1628; NoVLX-NEXT: andl $15, %eax 1629; NoVLX-NEXT: vzeroupper 1630; NoVLX-NEXT: retq 1631entry: 1632 %0 = bitcast <2 x i64> %__a to <4 x i32> 1633 %1 = bitcast <2 x i64> %__b to <4 x i32> 1634 %2 = icmp eq <4 x i32> %0, %1 1635 %3 = bitcast i32 %__u to <32 x i1> 1636 %extract.i = shufflevector <32 x i1> %3, <32 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1637 %4 = and <4 x i1> %2, %extract.i 1638 %5 = bitcast <4 x i1> %4 to i4 1639 %6 = zext i4 %5 to i32 1640 ret i32 %6 1641} 1642 1643 1644define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1645; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask: 1646; VLX: # %bb.0: # %entry 1647; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 1648; VLX-NEXT: kmovq %k0, %rax 1649; VLX-NEXT: retq 1650; 1651; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask: 1652; NoVLX: # %bb.0: # %entry 1653; NoVLX-NEXT: # kill: def 
$xmm1 killed $xmm1 def $zmm1 1654; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1655; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1656; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1657; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1658; NoVLX-NEXT: kmovw %k0, %eax 1659; NoVLX-NEXT: vzeroupper 1660; NoVLX-NEXT: retq 1661entry: 1662 %0 = bitcast <2 x i64> %__a to <4 x i32> 1663 %1 = bitcast <2 x i64> %__b to <4 x i32> 1664 %2 = icmp eq <4 x i32> %0, %1 1665 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1666 %4 = bitcast <64 x i1> %3 to i64 1667 ret i64 %4 1668} 1669 1670define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 1671; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem: 1672; VLX: # %bb.0: # %entry 1673; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 1674; VLX-NEXT: kmovq %k0, %rax 1675; VLX-NEXT: retq 1676; 1677; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem: 1678; NoVLX: # %bb.0: # %entry 1679; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1680; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 1681; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1682; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1683; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1684; NoVLX-NEXT: kmovw %k0, %eax 1685; NoVLX-NEXT: vzeroupper 1686; NoVLX-NEXT: retq 1687entry: 1688 %0 = bitcast <2 x i64> %__a to <4 x i32> 1689 %load = load <2 x i64>, ptr %__b 1690 %1 = bitcast <2 x i64> %load to <4 x i32> 1691 %2 = icmp eq <4 x i32> %0, %1 1692 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, 
i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1693 %4 = bitcast <64 x i1> %3 to i64 1694 ret i64 %4 1695} 1696 1697define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 1698; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask: 1699; VLX: # %bb.0: # %entry 1700; VLX-NEXT: kmovd %edi, %k1 1701; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} 1702; VLX-NEXT: kmovq %k0, %rax 1703; VLX-NEXT: retq 1704; 1705; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask: 1706; NoVLX: # %bb.0: # %entry 1707; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 1708; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1709; NoVLX-NEXT: kmovw %edi, %k1 1710; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1711; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1712; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1713; NoVLX-NEXT: kmovw %k0, %eax 1714; NoVLX-NEXT: vzeroupper 1715; NoVLX-NEXT: retq 1716entry: 1717 %0 = bitcast <2 x i64> %__a to <4 x i32> 1718 %1 = bitcast <2 x i64> %__b to <4 x i32> 1719 %2 = icmp eq <4 x i32> %0, %1 1720 %3 = bitcast i8 %__u to <8 x i1> 1721 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1722 %4 = and <4 x i1> %2, %extract.i 1723 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 
5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1724 %6 = bitcast <64 x i1> %5 to i64 1725 ret i64 %6 1726} 1727 1728define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 1729; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: 1730; VLX: # %bb.0: # %entry 1731; VLX-NEXT: kmovd %edi, %k1 1732; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} 1733; VLX-NEXT: kmovq %k0, %rax 1734; VLX-NEXT: retq 1735; 1736; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: 1737; NoVLX: # %bb.0: # %entry 1738; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1739; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 1740; NoVLX-NEXT: kmovw %edi, %k1 1741; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1742; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1743; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1744; NoVLX-NEXT: kmovw %k0, %eax 1745; NoVLX-NEXT: vzeroupper 1746; NoVLX-NEXT: retq 1747entry: 1748 %0 = bitcast <2 x i64> %__a to <4 x i32> 1749 %load = load <2 x i64>, ptr %__b 1750 %1 = bitcast <2 x i64> %load to <4 x i32> 1751 %2 = icmp eq <4 x i32> %0, %1 1752 %3 = bitcast i8 %__u to <8 x i1> 1753 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1754 %4 = and <4 x i1> %2, %extract.i 1755 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1756 %6 = bitcast <64 x i1> %5 to i64 1757 ret i64 %6 1758} 1759 1760 1761define zeroext i64 
@test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 1762; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: 1763; VLX: # %bb.0: # %entry 1764; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 1765; VLX-NEXT: kmovq %k0, %rax 1766; VLX-NEXT: retq 1767; 1768; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: 1769; NoVLX: # %bb.0: # %entry 1770; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 1771; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 1772; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1773; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1774; NoVLX-NEXT: kmovw %k0, %eax 1775; NoVLX-NEXT: vzeroupper 1776; NoVLX-NEXT: retq 1777entry: 1778 %0 = bitcast <2 x i64> %__a to <4 x i32> 1779 %load = load i32, ptr %__b 1780 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1781 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1782 %2 = icmp eq <4 x i32> %0, %1 1783 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1784 %4 = bitcast <64 x i1> %3 to i64 1785 ret i64 %4 1786} 1787 1788define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 1789; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: 1790; VLX: # %bb.0: # %entry 1791; VLX-NEXT: kmovd %edi, %k1 1792; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} 1793; VLX-NEXT: kmovq %k0, %rax 1794; VLX-NEXT: retq 1795; 1796; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: 1797; NoVLX: # %bb.0: # %entry 1798; NoVLX-NEXT: # kill: 
def $xmm0 killed $xmm0 def $zmm0 1799; NoVLX-NEXT: kmovw %edi, %k1 1800; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 1801; NoVLX-NEXT: kshiftlw $12, %k0, %k0 1802; NoVLX-NEXT: kshiftrw $12, %k0, %k0 1803; NoVLX-NEXT: kmovw %k0, %eax 1804; NoVLX-NEXT: vzeroupper 1805; NoVLX-NEXT: retq 1806entry: 1807 %0 = bitcast <2 x i64> %__a to <4 x i32> 1808 %load = load i32, ptr %__b 1809 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 1810 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 1811 %2 = icmp eq <4 x i32> %0, %1 1812 %3 = bitcast i8 %__u to <8 x i1> 1813 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1814 %4 = and <4 x i1> %extract.i, %2 1815 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 1816 %6 = bitcast <64 x i1> %5 to i64 1817 ret i64 %6 1818} 1819 1820 1821define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 1822; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask: 1823; VLX: # %bb.0: # %entry 1824; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 1825; VLX-NEXT: kmovd %k0, %eax 1826; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1827; VLX-NEXT: vzeroupper 1828; VLX-NEXT: retq 1829; 1830; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask: 1831; NoVLX: # %bb.0: # %entry 1832; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1833; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1834; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1835; NoVLX-NEXT: kshiftlw $8, %k0, %k0 
1836; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1837; NoVLX-NEXT: kmovw %k0, %eax 1838; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1839; NoVLX-NEXT: vzeroupper 1840; NoVLX-NEXT: retq 1841entry: 1842 %0 = bitcast <4 x i64> %__a to <8 x i32> 1843 %1 = bitcast <4 x i64> %__b to <8 x i32> 1844 %2 = icmp eq <8 x i32> %0, %1 1845 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1846 %4 = bitcast <16 x i1> %3 to i16 1847 ret i16 %4 1848} 1849 1850define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 1851; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem: 1852; VLX: # %bb.0: # %entry 1853; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 1854; VLX-NEXT: kmovd %k0, %eax 1855; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1856; VLX-NEXT: vzeroupper 1857; VLX-NEXT: retq 1858; 1859; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem: 1860; NoVLX: # %bb.0: # %entry 1861; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1862; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 1863; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 1864; NoVLX-NEXT: kshiftlw $8, %k0, %k0 1865; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1866; NoVLX-NEXT: kmovw %k0, %eax 1867; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1868; NoVLX-NEXT: vzeroupper 1869; NoVLX-NEXT: retq 1870entry: 1871 %0 = bitcast <4 x i64> %__a to <8 x i32> 1872 %load = load <4 x i64>, ptr %__b 1873 %1 = bitcast <4 x i64> %load to <8 x i32> 1874 %2 = icmp eq <8 x i32> %0, %1 1875 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1876 %4 = bitcast <16 x i1> %3 to i16 1877 ret i16 %4 1878} 1879 1880define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 1881; VLX-LABEL: 
test_masked_vpcmpeqd_v8i1_v16i1_mask: 1882; VLX: # %bb.0: # %entry 1883; VLX-NEXT: kmovd %edi, %k1 1884; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} 1885; VLX-NEXT: kmovd %k0, %eax 1886; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1887; VLX-NEXT: vzeroupper 1888; VLX-NEXT: retq 1889; 1890; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask: 1891; NoVLX: # %bb.0: # %entry 1892; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 1893; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1894; NoVLX-NEXT: kmovw %edi, %k1 1895; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1896; NoVLX-NEXT: kshiftlw $8, %k0, %k0 1897; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1898; NoVLX-NEXT: kmovw %k0, %eax 1899; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1900; NoVLX-NEXT: vzeroupper 1901; NoVLX-NEXT: retq 1902entry: 1903 %0 = bitcast <4 x i64> %__a to <8 x i32> 1904 %1 = bitcast <4 x i64> %__b to <8 x i32> 1905 %2 = icmp eq <8 x i32> %0, %1 1906 %3 = bitcast i8 %__u to <8 x i1> 1907 %4 = and <8 x i1> %2, %3 1908 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1909 %6 = bitcast <16 x i1> %5 to i16 1910 ret i16 %6 1911} 1912 1913define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 1914; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem: 1915; VLX: # %bb.0: # %entry 1916; VLX-NEXT: kmovd %edi, %k1 1917; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} 1918; VLX-NEXT: kmovd %k0, %eax 1919; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1920; VLX-NEXT: vzeroupper 1921; VLX-NEXT: retq 1922; 1923; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem: 1924; NoVLX: # %bb.0: # %entry 1925; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1926; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 1927; NoVLX-NEXT: kmovw %edi, %k1 1928; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 1929; 
NoVLX-NEXT: kshiftlw $8, %k0, %k0 1930; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1931; NoVLX-NEXT: kmovw %k0, %eax 1932; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1933; NoVLX-NEXT: vzeroupper 1934; NoVLX-NEXT: retq 1935entry: 1936 %0 = bitcast <4 x i64> %__a to <8 x i32> 1937 %load = load <4 x i64>, ptr %__b 1938 %1 = bitcast <4 x i64> %load to <8 x i32> 1939 %2 = icmp eq <8 x i32> %0, %1 1940 %3 = bitcast i8 %__u to <8 x i1> 1941 %4 = and <8 x i1> %2, %3 1942 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 1943 %6 = bitcast <16 x i1> %5 to i16 1944 ret i16 %6 1945} 1946 1947 1948define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 1949; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: 1950; VLX: # %bb.0: # %entry 1951; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 1952; VLX-NEXT: kmovd %k0, %eax 1953; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1954; VLX-NEXT: vzeroupper 1955; VLX-NEXT: retq 1956; 1957; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: 1958; NoVLX: # %bb.0: # %entry 1959; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1960; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 1961; NoVLX-NEXT: kshiftlw $8, %k0, %k0 1962; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1963; NoVLX-NEXT: kmovw %k0, %eax 1964; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1965; NoVLX-NEXT: vzeroupper 1966; NoVLX-NEXT: retq 1967entry: 1968 %0 = bitcast <4 x i64> %__a to <8 x i32> 1969 %load = load i32, ptr %__b 1970 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 1971 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 1972 %2 = icmp eq <8 x i32> %0, %1 1973 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, 
i32 12, i32 13, i32 14, i32 15> 1974 %4 = bitcast <16 x i1> %3 to i16 1975 ret i16 %4 1976} 1977 1978define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 1979; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: 1980; VLX: # %bb.0: # %entry 1981; VLX-NEXT: kmovd %edi, %k1 1982; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} 1983; VLX-NEXT: kmovd %k0, %eax 1984; VLX-NEXT: # kill: def $ax killed $ax killed $eax 1985; VLX-NEXT: vzeroupper 1986; VLX-NEXT: retq 1987; 1988; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: 1989; NoVLX: # %bb.0: # %entry 1990; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 1991; NoVLX-NEXT: kmovw %edi, %k1 1992; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 1993; NoVLX-NEXT: kshiftlw $8, %k0, %k0 1994; NoVLX-NEXT: kshiftrw $8, %k0, %k0 1995; NoVLX-NEXT: kmovw %k0, %eax 1996; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 1997; NoVLX-NEXT: vzeroupper 1998; NoVLX-NEXT: retq 1999entry: 2000 %0 = bitcast <4 x i64> %__a to <8 x i32> 2001 %load = load i32, ptr %__b 2002 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 2003 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2004 %2 = icmp eq <8 x i32> %0, %1 2005 %3 = bitcast i8 %__u to <8 x i1> 2006 %4 = and <8 x i1> %3, %2 2007 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2008 %6 = bitcast <16 x i1> %5 to i16 2009 ret i16 %6 2010} 2011 2012 2013define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 2014; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask: 2015; VLX: # %bb.0: # %entry 2016; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 2017; VLX-NEXT: kmovd %k0, %eax 2018; VLX-NEXT: vzeroupper 2019; VLX-NEXT: retq 2020; 2021; NoVLX-LABEL: 
test_vpcmpeqd_v8i1_v32i1_mask: 2022; NoVLX: # %bb.0: # %entry 2023; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2024; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2025; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2026; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2027; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2028; NoVLX-NEXT: kmovw %k0, %eax 2029; NoVLX-NEXT: vzeroupper 2030; NoVLX-NEXT: retq 2031entry: 2032 %0 = bitcast <4 x i64> %__a to <8 x i32> 2033 %1 = bitcast <4 x i64> %__b to <8 x i32> 2034 %2 = icmp eq <8 x i32> %0, %1 2035 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2036 %4 = bitcast <32 x i1> %3 to i32 2037 ret i32 %4 2038} 2039 2040define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 2041; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: 2042; VLX: # %bb.0: # %entry 2043; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 2044; VLX-NEXT: kmovd %k0, %eax 2045; VLX-NEXT: vzeroupper 2046; VLX-NEXT: retq 2047; 2048; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: 2049; NoVLX: # %bb.0: # %entry 2050; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2051; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 2052; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2053; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2054; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2055; NoVLX-NEXT: kmovw %k0, %eax 2056; NoVLX-NEXT: vzeroupper 2057; NoVLX-NEXT: retq 2058entry: 2059 %0 = bitcast <4 x i64> %__a to <8 x i32> 2060 %load = load <4 x i64>, ptr %__b 2061 %1 = bitcast <4 x i64> %load to <8 x i32> 2062 %2 = icmp eq <8 x i32> %0, %1 2063 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, 
i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2064 %4 = bitcast <32 x i1> %3 to i32 2065 ret i32 %4 2066} 2067 2068define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 2069; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: 2070; VLX: # %bb.0: # %entry 2071; VLX-NEXT: kmovd %edi, %k1 2072; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} 2073; VLX-NEXT: kmovd %k0, %eax 2074; VLX-NEXT: vzeroupper 2075; VLX-NEXT: retq 2076; 2077; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: 2078; NoVLX: # %bb.0: # %entry 2079; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2080; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2081; NoVLX-NEXT: kmovw %edi, %k1 2082; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2083; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2084; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2085; NoVLX-NEXT: kmovw %k0, %eax 2086; NoVLX-NEXT: vzeroupper 2087; NoVLX-NEXT: retq 2088entry: 2089 %0 = bitcast <4 x i64> %__a to <8 x i32> 2090 %1 = bitcast <4 x i64> %__b to <8 x i32> 2091 %2 = icmp eq <8 x i32> %0, %1 2092 %3 = bitcast i8 %__u to <8 x i1> 2093 %4 = and <8 x i1> %2, %3 2094 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2095 %6 = bitcast <32 x i1> %5 to i32 2096 ret i32 %6 2097} 2098 2099define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 2100; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: 2101; VLX: # %bb.0: # %entry 2102; VLX-NEXT: kmovd %edi, %k1 2103; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} 2104; VLX-NEXT: kmovd %k0, %eax 2105; VLX-NEXT: vzeroupper 2106; VLX-NEXT: retq 2107; 2108; 
NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: 2109; NoVLX: # %bb.0: # %entry 2110; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2111; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 2112; NoVLX-NEXT: kmovw %edi, %k1 2113; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2114; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2115; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2116; NoVLX-NEXT: kmovw %k0, %eax 2117; NoVLX-NEXT: vzeroupper 2118; NoVLX-NEXT: retq 2119entry: 2120 %0 = bitcast <4 x i64> %__a to <8 x i32> 2121 %load = load <4 x i64>, ptr %__b 2122 %1 = bitcast <4 x i64> %load to <8 x i32> 2123 %2 = icmp eq <8 x i32> %0, %1 2124 %3 = bitcast i8 %__u to <8 x i1> 2125 %4 = and <8 x i1> %2, %3 2126 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2127 %6 = bitcast <32 x i1> %5 to i32 2128 ret i32 %6 2129} 2130 2131 2132define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 2133; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: 2134; VLX: # %bb.0: # %entry 2135; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 2136; VLX-NEXT: kmovd %k0, %eax 2137; VLX-NEXT: vzeroupper 2138; VLX-NEXT: retq 2139; 2140; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: 2141; NoVLX: # %bb.0: # %entry 2142; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2143; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 2144; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2145; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2146; NoVLX-NEXT: kmovw %k0, %eax 2147; NoVLX-NEXT: vzeroupper 2148; NoVLX-NEXT: retq 2149entry: 2150 %0 = bitcast <4 x i64> %__a to <8 x i32> 2151 %load = load i32, ptr %__b 2152 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 2153 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0> 2154 %2 = icmp eq <8 x i32> %0, %1 2155 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2156 %4 = bitcast <32 x i1> %3 to i32 2157 ret i32 %4 2158} 2159 2160define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 2161; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: 2162; VLX: # %bb.0: # %entry 2163; VLX-NEXT: kmovd %edi, %k1 2164; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} 2165; VLX-NEXT: kmovd %k0, %eax 2166; VLX-NEXT: vzeroupper 2167; VLX-NEXT: retq 2168; 2169; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: 2170; NoVLX: # %bb.0: # %entry 2171; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2172; NoVLX-NEXT: kmovw %edi, %k1 2173; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 2174; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2175; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2176; NoVLX-NEXT: kmovw %k0, %eax 2177; NoVLX-NEXT: vzeroupper 2178; NoVLX-NEXT: retq 2179entry: 2180 %0 = bitcast <4 x i64> %__a to <8 x i32> 2181 %load = load i32, ptr %__b 2182 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 2183 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2184 %2 = icmp eq <8 x i32> %0, %1 2185 %3 = bitcast i8 %__u to <8 x i1> 2186 %4 = and <8 x i1> %3, %2 2187 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2188 %6 = bitcast <32 x i1> %5 to i32 2189 ret 
i32 %6 2190} 2191 2192 2193define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 2194; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: 2195; VLX: # %bb.0: # %entry 2196; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 2197; VLX-NEXT: kmovq %k0, %rax 2198; VLX-NEXT: vzeroupper 2199; VLX-NEXT: retq 2200; 2201; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: 2202; NoVLX: # %bb.0: # %entry 2203; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2204; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2205; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2206; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2207; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2208; NoVLX-NEXT: kmovw %k0, %eax 2209; NoVLX-NEXT: vzeroupper 2210; NoVLX-NEXT: retq 2211entry: 2212 %0 = bitcast <4 x i64> %__a to <8 x i32> 2213 %1 = bitcast <4 x i64> %__b to <8 x i32> 2214 %2 = icmp eq <8 x i32> %0, %1 2215 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2216 %4 = bitcast <64 x i1> %3 to i64 2217 ret i64 %4 2218} 2219 2220define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 2221; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: 2222; VLX: # %bb.0: # %entry 2223; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 2224; VLX-NEXT: kmovq %k0, %rax 2225; VLX-NEXT: vzeroupper 2226; VLX-NEXT: retq 2227; 2228; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: 2229; NoVLX: # %bb.0: # %entry 2230; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2231; NoVLX-NEXT: vmovdqa 
(%rdi), %ymm1 2232; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2233; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2234; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2235; NoVLX-NEXT: kmovw %k0, %eax 2236; NoVLX-NEXT: vzeroupper 2237; NoVLX-NEXT: retq 2238entry: 2239 %0 = bitcast <4 x i64> %__a to <8 x i32> 2240 %load = load <4 x i64>, ptr %__b 2241 %1 = bitcast <4 x i64> %load to <8 x i32> 2242 %2 = icmp eq <8 x i32> %0, %1 2243 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2244 %4 = bitcast <64 x i1> %3 to i64 2245 ret i64 %4 2246} 2247 2248define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 2249; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask: 2250; VLX: # %bb.0: # %entry 2251; VLX-NEXT: kmovd %edi, %k1 2252; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} 2253; VLX-NEXT: kmovq %k0, %rax 2254; VLX-NEXT: vzeroupper 2255; VLX-NEXT: retq 2256; 2257; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask: 2258; NoVLX: # %bb.0: # %entry 2259; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 2260; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2261; NoVLX-NEXT: kmovw %edi, %k1 2262; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2263; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2264; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2265; NoVLX-NEXT: kmovw %k0, %eax 2266; NoVLX-NEXT: vzeroupper 2267; NoVLX-NEXT: retq 2268entry: 2269 %0 = bitcast <4 x i64> %__a to <8 x i32> 2270 %1 = bitcast <4 x i64> %__b to <8 x i32> 2271 %2 = icmp eq <8 x 
i32> %0, %1 2272 %3 = bitcast i8 %__u to <8 x i1> 2273 %4 = and <8 x i1> %2, %3 2274 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2275 %6 = bitcast <64 x i1> %5 to i64 2276 ret i64 %6 2277} 2278 2279define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 2280; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: 2281; VLX: # %bb.0: # %entry 2282; VLX-NEXT: kmovd %edi, %k1 2283; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} 2284; VLX-NEXT: kmovq %k0, %rax 2285; VLX-NEXT: vzeroupper 2286; VLX-NEXT: retq 2287; 2288; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: 2289; NoVLX: # %bb.0: # %entry 2290; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2291; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 2292; NoVLX-NEXT: kmovw %edi, %k1 2293; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2294; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2295; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2296; NoVLX-NEXT: kmovw %k0, %eax 2297; NoVLX-NEXT: vzeroupper 2298; NoVLX-NEXT: retq 2299entry: 2300 %0 = bitcast <4 x i64> %__a to <8 x i32> 2301 %load = load <4 x i64>, ptr %__b 2302 %1 = bitcast <4 x i64> %load to <8 x i32> 2303 %2 = icmp eq <8 x i32> %0, %1 2304 %3 = bitcast i8 %__u to <8 x i1> 2305 %4 = and <8 x i1> %2, %3 2306 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, 
i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2307 %6 = bitcast <64 x i1> %5 to i64 2308 ret i64 %6 2309} 2310 2311 2312define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 2313; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: 2314; VLX: # %bb.0: # %entry 2315; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 2316; VLX-NEXT: kmovq %k0, %rax 2317; VLX-NEXT: vzeroupper 2318; VLX-NEXT: retq 2319; 2320; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: 2321; NoVLX: # %bb.0: # %entry 2322; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2323; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 2324; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2325; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2326; NoVLX-NEXT: kmovw %k0, %eax 2327; NoVLX-NEXT: vzeroupper 2328; NoVLX-NEXT: retq 2329entry: 2330 %0 = bitcast <4 x i64> %__a to <8 x i32> 2331 %load = load i32, ptr %__b 2332 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 2333 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2334 %2 = icmp eq <8 x i32> %0, %1 2335 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2336 %4 = 
bitcast <64 x i1> %3 to i64 2337 ret i64 %4 2338} 2339 2340define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 2341; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: 2342; VLX: # %bb.0: # %entry 2343; VLX-NEXT: kmovd %edi, %k1 2344; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} 2345; VLX-NEXT: kmovq %k0, %rax 2346; VLX-NEXT: vzeroupper 2347; VLX-NEXT: retq 2348; 2349; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: 2350; NoVLX: # %bb.0: # %entry 2351; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 2352; NoVLX-NEXT: kmovw %edi, %k1 2353; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 2354; NoVLX-NEXT: kshiftlw $8, %k0, %k0 2355; NoVLX-NEXT: kshiftrw $8, %k0, %k0 2356; NoVLX-NEXT: kmovw %k0, %eax 2357; NoVLX-NEXT: vzeroupper 2358; NoVLX-NEXT: retq 2359entry: 2360 %0 = bitcast <4 x i64> %__a to <8 x i32> 2361 %load = load i32, ptr %__b 2362 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 2363 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2364 %2 = icmp eq <8 x i32> %0, %1 2365 %3 = bitcast i8 %__u to <8 x i1> 2366 %4 = and <8 x i1> %3, %2 2367 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 2368 %6 = bitcast <64 x i1> %5 to i64 2369 ret i64 %6 2370} 2371 2372 2373define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 2374; VLX-LABEL: 
test_vpcmpeqd_v16i1_v32i1_mask: 2375; VLX: # %bb.0: # %entry 2376; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2377; VLX-NEXT: kmovd %k0, %eax 2378; VLX-NEXT: vzeroupper 2379; VLX-NEXT: retq 2380; 2381; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask: 2382; NoVLX: # %bb.0: # %entry 2383; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2384; NoVLX-NEXT: kmovw %k0, %eax 2385; NoVLX-NEXT: vzeroupper 2386; NoVLX-NEXT: retq 2387entry: 2388 %0 = bitcast <8 x i64> %__a to <16 x i32> 2389 %1 = bitcast <8 x i64> %__b to <16 x i32> 2390 %2 = icmp eq <16 x i32> %0, %1 2391 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2392 %4 = bitcast <32 x i1> %3 to i32 2393 ret i32 %4 2394} 2395 2396define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 2397; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: 2398; VLX: # %bb.0: # %entry 2399; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 2400; VLX-NEXT: kmovd %k0, %eax 2401; VLX-NEXT: vzeroupper 2402; VLX-NEXT: retq 2403; 2404; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: 2405; NoVLX: # %bb.0: # %entry 2406; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 2407; NoVLX-NEXT: kmovw %k0, %eax 2408; NoVLX-NEXT: vzeroupper 2409; NoVLX-NEXT: retq 2410entry: 2411 %0 = bitcast <8 x i64> %__a to <16 x i32> 2412 %load = load <8 x i64>, ptr %__b 2413 %1 = bitcast <8 x i64> %load to <16 x i32> 2414 %2 = icmp eq <16 x i32> %0, %1 2415 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2416 %4 = bitcast <32 x i1> 
%3 to i32 2417 ret i32 %4 2418} 2419 2420define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 2421; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: 2422; VLX: # %bb.0: # %entry 2423; VLX-NEXT: kmovd %edi, %k1 2424; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2425; VLX-NEXT: kmovd %k0, %eax 2426; VLX-NEXT: vzeroupper 2427; VLX-NEXT: retq 2428; 2429; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: 2430; NoVLX: # %bb.0: # %entry 2431; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2432; NoVLX-NEXT: kmovw %k0, %eax 2433; NoVLX-NEXT: andl %edi, %eax 2434; NoVLX-NEXT: vzeroupper 2435; NoVLX-NEXT: retq 2436entry: 2437 %0 = bitcast <8 x i64> %__a to <16 x i32> 2438 %1 = bitcast <8 x i64> %__b to <16 x i32> 2439 %2 = icmp eq <16 x i32> %0, %1 2440 %3 = bitcast i16 %__u to <16 x i1> 2441 %4 = and <16 x i1> %2, %3 2442 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2443 %6 = bitcast <32 x i1> %5 to i32 2444 ret i32 %6 2445} 2446 2447define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 2448; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: 2449; VLX: # %bb.0: # %entry 2450; VLX-NEXT: kmovd %edi, %k1 2451; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} 2452; VLX-NEXT: kmovd %k0, %eax 2453; VLX-NEXT: vzeroupper 2454; VLX-NEXT: retq 2455; 2456; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: 2457; NoVLX: # %bb.0: # %entry 2458; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 2459; NoVLX-NEXT: kmovw %k0, %eax 2460; NoVLX-NEXT: andl %edi, %eax 2461; NoVLX-NEXT: vzeroupper 2462; NoVLX-NEXT: retq 2463entry: 2464 %0 = bitcast <8 x i64> %__a to <16 x i32> 2465 %load = load 
<8 x i64>, ptr %__b 2466 %1 = bitcast <8 x i64> %load to <16 x i32> 2467 %2 = icmp eq <16 x i32> %0, %1 2468 %3 = bitcast i16 %__u to <16 x i1> 2469 %4 = and <16 x i1> %2, %3 2470 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2471 %6 = bitcast <32 x i1> %5 to i32 2472 ret i32 %6 2473} 2474 2475 2476define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 2477; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: 2478; VLX: # %bb.0: # %entry 2479; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 2480; VLX-NEXT: kmovd %k0, %eax 2481; VLX-NEXT: vzeroupper 2482; VLX-NEXT: retq 2483; 2484; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: 2485; NoVLX: # %bb.0: # %entry 2486; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 2487; NoVLX-NEXT: kmovw %k0, %eax 2488; NoVLX-NEXT: vzeroupper 2489; NoVLX-NEXT: retq 2490entry: 2491 %0 = bitcast <8 x i64> %__a to <16 x i32> 2492 %load = load i32, ptr %__b 2493 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 2494 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2495 %2 = icmp eq <16 x i32> %0, %1 2496 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2497 %4 = bitcast <32 x i1> %3 to i32 2498 ret i32 %4 2499} 2500 2501define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) 
local_unnamed_addr { 2502; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: 2503; VLX: # %bb.0: # %entry 2504; VLX-NEXT: kmovd %edi, %k1 2505; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 2506; VLX-NEXT: kmovd %k0, %eax 2507; VLX-NEXT: vzeroupper 2508; VLX-NEXT: retq 2509; 2510; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: 2511; NoVLX: # %bb.0: # %entry 2512; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 2513; NoVLX-NEXT: kmovw %k0, %eax 2514; NoVLX-NEXT: andl %edi, %eax 2515; NoVLX-NEXT: vzeroupper 2516; NoVLX-NEXT: retq 2517entry: 2518 %0 = bitcast <8 x i64> %__a to <16 x i32> 2519 %load = load i32, ptr %__b 2520 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 2521 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2522 %2 = icmp eq <16 x i32> %0, %1 2523 %3 = bitcast i16 %__u to <16 x i1> 2524 %4 = and <16 x i1> %3, %2 2525 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 2526 %6 = bitcast <32 x i1> %5 to i32 2527 ret i32 %6 2528} 2529 2530 2531define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 2532; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: 2533; VLX: # %bb.0: # %entry 2534; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2535; VLX-NEXT: kmovq %k0, %rax 2536; VLX-NEXT: vzeroupper 2537; VLX-NEXT: retq 2538; 2539; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: 2540; NoVLX: # %bb.0: # %entry 2541; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2542; NoVLX-NEXT: kmovw %k0, %eax 2543; NoVLX-NEXT: vzeroupper 2544; NoVLX-NEXT: retq 2545entry: 2546 %0 = bitcast <8 x i64> %__a to <16 x i32> 2547 %1 = bitcast <8 x 
i64> %__b to <16 x i32> 2548 %2 = icmp eq <16 x i32> %0, %1 2549 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2550 %4 = bitcast <64 x i1> %3 to i64 2551 ret i64 %4 2552} 2553 2554define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 2555; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: 2556; VLX: # %bb.0: # %entry 2557; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 2558; VLX-NEXT: kmovq %k0, %rax 2559; VLX-NEXT: vzeroupper 2560; VLX-NEXT: retq 2561; 2562; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: 2563; NoVLX: # %bb.0: # %entry 2564; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 2565; NoVLX-NEXT: kmovw %k0, %eax 2566; NoVLX-NEXT: vzeroupper 2567; NoVLX-NEXT: retq 2568entry: 2569 %0 = bitcast <8 x i64> %__a to <16 x i32> 2570 %load = load <8 x i64>, ptr %__b 2571 %1 = bitcast <8 x i64> %load to <16 x i32> 2572 %2 = icmp eq <16 x i32> %0, %1 2573 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2574 %4 = bitcast <64 x i1> %3 to i64 2575 ret i64 %4 2576} 2577 2578define zeroext i64 
@test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 2579; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: 2580; VLX: # %bb.0: # %entry 2581; VLX-NEXT: kmovd %edi, %k1 2582; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} 2583; VLX-NEXT: kmovq %k0, %rax 2584; VLX-NEXT: vzeroupper 2585; VLX-NEXT: retq 2586; 2587; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: 2588; NoVLX: # %bb.0: # %entry 2589; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 2590; NoVLX-NEXT: kmovw %k0, %eax 2591; NoVLX-NEXT: andl %edi, %eax 2592; NoVLX-NEXT: vzeroupper 2593; NoVLX-NEXT: retq 2594entry: 2595 %0 = bitcast <8 x i64> %__a to <16 x i32> 2596 %1 = bitcast <8 x i64> %__b to <16 x i32> 2597 %2 = icmp eq <16 x i32> %0, %1 2598 %3 = bitcast i16 %__u to <16 x i1> 2599 %4 = and <16 x i1> %2, %3 2600 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2601 %6 = bitcast <64 x i1> %5 to i64 2602 ret i64 %6 2603} 2604 2605define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 2606; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: 2607; VLX: # %bb.0: # %entry 2608; VLX-NEXT: kmovd %edi, %k1 2609; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} 2610; VLX-NEXT: kmovq %k0, %rax 2611; VLX-NEXT: vzeroupper 2612; VLX-NEXT: retq 2613; 2614; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: 2615; NoVLX: # %bb.0: # %entry 2616; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 2617; NoVLX-NEXT: kmovw %k0, %eax 2618; NoVLX-NEXT: andl %edi, 
%eax 2619; NoVLX-NEXT: vzeroupper 2620; NoVLX-NEXT: retq 2621entry: 2622 %0 = bitcast <8 x i64> %__a to <16 x i32> 2623 %load = load <8 x i64>, ptr %__b 2624 %1 = bitcast <8 x i64> %load to <16 x i32> 2625 %2 = icmp eq <16 x i32> %0, %1 2626 %3 = bitcast i16 %__u to <16 x i1> 2627 %4 = and <16 x i1> %2, %3 2628 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2629 %6 = bitcast <64 x i1> %5 to i64 2630 ret i64 %6 2631} 2632 2633 2634define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 2635; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: 2636; VLX: # %bb.0: # %entry 2637; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 2638; VLX-NEXT: kmovq %k0, %rax 2639; VLX-NEXT: vzeroupper 2640; VLX-NEXT: retq 2641; 2642; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: 2643; NoVLX: # %bb.0: # %entry 2644; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 2645; NoVLX-NEXT: kmovw %k0, %eax 2646; NoVLX-NEXT: vzeroupper 2647; NoVLX-NEXT: retq 2648entry: 2649 %0 = bitcast <8 x i64> %__a to <16 x i32> 2650 %load = load i32, ptr %__b 2651 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 2652 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2653 %2 = icmp eq <16 x i32> %0, %1 2654 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 
15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 2655 %4 = bitcast <64 x i1> %3 to i64 2656 ret i64 %4 2657} 2658 2659define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 2660; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: 2661; VLX: # %bb.0: # %entry 2662; VLX-NEXT: kmovd %edi, %k1 2663; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} 2664; VLX-NEXT: kmovq %k0, %rax 2665; VLX-NEXT: vzeroupper 2666; VLX-NEXT: retq 2667; 2668; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: 2669; NoVLX: # %bb.0: # %entry 2670; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 2671; NoVLX-NEXT: kmovw %k0, %eax 2672; NoVLX-NEXT: andl %edi, %eax 2673; NoVLX-NEXT: vzeroupper 2674; NoVLX-NEXT: retq 2675entry: 2676 %0 = bitcast <8 x i64> %__a to <16 x i32> 2677 %load = load i32, ptr %__b 2678 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 2679 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 2680 %2 = icmp eq <16 x i32> %0, %1 2681 %3 = bitcast i16 %__u to <16 x i1> 2682 %4 = and <16 x i1> %3, %2 2683 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 
27,i32 28,i32 29,i32 30,i32 31> 2684 %6 = bitcast <64 x i1> %5 to i64 2685 ret i64 %6 2686} 2687 2688 2689define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 2690; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: 2691; VLX: # %bb.0: # %entry 2692; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 2693; VLX-NEXT: kmovb %k0, %eax 2694; VLX-NEXT: retq 2695; 2696; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: 2697; NoVLX: # %bb.0: # %entry 2698; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2699; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2700; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2701; NoVLX-NEXT: kmovw %k0, %eax 2702; NoVLX-NEXT: andl $3, %eax 2703; NoVLX-NEXT: vzeroupper 2704; NoVLX-NEXT: retq 2705entry: 2706 %0 = bitcast <2 x i64> %__a to <2 x i64> 2707 %1 = bitcast <2 x i64> %__b to <2 x i64> 2708 %2 = icmp eq <2 x i64> %0, %1 2709 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2710 %4 = bitcast <4 x i1> %3 to i4 2711 ret i4 %4 2712} 2713 2714define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 2715; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: 2716; VLX: # %bb.0: # %entry 2717; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 2718; VLX-NEXT: kmovb %k0, %eax 2719; VLX-NEXT: retq 2720; 2721; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: 2722; NoVLX: # %bb.0: # %entry 2723; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2724; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 2725; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2726; NoVLX-NEXT: kmovw %k0, %eax 2727; NoVLX-NEXT: andl $3, %eax 2728; NoVLX-NEXT: vzeroupper 2729; NoVLX-NEXT: retq 2730entry: 2731 %0 = bitcast <2 x i64> %__a to <2 x i64> 2732 %load = load <2 x i64>, ptr %__b 2733 %1 = bitcast <2 x i64> %load to <2 x i64> 2734 %2 = icmp eq <2 x i64> %0, %1 2735 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2736 %4 = bitcast <4 x i1> %3 to i4 2737 
ret i4 %4 2738} 2739 2740define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 2741; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: 2742; VLX: # %bb.0: # %entry 2743; VLX-NEXT: kmovd %edi, %k1 2744; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} 2745; VLX-NEXT: kmovb %k0, %eax 2746; VLX-NEXT: retq 2747; 2748; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: 2749; NoVLX: # %bb.0: # %entry 2750; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2751; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2752; NoVLX-NEXT: kmovw %edi, %k1 2753; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 2754; NoVLX-NEXT: kmovw %k0, %eax 2755; NoVLX-NEXT: andl $3, %eax 2756; NoVLX-NEXT: vzeroupper 2757; NoVLX-NEXT: retq 2758entry: 2759 %0 = bitcast <2 x i64> %__a to <2 x i64> 2760 %1 = bitcast <2 x i64> %__b to <2 x i64> 2761 %2 = icmp eq <2 x i64> %0, %1 2762 %3 = bitcast i8 %__u to <8 x i1> 2763 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 2764 %4 = and <2 x i1> %2, %extract.i 2765 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2766 %6 = bitcast <4 x i1> %5 to i4 2767 ret i4 %6 2768} 2769 2770define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 2771; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: 2772; VLX: # %bb.0: # %entry 2773; VLX-NEXT: kmovd %edi, %k1 2774; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} 2775; VLX-NEXT: kmovb %k0, %eax 2776; VLX-NEXT: retq 2777; 2778; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: 2779; NoVLX: # %bb.0: # %entry 2780; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2781; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 2782; NoVLX-NEXT: kmovw %edi, %k1 2783; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 2784; NoVLX-NEXT: kmovw %k0, %eax 2785; NoVLX-NEXT: andl $3, %eax 2786; NoVLX-NEXT: vzeroupper 2787; NoVLX-NEXT: retq 
2788entry: 2789 %0 = bitcast <2 x i64> %__a to <2 x i64> 2790 %load = load <2 x i64>, ptr %__b 2791 %1 = bitcast <2 x i64> %load to <2 x i64> 2792 %2 = icmp eq <2 x i64> %0, %1 2793 %3 = bitcast i8 %__u to <8 x i1> 2794 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 2795 %4 = and <2 x i1> %2, %extract.i 2796 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2797 %6 = bitcast <4 x i1> %5 to i4 2798 ret i4 %6 2799} 2800 2801 2802define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 2803; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: 2804; VLX: # %bb.0: # %entry 2805; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 2806; VLX-NEXT: kmovb %k0, %eax 2807; VLX-NEXT: retq 2808; 2809; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: 2810; NoVLX: # %bb.0: # %entry 2811; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2812; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 2813; NoVLX-NEXT: kmovw %k0, %eax 2814; NoVLX-NEXT: andl $3, %eax 2815; NoVLX-NEXT: vzeroupper 2816; NoVLX-NEXT: retq 2817entry: 2818 %0 = bitcast <2 x i64> %__a to <2 x i64> 2819 %load = load i64, ptr %__b 2820 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 2821 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 2822 %2 = icmp eq <2 x i64> %0, %1 2823 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2824 %4 = bitcast <4 x i1> %3 to i4 2825 ret i4 %4 2826} 2827 2828define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 2829; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: 2830; VLX: # %bb.0: # %entry 2831; VLX-NEXT: kmovd %edi, %k1 2832; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} 2833; VLX-NEXT: kmovb %k0, %eax 2834; VLX-NEXT: retq 2835; 2836; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: 2837; NoVLX: # %bb.0: # 
%entry 2838; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2839; NoVLX-NEXT: kmovw %edi, %k1 2840; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} 2841; NoVLX-NEXT: kmovw %k0, %eax 2842; NoVLX-NEXT: andl $3, %eax 2843; NoVLX-NEXT: vzeroupper 2844; NoVLX-NEXT: retq 2845entry: 2846 %0 = bitcast <2 x i64> %__a to <2 x i64> 2847 %load = load i64, ptr %__b 2848 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 2849 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 2850 %2 = icmp eq <2 x i64> %0, %1 2851 %3 = bitcast i8 %__u to <8 x i1> 2852 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 2853 %4 = and <2 x i1> %extract.i, %2 2854 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2855 %6 = bitcast <4 x i1> %5 to i4 2856 ret i4 %6 2857} 2858 2859 2860define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 2861; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask: 2862; VLX: # %bb.0: # %entry 2863; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 2864; VLX-NEXT: kmovd %k0, %eax 2865; VLX-NEXT: # kill: def $al killed $al killed $eax 2866; VLX-NEXT: retq 2867; 2868; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask: 2869; NoVLX: # %bb.0: # %entry 2870; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2871; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2872; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2873; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2874; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2875; NoVLX-NEXT: kmovw %k0, %eax 2876; NoVLX-NEXT: # kill: def $al killed $al killed $eax 2877; NoVLX-NEXT: vzeroupper 2878; NoVLX-NEXT: retq 2879entry: 2880 %0 = bitcast <2 x i64> %__a to <2 x i64> 2881 %1 = bitcast <2 x i64> %__b to <2 x i64> 2882 %2 = icmp eq <2 x i64> %0, %1 2883 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 2884 %4 = bitcast <8 x i1> %3 to i8 2885 ret i8 %4 
2886} 2887 2888define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 2889; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem: 2890; VLX: # %bb.0: # %entry 2891; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 2892; VLX-NEXT: kmovd %k0, %eax 2893; VLX-NEXT: # kill: def $al killed $al killed $eax 2894; VLX-NEXT: retq 2895; 2896; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem: 2897; NoVLX: # %bb.0: # %entry 2898; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2899; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 2900; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 2901; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2902; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2903; NoVLX-NEXT: kmovw %k0, %eax 2904; NoVLX-NEXT: # kill: def $al killed $al killed $eax 2905; NoVLX-NEXT: vzeroupper 2906; NoVLX-NEXT: retq 2907entry: 2908 %0 = bitcast <2 x i64> %__a to <2 x i64> 2909 %load = load <2 x i64>, ptr %__b 2910 %1 = bitcast <2 x i64> %load to <2 x i64> 2911 %2 = icmp eq <2 x i64> %0, %1 2912 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 2913 %4 = bitcast <8 x i1> %3 to i8 2914 ret i8 %4 2915} 2916 2917define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 2918; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask: 2919; VLX: # %bb.0: # %entry 2920; VLX-NEXT: kmovd %edi, %k1 2921; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} 2922; VLX-NEXT: kmovd %k0, %eax 2923; VLX-NEXT: # kill: def $al killed $al killed $eax 2924; VLX-NEXT: retq 2925; 2926; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask: 2927; NoVLX: # %bb.0: # %entry 2928; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 2929; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2930; NoVLX-NEXT: kmovw %edi, %k1 2931; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 2932; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2933; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2934; NoVLX-NEXT: kmovw %k0, %eax 2935; 
NoVLX-NEXT: # kill: def $al killed $al killed $eax 2936; NoVLX-NEXT: vzeroupper 2937; NoVLX-NEXT: retq 2938entry: 2939 %0 = bitcast <2 x i64> %__a to <2 x i64> 2940 %1 = bitcast <2 x i64> %__b to <2 x i64> 2941 %2 = icmp eq <2 x i64> %0, %1 2942 %3 = bitcast i8 %__u to <8 x i1> 2943 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 2944 %4 = and <2 x i1> %2, %extract.i 2945 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 2946 %6 = bitcast <8 x i1> %5 to i8 2947 ret i8 %6 2948} 2949 2950define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 2951; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem: 2952; VLX: # %bb.0: # %entry 2953; VLX-NEXT: kmovd %edi, %k1 2954; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} 2955; VLX-NEXT: kmovd %k0, %eax 2956; VLX-NEXT: # kill: def $al killed $al killed $eax 2957; VLX-NEXT: retq 2958; 2959; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem: 2960; NoVLX: # %bb.0: # %entry 2961; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2962; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 2963; NoVLX-NEXT: kmovw %edi, %k1 2964; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 2965; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2966; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2967; NoVLX-NEXT: kmovw %k0, %eax 2968; NoVLX-NEXT: # kill: def $al killed $al killed $eax 2969; NoVLX-NEXT: vzeroupper 2970; NoVLX-NEXT: retq 2971entry: 2972 %0 = bitcast <2 x i64> %__a to <2 x i64> 2973 %load = load <2 x i64>, ptr %__b 2974 %1 = bitcast <2 x i64> %load to <2 x i64> 2975 %2 = icmp eq <2 x i64> %0, %1 2976 %3 = bitcast i8 %__u to <8 x i1> 2977 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 2978 %4 = and <2 x i1> %2, %extract.i 2979 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 2980 %6 = bitcast <8 x 
i1> %5 to i8 2981 ret i8 %6 2982} 2983 2984 2985define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 2986; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: 2987; VLX: # %bb.0: # %entry 2988; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 2989; VLX-NEXT: kmovd %k0, %eax 2990; VLX-NEXT: # kill: def $al killed $al killed $eax 2991; VLX-NEXT: retq 2992; 2993; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: 2994; NoVLX: # %bb.0: # %entry 2995; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 2996; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 2997; NoVLX-NEXT: kshiftlw $14, %k0, %k0 2998; NoVLX-NEXT: kshiftrw $14, %k0, %k0 2999; NoVLX-NEXT: kmovw %k0, %eax 3000; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3001; NoVLX-NEXT: vzeroupper 3002; NoVLX-NEXT: retq 3003entry: 3004 %0 = bitcast <2 x i64> %__a to <2 x i64> 3005 %load = load i64, ptr %__b 3006 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3007 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3008 %2 = icmp eq <2 x i64> %0, %1 3009 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3010 %4 = bitcast <8 x i1> %3 to i8 3011 ret i8 %4 3012} 3013 3014define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 3015; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: 3016; VLX: # %bb.0: # %entry 3017; VLX-NEXT: kmovd %edi, %k1 3018; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} 3019; VLX-NEXT: kmovd %k0, %eax 3020; VLX-NEXT: # kill: def $al killed $al killed $eax 3021; VLX-NEXT: retq 3022; 3023; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: 3024; NoVLX: # %bb.0: # %entry 3025; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3026; NoVLX-NEXT: kmovw %edi, %k1 3027; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} 3028; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3029; NoVLX-NEXT: 
kshiftrw $14, %k0, %k0 3030; NoVLX-NEXT: kmovw %k0, %eax 3031; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3032; NoVLX-NEXT: vzeroupper 3033; NoVLX-NEXT: retq 3034entry: 3035 %0 = bitcast <2 x i64> %__a to <2 x i64> 3036 %load = load i64, ptr %__b 3037 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3038 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3039 %2 = icmp eq <2 x i64> %0, %1 3040 %3 = bitcast i8 %__u to <8 x i1> 3041 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3042 %4 = and <2 x i1> %extract.i, %2 3043 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3044 %6 = bitcast <8 x i1> %5 to i8 3045 ret i8 %6 3046} 3047 3048 3049define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3050; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask: 3051; VLX: # %bb.0: # %entry 3052; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 3053; VLX-NEXT: kmovd %k0, %eax 3054; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3055; VLX-NEXT: retq 3056; 3057; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask: 3058; NoVLX: # %bb.0: # %entry 3059; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3060; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3061; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3062; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3063; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3064; NoVLX-NEXT: kmovw %k0, %eax 3065; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3066; NoVLX-NEXT: vzeroupper 3067; NoVLX-NEXT: retq 3068entry: 3069 %0 = bitcast <2 x i64> %__a to <2 x i64> 3070 %1 = bitcast <2 x i64> %__b to <2 x i64> 3071 %2 = icmp eq <2 x i64> %0, %1 3072 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3073 %4 = bitcast <16 x i1> %3 to i16 3074 ret i16 %4 3075} 3076 
3077define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 3078; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem: 3079; VLX: # %bb.0: # %entry 3080; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 3081; VLX-NEXT: kmovd %k0, %eax 3082; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3083; VLX-NEXT: retq 3084; 3085; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem: 3086; NoVLX: # %bb.0: # %entry 3087; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3088; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 3089; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3090; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3091; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3092; NoVLX-NEXT: kmovw %k0, %eax 3093; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3094; NoVLX-NEXT: vzeroupper 3095; NoVLX-NEXT: retq 3096entry: 3097 %0 = bitcast <2 x i64> %__a to <2 x i64> 3098 %load = load <2 x i64>, ptr %__b 3099 %1 = bitcast <2 x i64> %load to <2 x i64> 3100 %2 = icmp eq <2 x i64> %0, %1 3101 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3102 %4 = bitcast <16 x i1> %3 to i16 3103 ret i16 %4 3104} 3105 3106define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3107; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask: 3108; VLX: # %bb.0: # %entry 3109; VLX-NEXT: kmovd %edi, %k1 3110; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} 3111; VLX-NEXT: kmovd %k0, %eax 3112; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3113; VLX-NEXT: retq 3114; 3115; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask: 3116; NoVLX: # %bb.0: # %entry 3117; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3118; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3119; NoVLX-NEXT: kmovw %edi, %k1 3120; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3121; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3122; NoVLX-NEXT: kshiftrw 
$14, %k0, %k0 3123; NoVLX-NEXT: kmovw %k0, %eax 3124; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3125; NoVLX-NEXT: vzeroupper 3126; NoVLX-NEXT: retq 3127entry: 3128 %0 = bitcast <2 x i64> %__a to <2 x i64> 3129 %1 = bitcast <2 x i64> %__b to <2 x i64> 3130 %2 = icmp eq <2 x i64> %0, %1 3131 %3 = bitcast i8 %__u to <8 x i1> 3132 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3133 %4 = and <2 x i1> %2, %extract.i 3134 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3135 %6 = bitcast <16 x i1> %5 to i16 3136 ret i16 %6 3137} 3138 3139define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 3140; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem: 3141; VLX: # %bb.0: # %entry 3142; VLX-NEXT: kmovd %edi, %k1 3143; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} 3144; VLX-NEXT: kmovd %k0, %eax 3145; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3146; VLX-NEXT: retq 3147; 3148; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem: 3149; NoVLX: # %bb.0: # %entry 3150; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3151; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 3152; NoVLX-NEXT: kmovw %edi, %k1 3153; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3154; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3155; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3156; NoVLX-NEXT: kmovw %k0, %eax 3157; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3158; NoVLX-NEXT: vzeroupper 3159; NoVLX-NEXT: retq 3160entry: 3161 %0 = bitcast <2 x i64> %__a to <2 x i64> 3162 %load = load <2 x i64>, ptr %__b 3163 %1 = bitcast <2 x i64> %load to <2 x i64> 3164 %2 = icmp eq <2 x i64> %0, %1 3165 %3 = bitcast i8 %__u to <8 x i1> 3166 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3167 %4 = and <2 x i1> %2, %extract.i 3168 %5 = shufflevector <2 x i1> %4, 
<2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3169 %6 = bitcast <16 x i1> %5 to i16 3170 ret i16 %6 3171} 3172 3173 3174define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 3175; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: 3176; VLX: # %bb.0: # %entry 3177; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 3178; VLX-NEXT: kmovd %k0, %eax 3179; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3180; VLX-NEXT: retq 3181; 3182; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: 3183; NoVLX: # %bb.0: # %entry 3184; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3185; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 3186; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3187; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3188; NoVLX-NEXT: kmovw %k0, %eax 3189; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3190; NoVLX-NEXT: vzeroupper 3191; NoVLX-NEXT: retq 3192entry: 3193 %0 = bitcast <2 x i64> %__a to <2 x i64> 3194 %load = load i64, ptr %__b 3195 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3196 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3197 %2 = icmp eq <2 x i64> %0, %1 3198 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3199 %4 = bitcast <16 x i1> %3 to i16 3200 ret i16 %4 3201} 3202 3203define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 3204; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: 3205; VLX: # %bb.0: # %entry 3206; VLX-NEXT: kmovd %edi, %k1 3207; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} 3208; VLX-NEXT: kmovd %k0, %eax 3209; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3210; VLX-NEXT: retq 3211; 3212; NoVLX-LABEL: 
test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: 3213; NoVLX: # %bb.0: # %entry 3214; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3215; NoVLX-NEXT: kmovw %edi, %k1 3216; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} 3217; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3218; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3219; NoVLX-NEXT: kmovw %k0, %eax 3220; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3221; NoVLX-NEXT: vzeroupper 3222; NoVLX-NEXT: retq 3223entry: 3224 %0 = bitcast <2 x i64> %__a to <2 x i64> 3225 %load = load i64, ptr %__b 3226 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3227 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3228 %2 = icmp eq <2 x i64> %0, %1 3229 %3 = bitcast i8 %__u to <8 x i1> 3230 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3231 %4 = and <2 x i1> %extract.i, %2 3232 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3233 %6 = bitcast <16 x i1> %5 to i16 3234 ret i16 %6 3235} 3236 3237 3238define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3239; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask: 3240; VLX: # %bb.0: # %entry 3241; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 3242; VLX-NEXT: kmovd %k0, %eax 3243; VLX-NEXT: retq 3244; 3245; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask: 3246; NoVLX: # %bb.0: # %entry 3247; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3248; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3249; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3250; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3251; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3252; NoVLX-NEXT: kmovw %k0, %eax 3253; NoVLX-NEXT: vzeroupper 3254; NoVLX-NEXT: retq 3255entry: 3256 %0 = bitcast <2 x i64> %__a to <2 x i64> 3257 %1 = bitcast <2 x i64> %__b to <2 x i64> 3258 %2 = icmp eq <2 x i64> %0, %1 3259 %3 = shufflevector <2 
x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3260 %4 = bitcast <32 x i1> %3 to i32 3261 ret i32 %4 3262} 3263 3264define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 3265; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem: 3266; VLX: # %bb.0: # %entry 3267; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 3268; VLX-NEXT: kmovd %k0, %eax 3269; VLX-NEXT: retq 3270; 3271; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem: 3272; NoVLX: # %bb.0: # %entry 3273; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3274; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 3275; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3276; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3277; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3278; NoVLX-NEXT: kmovw %k0, %eax 3279; NoVLX-NEXT: vzeroupper 3280; NoVLX-NEXT: retq 3281entry: 3282 %0 = bitcast <2 x i64> %__a to <2 x i64> 3283 %load = load <2 x i64>, ptr %__b 3284 %1 = bitcast <2 x i64> %load to <2 x i64> 3285 %2 = icmp eq <2 x i64> %0, %1 3286 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3287 %4 = bitcast <32 x i1> %3 to i32 3288 ret i32 %4 3289} 3290 3291define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3292; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask: 3293; VLX: # %bb.0: # %entry 3294; VLX-NEXT: kmovd %edi, %k1 3295; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} 3296; VLX-NEXT: kmovd %k0, %eax 3297; VLX-NEXT: retq 3298; 3299; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask: 3300; NoVLX: # %bb.0: # 
%entry 3301; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3302; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3303; NoVLX-NEXT: kmovw %edi, %k1 3304; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3305; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3306; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3307; NoVLX-NEXT: kmovw %k0, %eax 3308; NoVLX-NEXT: vzeroupper 3309; NoVLX-NEXT: retq 3310entry: 3311 %0 = bitcast <2 x i64> %__a to <2 x i64> 3312 %1 = bitcast <2 x i64> %__b to <2 x i64> 3313 %2 = icmp eq <2 x i64> %0, %1 3314 %3 = bitcast i8 %__u to <8 x i1> 3315 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3316 %4 = and <2 x i1> %2, %extract.i 3317 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3318 %6 = bitcast <32 x i1> %5 to i32 3319 ret i32 %6 3320} 3321 3322define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 3323; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem: 3324; VLX: # %bb.0: # %entry 3325; VLX-NEXT: kmovd %edi, %k1 3326; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} 3327; VLX-NEXT: kmovd %k0, %eax 3328; VLX-NEXT: retq 3329; 3330; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem: 3331; NoVLX: # %bb.0: # %entry 3332; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3333; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 3334; NoVLX-NEXT: kmovw %edi, %k1 3335; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3336; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3337; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3338; NoVLX-NEXT: kmovw %k0, %eax 3339; NoVLX-NEXT: vzeroupper 3340; NoVLX-NEXT: retq 3341entry: 3342 %0 = bitcast <2 x i64> %__a to <2 x i64> 3343 %load = load <2 x i64>, ptr %__b 3344 %1 = bitcast <2 x i64> %load to <2 x i64> 3345 %2 = 
icmp eq <2 x i64> %0, %1 3346 %3 = bitcast i8 %__u to <8 x i1> 3347 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3348 %4 = and <2 x i1> %2, %extract.i 3349 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3350 %6 = bitcast <32 x i1> %5 to i32 3351 ret i32 %6 3352} 3353 3354 3355define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 3356; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b: 3357; VLX: # %bb.0: # %entry 3358; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 3359; VLX-NEXT: kmovd %k0, %eax 3360; VLX-NEXT: retq 3361; 3362; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b: 3363; NoVLX: # %bb.0: # %entry 3364; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3365; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 3366; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3367; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3368; NoVLX-NEXT: kmovw %k0, %eax 3369; NoVLX-NEXT: vzeroupper 3370; NoVLX-NEXT: retq 3371entry: 3372 %0 = bitcast <2 x i64> %__a to <2 x i64> 3373 %load = load i64, ptr %__b 3374 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3375 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3376 %2 = icmp eq <2 x i64> %0, %1 3377 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3378 %4 = bitcast <32 x i1> %3 to i32 3379 ret i32 %4 3380} 3381 3382define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 3383; VLX-LABEL: 
test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b: 3384; VLX: # %bb.0: # %entry 3385; VLX-NEXT: kmovd %edi, %k1 3386; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} 3387; VLX-NEXT: kmovd %k0, %eax 3388; VLX-NEXT: retq 3389; 3390; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b: 3391; NoVLX: # %bb.0: # %entry 3392; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3393; NoVLX-NEXT: kmovw %edi, %k1 3394; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} 3395; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3396; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3397; NoVLX-NEXT: kmovw %k0, %eax 3398; NoVLX-NEXT: vzeroupper 3399; NoVLX-NEXT: retq 3400entry: 3401 %0 = bitcast <2 x i64> %__a to <2 x i64> 3402 %load = load i64, ptr %__b 3403 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3404 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3405 %2 = icmp eq <2 x i64> %0, %1 3406 %3 = bitcast i8 %__u to <8 x i1> 3407 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3408 %4 = and <2 x i1> %extract.i, %2 3409 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3410 %6 = bitcast <32 x i1> %5 to i32 3411 ret i32 %6 3412} 3413 3414 3415define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3416; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask: 3417; VLX: # %bb.0: # %entry 3418; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 3419; VLX-NEXT: kmovq %k0, %rax 3420; VLX-NEXT: retq 3421; 3422; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask: 3423; NoVLX: # %bb.0: # %entry 3424; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3425; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3426; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3427; NoVLX-NEXT: kshiftlw $14, %k0, 
%k0 3428; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3429; NoVLX-NEXT: kmovw %k0, %eax 3430; NoVLX-NEXT: vzeroupper 3431; NoVLX-NEXT: retq 3432entry: 3433 %0 = bitcast <2 x i64> %__a to <2 x i64> 3434 %1 = bitcast <2 x i64> %__b to <2 x i64> 3435 %2 = icmp eq <2 x i64> %0, %1 3436 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3437 %4 = bitcast <64 x i1> %3 to i64 3438 ret i64 %4 3439} 3440 3441define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 3442; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem: 3443; VLX: # %bb.0: # %entry 3444; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 3445; VLX-NEXT: kmovq %k0, %rax 3446; VLX-NEXT: retq 3447; 3448; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem: 3449; NoVLX: # %bb.0: # %entry 3450; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3451; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 3452; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3453; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3454; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3455; NoVLX-NEXT: kmovw %k0, %eax 3456; NoVLX-NEXT: vzeroupper 3457; NoVLX-NEXT: retq 3458entry: 3459 %0 = bitcast <2 x i64> %__a to <2 x i64> 3460 %load = load <2 x i64>, ptr %__b 3461 %1 = bitcast <2 x i64> %load to <2 x i64> 3462 %2 = icmp eq <2 x i64> %0, %1 3463 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3464 %4 = bitcast <64 x i1> %3 to i64 3465 ret i64 %4 3466} 3467 3468define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 3469; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask: 3470; VLX: # %bb.0: # %entry 3471; VLX-NEXT: kmovd %edi, %k1 3472; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} 3473; VLX-NEXT: kmovq %k0, %rax 3474; VLX-NEXT: retq 3475; 3476; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask: 3477; NoVLX: # %bb.0: # %entry 3478; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 3479; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3480; NoVLX-NEXT: kmovw %edi, %k1 3481; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3482; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3483; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3484; NoVLX-NEXT: kmovw %k0, %eax 3485; NoVLX-NEXT: vzeroupper 3486; NoVLX-NEXT: retq 3487entry: 3488 %0 = bitcast <2 x i64> %__a to <2 x i64> 3489 %1 = bitcast <2 x i64> %__b to <2 x i64> 3490 %2 = icmp eq <2 x i64> %0, %1 3491 %3 = bitcast i8 %__u to <8 x i1> 3492 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3493 %4 = and <2 x i1> %2, %extract.i 3494 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3495 %6 = bitcast <64 x i1> %5 to i64 3496 ret i64 
%6 3497} 3498 3499define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 3500; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem: 3501; VLX: # %bb.0: # %entry 3502; VLX-NEXT: kmovd %edi, %k1 3503; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} 3504; VLX-NEXT: kmovq %k0, %rax 3505; VLX-NEXT: retq 3506; 3507; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem: 3508; NoVLX: # %bb.0: # %entry 3509; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3510; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 3511; NoVLX-NEXT: kmovw %edi, %k1 3512; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3513; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3514; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3515; NoVLX-NEXT: kmovw %k0, %eax 3516; NoVLX-NEXT: vzeroupper 3517; NoVLX-NEXT: retq 3518entry: 3519 %0 = bitcast <2 x i64> %__a to <2 x i64> 3520 %load = load <2 x i64>, ptr %__b 3521 %1 = bitcast <2 x i64> %load to <2 x i64> 3522 %2 = icmp eq <2 x i64> %0, %1 3523 %3 = bitcast i8 %__u to <8 x i1> 3524 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3525 %4 = and <2 x i1> %2, %extract.i 3526 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3527 %6 = bitcast <64 x i1> %5 to i64 3528 ret i64 %6 3529} 3530 3531 3532define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 3533; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b: 3534; VLX: # %bb.0: # %entry 3535; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 3536; 
VLX-NEXT: kmovq %k0, %rax 3537; VLX-NEXT: retq 3538; 3539; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b: 3540; NoVLX: # %bb.0: # %entry 3541; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3542; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 3543; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3544; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3545; NoVLX-NEXT: kmovw %k0, %eax 3546; NoVLX-NEXT: vzeroupper 3547; NoVLX-NEXT: retq 3548entry: 3549 %0 = bitcast <2 x i64> %__a to <2 x i64> 3550 %load = load i64, ptr %__b 3551 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3552 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3553 %2 = icmp eq <2 x i64> %0, %1 3554 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3555 %4 = bitcast <64 x i1> %3 to i64 3556 ret i64 %4 3557} 3558 3559define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 3560; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b: 3561; VLX: # %bb.0: # %entry 3562; VLX-NEXT: kmovd %edi, %k1 3563; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} 3564; VLX-NEXT: kmovq %k0, %rax 3565; VLX-NEXT: retq 3566; 3567; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b: 3568; NoVLX: # %bb.0: # %entry 3569; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 3570; NoVLX-NEXT: kmovw %edi, %k1 3571; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} 3572; NoVLX-NEXT: kshiftlw $14, %k0, %k0 3573; NoVLX-NEXT: kshiftrw $14, %k0, %k0 3574; NoVLX-NEXT: kmovw %k0, 
%eax 3575; NoVLX-NEXT: vzeroupper 3576; NoVLX-NEXT: retq 3577entry: 3578 %0 = bitcast <2 x i64> %__a to <2 x i64> 3579 %load = load i64, ptr %__b 3580 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 3581 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 3582 %2 = icmp eq <2 x i64> %0, %1 3583 %3 = bitcast i8 %__u to <8 x i1> 3584 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 3585 %4 = and <2 x i1> %extract.i, %2 3586 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 3587 %6 = bitcast <64 x i1> %5 to i64 3588 ret i64 %6 3589} 3590 3591 3592define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 3593; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask: 3594; VLX: # %bb.0: # %entry 3595; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 3596; VLX-NEXT: kmovd %k0, %eax 3597; VLX-NEXT: # kill: def $al killed $al killed $eax 3598; VLX-NEXT: vzeroupper 3599; VLX-NEXT: retq 3600; 3601; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask: 3602; NoVLX: # %bb.0: # %entry 3603; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 3604; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3605; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3606; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3607; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3608; NoVLX-NEXT: kmovw %k0, %eax 3609; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3610; NoVLX-NEXT: vzeroupper 3611; NoVLX-NEXT: retq 3612entry: 3613 %0 = bitcast <4 x i64> %__a to <4 x i64> 3614 %1 = bitcast <4 x i64> 
%__b to <4 x i64> 3615 %2 = icmp eq <4 x i64> %0, %1 3616 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3617 %4 = bitcast <8 x i1> %3 to i8 3618 ret i8 %4 3619} 3620 3621define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 3622; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem: 3623; VLX: # %bb.0: # %entry 3624; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 3625; VLX-NEXT: kmovd %k0, %eax 3626; VLX-NEXT: # kill: def $al killed $al killed $eax 3627; VLX-NEXT: vzeroupper 3628; VLX-NEXT: retq 3629; 3630; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem: 3631; NoVLX: # %bb.0: # %entry 3632; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3633; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 3634; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3635; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3636; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3637; NoVLX-NEXT: kmovw %k0, %eax 3638; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3639; NoVLX-NEXT: vzeroupper 3640; NoVLX-NEXT: retq 3641entry: 3642 %0 = bitcast <4 x i64> %__a to <4 x i64> 3643 %load = load <4 x i64>, ptr %__b 3644 %1 = bitcast <4 x i64> %load to <4 x i64> 3645 %2 = icmp eq <4 x i64> %0, %1 3646 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3647 %4 = bitcast <8 x i1> %3 to i8 3648 ret i8 %4 3649} 3650 3651define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 3652; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask: 3653; VLX: # %bb.0: # %entry 3654; VLX-NEXT: kmovd %edi, %k1 3655; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} 3656; VLX-NEXT: kmovd %k0, %eax 3657; VLX-NEXT: # kill: def $al killed $al killed $eax 3658; VLX-NEXT: vzeroupper 3659; VLX-NEXT: retq 3660; 3661; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask: 3662; NoVLX: # %bb.0: # %entry 3663; NoVLX-NEXT: # kill: def $ymm1 killed 
$ymm1 def $zmm1 3664; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3665; NoVLX-NEXT: kmovw %edi, %k1 3666; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3667; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3668; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3669; NoVLX-NEXT: kmovw %k0, %eax 3670; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3671; NoVLX-NEXT: vzeroupper 3672; NoVLX-NEXT: retq 3673entry: 3674 %0 = bitcast <4 x i64> %__a to <4 x i64> 3675 %1 = bitcast <4 x i64> %__b to <4 x i64> 3676 %2 = icmp eq <4 x i64> %0, %1 3677 %3 = bitcast i8 %__u to <8 x i1> 3678 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3679 %4 = and <4 x i1> %2, %extract.i 3680 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3681 %6 = bitcast <8 x i1> %5 to i8 3682 ret i8 %6 3683} 3684 3685define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 3686; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem: 3687; VLX: # %bb.0: # %entry 3688; VLX-NEXT: kmovd %edi, %k1 3689; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} 3690; VLX-NEXT: kmovd %k0, %eax 3691; VLX-NEXT: # kill: def $al killed $al killed $eax 3692; VLX-NEXT: vzeroupper 3693; VLX-NEXT: retq 3694; 3695; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem: 3696; NoVLX: # %bb.0: # %entry 3697; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3698; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 3699; NoVLX-NEXT: kmovw %edi, %k1 3700; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3701; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3702; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3703; NoVLX-NEXT: kmovw %k0, %eax 3704; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3705; NoVLX-NEXT: vzeroupper 3706; NoVLX-NEXT: retq 3707entry: 3708 %0 = bitcast <4 x i64> %__a to <4 x i64> 3709 %load = load <4 x i64>, ptr %__b 3710 %1 = bitcast <4 x i64> %load to <4 x i64> 3711 %2 = icmp eq <4 x i64> 
%0, %1 3712 %3 = bitcast i8 %__u to <8 x i1> 3713 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3714 %4 = and <4 x i1> %2, %extract.i 3715 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3716 %6 = bitcast <8 x i1> %5 to i8 3717 ret i8 %6 3718} 3719 3720 3721define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 3722; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b: 3723; VLX: # %bb.0: # %entry 3724; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 3725; VLX-NEXT: kmovd %k0, %eax 3726; VLX-NEXT: # kill: def $al killed $al killed $eax 3727; VLX-NEXT: vzeroupper 3728; VLX-NEXT: retq 3729; 3730; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b: 3731; NoVLX: # %bb.0: # %entry 3732; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3733; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 3734; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3735; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3736; NoVLX-NEXT: kmovw %k0, %eax 3737; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3738; NoVLX-NEXT: vzeroupper 3739; NoVLX-NEXT: retq 3740entry: 3741 %0 = bitcast <4 x i64> %__a to <4 x i64> 3742 %load = load i64, ptr %__b 3743 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 3744 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 3745 %2 = icmp eq <4 x i64> %0, %1 3746 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3747 %4 = bitcast <8 x i1> %3 to i8 3748 ret i8 %4 3749} 3750 3751define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 3752; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b: 3753; VLX: # %bb.0: # %entry 3754; VLX-NEXT: kmovd %edi, %k1 3755; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} 3756; VLX-NEXT: kmovd %k0, %eax 3757; 
VLX-NEXT: # kill: def $al killed $al killed $eax 3758; VLX-NEXT: vzeroupper 3759; VLX-NEXT: retq 3760; 3761; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b: 3762; NoVLX: # %bb.0: # %entry 3763; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3764; NoVLX-NEXT: kmovw %edi, %k1 3765; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} 3766; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3767; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3768; NoVLX-NEXT: kmovw %k0, %eax 3769; NoVLX-NEXT: # kill: def $al killed $al killed $eax 3770; NoVLX-NEXT: vzeroupper 3771; NoVLX-NEXT: retq 3772entry: 3773 %0 = bitcast <4 x i64> %__a to <4 x i64> 3774 %load = load i64, ptr %__b 3775 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 3776 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 3777 %2 = icmp eq <4 x i64> %0, %1 3778 %3 = bitcast i8 %__u to <8 x i1> 3779 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3780 %4 = and <4 x i1> %extract.i, %2 3781 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3782 %6 = bitcast <8 x i1> %5 to i8 3783 ret i8 %6 3784} 3785 3786 3787define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 3788; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask: 3789; VLX: # %bb.0: # %entry 3790; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 3791; VLX-NEXT: kmovd %k0, %eax 3792; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3793; VLX-NEXT: vzeroupper 3794; VLX-NEXT: retq 3795; 3796; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask: 3797; NoVLX: # %bb.0: # %entry 3798; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 3799; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3800; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3801; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3802; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3803; NoVLX-NEXT: kmovw %k0, %eax 3804; NoVLX-NEXT: # kill: def $ax killed $ax 
killed $eax 3805; NoVLX-NEXT: vzeroupper 3806; NoVLX-NEXT: retq 3807entry: 3808 %0 = bitcast <4 x i64> %__a to <4 x i64> 3809 %1 = bitcast <4 x i64> %__b to <4 x i64> 3810 %2 = icmp eq <4 x i64> %0, %1 3811 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3812 %4 = bitcast <16 x i1> %3 to i16 3813 ret i16 %4 3814} 3815 3816define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 3817; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem: 3818; VLX: # %bb.0: # %entry 3819; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 3820; VLX-NEXT: kmovd %k0, %eax 3821; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3822; VLX-NEXT: vzeroupper 3823; VLX-NEXT: retq 3824; 3825; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem: 3826; NoVLX: # %bb.0: # %entry 3827; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3828; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 3829; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3830; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3831; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3832; NoVLX-NEXT: kmovw %k0, %eax 3833; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3834; NoVLX-NEXT: vzeroupper 3835; NoVLX-NEXT: retq 3836entry: 3837 %0 = bitcast <4 x i64> %__a to <4 x i64> 3838 %load = load <4 x i64>, ptr %__b 3839 %1 = bitcast <4 x i64> %load to <4 x i64> 3840 %2 = icmp eq <4 x i64> %0, %1 3841 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3842 %4 = bitcast <16 x i1> %3 to i16 3843 ret i16 %4 3844} 3845 3846define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 3847; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask: 3848; VLX: # %bb.0: # %entry 3849; VLX-NEXT: kmovd %edi, %k1 3850; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 
{%k1} 3851; VLX-NEXT: kmovd %k0, %eax 3852; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3853; VLX-NEXT: vzeroupper 3854; VLX-NEXT: retq 3855; 3856; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask: 3857; NoVLX: # %bb.0: # %entry 3858; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 3859; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3860; NoVLX-NEXT: kmovw %edi, %k1 3861; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3862; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3863; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3864; NoVLX-NEXT: kmovw %k0, %eax 3865; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3866; NoVLX-NEXT: vzeroupper 3867; NoVLX-NEXT: retq 3868entry: 3869 %0 = bitcast <4 x i64> %__a to <4 x i64> 3870 %1 = bitcast <4 x i64> %__b to <4 x i64> 3871 %2 = icmp eq <4 x i64> %0, %1 3872 %3 = bitcast i8 %__u to <8 x i1> 3873 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3874 %4 = and <4 x i1> %2, %extract.i 3875 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3876 %6 = bitcast <16 x i1> %5 to i16 3877 ret i16 %6 3878} 3879 3880define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 3881; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem: 3882; VLX: # %bb.0: # %entry 3883; VLX-NEXT: kmovd %edi, %k1 3884; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} 3885; VLX-NEXT: kmovd %k0, %eax 3886; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3887; VLX-NEXT: vzeroupper 3888; VLX-NEXT: retq 3889; 3890; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem: 3891; NoVLX: # %bb.0: # %entry 3892; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3893; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 3894; NoVLX-NEXT: kmovw %edi, %k1 3895; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 3896; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3897; 
NoVLX-NEXT: kshiftrw $12, %k0, %k0 3898; NoVLX-NEXT: kmovw %k0, %eax 3899; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3900; NoVLX-NEXT: vzeroupper 3901; NoVLX-NEXT: retq 3902entry: 3903 %0 = bitcast <4 x i64> %__a to <4 x i64> 3904 %load = load <4 x i64>, ptr %__b 3905 %1 = bitcast <4 x i64> %load to <4 x i64> 3906 %2 = icmp eq <4 x i64> %0, %1 3907 %3 = bitcast i8 %__u to <8 x i1> 3908 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3909 %4 = and <4 x i1> %2, %extract.i 3910 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3911 %6 = bitcast <16 x i1> %5 to i16 3912 ret i16 %6 3913} 3914 3915 3916define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 3917; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: 3918; VLX: # %bb.0: # %entry 3919; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 3920; VLX-NEXT: kmovd %k0, %eax 3921; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3922; VLX-NEXT: vzeroupper 3923; VLX-NEXT: retq 3924; 3925; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: 3926; NoVLX: # %bb.0: # %entry 3927; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3928; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 3929; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3930; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3931; NoVLX-NEXT: kmovw %k0, %eax 3932; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3933; NoVLX-NEXT: vzeroupper 3934; NoVLX-NEXT: retq 3935entry: 3936 %0 = bitcast <4 x i64> %__a to <4 x i64> 3937 %load = load i64, ptr %__b 3938 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 3939 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 3940 %2 = icmp eq <4 x i64> %0, %1 3941 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3942 %4 = bitcast <16 x i1> %3 to i16 3943 ret i16 %4 3944} 3945 3946define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 3947; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: 3948; VLX: # %bb.0: # %entry 3949; VLX-NEXT: kmovd %edi, %k1 3950; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} 3951; VLX-NEXT: kmovd %k0, %eax 3952; VLX-NEXT: # kill: def $ax killed $ax killed $eax 3953; VLX-NEXT: vzeroupper 3954; VLX-NEXT: retq 3955; 3956; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: 3957; NoVLX: # %bb.0: # %entry 3958; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3959; NoVLX-NEXT: kmovw %edi, %k1 3960; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} 3961; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3962; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3963; NoVLX-NEXT: kmovw %k0, %eax 3964; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 3965; NoVLX-NEXT: vzeroupper 3966; NoVLX-NEXT: retq 3967entry: 3968 %0 = bitcast <4 x i64> %__a to <4 x i64> 3969 %load = load i64, ptr %__b 3970 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 3971 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 3972 %2 = icmp eq <4 x i64> %0, %1 3973 %3 = bitcast i8 %__u to <8 x i1> 3974 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 3975 %4 = and <4 x i1> %extract.i, %2 3976 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 3977 %6 = bitcast <16 x i1> %5 to i16 3978 ret i16 %6 3979} 3980 3981 3982define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 3983; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: 3984; VLX: # %bb.0: # %entry 3985; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 3986; VLX-NEXT: 
kmovd %k0, %eax 3987; VLX-NEXT: vzeroupper 3988; VLX-NEXT: retq 3989; 3990; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: 3991; NoVLX: # %bb.0: # %entry 3992; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 3993; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 3994; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 3995; NoVLX-NEXT: kshiftlw $12, %k0, %k0 3996; NoVLX-NEXT: kshiftrw $12, %k0, %k0 3997; NoVLX-NEXT: kmovw %k0, %eax 3998; NoVLX-NEXT: vzeroupper 3999; NoVLX-NEXT: retq 4000entry: 4001 %0 = bitcast <4 x i64> %__a to <4 x i64> 4002 %1 = bitcast <4 x i64> %__b to <4 x i64> 4003 %2 = icmp eq <4 x i64> %0, %1 4004 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4005 %4 = bitcast <32 x i1> %3 to i32 4006 ret i32 %4 4007} 4008 4009define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 4010; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: 4011; VLX: # %bb.0: # %entry 4012; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 4013; VLX-NEXT: kmovd %k0, %eax 4014; VLX-NEXT: vzeroupper 4015; VLX-NEXT: retq 4016; 4017; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: 4018; NoVLX: # %bb.0: # %entry 4019; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4020; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 4021; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 4022; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4023; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4024; NoVLX-NEXT: kmovw %k0, %eax 4025; NoVLX-NEXT: vzeroupper 4026; NoVLX-NEXT: retq 4027entry: 4028 %0 = bitcast <4 x i64> %__a to <4 x i64> 4029 %load = load <4 x i64>, ptr %__b 4030 %1 = bitcast <4 x i64> %load to <4 x i64> 4031 %2 = icmp eq <4 x i64> %0, %1 4032 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4033 %4 = bitcast <32 x i1> %3 to i32 4034 ret i32 %4 4035} 4036 4037define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 4038; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: 4039; VLX: # %bb.0: # %entry 4040; VLX-NEXT: kmovd %edi, %k1 4041; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} 4042; VLX-NEXT: kmovd %k0, %eax 4043; VLX-NEXT: vzeroupper 4044; VLX-NEXT: retq 4045; 4046; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: 4047; NoVLX: # %bb.0: # %entry 4048; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 4049; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4050; NoVLX-NEXT: kmovw %edi, %k1 4051; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 4052; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4053; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4054; NoVLX-NEXT: kmovw %k0, %eax 4055; NoVLX-NEXT: vzeroupper 4056; NoVLX-NEXT: retq 4057entry: 4058 %0 = bitcast <4 x i64> %__a to <4 x i64> 4059 %1 = bitcast <4 x i64> %__b to <4 x i64> 4060 %2 = icmp eq <4 x i64> %0, %1 4061 %3 = bitcast i8 %__u to <8 x i1> 4062 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4063 %4 = and <4 x i1> %2, %extract.i 4064 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4065 %6 = bitcast <32 x i1> %5 to i32 4066 ret i32 %6 4067} 4068 4069define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 4070; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: 4071; VLX: # %bb.0: # %entry 4072; VLX-NEXT: kmovd 
%edi, %k1 4073; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} 4074; VLX-NEXT: kmovd %k0, %eax 4075; VLX-NEXT: vzeroupper 4076; VLX-NEXT: retq 4077; 4078; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: 4079; NoVLX: # %bb.0: # %entry 4080; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4081; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 4082; NoVLX-NEXT: kmovw %edi, %k1 4083; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 4084; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4085; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4086; NoVLX-NEXT: kmovw %k0, %eax 4087; NoVLX-NEXT: vzeroupper 4088; NoVLX-NEXT: retq 4089entry: 4090 %0 = bitcast <4 x i64> %__a to <4 x i64> 4091 %load = load <4 x i64>, ptr %__b 4092 %1 = bitcast <4 x i64> %load to <4 x i64> 4093 %2 = icmp eq <4 x i64> %0, %1 4094 %3 = bitcast i8 %__u to <8 x i1> 4095 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4096 %4 = and <4 x i1> %2, %extract.i 4097 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4098 %6 = bitcast <32 x i1> %5 to i32 4099 ret i32 %6 4100} 4101 4102 4103define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 4104; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: 4105; VLX: # %bb.0: # %entry 4106; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 4107; VLX-NEXT: kmovd %k0, %eax 4108; VLX-NEXT: vzeroupper 4109; VLX-NEXT: retq 4110; 4111; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: 4112; NoVLX: # %bb.0: # %entry 4113; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4114; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 4115; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4116; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4117; NoVLX-NEXT: kmovw %k0, %eax 4118; NoVLX-NEXT: vzeroupper 4119; NoVLX-NEXT: 
retq 4120entry: 4121 %0 = bitcast <4 x i64> %__a to <4 x i64> 4122 %load = load i64, ptr %__b 4123 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 4124 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 4125 %2 = icmp eq <4 x i64> %0, %1 4126 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4127 %4 = bitcast <32 x i1> %3 to i32 4128 ret i32 %4 4129} 4130 4131define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 4132; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: 4133; VLX: # %bb.0: # %entry 4134; VLX-NEXT: kmovd %edi, %k1 4135; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} 4136; VLX-NEXT: kmovd %k0, %eax 4137; VLX-NEXT: vzeroupper 4138; VLX-NEXT: retq 4139; 4140; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: 4141; NoVLX: # %bb.0: # %entry 4142; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4143; NoVLX-NEXT: kmovw %edi, %k1 4144; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} 4145; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4146; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4147; NoVLX-NEXT: kmovw %k0, %eax 4148; NoVLX-NEXT: vzeroupper 4149; NoVLX-NEXT: retq 4150entry: 4151 %0 = bitcast <4 x i64> %__a to <4 x i64> 4152 %load = load i64, ptr %__b 4153 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 4154 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 4155 %2 = icmp eq <4 x i64> %0, %1 4156 %3 = bitcast i8 %__u to <8 x i1> 4157 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4158 %4 = and <4 x i1> %extract.i, %2 4159 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 
1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4160 %6 = bitcast <32 x i1> %5 to i32 4161 ret i32 %6 4162} 4163 4164 4165define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 4166; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask: 4167; VLX: # %bb.0: # %entry 4168; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 4169; VLX-NEXT: kmovq %k0, %rax 4170; VLX-NEXT: vzeroupper 4171; VLX-NEXT: retq 4172; 4173; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask: 4174; NoVLX: # %bb.0: # %entry 4175; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 4176; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4177; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 4178; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4179; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4180; NoVLX-NEXT: kmovw %k0, %eax 4181; NoVLX-NEXT: vzeroupper 4182; NoVLX-NEXT: retq 4183entry: 4184 %0 = bitcast <4 x i64> %__a to <4 x i64> 4185 %1 = bitcast <4 x i64> %__b to <4 x i64> 4186 %2 = icmp eq <4 x i64> %0, %1 4187 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4188 %4 = bitcast <64 x i1> %3 to i64 4189 ret i64 %4 4190} 4191 4192define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 4193; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem: 4194; VLX: # %bb.0: # %entry 4195; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 4196; VLX-NEXT: kmovq %k0, %rax 
4197; VLX-NEXT: vzeroupper 4198; VLX-NEXT: retq 4199; 4200; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem: 4201; NoVLX: # %bb.0: # %entry 4202; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4203; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 4204; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 4205; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4206; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4207; NoVLX-NEXT: kmovw %k0, %eax 4208; NoVLX-NEXT: vzeroupper 4209; NoVLX-NEXT: retq 4210entry: 4211 %0 = bitcast <4 x i64> %__a to <4 x i64> 4212 %load = load <4 x i64>, ptr %__b 4213 %1 = bitcast <4 x i64> %load to <4 x i64> 4214 %2 = icmp eq <4 x i64> %0, %1 4215 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4216 %4 = bitcast <64 x i1> %3 to i64 4217 ret i64 %4 4218} 4219 4220define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 4221; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask: 4222; VLX: # %bb.0: # %entry 4223; VLX-NEXT: kmovd %edi, %k1 4224; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} 4225; VLX-NEXT: kmovq %k0, %rax 4226; VLX-NEXT: vzeroupper 4227; VLX-NEXT: retq 4228; 4229; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask: 4230; NoVLX: # %bb.0: # %entry 4231; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 4232; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4233; NoVLX-NEXT: kmovw %edi, %k1 4234; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 4235; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4236; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4237; NoVLX-NEXT: kmovw %k0, 
%eax 4238; NoVLX-NEXT: vzeroupper 4239; NoVLX-NEXT: retq 4240entry: 4241 %0 = bitcast <4 x i64> %__a to <4 x i64> 4242 %1 = bitcast <4 x i64> %__b to <4 x i64> 4243 %2 = icmp eq <4 x i64> %0, %1 4244 %3 = bitcast i8 %__u to <8 x i1> 4245 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 4246 %4 = and <4 x i1> %2, %extract.i 4247 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 4248 %6 = bitcast <64 x i1> %5 to i64 4249 ret i64 %6 4250} 4251 4252define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 4253; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem: 4254; VLX: # %bb.0: # %entry 4255; VLX-NEXT: kmovd %edi, %k1 4256; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} 4257; VLX-NEXT: kmovq %k0, %rax 4258; VLX-NEXT: vzeroupper 4259; VLX-NEXT: retq 4260; 4261; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem: 4262; NoVLX: # %bb.0: # %entry 4263; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 4264; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 4265; NoVLX-NEXT: kmovw %edi, %k1 4266; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} 4267; NoVLX-NEXT: kshiftlw $12, %k0, %k0 4268; NoVLX-NEXT: kshiftrw $12, %k0, %k0 4269; NoVLX-NEXT: kmovw %k0, %eax 4270; NoVLX-NEXT: vzeroupper 4271; NoVLX-NEXT: retq 4272entry: 4273 %0 = bitcast <4 x i64> %__a to <4 x i64> 4274 %load = load <4 x i64>, ptr %__b 4275 %1 = bitcast <4 x i64> %load to <4 x i64> 4276 %2 = icmp eq <4 x i64> %0, %1 4277 %3 = bitcast i8 %__u to <8 x 
i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; 4 x i64 equality against one i64 loaded from %__b and splatted to every lane.
; Shuffle indices 4..7 pick lanes of the zeroinitializer operand, so the 4-bit
; result is zero-padded to 64 lanes before being returned as an i64.
define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <4 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <4 x i64> undef, i64 %scalar, i32 0
  %rhs = shufflevector <4 x i64> %lane0, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <4 x i64> %lhs, %rhs
  %wide = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Same splat compare as above, ANDed with the low four lanes of %__u (the mask
; is the first `and` operand here) before zero-padding to 64 lanes.
define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <4 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <4 x i64> undef, i64 %scalar, i32 0
  %rhs = shufflevector <4 x i64> %lane0, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <4 x i64> %lhs, %rhs
  %mask8 = bitcast i8 %__u to <8 x i1>
  %mask = shufflevector <8 x i1> %mask8, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sel = and <4 x i1> %mask, %cmp
  %wide = shufflevector <4 x i1> %sel, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %bits = bitcast <64 x i1> %sel.wide.placeholder to i64
  ret i64 %bits
}


; 8 x i64 equality of two register operands; shuffle indices 8..15 pick lanes
; of the zeroinitializer operand, zero-padding the 8-bit result to 16 lanes.
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs = bitcast <8 x i64> %__b to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <16 x i1> %wide to i16
  ret i16 %bits
}

; As the register form, but the right-hand operand is a full <8 x i64> loaded
; from %__b.
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs.mem = load <8 x i64>, ptr %__b
  %rhs = bitcast <8 x i64> %rhs.mem to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <16 x i1> %wide to i16
  ret i16 %bits
}

; Register-register compare ANDed with the eight lanes of %__u (compare result
; is the first `and` operand), then zero-padded to 16 lanes.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs = bitcast <8 x i64> %__b to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <16 x i1> %wide to i16
  ret i16 %bits
}

; Masked compare whose right-hand operand is a full <8 x i64> loaded from
; %__b; compare result is the first `and` operand.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs.mem = load <8 x i64>, ptr %__b
  %rhs = bitcast <8 x i64> %rhs.mem to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <16 x i1> %wide to i16
  ret i16 %bits
}


; Compare against one i64 loaded from %__b and splatted to all eight lanes;
; result zero-padded to 16 lanes.
define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %rhs = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <16 x i1> %wide to i16
  ret i16 %bits
}

; Splat compare (one i64 loaded from %__b, replicated to eight lanes) ANDed
; with %__u (mask is the first `and` operand); result zero-padded to 16 lanes.
define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %rhs = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %mask, %cmp
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <16 x i1> %wide to i16
  ret i16 %bits
}


; 8 x i64 register-register equality; shuffle indices 8..15 pick lanes of the
; zeroinitializer operand, zero-padding the 8-bit result to 32 lanes.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs = bitcast <8 x i64> %__b to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; As the register form, with the right-hand operand loaded as a full
; <8 x i64> from %__b.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs.mem = load <8 x i64>, ptr %__b
  %rhs = bitcast <8 x i64> %rhs.mem to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Register-register compare ANDed with %__u (compare result is the first
; `and` operand), zero-padded to 32 lanes.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs = bitcast <8 x i64> %__b to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Masked compare with the right-hand operand loaded as a full <8 x i64> from
; %__b; compare result is the first `and` operand.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs.mem = load <8 x i64>, ptr %__b
  %rhs = bitcast <8 x i64> %rhs.mem to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}


; Compare against one i64 loaded from %__b and splatted to all eight lanes;
; result zero-padded to 32 lanes.
define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %rhs = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Splat compare ANDed with %__u (mask is the first `and` operand), zero-padded
; to 32 lanes.
define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %rhs = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %mask, %cmp
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}


; 8 x i64 register-register equality, zero-padded to 64 lanes and returned as
; an i64.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs = bitcast <8 x i64> %__b to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; As the register form, with the right-hand operand loaded as a full
; <8 x i64> from %__b.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs.mem = load <8 x i64>, ptr %__b
  %rhs = bitcast <8 x i64> %rhs.mem to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Register-register compare ANDed with %__u (compare result is the first
; `and` operand), zero-padded to 64 lanes.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs = bitcast <8 x i64> %__b to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked compare with the right-hand operand loaded as a full <8 x i64> from
; %__b; compare result is the first `and` operand.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %rhs.mem = load <8 x i64>, ptr %__b
  %rhs = bitcast <8 x i64> %rhs.mem to <8 x i64>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %cmp, %mask
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}


; Compare against one i64 loaded from %__b and splatted to all eight lanes;
; result zero-padded to 64 lanes.
define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %rhs = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %wide = shufflevector <8 x i1> %cmp, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Splat compare ANDed with %__u (mask is the first `and` operand), zero-padded
; to 64 lanes.
define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <8 x i64> %__a to <8 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <8 x i64> undef, i64 %scalar, i32 0
  %rhs = shufflevector <8 x i64> %lane0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %cmp = icmp eq <8 x i64> %lhs, %rhs
  %mask = bitcast i8 %__u to <8 x i1>
  %sel = and <8 x i1> %mask, %cmp
  %wide = shufflevector <8 x i1> %sel, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}


; Signed greater-than over sixteen i8 lanes (both operands bitcast from
; <2 x i64>); shuffle indices 16..31 pick lanes of the zeroinitializer
; operand, zero-padding the 16-bit result to 32 lanes.
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <16 x i8>
  %rhs = bitcast <2 x i64> %__b to <16 x i8>
  %cmp = icmp sgt <16 x i8> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; As the register form, with the right-hand operand loaded as <2 x i64> from
; %__b and reinterpreted as sixteen i8 lanes.
define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <16 x i8>
  %rhs.mem = load <2 x i64>, ptr %__b
  %rhs = bitcast <2 x i64> %rhs.mem to <16 x i8>
  %cmp = icmp sgt <16 x i8> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Register-register byte compare ANDed with the sixteen lanes of %__u (compare
; result is the first `and` operand), zero-padded to 32 lanes.
define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <16 x i8>
  %rhs = bitcast <2 x i64> %__b to <16 x i8>
  %cmp = icmp sgt <16 x i8> %lhs, %rhs
  %mask = bitcast i16 %__u to <16 x i1>
  %sel = and <16 x i1> %cmp, %mask
  %wide = shufflevector <16 x i1> %sel, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Masked byte compare with the right-hand operand loaded as <2 x i64> from
; %__b; compare result is the first `and` operand.
define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <16 x i8>
  %rhs.mem = load <2 x i64>, ptr %__b
  %rhs = bitcast <2 x i64> %rhs.mem to <16 x i8>
  %cmp = icmp sgt <16 x i8> %lhs, %rhs
  %mask = bitcast i16 %__u to <16 x i1>
  %sel = and <16 x i1> %cmp, %mask
  %wide = shufflevector <16 x i1> %sel, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}


; Signed greater-than over sixteen i8 lanes, zero-padded to 64 lanes and
; returned as an i64.
define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <16 x i8>
  %rhs = bitcast <2 x i64> %__b to <16 x i8>
  %cmp = icmp sgt <16 x i8> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; As the register form, with the right-hand operand loaded as <2 x i64> from
; %__b and reinterpreted as sixteen i8 lanes.
define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <16 x i8>
  %rhs.mem = load <2 x i64>, ptr %__b
  %rhs = bitcast <2 x i64> %rhs.mem to <16 x i8>
  %cmp = icmp sgt <16 x i8> %lhs, %rhs
  %wide = shufflevector <16 x i1> %cmp, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Register-register byte compare ANDed with the sixteen lanes of %__u (compare
; result is the first `and` operand), zero-padded to 64 lanes.
define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <16 x i8>
  %rhs = bitcast <2 x i64> %__b to <16 x i8>
  %cmp = icmp sgt <16 x i8> %lhs, %rhs
  %mask = bitcast i16 %__u to <16 x i1>
  %sel = and <16 x i1> %cmp, %mask
  %wide = shufflevector <16 x i1> %sel, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp sgt <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11,
i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5043 %6 = bitcast <64 x i1> %5 to i64 5044 ret i64 %6 5045} 5046 5047 5048define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5049; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: 5050; VLX: # %bb.0: # %entry 5051; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 5052; VLX-NEXT: kmovq %k0, %rax 5053; VLX-NEXT: vzeroupper 5054; VLX-NEXT: retq 5055; 5056; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: 5057; NoVLX: # %bb.0: # %entry 5058; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 5059; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 5060; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5061; NoVLX-NEXT: kmovw %k0, %ecx 5062; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 5063; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5064; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5065; NoVLX-NEXT: kmovw %k0, %eax 5066; NoVLX-NEXT: shll $16, %eax 5067; NoVLX-NEXT: orl %ecx, %eax 5068; NoVLX-NEXT: vzeroupper 5069; NoVLX-NEXT: retq 5070entry: 5071 %0 = bitcast <4 x i64> %__a to <32 x i8> 5072 %1 = bitcast <4 x i64> %__b to <32 x i8> 5073 %2 = icmp sgt <32 x i8> %0, %1 5074 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 
56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5075 %4 = bitcast <64 x i1> %3 to i64 5076 ret i64 %4 5077} 5078 5079define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 5080; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: 5081; VLX: # %bb.0: # %entry 5082; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 5083; VLX-NEXT: kmovq %k0, %rax 5084; VLX-NEXT: vzeroupper 5085; VLX-NEXT: retq 5086; 5087; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: 5088; NoVLX: # %bb.0: # %entry 5089; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 5090; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 5091; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5092; NoVLX-NEXT: kmovw %k0, %ecx 5093; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 5094; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5095; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5096; NoVLX-NEXT: kmovw %k0, %eax 5097; NoVLX-NEXT: shll $16, %eax 5098; NoVLX-NEXT: orl %ecx, %eax 5099; NoVLX-NEXT: vzeroupper 5100; NoVLX-NEXT: retq 5101entry: 5102 %0 = bitcast <4 x i64> %__a to <32 x i8> 5103 %load = load <4 x i64>, ptr %__b 5104 %1 = bitcast <4 x i64> %load to <32 x i8> 5105 %2 = icmp sgt <32 x i8> %0, %1 5106 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5107 %4 = bitcast <64 x i1> %3 to i64 5108 ret i64 %4 5109} 5110 5111define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5112; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: 
5113; VLX: # %bb.0: # %entry 5114; VLX-NEXT: kmovd %edi, %k1 5115; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} 5116; VLX-NEXT: kmovq %k0, %rax 5117; VLX-NEXT: vzeroupper 5118; VLX-NEXT: retq 5119; 5120; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: 5121; NoVLX: # %bb.0: # %entry 5122; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 5123; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 5124; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5125; NoVLX-NEXT: kmovw %k0, %eax 5126; NoVLX-NEXT: andl %edi, %eax 5127; NoVLX-NEXT: shrl $16, %edi 5128; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 5129; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5130; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5131; NoVLX-NEXT: kmovw %k0, %ecx 5132; NoVLX-NEXT: andl %edi, %ecx 5133; NoVLX-NEXT: shll $16, %ecx 5134; NoVLX-NEXT: movzwl %ax, %eax 5135; NoVLX-NEXT: orl %ecx, %eax 5136; NoVLX-NEXT: vzeroupper 5137; NoVLX-NEXT: retq 5138entry: 5139 %0 = bitcast <4 x i64> %__a to <32 x i8> 5140 %1 = bitcast <4 x i64> %__b to <32 x i8> 5141 %2 = icmp sgt <32 x i8> %0, %1 5142 %3 = bitcast i32 %__u to <32 x i1> 5143 %4 = and <32 x i1> %2, %3 5144 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5145 %6 = bitcast <64 x i1> %5 to i64 5146 ret i64 %6 5147} 5148 5149define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 5150; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: 5151; VLX: # %bb.0: # %entry 5152; VLX-NEXT: kmovd %edi, %k1 5153; VLX-NEXT: 
vpcmpgtb (%rsi), %ymm0, %k0 {%k1} 5154; VLX-NEXT: kmovq %k0, %rax 5155; VLX-NEXT: vzeroupper 5156; VLX-NEXT: retq 5157; 5158; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: 5159; NoVLX: # %bb.0: # %entry 5160; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0 5161; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 5162; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5163; NoVLX-NEXT: kmovw %k0, %eax 5164; NoVLX-NEXT: andl %edi, %eax 5165; NoVLX-NEXT: shrl $16, %edi 5166; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 5167; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 5168; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5169; NoVLX-NEXT: kmovw %k0, %ecx 5170; NoVLX-NEXT: andl %edi, %ecx 5171; NoVLX-NEXT: shll $16, %ecx 5172; NoVLX-NEXT: movzwl %ax, %eax 5173; NoVLX-NEXT: orl %ecx, %eax 5174; NoVLX-NEXT: vzeroupper 5175; NoVLX-NEXT: retq 5176entry: 5177 %0 = bitcast <4 x i64> %__a to <32 x i8> 5178 %load = load <4 x i64>, ptr %__b 5179 %1 = bitcast <4 x i64> %load to <32 x i8> 5180 %2 = icmp sgt <32 x i8> %0, %1 5181 %3 = bitcast i32 %__u to <32 x i1> 5182 %4 = and <32 x i1> %2, %3 5183 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5184 %6 = bitcast <64 x i1> %5 to i64 5185 ret i64 %6 5186} 5187 5188 5189define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5190; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask: 5191; VLX: # %bb.0: # %entry 5192; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 5193; VLX-NEXT: kmovd %k0, %eax 5194; VLX-NEXT: # kill: def $ax 
killed $ax killed $eax 5195; VLX-NEXT: retq 5196; 5197; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask: 5198; NoVLX: # %bb.0: # %entry 5199; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5200; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5201; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5202; NoVLX-NEXT: kmovw %k0, %eax 5203; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 5204; NoVLX-NEXT: vzeroupper 5205; NoVLX-NEXT: retq 5206entry: 5207 %0 = bitcast <2 x i64> %__a to <8 x i16> 5208 %1 = bitcast <2 x i64> %__b to <8 x i16> 5209 %2 = icmp sgt <8 x i16> %0, %1 5210 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5211 %4 = bitcast <16 x i1> %3 to i16 5212 ret i16 %4 5213} 5214 5215define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 5216; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem: 5217; VLX: # %bb.0: # %entry 5218; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 5219; VLX-NEXT: kmovd %k0, %eax 5220; VLX-NEXT: # kill: def $ax killed $ax killed $eax 5221; VLX-NEXT: retq 5222; 5223; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem: 5224; NoVLX: # %bb.0: # %entry 5225; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 5226; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5227; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5228; NoVLX-NEXT: kmovw %k0, %eax 5229; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 5230; NoVLX-NEXT: vzeroupper 5231; NoVLX-NEXT: retq 5232entry: 5233 %0 = bitcast <2 x i64> %__a to <8 x i16> 5234 %load = load <2 x i64>, ptr %__b 5235 %1 = bitcast <2 x i64> %load to <8 x i16> 5236 %2 = icmp sgt <8 x i16> %0, %1 5237 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5238 %4 = bitcast <16 x i1> %3 to i16 5239 ret i16 %4 5240} 5241 5242define zeroext i16 
@test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5243; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask: 5244; VLX: # %bb.0: # %entry 5245; VLX-NEXT: kmovd %edi, %k1 5246; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} 5247; VLX-NEXT: kmovd %k0, %eax 5248; VLX-NEXT: # kill: def $ax killed $ax killed $eax 5249; VLX-NEXT: retq 5250; 5251; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask: 5252; NoVLX: # %bb.0: # %entry 5253; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5254; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5255; NoVLX-NEXT: kmovw %edi, %k1 5256; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5257; NoVLX-NEXT: kmovw %k0, %eax 5258; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 5259; NoVLX-NEXT: vzeroupper 5260; NoVLX-NEXT: retq 5261entry: 5262 %0 = bitcast <2 x i64> %__a to <8 x i16> 5263 %1 = bitcast <2 x i64> %__b to <8 x i16> 5264 %2 = icmp sgt <8 x i16> %0, %1 5265 %3 = bitcast i8 %__u to <8 x i1> 5266 %4 = and <8 x i1> %2, %3 5267 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5268 %6 = bitcast <16 x i1> %5 to i16 5269 ret i16 %6 5270} 5271 5272define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 5273; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem: 5274; VLX: # %bb.0: # %entry 5275; VLX-NEXT: kmovd %edi, %k1 5276; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} 5277; VLX-NEXT: kmovd %k0, %eax 5278; VLX-NEXT: # kill: def $ax killed $ax killed $eax 5279; VLX-NEXT: retq 5280; 5281; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem: 5282; NoVLX: # %bb.0: # %entry 5283; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 5284; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5285; NoVLX-NEXT: kmovw %edi, %k1 5286; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5287; NoVLX-NEXT: kmovw %k0, %eax 5288; NoVLX-NEXT: # 
kill: def $ax killed $ax killed $eax 5289; NoVLX-NEXT: vzeroupper 5290; NoVLX-NEXT: retq 5291entry: 5292 %0 = bitcast <2 x i64> %__a to <8 x i16> 5293 %load = load <2 x i64>, ptr %__b 5294 %1 = bitcast <2 x i64> %load to <8 x i16> 5295 %2 = icmp sgt <8 x i16> %0, %1 5296 %3 = bitcast i8 %__u to <8 x i1> 5297 %4 = and <8 x i1> %2, %3 5298 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5299 %6 = bitcast <16 x i1> %5 to i16 5300 ret i16 %6 5301} 5302 5303 5304define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5305; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask: 5306; VLX: # %bb.0: # %entry 5307; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 5308; VLX-NEXT: kmovd %k0, %eax 5309; VLX-NEXT: retq 5310; 5311; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask: 5312; NoVLX: # %bb.0: # %entry 5313; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5314; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5315; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5316; NoVLX-NEXT: kmovw %k0, %eax 5317; NoVLX-NEXT: vzeroupper 5318; NoVLX-NEXT: retq 5319entry: 5320 %0 = bitcast <2 x i64> %__a to <8 x i16> 5321 %1 = bitcast <2 x i64> %__b to <8 x i16> 5322 %2 = icmp sgt <8 x i16> %0, %1 5323 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5324 %4 = bitcast <32 x i1> %3 to i32 5325 ret i32 %4 5326} 5327 5328define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 5329; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: 5330; VLX: # %bb.0: # %entry 5331; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 5332; VLX-NEXT: kmovd %k0, %eax 5333; VLX-NEXT: retq 5334; 
5335; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: 5336; NoVLX: # %bb.0: # %entry 5337; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 5338; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5339; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5340; NoVLX-NEXT: kmovw %k0, %eax 5341; NoVLX-NEXT: vzeroupper 5342; NoVLX-NEXT: retq 5343entry: 5344 %0 = bitcast <2 x i64> %__a to <8 x i16> 5345 %load = load <2 x i64>, ptr %__b 5346 %1 = bitcast <2 x i64> %load to <8 x i16> 5347 %2 = icmp sgt <8 x i16> %0, %1 5348 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5349 %4 = bitcast <32 x i1> %3 to i32 5350 ret i32 %4 5351} 5352 5353define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5354; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: 5355; VLX: # %bb.0: # %entry 5356; VLX-NEXT: kmovd %edi, %k1 5357; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} 5358; VLX-NEXT: kmovd %k0, %eax 5359; VLX-NEXT: retq 5360; 5361; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: 5362; NoVLX: # %bb.0: # %entry 5363; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5364; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5365; NoVLX-NEXT: kmovw %edi, %k1 5366; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5367; NoVLX-NEXT: kmovw %k0, %eax 5368; NoVLX-NEXT: vzeroupper 5369; NoVLX-NEXT: retq 5370entry: 5371 %0 = bitcast <2 x i64> %__a to <8 x i16> 5372 %1 = bitcast <2 x i64> %__b to <8 x i16> 5373 %2 = icmp sgt <8 x i16> %0, %1 5374 %3 = bitcast i8 %__u to <8 x i1> 5375 %4 = and <8 x i1> %2, %3 5376 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 
11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5377 %6 = bitcast <32 x i1> %5 to i32 5378 ret i32 %6 5379} 5380 5381define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 5382; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: 5383; VLX: # %bb.0: # %entry 5384; VLX-NEXT: kmovd %edi, %k1 5385; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} 5386; VLX-NEXT: kmovd %k0, %eax 5387; VLX-NEXT: retq 5388; 5389; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: 5390; NoVLX: # %bb.0: # %entry 5391; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 5392; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5393; NoVLX-NEXT: kmovw %edi, %k1 5394; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5395; NoVLX-NEXT: kmovw %k0, %eax 5396; NoVLX-NEXT: vzeroupper 5397; NoVLX-NEXT: retq 5398entry: 5399 %0 = bitcast <2 x i64> %__a to <8 x i16> 5400 %load = load <2 x i64>, ptr %__b 5401 %1 = bitcast <2 x i64> %load to <8 x i16> 5402 %2 = icmp sgt <8 x i16> %0, %1 5403 %3 = bitcast i8 %__u to <8 x i1> 5404 %4 = and <8 x i1> %2, %3 5405 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5406 %6 = bitcast <32 x i1> %5 to i32 5407 ret i32 %6 5408} 5409 5410 5411define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5412; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: 5413; VLX: # %bb.0: # %entry 5414; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 5415; VLX-NEXT: kmovq %k0, %rax 5416; VLX-NEXT: retq 5417; 5418; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: 5419; NoVLX: # %bb.0: # %entry 5420; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5421; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5422; NoVLX-NEXT: vptestmq 
%zmm0, %zmm0, %k0 5423; NoVLX-NEXT: kmovw %k0, %eax 5424; NoVLX-NEXT: vzeroupper 5425; NoVLX-NEXT: retq 5426entry: 5427 %0 = bitcast <2 x i64> %__a to <8 x i16> 5428 %1 = bitcast <2 x i64> %__b to <8 x i16> 5429 %2 = icmp sgt <8 x i16> %0, %1 5430 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5431 %4 = bitcast <64 x i1> %3 to i64 5432 ret i64 %4 5433} 5434 5435define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 5436; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: 5437; VLX: # %bb.0: # %entry 5438; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 5439; VLX-NEXT: kmovq %k0, %rax 5440; VLX-NEXT: retq 5441; 5442; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: 5443; NoVLX: # %bb.0: # %entry 5444; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 5445; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5446; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 5447; NoVLX-NEXT: kmovw %k0, %eax 5448; NoVLX-NEXT: vzeroupper 5449; NoVLX-NEXT: retq 5450entry: 5451 %0 = bitcast <2 x i64> %__a to <8 x i16> 5452 %load = load <2 x i64>, ptr %__b 5453 %1 = bitcast <2 x i64> %load to <8 x i16> 5454 %2 = icmp sgt <8 x i16> %0, %1 5455 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 
12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5456 %4 = bitcast <64 x i1> %3 to i64 5457 ret i64 %4 5458} 5459 5460define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5461; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: 5462; VLX: # %bb.0: # %entry 5463; VLX-NEXT: kmovd %edi, %k1 5464; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} 5465; VLX-NEXT: kmovq %k0, %rax 5466; VLX-NEXT: retq 5467; 5468; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: 5469; NoVLX: # %bb.0: # %entry 5470; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 5471; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5472; NoVLX-NEXT: kmovw %edi, %k1 5473; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5474; NoVLX-NEXT: kmovw %k0, %eax 5475; NoVLX-NEXT: vzeroupper 5476; NoVLX-NEXT: retq 5477entry: 5478 %0 = bitcast <2 x i64> %__a to <8 x i16> 5479 %1 = bitcast <2 x i64> %__b to <8 x i16> 5480 %2 = icmp sgt <8 x i16> %0, %1 5481 %3 = bitcast i8 %__u to <8 x i1> 5482 %4 = and <8 x i1> %2, %3 5483 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5484 %6 = bitcast <64 x i1> %5 to i64 5485 ret i64 %6 5486} 5487 5488define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 5489; VLX-LABEL: 
test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: 5490; VLX: # %bb.0: # %entry 5491; VLX-NEXT: kmovd %edi, %k1 5492; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} 5493; VLX-NEXT: kmovq %k0, %rax 5494; VLX-NEXT: retq 5495; 5496; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: 5497; NoVLX: # %bb.0: # %entry 5498; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 5499; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 5500; NoVLX-NEXT: kmovw %edi, %k1 5501; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 5502; NoVLX-NEXT: kmovw %k0, %eax 5503; NoVLX-NEXT: vzeroupper 5504; NoVLX-NEXT: retq 5505entry: 5506 %0 = bitcast <2 x i64> %__a to <8 x i16> 5507 %load = load <2 x i64>, ptr %__b 5508 %1 = bitcast <2 x i64> %load to <8 x i16> 5509 %2 = icmp sgt <8 x i16> %0, %1 5510 %3 = bitcast i8 %__u to <8 x i1> 5511 %4 = and <8 x i1> %2, %3 5512 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5513 %6 = bitcast <64 x i1> %5 to i64 5514 ret i64 %6 5515} 5516 5517 5518define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5519; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: 5520; VLX: # %bb.0: # %entry 5521; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 5522; VLX-NEXT: kmovd %k0, %eax 5523; VLX-NEXT: vzeroupper 5524; VLX-NEXT: retq 5525; 5526; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: 5527; NoVLX: # %bb.0: # %entry 5528; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5529; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5530; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5531; NoVLX-NEXT: 
kmovw %k0, %eax 5532; NoVLX-NEXT: vzeroupper 5533; NoVLX-NEXT: retq 5534entry: 5535 %0 = bitcast <4 x i64> %__a to <16 x i16> 5536 %1 = bitcast <4 x i64> %__b to <16 x i16> 5537 %2 = icmp sgt <16 x i16> %0, %1 5538 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5539 %4 = bitcast <32 x i1> %3 to i32 5540 ret i32 %4 5541} 5542 5543define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 5544; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem: 5545; VLX: # %bb.0: # %entry 5546; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 5547; VLX-NEXT: kmovd %k0, %eax 5548; VLX-NEXT: vzeroupper 5549; VLX-NEXT: retq 5550; 5551; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem: 5552; NoVLX: # %bb.0: # %entry 5553; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 5554; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5555; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5556; NoVLX-NEXT: kmovw %k0, %eax 5557; NoVLX-NEXT: vzeroupper 5558; NoVLX-NEXT: retq 5559entry: 5560 %0 = bitcast <4 x i64> %__a to <16 x i16> 5561 %load = load <4 x i64>, ptr %__b 5562 %1 = bitcast <4 x i64> %load to <16 x i16> 5563 %2 = icmp sgt <16 x i16> %0, %1 5564 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5565 %4 = bitcast <32 x i1> %3 to i32 5566 ret i32 %4 5567} 5568 5569define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5570; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: 5571; VLX: # 
%bb.0: # %entry 5572; VLX-NEXT: kmovd %edi, %k1 5573; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} 5574; VLX-NEXT: kmovd %k0, %eax 5575; VLX-NEXT: vzeroupper 5576; VLX-NEXT: retq 5577; 5578; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: 5579; NoVLX: # %bb.0: # %entry 5580; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5581; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5582; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5583; NoVLX-NEXT: kmovw %k0, %eax 5584; NoVLX-NEXT: andl %edi, %eax 5585; NoVLX-NEXT: vzeroupper 5586; NoVLX-NEXT: retq 5587entry: 5588 %0 = bitcast <4 x i64> %__a to <16 x i16> 5589 %1 = bitcast <4 x i64> %__b to <16 x i16> 5590 %2 = icmp sgt <16 x i16> %0, %1 5591 %3 = bitcast i16 %__u to <16 x i1> 5592 %4 = and <16 x i1> %2, %3 5593 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5594 %6 = bitcast <32 x i1> %5 to i32 5595 ret i32 %6 5596} 5597 5598define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 5599; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: 5600; VLX: # %bb.0: # %entry 5601; VLX-NEXT: kmovd %edi, %k1 5602; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1} 5603; VLX-NEXT: kmovd %k0, %eax 5604; VLX-NEXT: vzeroupper 5605; VLX-NEXT: retq 5606; 5607; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: 5608; NoVLX: # %bb.0: # %entry 5609; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 5610; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5611; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5612; NoVLX-NEXT: kmovw %k0, %eax 5613; NoVLX-NEXT: andl %edi, %eax 5614; NoVLX-NEXT: vzeroupper 5615; NoVLX-NEXT: retq 5616entry: 5617 %0 = bitcast <4 x i64> %__a to <16 x i16> 5618 %load = load <4 x i64>, ptr %__b 5619 %1 = bitcast <4 x i64> %load to 
<16 x i16> 5620 %2 = icmp sgt <16 x i16> %0, %1 5621 %3 = bitcast i16 %__u to <16 x i1> 5622 %4 = and <16 x i1> %2, %3 5623 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5624 %6 = bitcast <32 x i1> %5 to i32 5625 ret i32 %6 5626} 5627 5628 5629define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5630; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: 5631; VLX: # %bb.0: # %entry 5632; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 5633; VLX-NEXT: kmovq %k0, %rax 5634; VLX-NEXT: vzeroupper 5635; VLX-NEXT: retq 5636; 5637; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: 5638; NoVLX: # %bb.0: # %entry 5639; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5640; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5641; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5642; NoVLX-NEXT: kmovw %k0, %eax 5643; NoVLX-NEXT: vzeroupper 5644; NoVLX-NEXT: retq 5645entry: 5646 %0 = bitcast <4 x i64> %__a to <16 x i16> 5647 %1 = bitcast <4 x i64> %__b to <16 x i16> 5648 %2 = icmp sgt <16 x i16> %0, %1 5649 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5650 %4 = bitcast <64 x i1> %3 to i64 5651 ret i64 %4 5652} 5653 5654define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr 
%__b) local_unnamed_addr { 5655; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem: 5656; VLX: # %bb.0: # %entry 5657; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 5658; VLX-NEXT: kmovq %k0, %rax 5659; VLX-NEXT: vzeroupper 5660; VLX-NEXT: retq 5661; 5662; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem: 5663; NoVLX: # %bb.0: # %entry 5664; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 5665; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5666; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5667; NoVLX-NEXT: kmovw %k0, %eax 5668; NoVLX-NEXT: vzeroupper 5669; NoVLX-NEXT: retq 5670entry: 5671 %0 = bitcast <4 x i64> %__a to <16 x i16> 5672 %load = load <4 x i64>, ptr %__b 5673 %1 = bitcast <4 x i64> %load to <16 x i16> 5674 %2 = icmp sgt <16 x i16> %0, %1 5675 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5676 %4 = bitcast <64 x i1> %3 to i64 5677 ret i64 %4 5678} 5679 5680define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 5681; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: 5682; VLX: # %bb.0: # %entry 5683; VLX-NEXT: kmovd %edi, %k1 5684; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} 5685; VLX-NEXT: kmovq %k0, %rax 5686; VLX-NEXT: vzeroupper 5687; VLX-NEXT: retq 5688; 5689; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: 5690; NoVLX: # %bb.0: # %entry 5691; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5692; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5693; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5694; NoVLX-NEXT: 
kmovw %k0, %eax 5695; NoVLX-NEXT: andl %edi, %eax 5696; NoVLX-NEXT: vzeroupper 5697; NoVLX-NEXT: retq 5698entry: 5699 %0 = bitcast <4 x i64> %__a to <16 x i16> 5700 %1 = bitcast <4 x i64> %__b to <16 x i16> 5701 %2 = icmp sgt <16 x i16> %0, %1 5702 %3 = bitcast i16 %__u to <16 x i1> 5703 %4 = and <16 x i1> %2, %3 5704 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5705 %6 = bitcast <64 x i1> %5 to i64 5706 ret i64 %6 5707} 5708 5709define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 5710; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: 5711; VLX: # %bb.0: # %entry 5712; VLX-NEXT: kmovd %edi, %k1 5713; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1} 5714; VLX-NEXT: kmovq %k0, %rax 5715; VLX-NEXT: vzeroupper 5716; VLX-NEXT: retq 5717; 5718; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: 5719; NoVLX: # %bb.0: # %entry 5720; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 5721; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5722; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5723; NoVLX-NEXT: kmovw %k0, %eax 5724; NoVLX-NEXT: andl %edi, %eax 5725; NoVLX-NEXT: vzeroupper 5726; NoVLX-NEXT: retq 5727entry: 5728 %0 = bitcast <4 x i64> %__a to <16 x i16> 5729 %load = load <4 x i64>, ptr %__b 5730 %1 = bitcast <4 x i64> %load to <16 x i16> 5731 %2 = icmp sgt <16 x i16> %0, %1 5732 %3 = bitcast i16 %__u to <16 x i1> 5733 %4 = and <16 x i1> %2, %3 5734 %5 = shufflevector <16 x i1> %4, <16 x i1> 
zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 5735 %6 = bitcast <64 x i1> %5 to i64 5736 ret i64 %6 5737} 5738 5739 5740define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 5741; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: 5742; VLX: # %bb.0: # %entry 5743; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 5744; VLX-NEXT: kmovq %k0, %rax 5745; VLX-NEXT: vzeroupper 5746; VLX-NEXT: retq 5747; 5748; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: 5749; NoVLX: # %bb.0: # %entry 5750; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2 5751; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 5752; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 5753; NoVLX-NEXT: kmovw %k0, %ecx 5754; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 5755; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 5756; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5757; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5758; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5759; NoVLX-NEXT: kmovw %k0, %eax 5760; NoVLX-NEXT: shll $16, %eax 5761; NoVLX-NEXT: orl %ecx, %eax 5762; NoVLX-NEXT: vzeroupper 5763; NoVLX-NEXT: retq 5764entry: 5765 %0 = bitcast <8 x i64> %__a to <32 x i16> 5766 %1 = bitcast <8 x i64> %__b to <32 x i16> 5767 %2 = icmp sgt <32 x i16> %0, %1 5768 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, 
i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5769 %4 = bitcast <64 x i1> %3 to i64 5770 ret i64 %4 5771} 5772 5773define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 5774; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: 5775; VLX: # %bb.0: # %entry 5776; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0 5777; VLX-NEXT: kmovq %k0, %rax 5778; VLX-NEXT: vzeroupper 5779; VLX-NEXT: retq 5780; 5781; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: 5782; NoVLX: # %bb.0: # %entry 5783; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm1 5784; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 5785; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5786; NoVLX-NEXT: kmovw %k0, %ecx 5787; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 5788; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm0, %ymm0 5789; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5790; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5791; NoVLX-NEXT: kmovw %k0, %eax 5792; NoVLX-NEXT: shll $16, %eax 5793; NoVLX-NEXT: orl %ecx, %eax 5794; NoVLX-NEXT: vzeroupper 5795; NoVLX-NEXT: retq 5796entry: 5797 %0 = bitcast <8 x i64> %__a to <32 x i16> 5798 %load = load <8 x i64>, ptr %__b 5799 %1 = bitcast <8 x i64> %load to <32 x i16> 5800 %2 = icmp sgt <32 x i16> %0, %1 5801 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 
62, i32 63> 5802 %4 = bitcast <64 x i1> %3 to i64 5803 ret i64 %4 5804} 5805 5806define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 5807; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: 5808; VLX: # %bb.0: # %entry 5809; VLX-NEXT: kmovd %edi, %k1 5810; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} 5811; VLX-NEXT: kmovq %k0, %rax 5812; VLX-NEXT: vzeroupper 5813; VLX-NEXT: retq 5814; 5815; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: 5816; NoVLX: # %bb.0: # %entry 5817; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm2 5818; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 5819; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 5820; NoVLX-NEXT: kmovw %k0, %eax 5821; NoVLX-NEXT: andl %edi, %eax 5822; NoVLX-NEXT: shrl $16, %edi 5823; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 5824; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 5825; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 5826; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5827; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5828; NoVLX-NEXT: kmovw %k0, %ecx 5829; NoVLX-NEXT: andl %edi, %ecx 5830; NoVLX-NEXT: shll $16, %ecx 5831; NoVLX-NEXT: movzwl %ax, %eax 5832; NoVLX-NEXT: orl %ecx, %eax 5833; NoVLX-NEXT: vzeroupper 5834; NoVLX-NEXT: retq 5835entry: 5836 %0 = bitcast <8 x i64> %__a to <32 x i16> 5837 %1 = bitcast <8 x i64> %__b to <32 x i16> 5838 %2 = icmp sgt <32 x i16> %0, %1 5839 %3 = bitcast i32 %__u to <32 x i1> 5840 %4 = and <32 x i1> %2, %3 5841 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, 
i32 60, i32 61, i32 62, i32 63> 5842 %6 = bitcast <64 x i1> %5 to i64 5843 ret i64 %6 5844} 5845 5846define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 5847; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: 5848; VLX: # %bb.0: # %entry 5849; VLX-NEXT: kmovd %edi, %k1 5850; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1} 5851; VLX-NEXT: kmovq %k0, %rax 5852; VLX-NEXT: vzeroupper 5853; VLX-NEXT: retq 5854; 5855; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: 5856; NoVLX: # %bb.0: # %entry 5857; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm1 5858; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 5859; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 5860; NoVLX-NEXT: kmovw %k0, %eax 5861; NoVLX-NEXT: andl %edi, %eax 5862; NoVLX-NEXT: shrl $16, %edi 5863; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 5864; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm0, %ymm0 5865; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 5866; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 5867; NoVLX-NEXT: kmovw %k0, %ecx 5868; NoVLX-NEXT: andl %edi, %ecx 5869; NoVLX-NEXT: shll $16, %ecx 5870; NoVLX-NEXT: movzwl %ax, %eax 5871; NoVLX-NEXT: orl %ecx, %eax 5872; NoVLX-NEXT: vzeroupper 5873; NoVLX-NEXT: retq 5874entry: 5875 %0 = bitcast <8 x i64> %__a to <32 x i16> 5876 %load = load <8 x i64>, ptr %__b 5877 %1 = bitcast <8 x i64> %load to <32 x i16> 5878 %2 = icmp sgt <32 x i16> %0, %1 5879 %3 = bitcast i32 %__u to <32 x i1> 5880 %4 = and <32 x i1> %2, %3 5881 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, 
i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 5882 %6 = bitcast <64 x i1> %5 to i64 5883 ret i64 %6 5884} 5885 5886 5887define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5888; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: 5889; VLX: # %bb.0: # %entry 5890; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 5891; VLX-NEXT: kmovd %k0, %eax 5892; VLX-NEXT: # kill: def $al killed $al killed $eax 5893; VLX-NEXT: retq 5894; 5895; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: 5896; NoVLX: # %bb.0: # %entry 5897; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 5898; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5899; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 5900; NoVLX-NEXT: kshiftlw $12, %k0, %k0 5901; NoVLX-NEXT: kshiftrw $12, %k0, %k0 5902; NoVLX-NEXT: kmovw %k0, %eax 5903; NoVLX-NEXT: # kill: def $al killed $al killed $eax 5904; NoVLX-NEXT: vzeroupper 5905; NoVLX-NEXT: retq 5906entry: 5907 %0 = bitcast <2 x i64> %__a to <4 x i32> 5908 %1 = bitcast <2 x i64> %__b to <4 x i32> 5909 %2 = icmp sgt <4 x i32> %0, %1 5910 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5911 %4 = bitcast <8 x i1> %3 to i8 5912 ret i8 %4 5913} 5914 5915define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 5916; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: 5917; VLX: # %bb.0: # %entry 5918; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 5919; VLX-NEXT: kmovd %k0, %eax 5920; VLX-NEXT: # kill: def $al killed $al killed $eax 5921; VLX-NEXT: retq 5922; 5923; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: 5924; NoVLX: # %bb.0: # %entry 5925; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5926; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 5927; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 5928; NoVLX-NEXT: kshiftlw $12, %k0, %k0 5929; NoVLX-NEXT: kshiftrw $12, %k0, %k0 5930; NoVLX-NEXT: kmovw %k0, %eax 5931; NoVLX-NEXT: # kill: def $al killed $al 
killed $eax 5932; NoVLX-NEXT: vzeroupper 5933; NoVLX-NEXT: retq 5934entry: 5935 %0 = bitcast <2 x i64> %__a to <4 x i32> 5936 %load = load <2 x i64>, ptr %__b 5937 %1 = bitcast <2 x i64> %load to <4 x i32> 5938 %2 = icmp sgt <4 x i32> %0, %1 5939 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5940 %4 = bitcast <8 x i1> %3 to i8 5941 ret i8 %4 5942} 5943 5944define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 5945; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: 5946; VLX: # %bb.0: # %entry 5947; VLX-NEXT: kmovd %edi, %k1 5948; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} 5949; VLX-NEXT: kmovd %k0, %eax 5950; VLX-NEXT: # kill: def $al killed $al killed $eax 5951; VLX-NEXT: retq 5952; 5953; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: 5954; NoVLX: # %bb.0: # %entry 5955; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 5956; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5957; NoVLX-NEXT: kmovw %edi, %k1 5958; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 5959; NoVLX-NEXT: kshiftlw $12, %k0, %k0 5960; NoVLX-NEXT: kshiftrw $12, %k0, %k0 5961; NoVLX-NEXT: kmovw %k0, %eax 5962; NoVLX-NEXT: # kill: def $al killed $al killed $eax 5963; NoVLX-NEXT: vzeroupper 5964; NoVLX-NEXT: retq 5965entry: 5966 %0 = bitcast <2 x i64> %__a to <4 x i32> 5967 %1 = bitcast <2 x i64> %__b to <4 x i32> 5968 %2 = icmp sgt <4 x i32> %0, %1 5969 %3 = bitcast i8 %__u to <8 x i1> 5970 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 5971 %4 = and <4 x i1> %2, %extract.i 5972 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5973 %6 = bitcast <8 x i1> %5 to i8 5974 ret i8 %6 5975} 5976 5977define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 5978; 
VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: 5979; VLX: # %bb.0: # %entry 5980; VLX-NEXT: kmovd %edi, %k1 5981; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} 5982; VLX-NEXT: kmovd %k0, %eax 5983; VLX-NEXT: # kill: def $al killed $al killed $eax 5984; VLX-NEXT: retq 5985; 5986; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: 5987; NoVLX: # %bb.0: # %entry 5988; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 5989; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 5990; NoVLX-NEXT: kmovw %edi, %k1 5991; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 5992; NoVLX-NEXT: kshiftlw $12, %k0, %k0 5993; NoVLX-NEXT: kshiftrw $12, %k0, %k0 5994; NoVLX-NEXT: kmovw %k0, %eax 5995; NoVLX-NEXT: # kill: def $al killed $al killed $eax 5996; NoVLX-NEXT: vzeroupper 5997; NoVLX-NEXT: retq 5998entry: 5999 %0 = bitcast <2 x i64> %__a to <4 x i32> 6000 %load = load <2 x i64>, ptr %__b 6001 %1 = bitcast <2 x i64> %load to <4 x i32> 6002 %2 = icmp sgt <4 x i32> %0, %1 6003 %3 = bitcast i8 %__u to <8 x i1> 6004 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6005 %4 = and <4 x i1> %2, %extract.i 6006 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6007 %6 = bitcast <8 x i1> %5 to i8 6008 ret i8 %6 6009} 6010 6011 6012define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 6013; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: 6014; VLX: # %bb.0: # %entry 6015; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 6016; VLX-NEXT: kmovd %k0, %eax 6017; VLX-NEXT: # kill: def $al killed $al killed $eax 6018; VLX-NEXT: retq 6019; 6020; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: 6021; NoVLX: # %bb.0: # %entry 6022; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6023; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 6024; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6025; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6026; NoVLX-NEXT: kmovw %k0, %eax 
6027; NoVLX-NEXT: # kill: def $al killed $al killed $eax 6028; NoVLX-NEXT: vzeroupper 6029; NoVLX-NEXT: retq 6030entry: 6031 %0 = bitcast <2 x i64> %__a to <4 x i32> 6032 %load = load i32, ptr %__b 6033 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6034 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6035 %2 = icmp sgt <4 x i32> %0, %1 6036 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6037 %4 = bitcast <8 x i1> %3 to i8 6038 ret i8 %4 6039} 6040 6041define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 6042; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: 6043; VLX: # %bb.0: # %entry 6044; VLX-NEXT: kmovd %edi, %k1 6045; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} 6046; VLX-NEXT: kmovd %k0, %eax 6047; VLX-NEXT: # kill: def $al killed $al killed $eax 6048; VLX-NEXT: retq 6049; 6050; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: 6051; NoVLX: # %bb.0: # %entry 6052; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6053; NoVLX-NEXT: kmovw %edi, %k1 6054; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 6055; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6056; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6057; NoVLX-NEXT: kmovw %k0, %eax 6058; NoVLX-NEXT: # kill: def $al killed $al killed $eax 6059; NoVLX-NEXT: vzeroupper 6060; NoVLX-NEXT: retq 6061entry: 6062 %0 = bitcast <2 x i64> %__a to <4 x i32> 6063 %load = load i32, ptr %__b 6064 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6065 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6066 %2 = icmp sgt <4 x i32> %0, %1 6067 %3 = bitcast i8 %__u to <8 x i1> 6068 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6069 %4 = and <4 x i1> %extract.i, %2 6070 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x 
i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6071 %6 = bitcast <8 x i1> %5 to i8 6072 ret i8 %6 6073} 6074 6075 6076define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6077; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: 6078; VLX: # %bb.0: # %entry 6079; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 6080; VLX-NEXT: kmovd %k0, %eax 6081; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6082; VLX-NEXT: retq 6083; 6084; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: 6085; NoVLX: # %bb.0: # %entry 6086; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6087; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6088; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6089; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6090; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6091; NoVLX-NEXT: kmovw %k0, %eax 6092; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6093; NoVLX-NEXT: vzeroupper 6094; NoVLX-NEXT: retq 6095entry: 6096 %0 = bitcast <2 x i64> %__a to <4 x i32> 6097 %1 = bitcast <2 x i64> %__b to <4 x i32> 6098 %2 = icmp sgt <4 x i32> %0, %1 6099 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6100 %4 = bitcast <16 x i1> %3 to i16 6101 ret i16 %4 6102} 6103 6104define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 6105; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: 6106; VLX: # %bb.0: # %entry 6107; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 6108; VLX-NEXT: kmovd %k0, %eax 6109; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6110; VLX-NEXT: retq 6111; 6112; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: 6113; NoVLX: # %bb.0: # %entry 6114; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6115; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 6116; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6117; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6118; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6119; 
NoVLX-NEXT: kmovw %k0, %eax 6120; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6121; NoVLX-NEXT: vzeroupper 6122; NoVLX-NEXT: retq 6123entry: 6124 %0 = bitcast <2 x i64> %__a to <4 x i32> 6125 %load = load <2 x i64>, ptr %__b 6126 %1 = bitcast <2 x i64> %load to <4 x i32> 6127 %2 = icmp sgt <4 x i32> %0, %1 6128 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6129 %4 = bitcast <16 x i1> %3 to i16 6130 ret i16 %4 6131} 6132 6133define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6134; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: 6135; VLX: # %bb.0: # %entry 6136; VLX-NEXT: kmovd %edi, %k1 6137; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} 6138; VLX-NEXT: kmovd %k0, %eax 6139; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6140; VLX-NEXT: retq 6141; 6142; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: 6143; NoVLX: # %bb.0: # %entry 6144; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6145; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6146; NoVLX-NEXT: kmovw %edi, %k1 6147; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6148; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6149; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6150; NoVLX-NEXT: kmovw %k0, %eax 6151; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6152; NoVLX-NEXT: vzeroupper 6153; NoVLX-NEXT: retq 6154entry: 6155 %0 = bitcast <2 x i64> %__a to <4 x i32> 6156 %1 = bitcast <2 x i64> %__b to <4 x i32> 6157 %2 = icmp sgt <4 x i32> %0, %1 6158 %3 = bitcast i8 %__u to <8 x i1> 6159 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6160 %4 = and <4 x i1> %2, %extract.i 6161 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 
6162 %6 = bitcast <16 x i1> %5 to i16 6163 ret i16 %6 6164} 6165 6166define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 6167; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: 6168; VLX: # %bb.0: # %entry 6169; VLX-NEXT: kmovd %edi, %k1 6170; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} 6171; VLX-NEXT: kmovd %k0, %eax 6172; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6173; VLX-NEXT: retq 6174; 6175; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: 6176; NoVLX: # %bb.0: # %entry 6177; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6178; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 6179; NoVLX-NEXT: kmovw %edi, %k1 6180; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6181; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6182; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6183; NoVLX-NEXT: kmovw %k0, %eax 6184; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6185; NoVLX-NEXT: vzeroupper 6186; NoVLX-NEXT: retq 6187entry: 6188 %0 = bitcast <2 x i64> %__a to <4 x i32> 6189 %load = load <2 x i64>, ptr %__b 6190 %1 = bitcast <2 x i64> %load to <4 x i32> 6191 %2 = icmp sgt <4 x i32> %0, %1 6192 %3 = bitcast i8 %__u to <8 x i1> 6193 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6194 %4 = and <4 x i1> %2, %extract.i 6195 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6196 %6 = bitcast <16 x i1> %5 to i16 6197 ret i16 %6 6198} 6199 6200 6201define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 6202; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: 6203; VLX: # %bb.0: # %entry 6204; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 6205; VLX-NEXT: kmovd %k0, %eax 6206; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6207; VLX-NEXT: retq 6208; 6209; NoVLX-LABEL: 
test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: 6210; NoVLX: # %bb.0: # %entry 6211; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6212; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 6213; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6214; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6215; NoVLX-NEXT: kmovw %k0, %eax 6216; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6217; NoVLX-NEXT: vzeroupper 6218; NoVLX-NEXT: retq 6219entry: 6220 %0 = bitcast <2 x i64> %__a to <4 x i32> 6221 %load = load i32, ptr %__b 6222 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6223 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6224 %2 = icmp sgt <4 x i32> %0, %1 6225 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6226 %4 = bitcast <16 x i1> %3 to i16 6227 ret i16 %4 6228} 6229 6230define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 6231; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: 6232; VLX: # %bb.0: # %entry 6233; VLX-NEXT: kmovd %edi, %k1 6234; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} 6235; VLX-NEXT: kmovd %k0, %eax 6236; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6237; VLX-NEXT: retq 6238; 6239; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: 6240; NoVLX: # %bb.0: # %entry 6241; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6242; NoVLX-NEXT: kmovw %edi, %k1 6243; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 6244; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6245; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6246; NoVLX-NEXT: kmovw %k0, %eax 6247; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6248; NoVLX-NEXT: vzeroupper 6249; NoVLX-NEXT: retq 6250entry: 6251 %0 = bitcast <2 x i64> %__a to <4 x i32> 6252 %load = load i32, ptr %__b 6253 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6254 %1 = 
shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6255 %2 = icmp sgt <4 x i32> %0, %1 6256 %3 = bitcast i8 %__u to <8 x i1> 6257 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6258 %4 = and <4 x i1> %extract.i, %2 6259 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6260 %6 = bitcast <16 x i1> %5 to i16 6261 ret i16 %6 6262} 6263 6264 6265define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6266; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: 6267; VLX: # %bb.0: # %entry 6268; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 6269; VLX-NEXT: kmovd %k0, %eax 6270; VLX-NEXT: retq 6271; 6272; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: 6273; NoVLX: # %bb.0: # %entry 6274; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6275; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6276; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6277; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6278; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6279; NoVLX-NEXT: kmovw %k0, %eax 6280; NoVLX-NEXT: vzeroupper 6281; NoVLX-NEXT: retq 6282entry: 6283 %0 = bitcast <2 x i64> %__a to <4 x i32> 6284 %1 = bitcast <2 x i64> %__b to <4 x i32> 6285 %2 = icmp sgt <4 x i32> %0, %1 6286 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6287 %4 = bitcast <32 x i1> %3 to i32 6288 ret i32 %4 6289} 6290 6291define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 6292; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: 6293; VLX: # %bb.0: # %entry 6294; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 6295; 
VLX-NEXT: kmovd %k0, %eax 6296; VLX-NEXT: retq 6297; 6298; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: 6299; NoVLX: # %bb.0: # %entry 6300; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6301; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 6302; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6303; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6304; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6305; NoVLX-NEXT: kmovw %k0, %eax 6306; NoVLX-NEXT: vzeroupper 6307; NoVLX-NEXT: retq 6308entry: 6309 %0 = bitcast <2 x i64> %__a to <4 x i32> 6310 %load = load <2 x i64>, ptr %__b 6311 %1 = bitcast <2 x i64> %load to <4 x i32> 6312 %2 = icmp sgt <4 x i32> %0, %1 6313 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6314 %4 = bitcast <32 x i1> %3 to i32 6315 ret i32 %4 6316} 6317 6318define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6319; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: 6320; VLX: # %bb.0: # %entry 6321; VLX-NEXT: kmovd %edi, %k1 6322; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} 6323; VLX-NEXT: kmovd %k0, %eax 6324; VLX-NEXT: retq 6325; 6326; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: 6327; NoVLX: # %bb.0: # %entry 6328; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6329; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6330; NoVLX-NEXT: kmovw %edi, %k1 6331; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6332; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6333; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6334; NoVLX-NEXT: kmovw %k0, %eax 6335; NoVLX-NEXT: vzeroupper 6336; NoVLX-NEXT: retq 6337entry: 6338 %0 = bitcast <2 x i64> %__a to <4 x i32> 6339 %1 = bitcast <2 x i64> %__b to <4 x i32> 6340 %2 = icmp sgt <4 x i32> %0, %1 6341 %3 = bitcast i8 %__u to <8 x i1> 6342 
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6343 %4 = and <4 x i1> %2, %extract.i 6344 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6345 %6 = bitcast <32 x i1> %5 to i32 6346 ret i32 %6 6347} 6348 6349define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 6350; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: 6351; VLX: # %bb.0: # %entry 6352; VLX-NEXT: kmovd %edi, %k1 6353; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} 6354; VLX-NEXT: kmovd %k0, %eax 6355; VLX-NEXT: retq 6356; 6357; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: 6358; NoVLX: # %bb.0: # %entry 6359; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6360; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 6361; NoVLX-NEXT: kmovw %edi, %k1 6362; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6363; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6364; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6365; NoVLX-NEXT: kmovw %k0, %eax 6366; NoVLX-NEXT: vzeroupper 6367; NoVLX-NEXT: retq 6368entry: 6369 %0 = bitcast <2 x i64> %__a to <4 x i32> 6370 %load = load <2 x i64>, ptr %__b 6371 %1 = bitcast <2 x i64> %load to <4 x i32> 6372 %2 = icmp sgt <4 x i32> %0, %1 6373 %3 = bitcast i8 %__u to <8 x i1> 6374 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6375 %4 = and <4 x i1> %2, %extract.i 6376 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6377 %6 = bitcast <32 x i1> %5 to i32 6378 
ret i32 %6 6379} 6380 6381 6382define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 6383; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: 6384; VLX: # %bb.0: # %entry 6385; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 6386; VLX-NEXT: kmovd %k0, %eax 6387; VLX-NEXT: retq 6388; 6389; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: 6390; NoVLX: # %bb.0: # %entry 6391; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6392; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 6393; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6394; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6395; NoVLX-NEXT: kmovw %k0, %eax 6396; NoVLX-NEXT: vzeroupper 6397; NoVLX-NEXT: retq 6398entry: 6399 %0 = bitcast <2 x i64> %__a to <4 x i32> 6400 %load = load i32, ptr %__b 6401 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6402 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6403 %2 = icmp sgt <4 x i32> %0, %1 6404 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6405 %4 = bitcast <32 x i1> %3 to i32 6406 ret i32 %4 6407} 6408 6409define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 6410; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: 6411; VLX: # %bb.0: # %entry 6412; VLX-NEXT: kmovd %edi, %k1 6413; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} 6414; VLX-NEXT: kmovd %k0, %eax 6415; VLX-NEXT: retq 6416; 6417; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: 6418; NoVLX: # %bb.0: # %entry 6419; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6420; NoVLX-NEXT: kmovw %edi, %k1 6421; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 6422; NoVLX-NEXT: kshiftlw $12, %k0, %k0 
6423; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6424; NoVLX-NEXT: kmovw %k0, %eax 6425; NoVLX-NEXT: vzeroupper 6426; NoVLX-NEXT: retq 6427entry: 6428 %0 = bitcast <2 x i64> %__a to <4 x i32> 6429 %load = load i32, ptr %__b 6430 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6431 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6432 %2 = icmp sgt <4 x i32> %0, %1 6433 %3 = bitcast i8 %__u to <8 x i1> 6434 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6435 %4 = and <4 x i1> %extract.i, %2 6436 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6437 %6 = bitcast <32 x i1> %5 to i32 6438 ret i32 %6 6439} 6440 6441 6442define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6443; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: 6444; VLX: # %bb.0: # %entry 6445; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 6446; VLX-NEXT: kmovq %k0, %rax 6447; VLX-NEXT: retq 6448; 6449; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: 6450; NoVLX: # %bb.0: # %entry 6451; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6452; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6453; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6454; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6455; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6456; NoVLX-NEXT: kmovw %k0, %eax 6457; NoVLX-NEXT: vzeroupper 6458; NoVLX-NEXT: retq 6459entry: 6460 %0 = bitcast <2 x i64> %__a to <4 x i32> 6461 %1 = bitcast <2 x i64> %__b to <4 x i32> 6462 %2 = icmp sgt <4 x i32> %0, %1 6463 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 
6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6464 %4 = bitcast <64 x i1> %3 to i64 6465 ret i64 %4 6466} 6467 6468define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 6469; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: 6470; VLX: # %bb.0: # %entry 6471; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 6472; VLX-NEXT: kmovq %k0, %rax 6473; VLX-NEXT: retq 6474; 6475; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: 6476; NoVLX: # %bb.0: # %entry 6477; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6478; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 6479; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6480; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6481; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6482; NoVLX-NEXT: kmovw %k0, %eax 6483; NoVLX-NEXT: vzeroupper 6484; NoVLX-NEXT: retq 6485entry: 6486 %0 = bitcast <2 x i64> %__a to <4 x i32> 6487 %load = load <2 x i64>, ptr %__b 6488 %1 = bitcast <2 x i64> %load to <4 x i32> 6489 %2 = icmp sgt <4 x i32> %0, %1 6490 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6491 %4 = bitcast <64 x i1> %3 to i64 6492 ret i64 %4 6493} 6494 6495define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 6496; VLX-LABEL: 
test_masked_vpcmpsgtd_v4i1_v64i1_mask: 6497; VLX: # %bb.0: # %entry 6498; VLX-NEXT: kmovd %edi, %k1 6499; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} 6500; VLX-NEXT: kmovq %k0, %rax 6501; VLX-NEXT: retq 6502; 6503; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask: 6504; NoVLX: # %bb.0: # %entry 6505; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 6506; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6507; NoVLX-NEXT: kmovw %edi, %k1 6508; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6509; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6510; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6511; NoVLX-NEXT: kmovw %k0, %eax 6512; NoVLX-NEXT: vzeroupper 6513; NoVLX-NEXT: retq 6514entry: 6515 %0 = bitcast <2 x i64> %__a to <4 x i32> 6516 %1 = bitcast <2 x i64> %__b to <4 x i32> 6517 %2 = icmp sgt <4 x i32> %0, %1 6518 %3 = bitcast i8 %__u to <8 x i1> 6519 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6520 %4 = and <4 x i1> %2, %extract.i 6521 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6522 %6 = bitcast <64 x i1> %5 to i64 6523 ret i64 %6 6524} 6525 6526define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 6527; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: 6528; VLX: # %bb.0: # %entry 6529; VLX-NEXT: kmovd %edi, %k1 6530; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} 6531; VLX-NEXT: kmovq %k0, %rax 6532; VLX-NEXT: retq 6533; 6534; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: 6535; 
NoVLX: # %bb.0: # %entry 6536; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6537; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 6538; NoVLX-NEXT: kmovw %edi, %k1 6539; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6540; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6541; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6542; NoVLX-NEXT: kmovw %k0, %eax 6543; NoVLX-NEXT: vzeroupper 6544; NoVLX-NEXT: retq 6545entry: 6546 %0 = bitcast <2 x i64> %__a to <4 x i32> 6547 %load = load <2 x i64>, ptr %__b 6548 %1 = bitcast <2 x i64> %load to <4 x i32> 6549 %2 = icmp sgt <4 x i32> %0, %1 6550 %3 = bitcast i8 %__u to <8 x i1> 6551 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6552 %4 = and <4 x i1> %2, %extract.i 6553 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6554 %6 = bitcast <64 x i1> %5 to i64 6555 ret i64 %6 6556} 6557 6558 6559define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 6560; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: 6561; VLX: # %bb.0: # %entry 6562; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 6563; VLX-NEXT: kmovq %k0, %rax 6564; VLX-NEXT: retq 6565; 6566; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: 6567; NoVLX: # %bb.0: # %entry 6568; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6569; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 6570; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6571; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6572; NoVLX-NEXT: kmovw %k0, %eax 6573; NoVLX-NEXT: vzeroupper 6574; NoVLX-NEXT: retq 
6575entry: 6576 %0 = bitcast <2 x i64> %__a to <4 x i32> 6577 %load = load i32, ptr %__b 6578 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6579 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6580 %2 = icmp sgt <4 x i32> %0, %1 6581 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6582 %4 = bitcast <64 x i1> %3 to i64 6583 ret i64 %4 6584} 6585 6586define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 6587; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: 6588; VLX: # %bb.0: # %entry 6589; VLX-NEXT: kmovd %edi, %k1 6590; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} 6591; VLX-NEXT: kmovq %k0, %rax 6592; VLX-NEXT: retq 6593; 6594; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: 6595; NoVLX: # %bb.0: # %entry 6596; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 6597; NoVLX-NEXT: kmovw %edi, %k1 6598; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 6599; NoVLX-NEXT: kshiftlw $12, %k0, %k0 6600; NoVLX-NEXT: kshiftrw $12, %k0, %k0 6601; NoVLX-NEXT: kmovw %k0, %eax 6602; NoVLX-NEXT: vzeroupper 6603; NoVLX-NEXT: retq 6604entry: 6605 %0 = bitcast <2 x i64> %__a to <4 x i32> 6606 %load = load i32, ptr %__b 6607 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 6608 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 6609 %2 = icmp sgt <4 x i32> %0, %1 6610 %3 = bitcast i8 %__u to <8 x i1> 6611 %extract.i = 
shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6612 %4 = and <4 x i1> %extract.i, %2 6613 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 6614 %6 = bitcast <64 x i1> %5 to i64 6615 ret i64 %6 6616} 6617 6618 6619define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 6620; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask: 6621; VLX: # %bb.0: # %entry 6622; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 6623; VLX-NEXT: kmovd %k0, %eax 6624; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6625; VLX-NEXT: vzeroupper 6626; VLX-NEXT: retq 6627; 6628; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask: 6629; NoVLX: # %bb.0: # %entry 6630; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 6631; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6632; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6633; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6634; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6635; NoVLX-NEXT: kmovw %k0, %eax 6636; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6637; NoVLX-NEXT: vzeroupper 6638; NoVLX-NEXT: retq 6639entry: 6640 %0 = bitcast <4 x i64> %__a to <8 x i32> 6641 %1 = bitcast <4 x i64> %__b to <8 x i32> 6642 %2 = icmp sgt <8 x i32> %0, %1 6643 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6644 %4 = bitcast <16 x i1> %3 to i16 6645 ret i16 %4 6646} 6647 6648define zeroext i16 
@test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 6649; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem: 6650; VLX: # %bb.0: # %entry 6651; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 6652; VLX-NEXT: kmovd %k0, %eax 6653; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6654; VLX-NEXT: vzeroupper 6655; VLX-NEXT: retq 6656; 6657; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem: 6658; NoVLX: # %bb.0: # %entry 6659; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6660; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 6661; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6662; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6663; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6664; NoVLX-NEXT: kmovw %k0, %eax 6665; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6666; NoVLX-NEXT: vzeroupper 6667; NoVLX-NEXT: retq 6668entry: 6669 %0 = bitcast <4 x i64> %__a to <8 x i32> 6670 %load = load <4 x i64>, ptr %__b 6671 %1 = bitcast <4 x i64> %load to <8 x i32> 6672 %2 = icmp sgt <8 x i32> %0, %1 6673 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6674 %4 = bitcast <16 x i1> %3 to i16 6675 ret i16 %4 6676} 6677 6678define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 6679; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask: 6680; VLX: # %bb.0: # %entry 6681; VLX-NEXT: kmovd %edi, %k1 6682; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} 6683; VLX-NEXT: kmovd %k0, %eax 6684; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6685; VLX-NEXT: vzeroupper 6686; VLX-NEXT: retq 6687; 6688; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask: 6689; NoVLX: # %bb.0: # %entry 6690; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 6691; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6692; NoVLX-NEXT: kmovw %edi, %k1 6693; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6694; NoVLX-NEXT: kshiftlw 
$8, %k0, %k0 6695; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6696; NoVLX-NEXT: kmovw %k0, %eax 6697; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6698; NoVLX-NEXT: vzeroupper 6699; NoVLX-NEXT: retq 6700entry: 6701 %0 = bitcast <4 x i64> %__a to <8 x i32> 6702 %1 = bitcast <4 x i64> %__b to <8 x i32> 6703 %2 = icmp sgt <8 x i32> %0, %1 6704 %3 = bitcast i8 %__u to <8 x i1> 6705 %4 = and <8 x i1> %2, %3 6706 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6707 %6 = bitcast <16 x i1> %5 to i16 6708 ret i16 %6 6709} 6710 6711define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 6712; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem: 6713; VLX: # %bb.0: # %entry 6714; VLX-NEXT: kmovd %edi, %k1 6715; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} 6716; VLX-NEXT: kmovd %k0, %eax 6717; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6718; VLX-NEXT: vzeroupper 6719; VLX-NEXT: retq 6720; 6721; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem: 6722; NoVLX: # %bb.0: # %entry 6723; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6724; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 6725; NoVLX-NEXT: kmovw %edi, %k1 6726; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6727; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6728; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6729; NoVLX-NEXT: kmovw %k0, %eax 6730; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6731; NoVLX-NEXT: vzeroupper 6732; NoVLX-NEXT: retq 6733entry: 6734 %0 = bitcast <4 x i64> %__a to <8 x i32> 6735 %load = load <4 x i64>, ptr %__b 6736 %1 = bitcast <4 x i64> %load to <8 x i32> 6737 %2 = icmp sgt <8 x i32> %0, %1 6738 %3 = bitcast i8 %__u to <8 x i1> 6739 %4 = and <8 x i1> %2, %3 6740 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 
10, i32 11, i32 12, i32 13, i32 14, i32 15> 6741 %6 = bitcast <16 x i1> %5 to i16 6742 ret i16 %6 6743} 6744 6745 6746define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 6747; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: 6748; VLX: # %bb.0: # %entry 6749; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 6750; VLX-NEXT: kmovd %k0, %eax 6751; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6752; VLX-NEXT: vzeroupper 6753; VLX-NEXT: retq 6754; 6755; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: 6756; NoVLX: # %bb.0: # %entry 6757; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6758; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 6759; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6760; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6761; NoVLX-NEXT: kmovw %k0, %eax 6762; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6763; NoVLX-NEXT: vzeroupper 6764; NoVLX-NEXT: retq 6765entry: 6766 %0 = bitcast <4 x i64> %__a to <8 x i32> 6767 %load = load i32, ptr %__b 6768 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 6769 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 6770 %2 = icmp sgt <8 x i32> %0, %1 6771 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6772 %4 = bitcast <16 x i1> %3 to i16 6773 ret i16 %4 6774} 6775 6776define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 6777; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: 6778; VLX: # %bb.0: # %entry 6779; VLX-NEXT: kmovd %edi, %k1 6780; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} 6781; VLX-NEXT: kmovd %k0, %eax 6782; VLX-NEXT: # kill: def $ax killed $ax killed $eax 6783; VLX-NEXT: vzeroupper 6784; VLX-NEXT: retq 6785; 6786; NoVLX-LABEL: 
test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: 6787; NoVLX: # %bb.0: # %entry 6788; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6789; NoVLX-NEXT: kmovw %edi, %k1 6790; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 6791; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6792; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6793; NoVLX-NEXT: kmovw %k0, %eax 6794; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 6795; NoVLX-NEXT: vzeroupper 6796; NoVLX-NEXT: retq 6797entry: 6798 %0 = bitcast <4 x i64> %__a to <8 x i32> 6799 %load = load i32, ptr %__b 6800 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 6801 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 6802 %2 = icmp sgt <8 x i32> %0, %1 6803 %3 = bitcast i8 %__u to <8 x i1> 6804 %4 = and <8 x i1> %3, %2 6805 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6806 %6 = bitcast <16 x i1> %5 to i16 6807 ret i16 %6 6808} 6809 6810 6811define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 6812; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: 6813; VLX: # %bb.0: # %entry 6814; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 6815; VLX-NEXT: kmovd %k0, %eax 6816; VLX-NEXT: vzeroupper 6817; VLX-NEXT: retq 6818; 6819; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: 6820; NoVLX: # %bb.0: # %entry 6821; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 6822; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6823; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6824; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6825; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6826; NoVLX-NEXT: kmovw %k0, %eax 6827; NoVLX-NEXT: vzeroupper 6828; NoVLX-NEXT: retq 6829entry: 6830 %0 = bitcast <4 x i64> %__a to <8 x i32> 6831 %1 = bitcast <4 x i64> %__b to <8 x i32> 6832 %2 = icmp sgt <8 x i32> %0, %1 6833 %3 = shufflevector <8 x i1> %2, <8 x 
i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6834 %4 = bitcast <32 x i1> %3 to i32 6835 ret i32 %4 6836} 6837 6838define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 6839; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: 6840; VLX: # %bb.0: # %entry 6841; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 6842; VLX-NEXT: kmovd %k0, %eax 6843; VLX-NEXT: vzeroupper 6844; VLX-NEXT: retq 6845; 6846; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: 6847; NoVLX: # %bb.0: # %entry 6848; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6849; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 6850; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 6851; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6852; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6853; NoVLX-NEXT: kmovw %k0, %eax 6854; NoVLX-NEXT: vzeroupper 6855; NoVLX-NEXT: retq 6856entry: 6857 %0 = bitcast <4 x i64> %__a to <8 x i32> 6858 %load = load <4 x i64>, ptr %__b 6859 %1 = bitcast <4 x i64> %load to <8 x i32> 6860 %2 = icmp sgt <8 x i32> %0, %1 6861 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6862 %4 = bitcast <32 x i1> %3 to i32 6863 ret i32 %4 6864} 6865 6866define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 6867; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: 6868; VLX: # %bb.0: # %entry 6869; VLX-NEXT: kmovd %edi, %k1 6870; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} 6871; VLX-NEXT: kmovd %k0, %eax 6872; VLX-NEXT: vzeroupper 6873; VLX-NEXT: retq 6874; 
6875; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: 6876; NoVLX: # %bb.0: # %entry 6877; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 6878; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6879; NoVLX-NEXT: kmovw %edi, %k1 6880; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6881; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6882; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6883; NoVLX-NEXT: kmovw %k0, %eax 6884; NoVLX-NEXT: vzeroupper 6885; NoVLX-NEXT: retq 6886entry: 6887 %0 = bitcast <4 x i64> %__a to <8 x i32> 6888 %1 = bitcast <4 x i64> %__b to <8 x i32> 6889 %2 = icmp sgt <8 x i32> %0, %1 6890 %3 = bitcast i8 %__u to <8 x i1> 6891 %4 = and <8 x i1> %2, %3 6892 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6893 %6 = bitcast <32 x i1> %5 to i32 6894 ret i32 %6 6895} 6896 6897define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 6898; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: 6899; VLX: # %bb.0: # %entry 6900; VLX-NEXT: kmovd %edi, %k1 6901; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} 6902; VLX-NEXT: kmovd %k0, %eax 6903; VLX-NEXT: vzeroupper 6904; VLX-NEXT: retq 6905; 6906; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: 6907; NoVLX: # %bb.0: # %entry 6908; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6909; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 6910; NoVLX-NEXT: kmovw %edi, %k1 6911; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 6912; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6913; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6914; NoVLX-NEXT: kmovw %k0, %eax 6915; NoVLX-NEXT: vzeroupper 6916; NoVLX-NEXT: retq 6917entry: 6918 %0 = bitcast <4 x i64> %__a to <8 x i32> 6919 %load = load <4 x i64>, ptr %__b 6920 %1 = bitcast <4 x i64> 
%load to <8 x i32> 6921 %2 = icmp sgt <8 x i32> %0, %1 6922 %3 = bitcast i8 %__u to <8 x i1> 6923 %4 = and <8 x i1> %2, %3 6924 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6925 %6 = bitcast <32 x i1> %5 to i32 6926 ret i32 %6 6927} 6928 6929 6930define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 6931; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: 6932; VLX: # %bb.0: # %entry 6933; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 6934; VLX-NEXT: kmovd %k0, %eax 6935; VLX-NEXT: vzeroupper 6936; VLX-NEXT: retq 6937; 6938; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: 6939; NoVLX: # %bb.0: # %entry 6940; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6941; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 6942; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6943; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6944; NoVLX-NEXT: kmovw %k0, %eax 6945; NoVLX-NEXT: vzeroupper 6946; NoVLX-NEXT: retq 6947entry: 6948 %0 = bitcast <4 x i64> %__a to <8 x i32> 6949 %load = load i32, ptr %__b 6950 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 6951 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 6952 %2 = icmp sgt <8 x i32> %0, %1 6953 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6954 %4 = bitcast <32 x i1> %3 to i32 6955 ret i32 %4 6956} 6957 6958define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr 
%__b) local_unnamed_addr { 6959; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: 6960; VLX: # %bb.0: # %entry 6961; VLX-NEXT: kmovd %edi, %k1 6962; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} 6963; VLX-NEXT: kmovd %k0, %eax 6964; VLX-NEXT: vzeroupper 6965; VLX-NEXT: retq 6966; 6967; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: 6968; NoVLX: # %bb.0: # %entry 6969; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 6970; NoVLX-NEXT: kmovw %edi, %k1 6971; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 6972; NoVLX-NEXT: kshiftlw $8, %k0, %k0 6973; NoVLX-NEXT: kshiftrw $8, %k0, %k0 6974; NoVLX-NEXT: kmovw %k0, %eax 6975; NoVLX-NEXT: vzeroupper 6976; NoVLX-NEXT: retq 6977entry: 6978 %0 = bitcast <4 x i64> %__a to <8 x i32> 6979 %load = load i32, ptr %__b 6980 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 6981 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 6982 %2 = icmp sgt <8 x i32> %0, %1 6983 %3 = bitcast i8 %__u to <8 x i1> 6984 %4 = and <8 x i1> %3, %2 6985 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6986 %6 = bitcast <32 x i1> %5 to i32 6987 ret i32 %6 6988} 6989 6990 6991define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 6992; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask: 6993; VLX: # %bb.0: # %entry 6994; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 6995; VLX-NEXT: kmovq %k0, %rax 6996; VLX-NEXT: vzeroupper 6997; VLX-NEXT: retq 6998; 6999; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask: 7000; NoVLX: # %bb.0: # %entry 7001; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 7002; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7003; 
NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7004; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7005; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7006; NoVLX-NEXT: kmovw %k0, %eax 7007; NoVLX-NEXT: vzeroupper 7008; NoVLX-NEXT: retq 7009entry: 7010 %0 = bitcast <4 x i64> %__a to <8 x i32> 7011 %1 = bitcast <4 x i64> %__b to <8 x i32> 7012 %2 = icmp sgt <8 x i32> %0, %1 7013 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7014 %4 = bitcast <64 x i1> %3 to i64 7015 ret i64 %4 7016} 7017 7018define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 7019; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: 7020; VLX: # %bb.0: # %entry 7021; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 7022; VLX-NEXT: kmovq %k0, %rax 7023; VLX-NEXT: vzeroupper 7024; VLX-NEXT: retq 7025; 7026; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: 7027; NoVLX: # %bb.0: # %entry 7028; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7029; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 7030; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7031; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7032; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7033; NoVLX-NEXT: kmovw %k0, %eax 7034; NoVLX-NEXT: vzeroupper 7035; NoVLX-NEXT: retq 7036entry: 7037 %0 = bitcast <4 x i64> %__a to <8 x i32> 7038 %load = load <4 x i64>, ptr %__b 7039 %1 = bitcast <4 x i64> %load to <8 x i32> 7040 %2 = icmp sgt <8 x i32> %0, %1 7041 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7042 %4 = bitcast <64 x i1> %3 to i64 7043 ret i64 %4 7044} 7045 7046define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 7047; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: 7048; VLX: # %bb.0: # %entry 7049; VLX-NEXT: kmovd %edi, %k1 7050; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} 7051; VLX-NEXT: kmovq %k0, %rax 7052; VLX-NEXT: vzeroupper 7053; VLX-NEXT: retq 7054; 7055; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: 7056; NoVLX: # %bb.0: # %entry 7057; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 7058; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7059; NoVLX-NEXT: kmovw %edi, %k1 7060; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 7061; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7062; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7063; NoVLX-NEXT: kmovw %k0, %eax 7064; NoVLX-NEXT: vzeroupper 7065; NoVLX-NEXT: retq 7066entry: 7067 %0 = bitcast <4 x i64> %__a to <8 x i32> 7068 %1 = bitcast <4 x i64> %__b to <8 x i32> 7069 %2 = icmp sgt <8 x i32> %0, %1 7070 %3 = bitcast i8 %__u to <8 x i1> 7071 %4 = and <8 x i1> %2, %3 7072 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7073 %6 = bitcast <64 x i1> %5 to i64 7074 ret i64 %6 7075} 7076 7077define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 7078; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: 7079; VLX: # %bb.0: # %entry 7080; VLX-NEXT: kmovd %edi, %k1 7081; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} 7082; VLX-NEXT: kmovq %k0, %rax 7083; VLX-NEXT: vzeroupper 7084; VLX-NEXT: retq 7085; 7086; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: 7087; NoVLX: # %bb.0: # %entry 7088; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7089; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 7090; NoVLX-NEXT: kmovw %edi, %k1 7091; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 7092; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7093; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7094; NoVLX-NEXT: kmovw %k0, %eax 7095; NoVLX-NEXT: vzeroupper 7096; NoVLX-NEXT: retq 7097entry: 7098 %0 = bitcast <4 x i64> %__a to <8 x i32> 7099 %load = load <4 x i64>, ptr %__b 7100 %1 = bitcast <4 x i64> %load to <8 x i32> 7101 %2 = icmp sgt <8 x i32> %0, %1 7102 %3 = bitcast i8 %__u to <8 x i1> 7103 %4 = and <8 x i1> %2, %3 7104 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7105 %6 = bitcast <64 x i1> %5 to i64 7106 ret i64 %6 7107} 7108 7109 7110define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 
7111; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: 7112; VLX: # %bb.0: # %entry 7113; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 7114; VLX-NEXT: kmovq %k0, %rax 7115; VLX-NEXT: vzeroupper 7116; VLX-NEXT: retq 7117; 7118; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: 7119; NoVLX: # %bb.0: # %entry 7120; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7121; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 7122; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7123; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7124; NoVLX-NEXT: kmovw %k0, %eax 7125; NoVLX-NEXT: vzeroupper 7126; NoVLX-NEXT: retq 7127entry: 7128 %0 = bitcast <4 x i64> %__a to <8 x i32> 7129 %load = load i32, ptr %__b 7130 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 7131 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7132 %2 = icmp sgt <8 x i32> %0, %1 7133 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7134 %4 = bitcast <64 x i1> %3 to i64 7135 ret i64 %4 7136} 7137 7138define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 7139; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: 7140; VLX: # %bb.0: # %entry 7141; VLX-NEXT: kmovd %edi, %k1 7142; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} 7143; VLX-NEXT: kmovq %k0, %rax 7144; VLX-NEXT: vzeroupper 7145; VLX-NEXT: retq 7146; 7147; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: 7148; NoVLX: 
# %bb.0: # %entry 7149; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 7150; NoVLX-NEXT: kmovw %edi, %k1 7151; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 7152; NoVLX-NEXT: kshiftlw $8, %k0, %k0 7153; NoVLX-NEXT: kshiftrw $8, %k0, %k0 7154; NoVLX-NEXT: kmovw %k0, %eax 7155; NoVLX-NEXT: vzeroupper 7156; NoVLX-NEXT: retq 7157entry: 7158 %0 = bitcast <4 x i64> %__a to <8 x i32> 7159 %load = load i32, ptr %__b 7160 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 7161 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7162 %2 = icmp sgt <8 x i32> %0, %1 7163 %3 = bitcast i8 %__u to <8 x i1> 7164 %4 = and <8 x i1> %3, %2 7165 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7166 %6 = bitcast <64 x i1> %5 to i64 7167 ret i64 %6 7168} 7169 7170 7171define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 7172; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask: 7173; VLX: # %bb.0: # %entry 7174; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7175; VLX-NEXT: kmovd %k0, %eax 7176; VLX-NEXT: vzeroupper 7177; VLX-NEXT: retq 7178; 7179; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask: 7180; NoVLX: # %bb.0: # %entry 7181; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7182; NoVLX-NEXT: kmovw %k0, %eax 7183; NoVLX-NEXT: vzeroupper 7184; NoVLX-NEXT: retq 7185entry: 7186 %0 = bitcast <8 x i64> %__a to <16 x i32> 7187 %1 = bitcast <8 x i64> %__b to <16 x i32> 7188 %2 = icmp 
sgt <16 x i32> %0, %1 7189 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7190 %4 = bitcast <32 x i1> %3 to i32 7191 ret i32 %4 7192} 7193 7194define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 7195; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: 7196; VLX: # %bb.0: # %entry 7197; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 7198; VLX-NEXT: kmovd %k0, %eax 7199; VLX-NEXT: vzeroupper 7200; VLX-NEXT: retq 7201; 7202; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: 7203; NoVLX: # %bb.0: # %entry 7204; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 7205; NoVLX-NEXT: kmovw %k0, %eax 7206; NoVLX-NEXT: vzeroupper 7207; NoVLX-NEXT: retq 7208entry: 7209 %0 = bitcast <8 x i64> %__a to <16 x i32> 7210 %load = load <8 x i64>, ptr %__b 7211 %1 = bitcast <8 x i64> %load to <16 x i32> 7212 %2 = icmp sgt <16 x i32> %0, %1 7213 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7214 %4 = bitcast <32 x i1> %3 to i32 7215 ret i32 %4 7216} 7217 7218define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 7219; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: 7220; VLX: # %bb.0: # %entry 7221; VLX-NEXT: kmovd %edi, %k1 7222; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 7223; VLX-NEXT: kmovd %k0, %eax 7224; VLX-NEXT: vzeroupper 7225; VLX-NEXT: retq 7226; 7227; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: 7228; NoVLX: # %bb.0: # %entry 
7229; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7230; NoVLX-NEXT: kmovw %k0, %eax 7231; NoVLX-NEXT: andl %edi, %eax 7232; NoVLX-NEXT: vzeroupper 7233; NoVLX-NEXT: retq 7234entry: 7235 %0 = bitcast <8 x i64> %__a to <16 x i32> 7236 %1 = bitcast <8 x i64> %__b to <16 x i32> 7237 %2 = icmp sgt <16 x i32> %0, %1 7238 %3 = bitcast i16 %__u to <16 x i1> 7239 %4 = and <16 x i1> %2, %3 7240 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7241 %6 = bitcast <32 x i1> %5 to i32 7242 ret i32 %6 7243} 7244 7245define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 7246; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: 7247; VLX: # %bb.0: # %entry 7248; VLX-NEXT: kmovd %edi, %k1 7249; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} 7250; VLX-NEXT: kmovd %k0, %eax 7251; VLX-NEXT: vzeroupper 7252; VLX-NEXT: retq 7253; 7254; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: 7255; NoVLX: # %bb.0: # %entry 7256; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 7257; NoVLX-NEXT: kmovw %k0, %eax 7258; NoVLX-NEXT: andl %edi, %eax 7259; NoVLX-NEXT: vzeroupper 7260; NoVLX-NEXT: retq 7261entry: 7262 %0 = bitcast <8 x i64> %__a to <16 x i32> 7263 %load = load <8 x i64>, ptr %__b 7264 %1 = bitcast <8 x i64> %load to <16 x i32> 7265 %2 = icmp sgt <16 x i32> %0, %1 7266 %3 = bitcast i16 %__u to <16 x i1> 7267 %4 = and <16 x i1> %2, %3 7268 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7269 %6 
= bitcast <32 x i1> %5 to i32 7270 ret i32 %6 7271} 7272 7273 7274define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 7275; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: 7276; VLX: # %bb.0: # %entry 7277; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 7278; VLX-NEXT: kmovd %k0, %eax 7279; VLX-NEXT: vzeroupper 7280; VLX-NEXT: retq 7281; 7282; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: 7283; NoVLX: # %bb.0: # %entry 7284; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 7285; NoVLX-NEXT: kmovw %k0, %eax 7286; NoVLX-NEXT: vzeroupper 7287; NoVLX-NEXT: retq 7288entry: 7289 %0 = bitcast <8 x i64> %__a to <16 x i32> 7290 %load = load i32, ptr %__b 7291 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 7292 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7293 %2 = icmp sgt <16 x i32> %0, %1 7294 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7295 %4 = bitcast <32 x i1> %3 to i32 7296 ret i32 %4 7297} 7298 7299define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 7300; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: 7301; VLX: # %bb.0: # %entry 7302; VLX-NEXT: kmovd %edi, %k1 7303; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 7304; VLX-NEXT: kmovd %k0, %eax 7305; VLX-NEXT: vzeroupper 7306; VLX-NEXT: retq 7307; 7308; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: 7309; NoVLX: # %bb.0: # %entry 7310; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 7311; NoVLX-NEXT: kmovw %k0, %eax 7312; NoVLX-NEXT: andl %edi, %eax 
7313; NoVLX-NEXT: vzeroupper 7314; NoVLX-NEXT: retq 7315entry: 7316 %0 = bitcast <8 x i64> %__a to <16 x i32> 7317 %load = load i32, ptr %__b 7318 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 7319 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7320 %2 = icmp sgt <16 x i32> %0, %1 7321 %3 = bitcast i16 %__u to <16 x i1> 7322 %4 = and <16 x i1> %3, %2 7323 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 7324 %6 = bitcast <32 x i1> %5 to i32 7325 ret i32 %6 7326} 7327 7328 7329define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 7330; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: 7331; VLX: # %bb.0: # %entry 7332; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7333; VLX-NEXT: kmovq %k0, %rax 7334; VLX-NEXT: vzeroupper 7335; VLX-NEXT: retq 7336; 7337; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: 7338; NoVLX: # %bb.0: # %entry 7339; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7340; NoVLX-NEXT: kmovw %k0, %eax 7341; NoVLX-NEXT: vzeroupper 7342; NoVLX-NEXT: retq 7343entry: 7344 %0 = bitcast <8 x i64> %__a to <16 x i32> 7345 %1 = bitcast <8 x i64> %__b to <16 x i32> 7346 %2 = icmp sgt <16 x i32> %0, %1 7347 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 
20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7348 %4 = bitcast <64 x i1> %3 to i64 7349 ret i64 %4 7350} 7351 7352define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 7353; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: 7354; VLX: # %bb.0: # %entry 7355; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 7356; VLX-NEXT: kmovq %k0, %rax 7357; VLX-NEXT: vzeroupper 7358; VLX-NEXT: retq 7359; 7360; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: 7361; NoVLX: # %bb.0: # %entry 7362; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 7363; NoVLX-NEXT: kmovw %k0, %eax 7364; NoVLX-NEXT: vzeroupper 7365; NoVLX-NEXT: retq 7366entry: 7367 %0 = bitcast <8 x i64> %__a to <16 x i32> 7368 %load = load <8 x i64>, ptr %__b 7369 %1 = bitcast <8 x i64> %load to <16 x i32> 7370 %2 = icmp sgt <16 x i32> %0, %1 7371 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7372 %4 = bitcast <64 x i1> %3 to i64 7373 ret i64 %4 7374} 7375 7376define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 7377; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask: 7378; VLX: # %bb.0: # %entry 7379; VLX-NEXT: kmovd %edi, %k1 7380; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} 7381; VLX-NEXT: kmovq %k0, %rax 7382; VLX-NEXT: vzeroupper 7383; VLX-NEXT: retq 7384; 7385; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask: 7386; NoVLX: # %bb.0: # %entry 7387; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 7388; NoVLX-NEXT: kmovw %k0, 
%eax 7389; NoVLX-NEXT: andl %edi, %eax 7390; NoVLX-NEXT: vzeroupper 7391; NoVLX-NEXT: retq 7392entry: 7393 %0 = bitcast <8 x i64> %__a to <16 x i32> 7394 %1 = bitcast <8 x i64> %__b to <16 x i32> 7395 %2 = icmp sgt <16 x i32> %0, %1 7396 %3 = bitcast i16 %__u to <16 x i1> 7397 %4 = and <16 x i1> %2, %3 7398 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7399 %6 = bitcast <64 x i1> %5 to i64 7400 ret i64 %6 7401} 7402 7403define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 7404; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: 7405; VLX: # %bb.0: # %entry 7406; VLX-NEXT: kmovd %edi, %k1 7407; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} 7408; VLX-NEXT: kmovq %k0, %rax 7409; VLX-NEXT: vzeroupper 7410; VLX-NEXT: retq 7411; 7412; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: 7413; NoVLX: # %bb.0: # %entry 7414; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 7415; NoVLX-NEXT: kmovw %k0, %eax 7416; NoVLX-NEXT: andl %edi, %eax 7417; NoVLX-NEXT: vzeroupper 7418; NoVLX-NEXT: retq 7419entry: 7420 %0 = bitcast <8 x i64> %__a to <16 x i32> 7421 %load = load <8 x i64>, ptr %__b 7422 %1 = bitcast <8 x i64> %load to <16 x i32> 7423 %2 = icmp sgt <16 x i32> %0, %1 7424 %3 = bitcast i16 %__u to <16 x i1> 7425 %4 = and <16 x i1> %2, %3 7426 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 
20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7427 %6 = bitcast <64 x i1> %5 to i64 7428 ret i64 %6 7429} 7430 7431 7432define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 7433; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: 7434; VLX: # %bb.0: # %entry 7435; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 7436; VLX-NEXT: kmovq %k0, %rax 7437; VLX-NEXT: vzeroupper 7438; VLX-NEXT: retq 7439; 7440; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: 7441; NoVLX: # %bb.0: # %entry 7442; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 7443; NoVLX-NEXT: kmovw %k0, %eax 7444; NoVLX-NEXT: vzeroupper 7445; NoVLX-NEXT: retq 7446entry: 7447 %0 = bitcast <8 x i64> %__a to <16 x i32> 7448 %load = load i32, ptr %__b 7449 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 7450 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7451 %2 = icmp sgt <16 x i32> %0, %1 7452 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7453 %4 = bitcast <64 x i1> %3 to i64 7454 ret i64 %4 7455} 7456 7457define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) 
local_unnamed_addr { 7458; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: 7459; VLX: # %bb.0: # %entry 7460; VLX-NEXT: kmovd %edi, %k1 7461; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} 7462; VLX-NEXT: kmovq %k0, %rax 7463; VLX-NEXT: vzeroupper 7464; VLX-NEXT: retq 7465; 7466; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: 7467; NoVLX: # %bb.0: # %entry 7468; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 7469; NoVLX-NEXT: kmovw %k0, %eax 7470; NoVLX-NEXT: andl %edi, %eax 7471; NoVLX-NEXT: vzeroupper 7472; NoVLX-NEXT: retq 7473entry: 7474 %0 = bitcast <8 x i64> %__a to <16 x i32> 7475 %load = load i32, ptr %__b 7476 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 7477 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 7478 %2 = icmp sgt <16 x i32> %0, %1 7479 %3 = bitcast i16 %__u to <16 x i1> 7480 %4 = and <16 x i1> %3, %2 7481 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 7482 %6 = bitcast <64 x i1> %5 to i64 7483 ret i64 %6 7484} 7485 7486 7487define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 7488; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: 7489; VLX: # %bb.0: # %entry 7490; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 7491; VLX-NEXT: kmovb %k0, %eax 7492; VLX-NEXT: retq 7493; 7494; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: 7495; NoVLX: # %bb.0: # %entry 7496; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 
def $zmm1 7497; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7498; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7499; NoVLX-NEXT: kmovw %k0, %eax 7500; NoVLX-NEXT: andl $3, %eax 7501; NoVLX-NEXT: vzeroupper 7502; NoVLX-NEXT: retq 7503entry: 7504 %0 = bitcast <2 x i64> %__a to <2 x i64> 7505 %1 = bitcast <2 x i64> %__b to <2 x i64> 7506 %2 = icmp sgt <2 x i64> %0, %1 7507 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7508 %4 = bitcast <4 x i1> %3 to i4 7509 ret i4 %4 7510} 7511 7512define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 7513; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: 7514; VLX: # %bb.0: # %entry 7515; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 7516; VLX-NEXT: kmovb %k0, %eax 7517; VLX-NEXT: retq 7518; 7519; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: 7520; NoVLX: # %bb.0: # %entry 7521; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7522; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 7523; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7524; NoVLX-NEXT: kmovw %k0, %eax 7525; NoVLX-NEXT: andl $3, %eax 7526; NoVLX-NEXT: vzeroupper 7527; NoVLX-NEXT: retq 7528entry: 7529 %0 = bitcast <2 x i64> %__a to <2 x i64> 7530 %load = load <2 x i64>, ptr %__b 7531 %1 = bitcast <2 x i64> %load to <2 x i64> 7532 %2 = icmp sgt <2 x i64> %0, %1 7533 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7534 %4 = bitcast <4 x i1> %3 to i4 7535 ret i4 %4 7536} 7537 7538define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 7539; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: 7540; VLX: # %bb.0: # %entry 7541; VLX-NEXT: kmovd %edi, %k1 7542; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} 7543; VLX-NEXT: kmovb %k0, %eax 7544; VLX-NEXT: retq 7545; 7546; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: 7547; NoVLX: # %bb.0: # %entry 7548; NoVLX-NEXT: # kill: def $xmm1 killed 
$xmm1 def $zmm1 7549; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7550; NoVLX-NEXT: kmovw %edi, %k1 7551; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 7552; NoVLX-NEXT: kmovw %k0, %eax 7553; NoVLX-NEXT: andl $3, %eax 7554; NoVLX-NEXT: vzeroupper 7555; NoVLX-NEXT: retq 7556entry: 7557 %0 = bitcast <2 x i64> %__a to <2 x i64> 7558 %1 = bitcast <2 x i64> %__b to <2 x i64> 7559 %2 = icmp sgt <2 x i64> %0, %1 7560 %3 = bitcast i8 %__u to <8 x i1> 7561 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 7562 %4 = and <2 x i1> %2, %extract.i 7563 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7564 %6 = bitcast <4 x i1> %5 to i4 7565 ret i4 %6 7566} 7567 7568define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 7569; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: 7570; VLX: # %bb.0: # %entry 7571; VLX-NEXT: kmovd %edi, %k1 7572; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} 7573; VLX-NEXT: kmovb %k0, %eax 7574; VLX-NEXT: retq 7575; 7576; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: 7577; NoVLX: # %bb.0: # %entry 7578; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7579; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 7580; NoVLX-NEXT: kmovw %edi, %k1 7581; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 7582; NoVLX-NEXT: kmovw %k0, %eax 7583; NoVLX-NEXT: andl $3, %eax 7584; NoVLX-NEXT: vzeroupper 7585; NoVLX-NEXT: retq 7586entry: 7587 %0 = bitcast <2 x i64> %__a to <2 x i64> 7588 %load = load <2 x i64>, ptr %__b 7589 %1 = bitcast <2 x i64> %load to <2 x i64> 7590 %2 = icmp sgt <2 x i64> %0, %1 7591 %3 = bitcast i8 %__u to <8 x i1> 7592 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 7593 %4 = and <2 x i1> %2, %extract.i 7594 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7595 %6 = bitcast <4 x i1> %5 to i4 7596 ret i4 %6 7597} 7598 
7599 7600define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 7601; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: 7602; VLX: # %bb.0: # %entry 7603; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 7604; VLX-NEXT: kmovb %k0, %eax 7605; VLX-NEXT: retq 7606; 7607; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: 7608; NoVLX: # %bb.0: # %entry 7609; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7610; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 7611; NoVLX-NEXT: kmovw %k0, %eax 7612; NoVLX-NEXT: andl $3, %eax 7613; NoVLX-NEXT: vzeroupper 7614; NoVLX-NEXT: retq 7615entry: 7616 %0 = bitcast <2 x i64> %__a to <2 x i64> 7617 %load = load i64, ptr %__b 7618 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 7619 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 7620 %2 = icmp sgt <2 x i64> %0, %1 7621 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7622 %4 = bitcast <4 x i1> %3 to i4 7623 ret i4 %4 7624} 7625 7626define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 7627; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: 7628; VLX: # %bb.0: # %entry 7629; VLX-NEXT: kmovd %edi, %k1 7630; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} 7631; VLX-NEXT: kmovb %k0, %eax 7632; VLX-NEXT: retq 7633; 7634; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: 7635; NoVLX: # %bb.0: # %entry 7636; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7637; NoVLX-NEXT: kmovw %edi, %k1 7638; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 7639; NoVLX-NEXT: kmovw %k0, %eax 7640; NoVLX-NEXT: andl $3, %eax 7641; NoVLX-NEXT: vzeroupper 7642; NoVLX-NEXT: retq 7643entry: 7644 %0 = bitcast <2 x i64> %__a to <2 x i64> 7645 %load = load i64, ptr %__b 7646 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 7647 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> 
<i32 0, i32 0> 7648 %2 = icmp sgt <2 x i64> %0, %1 7649 %3 = bitcast i8 %__u to <8 x i1> 7650 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 7651 %4 = and <2 x i1> %extract.i, %2 7652 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7653 %6 = bitcast <4 x i1> %5 to i4 7654 ret i4 %6 7655} 7656 7657 7658define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 7659; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: 7660; VLX: # %bb.0: # %entry 7661; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 7662; VLX-NEXT: kmovd %k0, %eax 7663; VLX-NEXT: # kill: def $al killed $al killed $eax 7664; VLX-NEXT: retq 7665; 7666; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: 7667; NoVLX: # %bb.0: # %entry 7668; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 7669; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 7670; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 7671; NoVLX-NEXT: kshiftlw $14, %k0, %k0 7672; NoVLX-NEXT: kshiftrw $14, %k0, %k0 7673; NoVLX-NEXT: kmovw %k0, %eax 7674; NoVLX-NEXT: # kill: def $al killed $al killed $eax 7675; NoVLX-NEXT: vzeroupper 7676; NoVLX-NEXT: retq 7677entry: 7678 %0 = bitcast <2 x i64> %__a to <2 x i64> 7679 %1 = bitcast <2 x i64> %__b to <2 x i64> 7680 %2 = icmp sgt <2 x i64> %0, %1 7681 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 7682 %4 = bitcast <8 x i1> %3 to i8 7683 ret i8 %4 7684} 7685 7686define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 7687; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: 7688; VLX: # %bb.0: # %entry 7689; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 7690; VLX-NEXT: kmovd %k0, %eax 7691; VLX-NEXT: # kill: def $al killed $al killed $eax 7692; VLX-NEXT: retq 7693; 7694; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: 7695; NoVLX: # %bb.0: # %entry 7696; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 
def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Masked register/register compare: lanes 0 and 1 of %__u (bitcast to <8 x i1>) are
; ANDed with the icmp sgt result before it is zero-padded to <8 x i1>.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0,
i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Masked compare with the right-hand operand loaded as a <2 x i64> vector from %__b;
; lanes 0 and 1 of %__u (bitcast to <8 x i1>) gate the icmp sgt result.
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}


; Broadcast-memory compare: a scalar i64 loaded from %__b is splatted to both lanes
; (insertelement + shufflevector <0, 0>) before the icmp sgt.
define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; NoVLX:       # %bb.0: #
%entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Masked broadcast-memory compare: the splatted i64 from %__b is compared against %__a
; and the result is ANDed with lanes 0 and 1 of %__u (bitcast to <8 x i1>).
define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
%extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}


; Unmasked register/register compare: icmp sgt on <2 x i64>; the <2 x i1> result is
; padded with zeroinitializer lanes to <16 x i1> and returned as i16.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Same compare with the right-hand operand loaded as a <2 x i64> vector from %__b.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed
$xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked register/register compare: lanes 0 and 1 of %__u (bitcast to <8 x i1>) are
; ANDed with the icmp sgt result before it is zero-padded to <16 x i1>.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 =
shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Masked compare with the right-hand operand loaded as a <2 x i64> vector from %__b;
; lanes 0 and 1 of %__u (bitcast to <8 x i1>) gate the icmp sgt result.
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; Broadcast-memory compare: a scalar i64 loaded from %__b is splatted to both lanes
; (insertelement + shufflevector <0, 0>) before the icmp sgt.
define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked broadcast-memory compare: the splatted i64 from %__b is compared against %__a
; and the result is ANDed with lanes 0 and 1 of %__u (bitcast to <8 x i1>).
define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to
<2 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; Unmasked register/register compare: icmp sgt on <2 x i64>; the <2 x i1> result is
; padded with zeroinitializer lanes to <32 x i1> and returned as i32.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Same compare with the right-hand operand loaded as a <2 x i64> vector from %__b.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL:
test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked register/register compare: lanes 0 and 1 of %__u (bitcast to <8 x i1>) are
; ANDed with the icmp sgt result before it is zero-padded to <32 x i1>.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast
<2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Masked compare with the right-hand operand loaded as a <2 x i64> vector from %__b;
; lanes 0 and 1 of %__u (bitcast to <8 x i1>) gate the icmp sgt result.
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %xmm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2,
i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Broadcast-memory compare: a scalar i64 loaded from %__b is splatted to both lanes
; (insertelement + shufflevector <0, 0>) before the icmp sgt; result widened to <32 x i1>.
define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked broadcast-memory compare: the splatted i64 from %__b is compared against %__a
; and the result is ANDed with lanes 0 and 1 of %__u (bitcast to <8 x i1>).
define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpgtq
(%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Unmasked register/register compare: icmp sgt on <2 x i64>; the <2 x i1> result is
; padded with zeroinitializer lanes to <64 x i1> and returned as i64.
define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $14, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp sgt <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32
2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8235 %4 = bitcast <64 x i1> %3 to i64 8236 ret i64 %4 8237} 8238 8239define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 8240; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: 8241; VLX: # %bb.0: # %entry 8242; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 8243; VLX-NEXT: kmovq %k0, %rax 8244; VLX-NEXT: retq 8245; 8246; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: 8247; NoVLX: # %bb.0: # %entry 8248; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8249; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 8250; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8251; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8252; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8253; NoVLX-NEXT: kmovw %k0, %eax 8254; NoVLX-NEXT: vzeroupper 8255; NoVLX-NEXT: retq 8256entry: 8257 %0 = bitcast <2 x i64> %__a to <2 x i64> 8258 %load = load <2 x i64>, ptr %__b 8259 %1 = bitcast <2 x i64> %load to <2 x i64> 8260 %2 = icmp sgt <2 x i64> %0, %1 8261 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8262 %4 = bitcast <64 x i1> %3 to i64 8263 ret i64 %4 8264} 8265 8266define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) 
local_unnamed_addr { 8267; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask: 8268; VLX: # %bb.0: # %entry 8269; VLX-NEXT: kmovd %edi, %k1 8270; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} 8271; VLX-NEXT: kmovq %k0, %rax 8272; VLX-NEXT: retq 8273; 8274; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask: 8275; NoVLX: # %bb.0: # %entry 8276; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 8277; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8278; NoVLX-NEXT: kmovw %edi, %k1 8279; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8280; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8281; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8282; NoVLX-NEXT: kmovw %k0, %eax 8283; NoVLX-NEXT: vzeroupper 8284; NoVLX-NEXT: retq 8285entry: 8286 %0 = bitcast <2 x i64> %__a to <2 x i64> 8287 %1 = bitcast <2 x i64> %__b to <2 x i64> 8288 %2 = icmp sgt <2 x i64> %0, %1 8289 %3 = bitcast i8 %__u to <8 x i1> 8290 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8291 %4 = and <2 x i1> %2, %extract.i 8292 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8293 %6 = bitcast <64 x i1> %5 to i64 8294 ret i64 %6 8295} 8296 8297define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 8298; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: 8299; VLX: # %bb.0: # %entry 8300; VLX-NEXT: kmovd %edi, %k1 8301; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} 8302; VLX-NEXT: kmovq %k0, %rax 8303; VLX-NEXT: retq 8304; 8305; NoVLX-LABEL: 
test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: 8306; NoVLX: # %bb.0: # %entry 8307; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8308; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 8309; NoVLX-NEXT: kmovw %edi, %k1 8310; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8311; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8312; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8313; NoVLX-NEXT: kmovw %k0, %eax 8314; NoVLX-NEXT: vzeroupper 8315; NoVLX-NEXT: retq 8316entry: 8317 %0 = bitcast <2 x i64> %__a to <2 x i64> 8318 %load = load <2 x i64>, ptr %__b 8319 %1 = bitcast <2 x i64> %load to <2 x i64> 8320 %2 = icmp sgt <2 x i64> %0, %1 8321 %3 = bitcast i8 %__u to <8 x i1> 8322 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8323 %4 = and <2 x i1> %2, %extract.i 8324 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8325 %6 = bitcast <64 x i1> %5 to i64 8326 ret i64 %6 8327} 8328 8329 8330define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 8331; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: 8332; VLX: # %bb.0: # %entry 8333; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 8334; VLX-NEXT: kmovq %k0, %rax 8335; VLX-NEXT: retq 8336; 8337; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: 8338; NoVLX: # %bb.0: # %entry 8339; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8340; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 8341; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8342; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8343; NoVLX-NEXT: kmovw %k0, %eax 8344; NoVLX-NEXT: 
vzeroupper 8345; NoVLX-NEXT: retq 8346entry: 8347 %0 = bitcast <2 x i64> %__a to <2 x i64> 8348 %load = load i64, ptr %__b 8349 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 8350 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 8351 %2 = icmp sgt <2 x i64> %0, %1 8352 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8353 %4 = bitcast <64 x i1> %3 to i64 8354 ret i64 %4 8355} 8356 8357define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 8358; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: 8359; VLX: # %bb.0: # %entry 8360; VLX-NEXT: kmovd %edi, %k1 8361; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} 8362; VLX-NEXT: kmovq %k0, %rax 8363; VLX-NEXT: retq 8364; 8365; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: 8366; NoVLX: # %bb.0: # %entry 8367; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 8368; NoVLX-NEXT: kmovw %edi, %k1 8369; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 8370; NoVLX-NEXT: kshiftlw $14, %k0, %k0 8371; NoVLX-NEXT: kshiftrw $14, %k0, %k0 8372; NoVLX-NEXT: kmovw %k0, %eax 8373; NoVLX-NEXT: vzeroupper 8374; NoVLX-NEXT: retq 8375entry: 8376 %0 = bitcast <2 x i64> %__a to <2 x i64> 8377 %load = load i64, ptr %__b 8378 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 8379 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 8380 %2 = icmp sgt <2 x i64> %0, %1 8381 %3 = bitcast i8 %__u to <8 x i1> 8382 %extract.i = 
shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 8383 %4 = and <2 x i1> %extract.i, %2 8384 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 8385 %6 = bitcast <64 x i1> %5 to i64 8386 ret i64 %6 8387} 8388 8389 8390define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8391; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: 8392; VLX: # %bb.0: # %entry 8393; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 8394; VLX-NEXT: kmovd %k0, %eax 8395; VLX-NEXT: # kill: def $al killed $al killed $eax 8396; VLX-NEXT: vzeroupper 8397; VLX-NEXT: retq 8398; 8399; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: 8400; NoVLX: # %bb.0: # %entry 8401; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8402; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8403; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8404; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8405; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8406; NoVLX-NEXT: kmovw %k0, %eax 8407; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8408; NoVLX-NEXT: vzeroupper 8409; NoVLX-NEXT: retq 8410entry: 8411 %0 = bitcast <4 x i64> %__a to <4 x i64> 8412 %1 = bitcast <4 x i64> %__b to <4 x i64> 8413 %2 = icmp sgt <4 x i64> %0, %1 8414 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8415 %4 = bitcast <8 x i1> %3 to i8 8416 ret i8 %4 8417} 8418 8419define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 8420; VLX-LABEL: 
test_vpcmpsgtq_v4i1_v8i1_mask_mem: 8421; VLX: # %bb.0: # %entry 8422; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 8423; VLX-NEXT: kmovd %k0, %eax 8424; VLX-NEXT: # kill: def $al killed $al killed $eax 8425; VLX-NEXT: vzeroupper 8426; VLX-NEXT: retq 8427; 8428; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: 8429; NoVLX: # %bb.0: # %entry 8430; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8431; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 8432; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8433; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8434; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8435; NoVLX-NEXT: kmovw %k0, %eax 8436; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8437; NoVLX-NEXT: vzeroupper 8438; NoVLX-NEXT: retq 8439entry: 8440 %0 = bitcast <4 x i64> %__a to <4 x i64> 8441 %load = load <4 x i64>, ptr %__b 8442 %1 = bitcast <4 x i64> %load to <4 x i64> 8443 %2 = icmp sgt <4 x i64> %0, %1 8444 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8445 %4 = bitcast <8 x i1> %3 to i8 8446 ret i8 %4 8447} 8448 8449define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8450; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: 8451; VLX: # %bb.0: # %entry 8452; VLX-NEXT: kmovd %edi, %k1 8453; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} 8454; VLX-NEXT: kmovd %k0, %eax 8455; VLX-NEXT: # kill: def $al killed $al killed $eax 8456; VLX-NEXT: vzeroupper 8457; VLX-NEXT: retq 8458; 8459; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: 8460; NoVLX: # %bb.0: # %entry 8461; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8462; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8463; NoVLX-NEXT: kmovw %edi, %k1 8464; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8465; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8466; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8467; NoVLX-NEXT: kmovw %k0, %eax 8468; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8469; NoVLX-NEXT: 
vzeroupper 8470; NoVLX-NEXT: retq 8471entry: 8472 %0 = bitcast <4 x i64> %__a to <4 x i64> 8473 %1 = bitcast <4 x i64> %__b to <4 x i64> 8474 %2 = icmp sgt <4 x i64> %0, %1 8475 %3 = bitcast i8 %__u to <8 x i1> 8476 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8477 %4 = and <4 x i1> %2, %extract.i 8478 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8479 %6 = bitcast <8 x i1> %5 to i8 8480 ret i8 %6 8481} 8482 8483define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 8484; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: 8485; VLX: # %bb.0: # %entry 8486; VLX-NEXT: kmovd %edi, %k1 8487; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} 8488; VLX-NEXT: kmovd %k0, %eax 8489; VLX-NEXT: # kill: def $al killed $al killed $eax 8490; VLX-NEXT: vzeroupper 8491; VLX-NEXT: retq 8492; 8493; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: 8494; NoVLX: # %bb.0: # %entry 8495; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8496; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 8497; NoVLX-NEXT: kmovw %edi, %k1 8498; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8499; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8500; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8501; NoVLX-NEXT: kmovw %k0, %eax 8502; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8503; NoVLX-NEXT: vzeroupper 8504; NoVLX-NEXT: retq 8505entry: 8506 %0 = bitcast <4 x i64> %__a to <4 x i64> 8507 %load = load <4 x i64>, ptr %__b 8508 %1 = bitcast <4 x i64> %load to <4 x i64> 8509 %2 = icmp sgt <4 x i64> %0, %1 8510 %3 = bitcast i8 %__u to <8 x i1> 8511 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8512 %4 = and <4 x i1> %2, %extract.i 8513 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8514 %6 = bitcast <8 x i1> %5 to 
i8 8515 ret i8 %6 8516} 8517 8518 8519define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 8520; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: 8521; VLX: # %bb.0: # %entry 8522; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 8523; VLX-NEXT: kmovd %k0, %eax 8524; VLX-NEXT: # kill: def $al killed $al killed $eax 8525; VLX-NEXT: vzeroupper 8526; VLX-NEXT: retq 8527; 8528; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: 8529; NoVLX: # %bb.0: # %entry 8530; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8531; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 8532; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8533; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8534; NoVLX-NEXT: kmovw %k0, %eax 8535; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8536; NoVLX-NEXT: vzeroupper 8537; NoVLX-NEXT: retq 8538entry: 8539 %0 = bitcast <4 x i64> %__a to <4 x i64> 8540 %load = load i64, ptr %__b 8541 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 8542 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 8543 %2 = icmp sgt <4 x i64> %0, %1 8544 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8545 %4 = bitcast <8 x i1> %3 to i8 8546 ret i8 %4 8547} 8548 8549define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 8550; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: 8551; VLX: # %bb.0: # %entry 8552; VLX-NEXT: kmovd %edi, %k1 8553; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} 8554; VLX-NEXT: kmovd %k0, %eax 8555; VLX-NEXT: # kill: def $al killed $al killed $eax 8556; VLX-NEXT: vzeroupper 8557; VLX-NEXT: retq 8558; 8559; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: 8560; NoVLX: # %bb.0: # %entry 8561; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8562; NoVLX-NEXT: kmovw %edi, %k1 8563; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 
{%k1} 8564; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8565; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8566; NoVLX-NEXT: kmovw %k0, %eax 8567; NoVLX-NEXT: # kill: def $al killed $al killed $eax 8568; NoVLX-NEXT: vzeroupper 8569; NoVLX-NEXT: retq 8570entry: 8571 %0 = bitcast <4 x i64> %__a to <4 x i64> 8572 %load = load i64, ptr %__b 8573 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 8574 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 8575 %2 = icmp sgt <4 x i64> %0, %1 8576 %3 = bitcast i8 %__u to <8 x i1> 8577 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8578 %4 = and <4 x i1> %extract.i, %2 8579 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 8580 %6 = bitcast <8 x i1> %5 to i8 8581 ret i8 %6 8582} 8583 8584 8585define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8586; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: 8587; VLX: # %bb.0: # %entry 8588; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 8589; VLX-NEXT: kmovd %k0, %eax 8590; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8591; VLX-NEXT: vzeroupper 8592; VLX-NEXT: retq 8593; 8594; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: 8595; NoVLX: # %bb.0: # %entry 8596; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8597; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8598; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8599; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8600; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8601; NoVLX-NEXT: kmovw %k0, %eax 8602; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8603; NoVLX-NEXT: vzeroupper 8604; NoVLX-NEXT: retq 8605entry: 8606 %0 = bitcast <4 x i64> %__a to <4 x i64> 8607 %1 = bitcast <4 x i64> %__b to <4 x i64> 8608 %2 = icmp sgt <4 x i64> %0, %1 8609 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 
4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8610 %4 = bitcast <16 x i1> %3 to i16 8611 ret i16 %4 8612} 8613 8614define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 8615; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: 8616; VLX: # %bb.0: # %entry 8617; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 8618; VLX-NEXT: kmovd %k0, %eax 8619; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8620; VLX-NEXT: vzeroupper 8621; VLX-NEXT: retq 8622; 8623; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: 8624; NoVLX: # %bb.0: # %entry 8625; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8626; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 8627; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8628; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8629; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8630; NoVLX-NEXT: kmovw %k0, %eax 8631; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8632; NoVLX-NEXT: vzeroupper 8633; NoVLX-NEXT: retq 8634entry: 8635 %0 = bitcast <4 x i64> %__a to <4 x i64> 8636 %load = load <4 x i64>, ptr %__b 8637 %1 = bitcast <4 x i64> %load to <4 x i64> 8638 %2 = icmp sgt <4 x i64> %0, %1 8639 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8640 %4 = bitcast <16 x i1> %3 to i16 8641 ret i16 %4 8642} 8643 8644define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8645; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: 8646; VLX: # %bb.0: # %entry 8647; VLX-NEXT: kmovd %edi, %k1 8648; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} 8649; VLX-NEXT: kmovd %k0, %eax 8650; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8651; VLX-NEXT: vzeroupper 8652; VLX-NEXT: retq 8653; 8654; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: 8655; NoVLX: # %bb.0: # %entry 8656; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8657; NoVLX-NEXT: # kill: def $ymm0 
killed $ymm0 def $zmm0 8658; NoVLX-NEXT: kmovw %edi, %k1 8659; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8660; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8661; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8662; NoVLX-NEXT: kmovw %k0, %eax 8663; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8664; NoVLX-NEXT: vzeroupper 8665; NoVLX-NEXT: retq 8666entry: 8667 %0 = bitcast <4 x i64> %__a to <4 x i64> 8668 %1 = bitcast <4 x i64> %__b to <4 x i64> 8669 %2 = icmp sgt <4 x i64> %0, %1 8670 %3 = bitcast i8 %__u to <8 x i1> 8671 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8672 %4 = and <4 x i1> %2, %extract.i 8673 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8674 %6 = bitcast <16 x i1> %5 to i16 8675 ret i16 %6 8676} 8677 8678define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 8679; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: 8680; VLX: # %bb.0: # %entry 8681; VLX-NEXT: kmovd %edi, %k1 8682; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} 8683; VLX-NEXT: kmovd %k0, %eax 8684; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8685; VLX-NEXT: vzeroupper 8686; VLX-NEXT: retq 8687; 8688; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: 8689; NoVLX: # %bb.0: # %entry 8690; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8691; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 8692; NoVLX-NEXT: kmovw %edi, %k1 8693; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8694; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8695; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8696; NoVLX-NEXT: kmovw %k0, %eax 8697; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8698; NoVLX-NEXT: vzeroupper 8699; NoVLX-NEXT: retq 8700entry: 8701 %0 = bitcast <4 x i64> %__a to <4 x i64> 8702 %load = load <4 x i64>, ptr %__b 8703 %1 = bitcast <4 x i64> %load to <4 x i64> 8704 %2 = 
icmp sgt <4 x i64> %0, %1 8705 %3 = bitcast i8 %__u to <8 x i1> 8706 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8707 %4 = and <4 x i1> %2, %extract.i 8708 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8709 %6 = bitcast <16 x i1> %5 to i16 8710 ret i16 %6 8711} 8712 8713 8714define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 8715; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: 8716; VLX: # %bb.0: # %entry 8717; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 8718; VLX-NEXT: kmovd %k0, %eax 8719; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8720; VLX-NEXT: vzeroupper 8721; VLX-NEXT: retq 8722; 8723; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: 8724; NoVLX: # %bb.0: # %entry 8725; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8726; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 8727; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8728; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8729; NoVLX-NEXT: kmovw %k0, %eax 8730; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8731; NoVLX-NEXT: vzeroupper 8732; NoVLX-NEXT: retq 8733entry: 8734 %0 = bitcast <4 x i64> %__a to <4 x i64> 8735 %load = load i64, ptr %__b 8736 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 8737 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 8738 %2 = icmp sgt <4 x i64> %0, %1 8739 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8740 %4 = bitcast <16 x i1> %3 to i16 8741 ret i16 %4 8742} 8743 8744define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 8745; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: 8746; 
VLX: # %bb.0: # %entry 8747; VLX-NEXT: kmovd %edi, %k1 8748; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} 8749; VLX-NEXT: kmovd %k0, %eax 8750; VLX-NEXT: # kill: def $ax killed $ax killed $eax 8751; VLX-NEXT: vzeroupper 8752; VLX-NEXT: retq 8753; 8754; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: 8755; NoVLX: # %bb.0: # %entry 8756; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8757; NoVLX-NEXT: kmovw %edi, %k1 8758; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 8759; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8760; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8761; NoVLX-NEXT: kmovw %k0, %eax 8762; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 8763; NoVLX-NEXT: vzeroupper 8764; NoVLX-NEXT: retq 8765entry: 8766 %0 = bitcast <4 x i64> %__a to <4 x i64> 8767 %load = load i64, ptr %__b 8768 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 8769 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 8770 %2 = icmp sgt <4 x i64> %0, %1 8771 %3 = bitcast i8 %__u to <8 x i1> 8772 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8773 %4 = and <4 x i1> %extract.i, %2 8774 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8775 %6 = bitcast <16 x i1> %5 to i16 8776 ret i16 %6 8777} 8778 8779 8780define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8781; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask: 8782; VLX: # %bb.0: # %entry 8783; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 8784; VLX-NEXT: kmovd %k0, %eax 8785; VLX-NEXT: vzeroupper 8786; VLX-NEXT: retq 8787; 8788; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask: 8789; NoVLX: # %bb.0: # %entry 8790; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8791; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8792; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 
8793; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8794; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8795; NoVLX-NEXT: kmovw %k0, %eax 8796; NoVLX-NEXT: vzeroupper 8797; NoVLX-NEXT: retq 8798entry: 8799 %0 = bitcast <4 x i64> %__a to <4 x i64> 8800 %1 = bitcast <4 x i64> %__b to <4 x i64> 8801 %2 = icmp sgt <4 x i64> %0, %1 8802 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8803 %4 = bitcast <32 x i1> %3 to i32 8804 ret i32 %4 8805} 8806 8807define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 8808; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: 8809; VLX: # %bb.0: # %entry 8810; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 8811; VLX-NEXT: kmovd %k0, %eax 8812; VLX-NEXT: vzeroupper 8813; VLX-NEXT: retq 8814; 8815; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: 8816; NoVLX: # %bb.0: # %entry 8817; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8818; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 8819; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8820; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8821; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8822; NoVLX-NEXT: kmovw %k0, %eax 8823; NoVLX-NEXT: vzeroupper 8824; NoVLX-NEXT: retq 8825entry: 8826 %0 = bitcast <4 x i64> %__a to <4 x i64> 8827 %load = load <4 x i64>, ptr %__b 8828 %1 = bitcast <4 x i64> %load to <4 x i64> 8829 %2 = icmp sgt <4 x i64> %0, %1 8830 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8831 %4 = bitcast <32 x i1> %3 to i32 8832 ret i32 %4 8833} 8834 8835define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 
zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8836; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: 8837; VLX: # %bb.0: # %entry 8838; VLX-NEXT: kmovd %edi, %k1 8839; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} 8840; VLX-NEXT: kmovd %k0, %eax 8841; VLX-NEXT: vzeroupper 8842; VLX-NEXT: retq 8843; 8844; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: 8845; NoVLX: # %bb.0: # %entry 8846; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8847; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8848; NoVLX-NEXT: kmovw %edi, %k1 8849; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8850; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8851; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8852; NoVLX-NEXT: kmovw %k0, %eax 8853; NoVLX-NEXT: vzeroupper 8854; NoVLX-NEXT: retq 8855entry: 8856 %0 = bitcast <4 x i64> %__a to <4 x i64> 8857 %1 = bitcast <4 x i64> %__b to <4 x i64> 8858 %2 = icmp sgt <4 x i64> %0, %1 8859 %3 = bitcast i8 %__u to <8 x i1> 8860 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8861 %4 = and <4 x i1> %2, %extract.i 8862 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8863 %6 = bitcast <32 x i1> %5 to i32 8864 ret i32 %6 8865} 8866 8867define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 8868; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: 8869; VLX: # %bb.0: # %entry 8870; VLX-NEXT: kmovd %edi, %k1 8871; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} 8872; VLX-NEXT: kmovd %k0, %eax 8873; VLX-NEXT: vzeroupper 8874; VLX-NEXT: retq 8875; 8876; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: 8877; NoVLX: # %bb.0: # %entry 8878; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 
8879; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 8880; NoVLX-NEXT: kmovw %edi, %k1 8881; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 8882; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8883; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8884; NoVLX-NEXT: kmovw %k0, %eax 8885; NoVLX-NEXT: vzeroupper 8886; NoVLX-NEXT: retq 8887entry: 8888 %0 = bitcast <4 x i64> %__a to <4 x i64> 8889 %load = load <4 x i64>, ptr %__b 8890 %1 = bitcast <4 x i64> %load to <4 x i64> 8891 %2 = icmp sgt <4 x i64> %0, %1 8892 %3 = bitcast i8 %__u to <8 x i1> 8893 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8894 %4 = and <4 x i1> %2, %extract.i 8895 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8896 %6 = bitcast <32 x i1> %5 to i32 8897 ret i32 %6 8898} 8899 8900 8901define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 8902; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: 8903; VLX: # %bb.0: # %entry 8904; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 8905; VLX-NEXT: kmovd %k0, %eax 8906; VLX-NEXT: vzeroupper 8907; VLX-NEXT: retq 8908; 8909; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: 8910; NoVLX: # %bb.0: # %entry 8911; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8912; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 8913; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8914; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8915; NoVLX-NEXT: kmovw %k0, %eax 8916; NoVLX-NEXT: vzeroupper 8917; NoVLX-NEXT: retq 8918entry: 8919 %0 = bitcast <4 x i64> %__a to <4 x i64> 8920 %load = load i64, ptr %__b 8921 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 8922 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 8923 %2 = icmp sgt <4 x i64> %0, %1 8924 %3 
= shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8925 %4 = bitcast <32 x i1> %3 to i32 8926 ret i32 %4 8927} 8928 8929define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 8930; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: 8931; VLX: # %bb.0: # %entry 8932; VLX-NEXT: kmovd %edi, %k1 8933; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} 8934; VLX-NEXT: kmovd %k0, %eax 8935; VLX-NEXT: vzeroupper 8936; VLX-NEXT: retq 8937; 8938; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: 8939; NoVLX: # %bb.0: # %entry 8940; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8941; NoVLX-NEXT: kmovw %edi, %k1 8942; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 8943; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8944; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8945; NoVLX-NEXT: kmovw %k0, %eax 8946; NoVLX-NEXT: vzeroupper 8947; NoVLX-NEXT: retq 8948entry: 8949 %0 = bitcast <4 x i64> %__a to <4 x i64> 8950 %load = load i64, ptr %__b 8951 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 8952 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 8953 %2 = icmp sgt <4 x i64> %0, %1 8954 %3 = bitcast i8 %__u to <8 x i1> 8955 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 8956 %4 = and <4 x i1> %extract.i, %2 8957 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8958 %6 = bitcast <32 x i1> %5 to i32 8959 ret i32 %6 8960} 8961 8962 
8963define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 8964; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: 8965; VLX: # %bb.0: # %entry 8966; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 8967; VLX-NEXT: kmovq %k0, %rax 8968; VLX-NEXT: vzeroupper 8969; VLX-NEXT: retq 8970; 8971; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: 8972; NoVLX: # %bb.0: # %entry 8973; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 8974; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 8975; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 8976; NoVLX-NEXT: kshiftlw $12, %k0, %k0 8977; NoVLX-NEXT: kshiftrw $12, %k0, %k0 8978; NoVLX-NEXT: kmovw %k0, %eax 8979; NoVLX-NEXT: vzeroupper 8980; NoVLX-NEXT: retq 8981entry: 8982 %0 = bitcast <4 x i64> %__a to <4 x i64> 8983 %1 = bitcast <4 x i64> %__b to <4 x i64> 8984 %2 = icmp sgt <4 x i64> %0, %1 8985 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 8986 %4 = bitcast <64 x i1> %3 to i64 8987 ret i64 %4 8988} 8989 8990define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 8991; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: 8992; VLX: # %bb.0: # %entry 8993; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 8994; VLX-NEXT: kmovq %k0, %rax 8995; VLX-NEXT: vzeroupper 8996; VLX-NEXT: retq 8997; 8998; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: 8999; NoVLX: # %bb.0: # %entry 9000; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9001; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 9002; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, 
%k0 9003; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9004; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9005; NoVLX-NEXT: kmovw %k0, %eax 9006; NoVLX-NEXT: vzeroupper 9007; NoVLX-NEXT: retq 9008entry: 9009 %0 = bitcast <4 x i64> %__a to <4 x i64> 9010 %load = load <4 x i64>, ptr %__b 9011 %1 = bitcast <4 x i64> %load to <4 x i64> 9012 %2 = icmp sgt <4 x i64> %0, %1 9013 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9014 %4 = bitcast <64 x i1> %3 to i64 9015 ret i64 %4 9016} 9017 9018define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 9019; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: 9020; VLX: # %bb.0: # %entry 9021; VLX-NEXT: kmovd %edi, %k1 9022; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} 9023; VLX-NEXT: kmovq %k0, %rax 9024; VLX-NEXT: vzeroupper 9025; VLX-NEXT: retq 9026; 9027; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: 9028; NoVLX: # %bb.0: # %entry 9029; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 9030; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9031; NoVLX-NEXT: kmovw %edi, %k1 9032; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9033; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9034; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9035; NoVLX-NEXT: kmovw %k0, %eax 9036; NoVLX-NEXT: vzeroupper 9037; NoVLX-NEXT: retq 9038entry: 9039 %0 = bitcast <4 x i64> %__a to <4 x i64> 9040 %1 = bitcast <4 x i64> %__b to <4 x i64> 9041 %2 = icmp sgt <4 x i64> %0, %1 9042 %3 = bitcast i8 %__u to <8 x i1> 9043 %extract.i = shufflevector <8 x i1> 
%3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 9044 %4 = and <4 x i1> %2, %extract.i 9045 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9046 %6 = bitcast <64 x i1> %5 to i64 9047 ret i64 %6 9048} 9049 9050define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 9051; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: 9052; VLX: # %bb.0: # %entry 9053; VLX-NEXT: kmovd %edi, %k1 9054; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} 9055; VLX-NEXT: kmovq %k0, %rax 9056; VLX-NEXT: vzeroupper 9057; VLX-NEXT: retq 9058; 9059; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: 9060; NoVLX: # %bb.0: # %entry 9061; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9062; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 9063; NoVLX-NEXT: kmovw %edi, %k1 9064; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9065; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9066; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9067; NoVLX-NEXT: kmovw %k0, %eax 9068; NoVLX-NEXT: vzeroupper 9069; NoVLX-NEXT: retq 9070entry: 9071 %0 = bitcast <4 x i64> %__a to <4 x i64> 9072 %load = load <4 x i64>, ptr %__b 9073 %1 = bitcast <4 x i64> %load to <4 x i64> 9074 %2 = icmp sgt <4 x i64> %0, %1 9075 %3 = bitcast i8 %__u to <8 x i1> 9076 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 9077 %4 = and <4 x i1> %2, %extract.i 9078 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9079 %6 = bitcast <64 x i1> %5 to i64 9080 ret i64 %6 9081} 9082 9083 9084define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 9085; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: 9086; VLX: # %bb.0: # %entry 9087; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 9088; VLX-NEXT: kmovq %k0, %rax 9089; VLX-NEXT: vzeroupper 9090; VLX-NEXT: retq 9091; 9092; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: 9093; NoVLX: # %bb.0: # %entry 9094; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9095; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9096; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9097; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9098; NoVLX-NEXT: kmovw %k0, %eax 9099; NoVLX-NEXT: vzeroupper 9100; NoVLX-NEXT: retq 9101entry: 9102 %0 = bitcast <4 x i64> %__a to <4 x i64> 9103 %load = load i64, ptr %__b 9104 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 9105 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 9106 %2 = icmp sgt <4 x i64> %0, %1 9107 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9108 %4 = bitcast <64 x i1> %3 to 
i64 9109 ret i64 %4 9110} 9111 9112define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 9113; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: 9114; VLX: # %bb.0: # %entry 9115; VLX-NEXT: kmovd %edi, %k1 9116; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} 9117; VLX-NEXT: kmovq %k0, %rax 9118; VLX-NEXT: vzeroupper 9119; VLX-NEXT: retq 9120; 9121; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: 9122; NoVLX: # %bb.0: # %entry 9123; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 9124; NoVLX-NEXT: kmovw %edi, %k1 9125; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9126; NoVLX-NEXT: kshiftlw $12, %k0, %k0 9127; NoVLX-NEXT: kshiftrw $12, %k0, %k0 9128; NoVLX-NEXT: kmovw %k0, %eax 9129; NoVLX-NEXT: vzeroupper 9130; NoVLX-NEXT: retq 9131entry: 9132 %0 = bitcast <4 x i64> %__a to <4 x i64> 9133 %load = load i64, ptr %__b 9134 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 9135 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 9136 %2 = icmp sgt <4 x i64> %0, %1 9137 %3 = bitcast i8 %__u to <8 x i1> 9138 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 9139 %4 = and <4 x i1> %extract.i, %2 9140 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 9141 %6 = bitcast <64 x i1> %5 to i64 9142 ret i64 %6 9143} 9144 9145 9146define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 
9147; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask: 9148; VLX: # %bb.0: # %entry 9149; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9150; VLX-NEXT: kmovd %k0, %eax 9151; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9152; VLX-NEXT: vzeroupper 9153; VLX-NEXT: retq 9154; 9155; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask: 9156; NoVLX: # %bb.0: # %entry 9157; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9158; NoVLX-NEXT: kmovw %k0, %eax 9159; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9160; NoVLX-NEXT: vzeroupper 9161; NoVLX-NEXT: retq 9162entry: 9163 %0 = bitcast <8 x i64> %__a to <8 x i64> 9164 %1 = bitcast <8 x i64> %__b to <8 x i64> 9165 %2 = icmp sgt <8 x i64> %0, %1 9166 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9167 %4 = bitcast <16 x i1> %3 to i16 9168 ret i16 %4 9169} 9170 9171define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 9172; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem: 9173; VLX: # %bb.0: # %entry 9174; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9175; VLX-NEXT: kmovd %k0, %eax 9176; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9177; VLX-NEXT: vzeroupper 9178; VLX-NEXT: retq 9179; 9180; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem: 9181; NoVLX: # %bb.0: # %entry 9182; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9183; NoVLX-NEXT: kmovw %k0, %eax 9184; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9185; NoVLX-NEXT: vzeroupper 9186; NoVLX-NEXT: retq 9187entry: 9188 %0 = bitcast <8 x i64> %__a to <8 x i64> 9189 %load = load <8 x i64>, ptr %__b 9190 %1 = bitcast <8 x i64> %load to <8 x i64> 9191 %2 = icmp sgt <8 x i64> %0, %1 9192 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9193 %4 = bitcast <16 x i1> %3 to i16 9194 ret i16 
%4 9195} 9196 9197define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9198; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask: 9199; VLX: # %bb.0: # %entry 9200; VLX-NEXT: kmovd %edi, %k1 9201; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9202; VLX-NEXT: kmovd %k0, %eax 9203; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9204; VLX-NEXT: vzeroupper 9205; VLX-NEXT: retq 9206; 9207; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask: 9208; NoVLX: # %bb.0: # %entry 9209; NoVLX-NEXT: kmovw %edi, %k1 9210; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9211; NoVLX-NEXT: kmovw %k0, %eax 9212; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9213; NoVLX-NEXT: vzeroupper 9214; NoVLX-NEXT: retq 9215entry: 9216 %0 = bitcast <8 x i64> %__a to <8 x i64> 9217 %1 = bitcast <8 x i64> %__b to <8 x i64> 9218 %2 = icmp sgt <8 x i64> %0, %1 9219 %3 = bitcast i8 %__u to <8 x i1> 9220 %4 = and <8 x i1> %2, %3 9221 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9222 %6 = bitcast <16 x i1> %5 to i16 9223 ret i16 %6 9224} 9225 9226define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 9227; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem: 9228; VLX: # %bb.0: # %entry 9229; VLX-NEXT: kmovd %edi, %k1 9230; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9231; VLX-NEXT: kmovd %k0, %eax 9232; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9233; VLX-NEXT: vzeroupper 9234; VLX-NEXT: retq 9235; 9236; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem: 9237; NoVLX: # %bb.0: # %entry 9238; NoVLX-NEXT: kmovw %edi, %k1 9239; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9240; NoVLX-NEXT: kmovw %k0, %eax 9241; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9242; NoVLX-NEXT: vzeroupper 9243; NoVLX-NEXT: retq 
9244entry: 9245 %0 = bitcast <8 x i64> %__a to <8 x i64> 9246 %load = load <8 x i64>, ptr %__b 9247 %1 = bitcast <8 x i64> %load to <8 x i64> 9248 %2 = icmp sgt <8 x i64> %0, %1 9249 %3 = bitcast i8 %__u to <8 x i1> 9250 %4 = and <8 x i1> %2, %3 9251 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9252 %6 = bitcast <16 x i1> %5 to i16 9253 ret i16 %6 9254} 9255 9256 9257define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 9258; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b: 9259; VLX: # %bb.0: # %entry 9260; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9261; VLX-NEXT: kmovd %k0, %eax 9262; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9263; VLX-NEXT: vzeroupper 9264; VLX-NEXT: retq 9265; 9266; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b: 9267; NoVLX: # %bb.0: # %entry 9268; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9269; NoVLX-NEXT: kmovw %k0, %eax 9270; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9271; NoVLX-NEXT: vzeroupper 9272; NoVLX-NEXT: retq 9273entry: 9274 %0 = bitcast <8 x i64> %__a to <8 x i64> 9275 %load = load i64, ptr %__b 9276 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9277 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9278 %2 = icmp sgt <8 x i64> %0, %1 9279 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9280 %4 = bitcast <16 x i1> %3 to i16 9281 ret i16 %4 9282} 9283 9284define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 9285; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b: 9286; VLX: # %bb.0: # %entry 9287; VLX-NEXT: kmovd %edi, %k1 
9288; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9289; VLX-NEXT: kmovd %k0, %eax 9290; VLX-NEXT: # kill: def $ax killed $ax killed $eax 9291; VLX-NEXT: vzeroupper 9292; VLX-NEXT: retq 9293; 9294; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b: 9295; NoVLX: # %bb.0: # %entry 9296; NoVLX-NEXT: kmovw %edi, %k1 9297; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9298; NoVLX-NEXT: kmovw %k0, %eax 9299; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 9300; NoVLX-NEXT: vzeroupper 9301; NoVLX-NEXT: retq 9302entry: 9303 %0 = bitcast <8 x i64> %__a to <8 x i64> 9304 %load = load i64, ptr %__b 9305 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9306 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9307 %2 = icmp sgt <8 x i64> %0, %1 9308 %3 = bitcast i8 %__u to <8 x i1> 9309 %4 = and <8 x i1> %3, %2 9310 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9311 %6 = bitcast <16 x i1> %5 to i16 9312 ret i16 %6 9313} 9314 9315 9316define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9317; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: 9318; VLX: # %bb.0: # %entry 9319; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9320; VLX-NEXT: kmovd %k0, %eax 9321; VLX-NEXT: vzeroupper 9322; VLX-NEXT: retq 9323; 9324; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: 9325; NoVLX: # %bb.0: # %entry 9326; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9327; NoVLX-NEXT: kmovw %k0, %eax 9328; NoVLX-NEXT: vzeroupper 9329; NoVLX-NEXT: retq 9330entry: 9331 %0 = bitcast <8 x i64> %__a to <8 x i64> 9332 %1 = bitcast <8 x i64> %__b to <8 x i64> 9333 %2 = icmp sgt <8 x i64> %0, %1 9334 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 
12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9335 %4 = bitcast <32 x i1> %3 to i32 9336 ret i32 %4 9337} 9338 9339define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 9340; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: 9341; VLX: # %bb.0: # %entry 9342; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9343; VLX-NEXT: kmovd %k0, %eax 9344; VLX-NEXT: vzeroupper 9345; VLX-NEXT: retq 9346; 9347; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: 9348; NoVLX: # %bb.0: # %entry 9349; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9350; NoVLX-NEXT: kmovw %k0, %eax 9351; NoVLX-NEXT: vzeroupper 9352; NoVLX-NEXT: retq 9353entry: 9354 %0 = bitcast <8 x i64> %__a to <8 x i64> 9355 %load = load <8 x i64>, ptr %__b 9356 %1 = bitcast <8 x i64> %load to <8 x i64> 9357 %2 = icmp sgt <8 x i64> %0, %1 9358 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9359 %4 = bitcast <32 x i1> %3 to i32 9360 ret i32 %4 9361} 9362 9363define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9364; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: 9365; VLX: # %bb.0: # %entry 9366; VLX-NEXT: kmovd %edi, %k1 9367; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9368; VLX-NEXT: kmovd %k0, %eax 9369; VLX-NEXT: vzeroupper 9370; VLX-NEXT: retq 9371; 9372; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: 9373; NoVLX: # %bb.0: # %entry 9374; NoVLX-NEXT: kmovw %edi, %k1 9375; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9376; NoVLX-NEXT: kmovw %k0, %eax 9377; NoVLX-NEXT: vzeroupper 9378; NoVLX-NEXT: retq 9379entry: 9380 %0 = bitcast <8 x i64> 
%__a to <8 x i64> 9381 %1 = bitcast <8 x i64> %__b to <8 x i64> 9382 %2 = icmp sgt <8 x i64> %0, %1 9383 %3 = bitcast i8 %__u to <8 x i1> 9384 %4 = and <8 x i1> %2, %3 9385 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9386 %6 = bitcast <32 x i1> %5 to i32 9387 ret i32 %6 9388} 9389 9390define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 9391; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: 9392; VLX: # %bb.0: # %entry 9393; VLX-NEXT: kmovd %edi, %k1 9394; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9395; VLX-NEXT: kmovd %k0, %eax 9396; VLX-NEXT: vzeroupper 9397; VLX-NEXT: retq 9398; 9399; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: 9400; NoVLX: # %bb.0: # %entry 9401; NoVLX-NEXT: kmovw %edi, %k1 9402; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9403; NoVLX-NEXT: kmovw %k0, %eax 9404; NoVLX-NEXT: vzeroupper 9405; NoVLX-NEXT: retq 9406entry: 9407 %0 = bitcast <8 x i64> %__a to <8 x i64> 9408 %load = load <8 x i64>, ptr %__b 9409 %1 = bitcast <8 x i64> %load to <8 x i64> 9410 %2 = icmp sgt <8 x i64> %0, %1 9411 %3 = bitcast i8 %__u to <8 x i1> 9412 %4 = and <8 x i1> %2, %3 9413 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9414 %6 = bitcast <32 x i1> %5 to i32 9415 ret i32 %6 9416} 9417 9418 9419define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 9420; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: 
9421; VLX: # %bb.0: # %entry 9422; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9423; VLX-NEXT: kmovd %k0, %eax 9424; VLX-NEXT: vzeroupper 9425; VLX-NEXT: retq 9426; 9427; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: 9428; NoVLX: # %bb.0: # %entry 9429; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9430; NoVLX-NEXT: kmovw %k0, %eax 9431; NoVLX-NEXT: vzeroupper 9432; NoVLX-NEXT: retq 9433entry: 9434 %0 = bitcast <8 x i64> %__a to <8 x i64> 9435 %load = load i64, ptr %__b 9436 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9437 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9438 %2 = icmp sgt <8 x i64> %0, %1 9439 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9440 %4 = bitcast <32 x i1> %3 to i32 9441 ret i32 %4 9442} 9443 9444define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 9445; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: 9446; VLX: # %bb.0: # %entry 9447; VLX-NEXT: kmovd %edi, %k1 9448; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9449; VLX-NEXT: kmovd %k0, %eax 9450; VLX-NEXT: vzeroupper 9451; VLX-NEXT: retq 9452; 9453; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: 9454; NoVLX: # %bb.0: # %entry 9455; NoVLX-NEXT: kmovw %edi, %k1 9456; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9457; NoVLX-NEXT: kmovw %k0, %eax 9458; NoVLX-NEXT: vzeroupper 9459; NoVLX-NEXT: retq 9460entry: 9461 %0 = bitcast <8 x i64> %__a to <8 x i64> 9462 %load = load i64, ptr %__b 9463 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9464 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 0, i32 0> 9465 %2 = icmp sgt <8 x i64> %0, %1 9466 %3 = bitcast i8 %__u to <8 x i1> 9467 %4 = and <8 x i1> %3, %2 9468 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9469 %6 = bitcast <32 x i1> %5 to i32 9470 ret i32 %6 9471} 9472 9473 9474define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9475; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: 9476; VLX: # %bb.0: # %entry 9477; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9478; VLX-NEXT: kmovq %k0, %rax 9479; VLX-NEXT: vzeroupper 9480; VLX-NEXT: retq 9481; 9482; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: 9483; NoVLX: # %bb.0: # %entry 9484; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 9485; NoVLX-NEXT: kmovw %k0, %eax 9486; NoVLX-NEXT: vzeroupper 9487; NoVLX-NEXT: retq 9488entry: 9489 %0 = bitcast <8 x i64> %__a to <8 x i64> 9490 %1 = bitcast <8 x i64> %__b to <8 x i64> 9491 %2 = icmp sgt <8 x i64> %0, %1 9492 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9493 %4 = bitcast <64 x i1> %3 to i64 9494 ret i64 %4 9495} 9496 9497define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 9498; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: 9499; VLX: # %bb.0: # 
%entry 9500; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9501; VLX-NEXT: kmovq %k0, %rax 9502; VLX-NEXT: vzeroupper 9503; VLX-NEXT: retq 9504; 9505; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: 9506; NoVLX: # %bb.0: # %entry 9507; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 9508; NoVLX-NEXT: kmovw %k0, %eax 9509; NoVLX-NEXT: vzeroupper 9510; NoVLX-NEXT: retq 9511entry: 9512 %0 = bitcast <8 x i64> %__a to <8 x i64> 9513 %load = load <8 x i64>, ptr %__b 9514 %1 = bitcast <8 x i64> %load to <8 x i64> 9515 %2 = icmp sgt <8 x i64> %0, %1 9516 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9517 %4 = bitcast <64 x i1> %3 to i64 9518 ret i64 %4 9519} 9520 9521define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 9522; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask: 9523; VLX: # %bb.0: # %entry 9524; VLX-NEXT: kmovd %edi, %k1 9525; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9526; VLX-NEXT: kmovq %k0, %rax 9527; VLX-NEXT: vzeroupper 9528; VLX-NEXT: retq 9529; 9530; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask: 9531; NoVLX: # %bb.0: # %entry 9532; NoVLX-NEXT: kmovw %edi, %k1 9533; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} 9534; NoVLX-NEXT: kmovw %k0, %eax 9535; NoVLX-NEXT: vzeroupper 9536; NoVLX-NEXT: retq 9537entry: 9538 %0 = bitcast <8 x i64> %__a to <8 x i64> 9539 %1 = bitcast <8 x i64> %__b to <8 x i64> 9540 %2 = icmp sgt <8 x i64> %0, %1 9541 %3 = bitcast i8 %__u to <8 x i1> 9542 %4 = and 
<8 x i1> %2, %3 9543 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9544 %6 = bitcast <64 x i1> %5 to i64 9545 ret i64 %6 9546} 9547 9548define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 9549; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: 9550; VLX: # %bb.0: # %entry 9551; VLX-NEXT: kmovd %edi, %k1 9552; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9553; VLX-NEXT: kmovq %k0, %rax 9554; VLX-NEXT: vzeroupper 9555; VLX-NEXT: retq 9556; 9557; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: 9558; NoVLX: # %bb.0: # %entry 9559; NoVLX-NEXT: kmovw %edi, %k1 9560; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} 9561; NoVLX-NEXT: kmovw %k0, %eax 9562; NoVLX-NEXT: vzeroupper 9563; NoVLX-NEXT: retq 9564entry: 9565 %0 = bitcast <8 x i64> %__a to <8 x i64> 9566 %load = load <8 x i64>, ptr %__b 9567 %1 = bitcast <8 x i64> %load to <8 x i64> 9568 %2 = icmp sgt <8 x i64> %0, %1 9569 %3 = bitcast i8 %__u to <8 x i1> 9570 %4 = and <8 x i1> %2, %3 9571 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, 
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9572 %6 = bitcast <64 x i1> %5 to i64 9573 ret i64 %6 9574} 9575 9576 9577define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 9578; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: 9579; VLX: # %bb.0: # %entry 9580; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9581; VLX-NEXT: kmovq %k0, %rax 9582; VLX-NEXT: vzeroupper 9583; VLX-NEXT: retq 9584; 9585; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: 9586; NoVLX: # %bb.0: # %entry 9587; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 9588; NoVLX-NEXT: kmovw %k0, %eax 9589; NoVLX-NEXT: vzeroupper 9590; NoVLX-NEXT: retq 9591entry: 9592 %0 = bitcast <8 x i64> %__a to <8 x i64> 9593 %load = load i64, ptr %__b 9594 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9595 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9596 %2 = icmp sgt <8 x i64> %0, %1 9597 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9598 %4 = bitcast <64 x i1> %3 to i64 9599 ret i64 %4 9600} 9601 9602define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 9603; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: 9604; VLX: # %bb.0: # %entry 9605; VLX-NEXT: kmovd %edi, %k1 9606; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9607; VLX-NEXT: 
kmovq %k0, %rax 9608; VLX-NEXT: vzeroupper 9609; VLX-NEXT: retq 9610; 9611; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: 9612; NoVLX: # %bb.0: # %entry 9613; NoVLX-NEXT: kmovw %edi, %k1 9614; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} 9615; NoVLX-NEXT: kmovw %k0, %eax 9616; NoVLX-NEXT: vzeroupper 9617; NoVLX-NEXT: retq 9618entry: 9619 %0 = bitcast <8 x i64> %__a to <8 x i64> 9620 %load = load i64, ptr %__b 9621 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 9622 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 9623 %2 = icmp sgt <8 x i64> %0, %1 9624 %3 = bitcast i8 %__u to <8 x i1> 9625 %4 = and <8 x i1> %3, %2 9626 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 9627 %6 = bitcast <64 x i1> %5 to i64 9628 ret i64 %6 9629} 9630 9631 9632define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 9633; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: 9634; VLX: # %bb.0: # %entry 9635; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 9636; VLX-NEXT: kmovd %k0, %eax 9637; VLX-NEXT: retq 9638; 9639; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: 9640; NoVLX: # %bb.0: # %entry 9641; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9642; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9643; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9644; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9645; NoVLX-NEXT: kmovw %k0, %eax 9646; NoVLX-NEXT: vzeroupper 9647; NoVLX-NEXT: 
retq 9648entry: 9649 %0 = bitcast <2 x i64> %__a to <16 x i8> 9650 %1 = bitcast <2 x i64> %__b to <16 x i8> 9651 %2 = icmp sge <16 x i8> %0, %1 9652 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9653 %4 = bitcast <32 x i1> %3 to i32 9654 ret i32 %4 9655} 9656 9657define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 9658; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: 9659; VLX: # %bb.0: # %entry 9660; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0 9661; VLX-NEXT: kmovd %k0, %eax 9662; VLX-NEXT: retq 9663; 9664; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: 9665; NoVLX: # %bb.0: # %entry 9666; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 9667; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9668; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9669; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9670; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9671; NoVLX-NEXT: kmovw %k0, %eax 9672; NoVLX-NEXT: vzeroupper 9673; NoVLX-NEXT: retq 9674entry: 9675 %0 = bitcast <2 x i64> %__a to <16 x i8> 9676 %load = load <2 x i64>, ptr %__b 9677 %1 = bitcast <2 x i64> %load to <16 x i8> 9678 %2 = icmp sge <16 x i8> %0, %1 9679 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9680 %4 = bitcast <32 x i1> %3 to i32 9681 ret i32 %4 9682} 9683 9684define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 9685; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: 9686; VLX: # 
%bb.0: # %entry 9687; VLX-NEXT: kmovd %edi, %k1 9688; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} 9689; VLX-NEXT: kmovd %k0, %eax 9690; VLX-NEXT: retq 9691; 9692; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: 9693; NoVLX: # %bb.0: # %entry 9694; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9695; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9696; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9697; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9698; NoVLX-NEXT: kmovw %k0, %eax 9699; NoVLX-NEXT: andl %edi, %eax 9700; NoVLX-NEXT: vzeroupper 9701; NoVLX-NEXT: retq 9702entry: 9703 %0 = bitcast <2 x i64> %__a to <16 x i8> 9704 %1 = bitcast <2 x i64> %__b to <16 x i8> 9705 %2 = icmp sge <16 x i8> %0, %1 9706 %3 = bitcast i16 %__u to <16 x i1> 9707 %4 = and <16 x i1> %2, %3 9708 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9709 %6 = bitcast <32 x i1> %5 to i32 9710 ret i32 %6 9711} 9712 9713define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 9714; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: 9715; VLX: # %bb.0: # %entry 9716; VLX-NEXT: kmovd %edi, %k1 9717; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1} 9718; VLX-NEXT: kmovd %k0, %eax 9719; VLX-NEXT: retq 9720; 9721; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: 9722; NoVLX: # %bb.0: # %entry 9723; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 9724; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9725; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9726; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9727; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9728; NoVLX-NEXT: kmovw %k0, %eax 9729; NoVLX-NEXT: andl %edi, %eax 9730; NoVLX-NEXT: vzeroupper 9731; NoVLX-NEXT: retq 9732entry: 9733 %0 = bitcast <2 x i64> 
%__a to <16 x i8> 9734 %load = load <2 x i64>, ptr %__b 9735 %1 = bitcast <2 x i64> %load to <16 x i8> 9736 %2 = icmp sge <16 x i8> %0, %1 9737 %3 = bitcast i16 %__u to <16 x i1> 9738 %4 = and <16 x i1> %2, %3 9739 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9740 %6 = bitcast <32 x i1> %5 to i32 9741 ret i32 %6 9742} 9743 9744 9745define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 9746; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask: 9747; VLX: # %bb.0: # %entry 9748; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 9749; VLX-NEXT: kmovq %k0, %rax 9750; VLX-NEXT: retq 9751; 9752; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask: 9753; NoVLX: # %bb.0: # %entry 9754; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9755; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9756; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9757; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9758; NoVLX-NEXT: kmovw %k0, %eax 9759; NoVLX-NEXT: vzeroupper 9760; NoVLX-NEXT: retq 9761entry: 9762 %0 = bitcast <2 x i64> %__a to <16 x i8> 9763 %1 = bitcast <2 x i64> %__b to <16 x i8> 9764 %2 = icmp sge <16 x i8> %0, %1 9765 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9766 %4 = bitcast <64 x i1> %3 
to i64 9767 ret i64 %4 9768} 9769 9770define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 9771; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: 9772; VLX: # %bb.0: # %entry 9773; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0 9774; VLX-NEXT: kmovq %k0, %rax 9775; VLX-NEXT: retq 9776; 9777; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: 9778; NoVLX: # %bb.0: # %entry 9779; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 9780; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9781; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9782; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9783; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9784; NoVLX-NEXT: kmovw %k0, %eax 9785; NoVLX-NEXT: vzeroupper 9786; NoVLX-NEXT: retq 9787entry: 9788 %0 = bitcast <2 x i64> %__a to <16 x i8> 9789 %load = load <2 x i64>, ptr %__b 9790 %1 = bitcast <2 x i64> %load to <16 x i8> 9791 %2 = icmp sge <16 x i8> %0, %1 9792 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9793 %4 = bitcast <64 x i1> %3 to i64 9794 ret i64 %4 9795} 9796 9797define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 9798; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: 9799; VLX: # %bb.0: # %entry 9800; VLX-NEXT: kmovd %edi, %k1 9801; VLX-NEXT: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} 9802; VLX-NEXT: kmovq %k0, %rax 9803; VLX-NEXT: retq 9804; 9805; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: 9806; NoVLX: # %bb.0: # 
%entry 9807; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9808; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9809; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9810; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9811; NoVLX-NEXT: kmovw %k0, %eax 9812; NoVLX-NEXT: andl %edi, %eax 9813; NoVLX-NEXT: vzeroupper 9814; NoVLX-NEXT: retq 9815entry: 9816 %0 = bitcast <2 x i64> %__a to <16 x i8> 9817 %1 = bitcast <2 x i64> %__b to <16 x i8> 9818 %2 = icmp sge <16 x i8> %0, %1 9819 %3 = bitcast i16 %__u to <16 x i1> 9820 %4 = and <16 x i1> %2, %3 9821 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9822 %6 = bitcast <64 x i1> %5 to i64 9823 ret i64 %6 9824} 9825 9826define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 9827; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: 9828; VLX: # %bb.0: # %entry 9829; VLX-NEXT: kmovd %edi, %k1 9830; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1} 9831; VLX-NEXT: kmovq %k0, %rax 9832; VLX-NEXT: retq 9833; 9834; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: 9835; NoVLX: # %bb.0: # %entry 9836; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 9837; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 9838; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9839; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9840; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9841; NoVLX-NEXT: kmovw %k0, %eax 9842; NoVLX-NEXT: andl %edi, %eax 9843; NoVLX-NEXT: vzeroupper 9844; NoVLX-NEXT: retq 9845entry: 9846 %0 = 
bitcast <2 x i64> %__a to <16 x i8> 9847 %load = load <2 x i64>, ptr %__b 9848 %1 = bitcast <2 x i64> %load to <16 x i8> 9849 %2 = icmp sge <16 x i8> %0, %1 9850 %3 = bitcast i16 %__u to <16 x i1> 9851 %4 = and <16 x i1> %2, %3 9852 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 9853 %6 = bitcast <64 x i1> %5 to i64 9854 ret i64 %6 9855} 9856 9857 9858define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 9859; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: 9860; VLX: # %bb.0: # %entry 9861; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 9862; VLX-NEXT: kmovq %k0, %rax 9863; VLX-NEXT: vzeroupper 9864; VLX-NEXT: retq 9865; 9866; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: 9867; NoVLX: # %bb.0: # %entry 9868; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 9869; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9870; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 9871; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 9872; NoVLX-NEXT: kmovw %k0, %ecx 9873; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 9874; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9875; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9876; NoVLX-NEXT: kmovw %k0, %eax 9877; NoVLX-NEXT: shll $16, %eax 9878; NoVLX-NEXT: orl %ecx, %eax 9879; NoVLX-NEXT: vzeroupper 9880; NoVLX-NEXT: retq 9881entry: 9882 %0 = bitcast <4 x i64> %__a to <32 x i8> 9883 %1 = bitcast <4 x i64> %__b to <32 x i8> 9884 %2 = icmp sge <32 x i8> %0, %1 9885 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 9886 %4 = bitcast <64 x i1> %3 to i64 9887 ret i64 %4 9888} 9889 9890define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 9891; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: 9892; VLX: # %bb.0: # %entry 9893; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0 9894; VLX-NEXT: kmovq %k0, %rax 9895; VLX-NEXT: vzeroupper 9896; VLX-NEXT: retq 9897; 9898; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: 9899; NoVLX: # %bb.0: # %entry 9900; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 9901; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 9902; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9903; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 9904; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 9905; NoVLX-NEXT: kmovw %k0, %ecx 9906; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 9907; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9908; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9909; NoVLX-NEXT: kmovw %k0, %eax 9910; NoVLX-NEXT: shll $16, %eax 9911; NoVLX-NEXT: orl %ecx, %eax 9912; NoVLX-NEXT: vzeroupper 9913; NoVLX-NEXT: retq 9914entry: 9915 %0 = bitcast <4 x i64> %__a to <32 x i8> 9916 %load = load <4 x i64>, ptr %__b 9917 %1 = bitcast <4 x i64> %load to <32 x i8> 9918 %2 = icmp sge <32 x i8> %0, %1 9919 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, 
i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 9920 %4 = bitcast <64 x i1> %3 to i64 9921 ret i64 %4 9922} 9923 9924define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 9925; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask: 9926; VLX: # %bb.0: # %entry 9927; VLX-NEXT: kmovd %edi, %k1 9928; VLX-NEXT: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} 9929; VLX-NEXT: kmovq %k0, %rax 9930; VLX-NEXT: vzeroupper 9931; VLX-NEXT: retq 9932; 9933; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask: 9934; NoVLX: # %bb.0: # %entry 9935; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 9936; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9937; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 9938; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 9939; NoVLX-NEXT: kmovw %k0, %eax 9940; NoVLX-NEXT: andl %edi, %eax 9941; NoVLX-NEXT: shrl $16, %edi 9942; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 9943; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9944; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9945; NoVLX-NEXT: kmovw %k0, %ecx 9946; NoVLX-NEXT: andl %edi, %ecx 9947; NoVLX-NEXT: shll $16, %ecx 9948; NoVLX-NEXT: movzwl %ax, %eax 9949; NoVLX-NEXT: orl %ecx, %eax 9950; NoVLX-NEXT: vzeroupper 9951; NoVLX-NEXT: retq 9952entry: 9953 %0 = bitcast <4 x i64> %__a to <32 x i8> 9954 %1 = bitcast <4 x i64> %__b to <32 x i8> 9955 %2 = icmp sge <32 x i8> %0, %1 9956 %3 = bitcast i32 %__u to <32 x i1> 9957 %4 = and <32 x i1> %2, %3 9958 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, 
i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 9959 %6 = bitcast <64 x i1> %5 to i64 9960 ret i64 %6 9961} 9962 9963define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 9964; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem: 9965; VLX: # %bb.0: # %entry 9966; VLX-NEXT: kmovd %edi, %k1 9967; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1} 9968; VLX-NEXT: kmovq %k0, %rax 9969; VLX-NEXT: vzeroupper 9970; VLX-NEXT: retq 9971; 9972; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem: 9973; NoVLX: # %bb.0: # %entry 9974; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 9975; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 9976; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 9977; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 9978; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 9979; NoVLX-NEXT: kmovw %k0, %eax 9980; NoVLX-NEXT: andl %edi, %eax 9981; NoVLX-NEXT: shrl $16, %edi 9982; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 9983; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 9984; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 9985; NoVLX-NEXT: kmovw %k0, %ecx 9986; NoVLX-NEXT: andl %edi, %ecx 9987; NoVLX-NEXT: shll $16, %ecx 9988; NoVLX-NEXT: movzwl %ax, %eax 9989; NoVLX-NEXT: orl %ecx, %eax 9990; NoVLX-NEXT: vzeroupper 9991; NoVLX-NEXT: retq 9992entry: 9993 %0 = bitcast <4 x i64> %__a to <32 x i8> 9994 %load = load <4 x i64>, ptr %__b 9995 %1 = bitcast <4 x i64> %load to <32 x i8> 9996 %2 = icmp sge <32 x i8> %0, %1 9997 %3 = bitcast i32 %__u to <32 x i1> 9998 %4 = and <32 x i1> %2, %3 9999 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, 
i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 10000 %6 = bitcast <64 x i1> %5 to i64 10001 ret i64 %6 10002} 10003 10004 10005define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 10006; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask: 10007; VLX: # %bb.0: # %entry 10008; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 10009; VLX-NEXT: kmovd %k0, %eax 10010; VLX-NEXT: # kill: def $ax killed $ax killed $eax 10011; VLX-NEXT: retq 10012; 10013; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask: 10014; NoVLX: # %bb.0: # %entry 10015; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10016; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10017; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10018; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 10019; NoVLX-NEXT: kmovw %k0, %eax 10020; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 10021; NoVLX-NEXT: vzeroupper 10022; NoVLX-NEXT: retq 10023entry: 10024 %0 = bitcast <2 x i64> %__a to <8 x i16> 10025 %1 = bitcast <2 x i64> %__b to <8 x i16> 10026 %2 = icmp sge <8 x i16> %0, %1 10027 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10028 %4 = bitcast <16 x i1> %3 to i16 10029 ret i16 %4 10030} 10031 10032define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 10033; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem: 10034; VLX: # %bb.0: # %entry 10035; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 10036; VLX-NEXT: kmovd %k0, %eax 10037; VLX-NEXT: # kill: def $ax killed $ax killed $eax 10038; VLX-NEXT: retq 10039; 10040; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem: 10041; NoVLX: 
# %bb.0: # %entry 10042; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 10043; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10044; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10045; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10046; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 10047; NoVLX-NEXT: kmovw %k0, %eax 10048; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 10049; NoVLX-NEXT: vzeroupper 10050; NoVLX-NEXT: retq 10051entry: 10052 %0 = bitcast <2 x i64> %__a to <8 x i16> 10053 %load = load <2 x i64>, ptr %__b 10054 %1 = bitcast <2 x i64> %load to <8 x i16> 10055 %2 = icmp sge <8 x i16> %0, %1 10056 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10057 %4 = bitcast <16 x i1> %3 to i16 10058 ret i16 %4 10059} 10060 10061define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 10062; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask: 10063; VLX: # %bb.0: # %entry 10064; VLX-NEXT: kmovd %edi, %k1 10065; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} 10066; VLX-NEXT: kmovd %k0, %eax 10067; VLX-NEXT: # kill: def $ax killed $ax killed $eax 10068; VLX-NEXT: retq 10069; 10070; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask: 10071; NoVLX: # %bb.0: # %entry 10072; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10073; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10074; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10075; NoVLX-NEXT: kmovw %edi, %k1 10076; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 10077; NoVLX-NEXT: kmovw %k0, %eax 10078; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 10079; NoVLX-NEXT: vzeroupper 10080; NoVLX-NEXT: retq 10081entry: 10082 %0 = bitcast <2 x i64> %__a to <8 x i16> 10083 %1 = bitcast <2 x i64> %__b to <8 x i16> 10084 %2 = icmp sge <8 x i16> %0, %1 10085 %3 = bitcast i8 %__u to <8 x i1> 10086 %4 = and <8 x i1> %2, %3 10087 %5 = shufflevector <8 x i1> %4, <8 x i1> 
zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10088 %6 = bitcast <16 x i1> %5 to i16 10089 ret i16 %6 10090} 10091 10092define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 10093; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem: 10094; VLX: # %bb.0: # %entry 10095; VLX-NEXT: kmovd %edi, %k1 10096; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} 10097; VLX-NEXT: kmovd %k0, %eax 10098; VLX-NEXT: # kill: def $ax killed $ax killed $eax 10099; VLX-NEXT: retq 10100; 10101; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem: 10102; NoVLX: # %bb.0: # %entry 10103; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 10104; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10105; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10106; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10107; NoVLX-NEXT: kmovw %edi, %k1 10108; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 10109; NoVLX-NEXT: kmovw %k0, %eax 10110; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 10111; NoVLX-NEXT: vzeroupper 10112; NoVLX-NEXT: retq 10113entry: 10114 %0 = bitcast <2 x i64> %__a to <8 x i16> 10115 %load = load <2 x i64>, ptr %__b 10116 %1 = bitcast <2 x i64> %load to <8 x i16> 10117 %2 = icmp sge <8 x i16> %0, %1 10118 %3 = bitcast i8 %__u to <8 x i1> 10119 %4 = and <8 x i1> %2, %3 10120 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10121 %6 = bitcast <16 x i1> %5 to i16 10122 ret i16 %6 10123} 10124 10125 10126define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 10127; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask: 10128; VLX: # %bb.0: # %entry 10129; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 10130; VLX-NEXT: kmovd %k0, %eax 10131; VLX-NEXT: retq 10132; 10133; NoVLX-LABEL: 
test_vpcmpsgew_v8i1_v32i1_mask: 10134; NoVLX: # %bb.0: # %entry 10135; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10136; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10137; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10138; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 10139; NoVLX-NEXT: kmovw %k0, %eax 10140; NoVLX-NEXT: vzeroupper 10141; NoVLX-NEXT: retq 10142entry: 10143 %0 = bitcast <2 x i64> %__a to <8 x i16> 10144 %1 = bitcast <2 x i64> %__b to <8 x i16> 10145 %2 = icmp sge <8 x i16> %0, %1 10146 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10147 %4 = bitcast <32 x i1> %3 to i32 10148 ret i32 %4 10149} 10150 10151define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 10152; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem: 10153; VLX: # %bb.0: # %entry 10154; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 10155; VLX-NEXT: kmovd %k0, %eax 10156; VLX-NEXT: retq 10157; 10158; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem: 10159; NoVLX: # %bb.0: # %entry 10160; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 10161; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10162; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10163; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10164; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 10165; NoVLX-NEXT: kmovw %k0, %eax 10166; NoVLX-NEXT: vzeroupper 10167; NoVLX-NEXT: retq 10168entry: 10169 %0 = bitcast <2 x i64> %__a to <8 x i16> 10170 %load = load <2 x i64>, ptr %__b 10171 %1 = bitcast <2 x i64> %load to <8 x i16> 10172 %2 = icmp sge <8 x i16> %0, %1 10173 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, 
i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10174 %4 = bitcast <32 x i1> %3 to i32 10175 ret i32 %4 10176} 10177 10178define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 10179; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask: 10180; VLX: # %bb.0: # %entry 10181; VLX-NEXT: kmovd %edi, %k1 10182; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} 10183; VLX-NEXT: kmovd %k0, %eax 10184; VLX-NEXT: retq 10185; 10186; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask: 10187; NoVLX: # %bb.0: # %entry 10188; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10189; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10190; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10191; NoVLX-NEXT: kmovw %edi, %k1 10192; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 10193; NoVLX-NEXT: kmovw %k0, %eax 10194; NoVLX-NEXT: vzeroupper 10195; NoVLX-NEXT: retq 10196entry: 10197 %0 = bitcast <2 x i64> %__a to <8 x i16> 10198 %1 = bitcast <2 x i64> %__b to <8 x i16> 10199 %2 = icmp sge <8 x i16> %0, %1 10200 %3 = bitcast i8 %__u to <8 x i1> 10201 %4 = and <8 x i1> %2, %3 10202 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10203 %6 = bitcast <32 x i1> %5 to i32 10204 ret i32 %6 10205} 10206 10207define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 10208; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem: 10209; VLX: # %bb.0: # %entry 10210; VLX-NEXT: kmovd %edi, %k1 10211; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} 10212; VLX-NEXT: kmovd %k0, %eax 10213; VLX-NEXT: retq 10214; 10215; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem: 10216; NoVLX: # %bb.0: # %entry 
10217; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 10218; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10219; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10220; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10221; NoVLX-NEXT: kmovw %edi, %k1 10222; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 10223; NoVLX-NEXT: kmovw %k0, %eax 10224; NoVLX-NEXT: vzeroupper 10225; NoVLX-NEXT: retq 10226entry: 10227 %0 = bitcast <2 x i64> %__a to <8 x i16> 10228 %load = load <2 x i64>, ptr %__b 10229 %1 = bitcast <2 x i64> %load to <8 x i16> 10230 %2 = icmp sge <8 x i16> %0, %1 10231 %3 = bitcast i8 %__u to <8 x i1> 10232 %4 = and <8 x i1> %2, %3 10233 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10234 %6 = bitcast <32 x i1> %5 to i32 10235 ret i32 %6 10236} 10237 10238 10239define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 10240; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask: 10241; VLX: # %bb.0: # %entry 10242; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 10243; VLX-NEXT: kmovq %k0, %rax 10244; VLX-NEXT: retq 10245; 10246; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask: 10247; NoVLX: # %bb.0: # %entry 10248; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10249; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10250; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10251; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 10252; NoVLX-NEXT: kmovw %k0, %eax 10253; NoVLX-NEXT: vzeroupper 10254; NoVLX-NEXT: retq 10255entry: 10256 %0 = bitcast <2 x i64> %__a to <8 x i16> 10257 %1 = bitcast <2 x i64> %__b to <8 x i16> 10258 %2 = icmp sge <8 x i16> %0, %1 10259 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, 
i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10260 %4 = bitcast <64 x i1> %3 to i64 10261 ret i64 %4 10262} 10263 10264define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 10265; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem: 10266; VLX: # %bb.0: # %entry 10267; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 10268; VLX-NEXT: kmovq %k0, %rax 10269; VLX-NEXT: retq 10270; 10271; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem: 10272; NoVLX: # %bb.0: # %entry 10273; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 10274; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10275; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10276; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10277; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 10278; NoVLX-NEXT: kmovw %k0, %eax 10279; NoVLX-NEXT: vzeroupper 10280; NoVLX-NEXT: retq 10281entry: 10282 %0 = bitcast <2 x i64> %__a to <8 x i16> 10283 %load = load <2 x i64>, ptr %__b 10284 %1 = bitcast <2 x i64> %load to <8 x i16> 10285 %2 = icmp sge <8 x i16> %0, %1 10286 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10287 %4 = bitcast <64 x i1> %3 to i64 10288 ret i64 %4 10289} 10290 10291define zeroext i64 
@test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 10292; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask: 10293; VLX: # %bb.0: # %entry 10294; VLX-NEXT: kmovd %edi, %k1 10295; VLX-NEXT: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} 10296; VLX-NEXT: kmovq %k0, %rax 10297; VLX-NEXT: retq 10298; 10299; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask: 10300; NoVLX: # %bb.0: # %entry 10301; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10302; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10303; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10304; NoVLX-NEXT: kmovw %edi, %k1 10305; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 10306; NoVLX-NEXT: kmovw %k0, %eax 10307; NoVLX-NEXT: vzeroupper 10308; NoVLX-NEXT: retq 10309entry: 10310 %0 = bitcast <2 x i64> %__a to <8 x i16> 10311 %1 = bitcast <2 x i64> %__b to <8 x i16> 10312 %2 = icmp sge <8 x i16> %0, %1 10313 %3 = bitcast i8 %__u to <8 x i1> 10314 %4 = and <8 x i1> %2, %3 10315 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10316 %6 = bitcast <64 x i1> %5 to i64 10317 ret i64 %6 10318} 10319 10320define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 10321; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem: 10322; VLX: # %bb.0: # %entry 10323; VLX-NEXT: kmovd %edi, %k1 10324; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} 10325; VLX-NEXT: kmovq %k0, %rax 10326; VLX-NEXT: retq 10327; 10328; NoVLX-LABEL: 
test_masked_vpcmpsgew_v8i1_v64i1_mask_mem: 10329; NoVLX: # %bb.0: # %entry 10330; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 10331; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 10332; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10333; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 10334; NoVLX-NEXT: kmovw %edi, %k1 10335; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 10336; NoVLX-NEXT: kmovw %k0, %eax 10337; NoVLX-NEXT: vzeroupper 10338; NoVLX-NEXT: retq 10339entry: 10340 %0 = bitcast <2 x i64> %__a to <8 x i16> 10341 %load = load <2 x i64>, ptr %__b 10342 %1 = bitcast <2 x i64> %load to <8 x i16> 10343 %2 = icmp sge <8 x i16> %0, %1 10344 %3 = bitcast i8 %__u to <8 x i1> 10345 %4 = and <8 x i1> %2, %3 10346 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 10347 %6 = bitcast <64 x i1> %5 to i64 10348 ret i64 %6 10349} 10350 10351 10352define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 10353; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: 10354; VLX: # %bb.0: # %entry 10355; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 10356; VLX-NEXT: kmovd %k0, %eax 10357; VLX-NEXT: vzeroupper 10358; VLX-NEXT: retq 10359; 10360; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: 10361; NoVLX: # %bb.0: # %entry 10362; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10363; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10364; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10365; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10366; NoVLX-NEXT: kmovw %k0, %eax 10367; NoVLX-NEXT: vzeroupper 
10368; NoVLX-NEXT: retq 10369entry: 10370 %0 = bitcast <4 x i64> %__a to <16 x i16> 10371 %1 = bitcast <4 x i64> %__b to <16 x i16> 10372 %2 = icmp sge <16 x i16> %0, %1 10373 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10374 %4 = bitcast <32 x i1> %3 to i32 10375 ret i32 %4 10376} 10377 10378define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 10379; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: 10380; VLX: # %bb.0: # %entry 10381; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0 10382; VLX-NEXT: kmovd %k0, %eax 10383; VLX-NEXT: vzeroupper 10384; VLX-NEXT: retq 10385; 10386; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: 10387; NoVLX: # %bb.0: # %entry 10388; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 10389; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10390; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10391; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10392; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10393; NoVLX-NEXT: kmovw %k0, %eax 10394; NoVLX-NEXT: vzeroupper 10395; NoVLX-NEXT: retq 10396entry: 10397 %0 = bitcast <4 x i64> %__a to <16 x i16> 10398 %load = load <4 x i64>, ptr %__b 10399 %1 = bitcast <4 x i64> %load to <16 x i16> 10400 %2 = icmp sge <16 x i16> %0, %1 10401 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10402 %4 = bitcast <32 x i1> %3 to i32 10403 ret i32 %4 10404} 10405 10406define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) 
local_unnamed_addr { 10407; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask: 10408; VLX: # %bb.0: # %entry 10409; VLX-NEXT: kmovd %edi, %k1 10410; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1} 10411; VLX-NEXT: kmovd %k0, %eax 10412; VLX-NEXT: vzeroupper 10413; VLX-NEXT: retq 10414; 10415; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask: 10416; NoVLX: # %bb.0: # %entry 10417; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10418; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10419; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10420; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 10421; NoVLX-NEXT: kmovw %k0, %eax 10422; NoVLX-NEXT: andl %edi, %eax 10423; NoVLX-NEXT: vzeroupper 10424; NoVLX-NEXT: retq 10425entry: 10426 %0 = bitcast <4 x i64> %__a to <16 x i16> 10427 %1 = bitcast <4 x i64> %__b to <16 x i16> 10428 %2 = icmp sge <16 x i16> %0, %1 10429 %3 = bitcast i16 %__u to <16 x i1> 10430 %4 = and <16 x i1> %2, %3 10431 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 10432 %6 = bitcast <32 x i1> %5 to i32 10433 ret i32 %6 10434} 10435 10436define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 10437; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: 10438; VLX: # %bb.0: # %entry 10439; VLX-NEXT: kmovd %edi, %k1 10440; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1} 10441; VLX-NEXT: kmovd %k0, %eax 10442; VLX-NEXT: vzeroupper 10443; VLX-NEXT: retq 10444; 10445; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: 10446; NoVLX: # %bb.0: # %entry 10447; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 10448; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 10449; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 10450; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 10451; 
NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp sge <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Signed >= (icmp sge) on the <16 x i16> view of two register operands. The
; <16 x i1> result is widened to <64 x i1> by a shuffle whose indices 16-31
; select lanes of the zeroinitializer operand, then returned as an i64 bitmask.
define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp sge <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Memory-operand form of the 16-lane sge-to-i64 test: the right-hand side of
; the compare is a <4 x i64> loaded from %__b instead of a register argument.
define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp sge <16 x i16> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked register form: the <16 x i1> sge result is and-ed with the i16
; argument %__u (bitcast to <16 x i1>) before being zero-widened to 64 lanes.
define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %1 = bitcast <4 x i64> %__b to <16 x i16>
  %2 = icmp sge <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Masked memory form: right-hand operand loaded from %__b, compare result
; and-ed with the i16 mask %__u, then zero-widened to 64 lanes.
define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <16 x i16>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <16 x i16>
  %2 = icmp sge <16 x i16> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; icmp sge on the <32 x i16> view of two register operands. The <32 x i1>
; result is widened to <64 x i1>; shuffle indices 32-63 select lanes of the
; zeroinitializer operand. Returned as an i64 bitmask.
define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %1 = bitcast <8 x i64> %__b to <32 x i16>
  %2 = icmp sge <32 x i16> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Memory-operand form of the 32-lane sge-to-i64 test: the right-hand side is a
; <8 x i64> loaded from %__b.
define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: shll $16, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <32 x i16>
  %2 = icmp sge <32 x i16> %0, %1
  %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked register form: the <32 x i1> sge result is and-ed with the i32
; argument %__u (bitcast to <32 x i1>) before being zero-widened to 64 lanes.
define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm2
; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: andl %edi, %ecx
; NoVLX-NEXT: shll $16, %ecx
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %1 = bitcast <8 x i64> %__b to <32 x i16>
  %2 = icmp sge <32 x i16> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Masked memory form: right-hand operand loaded from %__b, compare result
; and-ed with the i32 mask %__u, then zero-widened to 64 lanes.
define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm1
; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andl %edi, %eax
; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm1
; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %ecx
; NoVLX-NEXT: andl %edi, %ecx
; NoVLX-NEXT: shll $16, %ecx
; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <32 x i16>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <32 x i16>
  %2 = icmp sge <32 x i16> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; icmp sge on the <4 x i32> view of two register operands. The <4 x i1> result
; is widened to <8 x i1> (shuffle indices 4-7 select lanes of the
; zeroinitializer operand) and returned as an i8 bitmask.
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Memory-operand form of the 4-lane sge-to-i8 test: the right-hand side is a
; <2 x i64> loaded from %__b.
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Masked register form: %__u is bitcast to <8 x i1>, its lanes 0-3 are
; extracted (%extract.i) and and-ed with the <4 x i1> sge result before the
; result is zero-widened to 8 lanes.
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Masked memory form: right-hand operand loaded from %__b; lanes 0-3 of the
; i8 mask %__u are and-ed with the sge result before zero-widening to 8 lanes.
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}


; Broadcast form: a single i32 is loaded from %__b and splatted to all four
; lanes (insertelement into lane 0, then an all-zero shuffle mask) to form the
; right-hand operand; the expected asm uses the {1to4} / {1to16} memory forms.
define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, ptr %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Masked broadcast form: splatted i32 from %__b as right-hand operand, with
; lanes 0-3 of the i8 mask %__u and-ed in. Note the `and` here takes
; %extract.i as its first operand and the compare result as its second.
define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, ptr %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}


; icmp sge on <4 x i32> register operands; the <4 x i1> result is widened to
; <16 x i1>. Every shuffle index past lane 3 is in the range 4-7, i.e. a lane
; of the zeroinitializer operand. Returned as an i16 bitmask.
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Memory-operand form of the 4-lane sge-to-i16 test: the right-hand side is a
; <2 x i64> loaded from %__b.
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked register form: lanes 0-3 of the i8 mask %__u (via %extract.i) are
; and-ed with the <4 x i1> sge result before it is zero-widened to 16 lanes.
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Masked memory form: right-hand operand loaded from %__b; lanes 0-3 of the
; i8 mask %__u are and-ed with the sge result before zero-widening to 16 lanes.
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; Broadcast form: a single i32 loaded from %__b is splatted to all four lanes
; as the right-hand operand; the <4 x i1> result is zero-widened to 16 lanes.
define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, ptr %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked broadcast form: splatted i32 from %__b as right-hand operand, with
; lanes 0-3 of the i8 mask %__u and-ed in (mask is the first `and` operand
; here), then zero-widened to 16 lanes.
define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, ptr %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; icmp sge on <4 x i32> register operands; the <4 x i1> result is widened to
; <32 x i1>. Every shuffle index past lane 3 is in the range 4-7, i.e. a lane
; of the zeroinitializer operand. Returned as an i32 bitmask.
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Memory-operand form of the 4-lane sge-to-i32 test: the right-hand side is a
; <2 x i64> loaded from %__b.
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked register form: lanes 0-3 of the i8 mask %__u (via %extract.i) are
; and-ed with the <4 x i1> sge result before it is zero-widened to 32 lanes.
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %1 = bitcast <2 x i64> %__b to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Masked memory form: right-hand operand loaded from %__b; lanes 0-3 of the
; i8 mask %__u are and-ed with the sge result before zero-widening to 32 lanes.
define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <4 x i32>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Broadcast form: a single i32 loaded from %__b is splatted to all four lanes
; as the right-hand operand; the <4 x i1> result is zero-widened to 32 lanes.
define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x i32>
  %load = load i32, ptr %__b
  %vec = insertelement <4 x i32> undef, i32 %load, i32 0
  %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i32> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd
(%rsi){1to16}, %zmm0, %k0 {%k1} 11280; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11281; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11282; NoVLX-NEXT: kmovw %k0, %eax 11283; NoVLX-NEXT: vzeroupper 11284; NoVLX-NEXT: retq 11285entry: 11286 %0 = bitcast <2 x i64> %__a to <4 x i32> 11287 %load = load i32, ptr %__b 11288 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11289 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11290 %2 = icmp sge <4 x i32> %0, %1 11291 %3 = bitcast i8 %__u to <8 x i1> 11292 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11293 %4 = and <4 x i1> %extract.i, %2 11294 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11295 %6 = bitcast <32 x i1> %5 to i32 11296 ret i32 %6 11297} 11298 11299 11300define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 11301; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask: 11302; VLX: # %bb.0: # %entry 11303; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 11304; VLX-NEXT: kmovq %k0, %rax 11305; VLX-NEXT: retq 11306; 11307; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask: 11308; NoVLX: # %bb.0: # %entry 11309; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 11310; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11311; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11312; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11313; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11314; NoVLX-NEXT: kmovw %k0, %eax 11315; NoVLX-NEXT: vzeroupper 11316; NoVLX-NEXT: retq 11317entry: 11318 %0 = bitcast <2 x i64> %__a to <4 x i32> 11319 %1 = bitcast <2 x i64> %__b to <4 x i32> 11320 %2 = icmp sge <4 x i32> %0, %1 11321 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, 
i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11322 %4 = bitcast <64 x i1> %3 to i64 11323 ret i64 %4 11324} 11325 11326define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 11327; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: 11328; VLX: # %bb.0: # %entry 11329; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 11330; VLX-NEXT: kmovq %k0, %rax 11331; VLX-NEXT: retq 11332; 11333; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: 11334; NoVLX: # %bb.0: # %entry 11335; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11336; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 11337; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11338; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11339; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11340; NoVLX-NEXT: kmovw %k0, %eax 11341; NoVLX-NEXT: vzeroupper 11342; NoVLX-NEXT: retq 11343entry: 11344 %0 = bitcast <2 x i64> %__a to <4 x i32> 11345 %load = load <2 x i64>, ptr %__b 11346 %1 = bitcast <2 x i64> %load to <4 x i32> 11347 %2 = icmp sge <4 x i32> %0, %1 11348 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11349 %4 = bitcast <64 x i1> %3 to i64 11350 ret i64 %4 11351} 11352 11353define 
zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 11354; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: 11355; VLX: # %bb.0: # %entry 11356; VLX-NEXT: kmovd %edi, %k1 11357; VLX-NEXT: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} 11358; VLX-NEXT: kmovq %k0, %rax 11359; VLX-NEXT: retq 11360; 11361; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: 11362; NoVLX: # %bb.0: # %entry 11363; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 11364; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11365; NoVLX-NEXT: kmovw %edi, %k1 11366; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11367; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11368; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11369; NoVLX-NEXT: kmovw %k0, %eax 11370; NoVLX-NEXT: vzeroupper 11371; NoVLX-NEXT: retq 11372entry: 11373 %0 = bitcast <2 x i64> %__a to <4 x i32> 11374 %1 = bitcast <2 x i64> %__b to <4 x i32> 11375 %2 = icmp sge <4 x i32> %0, %1 11376 %3 = bitcast i8 %__u to <8 x i1> 11377 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11378 %4 = and <4 x i1> %2, %extract.i 11379 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11380 %6 = bitcast <64 x i1> %5 to i64 11381 ret i64 %6 11382} 11383 11384define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 11385; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: 11386; VLX: # %bb.0: # %entry 11387; VLX-NEXT: kmovd %edi, %k1 
11388; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} 11389; VLX-NEXT: kmovq %k0, %rax 11390; VLX-NEXT: retq 11391; 11392; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: 11393; NoVLX: # %bb.0: # %entry 11394; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11395; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 11396; NoVLX-NEXT: kmovw %edi, %k1 11397; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11398; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11399; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11400; NoVLX-NEXT: kmovw %k0, %eax 11401; NoVLX-NEXT: vzeroupper 11402; NoVLX-NEXT: retq 11403entry: 11404 %0 = bitcast <2 x i64> %__a to <4 x i32> 11405 %load = load <2 x i64>, ptr %__b 11406 %1 = bitcast <2 x i64> %load to <4 x i32> 11407 %2 = icmp sge <4 x i32> %0, %1 11408 %3 = bitcast i8 %__u to <8 x i1> 11409 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11410 %4 = and <4 x i1> %2, %extract.i 11411 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11412 %6 = bitcast <64 x i1> %5 to i64 11413 ret i64 %6 11414} 11415 11416 11417define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 11418; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: 11419; VLX: # %bb.0: # %entry 11420; VLX-NEXT: vpcmpnltd (%rdi){1to4}, %xmm0, %k0 11421; VLX-NEXT: kmovq %k0, %rax 11422; VLX-NEXT: retq 11423; 11424; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: 11425; NoVLX: # %bb.0: # %entry 11426; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11427; 
NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 11428; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11429; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11430; NoVLX-NEXT: kmovw %k0, %eax 11431; NoVLX-NEXT: vzeroupper 11432; NoVLX-NEXT: retq 11433entry: 11434 %0 = bitcast <2 x i64> %__a to <4 x i32> 11435 %load = load i32, ptr %__b 11436 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11437 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11438 %2 = icmp sge <4 x i32> %0, %1 11439 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11440 %4 = bitcast <64 x i1> %3 to i64 11441 ret i64 %4 11442} 11443 11444define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 11445; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: 11446; VLX: # %bb.0: # %entry 11447; VLX-NEXT: kmovd %edi, %k1 11448; VLX-NEXT: vpcmpnltd (%rsi){1to4}, %xmm0, %k0 {%k1} 11449; VLX-NEXT: kmovq %k0, %rax 11450; VLX-NEXT: retq 11451; 11452; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: 11453; NoVLX: # %bb.0: # %entry 11454; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 11455; NoVLX-NEXT: kmovw %edi, %k1 11456; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} 11457; NoVLX-NEXT: kshiftlw $12, %k0, %k0 11458; NoVLX-NEXT: kshiftrw $12, %k0, %k0 11459; NoVLX-NEXT: kmovw %k0, %eax 11460; NoVLX-NEXT: vzeroupper 11461; NoVLX-NEXT: retq 11462entry: 11463 %0 = bitcast <2 x i64> %__a to <4 x i32> 11464 %load = load i32, ptr %__b 
11465 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 11466 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 11467 %2 = icmp sge <4 x i32> %0, %1 11468 %3 = bitcast i8 %__u to <8 x i1> 11469 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 11470 %4 = and <4 x i1> %extract.i, %2 11471 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 11472 %6 = bitcast <64 x i1> %5 to i64 11473 ret i64 %6 11474} 11475 11476 11477define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 11478; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask: 11479; VLX: # %bb.0: # %entry 11480; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 11481; VLX-NEXT: kmovd %k0, %eax 11482; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11483; VLX-NEXT: vzeroupper 11484; VLX-NEXT: retq 11485; 11486; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask: 11487; NoVLX: # %bb.0: # %entry 11488; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 11489; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11490; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11491; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11492; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11493; NoVLX-NEXT: kmovw %k0, %eax 11494; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11495; NoVLX-NEXT: vzeroupper 11496; NoVLX-NEXT: retq 11497entry: 11498 %0 = bitcast <4 x i64> %__a to <8 x i32> 11499 %1 = bitcast <4 x i64> %__b to <8 x i32> 11500 %2 = icmp sge <8 x i32> %0, %1 11501 %3 = 
shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11502 %4 = bitcast <16 x i1> %3 to i16 11503 ret i16 %4 11504} 11505 11506define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 11507; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem: 11508; VLX: # %bb.0: # %entry 11509; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 11510; VLX-NEXT: kmovd %k0, %eax 11511; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11512; VLX-NEXT: vzeroupper 11513; VLX-NEXT: retq 11514; 11515; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem: 11516; NoVLX: # %bb.0: # %entry 11517; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11518; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 11519; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11520; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11521; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11522; NoVLX-NEXT: kmovw %k0, %eax 11523; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11524; NoVLX-NEXT: vzeroupper 11525; NoVLX-NEXT: retq 11526entry: 11527 %0 = bitcast <4 x i64> %__a to <8 x i32> 11528 %load = load <4 x i64>, ptr %__b 11529 %1 = bitcast <4 x i64> %load to <8 x i32> 11530 %2 = icmp sge <8 x i32> %0, %1 11531 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11532 %4 = bitcast <16 x i1> %3 to i16 11533 ret i16 %4 11534} 11535 11536define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 11537; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask: 11538; VLX: # %bb.0: # %entry 11539; VLX-NEXT: kmovd %edi, %k1 11540; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} 11541; VLX-NEXT: kmovd %k0, %eax 11542; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11543; VLX-NEXT: vzeroupper 11544; VLX-NEXT: retq 11545; 
11546; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask: 11547; NoVLX: # %bb.0: # %entry 11548; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 11549; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11550; NoVLX-NEXT: kmovw %edi, %k1 11551; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11552; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11553; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11554; NoVLX-NEXT: kmovw %k0, %eax 11555; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11556; NoVLX-NEXT: vzeroupper 11557; NoVLX-NEXT: retq 11558entry: 11559 %0 = bitcast <4 x i64> %__a to <8 x i32> 11560 %1 = bitcast <4 x i64> %__b to <8 x i32> 11561 %2 = icmp sge <8 x i32> %0, %1 11562 %3 = bitcast i8 %__u to <8 x i1> 11563 %4 = and <8 x i1> %2, %3 11564 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11565 %6 = bitcast <16 x i1> %5 to i16 11566 ret i16 %6 11567} 11568 11569define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 11570; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem: 11571; VLX: # %bb.0: # %entry 11572; VLX-NEXT: kmovd %edi, %k1 11573; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} 11574; VLX-NEXT: kmovd %k0, %eax 11575; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11576; VLX-NEXT: vzeroupper 11577; VLX-NEXT: retq 11578; 11579; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem: 11580; NoVLX: # %bb.0: # %entry 11581; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11582; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 11583; NoVLX-NEXT: kmovw %edi, %k1 11584; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11585; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11586; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11587; NoVLX-NEXT: kmovw %k0, %eax 11588; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11589; NoVLX-NEXT: vzeroupper 11590; NoVLX-NEXT: retq 11591entry: 11592 %0 = 
bitcast <4 x i64> %__a to <8 x i32> 11593 %load = load <4 x i64>, ptr %__b 11594 %1 = bitcast <4 x i64> %load to <8 x i32> 11595 %2 = icmp sge <8 x i32> %0, %1 11596 %3 = bitcast i8 %__u to <8 x i1> 11597 %4 = and <8 x i1> %2, %3 11598 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11599 %6 = bitcast <16 x i1> %5 to i16 11600 ret i16 %6 11601} 11602 11603 11604define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 11605; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: 11606; VLX: # %bb.0: # %entry 11607; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0 11608; VLX-NEXT: kmovd %k0, %eax 11609; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11610; VLX-NEXT: vzeroupper 11611; VLX-NEXT: retq 11612; 11613; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: 11614; NoVLX: # %bb.0: # %entry 11615; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11616; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 11617; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11618; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11619; NoVLX-NEXT: kmovw %k0, %eax 11620; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11621; NoVLX-NEXT: vzeroupper 11622; NoVLX-NEXT: retq 11623entry: 11624 %0 = bitcast <4 x i64> %__a to <8 x i32> 11625 %load = load i32, ptr %__b 11626 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 11627 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 11628 %2 = icmp sge <8 x i32> %0, %1 11629 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11630 %4 = bitcast <16 x i1> %3 to i16 11631 ret i16 %4 11632} 11633 11634define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> 
%__a, ptr %__b) local_unnamed_addr { 11635; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: 11636; VLX: # %bb.0: # %entry 11637; VLX-NEXT: kmovd %edi, %k1 11638; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1} 11639; VLX-NEXT: kmovd %k0, %eax 11640; VLX-NEXT: # kill: def $ax killed $ax killed $eax 11641; VLX-NEXT: vzeroupper 11642; VLX-NEXT: retq 11643; 11644; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: 11645; NoVLX: # %bb.0: # %entry 11646; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11647; NoVLX-NEXT: kmovw %edi, %k1 11648; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} 11649; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11650; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11651; NoVLX-NEXT: kmovw %k0, %eax 11652; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 11653; NoVLX-NEXT: vzeroupper 11654; NoVLX-NEXT: retq 11655entry: 11656 %0 = bitcast <4 x i64> %__a to <8 x i32> 11657 %load = load i32, ptr %__b 11658 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 11659 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 11660 %2 = icmp sge <8 x i32> %0, %1 11661 %3 = bitcast i8 %__u to <8 x i1> 11662 %4 = and <8 x i1> %3, %2 11663 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11664 %6 = bitcast <16 x i1> %5 to i16 11665 ret i16 %6 11666} 11667 11668 11669define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 11670; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask: 11671; VLX: # %bb.0: # %entry 11672; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 11673; VLX-NEXT: kmovd %k0, %eax 11674; VLX-NEXT: vzeroupper 11675; VLX-NEXT: retq 11676; 11677; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask: 11678; NoVLX: # %bb.0: # %entry 11679; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 11680; NoVLX-NEXT: # kill: 
def $ymm0 killed $ymm0 def $zmm0 11681; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11682; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11683; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11684; NoVLX-NEXT: kmovw %k0, %eax 11685; NoVLX-NEXT: vzeroupper 11686; NoVLX-NEXT: retq 11687entry: 11688 %0 = bitcast <4 x i64> %__a to <8 x i32> 11689 %1 = bitcast <4 x i64> %__b to <8 x i32> 11690 %2 = icmp sge <8 x i32> %0, %1 11691 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11692 %4 = bitcast <32 x i1> %3 to i32 11693 ret i32 %4 11694} 11695 11696define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 11697; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem: 11698; VLX: # %bb.0: # %entry 11699; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 11700; VLX-NEXT: kmovd %k0, %eax 11701; VLX-NEXT: vzeroupper 11702; VLX-NEXT: retq 11703; 11704; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem: 11705; NoVLX: # %bb.0: # %entry 11706; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11707; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 11708; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11709; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11710; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11711; NoVLX-NEXT: kmovw %k0, %eax 11712; NoVLX-NEXT: vzeroupper 11713; NoVLX-NEXT: retq 11714entry: 11715 %0 = bitcast <4 x i64> %__a to <8 x i32> 11716 %load = load <4 x i64>, ptr %__b 11717 %1 = bitcast <4 x i64> %load to <8 x i32> 11718 %2 = icmp sge <8 x i32> %0, %1 11719 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, 
i32 14, i32 15> 11720 %4 = bitcast <32 x i1> %3 to i32 11721 ret i32 %4 11722} 11723 11724define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 11725; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask: 11726; VLX: # %bb.0: # %entry 11727; VLX-NEXT: kmovd %edi, %k1 11728; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} 11729; VLX-NEXT: kmovd %k0, %eax 11730; VLX-NEXT: vzeroupper 11731; VLX-NEXT: retq 11732; 11733; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask: 11734; NoVLX: # %bb.0: # %entry 11735; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 11736; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11737; NoVLX-NEXT: kmovw %edi, %k1 11738; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11739; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11740; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11741; NoVLX-NEXT: kmovw %k0, %eax 11742; NoVLX-NEXT: vzeroupper 11743; NoVLX-NEXT: retq 11744entry: 11745 %0 = bitcast <4 x i64> %__a to <8 x i32> 11746 %1 = bitcast <4 x i64> %__b to <8 x i32> 11747 %2 = icmp sge <8 x i32> %0, %1 11748 %3 = bitcast i8 %__u to <8 x i1> 11749 %4 = and <8 x i1> %2, %3 11750 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11751 %6 = bitcast <32 x i1> %5 to i32 11752 ret i32 %6 11753} 11754 11755define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 11756; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem: 11757; VLX: # %bb.0: # %entry 11758; VLX-NEXT: kmovd %edi, %k1 11759; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} 11760; VLX-NEXT: kmovd %k0, %eax 11761; VLX-NEXT: vzeroupper 11762; VLX-NEXT: retq 11763; 11764; NoVLX-LABEL: 
test_masked_vpcmpsged_v8i1_v32i1_mask_mem: 11765; NoVLX: # %bb.0: # %entry 11766; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11767; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 11768; NoVLX-NEXT: kmovw %edi, %k1 11769; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11770; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11771; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11772; NoVLX-NEXT: kmovw %k0, %eax 11773; NoVLX-NEXT: vzeroupper 11774; NoVLX-NEXT: retq 11775entry: 11776 %0 = bitcast <4 x i64> %__a to <8 x i32> 11777 %load = load <4 x i64>, ptr %__b 11778 %1 = bitcast <4 x i64> %load to <8 x i32> 11779 %2 = icmp sge <8 x i32> %0, %1 11780 %3 = bitcast i8 %__u to <8 x i1> 11781 %4 = and <8 x i1> %2, %3 11782 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11783 %6 = bitcast <32 x i1> %5 to i32 11784 ret i32 %6 11785} 11786 11787 11788define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 11789; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: 11790; VLX: # %bb.0: # %entry 11791; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0 11792; VLX-NEXT: kmovd %k0, %eax 11793; VLX-NEXT: vzeroupper 11794; VLX-NEXT: retq 11795; 11796; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: 11797; NoVLX: # %bb.0: # %entry 11798; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11799; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 11800; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11801; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11802; NoVLX-NEXT: kmovw %k0, %eax 11803; NoVLX-NEXT: vzeroupper 11804; NoVLX-NEXT: retq 11805entry: 11806 %0 = bitcast <4 x i64> %__a to <8 x i32> 11807 %load = load i32, ptr %__b 11808 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 11809 %1 = shufflevector <8 x i32> %vec, <8 x i32> 
undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 11810 %2 = icmp sge <8 x i32> %0, %1 11811 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11812 %4 = bitcast <32 x i1> %3 to i32 11813 ret i32 %4 11814} 11815 11816define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 11817; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: 11818; VLX: # %bb.0: # %entry 11819; VLX-NEXT: kmovd %edi, %k1 11820; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1} 11821; VLX-NEXT: kmovd %k0, %eax 11822; VLX-NEXT: vzeroupper 11823; VLX-NEXT: retq 11824; 11825; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: 11826; NoVLX: # %bb.0: # %entry 11827; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11828; NoVLX-NEXT: kmovw %edi, %k1 11829; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} 11830; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11831; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11832; NoVLX-NEXT: kmovw %k0, %eax 11833; NoVLX-NEXT: vzeroupper 11834; NoVLX-NEXT: retq 11835entry: 11836 %0 = bitcast <4 x i64> %__a to <8 x i32> 11837 %load = load i32, ptr %__b 11838 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 11839 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 11840 %2 = icmp sge <8 x i32> %0, %1 11841 %3 = bitcast i8 %__u to <8 x i1> 11842 %4 = and <8 x i1> %3, %2 11843 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, 
i32 12, i32 13, i32 14, i32 15> 11844 %6 = bitcast <32 x i1> %5 to i32 11845 ret i32 %6 11846} 11847 11848 11849define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 11850; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask: 11851; VLX: # %bb.0: # %entry 11852; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 11853; VLX-NEXT: kmovq %k0, %rax 11854; VLX-NEXT: vzeroupper 11855; VLX-NEXT: retq 11856; 11857; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask: 11858; NoVLX: # %bb.0: # %entry 11859; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 11860; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11861; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11862; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11863; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11864; NoVLX-NEXT: kmovw %k0, %eax 11865; NoVLX-NEXT: vzeroupper 11866; NoVLX-NEXT: retq 11867entry: 11868 %0 = bitcast <4 x i64> %__a to <8 x i32> 11869 %1 = bitcast <4 x i64> %__b to <8 x i32> 11870 %2 = icmp sge <8 x i32> %0, %1 11871 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11872 %4 = bitcast <64 x i1> %3 to i64 11873 ret i64 %4 11874} 11875 11876define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 11877; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem: 11878; VLX: # %bb.0: # %entry 11879; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 11880; VLX-NEXT: kmovq %k0, %rax 11881; VLX-NEXT: vzeroupper 11882; VLX-NEXT: retq 11883; 11884; NoVLX-LABEL: 
test_vpcmpsged_v8i1_v64i1_mask_mem: 11885; NoVLX: # %bb.0: # %entry 11886; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11887; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 11888; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 11889; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11890; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11891; NoVLX-NEXT: kmovw %k0, %eax 11892; NoVLX-NEXT: vzeroupper 11893; NoVLX-NEXT: retq 11894entry: 11895 %0 = bitcast <4 x i64> %__a to <8 x i32> 11896 %load = load <4 x i64>, ptr %__b 11897 %1 = bitcast <4 x i64> %load to <8 x i32> 11898 %2 = icmp sge <8 x i32> %0, %1 11899 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11900 %4 = bitcast <64 x i1> %3 to i64 11901 ret i64 %4 11902} 11903 11904define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 11905; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask: 11906; VLX: # %bb.0: # %entry 11907; VLX-NEXT: kmovd %edi, %k1 11908; VLX-NEXT: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} 11909; VLX-NEXT: kmovq %k0, %rax 11910; VLX-NEXT: vzeroupper 11911; VLX-NEXT: retq 11912; 11913; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask: 11914; NoVLX: # %bb.0: # %entry 11915; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 11916; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11917; NoVLX-NEXT: kmovw %edi, %k1 11918; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11919; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11920; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11921; NoVLX-NEXT: 
kmovw %k0, %eax 11922; NoVLX-NEXT: vzeroupper 11923; NoVLX-NEXT: retq 11924entry: 11925 %0 = bitcast <4 x i64> %__a to <8 x i32> 11926 %1 = bitcast <4 x i64> %__b to <8 x i32> 11927 %2 = icmp sge <8 x i32> %0, %1 11928 %3 = bitcast i8 %__u to <8 x i1> 11929 %4 = and <8 x i1> %2, %3 11930 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11931 %6 = bitcast <64 x i1> %5 to i64 11932 ret i64 %6 11933} 11934 11935define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 11936; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem: 11937; VLX: # %bb.0: # %entry 11938; VLX-NEXT: kmovd %edi, %k1 11939; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} 11940; VLX-NEXT: kmovq %k0, %rax 11941; VLX-NEXT: vzeroupper 11942; VLX-NEXT: retq 11943; 11944; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem: 11945; NoVLX: # %bb.0: # %entry 11946; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11947; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 11948; NoVLX-NEXT: kmovw %edi, %k1 11949; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 11950; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11951; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11952; NoVLX-NEXT: kmovw %k0, %eax 11953; NoVLX-NEXT: vzeroupper 11954; NoVLX-NEXT: retq 11955entry: 11956 %0 = bitcast <4 x i64> %__a to <8 x i32> 11957 %load = load <4 x i64>, ptr %__b 11958 %1 = bitcast <4 x i64> %load to <8 x i32> 11959 %2 = icmp sge <8 x i32> %0, %1 11960 %3 = bitcast i8 %__u to <8 x i1> 11961 
%4 = and <8 x i1> %2, %3 11962 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11963 %6 = bitcast <64 x i1> %5 to i64 11964 ret i64 %6 11965} 11966 11967 11968define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 11969; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: 11970; VLX: # %bb.0: # %entry 11971; VLX-NEXT: vpcmpnltd (%rdi){1to8}, %ymm0, %k0 11972; VLX-NEXT: kmovq %k0, %rax 11973; VLX-NEXT: vzeroupper 11974; VLX-NEXT: retq 11975; 11976; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: 11977; NoVLX: # %bb.0: # %entry 11978; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 11979; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 11980; NoVLX-NEXT: kshiftlw $8, %k0, %k0 11981; NoVLX-NEXT: kshiftrw $8, %k0, %k0 11982; NoVLX-NEXT: kmovw %k0, %eax 11983; NoVLX-NEXT: vzeroupper 11984; NoVLX-NEXT: retq 11985entry: 11986 %0 = bitcast <4 x i64> %__a to <8 x i32> 11987 %load = load i32, ptr %__b 11988 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 11989 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 11990 %2 = icmp sge <8 x i32> %0, %1 11991 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 
14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 11992 %4 = bitcast <64 x i1> %3 to i64 11993 ret i64 %4 11994} 11995 11996define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 11997; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: 11998; VLX: # %bb.0: # %entry 11999; VLX-NEXT: kmovd %edi, %k1 12000; VLX-NEXT: vpcmpnltd (%rsi){1to8}, %ymm0, %k0 {%k1} 12001; VLX-NEXT: kmovq %k0, %rax 12002; VLX-NEXT: vzeroupper 12003; VLX-NEXT: retq 12004; 12005; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: 12006; NoVLX: # %bb.0: # %entry 12007; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 12008; NoVLX-NEXT: kmovw %edi, %k1 12009; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} 12010; NoVLX-NEXT: kshiftlw $8, %k0, %k0 12011; NoVLX-NEXT: kshiftrw $8, %k0, %k0 12012; NoVLX-NEXT: kmovw %k0, %eax 12013; NoVLX-NEXT: vzeroupper 12014; NoVLX-NEXT: retq 12015entry: 12016 %0 = bitcast <4 x i64> %__a to <8 x i32> 12017 %load = load i32, ptr %__b 12018 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 12019 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 12020 %2 = icmp sge <8 x i32> %0, %1 12021 %3 = bitcast i8 %__u to <8 x i1> 12022 %4 = and <8 x i1> %3, %2 12023 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 12024 %6 = bitcast <64 x i1> %5 to i64 12025 ret i64 %6 12026} 12027 12028 12029define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 12030; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask: 12031; VLX: # %bb.0: # %entry 12032; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 12033; VLX-NEXT: kmovd %k0, %eax 12034; VLX-NEXT: vzeroupper 12035; VLX-NEXT: retq 12036; 12037; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask: 12038; NoVLX: # %bb.0: # %entry 12039; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 12040; NoVLX-NEXT: kmovw %k0, %eax 12041; NoVLX-NEXT: vzeroupper 12042; NoVLX-NEXT: retq 12043entry: 12044 %0 = bitcast <8 x i64> %__a to <16 x i32> 12045 %1 = bitcast <8 x i64> %__b to <16 x i32> 12046 %2 = icmp sge <16 x i32> %0, %1 12047 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 12048 %4 = bitcast <32 x i1> %3 to i32 12049 ret i32 %4 12050} 12051 12052define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 12053; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem: 12054; VLX: # %bb.0: # %entry 12055; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 12056; VLX-NEXT: kmovd %k0, %eax 12057; VLX-NEXT: vzeroupper 12058; VLX-NEXT: retq 12059; 12060; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem: 12061; NoVLX: # %bb.0: # %entry 12062; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 12063; NoVLX-NEXT: kmovw %k0, %eax 12064; NoVLX-NEXT: vzeroupper 12065; NoVLX-NEXT: retq 12066entry: 12067 %0 = bitcast <8 x i64> %__a to <16 x i32> 12068 %load = load <8 x i64>, ptr %__b 12069 %1 = bitcast <8 x i64> %load to <16 x i32> 12070 %2 = icmp sge 
<16 x i32> %0, %1 12071 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 12072 %4 = bitcast <32 x i1> %3 to i32 12073 ret i32 %4 12074} 12075 12076define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 12077; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask: 12078; VLX: # %bb.0: # %entry 12079; VLX-NEXT: kmovd %edi, %k1 12080; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 12081; VLX-NEXT: kmovd %k0, %eax 12082; VLX-NEXT: vzeroupper 12083; VLX-NEXT: retq 12084; 12085; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask: 12086; NoVLX: # %bb.0: # %entry 12087; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 12088; NoVLX-NEXT: kmovw %k0, %eax 12089; NoVLX-NEXT: andl %edi, %eax 12090; NoVLX-NEXT: vzeroupper 12091; NoVLX-NEXT: retq 12092entry: 12093 %0 = bitcast <8 x i64> %__a to <16 x i32> 12094 %1 = bitcast <8 x i64> %__b to <16 x i32> 12095 %2 = icmp sge <16 x i32> %0, %1 12096 %3 = bitcast i16 %__u to <16 x i1> 12097 %4 = and <16 x i1> %2, %3 12098 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 12099 %6 = bitcast <32 x i1> %5 to i32 12100 ret i32 %6 12101} 12102 12103define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 12104; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem: 12105; VLX: # %bb.0: # %entry 12106; VLX-NEXT: kmovd %edi, %k1 12107; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} 12108; 
VLX-NEXT: kmovd %k0, %eax 12109; VLX-NEXT: vzeroupper 12110; VLX-NEXT: retq 12111; 12112; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem: 12113; NoVLX: # %bb.0: # %entry 12114; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 12115; NoVLX-NEXT: kmovw %k0, %eax 12116; NoVLX-NEXT: andl %edi, %eax 12117; NoVLX-NEXT: vzeroupper 12118; NoVLX-NEXT: retq 12119entry: 12120 %0 = bitcast <8 x i64> %__a to <16 x i32> 12121 %load = load <8 x i64>, ptr %__b 12122 %1 = bitcast <8 x i64> %load to <16 x i32> 12123 %2 = icmp sge <16 x i32> %0, %1 12124 %3 = bitcast i16 %__u to <16 x i1> 12125 %4 = and <16 x i1> %2, %3 12126 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 12127 %6 = bitcast <32 x i1> %5 to i32 12128 ret i32 %6 12129} 12130 12131 12132define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 12133; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b: 12134; VLX: # %bb.0: # %entry 12135; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 12136; VLX-NEXT: kmovd %k0, %eax 12137; VLX-NEXT: vzeroupper 12138; VLX-NEXT: retq 12139; 12140; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b: 12141; NoVLX: # %bb.0: # %entry 12142; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 12143; NoVLX-NEXT: kmovw %k0, %eax 12144; NoVLX-NEXT: vzeroupper 12145; NoVLX-NEXT: retq 12146entry: 12147 %0 = bitcast <8 x i64> %__a to <16 x i32> 12148 %load = load i32, ptr %__b 12149 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 12150 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 12151 %2 = icmp sge <16 x i32> %0, %1 12152 %3 = shufflevector <16 x i1> %2, 
<16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 12153 %4 = bitcast <32 x i1> %3 to i32 12154 ret i32 %4 12155} 12156 12157define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 12158; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b: 12159; VLX: # %bb.0: # %entry 12160; VLX-NEXT: kmovd %edi, %k1 12161; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} 12162; VLX-NEXT: kmovd %k0, %eax 12163; VLX-NEXT: vzeroupper 12164; VLX-NEXT: retq 12165; 12166; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b: 12167; NoVLX: # %bb.0: # %entry 12168; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 12169; NoVLX-NEXT: kmovw %k0, %eax 12170; NoVLX-NEXT: andl %edi, %eax 12171; NoVLX-NEXT: vzeroupper 12172; NoVLX-NEXT: retq 12173entry: 12174 %0 = bitcast <8 x i64> %__a to <16 x i32> 12175 %load = load i32, ptr %__b 12176 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 12177 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 12178 %2 = icmp sge <16 x i32> %0, %1 12179 %3 = bitcast i16 %__u to <16 x i1> 12180 %4 = and <16 x i1> %3, %2 12181 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 12182 %6 = bitcast <32 x i1> %5 to i32 12183 ret i32 %6 12184} 12185 12186 12187define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr 
{ 12188; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask: 12189; VLX: # %bb.0: # %entry 12190; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 12191; VLX-NEXT: kmovq %k0, %rax 12192; VLX-NEXT: vzeroupper 12193; VLX-NEXT: retq 12194; 12195; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask: 12196; NoVLX: # %bb.0: # %entry 12197; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 12198; NoVLX-NEXT: kmovw %k0, %eax 12199; NoVLX-NEXT: vzeroupper 12200; NoVLX-NEXT: retq 12201entry: 12202 %0 = bitcast <8 x i64> %__a to <16 x i32> 12203 %1 = bitcast <8 x i64> %__b to <16 x i32> 12204 %2 = icmp sge <16 x i32> %0, %1 12205 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 12206 %4 = bitcast <64 x i1> %3 to i64 12207 ret i64 %4 12208} 12209 12210define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 12211; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem: 12212; VLX: # %bb.0: # %entry 12213; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 12214; VLX-NEXT: kmovq %k0, %rax 12215; VLX-NEXT: vzeroupper 12216; VLX-NEXT: retq 12217; 12218; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem: 12219; NoVLX: # %bb.0: # %entry 12220; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 12221; NoVLX-NEXT: kmovw %k0, %eax 12222; NoVLX-NEXT: vzeroupper 12223; NoVLX-NEXT: retq 12224entry: 12225 %0 = bitcast <8 x i64> %__a to <16 x i32> 12226 %load = load <8 x i64>, ptr %__b 12227 %1 = bitcast <8 x i64> %load to <16 x i32> 12228 %2 = icmp sge <16 x i32> %0, %1 12229 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 
1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 12230 %4 = bitcast <64 x i1> %3 to i64 12231 ret i64 %4 12232} 12233 12234define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 12235; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask: 12236; VLX: # %bb.0: # %entry 12237; VLX-NEXT: kmovd %edi, %k1 12238; VLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} 12239; VLX-NEXT: kmovq %k0, %rax 12240; VLX-NEXT: vzeroupper 12241; VLX-NEXT: retq 12242; 12243; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask: 12244; NoVLX: # %bb.0: # %entry 12245; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 12246; NoVLX-NEXT: kmovw %k0, %eax 12247; NoVLX-NEXT: andl %edi, %eax 12248; NoVLX-NEXT: vzeroupper 12249; NoVLX-NEXT: retq 12250entry: 12251 %0 = bitcast <8 x i64> %__a to <16 x i32> 12252 %1 = bitcast <8 x i64> %__b to <16 x i32> 12253 %2 = icmp sge <16 x i32> %0, %1 12254 %3 = bitcast i16 %__u to <16 x i1> 12255 %4 = and <16 x i1> %2, %3 12256 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 12257 %6 = bitcast <64 x i1> %5 to i64 12258 ret i64 %6 12259} 12260 12261define zeroext i64 
@test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 12262; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem: 12263; VLX: # %bb.0: # %entry 12264; VLX-NEXT: kmovd %edi, %k1 12265; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} 12266; VLX-NEXT: kmovq %k0, %rax 12267; VLX-NEXT: vzeroupper 12268; VLX-NEXT: retq 12269; 12270; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem: 12271; NoVLX: # %bb.0: # %entry 12272; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 12273; NoVLX-NEXT: kmovw %k0, %eax 12274; NoVLX-NEXT: andl %edi, %eax 12275; NoVLX-NEXT: vzeroupper 12276; NoVLX-NEXT: retq 12277entry: 12278 %0 = bitcast <8 x i64> %__a to <16 x i32> 12279 %load = load <8 x i64>, ptr %__b 12280 %1 = bitcast <8 x i64> %load to <16 x i32> 12281 %2 = icmp sge <16 x i32> %0, %1 12282 %3 = bitcast i16 %__u to <16 x i1> 12283 %4 = and <16 x i1> %2, %3 12284 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 12285 %6 = bitcast <64 x i1> %5 to i64 12286 ret i64 %6 12287} 12288 12289 12290define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 12291; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b: 12292; VLX: # %bb.0: # %entry 12293; VLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 12294; VLX-NEXT: kmovq %k0, %rax 12295; VLX-NEXT: vzeroupper 12296; VLX-NEXT: retq 12297; 12298; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b: 12299; NoVLX: # %bb.0: # %entry 12300; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 12301; NoVLX-NEXT: 
kmovw %k0, %eax 12302; NoVLX-NEXT: vzeroupper 12303; NoVLX-NEXT: retq 12304entry: 12305 %0 = bitcast <8 x i64> %__a to <16 x i32> 12306 %load = load i32, ptr %__b 12307 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 12308 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 12309 %2 = icmp sge <16 x i32> %0, %1 12310 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 12311 %4 = bitcast <64 x i1> %3 to i64 12312 ret i64 %4 12313} 12314 12315define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 12316; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b: 12317; VLX: # %bb.0: # %entry 12318; VLX-NEXT: kmovd %edi, %k1 12319; VLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} 12320; VLX-NEXT: kmovq %k0, %rax 12321; VLX-NEXT: vzeroupper 12322; VLX-NEXT: retq 12323; 12324; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b: 12325; NoVLX: # %bb.0: # %entry 12326; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 12327; NoVLX-NEXT: kmovw %k0, %eax 12328; NoVLX-NEXT: andl %edi, %eax 12329; NoVLX-NEXT: vzeroupper 12330; NoVLX-NEXT: retq 12331entry: 12332 %0 = bitcast <8 x i64> %__a to <16 x i32> 12333 %load = load i32, ptr %__b 12334 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 12335 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 12336 %2 = icmp sge <16 x i32> %0, %1 12337 %3 = bitcast i16 %__u to <16 x i1> 12338 %4 = and <16 x i1> %3, %2 12339 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 12340 %6 = bitcast <64 x i1> %5 to i64 12341 ret i64 %6 12342} 12343 12344 12345define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12346; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask: 12347; VLX: # %bb.0: # %entry 12348; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 12349; VLX-NEXT: kmovb %k0, %eax 12350; VLX-NEXT: retq 12351; 12352; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask: 12353; NoVLX: # %bb.0: # %entry 12354; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12355; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12356; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12357; NoVLX-NEXT: kmovw %k0, %eax 12358; NoVLX-NEXT: andl $3, %eax 12359; NoVLX-NEXT: vzeroupper 12360; NoVLX-NEXT: retq 12361entry: 12362 %0 = bitcast <2 x i64> %__a to <2 x i64> 12363 %1 = bitcast <2 x i64> %__b to <2 x i64> 12364 %2 = icmp sge <2 x i64> %0, %1 12365 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 12366 %4 = bitcast <4 x i1> %3 to i4 12367 ret i4 %4 12368} 12369 12370define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 12371; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem: 12372; VLX: # %bb.0: # %entry 12373; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 12374; VLX-NEXT: kmovb 
%k0, %eax 12375; VLX-NEXT: retq 12376; 12377; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem: 12378; NoVLX: # %bb.0: # %entry 12379; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12380; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 12381; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12382; NoVLX-NEXT: kmovw %k0, %eax 12383; NoVLX-NEXT: andl $3, %eax 12384; NoVLX-NEXT: vzeroupper 12385; NoVLX-NEXT: retq 12386entry: 12387 %0 = bitcast <2 x i64> %__a to <2 x i64> 12388 %load = load <2 x i64>, ptr %__b 12389 %1 = bitcast <2 x i64> %load to <2 x i64> 12390 %2 = icmp sge <2 x i64> %0, %1 12391 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 12392 %4 = bitcast <4 x i1> %3 to i4 12393 ret i4 %4 12394} 12395 12396define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12397; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: 12398; VLX: # %bb.0: # %entry 12399; VLX-NEXT: kmovd %edi, %k1 12400; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} 12401; VLX-NEXT: kmovb %k0, %eax 12402; VLX-NEXT: retq 12403; 12404; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: 12405; NoVLX: # %bb.0: # %entry 12406; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12407; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12408; NoVLX-NEXT: kmovw %edi, %k1 12409; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12410; NoVLX-NEXT: kmovw %k0, %eax 12411; NoVLX-NEXT: andl $3, %eax 12412; NoVLX-NEXT: vzeroupper 12413; NoVLX-NEXT: retq 12414entry: 12415 %0 = bitcast <2 x i64> %__a to <2 x i64> 12416 %1 = bitcast <2 x i64> %__b to <2 x i64> 12417 %2 = icmp sge <2 x i64> %0, %1 12418 %3 = bitcast i8 %__u to <8 x i1> 12419 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12420 %4 = and <2 x i1> %2, %extract.i 12421 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 12422 %6 = bitcast <4 x i1> %5 to i4 12423 ret i4 %6 
12424} 12425 12426define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 12427; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: 12428; VLX: # %bb.0: # %entry 12429; VLX-NEXT: kmovd %edi, %k1 12430; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} 12431; VLX-NEXT: kmovb %k0, %eax 12432; VLX-NEXT: retq 12433; 12434; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: 12435; NoVLX: # %bb.0: # %entry 12436; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12437; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 12438; NoVLX-NEXT: kmovw %edi, %k1 12439; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12440; NoVLX-NEXT: kmovw %k0, %eax 12441; NoVLX-NEXT: andl $3, %eax 12442; NoVLX-NEXT: vzeroupper 12443; NoVLX-NEXT: retq 12444entry: 12445 %0 = bitcast <2 x i64> %__a to <2 x i64> 12446 %load = load <2 x i64>, ptr %__b 12447 %1 = bitcast <2 x i64> %load to <2 x i64> 12448 %2 = icmp sge <2 x i64> %0, %1 12449 %3 = bitcast i8 %__u to <8 x i1> 12450 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12451 %4 = and <2 x i1> %2, %extract.i 12452 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 12453 %6 = bitcast <4 x i1> %5 to i4 12454 ret i4 %6 12455} 12456 12457 12458define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 12459; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: 12460; VLX: # %bb.0: # %entry 12461; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 12462; VLX-NEXT: kmovb %k0, %eax 12463; VLX-NEXT: retq 12464; 12465; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: 12466; NoVLX: # %bb.0: # %entry 12467; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12468; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 12469; NoVLX-NEXT: kmovw %k0, %eax 12470; NoVLX-NEXT: andl $3, %eax 12471; NoVLX-NEXT: vzeroupper 12472; NoVLX-NEXT: retq 12473entry: 12474 %0 = bitcast <2 x i64> %__a to <2 x i64> 
12475 %load = load i64, ptr %__b 12476 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 12477 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 12478 %2 = icmp sge <2 x i64> %0, %1 12479 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 12480 %4 = bitcast <4 x i1> %3 to i4 12481 ret i4 %4 12482} 12483 12484define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 12485; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: 12486; VLX: # %bb.0: # %entry 12487; VLX-NEXT: kmovd %edi, %k1 12488; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} 12489; VLX-NEXT: kmovb %k0, %eax 12490; VLX-NEXT: retq 12491; 12492; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: 12493; NoVLX: # %bb.0: # %entry 12494; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12495; NoVLX-NEXT: kmovw %edi, %k1 12496; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 12497; NoVLX-NEXT: kmovw %k0, %eax 12498; NoVLX-NEXT: andl $3, %eax 12499; NoVLX-NEXT: vzeroupper 12500; NoVLX-NEXT: retq 12501entry: 12502 %0 = bitcast <2 x i64> %__a to <2 x i64> 12503 %load = load i64, ptr %__b 12504 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 12505 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 12506 %2 = icmp sge <2 x i64> %0, %1 12507 %3 = bitcast i8 %__u to <8 x i1> 12508 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12509 %4 = and <2 x i1> %extract.i, %2 12510 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 12511 %6 = bitcast <4 x i1> %5 to i4 12512 ret i4 %6 12513} 12514 12515 12516define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12517; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: 12518; VLX: # %bb.0: # %entry 12519; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 12520; VLX-NEXT: kmovd 
%k0, %eax 12521; VLX-NEXT: # kill: def $al killed $al killed $eax 12522; VLX-NEXT: retq 12523; 12524; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: 12525; NoVLX: # %bb.0: # %entry 12526; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12527; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12528; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12529; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12530; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12531; NoVLX-NEXT: kmovw %k0, %eax 12532; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12533; NoVLX-NEXT: vzeroupper 12534; NoVLX-NEXT: retq 12535entry: 12536 %0 = bitcast <2 x i64> %__a to <2 x i64> 12537 %1 = bitcast <2 x i64> %__b to <2 x i64> 12538 %2 = icmp sge <2 x i64> %0, %1 12539 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12540 %4 = bitcast <8 x i1> %3 to i8 12541 ret i8 %4 12542} 12543 12544define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 12545; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: 12546; VLX: # %bb.0: # %entry 12547; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 12548; VLX-NEXT: kmovd %k0, %eax 12549; VLX-NEXT: # kill: def $al killed $al killed $eax 12550; VLX-NEXT: retq 12551; 12552; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: 12553; NoVLX: # %bb.0: # %entry 12554; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12555; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 12556; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12557; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12558; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12559; NoVLX-NEXT: kmovw %k0, %eax 12560; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12561; NoVLX-NEXT: vzeroupper 12562; NoVLX-NEXT: retq 12563entry: 12564 %0 = bitcast <2 x i64> %__a to <2 x i64> 12565 %load = load <2 x i64>, ptr %__b 12566 %1 = bitcast <2 x i64> %load to <2 x i64> 12567 %2 = icmp sge <2 x i64> %0, %1 12568 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, 
i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12569 %4 = bitcast <8 x i1> %3 to i8 12570 ret i8 %4 12571} 12572 12573define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12574; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: 12575; VLX: # %bb.0: # %entry 12576; VLX-NEXT: kmovd %edi, %k1 12577; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} 12578; VLX-NEXT: kmovd %k0, %eax 12579; VLX-NEXT: # kill: def $al killed $al killed $eax 12580; VLX-NEXT: retq 12581; 12582; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: 12583; NoVLX: # %bb.0: # %entry 12584; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12585; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12586; NoVLX-NEXT: kmovw %edi, %k1 12587; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12588; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12589; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12590; NoVLX-NEXT: kmovw %k0, %eax 12591; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12592; NoVLX-NEXT: vzeroupper 12593; NoVLX-NEXT: retq 12594entry: 12595 %0 = bitcast <2 x i64> %__a to <2 x i64> 12596 %1 = bitcast <2 x i64> %__b to <2 x i64> 12597 %2 = icmp sge <2 x i64> %0, %1 12598 %3 = bitcast i8 %__u to <8 x i1> 12599 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12600 %4 = and <2 x i1> %2, %extract.i 12601 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12602 %6 = bitcast <8 x i1> %5 to i8 12603 ret i8 %6 12604} 12605 12606define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 12607; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: 12608; VLX: # %bb.0: # %entry 12609; VLX-NEXT: kmovd %edi, %k1 12610; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} 12611; VLX-NEXT: kmovd %k0, %eax 12612; VLX-NEXT: # kill: def $al killed $al killed $eax 12613; VLX-NEXT: retq 12614; 12615; 
NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: 12616; NoVLX: # %bb.0: # %entry 12617; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12618; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 12619; NoVLX-NEXT: kmovw %edi, %k1 12620; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12621; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12622; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12623; NoVLX-NEXT: kmovw %k0, %eax 12624; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12625; NoVLX-NEXT: vzeroupper 12626; NoVLX-NEXT: retq 12627entry: 12628 %0 = bitcast <2 x i64> %__a to <2 x i64> 12629 %load = load <2 x i64>, ptr %__b 12630 %1 = bitcast <2 x i64> %load to <2 x i64> 12631 %2 = icmp sge <2 x i64> %0, %1 12632 %3 = bitcast i8 %__u to <8 x i1> 12633 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12634 %4 = and <2 x i1> %2, %extract.i 12635 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12636 %6 = bitcast <8 x i1> %5 to i8 12637 ret i8 %6 12638} 12639 12640 12641define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 12642; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: 12643; VLX: # %bb.0: # %entry 12644; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 12645; VLX-NEXT: kmovd %k0, %eax 12646; VLX-NEXT: # kill: def $al killed $al killed $eax 12647; VLX-NEXT: retq 12648; 12649; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: 12650; NoVLX: # %bb.0: # %entry 12651; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12652; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 12653; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12654; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12655; NoVLX-NEXT: kmovw %k0, %eax 12656; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12657; NoVLX-NEXT: vzeroupper 12658; NoVLX-NEXT: retq 12659entry: 12660 %0 = bitcast <2 x i64> %__a to <2 x i64> 12661 %load = load i64, ptr %__b 12662 %vec = insertelement <2 x i64> undef, i64 
%load, i32 0 12663 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 12664 %2 = icmp sge <2 x i64> %0, %1 12665 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12666 %4 = bitcast <8 x i1> %3 to i8 12667 ret i8 %4 12668} 12669 12670define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 12671; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: 12672; VLX: # %bb.0: # %entry 12673; VLX-NEXT: kmovd %edi, %k1 12674; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} 12675; VLX-NEXT: kmovd %k0, %eax 12676; VLX-NEXT: # kill: def $al killed $al killed $eax 12677; VLX-NEXT: retq 12678; 12679; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: 12680; NoVLX: # %bb.0: # %entry 12681; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12682; NoVLX-NEXT: kmovw %edi, %k1 12683; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 12684; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12685; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12686; NoVLX-NEXT: kmovw %k0, %eax 12687; NoVLX-NEXT: # kill: def $al killed $al killed $eax 12688; NoVLX-NEXT: vzeroupper 12689; NoVLX-NEXT: retq 12690entry: 12691 %0 = bitcast <2 x i64> %__a to <2 x i64> 12692 %load = load i64, ptr %__b 12693 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 12694 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 12695 %2 = icmp sge <2 x i64> %0, %1 12696 %3 = bitcast i8 %__u to <8 x i1> 12697 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12698 %4 = and <2 x i1> %extract.i, %2 12699 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12700 %6 = bitcast <8 x i1> %5 to i8 12701 ret i8 %6 12702} 12703 12704 12705define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 
12706; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: 12707; VLX: # %bb.0: # %entry 12708; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 12709; VLX-NEXT: kmovd %k0, %eax 12710; VLX-NEXT: # kill: def $ax killed $ax killed $eax 12711; VLX-NEXT: retq 12712; 12713; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: 12714; NoVLX: # %bb.0: # %entry 12715; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12716; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12717; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12718; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12719; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12720; NoVLX-NEXT: kmovw %k0, %eax 12721; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 12722; NoVLX-NEXT: vzeroupper 12723; NoVLX-NEXT: retq 12724entry: 12725 %0 = bitcast <2 x i64> %__a to <2 x i64> 12726 %1 = bitcast <2 x i64> %__b to <2 x i64> 12727 %2 = icmp sge <2 x i64> %0, %1 12728 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12729 %4 = bitcast <16 x i1> %3 to i16 12730 ret i16 %4 12731} 12732 12733define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 12734; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: 12735; VLX: # %bb.0: # %entry 12736; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 12737; VLX-NEXT: kmovd %k0, %eax 12738; VLX-NEXT: # kill: def $ax killed $ax killed $eax 12739; VLX-NEXT: retq 12740; 12741; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: 12742; NoVLX: # %bb.0: # %entry 12743; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12744; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 12745; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12746; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12747; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12748; NoVLX-NEXT: kmovw %k0, %eax 12749; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 12750; NoVLX-NEXT: vzeroupper 12751; NoVLX-NEXT: retq 12752entry: 12753 %0 = bitcast <2 x i64> %__a to <2 
x i64> 12754 %load = load <2 x i64>, ptr %__b 12755 %1 = bitcast <2 x i64> %load to <2 x i64> 12756 %2 = icmp sge <2 x i64> %0, %1 12757 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12758 %4 = bitcast <16 x i1> %3 to i16 12759 ret i16 %4 12760} 12761 12762define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12763; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: 12764; VLX: # %bb.0: # %entry 12765; VLX-NEXT: kmovd %edi, %k1 12766; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} 12767; VLX-NEXT: kmovd %k0, %eax 12768; VLX-NEXT: # kill: def $ax killed $ax killed $eax 12769; VLX-NEXT: retq 12770; 12771; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: 12772; NoVLX: # %bb.0: # %entry 12773; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12774; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12775; NoVLX-NEXT: kmovw %edi, %k1 12776; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12777; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12778; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12779; NoVLX-NEXT: kmovw %k0, %eax 12780; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 12781; NoVLX-NEXT: vzeroupper 12782; NoVLX-NEXT: retq 12783entry: 12784 %0 = bitcast <2 x i64> %__a to <2 x i64> 12785 %1 = bitcast <2 x i64> %__b to <2 x i64> 12786 %2 = icmp sge <2 x i64> %0, %1 12787 %3 = bitcast i8 %__u to <8 x i1> 12788 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12789 %4 = and <2 x i1> %2, %extract.i 12790 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12791 %6 = bitcast <16 x i1> %5 to i16 12792 ret i16 %6 12793} 12794 12795define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> 
%__a, ptr %__b) local_unnamed_addr { 12796; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: 12797; VLX: # %bb.0: # %entry 12798; VLX-NEXT: kmovd %edi, %k1 12799; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} 12800; VLX-NEXT: kmovd %k0, %eax 12801; VLX-NEXT: # kill: def $ax killed $ax killed $eax 12802; VLX-NEXT: retq 12803; 12804; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: 12805; NoVLX: # %bb.0: # %entry 12806; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12807; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 12808; NoVLX-NEXT: kmovw %edi, %k1 12809; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12810; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12811; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12812; NoVLX-NEXT: kmovw %k0, %eax 12813; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 12814; NoVLX-NEXT: vzeroupper 12815; NoVLX-NEXT: retq 12816entry: 12817 %0 = bitcast <2 x i64> %__a to <2 x i64> 12818 %load = load <2 x i64>, ptr %__b 12819 %1 = bitcast <2 x i64> %load to <2 x i64> 12820 %2 = icmp sge <2 x i64> %0, %1 12821 %3 = bitcast i8 %__u to <8 x i1> 12822 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12823 %4 = and <2 x i1> %2, %extract.i 12824 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12825 %6 = bitcast <16 x i1> %5 to i16 12826 ret i16 %6 12827} 12828 12829 12830define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 12831; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: 12832; VLX: # %bb.0: # %entry 12833; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0 12834; VLX-NEXT: kmovd %k0, %eax 12835; VLX-NEXT: # kill: def $ax killed $ax killed $eax 12836; VLX-NEXT: retq 12837; 12838; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: 12839; NoVLX: # %bb.0: # %entry 12840; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12841; NoVLX-NEXT: 
vpcmpnltq (%rdi){1to8}, %zmm0, %k0 12842; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12843; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12844; NoVLX-NEXT: kmovw %k0, %eax 12845; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 12846; NoVLX-NEXT: vzeroupper 12847; NoVLX-NEXT: retq 12848entry: 12849 %0 = bitcast <2 x i64> %__a to <2 x i64> 12850 %load = load i64, ptr %__b 12851 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 12852 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 12853 %2 = icmp sge <2 x i64> %0, %1 12854 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12855 %4 = bitcast <16 x i1> %3 to i16 12856 ret i16 %4 12857} 12858 12859define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 12860; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: 12861; VLX: # %bb.0: # %entry 12862; VLX-NEXT: kmovd %edi, %k1 12863; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1} 12864; VLX-NEXT: kmovd %k0, %eax 12865; VLX-NEXT: # kill: def $ax killed $ax killed $eax 12866; VLX-NEXT: retq 12867; 12868; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: 12869; NoVLX: # %bb.0: # %entry 12870; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12871; NoVLX-NEXT: kmovw %edi, %k1 12872; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 12873; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12874; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12875; NoVLX-NEXT: kmovw %k0, %eax 12876; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 12877; NoVLX-NEXT: vzeroupper 12878; NoVLX-NEXT: retq 12879entry: 12880 %0 = bitcast <2 x i64> %__a to <2 x i64> 12881 %load = load i64, ptr %__b 12882 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 12883 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 12884 %2 = icmp sge <2 x i64> %0, %1 12885 %3 = 
bitcast i8 %__u to <8 x i1> 12886 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12887 %4 = and <2 x i1> %extract.i, %2 12888 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12889 %6 = bitcast <16 x i1> %5 to i16 12890 ret i16 %6 12891} 12892 12893 12894define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12895; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: 12896; VLX: # %bb.0: # %entry 12897; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 12898; VLX-NEXT: kmovd %k0, %eax 12899; VLX-NEXT: retq 12900; 12901; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: 12902; NoVLX: # %bb.0: # %entry 12903; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12904; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12905; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12906; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12907; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12908; NoVLX-NEXT: kmovw %k0, %eax 12909; NoVLX-NEXT: vzeroupper 12910; NoVLX-NEXT: retq 12911entry: 12912 %0 = bitcast <2 x i64> %__a to <2 x i64> 12913 %1 = bitcast <2 x i64> %__b to <2 x i64> 12914 %2 = icmp sge <2 x i64> %0, %1 12915 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12916 %4 = bitcast <32 x i1> %3 to i32 12917 ret i32 %4 12918} 12919 12920define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 12921; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem: 12922; VLX: # %bb.0: # %entry 12923; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 12924; VLX-NEXT: kmovd %k0, %eax 12925; VLX-NEXT: retq 12926; 12927; NoVLX-LABEL: 
test_vpcmpsgeq_v2i1_v32i1_mask_mem: 12928; NoVLX: # %bb.0: # %entry 12929; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12930; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 12931; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 12932; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12933; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12934; NoVLX-NEXT: kmovw %k0, %eax 12935; NoVLX-NEXT: vzeroupper 12936; NoVLX-NEXT: retq 12937entry: 12938 %0 = bitcast <2 x i64> %__a to <2 x i64> 12939 %load = load <2 x i64>, ptr %__b 12940 %1 = bitcast <2 x i64> %load to <2 x i64> 12941 %2 = icmp sge <2 x i64> %0, %1 12942 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12943 %4 = bitcast <32 x i1> %3 to i32 12944 ret i32 %4 12945} 12946 12947define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 12948; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: 12949; VLX: # %bb.0: # %entry 12950; VLX-NEXT: kmovd %edi, %k1 12951; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} 12952; VLX-NEXT: kmovd %k0, %eax 12953; VLX-NEXT: retq 12954; 12955; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: 12956; NoVLX: # %bb.0: # %entry 12957; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 12958; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12959; NoVLX-NEXT: kmovw %edi, %k1 12960; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12961; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12962; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12963; NoVLX-NEXT: kmovw %k0, %eax 12964; NoVLX-NEXT: vzeroupper 12965; NoVLX-NEXT: retq 12966entry: 12967 %0 = bitcast <2 x i64> %__a to <2 x i64> 12968 %1 = bitcast <2 x i64> %__b to <2 x i64> 12969 %2 = icmp sge <2 x i64> %0, %1 12970 %3 = bitcast i8 %__u to <8 x i1> 12971 %extract.i = shufflevector <8 x 
i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 12972 %4 = and <2 x i1> %2, %extract.i 12973 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 12974 %6 = bitcast <32 x i1> %5 to i32 12975 ret i32 %6 12976} 12977 12978define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 12979; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: 12980; VLX: # %bb.0: # %entry 12981; VLX-NEXT: kmovd %edi, %k1 12982; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} 12983; VLX-NEXT: kmovd %k0, %eax 12984; VLX-NEXT: retq 12985; 12986; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: 12987; NoVLX: # %bb.0: # %entry 12988; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 12989; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 12990; NoVLX-NEXT: kmovw %edi, %k1 12991; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 12992; NoVLX-NEXT: kshiftlw $14, %k0, %k0 12993; NoVLX-NEXT: kshiftrw $14, %k0, %k0 12994; NoVLX-NEXT: kmovw %k0, %eax 12995; NoVLX-NEXT: vzeroupper 12996; NoVLX-NEXT: retq 12997entry: 12998 %0 = bitcast <2 x i64> %__a to <2 x i64> 12999 %load = load <2 x i64>, ptr %__b 13000 %1 = bitcast <2 x i64> %load to <2 x i64> 13001 %2 = icmp sge <2 x i64> %0, %1 13002 %3 = bitcast i8 %__u to <8 x i1> 13003 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 13004 %4 = and <2 x i1> %2, %extract.i 13005 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 13006 %6 = bitcast <32 x i1> %5 to i32 13007 ret i32 %6 13008} 13009 

; Signed >= compare of %__a against one i64 loaded from %__b and splatted to
; both lanes. The two result bits are widened to <32 x i1> and returned as i32.
define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <2 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <2 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %cmp = icmp sge <2 x i64> %lhs, %splat
  ; Indices 2 and 3 select lanes of the zeroinitializer operand.
  %wide = shufflevector <2 x i1> %cmp, <2 x i1> zeroinitializer, <32 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}

; Same splat compare as above, with the low two bits of %__u ANDed into the
; compare result before it is widened to <32 x i1>.
define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <2 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <2 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %cmp = icmp sge <2 x i64> %lhs, %splat
  %ubits = bitcast i8 %__u to <8 x i1>
  %ulow = shufflevector <8 x i1> %ubits, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  ; The broadcast variants put the mask operand first in the and.
  %masked = and <2 x i1> %ulow, %cmp
  ; Indices 2 and 3 select lanes of the zeroinitializer operand.
  %wide = shufflevector <2 x i1> %masked, <2 x i1> zeroinitializer, <32 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %bits = bitcast <32 x i1> %wide to i32
  ret i32 %bits
}


; Register-register form. Signed >= compare of two <2 x i64> arguments, with
; the two result bits widened to <64 x i1> and returned as i64.
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <2 x i64>
  %rhs = bitcast <2 x i64> %__b to <2 x i64>
  %cmp = icmp sge <2 x i64> %lhs, %rhs
  ; Indices 2 and 3 select lanes of the zeroinitializer operand.
  %wide = shufflevector <2 x i1> %cmp, <2 x i1> zeroinitializer, <64 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Full-vector memory form. The right-hand operand is loaded from %__b as a
; whole <2 x i64> before the signed >= compare.
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <2 x i64>
  %mem = load <2 x i64>, ptr %__b
  %rhs = bitcast <2 x i64> %mem to <2 x i64>
  %cmp = icmp sge <2 x i64> %lhs, %rhs
  ; Indices 2 and 3 select lanes of the zeroinitializer operand.
  %wide = shufflevector <2 x i1> %cmp, <2 x i1> zeroinitializer, <64 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked register-register form. The low two bits of %__u are ANDed with the
; compare result before it is widened to <64 x i1>.
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <2 x i64>
  %rhs = bitcast <2 x i64> %__b to <2 x i64>
  %cmp = icmp sge <2 x i64> %lhs, %rhs
  %ubits = bitcast i8 %__u to <8 x i1>
  %ulow = shufflevector <8 x i1> %ubits, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %masked = and <2 x i1> %cmp, %ulow
  ; Indices 2 and 3 select lanes of the zeroinitializer operand.
  %wide = shufflevector <2 x i1> %masked, <2 x i1> zeroinitializer, <64 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked full-vector memory form. The right-hand operand is loaded from %__b
; and the low two bits of %__u are ANDed with the compare result.
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <2 x i64>
  %mem = load <2 x i64>, ptr %__b
  %rhs = bitcast <2 x i64> %mem to <2 x i64>
  %cmp = icmp sge <2 x i64> %lhs, %rhs
  %ubits = bitcast i8 %__u to <8 x i1>
  %ulow = shufflevector <8 x i1> %ubits, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %masked = and <2 x i1> %cmp, %ulow
  ; Indices 2 and 3 select lanes of the zeroinitializer operand.
  %wide = shufflevector <2 x i1> %masked, <2 x i1> zeroinitializer, <64 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}


; Broadcast memory form. One i64 is loaded from %__b and splatted to both
; lanes, and the two compare bits are widened to <64 x i1>.
define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <2 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <2 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %cmp = icmp sge <2 x i64> %lhs, %splat
  ; Indices 2 and 3 select lanes of the zeroinitializer operand.
  %wide = shufflevector <2 x i1> %cmp, <2 x i1> zeroinitializer, <64 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}

; Masked broadcast memory form. Same splat compare, with the low two bits of
; %__u ANDed into the result before it is widened to <64 x i1>.
define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpnltq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <2 x i64> %__a to <2 x i64>
  %scalar = load i64, ptr %__b
  %lane0 = insertelement <2 x i64> undef, i64 %scalar, i32 0
  %splat = shufflevector <2 x i64> %lane0, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
  %cmp = icmp sge <2 x i64> %lhs, %splat
  %ubits = bitcast i8 %__u to <8 x i1>
  %ulow = shufflevector <8 x i1> %ubits, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  ; The broadcast variants put the mask operand first in the and.
  %masked = and <2 x i1> %ulow, %cmp
  ; Indices 2 and 3 select lanes of the zeroinitializer operand.
  %wide = shufflevector <2 x i1> %masked, <2 x i1> zeroinitializer, <64 x i32> <
    i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3,
    i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %bits = bitcast <64 x i1> %wide to i64
  ret i64 %bits
}


; Register-register form on <4 x i64>. The four compare bits are widened to
; <8 x i1> and returned as i8.
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpnltq %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %lhs = bitcast <4 x i64> %__a to <4 x i64>
  %rhs = bitcast <4 x i64> %__b to <4 x i64>
  %cmp = icmp sge <4 x i64> %lhs, %rhs
  ; Indices 4 through 7 select lanes of the zeroinitializer operand.
  %wide = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %bits = bitcast <8 x i1> %wide to i8
  ret i8 %bits
}

; Signed >= of a <4 x i64> register operand against a <4 x i64> loaded from
; %__b. Result widened to <8 x i1> with zero upper lanes and returned as i8.
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Register/register signed >= on <4 x i64>, ANDed with the low four bits of
; %__u before being widened to <8 x i1> and returned as i8.
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Masked variant with the second <4 x i64> operand loaded from %__b; result
; widened to <8 x i1> and returned as i8.
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}


; Second operand is a single i64 loaded from %__b and splatted across all four
; lanes (insertelement + all-zero shuffle mask); result widened to <8 x i1>.
define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Masked variant of the splatted-i64 compare; result widened to <8 x i1>.
define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}


; Register/register signed >= on <4 x i64>; the <4 x i1> result is widened to
; <16 x i1> with zero upper lanes and returned as i16.
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Same compare with the second <4 x i64> operand loaded from %__b; result
; widened to <16 x i1> and returned as i16.
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Register/register compare ANDed with the low four bits of %__u; result
; widened to <16 x i1> and returned as i16.
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Masked variant with the second <4 x i64> operand loaded from %__b; result
; widened to <16 x i1> and returned as i16.
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; Second operand is a single i64 loaded from %__b and splatted across all four
; lanes; result widened to <16 x i1> and returned as i16.
define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked variant of the splatted-i64 compare; result widened to <16 x i1>.
define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; Register/register signed >= on <4 x i64>; the <4 x i1> result is widened to
; <32 x i1> with zero upper lanes and returned as i32.
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Same compare with the second <4 x i64> operand loaded from %__b; result
; widened to <32 x i1> and returned as i32.
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Register/register compare ANDed with the low four bits of %__u; result
; widened to <32 x i1> and returned as i32.
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Masked variant with the second <4 x i64> operand loaded from %__b; result
; widened to <32 x i1> and returned as i32.
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Second operand is a single i64 loaded from %__b and splatted across all four
; lanes; result widened to <32 x i1> and returned as i32.
define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked variant of the splatted-i64 compare; result widened to <32 x i1>.
define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Register/register signed >= on <4 x i64>; the <4 x i1> result is widened to
; <64 x i1> with zero upper lanes and returned as i64.
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Same compare with the second <4 x i64> operand loaded from %__b; result
; widened to <64 x i1> and returned as i64.
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rdi), %ymm1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Register/register compare ANDed with the low four bits of %__u; result
; widened to <64 x i1> and returned as i64.
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Masked variant with the second <4 x i64> operand loaded from %__b; result
; widened to <64 x i1> and returned as i64.
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vmovdqa (%rsi), %ymm1
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; Second operand is a single i64 loaded from %__b and splatted across all four
; lanes; result widened to <64 x i1> and returned as i64.
define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    vpcmpnltq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked variant of the splatted-i64 compare; result widened to <64 x i1>.
define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kshiftlw $12, %k0, %k0
; NoVLX-NEXT:    kshiftrw $12, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp sge <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; Register/register signed >= on <8 x i64>; the <8 x i1> result is widened to
; <16 x i1> with zero upper lanes (indices 8-15 select from zeroinitializer)
; and returned as i16.
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Same <8 x i64> compare with the second operand loaded from %__b; result
; widened to <16 x i1> and returned as i16.
define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vpcmpnltq (%rdi), %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $ax killed $ax killed $eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vpcmpnltq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp sge <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and 
<8 x i1> %2, %3 14079 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14080 %6 = bitcast <16 x i1> %5 to i16 14081 ret i16 %6 14082} 14083 14084define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 14085; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem: 14086; VLX: # %bb.0: # %entry 14087; VLX-NEXT: kmovd %edi, %k1 14088; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14089; VLX-NEXT: kmovd %k0, %eax 14090; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14091; VLX-NEXT: vzeroupper 14092; VLX-NEXT: retq 14093; 14094; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem: 14095; NoVLX: # %bb.0: # %entry 14096; NoVLX-NEXT: kmovw %edi, %k1 14097; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14098; NoVLX-NEXT: kmovw %k0, %eax 14099; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14100; NoVLX-NEXT: vzeroupper 14101; NoVLX-NEXT: retq 14102entry: 14103 %0 = bitcast <8 x i64> %__a to <8 x i64> 14104 %load = load <8 x i64>, ptr %__b 14105 %1 = bitcast <8 x i64> %load to <8 x i64> 14106 %2 = icmp sge <8 x i64> %0, %1 14107 %3 = bitcast i8 %__u to <8 x i1> 14108 %4 = and <8 x i1> %2, %3 14109 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14110 %6 = bitcast <16 x i1> %5 to i16 14111 ret i16 %6 14112} 14113 14114 14115define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 14116; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: 14117; VLX: # %bb.0: # %entry 14118; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14119; VLX-NEXT: kmovd %k0, %eax 14120; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14121; VLX-NEXT: vzeroupper 14122; VLX-NEXT: retq 14123; 14124; NoVLX-LABEL: 
test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: 14125; NoVLX: # %bb.0: # %entry 14126; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14127; NoVLX-NEXT: kmovw %k0, %eax 14128; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14129; NoVLX-NEXT: vzeroupper 14130; NoVLX-NEXT: retq 14131entry: 14132 %0 = bitcast <8 x i64> %__a to <8 x i64> 14133 %load = load i64, ptr %__b 14134 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14135 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14136 %2 = icmp sge <8 x i64> %0, %1 14137 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14138 %4 = bitcast <16 x i1> %3 to i16 14139 ret i16 %4 14140} 14141 14142define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 14143; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: 14144; VLX: # %bb.0: # %entry 14145; VLX-NEXT: kmovd %edi, %k1 14146; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14147; VLX-NEXT: kmovd %k0, %eax 14148; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14149; VLX-NEXT: vzeroupper 14150; VLX-NEXT: retq 14151; 14152; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: 14153; NoVLX: # %bb.0: # %entry 14154; NoVLX-NEXT: kmovw %edi, %k1 14155; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14156; NoVLX-NEXT: kmovw %k0, %eax 14157; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14158; NoVLX-NEXT: vzeroupper 14159; NoVLX-NEXT: retq 14160entry: 14161 %0 = bitcast <8 x i64> %__a to <8 x i64> 14162 %load = load i64, ptr %__b 14163 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14164 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14165 %2 = icmp sge <8 x i64> %0, %1 14166 %3 = bitcast i8 %__u to <8 
x i1> 14167 %4 = and <8 x i1> %3, %2 14168 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14169 %6 = bitcast <16 x i1> %5 to i16 14170 ret i16 %6 14171} 14172 14173 14174define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 14175; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: 14176; VLX: # %bb.0: # %entry 14177; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14178; VLX-NEXT: kmovd %k0, %eax 14179; VLX-NEXT: vzeroupper 14180; VLX-NEXT: retq 14181; 14182; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: 14183; NoVLX: # %bb.0: # %entry 14184; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14185; NoVLX-NEXT: kmovw %k0, %eax 14186; NoVLX-NEXT: vzeroupper 14187; NoVLX-NEXT: retq 14188entry: 14189 %0 = bitcast <8 x i64> %__a to <8 x i64> 14190 %1 = bitcast <8 x i64> %__b to <8 x i64> 14191 %2 = icmp sge <8 x i64> %0, %1 14192 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14193 %4 = bitcast <32 x i1> %3 to i32 14194 ret i32 %4 14195} 14196 14197define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 14198; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: 14199; VLX: # %bb.0: # %entry 14200; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 14201; VLX-NEXT: kmovd %k0, %eax 14202; VLX-NEXT: vzeroupper 14203; VLX-NEXT: retq 14204; 14205; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: 14206; NoVLX: # %bb.0: # %entry 14207; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 14208; NoVLX-NEXT: kmovw %k0, %eax 14209; NoVLX-NEXT: vzeroupper 14210; NoVLX-NEXT: retq 14211entry: 14212 %0 = bitcast <8 x i64> %__a to <8 x i64> 14213 
%load = load <8 x i64>, ptr %__b 14214 %1 = bitcast <8 x i64> %load to <8 x i64> 14215 %2 = icmp sge <8 x i64> %0, %1 14216 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14217 %4 = bitcast <32 x i1> %3 to i32 14218 ret i32 %4 14219} 14220 14221define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 14222; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: 14223; VLX: # %bb.0: # %entry 14224; VLX-NEXT: kmovd %edi, %k1 14225; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14226; VLX-NEXT: kmovd %k0, %eax 14227; VLX-NEXT: vzeroupper 14228; VLX-NEXT: retq 14229; 14230; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: 14231; NoVLX: # %bb.0: # %entry 14232; NoVLX-NEXT: kmovw %edi, %k1 14233; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14234; NoVLX-NEXT: kmovw %k0, %eax 14235; NoVLX-NEXT: vzeroupper 14236; NoVLX-NEXT: retq 14237entry: 14238 %0 = bitcast <8 x i64> %__a to <8 x i64> 14239 %1 = bitcast <8 x i64> %__b to <8 x i64> 14240 %2 = icmp sge <8 x i64> %0, %1 14241 %3 = bitcast i8 %__u to <8 x i1> 14242 %4 = and <8 x i1> %2, %3 14243 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14244 %6 = bitcast <32 x i1> %5 to i32 14245 ret i32 %6 14246} 14247 14248define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 14249; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: 14250; VLX: # %bb.0: # %entry 14251; 
VLX-NEXT: kmovd %edi, %k1 14252; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14253; VLX-NEXT: kmovd %k0, %eax 14254; VLX-NEXT: vzeroupper 14255; VLX-NEXT: retq 14256; 14257; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: 14258; NoVLX: # %bb.0: # %entry 14259; NoVLX-NEXT: kmovw %edi, %k1 14260; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14261; NoVLX-NEXT: kmovw %k0, %eax 14262; NoVLX-NEXT: vzeroupper 14263; NoVLX-NEXT: retq 14264entry: 14265 %0 = bitcast <8 x i64> %__a to <8 x i64> 14266 %load = load <8 x i64>, ptr %__b 14267 %1 = bitcast <8 x i64> %load to <8 x i64> 14268 %2 = icmp sge <8 x i64> %0, %1 14269 %3 = bitcast i8 %__u to <8 x i1> 14270 %4 = and <8 x i1> %2, %3 14271 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14272 %6 = bitcast <32 x i1> %5 to i32 14273 ret i32 %6 14274} 14275 14276 14277define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 14278; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: 14279; VLX: # %bb.0: # %entry 14280; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14281; VLX-NEXT: kmovd %k0, %eax 14282; VLX-NEXT: vzeroupper 14283; VLX-NEXT: retq 14284; 14285; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: 14286; NoVLX: # %bb.0: # %entry 14287; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14288; NoVLX-NEXT: kmovw %k0, %eax 14289; NoVLX-NEXT: vzeroupper 14290; NoVLX-NEXT: retq 14291entry: 14292 %0 = bitcast <8 x i64> %__a to <8 x i64> 14293 %load = load i64, ptr %__b 14294 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14295 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14296 %2 = icmp sge <8 x i64> %0, %1 14297 %3 = shufflevector <8 
x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14298 %4 = bitcast <32 x i1> %3 to i32 14299 ret i32 %4 14300} 14301 14302define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 14303; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: 14304; VLX: # %bb.0: # %entry 14305; VLX-NEXT: kmovd %edi, %k1 14306; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14307; VLX-NEXT: kmovd %k0, %eax 14308; VLX-NEXT: vzeroupper 14309; VLX-NEXT: retq 14310; 14311; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: 14312; NoVLX: # %bb.0: # %entry 14313; NoVLX-NEXT: kmovw %edi, %k1 14314; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14315; NoVLX-NEXT: kmovw %k0, %eax 14316; NoVLX-NEXT: vzeroupper 14317; NoVLX-NEXT: retq 14318entry: 14319 %0 = bitcast <8 x i64> %__a to <8 x i64> 14320 %load = load i64, ptr %__b 14321 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14322 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14323 %2 = icmp sge <8 x i64> %0, %1 14324 %3 = bitcast i8 %__u to <8 x i1> 14325 %4 = and <8 x i1> %3, %2 14326 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14327 %6 = bitcast <32 x i1> %5 to i32 14328 ret i32 %6 14329} 14330 14331 14332define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 14333; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: 14334; VLX: # 
%bb.0: # %entry 14335; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14336; VLX-NEXT: kmovq %k0, %rax 14337; VLX-NEXT: vzeroupper 14338; VLX-NEXT: retq 14339; 14340; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: 14341; NoVLX: # %bb.0: # %entry 14342; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 14343; NoVLX-NEXT: kmovw %k0, %eax 14344; NoVLX-NEXT: vzeroupper 14345; NoVLX-NEXT: retq 14346entry: 14347 %0 = bitcast <8 x i64> %__a to <8 x i64> 14348 %1 = bitcast <8 x i64> %__b to <8 x i64> 14349 %2 = icmp sge <8 x i64> %0, %1 14350 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14351 %4 = bitcast <64 x i1> %3 to i64 14352 ret i64 %4 14353} 14354 14355define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 14356; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: 14357; VLX: # %bb.0: # %entry 14358; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 14359; VLX-NEXT: kmovq %k0, %rax 14360; VLX-NEXT: vzeroupper 14361; VLX-NEXT: retq 14362; 14363; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: 14364; NoVLX: # %bb.0: # %entry 14365; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 14366; NoVLX-NEXT: kmovw %k0, %eax 14367; NoVLX-NEXT: vzeroupper 14368; NoVLX-NEXT: retq 14369entry: 14370 %0 = bitcast <8 x i64> %__a to <8 x i64> 14371 %load = load <8 x i64>, ptr %__b 14372 %1 = bitcast <8 x i64> %load to <8 x i64> 14373 %2 = icmp sge <8 x i64> %0, %1 14374 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 
5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14375 %4 = bitcast <64 x i1> %3 to i64 14376 ret i64 %4 14377} 14378 14379define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 14380; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: 14381; VLX: # %bb.0: # %entry 14382; VLX-NEXT: kmovd %edi, %k1 14383; VLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14384; VLX-NEXT: kmovq %k0, %rax 14385; VLX-NEXT: vzeroupper 14386; VLX-NEXT: retq 14387; 14388; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: 14389; NoVLX: # %bb.0: # %entry 14390; NoVLX-NEXT: kmovw %edi, %k1 14391; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} 14392; NoVLX-NEXT: kmovw %k0, %eax 14393; NoVLX-NEXT: vzeroupper 14394; NoVLX-NEXT: retq 14395entry: 14396 %0 = bitcast <8 x i64> %__a to <8 x i64> 14397 %1 = bitcast <8 x i64> %__b to <8 x i64> 14398 %2 = icmp sge <8 x i64> %0, %1 14399 %3 = bitcast i8 %__u to <8 x i1> 14400 %4 = and <8 x i1> %2, %3 14401 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14402 %6 = bitcast <64 x i1> %5 to i64 
14403 ret i64 %6 14404} 14405 14406define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 14407; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: 14408; VLX: # %bb.0: # %entry 14409; VLX-NEXT: kmovd %edi, %k1 14410; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14411; VLX-NEXT: kmovq %k0, %rax 14412; VLX-NEXT: vzeroupper 14413; VLX-NEXT: retq 14414; 14415; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: 14416; NoVLX: # %bb.0: # %entry 14417; NoVLX-NEXT: kmovw %edi, %k1 14418; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} 14419; NoVLX-NEXT: kmovw %k0, %eax 14420; NoVLX-NEXT: vzeroupper 14421; NoVLX-NEXT: retq 14422entry: 14423 %0 = bitcast <8 x i64> %__a to <8 x i64> 14424 %load = load <8 x i64>, ptr %__b 14425 %1 = bitcast <8 x i64> %load to <8 x i64> 14426 %2 = icmp sge <8 x i64> %0, %1 14427 %3 = bitcast i8 %__u to <8 x i1> 14428 %4 = and <8 x i1> %2, %3 14429 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14430 %6 = bitcast <64 x i1> %5 to i64 14431 ret i64 %6 14432} 14433 14434 14435define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 14436; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: 14437; VLX: # %bb.0: # %entry 14438; VLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14439; VLX-NEXT: kmovq %k0, %rax 14440; VLX-NEXT: vzeroupper 14441; VLX-NEXT: retq 14442; 14443; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: 14444; 
NoVLX: # %bb.0: # %entry 14445; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 14446; NoVLX-NEXT: kmovw %k0, %eax 14447; NoVLX-NEXT: vzeroupper 14448; NoVLX-NEXT: retq 14449entry: 14450 %0 = bitcast <8 x i64> %__a to <8 x i64> 14451 %load = load i64, ptr %__b 14452 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14453 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14454 %2 = icmp sge <8 x i64> %0, %1 14455 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14456 %4 = bitcast <64 x i1> %3 to i64 14457 ret i64 %4 14458} 14459 14460define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 14461; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: 14462; VLX: # %bb.0: # %entry 14463; VLX-NEXT: kmovd %edi, %k1 14464; VLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14465; VLX-NEXT: kmovq %k0, %rax 14466; VLX-NEXT: vzeroupper 14467; VLX-NEXT: retq 14468; 14469; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: 14470; NoVLX: # %bb.0: # %entry 14471; NoVLX-NEXT: kmovw %edi, %k1 14472; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} 14473; NoVLX-NEXT: kmovw %k0, %eax 14474; NoVLX-NEXT: vzeroupper 14475; NoVLX-NEXT: retq 14476entry: 14477 %0 = bitcast <8 x i64> %__a to <8 x i64> 14478 %load = load i64, ptr %__b 14479 %vec = insertelement <8 x i64> undef, i64 %load, i32 0 14480 %1 = shufflevector <8 x i64> %vec, 
<8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 14481 %2 = icmp sge <8 x i64> %0, %1 14482 %3 = bitcast i8 %__u to <8 x i1> 14483 %4 = and <8 x i1> %3, %2 14484 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14485 %6 = bitcast <64 x i1> %5 to i64 14486 ret i64 %6 14487} 14488 14489 14490define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14491; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask: 14492; VLX: # %bb.0: # %entry 14493; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 14494; VLX-NEXT: kmovd %k0, %eax 14495; VLX-NEXT: retq 14496; 14497; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask: 14498; NoVLX: # %bb.0: # %entry 14499; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 14500; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14501; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14502; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14503; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14504; NoVLX-NEXT: kmovw %k0, %eax 14505; NoVLX-NEXT: vzeroupper 14506; NoVLX-NEXT: retq 14507entry: 14508 %0 = bitcast <2 x i64> %__a to <16 x i8> 14509 %1 = bitcast <2 x i64> %__b to <16 x i8> 14510 %2 = icmp ult <16 x i8> %0, %1 14511 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 
30, i32 31> 14512 %4 = bitcast <32 x i1> %3 to i32 14513 ret i32 %4 14514} 14515 14516define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 14517; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: 14518; VLX: # %bb.0: # %entry 14519; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0 14520; VLX-NEXT: kmovd %k0, %eax 14521; VLX-NEXT: retq 14522; 14523; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: 14524; NoVLX: # %bb.0: # %entry 14525; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1 14526; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14527; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14528; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14529; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14530; NoVLX-NEXT: kmovw %k0, %eax 14531; NoVLX-NEXT: vzeroupper 14532; NoVLX-NEXT: retq 14533entry: 14534 %0 = bitcast <2 x i64> %__a to <16 x i8> 14535 %load = load <2 x i64>, ptr %__b 14536 %1 = bitcast <2 x i64> %load to <16 x i8> 14537 %2 = icmp ult <16 x i8> %0, %1 14538 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14539 %4 = bitcast <32 x i1> %3 to i32 14540 ret i32 %4 14541} 14542 14543define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14544; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: 14545; VLX: # %bb.0: # %entry 14546; VLX-NEXT: kmovd %edi, %k1 14547; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1} 14548; VLX-NEXT: kmovd %k0, %eax 14549; VLX-NEXT: retq 14550; 14551; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: 14552; NoVLX: # %bb.0: # %entry 14553; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 14554; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14555; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14556; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14557; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14558; NoVLX-NEXT: kmovw %k0, %eax 14559; NoVLX-NEXT: andl %edi, %eax 14560; NoVLX-NEXT: vzeroupper 14561; NoVLX-NEXT: retq 14562entry: 14563 %0 = bitcast <2 x i64> %__a to <16 x i8> 14564 %1 = bitcast <2 x i64> %__b to <16 x i8> 14565 %2 = icmp ult <16 x i8> %0, %1 14566 %3 = bitcast i16 %__u to <16 x i1> 14567 %4 = and <16 x i1> %2, %3 14568 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14569 %6 = bitcast <32 x i1> %5 to i32 14570 ret i32 %6 14571} 14572 14573define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 14574; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: 14575; VLX: # %bb.0: # %entry 14576; VLX-NEXT: kmovd %edi, %k1 14577; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1} 14578; VLX-NEXT: kmovd %k0, %eax 14579; VLX-NEXT: retq 14580; 14581; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: 14582; NoVLX: # %bb.0: # %entry 14583; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1 14584; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14585; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14586; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14587; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14588; NoVLX-NEXT: kmovw %k0, %eax 14589; NoVLX-NEXT: andl %edi, %eax 14590; NoVLX-NEXT: vzeroupper 14591; NoVLX-NEXT: retq 14592entry: 14593 %0 = bitcast <2 x i64> %__a to <16 x i8> 14594 %load = load <2 x i64>, ptr %__b 14595 %1 = bitcast <2 x i64> %load to <16 x i8> 14596 %2 = icmp ult <16 x i8> %0, %1 14597 %3 = bitcast i16 %__u to <16 x i1> 14598 %4 = and <16 x i1> %2, %3 14599 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, 
i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14600 %6 = bitcast <32 x i1> %5 to i32 14601 ret i32 %6 14602} 14603 14604 14605define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14606; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask: 14607; VLX: # %bb.0: # %entry 14608; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 14609; VLX-NEXT: kmovq %k0, %rax 14610; VLX-NEXT: retq 14611; 14612; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask: 14613; NoVLX: # %bb.0: # %entry 14614; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 14615; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14616; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14617; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14618; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14619; NoVLX-NEXT: kmovw %k0, %eax 14620; NoVLX-NEXT: vzeroupper 14621; NoVLX-NEXT: retq 14622entry: 14623 %0 = bitcast <2 x i64> %__a to <16 x i8> 14624 %1 = bitcast <2 x i64> %__b to <16 x i8> 14625 %2 = icmp ult <16 x i8> %0, %1 14626 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14627 %4 = bitcast <64 x i1> %3 to i64 14628 ret i64 %4 14629} 14630 14631define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 14632; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: 14633; VLX: # %bb.0: # %entry 
14634; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0 14635; VLX-NEXT: kmovq %k0, %rax 14636; VLX-NEXT: retq 14637; 14638; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: 14639; NoVLX: # %bb.0: # %entry 14640; NoVLX-NEXT: vpmaxub (%rdi), %xmm0, %xmm1 14641; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14642; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14643; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14644; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14645; NoVLX-NEXT: kmovw %k0, %eax 14646; NoVLX-NEXT: vzeroupper 14647; NoVLX-NEXT: retq 14648entry: 14649 %0 = bitcast <2 x i64> %__a to <16 x i8> 14650 %load = load <2 x i64>, ptr %__b 14651 %1 = bitcast <2 x i64> %load to <16 x i8> 14652 %2 = icmp ult <16 x i8> %0, %1 14653 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14654 %4 = bitcast <64 x i1> %3 to i64 14655 ret i64 %4 14656} 14657 14658define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14659; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: 14660; VLX: # %bb.0: # %entry 14661; VLX-NEXT: kmovd %edi, %k1 14662; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1} 14663; VLX-NEXT: kmovq %k0, %rax 14664; VLX-NEXT: retq 14665; 14666; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: 14667; NoVLX: # %bb.0: # %entry 14668; NoVLX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1 14669; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14670; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14671; NoVLX-NEXT: vpmovsxbd 
%xmm0, %zmm0 14672; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14673; NoVLX-NEXT: kmovw %k0, %eax 14674; NoVLX-NEXT: andl %edi, %eax 14675; NoVLX-NEXT: vzeroupper 14676; NoVLX-NEXT: retq 14677entry: 14678 %0 = bitcast <2 x i64> %__a to <16 x i8> 14679 %1 = bitcast <2 x i64> %__b to <16 x i8> 14680 %2 = icmp ult <16 x i8> %0, %1 14681 %3 = bitcast i16 %__u to <16 x i1> 14682 %4 = and <16 x i1> %2, %3 14683 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14684 %6 = bitcast <64 x i1> %5 to i64 14685 ret i64 %6 14686} 14687 14688define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 14689; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: 14690; VLX: # %bb.0: # %entry 14691; VLX-NEXT: kmovd %edi, %k1 14692; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1} 14693; VLX-NEXT: kmovq %k0, %rax 14694; VLX-NEXT: retq 14695; 14696; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: 14697; NoVLX: # %bb.0: # %entry 14698; NoVLX-NEXT: vpmaxub (%rsi), %xmm0, %xmm1 14699; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 14700; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14701; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14702; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14703; NoVLX-NEXT: kmovw %k0, %eax 14704; NoVLX-NEXT: andl %edi, %eax 14705; NoVLX-NEXT: vzeroupper 14706; NoVLX-NEXT: retq 14707entry: 14708 %0 = bitcast <2 x i64> %__a to <16 x i8> 14709 %load = load <2 x i64>, ptr %__b 14710 %1 = bitcast <2 
x i64> %load to <16 x i8> 14711 %2 = icmp ult <16 x i8> %0, %1 14712 %3 = bitcast i16 %__u to <16 x i1> 14713 %4 = and <16 x i1> %2, %3 14714 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 14715 %6 = bitcast <64 x i1> %5 to i64 14716 ret i64 %6 14717} 14718 14719 14720define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 14721; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask: 14722; VLX: # %bb.0: # %entry 14723; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 14724; VLX-NEXT: kmovq %k0, %rax 14725; VLX-NEXT: vzeroupper 14726; VLX-NEXT: retq 14727; 14728; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask: 14729; NoVLX: # %bb.0: # %entry 14730; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1 14731; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 14732; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14733; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 14734; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 14735; NoVLX-NEXT: kmovw %k0, %ecx 14736; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 14737; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14738; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14739; NoVLX-NEXT: kmovw %k0, %eax 14740; NoVLX-NEXT: shll $16, %eax 14741; NoVLX-NEXT: orl %ecx, %eax 14742; NoVLX-NEXT: vzeroupper 14743; NoVLX-NEXT: retq 14744entry: 14745 %0 = bitcast <4 x i64> %__a to <32 x i8> 14746 %1 = bitcast <4 x i64> %__b to <32 x i8> 14747 %2 = icmp ult <32 x i8> %0, %1 14748 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, 
i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 14749 %4 = bitcast <64 x i1> %3 to i64 14750 ret i64 %4 14751} 14752 14753define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 14754; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: 14755; VLX: # %bb.0: # %entry 14756; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0 14757; VLX-NEXT: kmovq %k0, %rax 14758; VLX-NEXT: vzeroupper 14759; VLX-NEXT: retq 14760; 14761; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: 14762; NoVLX: # %bb.0: # %entry 14763; NoVLX-NEXT: vpmaxub (%rdi), %ymm0, %ymm1 14764; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 14765; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14766; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 14767; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 14768; NoVLX-NEXT: kmovw %k0, %ecx 14769; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 14770; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14771; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14772; NoVLX-NEXT: kmovw %k0, %eax 14773; NoVLX-NEXT: shll $16, %eax 14774; NoVLX-NEXT: orl %ecx, %eax 14775; NoVLX-NEXT: vzeroupper 14776; NoVLX-NEXT: retq 14777entry: 14778 %0 = bitcast <4 x i64> %__a to <32 x i8> 14779 %load = load <4 x i64>, ptr %__b 14780 %1 = bitcast <4 x i64> %load to <32 x i8> 14781 %2 = icmp ult <32 x i8> %0, %1 14782 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 
24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 14783 %4 = bitcast <64 x i1> %3 to i64 14784 ret i64 %4 14785} 14786 14787define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 14788; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: 14789; VLX: # %bb.0: # %entry 14790; VLX-NEXT: kmovd %edi, %k1 14791; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} 14792; VLX-NEXT: kmovq %k0, %rax 14793; VLX-NEXT: vzeroupper 14794; VLX-NEXT: retq 14795; 14796; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: 14797; NoVLX: # %bb.0: # %entry 14798; NoVLX-NEXT: vpmaxub %ymm1, %ymm0, %ymm1 14799; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 14800; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14801; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 14802; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 14803; NoVLX-NEXT: kmovw %k0, %eax 14804; NoVLX-NEXT: andl %edi, %eax 14805; NoVLX-NEXT: shrl $16, %edi 14806; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 14807; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14808; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14809; NoVLX-NEXT: kmovw %k0, %ecx 14810; NoVLX-NEXT: andl %edi, %ecx 14811; NoVLX-NEXT: shll $16, %ecx 14812; NoVLX-NEXT: movzwl %ax, %eax 14813; NoVLX-NEXT: orl %ecx, %eax 14814; NoVLX-NEXT: vzeroupper 14815; NoVLX-NEXT: retq 14816entry: 14817 %0 = bitcast <4 x i64> %__a to <32 x i8> 14818 %1 = bitcast <4 x i64> %__b to <32 x i8> 14819 %2 = icmp ult <32 x i8> %0, %1 14820 %3 = bitcast i32 %__u to <32 x i1> 14821 %4 = and <32 x i1> %2, %3 14822 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 
16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 14823 %6 = bitcast <64 x i1> %5 to i64 14824 ret i64 %6 14825} 14826 14827define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 14828; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: 14829; VLX: # %bb.0: # %entry 14830; VLX-NEXT: kmovd %edi, %k1 14831; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1} 14832; VLX-NEXT: kmovq %k0, %rax 14833; VLX-NEXT: vzeroupper 14834; VLX-NEXT: retq 14835; 14836; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: 14837; NoVLX: # %bb.0: # %entry 14838; NoVLX-NEXT: vpmaxub (%rsi), %ymm0, %ymm1 14839; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 14840; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14841; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1 14842; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 14843; NoVLX-NEXT: kmovw %k0, %eax 14844; NoVLX-NEXT: andl %edi, %eax 14845; NoVLX-NEXT: shrl $16, %edi 14846; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0 14847; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 14848; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 14849; NoVLX-NEXT: kmovw %k0, %ecx 14850; NoVLX-NEXT: andl %edi, %ecx 14851; NoVLX-NEXT: shll $16, %ecx 14852; NoVLX-NEXT: movzwl %ax, %eax 14853; NoVLX-NEXT: orl %ecx, %eax 14854; NoVLX-NEXT: vzeroupper 14855; NoVLX-NEXT: retq 14856entry: 14857 %0 = bitcast <4 x i64> %__a to <32 x i8> 14858 %load = load <4 x i64>, ptr %__b 14859 %1 = bitcast <4 x i64> %load to <32 x i8> 14860 %2 = icmp ult <32 x i8> %0, %1 14861 %3 = bitcast i32 %__u to <32 x i1> 14862 %4 = and <32 x i1> %2, %3 14863 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 
1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 14864 %6 = bitcast <64 x i1> %5 to i64 14865 ret i64 %6 14866} 14867 14868
; Unsigned "less than" on two 128-bit arguments reinterpreted as <8 x i16>.
; The <8 x i1> result is widened to <16 x i1> by a shufflevector whose second
; operand is zeroinitializer (mask indices 8-15 pick its lanes), then returned
; as a zeroext i16 bitmask.
; The assertions inside the body are autogenerated (see the NOTE on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
14869define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14870; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask: 14871; VLX: # %bb.0: # %entry 14872; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 14873; VLX-NEXT: kmovd %k0, %eax 14874; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14875; VLX-NEXT: retq 14876; 14877; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask: 14878; NoVLX: # %bb.0: # %entry 14879; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 14880; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 14881; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14882; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 14883; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 14884; NoVLX-NEXT: kmovw %k0, %eax 14885; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14886; NoVLX-NEXT: vzeroupper 14887; NoVLX-NEXT: retq 14888entry: 14889 %0 = bitcast <2 x i64> %__a to <8 x i16> 14890 %1 = bitcast <2 x i64> %__b to <8 x i16> 14891 %2 = icmp ult <8 x i16> %0, %1 14892 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14893 %4 = bitcast <16 x i1> %3 to i16 14894 ret i16 %4 14895} 14896 14897define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 14898; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem: 14899; VLX: # %bb.0: # %entry 14900;
VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 14901; VLX-NEXT: kmovd %k0, %eax 14902; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14903; VLX-NEXT: retq 14904; 14905; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem: 14906; NoVLX: # %bb.0: # %entry 14907; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1 14908; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 14909; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14910; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 14911; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 14912; NoVLX-NEXT: kmovw %k0, %eax 14913; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14914; NoVLX-NEXT: vzeroupper 14915; NoVLX-NEXT: retq 14916entry: 14917 %0 = bitcast <2 x i64> %__a to <8 x i16> 14918 %load = load <2 x i64>, ptr %__b 14919 %1 = bitcast <2 x i64> %load to <8 x i16> 14920 %2 = icmp ult <8 x i16> %0, %1 14921 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14922 %4 = bitcast <16 x i1> %3 to i16 14923 ret i16 %4 14924} 14925 14926define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14927; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask: 14928; VLX: # %bb.0: # %entry 14929; VLX-NEXT: kmovd %edi, %k1 14930; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} 14931; VLX-NEXT: kmovd %k0, %eax 14932; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14933; VLX-NEXT: retq 14934; 14935; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask: 14936; NoVLX: # %bb.0: # %entry 14937; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 14938; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 14939; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14940; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 14941; NoVLX-NEXT: kmovw %edi, %k1 14942; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 14943; NoVLX-NEXT: kmovw %k0, %eax 14944; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14945; NoVLX-NEXT: vzeroupper 14946; 
NoVLX-NEXT: retq 14947entry: 14948 %0 = bitcast <2 x i64> %__a to <8 x i16> 14949 %1 = bitcast <2 x i64> %__b to <8 x i16> 14950 %2 = icmp ult <8 x i16> %0, %1 14951 %3 = bitcast i8 %__u to <8 x i1> 14952 %4 = and <8 x i1> %2, %3 14953 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14954 %6 = bitcast <16 x i1> %5 to i16 14955 ret i16 %6 14956} 14957
; Masked, memory-operand variant: %__a is reinterpreted as <8 x i16> and
; compared (icmp ult) against a <2 x i64> loaded from %__b and reinterpreted
; the same way. The <8 x i1> result is ANDed with the i8 mask %__u (bitcast to
; <8 x i1>), widened to <16 x i1> by a shufflevector whose second operand is
; zeroinitializer, and returned as a zeroext i16 bitmask.
; The assertions inside the body are autogenerated (see the NOTE on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
14958define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 14959; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem: 14960; VLX: # %bb.0: # %entry 14961; VLX-NEXT: kmovd %edi, %k1 14962; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} 14963; VLX-NEXT: kmovd %k0, %eax 14964; VLX-NEXT: # kill: def $ax killed $ax killed $eax 14965; VLX-NEXT: retq 14966; 14967; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem: 14968; NoVLX: # %bb.0: # %entry 14969; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1 14970; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 14971; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 14972; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 14973; NoVLX-NEXT: kmovw %edi, %k1 14974; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 14975; NoVLX-NEXT: kmovw %k0, %eax 14976; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 14977; NoVLX-NEXT: vzeroupper 14978; NoVLX-NEXT: retq 14979entry: 14980 %0 = bitcast <2 x i64> %__a to <8 x i16> 14981 %load = load <2 x i64>, ptr %__b 14982 %1 = bitcast <2 x i64> %load to <8 x i16> 14983 %2 = icmp ult <8 x i16> %0, %1 14984 %3 = bitcast i8 %__u to <8 x i1> 14985 %4 = and <8 x i1> %2, %3 14986 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 14987 %6 = bitcast <16 x i1> %5 to i16 14988 ret i16 %6 14989} 14990 14991 14992define zeroext i32
@test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 14993; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask: 14994; VLX: # %bb.0: # %entry 14995; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 14996; VLX-NEXT: kmovd %k0, %eax 14997; VLX-NEXT: retq 14998; 14999; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask: 15000; NoVLX: # %bb.0: # %entry 15001; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 15002; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15003; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15004; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15005; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 15006; NoVLX-NEXT: kmovw %k0, %eax 15007; NoVLX-NEXT: vzeroupper 15008; NoVLX-NEXT: retq 15009entry: 15010 %0 = bitcast <2 x i64> %__a to <8 x i16> 15011 %1 = bitcast <2 x i64> %__b to <8 x i16> 15012 %2 = icmp ult <8 x i16> %0, %1 15013 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15014 %4 = bitcast <32 x i1> %3 to i32 15015 ret i32 %4 15016} 15017 15018define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 15019; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: 15020; VLX: # %bb.0: # %entry 15021; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 15022; VLX-NEXT: kmovd %k0, %eax 15023; VLX-NEXT: retq 15024; 15025; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: 15026; NoVLX: # %bb.0: # %entry 15027; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1 15028; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15029; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15030; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15031; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 15032; NoVLX-NEXT: kmovw %k0, %eax 15033; NoVLX-NEXT: vzeroupper 15034; NoVLX-NEXT: retq 15035entry: 15036 %0 = bitcast <2 x i64> %__a to <8 x i16> 15037 %load 
= load <2 x i64>, ptr %__b 15038 %1 = bitcast <2 x i64> %load to <8 x i16> 15039 %2 = icmp ult <8 x i16> %0, %1 15040 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15041 %4 = bitcast <32 x i1> %3 to i32 15042 ret i32 %4 15043} 15044
; Masked, register-operand variant: both 128-bit arguments are reinterpreted
; as <8 x i16> and compared with icmp ult. The <8 x i1> result is ANDed with
; the i8 mask %__u (bitcast to <8 x i1>) and widened to <32 x i1> by a
; shufflevector whose second operand is zeroinitializer; every mask index
; past the first eight is in the range 8-15, i.e. selects a lane of that zero
; operand. The result is returned as a zeroext i32 bitmask.
; The assertions inside the body are autogenerated (see the NOTE on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
15045define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15046; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: 15047; VLX: # %bb.0: # %entry 15048; VLX-NEXT: kmovd %edi, %k1 15049; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} 15050; VLX-NEXT: kmovd %k0, %eax 15051; VLX-NEXT: retq 15052; 15053; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: 15054; NoVLX: # %bb.0: # %entry 15055; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 15056; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15057; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15058; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15059; NoVLX-NEXT: kmovw %edi, %k1 15060; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 15061; NoVLX-NEXT: kmovw %k0, %eax 15062; NoVLX-NEXT: vzeroupper 15063; NoVLX-NEXT: retq 15064entry: 15065 %0 = bitcast <2 x i64> %__a to <8 x i16> 15066 %1 = bitcast <2 x i64> %__b to <8 x i16> 15067 %2 = icmp ult <8 x i16> %0, %1 15068 %3 = bitcast i8 %__u to <8 x i1> 15069 %4 = and <8 x i1> %2, %3 15070 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15071 %6 = bitcast <32 x i1> %5 to i32 15072 ret i32 %6 15073} 15074 15075define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8
zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 15076; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: 15077; VLX: # %bb.0: # %entry 15078; VLX-NEXT: kmovd %edi, %k1 15079; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} 15080; VLX-NEXT: kmovd %k0, %eax 15081; VLX-NEXT: retq 15082; 15083; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: 15084; NoVLX: # %bb.0: # %entry 15085; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1 15086; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15087; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15088; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15089; NoVLX-NEXT: kmovw %edi, %k1 15090; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 15091; NoVLX-NEXT: kmovw %k0, %eax 15092; NoVLX-NEXT: vzeroupper 15093; NoVLX-NEXT: retq 15094entry: 15095 %0 = bitcast <2 x i64> %__a to <8 x i16> 15096 %load = load <2 x i64>, ptr %__b 15097 %1 = bitcast <2 x i64> %load to <8 x i16> 15098 %2 = icmp ult <8 x i16> %0, %1 15099 %3 = bitcast i8 %__u to <8 x i1> 15100 %4 = and <8 x i1> %2, %3 15101 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15102 %6 = bitcast <32 x i1> %5 to i32 15103 ret i32 %6 15104} 15105 15106 15107define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15108; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask: 15109; VLX: # %bb.0: # %entry 15110; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 15111; VLX-NEXT: kmovq %k0, %rax 15112; VLX-NEXT: retq 15113; 15114; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask: 15115; NoVLX: # %bb.0: # %entry 15116; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 15117; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15118; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15119; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15120; NoVLX-NEXT: 
vptestmq %zmm0, %zmm0, %k0 15121; NoVLX-NEXT: kmovw %k0, %eax 15122; NoVLX-NEXT: vzeroupper 15123; NoVLX-NEXT: retq 15124entry: 15125 %0 = bitcast <2 x i64> %__a to <8 x i16> 15126 %1 = bitcast <2 x i64> %__b to <8 x i16> 15127 %2 = icmp ult <8 x i16> %0, %1 15128 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15129 %4 = bitcast <64 x i1> %3 to i64 15130 ret i64 %4 15131} 15132 15133define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 15134; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem: 15135; VLX: # %bb.0: # %entry 15136; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 15137; VLX-NEXT: kmovq %k0, %rax 15138; VLX-NEXT: retq 15139; 15140; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem: 15141; NoVLX: # %bb.0: # %entry 15142; NoVLX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm1 15143; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15144; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15145; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15146; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 15147; NoVLX-NEXT: kmovw %k0, %eax 15148; NoVLX-NEXT: vzeroupper 15149; NoVLX-NEXT: retq 15150entry: 15151 %0 = bitcast <2 x i64> %__a to <8 x i16> 15152 %load = load <2 x i64>, ptr %__b 15153 %1 = bitcast <2 x i64> %load to <8 x i16> 15154 %2 = icmp ult <8 x i16> %0, %1 15155 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, 
i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15156 %4 = bitcast <64 x i1> %3 to i64 15157 ret i64 %4 15158} 15159 15160define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15161; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: 15162; VLX: # %bb.0: # %entry 15163; VLX-NEXT: kmovd %edi, %k1 15164; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} 15165; VLX-NEXT: kmovq %k0, %rax 15166; VLX-NEXT: retq 15167; 15168; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: 15169; NoVLX: # %bb.0: # %entry 15170; NoVLX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1 15171; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15172; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15173; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15174; NoVLX-NEXT: kmovw %edi, %k1 15175; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 15176; NoVLX-NEXT: kmovw %k0, %eax 15177; NoVLX-NEXT: vzeroupper 15178; NoVLX-NEXT: retq 15179entry: 15180 %0 = bitcast <2 x i64> %__a to <8 x i16> 15181 %1 = bitcast <2 x i64> %__b to <8 x i16> 15182 %2 = icmp ult <8 x i16> %0, %1 15183 %3 = bitcast i8 %__u to <8 x i1> 15184 %4 = and <8 x i1> %2, %3 15185 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, 
i32 12, i32 13, i32 14, i32 15> 15186 %6 = bitcast <64 x i1> %5 to i64 15187 ret i64 %6 15188} 15189
; Masked, memory-operand variant: %__a is reinterpreted as <8 x i16> and
; compared (icmp ult) against a <2 x i64> loaded from %__b and reinterpreted
; the same way. The <8 x i1> result is ANDed with the i8 mask %__u (bitcast to
; <8 x i1>) and widened to <64 x i1> by a shufflevector whose second operand
; is zeroinitializer; every mask index past the first eight is in the range
; 8-15, i.e. selects a lane of that zero operand. The result is returned as a
; zeroext i64 bitmask.
; The assertions inside the body are autogenerated (see the NOTE on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
15190define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 15191; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: 15192; VLX: # %bb.0: # %entry 15193; VLX-NEXT: kmovd %edi, %k1 15194; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} 15195; VLX-NEXT: kmovq %k0, %rax 15196; VLX-NEXT: retq 15197; 15198; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: 15199; NoVLX: # %bb.0: # %entry 15200; NoVLX-NEXT: vpmaxuw (%rsi), %xmm0, %xmm1 15201; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 15202; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15203; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 15204; NoVLX-NEXT: kmovw %edi, %k1 15205; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} 15206; NoVLX-NEXT: kmovw %k0, %eax 15207; NoVLX-NEXT: vzeroupper 15208; NoVLX-NEXT: retq 15209entry: 15210 %0 = bitcast <2 x i64> %__a to <8 x i16> 15211 %load = load <2 x i64>, ptr %__b 15212 %1 = bitcast <2 x i64> %load to <8 x i16> 15213 %2 = icmp ult <8 x i16> %0, %1 15214 %3 = bitcast i8 %__u to <8 x i1> 15215 %4 = and <8 x i1> %2, %3 15216 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 15217 %6 = bitcast <64 x i1> %5 to i64 15218 ret i64 %6 15219} 15220 15221 15222define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 15223; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask: 15224;
VLX: # %bb.0: # %entry 15225; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 15226; VLX-NEXT: kmovd %k0, %eax 15227; VLX-NEXT: vzeroupper 15228; VLX-NEXT: retq 15229; 15230; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask: 15231; NoVLX: # %bb.0: # %entry 15232; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15233; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15234; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15235; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15236; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15237; NoVLX-NEXT: kmovw %k0, %eax 15238; NoVLX-NEXT: vzeroupper 15239; NoVLX-NEXT: retq 15240entry: 15241 %0 = bitcast <4 x i64> %__a to <16 x i16> 15242 %1 = bitcast <4 x i64> %__b to <16 x i16> 15243 %2 = icmp ult <16 x i16> %0, %1 15244 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15245 %4 = bitcast <32 x i1> %3 to i32 15246 ret i32 %4 15247} 15248 15249define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 15250; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: 15251; VLX: # %bb.0: # %entry 15252; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0 15253; VLX-NEXT: kmovd %k0, %eax 15254; VLX-NEXT: vzeroupper 15255; VLX-NEXT: retq 15256; 15257; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: 15258; NoVLX: # %bb.0: # %entry 15259; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1 15260; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15261; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15262; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15263; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15264; NoVLX-NEXT: kmovw %k0, %eax 15265; NoVLX-NEXT: vzeroupper 15266; NoVLX-NEXT: retq 15267entry: 15268 %0 = bitcast <4 x i64> %__a to <16 x i16> 15269 %load = load <4 x i64>, ptr %__b 15270 %1 = bitcast <4 x i64> %load to <16 x 
i16> 15271 %2 = icmp ult <16 x i16> %0, %1 15272 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15273 %4 = bitcast <32 x i1> %3 to i32 15274 ret i32 %4 15275} 15276
; Masked, register-operand variant on 256-bit inputs: both <4 x i64>
; arguments are reinterpreted as <16 x i16> and compared with icmp ult. The
; <16 x i1> result is ANDed with the i16 mask %__u (bitcast to <16 x i1>),
; widened to <32 x i1> by a shufflevector whose second operand is
; zeroinitializer (mask indices 16-31 pick its lanes), and returned as a
; zeroext i32 bitmask.
; The assertions inside the body are autogenerated (see the NOTE on the first
; line of the file); regenerate them with utils/update_llc_test_checks.py
; instead of editing them by hand.
15277define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 15278; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: 15279; VLX: # %bb.0: # %entry 15280; VLX-NEXT: kmovd %edi, %k1 15281; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} 15282; VLX-NEXT: kmovd %k0, %eax 15283; VLX-NEXT: vzeroupper 15284; VLX-NEXT: retq 15285; 15286; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: 15287; NoVLX: # %bb.0: # %entry 15288; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15289; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15290; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15291; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15292; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15293; NoVLX-NEXT: kmovw %k0, %eax 15294; NoVLX-NEXT: andl %edi, %eax 15295; NoVLX-NEXT: vzeroupper 15296; NoVLX-NEXT: retq 15297entry: 15298 %0 = bitcast <4 x i64> %__a to <16 x i16> 15299 %1 = bitcast <4 x i64> %__b to <16 x i16> 15300 %2 = icmp ult <16 x i16> %0, %1 15301 %3 = bitcast i16 %__u to <16 x i1> 15302 %4 = and <16 x i1> %2, %3 15303 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15304 %6 = bitcast <32 x i1> %5 to i32 15305 ret i32 %6 15306} 15307 15308define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64>
%__a, ptr %__b) local_unnamed_addr { 15309; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: 15310; VLX: # %bb.0: # %entry 15311; VLX-NEXT: kmovd %edi, %k1 15312; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1} 15313; VLX-NEXT: kmovd %k0, %eax 15314; VLX-NEXT: vzeroupper 15315; VLX-NEXT: retq 15316; 15317; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: 15318; NoVLX: # %bb.0: # %entry 15319; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1 15320; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15321; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15322; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15323; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15324; NoVLX-NEXT: kmovw %k0, %eax 15325; NoVLX-NEXT: andl %edi, %eax 15326; NoVLX-NEXT: vzeroupper 15327; NoVLX-NEXT: retq 15328entry: 15329 %0 = bitcast <4 x i64> %__a to <16 x i16> 15330 %load = load <4 x i64>, ptr %__b 15331 %1 = bitcast <4 x i64> %load to <16 x i16> 15332 %2 = icmp ult <16 x i16> %0, %1 15333 %3 = bitcast i16 %__u to <16 x i1> 15334 %4 = and <16 x i1> %2, %3 15335 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15336 %6 = bitcast <32 x i1> %5 to i32 15337 ret i32 %6 15338} 15339 15340 15341define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 15342; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask: 15343; VLX: # %bb.0: # %entry 15344; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 15345; VLX-NEXT: kmovq %k0, %rax 15346; VLX-NEXT: vzeroupper 15347; VLX-NEXT: retq 15348; 15349; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask: 15350; NoVLX: # %bb.0: # %entry 15351; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15352; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15353; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15354; 
NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15355; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15356; NoVLX-NEXT: kmovw %k0, %eax 15357; NoVLX-NEXT: vzeroupper 15358; NoVLX-NEXT: retq 15359entry: 15360 %0 = bitcast <4 x i64> %__a to <16 x i16> 15361 %1 = bitcast <4 x i64> %__b to <16 x i16> 15362 %2 = icmp ult <16 x i16> %0, %1 15363 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15364 %4 = bitcast <64 x i1> %3 to i64 15365 ret i64 %4 15366} 15367 15368define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 15369; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: 15370; VLX: # %bb.0: # %entry 15371; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0 15372; VLX-NEXT: kmovq %k0, %rax 15373; VLX-NEXT: vzeroupper 15374; VLX-NEXT: retq 15375; 15376; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: 15377; NoVLX: # %bb.0: # %entry 15378; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1 15379; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15380; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15381; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15382; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15383; NoVLX-NEXT: kmovw %k0, %eax 15384; NoVLX-NEXT: vzeroupper 15385; NoVLX-NEXT: retq 15386entry: 15387 %0 = bitcast <4 x i64> %__a to <16 x i16> 15388 %load = load <4 x i64>, ptr %__b 15389 %1 = bitcast <4 x i64> %load to <16 x i16> 15390 %2 = icmp ult <16 x i16> %0, %1 15391 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 
3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15392 %4 = bitcast <64 x i1> %3 to i64 15393 ret i64 %4 15394} 15395 15396define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 15397; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: 15398; VLX: # %bb.0: # %entry 15399; VLX-NEXT: kmovd %edi, %k1 15400; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} 15401; VLX-NEXT: kmovq %k0, %rax 15402; VLX-NEXT: vzeroupper 15403; VLX-NEXT: retq 15404; 15405; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: 15406; NoVLX: # %bb.0: # %entry 15407; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15408; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15409; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15410; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15411; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15412; NoVLX-NEXT: kmovw %k0, %eax 15413; NoVLX-NEXT: andl %edi, %eax 15414; NoVLX-NEXT: vzeroupper 15415; NoVLX-NEXT: retq 15416entry: 15417 %0 = bitcast <4 x i64> %__a to <16 x i16> 15418 %1 = bitcast <4 x i64> %__b to <16 x i16> 15419 %2 = icmp ult <16 x i16> %0, %1 15420 %3 = bitcast i16 %__u to <16 x i1> 15421 %4 = and <16 x i1> %2, %3 15422 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 
23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15423 %6 = bitcast <64 x i1> %5 to i64 15424 ret i64 %6 15425} 15426 15427define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 15428; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: 15429; VLX: # %bb.0: # %entry 15430; VLX-NEXT: kmovd %edi, %k1 15431; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1} 15432; VLX-NEXT: kmovq %k0, %rax 15433; VLX-NEXT: vzeroupper 15434; VLX-NEXT: retq 15435; 15436; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: 15437; NoVLX: # %bb.0: # %entry 15438; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1 15439; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15440; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15441; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15442; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15443; NoVLX-NEXT: kmovw %k0, %eax 15444; NoVLX-NEXT: andl %edi, %eax 15445; NoVLX-NEXT: vzeroupper 15446; NoVLX-NEXT: retq 15447entry: 15448 %0 = bitcast <4 x i64> %__a to <16 x i16> 15449 %load = load <4 x i64>, ptr %__b 15450 %1 = bitcast <4 x i64> %load to <16 x i16> 15451 %2 = icmp ult <16 x i16> %0, %1 15452 %3 = bitcast i16 %__u to <16 x i1> 15453 %4 = and <16 x i1> %2, %3 15454 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 15455 %6 = bitcast <64 x i1> %5 to 
i64 15456 ret i64 %6 15457} 15458 15459 15460define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 15461; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask: 15462; VLX: # %bb.0: # %entry 15463; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 15464; VLX-NEXT: kmovq %k0, %rax 15465; VLX-NEXT: vzeroupper 15466; VLX-NEXT: retq 15467; 15468; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask: 15469; NoVLX: # %bb.0: # %entry 15470; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 15471; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 15472; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 15473; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 15474; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 15475; NoVLX-NEXT: kmovw %k0, %ecx 15476; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 15477; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 15478; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15479; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15480; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15481; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15482; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15483; NoVLX-NEXT: kmovw %k0, %eax 15484; NoVLX-NEXT: shll $16, %eax 15485; NoVLX-NEXT: orl %ecx, %eax 15486; NoVLX-NEXT: vzeroupper 15487; NoVLX-NEXT: retq 15488entry: 15489 %0 = bitcast <8 x i64> %__a to <32 x i16> 15490 %1 = bitcast <8 x i64> %__b to <32 x i16> 15491 %2 = icmp ult <32 x i16> %0, %1 15492 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15493 %4 = bitcast <64 x i1> %3 to i64 15494 ret i64 
%4 15495} 15496 15497define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 15498; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem: 15499; VLX: # %bb.0: # %entry 15500; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0 15501; VLX-NEXT: kmovq %k0, %rax 15502; VLX-NEXT: vzeroupper 15503; VLX-NEXT: retq 15504; 15505; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem: 15506; NoVLX: # %bb.0: # %entry 15507; NoVLX-NEXT: vpmaxuw (%rdi), %ymm0, %ymm1 15508; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1 15509; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 15510; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 15511; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 15512; NoVLX-NEXT: kmovw %k0, %ecx 15513; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 15514; NoVLX-NEXT: vpmaxuw 32(%rdi), %ymm0, %ymm1 15515; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15516; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15517; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15518; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15519; NoVLX-NEXT: kmovw %k0, %eax 15520; NoVLX-NEXT: shll $16, %eax 15521; NoVLX-NEXT: orl %ecx, %eax 15522; NoVLX-NEXT: vzeroupper 15523; NoVLX-NEXT: retq 15524entry: 15525 %0 = bitcast <8 x i64> %__a to <32 x i16> 15526 %load = load <8 x i64>, ptr %__b 15527 %1 = bitcast <8 x i64> %load to <32 x i16> 15528 %2 = icmp ult <32 x i16> %0, %1 15529 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15530 %4 = bitcast <64 x i1> %3 to i64 15531 ret i64 %4 15532} 15533 
; Masked unsigned less-than compare of 32 x i16 lanes, result widened to i64.
; The IR bitcasts %__a and %__b from <8 x i64> to <32 x i16>, does `icmp ult`,
; ANDs the <32 x i1> result with %__u reinterpreted as <32 x i1>, then uses
; shufflevector against zeroinitializer (indices 0-63) to append 32 zero bits
; before bitcasting the <64 x i1> value to the returned i64.
; Expected codegen per the check lines: with the +avx512bw/+avx512vl RUN line a
; single masked vpcmpltuw on zmm; with the +avx512f-only RUN line the compare is
; done on the two ymm halves and the 16-bit partial masks are ANDed with the
; halves of %edi and merged with shll/orl.
; The assembly assertions are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
15534define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 15535; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask: 15536; VLX: # %bb.0: # %entry 15537; VLX-NEXT: kmovd %edi, %k1 15538; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} 15539; VLX-NEXT: kmovq %k0, %rax 15540; VLX-NEXT: vzeroupper 15541; VLX-NEXT: retq 15542; 15543; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask: 15544; NoVLX: # %bb.0: # %entry 15545; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm2 15546; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm2 15547; NoVLX-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2 15548; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 15549; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 15550; NoVLX-NEXT: kmovw %k0, %eax 15551; NoVLX-NEXT: andl %edi, %eax 15552; NoVLX-NEXT: shrl $16, %edi 15553; NoVLX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 15554; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 15555; NoVLX-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1 15556; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15557; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15558; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15559; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15560; NoVLX-NEXT: kmovw %k0, %ecx 15561; NoVLX-NEXT: andl %edi, %ecx 15562; NoVLX-NEXT: shll $16, %ecx 15563; NoVLX-NEXT: movzwl %ax, %eax 15564; NoVLX-NEXT: orl %ecx, %eax 15565; NoVLX-NEXT: vzeroupper 15566; NoVLX-NEXT: retq 15567entry: 15568 %0 = bitcast <8 x i64> %__a to <32 x i16> 15569 %1 = bitcast <8 x i64> %__b to <32 x i16> 15570 %2 = icmp ult <32 x i16> %0, %1 15571 %3 = bitcast i32 %__u to <32 x i1> 15572 %4 = and <32 x i1> %2, %3 15573 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 
39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15574 %6 = bitcast <64 x i1> %5 to i64 15575 ret i64 %6 15576} 15577 15578define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 15579; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem: 15580; VLX: # %bb.0: # %entry 15581; VLX-NEXT: kmovd %edi, %k1 15582; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1} 15583; VLX-NEXT: kmovq %k0, %rax 15584; VLX-NEXT: vzeroupper 15585; VLX-NEXT: retq 15586; 15587; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem: 15588; NoVLX: # %bb.0: # %entry 15589; NoVLX-NEXT: vpmaxuw (%rsi), %ymm0, %ymm1 15590; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm1 15591; NoVLX-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1 15592; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 15593; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 15594; NoVLX-NEXT: kmovw %k0, %eax 15595; NoVLX-NEXT: andl %edi, %eax 15596; NoVLX-NEXT: shrl $16, %edi 15597; NoVLX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 15598; NoVLX-NEXT: vpmaxuw 32(%rsi), %ymm0, %ymm1 15599; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 15600; NoVLX-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0 15601; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 15602; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 15603; NoVLX-NEXT: kmovw %k0, %ecx 15604; NoVLX-NEXT: andl %edi, %ecx 15605; NoVLX-NEXT: shll $16, %ecx 15606; NoVLX-NEXT: movzwl %ax, %eax 15607; NoVLX-NEXT: orl %ecx, %eax 15608; NoVLX-NEXT: vzeroupper 15609; NoVLX-NEXT: retq 15610entry: 15611 %0 = bitcast <8 x i64> %__a to <32 x i16> 15612 %load = load <8 x i64>, ptr %__b 15613 %1 = bitcast <8 x i64> %load to <32 x i16> 15614 %2 = icmp ult <32 x i16> %0, %1 15615 %3 = bitcast i32 %__u to <32 x i1> 15616 %4 = and <32 x i1> %2, %3 15617 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 
5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> 15618 %6 = bitcast <64 x i1> %5 to i64 15619 ret i64 %6 15620} 15621 15622 15623define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15624; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask: 15625; VLX: # %bb.0: # %entry 15626; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 15627; VLX-NEXT: kmovd %k0, %eax 15628; VLX-NEXT: # kill: def $al killed $al killed $eax 15629; VLX-NEXT: retq 15630; 15631; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask: 15632; NoVLX: # %bb.0: # %entry 15633; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 15634; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15635; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 15636; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15637; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15638; NoVLX-NEXT: kmovw %k0, %eax 15639; NoVLX-NEXT: # kill: def $al killed $al killed $eax 15640; NoVLX-NEXT: vzeroupper 15641; NoVLX-NEXT: retq 15642entry: 15643 %0 = bitcast <2 x i64> %__a to <4 x i32> 15644 %1 = bitcast <2 x i64> %__b to <4 x i32> 15645 %2 = icmp ult <4 x i32> %0, %1 15646 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 15647 %4 = bitcast <8 x i1> %3 to i8 15648 ret i8 %4 15649} 15650 15651define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 15652; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem: 15653; VLX: # %bb.0: # %entry 15654; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 15655; VLX-NEXT: kmovd %k0, %eax 15656; VLX-NEXT: # kill: 
def $al killed $al killed $eax 15657; VLX-NEXT: retq 15658; 15659; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem: 15660; NoVLX: # %bb.0: # %entry 15661; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15662; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 15663; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 15664; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15665; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15666; NoVLX-NEXT: kmovw %k0, %eax 15667; NoVLX-NEXT: # kill: def $al killed $al killed $eax 15668; NoVLX-NEXT: vzeroupper 15669; NoVLX-NEXT: retq 15670entry: 15671 %0 = bitcast <2 x i64> %__a to <4 x i32> 15672 %load = load <2 x i64>, ptr %__b 15673 %1 = bitcast <2 x i64> %load to <4 x i32> 15674 %2 = icmp ult <4 x i32> %0, %1 15675 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 15676 %4 = bitcast <8 x i1> %3 to i8 15677 ret i8 %4 15678} 15679 15680define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15681; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask: 15682; VLX: # %bb.0: # %entry 15683; VLX-NEXT: kmovd %edi, %k1 15684; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} 15685; VLX-NEXT: kmovd %k0, %eax 15686; VLX-NEXT: # kill: def $al killed $al killed $eax 15687; VLX-NEXT: retq 15688; 15689; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask: 15690; NoVLX: # %bb.0: # %entry 15691; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 15692; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15693; NoVLX-NEXT: kmovw %edi, %k1 15694; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 15695; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15696; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15697; NoVLX-NEXT: kmovw %k0, %eax 15698; NoVLX-NEXT: # kill: def $al killed $al killed $eax 15699; NoVLX-NEXT: vzeroupper 15700; NoVLX-NEXT: retq 15701entry: 15702 %0 = bitcast <2 x i64> %__a to <4 x i32> 15703 %1 = bitcast <2 x i64> %__b to <4 x i32> 15704 %2 = icmp ult <4 x i32> %0, %1 
15705 %3 = bitcast i8 %__u to <8 x i1> 15706 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 15707 %4 = and <4 x i1> %2, %extract.i 15708 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 15709 %6 = bitcast <8 x i1> %5 to i8 15710 ret i8 %6 15711} 15712 15713define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 15714; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem: 15715; VLX: # %bb.0: # %entry 15716; VLX-NEXT: kmovd %edi, %k1 15717; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} 15718; VLX-NEXT: kmovd %k0, %eax 15719; VLX-NEXT: # kill: def $al killed $al killed $eax 15720; VLX-NEXT: retq 15721; 15722; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem: 15723; NoVLX: # %bb.0: # %entry 15724; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15725; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 15726; NoVLX-NEXT: kmovw %edi, %k1 15727; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 15728; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15729; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15730; NoVLX-NEXT: kmovw %k0, %eax 15731; NoVLX-NEXT: # kill: def $al killed $al killed $eax 15732; NoVLX-NEXT: vzeroupper 15733; NoVLX-NEXT: retq 15734entry: 15735 %0 = bitcast <2 x i64> %__a to <4 x i32> 15736 %load = load <2 x i64>, ptr %__b 15737 %1 = bitcast <2 x i64> %load to <4 x i32> 15738 %2 = icmp ult <4 x i32> %0, %1 15739 %3 = bitcast i8 %__u to <8 x i1> 15740 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 15741 %4 = and <4 x i1> %2, %extract.i 15742 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 15743 %6 = bitcast <8 x i1> %5 to i8 15744 ret i8 %6 15745} 15746 15747 15748define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 15749; 
VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: 15750; VLX: # %bb.0: # %entry 15751; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 15752; VLX-NEXT: kmovd %k0, %eax 15753; VLX-NEXT: # kill: def $al killed $al killed $eax 15754; VLX-NEXT: retq 15755; 15756; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: 15757; NoVLX: # %bb.0: # %entry 15758; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15759; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 15760; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15761; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15762; NoVLX-NEXT: kmovw %k0, %eax 15763; NoVLX-NEXT: # kill: def $al killed $al killed $eax 15764; NoVLX-NEXT: vzeroupper 15765; NoVLX-NEXT: retq 15766entry: 15767 %0 = bitcast <2 x i64> %__a to <4 x i32> 15768 %load = load i32, ptr %__b 15769 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 15770 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 15771 %2 = icmp ult <4 x i32> %0, %1 15772 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 15773 %4 = bitcast <8 x i1> %3 to i8 15774 ret i8 %4 15775} 15776 15777define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 15778; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: 15779; VLX: # %bb.0: # %entry 15780; VLX-NEXT: kmovd %edi, %k1 15781; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} 15782; VLX-NEXT: kmovd %k0, %eax 15783; VLX-NEXT: # kill: def $al killed $al killed $eax 15784; VLX-NEXT: retq 15785; 15786; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: 15787; NoVLX: # %bb.0: # %entry 15788; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15789; NoVLX-NEXT: kmovw %edi, %k1 15790; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} 15791; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15792; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15793; NoVLX-NEXT: kmovw %k0, %eax 15794; NoVLX-NEXT: # kill: 
def $al killed $al killed $eax 15795; NoVLX-NEXT: vzeroupper 15796; NoVLX-NEXT: retq 15797entry: 15798 %0 = bitcast <2 x i64> %__a to <4 x i32> 15799 %load = load i32, ptr %__b 15800 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 15801 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 15802 %2 = icmp ult <4 x i32> %0, %1 15803 %3 = bitcast i8 %__u to <8 x i1> 15804 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 15805 %4 = and <4 x i1> %extract.i, %2 15806 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 15807 %6 = bitcast <8 x i1> %5 to i8 15808 ret i8 %6 15809} 15810 15811 15812define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15813; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask: 15814; VLX: # %bb.0: # %entry 15815; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 15816; VLX-NEXT: kmovd %k0, %eax 15817; VLX-NEXT: # kill: def $ax killed $ax killed $eax 15818; VLX-NEXT: retq 15819; 15820; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask: 15821; NoVLX: # %bb.0: # %entry 15822; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 15823; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15824; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 15825; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15826; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15827; NoVLX-NEXT: kmovw %k0, %eax 15828; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 15829; NoVLX-NEXT: vzeroupper 15830; NoVLX-NEXT: retq 15831entry: 15832 %0 = bitcast <2 x i64> %__a to <4 x i32> 15833 %1 = bitcast <2 x i64> %__b to <4 x i32> 15834 %2 = icmp ult <4 x i32> %0, %1 15835 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 15836 %4 = bitcast <16 x i1> %3 to i16 15837 ret i16 %4 15838} 15839 15840define 
zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 15841; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: 15842; VLX: # %bb.0: # %entry 15843; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 15844; VLX-NEXT: kmovd %k0, %eax 15845; VLX-NEXT: # kill: def $ax killed $ax killed $eax 15846; VLX-NEXT: retq 15847; 15848; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: 15849; NoVLX: # %bb.0: # %entry 15850; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15851; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 15852; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 15853; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15854; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15855; NoVLX-NEXT: kmovw %k0, %eax 15856; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 15857; NoVLX-NEXT: vzeroupper 15858; NoVLX-NEXT: retq 15859entry: 15860 %0 = bitcast <2 x i64> %__a to <4 x i32> 15861 %load = load <2 x i64>, ptr %__b 15862 %1 = bitcast <2 x i64> %load to <4 x i32> 15863 %2 = icmp ult <4 x i32> %0, %1 15864 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 15865 %4 = bitcast <16 x i1> %3 to i16 15866 ret i16 %4 15867} 15868 15869define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 15870; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: 15871; VLX: # %bb.0: # %entry 15872; VLX-NEXT: kmovd %edi, %k1 15873; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} 15874; VLX-NEXT: kmovd %k0, %eax 15875; VLX-NEXT: # kill: def $ax killed $ax killed $eax 15876; VLX-NEXT: retq 15877; 15878; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: 15879; NoVLX: # %bb.0: # %entry 15880; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 15881; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15882; NoVLX-NEXT: kmovw %edi, %k1 15883; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 15884; NoVLX-NEXT: kshiftlw 
$12, %k0, %k0 15885; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15886; NoVLX-NEXT: kmovw %k0, %eax 15887; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 15888; NoVLX-NEXT: vzeroupper 15889; NoVLX-NEXT: retq 15890entry: 15891 %0 = bitcast <2 x i64> %__a to <4 x i32> 15892 %1 = bitcast <2 x i64> %__b to <4 x i32> 15893 %2 = icmp ult <4 x i32> %0, %1 15894 %3 = bitcast i8 %__u to <8 x i1> 15895 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 15896 %4 = and <4 x i1> %2, %extract.i 15897 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 15898 %6 = bitcast <16 x i1> %5 to i16 15899 ret i16 %6 15900} 15901 15902define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 15903; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: 15904; VLX: # %bb.0: # %entry 15905; VLX-NEXT: kmovd %edi, %k1 15906; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} 15907; VLX-NEXT: kmovd %k0, %eax 15908; VLX-NEXT: # kill: def $ax killed $ax killed $eax 15909; VLX-NEXT: retq 15910; 15911; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: 15912; NoVLX: # %bb.0: # %entry 15913; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15914; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 15915; NoVLX-NEXT: kmovw %edi, %k1 15916; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 15917; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15918; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15919; NoVLX-NEXT: kmovw %k0, %eax 15920; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 15921; NoVLX-NEXT: vzeroupper 15922; NoVLX-NEXT: retq 15923entry: 15924 %0 = bitcast <2 x i64> %__a to <4 x i32> 15925 %load = load <2 x i64>, ptr %__b 15926 %1 = bitcast <2 x i64> %load to <4 x i32> 15927 %2 = icmp ult <4 x i32> %0, %1 15928 %3 = bitcast i8 %__u to <8 x i1> 15929 %extract.i = shufflevector <8 x i1> %3, <8 x i1> 
undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 15930 %4 = and <4 x i1> %2, %extract.i 15931 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 15932 %6 = bitcast <16 x i1> %5 to i16 15933 ret i16 %6 15934} 15935 15936 15937define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 15938; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: 15939; VLX: # %bb.0: # %entry 15940; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 15941; VLX-NEXT: kmovd %k0, %eax 15942; VLX-NEXT: # kill: def $ax killed $ax killed $eax 15943; VLX-NEXT: retq 15944; 15945; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: 15946; NoVLX: # %bb.0: # %entry 15947; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15948; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 15949; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15950; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15951; NoVLX-NEXT: kmovw %k0, %eax 15952; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 15953; NoVLX-NEXT: vzeroupper 15954; NoVLX-NEXT: retq 15955entry: 15956 %0 = bitcast <2 x i64> %__a to <4 x i32> 15957 %load = load i32, ptr %__b 15958 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 15959 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 15960 %2 = icmp ult <4 x i32> %0, %1 15961 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 15962 %4 = bitcast <16 x i1> %3 to i16 15963 ret i16 %4 15964} 15965 15966define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 15967; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: 15968; VLX: # %bb.0: # %entry 15969; VLX-NEXT: kmovd %edi, %k1 15970; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, 
%k0 {%k1} 15971; VLX-NEXT: kmovd %k0, %eax 15972; VLX-NEXT: # kill: def $ax killed $ax killed $eax 15973; VLX-NEXT: retq 15974; 15975; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: 15976; NoVLX: # %bb.0: # %entry 15977; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 15978; NoVLX-NEXT: kmovw %edi, %k1 15979; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} 15980; NoVLX-NEXT: kshiftlw $12, %k0, %k0 15981; NoVLX-NEXT: kshiftrw $12, %k0, %k0 15982; NoVLX-NEXT: kmovw %k0, %eax 15983; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 15984; NoVLX-NEXT: vzeroupper 15985; NoVLX-NEXT: retq 15986entry: 15987 %0 = bitcast <2 x i64> %__a to <4 x i32> 15988 %load = load i32, ptr %__b 15989 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 15990 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 15991 %2 = icmp ult <4 x i32> %0, %1 15992 %3 = bitcast i8 %__u to <8 x i1> 15993 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 15994 %4 = and <4 x i1> %extract.i, %2 15995 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 15996 %6 = bitcast <16 x i1> %5 to i16 15997 ret i16 %6 15998} 15999 16000 16001define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 16002; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask: 16003; VLX: # %bb.0: # %entry 16004; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 16005; VLX-NEXT: kmovd %k0, %eax 16006; VLX-NEXT: retq 16007; 16008; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask: 16009; NoVLX: # %bb.0: # %entry 16010; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16011; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16012; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16013; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16014; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16015; NoVLX-NEXT: kmovw 
%k0, %eax 16016; NoVLX-NEXT: vzeroupper 16017; NoVLX-NEXT: retq 16018entry: 16019 %0 = bitcast <2 x i64> %__a to <4 x i32> 16020 %1 = bitcast <2 x i64> %__b to <4 x i32> 16021 %2 = icmp ult <4 x i32> %0, %1 16022 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16023 %4 = bitcast <32 x i1> %3 to i32 16024 ret i32 %4 16025} 16026 16027define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 16028; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: 16029; VLX: # %bb.0: # %entry 16030; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 16031; VLX-NEXT: kmovd %k0, %eax 16032; VLX-NEXT: retq 16033; 16034; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: 16035; NoVLX: # %bb.0: # %entry 16036; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16037; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 16038; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16039; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16040; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16041; NoVLX-NEXT: kmovw %k0, %eax 16042; NoVLX-NEXT: vzeroupper 16043; NoVLX-NEXT: retq 16044entry: 16045 %0 = bitcast <2 x i64> %__a to <4 x i32> 16046 %load = load <2 x i64>, ptr %__b 16047 %1 = bitcast <2 x i64> %load to <4 x i32> 16048 %2 = icmp ult <4 x i32> %0, %1 16049 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16050 %4 = bitcast <32 x i1> %3 to i32 16051 ret i32 %4 16052} 16053 16054define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 16055; VLX-LABEL: 
test_masked_vpcmpultd_v4i1_v32i1_mask: 16056; VLX: # %bb.0: # %entry 16057; VLX-NEXT: kmovd %edi, %k1 16058; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} 16059; VLX-NEXT: kmovd %k0, %eax 16060; VLX-NEXT: retq 16061; 16062; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask: 16063; NoVLX: # %bb.0: # %entry 16064; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16065; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16066; NoVLX-NEXT: kmovw %edi, %k1 16067; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16068; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16069; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16070; NoVLX-NEXT: kmovw %k0, %eax 16071; NoVLX-NEXT: vzeroupper 16072; NoVLX-NEXT: retq 16073entry: 16074 %0 = bitcast <2 x i64> %__a to <4 x i32> 16075 %1 = bitcast <2 x i64> %__b to <4 x i32> 16076 %2 = icmp ult <4 x i32> %0, %1 16077 %3 = bitcast i8 %__u to <8 x i1> 16078 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16079 %4 = and <4 x i1> %2, %extract.i 16080 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16081 %6 = bitcast <32 x i1> %5 to i32 16082 ret i32 %6 16083} 16084 16085define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 16086; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: 16087; VLX: # %bb.0: # %entry 16088; VLX-NEXT: kmovd %edi, %k1 16089; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} 16090; VLX-NEXT: kmovd %k0, %eax 16091; VLX-NEXT: retq 16092; 16093; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: 16094; NoVLX: # %bb.0: # %entry 16095; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16096; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 16097; NoVLX-NEXT: kmovw %edi, %k1 16098; NoVLX-NEXT: 
vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16099; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16100; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16101; NoVLX-NEXT: kmovw %k0, %eax 16102; NoVLX-NEXT: vzeroupper 16103; NoVLX-NEXT: retq 16104entry: 16105 %0 = bitcast <2 x i64> %__a to <4 x i32> 16106 %load = load <2 x i64>, ptr %__b 16107 %1 = bitcast <2 x i64> %load to <4 x i32> 16108 %2 = icmp ult <4 x i32> %0, %1 16109 %3 = bitcast i8 %__u to <8 x i1> 16110 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16111 %4 = and <4 x i1> %2, %extract.i 16112 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16113 %6 = bitcast <32 x i1> %5 to i32 16114 ret i32 %6 16115} 16116 16117 16118define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 16119; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: 16120; VLX: # %bb.0: # %entry 16121; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 16122; VLX-NEXT: kmovd %k0, %eax 16123; VLX-NEXT: retq 16124; 16125; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: 16126; NoVLX: # %bb.0: # %entry 16127; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16128; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 16129; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16130; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16131; NoVLX-NEXT: kmovw %k0, %eax 16132; NoVLX-NEXT: vzeroupper 16133; NoVLX-NEXT: retq 16134entry: 16135 %0 = bitcast <2 x i64> %__a to <4 x i32> 16136 %load = load i32, ptr %__b 16137 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16138 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16139 %2 = icmp ult <4 x i32> %0, %1 16140 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16141 %4 = bitcast <32 x i1> %3 to i32 16142 ret i32 %4 16143} 16144 16145define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 16146; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: 16147; VLX: # %bb.0: # %entry 16148; VLX-NEXT: kmovd %edi, %k1 16149; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} 16150; VLX-NEXT: kmovd %k0, %eax 16151; VLX-NEXT: retq 16152; 16153; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: 16154; NoVLX: # %bb.0: # %entry 16155; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16156; NoVLX-NEXT: kmovw %edi, %k1 16157; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} 16158; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16159; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16160; NoVLX-NEXT: kmovw %k0, %eax 16161; NoVLX-NEXT: vzeroupper 16162; NoVLX-NEXT: retq 16163entry: 16164 %0 = bitcast <2 x i64> %__a to <4 x i32> 16165 %load = load i32, ptr %__b 16166 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16167 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16168 %2 = icmp ult <4 x i32> %0, %1 16169 %3 = bitcast i8 %__u to <8 x i1> 16170 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16171 %4 = and <4 x i1> %extract.i, %2 16172 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16173 %6 = bitcast <32 x i1> %5 to i32 16174 ret i32 %6 16175} 16176 16177 16178define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x 
i64> %__a, <2 x i64> %__b) local_unnamed_addr { 16179; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask: 16180; VLX: # %bb.0: # %entry 16181; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 16182; VLX-NEXT: kmovq %k0, %rax 16183; VLX-NEXT: retq 16184; 16185; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask: 16186; NoVLX: # %bb.0: # %entry 16187; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16188; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16189; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16190; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16191; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16192; NoVLX-NEXT: kmovw %k0, %eax 16193; NoVLX-NEXT: vzeroupper 16194; NoVLX-NEXT: retq 16195entry: 16196 %0 = bitcast <2 x i64> %__a to <4 x i32> 16197 %1 = bitcast <2 x i64> %__b to <4 x i32> 16198 %2 = icmp ult <4 x i32> %0, %1 16199 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16200 %4 = bitcast <64 x i1> %3 to i64 16201 ret i64 %4 16202} 16203 16204define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 16205; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: 16206; VLX: # %bb.0: # %entry 16207; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 16208; VLX-NEXT: kmovq %k0, %rax 16209; VLX-NEXT: retq 16210; 16211; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: 16212; NoVLX: # %bb.0: # %entry 16213; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16214; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 16215; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16216; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16217; NoVLX-NEXT: kshiftrw 
$12, %k0, %k0 16218; NoVLX-NEXT: kmovw %k0, %eax 16219; NoVLX-NEXT: vzeroupper 16220; NoVLX-NEXT: retq 16221entry: 16222 %0 = bitcast <2 x i64> %__a to <4 x i32> 16223 %load = load <2 x i64>, ptr %__b 16224 %1 = bitcast <2 x i64> %load to <4 x i32> 16225 %2 = icmp ult <4 x i32> %0, %1 16226 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16227 %4 = bitcast <64 x i1> %3 to i64 16228 ret i64 %4 16229} 16230 16231define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 16232; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: 16233; VLX: # %bb.0: # %entry 16234; VLX-NEXT: kmovd %edi, %k1 16235; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} 16236; VLX-NEXT: kmovq %k0, %rax 16237; VLX-NEXT: retq 16238; 16239; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: 16240; NoVLX: # %bb.0: # %entry 16241; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 16242; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16243; NoVLX-NEXT: kmovw %edi, %k1 16244; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16245; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16246; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16247; NoVLX-NEXT: kmovw %k0, %eax 16248; NoVLX-NEXT: vzeroupper 16249; NoVLX-NEXT: retq 16250entry: 16251 %0 = bitcast <2 x i64> %__a to <4 x i32> 16252 %1 = bitcast <2 x i64> %__b to <4 x i32> 16253 %2 = icmp ult <4 x i32> %0, %1 16254 %3 = bitcast i8 %__u to <8 x i1> 16255 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 
16256 %4 = and <4 x i1> %2, %extract.i 16257 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16258 %6 = bitcast <64 x i1> %5 to i64 16259 ret i64 %6 16260} 16261 16262define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 16263; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: 16264; VLX: # %bb.0: # %entry 16265; VLX-NEXT: kmovd %edi, %k1 16266; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} 16267; VLX-NEXT: kmovq %k0, %rax 16268; VLX-NEXT: retq 16269; 16270; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: 16271; NoVLX: # %bb.0: # %entry 16272; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16273; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 16274; NoVLX-NEXT: kmovw %edi, %k1 16275; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16276; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16277; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16278; NoVLX-NEXT: kmovw %k0, %eax 16279; NoVLX-NEXT: vzeroupper 16280; NoVLX-NEXT: retq 16281entry: 16282 %0 = bitcast <2 x i64> %__a to <4 x i32> 16283 %load = load <2 x i64>, ptr %__b 16284 %1 = bitcast <2 x i64> %load to <4 x i32> 16285 %2 = icmp ult <4 x i32> %0, %1 16286 %3 = bitcast i8 %__u to <8 x i1> 16287 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16288 %4 = and <4 x i1> %2, %extract.i 16289 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 
6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16290 %6 = bitcast <64 x i1> %5 to i64 16291 ret i64 %6 16292} 16293 16294 16295define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 16296; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: 16297; VLX: # %bb.0: # %entry 16298; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 16299; VLX-NEXT: kmovq %k0, %rax 16300; VLX-NEXT: retq 16301; 16302; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: 16303; NoVLX: # %bb.0: # %entry 16304; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16305; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 16306; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16307; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16308; NoVLX-NEXT: kmovw %k0, %eax 16309; NoVLX-NEXT: vzeroupper 16310; NoVLX-NEXT: retq 16311entry: 16312 %0 = bitcast <2 x i64> %__a to <4 x i32> 16313 %load = load i32, ptr %__b 16314 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16315 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16316 %2 = icmp ult <4 x i32> %0, %1 16317 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16318 %4 = bitcast <64 x i1> %3 to i64 16319 ret i64 %4 16320} 16321 16322define 
zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 16323; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: 16324; VLX: # %bb.0: # %entry 16325; VLX-NEXT: kmovd %edi, %k1 16326; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} 16327; VLX-NEXT: kmovq %k0, %rax 16328; VLX-NEXT: retq 16329; 16330; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: 16331; NoVLX: # %bb.0: # %entry 16332; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 16333; NoVLX-NEXT: kmovw %edi, %k1 16334; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} 16335; NoVLX-NEXT: kshiftlw $12, %k0, %k0 16336; NoVLX-NEXT: kshiftrw $12, %k0, %k0 16337; NoVLX-NEXT: kmovw %k0, %eax 16338; NoVLX-NEXT: vzeroupper 16339; NoVLX-NEXT: retq 16340entry: 16341 %0 = bitcast <2 x i64> %__a to <4 x i32> 16342 %load = load i32, ptr %__b 16343 %vec = insertelement <4 x i32> undef, i32 %load, i32 0 16344 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 16345 %2 = icmp ult <4 x i32> %0, %1 16346 %3 = bitcast i8 %__u to <8 x i1> 16347 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 16348 %4 = and <4 x i1> %extract.i, %2 16349 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 16350 %6 = bitcast <64 x i1> %5 to i64 16351 ret i64 %6 16352} 16353 16354 16355define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 16356; VLX-LABEL: 
test_vpcmpultd_v8i1_v16i1_mask: 16357; VLX: # %bb.0: # %entry 16358; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 16359; VLX-NEXT: kmovd %k0, %eax 16360; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16361; VLX-NEXT: vzeroupper 16362; VLX-NEXT: retq 16363; 16364; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask: 16365; NoVLX: # %bb.0: # %entry 16366; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 16367; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16368; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16369; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16370; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16371; NoVLX-NEXT: kmovw %k0, %eax 16372; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16373; NoVLX-NEXT: vzeroupper 16374; NoVLX-NEXT: retq 16375entry: 16376 %0 = bitcast <4 x i64> %__a to <8 x i32> 16377 %1 = bitcast <4 x i64> %__b to <8 x i32> 16378 %2 = icmp ult <8 x i32> %0, %1 16379 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16380 %4 = bitcast <16 x i1> %3 to i16 16381 ret i16 %4 16382} 16383 16384define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 16385; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem: 16386; VLX: # %bb.0: # %entry 16387; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 16388; VLX-NEXT: kmovd %k0, %eax 16389; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16390; VLX-NEXT: vzeroupper 16391; VLX-NEXT: retq 16392; 16393; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem: 16394; NoVLX: # %bb.0: # %entry 16395; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16396; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 16397; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16398; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16399; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16400; NoVLX-NEXT: kmovw %k0, %eax 16401; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16402; NoVLX-NEXT: vzeroupper 16403; NoVLX-NEXT: retq 16404entry: 
16405 %0 = bitcast <4 x i64> %__a to <8 x i32> 16406 %load = load <4 x i64>, ptr %__b 16407 %1 = bitcast <4 x i64> %load to <8 x i32> 16408 %2 = icmp ult <8 x i32> %0, %1 16409 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16410 %4 = bitcast <16 x i1> %3 to i16 16411 ret i16 %4 16412} 16413 16414define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 16415; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask: 16416; VLX: # %bb.0: # %entry 16417; VLX-NEXT: kmovd %edi, %k1 16418; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} 16419; VLX-NEXT: kmovd %k0, %eax 16420; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16421; VLX-NEXT: vzeroupper 16422; VLX-NEXT: retq 16423; 16424; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask: 16425; NoVLX: # %bb.0: # %entry 16426; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 16427; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16428; NoVLX-NEXT: kmovw %edi, %k1 16429; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16430; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16431; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16432; NoVLX-NEXT: kmovw %k0, %eax 16433; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16434; NoVLX-NEXT: vzeroupper 16435; NoVLX-NEXT: retq 16436entry: 16437 %0 = bitcast <4 x i64> %__a to <8 x i32> 16438 %1 = bitcast <4 x i64> %__b to <8 x i32> 16439 %2 = icmp ult <8 x i32> %0, %1 16440 %3 = bitcast i8 %__u to <8 x i1> 16441 %4 = and <8 x i1> %2, %3 16442 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16443 %6 = bitcast <16 x i1> %5 to i16 16444 ret i16 %6 16445} 16446 16447define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) 
local_unnamed_addr { 16448; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem: 16449; VLX: # %bb.0: # %entry 16450; VLX-NEXT: kmovd %edi, %k1 16451; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} 16452; VLX-NEXT: kmovd %k0, %eax 16453; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16454; VLX-NEXT: vzeroupper 16455; VLX-NEXT: retq 16456; 16457; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem: 16458; NoVLX: # %bb.0: # %entry 16459; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16460; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 16461; NoVLX-NEXT: kmovw %edi, %k1 16462; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16463; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16464; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16465; NoVLX-NEXT: kmovw %k0, %eax 16466; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16467; NoVLX-NEXT: vzeroupper 16468; NoVLX-NEXT: retq 16469entry: 16470 %0 = bitcast <4 x i64> %__a to <8 x i32> 16471 %load = load <4 x i64>, ptr %__b 16472 %1 = bitcast <4 x i64> %load to <8 x i32> 16473 %2 = icmp ult <8 x i32> %0, %1 16474 %3 = bitcast i8 %__u to <8 x i1> 16475 %4 = and <8 x i1> %2, %3 16476 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16477 %6 = bitcast <16 x i1> %5 to i16 16478 ret i16 %6 16479} 16480 16481 16482define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 16483; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: 16484; VLX: # %bb.0: # %entry 16485; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 16486; VLX-NEXT: kmovd %k0, %eax 16487; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16488; VLX-NEXT: vzeroupper 16489; VLX-NEXT: retq 16490; 16491; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: 16492; NoVLX: # %bb.0: # %entry 16493; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16494; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 16495; NoVLX-NEXT: 
kshiftlw $8, %k0, %k0 16496; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16497; NoVLX-NEXT: kmovw %k0, %eax 16498; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16499; NoVLX-NEXT: vzeroupper 16500; NoVLX-NEXT: retq 16501entry: 16502 %0 = bitcast <4 x i64> %__a to <8 x i32> 16503 %load = load i32, ptr %__b 16504 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 16505 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 16506 %2 = icmp ult <8 x i32> %0, %1 16507 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16508 %4 = bitcast <16 x i1> %3 to i16 16509 ret i16 %4 16510} 16511 16512define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 16513; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: 16514; VLX: # %bb.0: # %entry 16515; VLX-NEXT: kmovd %edi, %k1 16516; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} 16517; VLX-NEXT: kmovd %k0, %eax 16518; VLX-NEXT: # kill: def $ax killed $ax killed $eax 16519; VLX-NEXT: vzeroupper 16520; VLX-NEXT: retq 16521; 16522; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: 16523; NoVLX: # %bb.0: # %entry 16524; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16525; NoVLX-NEXT: kmovw %edi, %k1 16526; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} 16527; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16528; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16529; NoVLX-NEXT: kmovw %k0, %eax 16530; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 16531; NoVLX-NEXT: vzeroupper 16532; NoVLX-NEXT: retq 16533entry: 16534 %0 = bitcast <4 x i64> %__a to <8 x i32> 16535 %load = load i32, ptr %__b 16536 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 16537 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, 
i32 0, i32 0> 16538 %2 = icmp ult <8 x i32> %0, %1 16539 %3 = bitcast i8 %__u to <8 x i1> 16540 %4 = and <8 x i1> %3, %2 16541 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16542 %6 = bitcast <16 x i1> %5 to i16 16543 ret i16 %6 16544} 16545 16546 16547define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 16548; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask: 16549; VLX: # %bb.0: # %entry 16550; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 16551; VLX-NEXT: kmovd %k0, %eax 16552; VLX-NEXT: vzeroupper 16553; VLX-NEXT: retq 16554; 16555; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask: 16556; NoVLX: # %bb.0: # %entry 16557; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 16558; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16559; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16560; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16561; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16562; NoVLX-NEXT: kmovw %k0, %eax 16563; NoVLX-NEXT: vzeroupper 16564; NoVLX-NEXT: retq 16565entry: 16566 %0 = bitcast <4 x i64> %__a to <8 x i32> 16567 %1 = bitcast <4 x i64> %__b to <8 x i32> 16568 %2 = icmp ult <8 x i32> %0, %1 16569 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16570 %4 = bitcast <32 x i1> %3 to i32 16571 ret i32 %4 16572} 16573 16574define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 16575; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem: 16576; VLX: # %bb.0: # %entry 16577; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 16578; VLX-NEXT: kmovd %k0, %eax 16579; VLX-NEXT: vzeroupper 16580; VLX-NEXT: retq 16581; 16582; 
NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem: 16583; NoVLX: # %bb.0: # %entry 16584; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16585; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 16586; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16587; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16588; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16589; NoVLX-NEXT: kmovw %k0, %eax 16590; NoVLX-NEXT: vzeroupper 16591; NoVLX-NEXT: retq 16592entry: 16593 %0 = bitcast <4 x i64> %__a to <8 x i32> 16594 %load = load <4 x i64>, ptr %__b 16595 %1 = bitcast <4 x i64> %load to <8 x i32> 16596 %2 = icmp ult <8 x i32> %0, %1 16597 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16598 %4 = bitcast <32 x i1> %3 to i32 16599 ret i32 %4 16600} 16601 16602define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 16603; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: 16604; VLX: # %bb.0: # %entry 16605; VLX-NEXT: kmovd %edi, %k1 16606; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} 16607; VLX-NEXT: kmovd %k0, %eax 16608; VLX-NEXT: vzeroupper 16609; VLX-NEXT: retq 16610; 16611; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: 16612; NoVLX: # %bb.0: # %entry 16613; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 16614; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16615; NoVLX-NEXT: kmovw %edi, %k1 16616; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16617; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16618; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16619; NoVLX-NEXT: kmovw %k0, %eax 16620; NoVLX-NEXT: vzeroupper 16621; NoVLX-NEXT: retq 16622entry: 16623 %0 = bitcast <4 x i64> %__a to <8 x i32> 16624 %1 = bitcast <4 x i64> %__b to <8 x i32> 16625 %2 = icmp ult <8 x i32> %0, %1 16626 %3 = bitcast i8 
%__u to <8 x i1> 16627 %4 = and <8 x i1> %2, %3 16628 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16629 %6 = bitcast <32 x i1> %5 to i32 16630 ret i32 %6 16631} 16632 16633define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 16634; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: 16635; VLX: # %bb.0: # %entry 16636; VLX-NEXT: kmovd %edi, %k1 16637; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} 16638; VLX-NEXT: kmovd %k0, %eax 16639; VLX-NEXT: vzeroupper 16640; VLX-NEXT: retq 16641; 16642; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: 16643; NoVLX: # %bb.0: # %entry 16644; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16645; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 16646; NoVLX-NEXT: kmovw %edi, %k1 16647; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16648; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16649; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16650; NoVLX-NEXT: kmovw %k0, %eax 16651; NoVLX-NEXT: vzeroupper 16652; NoVLX-NEXT: retq 16653entry: 16654 %0 = bitcast <4 x i64> %__a to <8 x i32> 16655 %load = load <4 x i64>, ptr %__b 16656 %1 = bitcast <4 x i64> %load to <8 x i32> 16657 %2 = icmp ult <8 x i32> %0, %1 16658 %3 = bitcast i8 %__u to <8 x i1> 16659 %4 = and <8 x i1> %2, %3 16660 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16661 %6 = bitcast <32 x i1> %5 to i32 16662 ret i32 %6 16663} 16664 16665 16666define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x 
i64> %__a, ptr %__b) local_unnamed_addr { 16667; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: 16668; VLX: # %bb.0: # %entry 16669; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 16670; VLX-NEXT: kmovd %k0, %eax 16671; VLX-NEXT: vzeroupper 16672; VLX-NEXT: retq 16673; 16674; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: 16675; NoVLX: # %bb.0: # %entry 16676; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16677; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 16678; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16679; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16680; NoVLX-NEXT: kmovw %k0, %eax 16681; NoVLX-NEXT: vzeroupper 16682; NoVLX-NEXT: retq 16683entry: 16684 %0 = bitcast <4 x i64> %__a to <8 x i32> 16685 %load = load i32, ptr %__b 16686 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 16687 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 16688 %2 = icmp ult <8 x i32> %0, %1 16689 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16690 %4 = bitcast <32 x i1> %3 to i32 16691 ret i32 %4 16692} 16693 16694define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 16695; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: 16696; VLX: # %bb.0: # %entry 16697; VLX-NEXT: kmovd %edi, %k1 16698; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} 16699; VLX-NEXT: kmovd %k0, %eax 16700; VLX-NEXT: vzeroupper 16701; VLX-NEXT: retq 16702; 16703; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: 16704; NoVLX: # %bb.0: # %entry 16705; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16706; NoVLX-NEXT: kmovw %edi, %k1 16707; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, 
%k0 {%k1} 16708; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16709; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16710; NoVLX-NEXT: kmovw %k0, %eax 16711; NoVLX-NEXT: vzeroupper 16712; NoVLX-NEXT: retq 16713entry: 16714 %0 = bitcast <4 x i64> %__a to <8 x i32> 16715 %load = load i32, ptr %__b 16716 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 16717 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 16718 %2 = icmp ult <8 x i32> %0, %1 16719 %3 = bitcast i8 %__u to <8 x i1> 16720 %4 = and <8 x i1> %3, %2 16721 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16722 %6 = bitcast <32 x i1> %5 to i32 16723 ret i32 %6 16724} 16725 16726 16727define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 16728; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask: 16729; VLX: # %bb.0: # %entry 16730; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 16731; VLX-NEXT: kmovq %k0, %rax 16732; VLX-NEXT: vzeroupper 16733; VLX-NEXT: retq 16734; 16735; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask: 16736; NoVLX: # %bb.0: # %entry 16737; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 16738; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16739; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16740; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16741; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16742; NoVLX-NEXT: kmovw %k0, %eax 16743; NoVLX-NEXT: vzeroupper 16744; NoVLX-NEXT: retq 16745entry: 16746 %0 = bitcast <4 x i64> %__a to <8 x i32> 16747 %1 = bitcast <4 x i64> %__b to <8 x i32> 16748 %2 = icmp ult <8 x i32> %0, %1 16749 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 
10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16750 %4 = bitcast <64 x i1> %3 to i64 16751 ret i64 %4 16752} 16753 16754define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 16755; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: 16756; VLX: # %bb.0: # %entry 16757; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 16758; VLX-NEXT: kmovq %k0, %rax 16759; VLX-NEXT: vzeroupper 16760; VLX-NEXT: retq 16761; 16762; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: 16763; NoVLX: # %bb.0: # %entry 16764; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16765; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 16766; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16767; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16768; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16769; NoVLX-NEXT: kmovw %k0, %eax 16770; NoVLX-NEXT: vzeroupper 16771; NoVLX-NEXT: retq 16772entry: 16773 %0 = bitcast <4 x i64> %__a to <8 x i32> 16774 %load = load <4 x i64>, ptr %__b 16775 %1 = bitcast <4 x i64> %load to <8 x i32> 16776 %2 = icmp ult <8 x i32> %0, %1 16777 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16778 %4 = bitcast <64 x i1> %3 
; Span note: finishes test_vpcmpultd_v8i1_v64i1_mask_mem (the widened mask is returned as i64), defines test_masked_vpcmpultd_v8i1_v64i1_mask (same icmp ult on register operands, ANDed with i8 %__u bitcast to 8 x i1), and starts the _mem variant of the masked test.
to i64 16779 ret i64 %4 16780} 16781 16782define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 16783; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: 16784; VLX: # %bb.0: # %entry 16785; VLX-NEXT: kmovd %edi, %k1 16786; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} 16787; VLX-NEXT: kmovq %k0, %rax 16788; VLX-NEXT: vzeroupper 16789; VLX-NEXT: retq 16790; 16791; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: 16792; NoVLX: # %bb.0: # %entry 16793; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 16794; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16795; NoVLX-NEXT: kmovw %edi, %k1 16796; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16797; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16798; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16799; NoVLX-NEXT: kmovw %k0, %eax 16800; NoVLX-NEXT: vzeroupper 16801; NoVLX-NEXT: retq 16802entry: 16803 %0 = bitcast <4 x i64> %__a to <8 x i32> 16804 %1 = bitcast <4 x i64> %__b to <8 x i32> 16805 %2 = icmp ult <8 x i32> %0, %1 16806 %3 = bitcast i8 %__u to <8 x i1> 16807 %4 = and <8 x i1> %2, %3 16808 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16809 %6 = bitcast <64 x i1> %5 to i64 16810 ret i64 %6 16811} 16812 16813define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 16814; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: 16815; VLX: # %bb.0: # %entry 16816; VLX-NEXT: kmovd %edi,
; Span note: remainder of test_masked_vpcmpultd_v8i1_v64i1_mask_mem (second operand loaded from %__b, compare result ANDed with %__u), followed by the start of test_vpcmpultd_v8i1_v64i1_mask_mem_b, whose AVX512VL expectation uses a {1to8} broadcast memory operand.
%k1 16817; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} 16818; VLX-NEXT: kmovq %k0, %rax 16819; VLX-NEXT: vzeroupper 16820; VLX-NEXT: retq 16821; 16822; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: 16823; NoVLX: # %bb.0: # %entry 16824; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16825; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 16826; NoVLX-NEXT: kmovw %edi, %k1 16827; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16828; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16829; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16830; NoVLX-NEXT: kmovw %k0, %eax 16831; NoVLX-NEXT: vzeroupper 16832; NoVLX-NEXT: retq 16833entry: 16834 %0 = bitcast <4 x i64> %__a to <8 x i32> 16835 %load = load <4 x i64>, ptr %__b 16836 %1 = bitcast <4 x i64> %load to <8 x i32> 16837 %2 = icmp ult <8 x i32> %0, %1 16838 %3 = bitcast i8 %__u to <8 x i1> 16839 %4 = and <8 x i1> %2, %3 16840 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16841 %6 = bitcast <64 x i1> %5 to i64 16842 ret i64 %6 16843} 16844 16845 16846define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 16847; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: 16848; VLX: # %bb.0: # %entry 16849; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 16850; VLX-NEXT: kmovq %k0, %rax 16851; VLX-NEXT: vzeroupper 16852; VLX-NEXT: retq 16853; 16854; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: 16855; NoVLX: # %bb.0: # %entry 16856; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16857;
; Span note: IR body of test_vpcmpultd_v8i1_v64i1_mask_mem_b (loads one i32 from %__b, splats it with insertelement + shufflevector, then icmp ult), followed by the expected assembly for test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b.
NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 16858; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16859; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16860; NoVLX-NEXT: kmovw %k0, %eax 16861; NoVLX-NEXT: vzeroupper 16862; NoVLX-NEXT: retq 16863entry: 16864 %0 = bitcast <4 x i64> %__a to <8 x i32> 16865 %load = load i32, ptr %__b 16866 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 16867 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 16868 %2 = icmp ult <8 x i32> %0, %1 16869 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16870 %4 = bitcast <64 x i1> %3 to i64 16871 ret i64 %4 16872} 16873 16874define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 16875; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: 16876; VLX: # %bb.0: # %entry 16877; VLX-NEXT: kmovd %edi, %k1 16878; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} 16879; VLX-NEXT: kmovq %k0, %rax 16880; VLX-NEXT: vzeroupper 16881; VLX-NEXT: retq 16882; 16883; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: 16884; NoVLX: # %bb.0: # %entry 16885; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 16886; NoVLX-NEXT: kmovw %edi, %k1 16887; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} 16888; NoVLX-NEXT: kshiftlw $8, %k0, %k0 16889; NoVLX-NEXT: kshiftrw $8, %k0, %k0 16890; NoVLX-NEXT: kmovw %k0, %eax 16891; NoVLX-NEXT: vzeroupper 16892; NoVLX-NEXT: retq
; Span note: IR body of test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b (splat compare ANDed with %__u, with the and operands written as %3, %2), then test_vpcmpultd_v16i1_v32i1_mask, which does icmp ult on 16 x i32 and widens to 32 x i1 with zeroinitializer lanes (mask indices 16-31).
16893entry: 16894 %0 = bitcast <4 x i64> %__a to <8 x i32> 16895 %load = load i32, ptr %__b 16896 %vec = insertelement <8 x i32> undef, i32 %load, i32 0 16897 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 16898 %2 = icmp ult <8 x i32> %0, %1 16899 %3 = bitcast i8 %__u to <8 x i1> 16900 %4 = and <8 x i1> %3, %2 16901 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 16902 %6 = bitcast <64 x i1> %5 to i64 16903 ret i64 %6 16904} 16905 16906 16907define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 16908; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask: 16909; VLX: # %bb.0: # %entry 16910; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16911; VLX-NEXT: kmovd %k0, %eax 16912; VLX-NEXT: vzeroupper 16913; VLX-NEXT: retq 16914; 16915; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask: 16916; NoVLX: # %bb.0: # %entry 16917; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16918; NoVLX-NEXT: kmovw %k0, %eax 16919; NoVLX-NEXT: vzeroupper 16920; NoVLX-NEXT: retq 16921entry: 16922 %0 = bitcast <8 x i64> %__a to <16 x i32> 16923 %1 = bitcast <8 x i64> %__b to <16 x i32> 16924 %2 = icmp ult <16 x i32> %0, %1 16925 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32
; Span note: finishes test_vpcmpultd_v16i1_v32i1_mask, defines test_vpcmpultd_v16i1_v32i1_mask_mem (second operand loaded as 8 x i64 from %__b), and starts test_masked_vpcmpultd_v16i1_v32i1_mask, which takes an i16 %__u write-mask.
26, i32 27, i32 28, i32 29, i32 30, i32 31> 16926 %4 = bitcast <32 x i1> %3 to i32 16927 ret i32 %4 16928} 16929 16930define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 16931; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: 16932; VLX: # %bb.0: # %entry 16933; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 16934; VLX-NEXT: kmovd %k0, %eax 16935; VLX-NEXT: vzeroupper 16936; VLX-NEXT: retq 16937; 16938; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: 16939; NoVLX: # %bb.0: # %entry 16940; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 16941; NoVLX-NEXT: kmovw %k0, %eax 16942; NoVLX-NEXT: vzeroupper 16943; NoVLX-NEXT: retq 16944entry: 16945 %0 = bitcast <8 x i64> %__a to <16 x i32> 16946 %load = load <8 x i64>, ptr %__b 16947 %1 = bitcast <8 x i64> %load to <16 x i32> 16948 %2 = icmp ult <16 x i32> %0, %1 16949 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 16950 %4 = bitcast <32 x i1> %3 to i32 16951 ret i32 %4 16952} 16953 16954define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 16955; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: 16956; VLX: # %bb.0: # %entry 16957; VLX-NEXT: kmovd %edi, %k1 16958; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 16959; VLX-NEXT: kmovd %k0, %eax 16960; VLX-NEXT: vzeroupper 16961; VLX-NEXT: retq 16962; 16963; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: 16964; NoVLX: # %bb.0: # %entry 16965; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 16966; NoVLX-NEXT: kmovw %k0, %eax 16967; NoVLX-NEXT: andl %edi, %eax 16968; NoVLX-NEXT: vzeroupper 16969; NoVLX-NEXT: retq 16970entry: 16971 %0 = bitcast <8 x i64> %__a to <16 x i32> 16972 %1 = bitcast <8 x i64>
; Span note: finishes test_masked_vpcmpultd_v16i1_v32i1_mask (compare result ANDed with %__u bitcast to 16 x i1), defines test_masked_vpcmpultd_v16i1_v32i1_mask_mem, and opens test_vpcmpultd_v16i1_v32i1_mask_mem_b.
%__b to <16 x i32> 16973 %2 = icmp ult <16 x i32> %0, %1 16974 %3 = bitcast i16 %__u to <16 x i1> 16975 %4 = and <16 x i1> %2, %3 16976 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 16977 %6 = bitcast <32 x i1> %5 to i32 16978 ret i32 %6 16979} 16980 16981define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 16982; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: 16983; VLX: # %bb.0: # %entry 16984; VLX-NEXT: kmovd %edi, %k1 16985; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} 16986; VLX-NEXT: kmovd %k0, %eax 16987; VLX-NEXT: vzeroupper 16988; VLX-NEXT: retq 16989; 16990; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: 16991; NoVLX: # %bb.0: # %entry 16992; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 16993; NoVLX-NEXT: kmovw %k0, %eax 16994; NoVLX-NEXT: andl %edi, %eax 16995; NoVLX-NEXT: vzeroupper 16996; NoVLX-NEXT: retq 16997entry: 16998 %0 = bitcast <8 x i64> %__a to <16 x i32> 16999 %load = load <8 x i64>, ptr %__b 17000 %1 = bitcast <8 x i64> %load to <16 x i32> 17001 %2 = icmp ult <16 x i32> %0, %1 17002 %3 = bitcast i16 %__u to <16 x i1> 17003 %4 = and <16 x i1> %2, %3 17004 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 17005 %6 = bitcast <32 x i1> %5 to i32 17006 ret i32 %6 17007} 17008 17009 17010define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 17011; VLX-LABEL:
; Span note: test_vpcmpultd_v16i1_v32i1_mask_mem_b (one i32 loaded from %__b and splatted to 16 x i32 before icmp ult; the expected assembly folds it as a {1to16} operand), then the start of the masked _mem_b variant.
test_vpcmpultd_v16i1_v32i1_mask_mem_b: 17012; VLX: # %bb.0: # %entry 17013; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 17014; VLX-NEXT: kmovd %k0, %eax 17015; VLX-NEXT: vzeroupper 17016; VLX-NEXT: retq 17017; 17018; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b: 17019; NoVLX: # %bb.0: # %entry 17020; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 17021; NoVLX-NEXT: kmovw %k0, %eax 17022; NoVLX-NEXT: vzeroupper 17023; NoVLX-NEXT: retq 17024entry: 17025 %0 = bitcast <8 x i64> %__a to <16 x i32> 17026 %load = load i32, ptr %__b 17027 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 17028 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17029 %2 = icmp ult <16 x i32> %0, %1 17030 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 17031 %4 = bitcast <32 x i1> %3 to i32 17032 ret i32 %4 17033} 17034 17035define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 17036; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: 17037; VLX: # %bb.0: # %entry 17038; VLX-NEXT: kmovd %edi, %k1 17039; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} 17040; VLX-NEXT: kmovd %k0, %eax 17041; VLX-NEXT: vzeroupper 17042; VLX-NEXT: retq 17043; 17044; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: 17045; NoVLX: # %bb.0: # %entry 17046; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 17047; NoVLX-NEXT: kmovw %k0, %eax 17048; NoVLX-NEXT: andl %edi, %eax 17049; NoVLX-NEXT: vzeroupper 17050; NoVLX-NEXT: retq 17051entry: 17052 %0 = bitcast <8 x i64> %__a to <16 x i32> 17053 %load = load i32, ptr %__b
; Span note: finishes test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b, then starts test_vpcmpultd_v16i1_v64i1_mask, which widens the 16 x i1 compare result to 64 x i1 (mask indices 16-31 select zeroinitializer lanes) and returns it as i64.
17054 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 17055 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17056 %2 = icmp ult <16 x i32> %0, %1 17057 %3 = bitcast i16 %__u to <16 x i1> 17058 %4 = and <16 x i1> %3, %2 17059 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 17060 %6 = bitcast <32 x i1> %5 to i32 17061 ret i32 %6 17062} 17063 17064 17065define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 17066; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask: 17067; VLX: # %bb.0: # %entry 17068; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17069; VLX-NEXT: kmovq %k0, %rax 17070; VLX-NEXT: vzeroupper 17071; VLX-NEXT: retq 17072; 17073; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask: 17074; NoVLX: # %bb.0: # %entry 17075; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17076; NoVLX-NEXT: kmovw %k0, %eax 17077; NoVLX-NEXT: vzeroupper 17078; NoVLX-NEXT: retq 17079entry: 17080 %0 = bitcast <8 x i64> %__a to <16 x i32> 17081 %1 = bitcast <8 x i64> %__b to <16 x i32> 17082 %2 = icmp ult <16 x i32> %0, %1 17083 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17084 %4 = bitcast <64 x i1> %3
; Span note: finishes test_vpcmpultd_v16i1_v64i1_mask, defines test_vpcmpultd_v16i1_v64i1_mask_mem (second operand loaded from %__b), and starts test_masked_vpcmpultd_v16i1_v64i1_mask.
to i64 17085 ret i64 %4 17086} 17087 17088define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 17089; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: 17090; VLX: # %bb.0: # %entry 17091; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 17092; VLX-NEXT: kmovq %k0, %rax 17093; VLX-NEXT: vzeroupper 17094; VLX-NEXT: retq 17095; 17096; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: 17097; NoVLX: # %bb.0: # %entry 17098; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 17099; NoVLX-NEXT: kmovw %k0, %eax 17100; NoVLX-NEXT: vzeroupper 17101; NoVLX-NEXT: retq 17102entry: 17103 %0 = bitcast <8 x i64> %__a to <16 x i32> 17104 %load = load <8 x i64>, ptr %__b 17105 %1 = bitcast <8 x i64> %load to <16 x i32> 17106 %2 = icmp ult <16 x i32> %0, %1 17107 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17108 %4 = bitcast <64 x i1> %3 to i64 17109 ret i64 %4 17110} 17111 17112define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 17113; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: 17114; VLX: # %bb.0: # %entry 17115; VLX-NEXT: kmovd %edi, %k1 17116; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} 17117; VLX-NEXT: kmovq %k0, %rax 17118; VLX-NEXT: vzeroupper 17119; VLX-NEXT: retq 17120; 17121; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: 17122; NoVLX: # %bb.0: # %entry 17123; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 17124; NoVLX-NEXT: kmovw %k0, %eax 17125; NoVLX-NEXT: andl %edi, %eax 17126; NoVLX-NEXT: vzeroupper
; Span note: finishes test_masked_vpcmpultd_v16i1_v64i1_mask (compare result ANDed with i16 %__u bitcast to 16 x i1, then widened to 64 x i1), and starts test_masked_vpcmpultd_v16i1_v64i1_mask_mem.
17127; NoVLX-NEXT: retq 17128entry: 17129 %0 = bitcast <8 x i64> %__a to <16 x i32> 17130 %1 = bitcast <8 x i64> %__b to <16 x i32> 17131 %2 = icmp ult <16 x i32> %0, %1 17132 %3 = bitcast i16 %__u to <16 x i1> 17133 %4 = and <16 x i1> %2, %3 17134 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17135 %6 = bitcast <64 x i1> %5 to i64 17136 ret i64 %6 17137} 17138 17139define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 17140; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem: 17141; VLX: # %bb.0: # %entry 17142; VLX-NEXT: kmovd %edi, %k1 17143; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} 17144; VLX-NEXT: kmovq %k0, %rax 17145; VLX-NEXT: vzeroupper 17146; VLX-NEXT: retq 17147; 17148; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem: 17149; NoVLX: # %bb.0: # %entry 17150; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 17151; NoVLX-NEXT: kmovw %k0, %eax 17152; NoVLX-NEXT: andl %edi, %eax 17153; NoVLX-NEXT: vzeroupper 17154; NoVLX-NEXT: retq 17155entry: 17156 %0 = bitcast <8 x i64> %__a to <16 x i32> 17157 %load = load <8 x i64>, ptr %__b 17158 %1 = bitcast <8 x i64> %load to <16 x i32> 17159 %2 = icmp ult <16 x i32> %0, %1 17160 %3 = bitcast i16 %__u to <16 x i1> 17161 %4 = and <16 x i1> %2, %3 17162 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32
; Span note: finishes test_masked_vpcmpultd_v16i1_v64i1_mask_mem, defines test_vpcmpultd_v16i1_v64i1_mask_mem_b (compares against a splat of one i32 loaded from %__b), and opens the signature of the masked _mem_b variant.
24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17163 %6 = bitcast <64 x i1> %5 to i64 17164 ret i64 %6 17165} 17166 17167 17168define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 17169; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: 17170; VLX: # %bb.0: # %entry 17171; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 17172; VLX-NEXT: kmovq %k0, %rax 17173; VLX-NEXT: vzeroupper 17174; VLX-NEXT: retq 17175; 17176; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: 17177; NoVLX: # %bb.0: # %entry 17178; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 17179; NoVLX-NEXT: kmovw %k0, %eax 17180; NoVLX-NEXT: vzeroupper 17181; NoVLX-NEXT: retq 17182entry: 17183 %0 = bitcast <8 x i64> %__a to <16 x i32> 17184 %load = load i32, ptr %__b 17185 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 17186 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17187 %2 = icmp ult <16 x i32> %0, %1 17188 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17189 %4 = bitcast <64 x i1> %3 to i64 17190 ret i64 %4 17191} 17192 17193define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr
; Span note: rest of test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b, then the start of test_vpcmpultq_v2i1_v4i1_mask, the first of the 64-bit-element tests (icmp ult on 2 x i64, returned as i4).
%__b) local_unnamed_addr { 17194; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: 17195; VLX: # %bb.0: # %entry 17196; VLX-NEXT: kmovd %edi, %k1 17197; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} 17198; VLX-NEXT: kmovq %k0, %rax 17199; VLX-NEXT: vzeroupper 17200; VLX-NEXT: retq 17201; 17202; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: 17203; NoVLX: # %bb.0: # %entry 17204; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 17205; NoVLX-NEXT: kmovw %k0, %eax 17206; NoVLX-NEXT: andl %edi, %eax 17207; NoVLX-NEXT: vzeroupper 17208; NoVLX-NEXT: retq 17209entry: 17210 %0 = bitcast <8 x i64> %__a to <16 x i32> 17211 %load = load i32, ptr %__b 17212 %vec = insertelement <16 x i32> undef, i32 %load, i32 0 17213 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 17214 %2 = icmp ult <16 x i32> %0, %1 17215 %3 = bitcast i16 %__u to <16 x i1> 17216 %4 = and <16 x i1> %3, %2 17217 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 17218 %6 = bitcast <64 x i1> %5 to i64 17219 ret i64 %6 17220} 17221 17222 17223define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17224; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: 17225; VLX: # %bb.0: # %entry 17226; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 17227; VLX-NEXT: kmovb %k0, %eax 17228; VLX-NEXT: retq 17229; 17230; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: 17231; NoVLX: # %bb.0: # %entry
; Span note: finishes test_vpcmpultq_v2i1_v4i1_mask (2 x i1 result widened to 4 x i1 with zeroinitializer lanes at mask indices 2-3), defines test_vpcmpultq_v2i1_v4i1_mask_mem, and starts test_masked_vpcmpultq_v2i1_v4i1_mask.
17232; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17233; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17234; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17235; NoVLX-NEXT: kmovw %k0, %eax 17236; NoVLX-NEXT: andl $3, %eax 17237; NoVLX-NEXT: vzeroupper 17238; NoVLX-NEXT: retq 17239entry: 17240 %0 = bitcast <2 x i64> %__a to <2 x i64> 17241 %1 = bitcast <2 x i64> %__b to <2 x i64> 17242 %2 = icmp ult <2 x i64> %0, %1 17243 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17244 %4 = bitcast <4 x i1> %3 to i4 17245 ret i4 %4 17246} 17247 17248define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 17249; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: 17250; VLX: # %bb.0: # %entry 17251; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 17252; VLX-NEXT: kmovb %k0, %eax 17253; VLX-NEXT: retq 17254; 17255; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: 17256; NoVLX: # %bb.0: # %entry 17257; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17258; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 17259; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17260; NoVLX-NEXT: kmovw %k0, %eax 17261; NoVLX-NEXT: andl $3, %eax 17262; NoVLX-NEXT: vzeroupper 17263; NoVLX-NEXT: retq 17264entry: 17265 %0 = bitcast <2 x i64> %__a to <2 x i64> 17266 %load = load <2 x i64>, ptr %__b 17267 %1 = bitcast <2 x i64> %load to <2 x i64> 17268 %2 = icmp ult <2 x i64> %0, %1 17269 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17270 %4 = bitcast <4 x i1> %3 to i4 17271 ret i4 %4 17272} 17273 17274define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17275; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: 17276; VLX: # %bb.0: # %entry 17277; VLX-NEXT: kmovd %edi, %k1 17278; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} 17279; VLX-NEXT: kmovb %k0, %eax 17280; VLX-NEXT: retq 17281; 17282; NoVLX-LABEL:
; Span note: rest of test_masked_vpcmpultq_v2i1_v4i1_mask (i8 %__u is bitcast to 8 x i1 and only lanes 0 and 1 are extracted by %extract.i before the and), then test_masked_vpcmpultq_v2i1_v4i1_mask_mem with the second operand loaded from %__b.
test_masked_vpcmpultq_v2i1_v4i1_mask: 17283; NoVLX: # %bb.0: # %entry 17284; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17285; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17286; NoVLX-NEXT: kmovw %edi, %k1 17287; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17288; NoVLX-NEXT: kmovw %k0, %eax 17289; NoVLX-NEXT: andl $3, %eax 17290; NoVLX-NEXT: vzeroupper 17291; NoVLX-NEXT: retq 17292entry: 17293 %0 = bitcast <2 x i64> %__a to <2 x i64> 17294 %1 = bitcast <2 x i64> %__b to <2 x i64> 17295 %2 = icmp ult <2 x i64> %0, %1 17296 %3 = bitcast i8 %__u to <8 x i1> 17297 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17298 %4 = and <2 x i1> %2, %extract.i 17299 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17300 %6 = bitcast <4 x i1> %5 to i4 17301 ret i4 %6 17302} 17303 17304define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 17305; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: 17306; VLX: # %bb.0: # %entry 17307; VLX-NEXT: kmovd %edi, %k1 17308; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} 17309; VLX-NEXT: kmovb %k0, %eax 17310; VLX-NEXT: retq 17311; 17312; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: 17313; NoVLX: # %bb.0: # %entry 17314; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17315; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 17316; NoVLX-NEXT: kmovw %edi, %k1 17317; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17318; NoVLX-NEXT: kmovw %k0, %eax 17319; NoVLX-NEXT: andl $3, %eax 17320; NoVLX-NEXT: vzeroupper 17321; NoVLX-NEXT: retq 17322entry: 17323 %0 = bitcast <2 x i64> %__a to <2 x i64> 17324 %load = load <2 x i64>, ptr %__b 17325 %1 = bitcast <2 x i64> %load to <2 x i64> 17326 %2 = icmp ult <2 x i64> %0, %1 17327 %3 = bitcast i8 %__u to <8 x i1> 17328 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17329 %4 = and <2 x i1> %2, %extract.i
; Span note: finishes test_masked_vpcmpultq_v2i1_v4i1_mask_mem, defines test_vpcmpultq_v2i1_v4i1_mask_mem_b (one i64 loaded from %__b and splatted to 2 x i64 before icmp ult), and gives the expected assembly for the masked _mem_b variant.
17330 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17331 %6 = bitcast <4 x i1> %5 to i4 17332 ret i4 %6 17333} 17334 17335 17336define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 17337; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: 17338; VLX: # %bb.0: # %entry 17339; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 17340; VLX-NEXT: kmovb %k0, %eax 17341; VLX-NEXT: retq 17342; 17343; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: 17344; NoVLX: # %bb.0: # %entry 17345; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17346; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 17347; NoVLX-NEXT: kmovw %k0, %eax 17348; NoVLX-NEXT: andl $3, %eax 17349; NoVLX-NEXT: vzeroupper 17350; NoVLX-NEXT: retq 17351entry: 17352 %0 = bitcast <2 x i64> %__a to <2 x i64> 17353 %load = load i64, ptr %__b 17354 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17355 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17356 %2 = icmp ult <2 x i64> %0, %1 17357 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17358 %4 = bitcast <4 x i1> %3 to i4 17359 ret i4 %4 17360} 17361 17362define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 17363; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: 17364; VLX: # %bb.0: # %entry 17365; VLX-NEXT: kmovd %edi, %k1 17366; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} 17367; VLX-NEXT: kmovb %k0, %eax 17368; VLX-NEXT: retq 17369; 17370; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: 17371; NoVLX: # %bb.0: # %entry 17372; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17373; NoVLX-NEXT: kmovw %edi, %k1 17374; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 17375; NoVLX-NEXT: kmovw %k0, %eax 17376; NoVLX-NEXT: andl $3, %eax 17377; NoVLX-NEXT: vzeroupper 17378; NoVLX-NEXT: retq
; Span note: IR body of test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b, then test_vpcmpultq_v2i1_v8i1_mask (2 x i1 result widened to 8 x i1 with mask indices 2 and 3 repeated to select zeroinitializer lanes, returned as i8) and the start of its _mem variant.
17379entry: 17380 %0 = bitcast <2 x i64> %__a to <2 x i64> 17381 %load = load i64, ptr %__b 17382 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17383 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17384 %2 = icmp ult <2 x i64> %0, %1 17385 %3 = bitcast i8 %__u to <8 x i1> 17386 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17387 %4 = and <2 x i1> %extract.i, %2 17388 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 17389 %6 = bitcast <4 x i1> %5 to i4 17390 ret i4 %6 17391} 17392 17393 17394define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17395; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: 17396; VLX: # %bb.0: # %entry 17397; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 17398; VLX-NEXT: kmovd %k0, %eax 17399; VLX-NEXT: # kill: def $al killed $al killed $eax 17400; VLX-NEXT: retq 17401; 17402; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: 17403; NoVLX: # %bb.0: # %entry 17404; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17405; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17406; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17407; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17408; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17409; NoVLX-NEXT: kmovw %k0, %eax 17410; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17411; NoVLX-NEXT: vzeroupper 17412; NoVLX-NEXT: retq 17413entry: 17414 %0 = bitcast <2 x i64> %__a to <2 x i64> 17415 %1 = bitcast <2 x i64> %__b to <2 x i64> 17416 %2 = icmp ult <2 x i64> %0, %1 17417 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17418 %4 = bitcast <8 x i1> %3 to i8 17419 ret i8 %4 17420} 17421 17422define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 17423; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: 17424; VLX: # %bb.0: # %entry 17425; VLX-NEXT: vpcmpltuq
; Span note: rest of test_vpcmpultq_v2i1_v8i1_mask_mem (second operand loaded from %__b), then the start of test_masked_vpcmpultq_v2i1_v8i1_mask, which takes an i8 %__u write-mask.
(%rdi), %xmm0, %k0 17426; VLX-NEXT: kmovd %k0, %eax 17427; VLX-NEXT: # kill: def $al killed $al killed $eax 17428; VLX-NEXT: retq 17429; 17430; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: 17431; NoVLX: # %bb.0: # %entry 17432; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17433; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 17434; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17435; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17436; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17437; NoVLX-NEXT: kmovw %k0, %eax 17438; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17439; NoVLX-NEXT: vzeroupper 17440; NoVLX-NEXT: retq 17441entry: 17442 %0 = bitcast <2 x i64> %__a to <2 x i64> 17443 %load = load <2 x i64>, ptr %__b 17444 %1 = bitcast <2 x i64> %load to <2 x i64> 17445 %2 = icmp ult <2 x i64> %0, %1 17446 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17447 %4 = bitcast <8 x i1> %3 to i8 17448 ret i8 %4 17449} 17450 17451define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17452; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: 17453; VLX: # %bb.0: # %entry 17454; VLX-NEXT: kmovd %edi, %k1 17455; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} 17456; VLX-NEXT: kmovd %k0, %eax 17457; VLX-NEXT: # kill: def $al killed $al killed $eax 17458; VLX-NEXT: retq 17459; 17460; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: 17461; NoVLX: # %bb.0: # %entry 17462; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17463; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17464; NoVLX-NEXT: kmovw %edi, %k1 17465; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17466; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17467; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17468; NoVLX-NEXT: kmovw %k0, %eax 17469; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17470; NoVLX-NEXT: vzeroupper 17471; NoVLX-NEXT: retq 17472entry: 17473 %0 = bitcast <2 x i64> %__a to <2 x i64> 17474 %1
= bitcast <2 x i64> %__b to <2 x i64> 17475 %2 = icmp ult <2 x i64> %0, %1 17476 %3 = bitcast i8 %__u to <8 x i1> 17477 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17478 %4 = and <2 x i1> %2, %extract.i 17479 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17480 %6 = bitcast <8 x i1> %5 to i8 17481 ret i8 %6 17482} 17483 17484define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 17485; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: 17486; VLX: # %bb.0: # %entry 17487; VLX-NEXT: kmovd %edi, %k1 17488; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} 17489; VLX-NEXT: kmovd %k0, %eax 17490; VLX-NEXT: # kill: def $al killed $al killed $eax 17491; VLX-NEXT: retq 17492; 17493; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: 17494; NoVLX: # %bb.0: # %entry 17495; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17496; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 17497; NoVLX-NEXT: kmovw %edi, %k1 17498; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17499; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17500; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17501; NoVLX-NEXT: kmovw %k0, %eax 17502; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17503; NoVLX-NEXT: vzeroupper 17504; NoVLX-NEXT: retq 17505entry: 17506 %0 = bitcast <2 x i64> %__a to <2 x i64> 17507 %load = load <2 x i64>, ptr %__b 17508 %1 = bitcast <2 x i64> %load to <2 x i64> 17509 %2 = icmp ult <2 x i64> %0, %1 17510 %3 = bitcast i8 %__u to <8 x i1> 17511 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17512 %4 = and <2 x i1> %2, %extract.i 17513 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17514 %6 = bitcast <8 x i1> %5 to i8 17515 ret i8 %6 17516} 17517 17518 17519define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, 
ptr %__b) local_unnamed_addr { 17520; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: 17521; VLX: # %bb.0: # %entry 17522; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 17523; VLX-NEXT: kmovd %k0, %eax 17524; VLX-NEXT: # kill: def $al killed $al killed $eax 17525; VLX-NEXT: retq 17526; 17527; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: 17528; NoVLX: # %bb.0: # %entry 17529; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17530; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 17531; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17532; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17533; NoVLX-NEXT: kmovw %k0, %eax 17534; NoVLX-NEXT: # kill: def $al killed $al killed $eax 17535; NoVLX-NEXT: vzeroupper 17536; NoVLX-NEXT: retq 17537entry: 17538 %0 = bitcast <2 x i64> %__a to <2 x i64> 17539 %load = load i64, ptr %__b 17540 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17541 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17542 %2 = icmp ult <2 x i64> %0, %1 17543 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17544 %4 = bitcast <8 x i1> %3 to i8 17545 ret i8 %4 17546} 17547 17548define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 17549; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: 17550; VLX: # %bb.0: # %entry 17551; VLX-NEXT: kmovd %edi, %k1 17552; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} 17553; VLX-NEXT: kmovd %k0, %eax 17554; VLX-NEXT: # kill: def $al killed $al killed $eax 17555; VLX-NEXT: retq 17556; 17557; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: 17558; NoVLX: # %bb.0: # %entry 17559; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17560; NoVLX-NEXT: kmovw %edi, %k1 17561; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 17562; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17563; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17564; NoVLX-NEXT: kmovw %k0, %eax 17565; 
NoVLX-NEXT: # kill: def $al killed $al killed $eax 17566; NoVLX-NEXT: vzeroupper 17567; NoVLX-NEXT: retq 17568entry: 17569 %0 = bitcast <2 x i64> %__a to <2 x i64> 17570 %load = load i64, ptr %__b 17571 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17572 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17573 %2 = icmp ult <2 x i64> %0, %1 17574 %3 = bitcast i8 %__u to <8 x i1> 17575 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17576 %4 = and <2 x i1> %extract.i, %2 17577 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17578 %6 = bitcast <8 x i1> %5 to i8 17579 ret i8 %6 17580} 17581 17582 17583define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17584; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: 17585; VLX: # %bb.0: # %entry 17586; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 17587; VLX-NEXT: kmovd %k0, %eax 17588; VLX-NEXT: # kill: def $ax killed $ax killed $eax 17589; VLX-NEXT: retq 17590; 17591; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: 17592; NoVLX: # %bb.0: # %entry 17593; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17594; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17595; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17596; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17597; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17598; NoVLX-NEXT: kmovw %k0, %eax 17599; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 17600; NoVLX-NEXT: vzeroupper 17601; NoVLX-NEXT: retq 17602entry: 17603 %0 = bitcast <2 x i64> %__a to <2 x i64> 17604 %1 = bitcast <2 x i64> %__b to <2 x i64> 17605 %2 = icmp ult <2 x i64> %0, %1 17606 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17607 %4 = bitcast <16 x i1> %3 to i16 17608 ret i16 %4 17609} 17610 17611define zeroext 
i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 17612; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: 17613; VLX: # %bb.0: # %entry 17614; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 17615; VLX-NEXT: kmovd %k0, %eax 17616; VLX-NEXT: # kill: def $ax killed $ax killed $eax 17617; VLX-NEXT: retq 17618; 17619; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: 17620; NoVLX: # %bb.0: # %entry 17621; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17622; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 17623; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17624; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17625; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17626; NoVLX-NEXT: kmovw %k0, %eax 17627; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 17628; NoVLX-NEXT: vzeroupper 17629; NoVLX-NEXT: retq 17630entry: 17631 %0 = bitcast <2 x i64> %__a to <2 x i64> 17632 %load = load <2 x i64>, ptr %__b 17633 %1 = bitcast <2 x i64> %load to <2 x i64> 17634 %2 = icmp ult <2 x i64> %0, %1 17635 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17636 %4 = bitcast <16 x i1> %3 to i16 17637 ret i16 %4 17638} 17639 17640define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17641; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: 17642; VLX: # %bb.0: # %entry 17643; VLX-NEXT: kmovd %edi, %k1 17644; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} 17645; VLX-NEXT: kmovd %k0, %eax 17646; VLX-NEXT: # kill: def $ax killed $ax killed $eax 17647; VLX-NEXT: retq 17648; 17649; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: 17650; NoVLX: # %bb.0: # %entry 17651; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17652; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17653; NoVLX-NEXT: kmovw %edi, %k1 17654; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17655; NoVLX-NEXT: kshiftlw $14, 
%k0, %k0 17656; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17657; NoVLX-NEXT: kmovw %k0, %eax 17658; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 17659; NoVLX-NEXT: vzeroupper 17660; NoVLX-NEXT: retq 17661entry: 17662 %0 = bitcast <2 x i64> %__a to <2 x i64> 17663 %1 = bitcast <2 x i64> %__b to <2 x i64> 17664 %2 = icmp ult <2 x i64> %0, %1 17665 %3 = bitcast i8 %__u to <8 x i1> 17666 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17667 %4 = and <2 x i1> %2, %extract.i 17668 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17669 %6 = bitcast <16 x i1> %5 to i16 17670 ret i16 %6 17671} 17672 17673define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 17674; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: 17675; VLX: # %bb.0: # %entry 17676; VLX-NEXT: kmovd %edi, %k1 17677; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} 17678; VLX-NEXT: kmovd %k0, %eax 17679; VLX-NEXT: # kill: def $ax killed $ax killed $eax 17680; VLX-NEXT: retq 17681; 17682; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: 17683; NoVLX: # %bb.0: # %entry 17684; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17685; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 17686; NoVLX-NEXT: kmovw %edi, %k1 17687; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17688; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17689; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17690; NoVLX-NEXT: kmovw %k0, %eax 17691; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 17692; NoVLX-NEXT: vzeroupper 17693; NoVLX-NEXT: retq 17694entry: 17695 %0 = bitcast <2 x i64> %__a to <2 x i64> 17696 %load = load <2 x i64>, ptr %__b 17697 %1 = bitcast <2 x i64> %load to <2 x i64> 17698 %2 = icmp ult <2 x i64> %0, %1 17699 %3 = bitcast i8 %__u to <8 x i1> 17700 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> 
<i32 0, i32 1> 17701 %4 = and <2 x i1> %2, %extract.i 17702 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17703 %6 = bitcast <16 x i1> %5 to i16 17704 ret i16 %6 17705} 17706 17707 17708define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 17709; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: 17710; VLX: # %bb.0: # %entry 17711; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 17712; VLX-NEXT: kmovd %k0, %eax 17713; VLX-NEXT: # kill: def $ax killed $ax killed $eax 17714; VLX-NEXT: retq 17715; 17716; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: 17717; NoVLX: # %bb.0: # %entry 17718; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17719; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 17720; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17721; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17722; NoVLX-NEXT: kmovw %k0, %eax 17723; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 17724; NoVLX-NEXT: vzeroupper 17725; NoVLX-NEXT: retq 17726entry: 17727 %0 = bitcast <2 x i64> %__a to <2 x i64> 17728 %load = load i64, ptr %__b 17729 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17730 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17731 %2 = icmp ult <2 x i64> %0, %1 17732 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17733 %4 = bitcast <16 x i1> %3 to i16 17734 ret i16 %4 17735} 17736 17737define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 17738; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: 17739; VLX: # %bb.0: # %entry 17740; VLX-NEXT: kmovd %edi, %k1 17741; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} 17742; VLX-NEXT: kmovd %k0, %eax 
17743; VLX-NEXT: # kill: def $ax killed $ax killed $eax 17744; VLX-NEXT: retq 17745; 17746; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: 17747; NoVLX: # %bb.0: # %entry 17748; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17749; NoVLX-NEXT: kmovw %edi, %k1 17750; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 17751; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17752; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17753; NoVLX-NEXT: kmovw %k0, %eax 17754; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 17755; NoVLX-NEXT: vzeroupper 17756; NoVLX-NEXT: retq 17757entry: 17758 %0 = bitcast <2 x i64> %__a to <2 x i64> 17759 %load = load i64, ptr %__b 17760 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17761 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17762 %2 = icmp ult <2 x i64> %0, %1 17763 %3 = bitcast i8 %__u to <8 x i1> 17764 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17765 %4 = and <2 x i1> %extract.i, %2 17766 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17767 %6 = bitcast <16 x i1> %5 to i16 17768 ret i16 %6 17769} 17770 17771 17772define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17773; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask: 17774; VLX: # %bb.0: # %entry 17775; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 17776; VLX-NEXT: kmovd %k0, %eax 17777; VLX-NEXT: retq 17778; 17779; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask: 17780; NoVLX: # %bb.0: # %entry 17781; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17782; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17783; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17784; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17785; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17786; NoVLX-NEXT: kmovw %k0, %eax 17787; NoVLX-NEXT: vzeroupper 17788; NoVLX-NEXT: retq 
17789entry: 17790 %0 = bitcast <2 x i64> %__a to <2 x i64> 17791 %1 = bitcast <2 x i64> %__b to <2 x i64> 17792 %2 = icmp ult <2 x i64> %0, %1 17793 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17794 %4 = bitcast <32 x i1> %3 to i32 17795 ret i32 %4 17796} 17797 17798define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 17799; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: 17800; VLX: # %bb.0: # %entry 17801; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 17802; VLX-NEXT: kmovd %k0, %eax 17803; VLX-NEXT: retq 17804; 17805; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: 17806; NoVLX: # %bb.0: # %entry 17807; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17808; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 17809; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17810; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17811; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17812; NoVLX-NEXT: kmovw %k0, %eax 17813; NoVLX-NEXT: vzeroupper 17814; NoVLX-NEXT: retq 17815entry: 17816 %0 = bitcast <2 x i64> %__a to <2 x i64> 17817 %load = load <2 x i64>, ptr %__b 17818 %1 = bitcast <2 x i64> %load to <2 x i64> 17819 %2 = icmp ult <2 x i64> %0, %1 17820 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17821 %4 = bitcast <32 x i1> %3 to i32 17822 ret i32 %4 17823} 17824 17825define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17826; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: 17827; VLX: # %bb.0: # %entry 17828; 
VLX-NEXT: kmovd %edi, %k1 17829; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} 17830; VLX-NEXT: kmovd %k0, %eax 17831; VLX-NEXT: retq 17832; 17833; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: 17834; NoVLX: # %bb.0: # %entry 17835; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17836; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17837; NoVLX-NEXT: kmovw %edi, %k1 17838; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17839; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17840; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17841; NoVLX-NEXT: kmovw %k0, %eax 17842; NoVLX-NEXT: vzeroupper 17843; NoVLX-NEXT: retq 17844entry: 17845 %0 = bitcast <2 x i64> %__a to <2 x i64> 17846 %1 = bitcast <2 x i64> %__b to <2 x i64> 17847 %2 = icmp ult <2 x i64> %0, %1 17848 %3 = bitcast i8 %__u to <8 x i1> 17849 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17850 %4 = and <2 x i1> %2, %extract.i 17851 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17852 %6 = bitcast <32 x i1> %5 to i32 17853 ret i32 %6 17854} 17855 17856define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 17857; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: 17858; VLX: # %bb.0: # %entry 17859; VLX-NEXT: kmovd %edi, %k1 17860; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} 17861; VLX-NEXT: kmovd %k0, %eax 17862; VLX-NEXT: retq 17863; 17864; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: 17865; NoVLX: # %bb.0: # %entry 17866; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17867; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 17868; NoVLX-NEXT: kmovw %edi, %k1 17869; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 17870; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17871; 
NoVLX-NEXT: kshiftrw $14, %k0, %k0 17872; NoVLX-NEXT: kmovw %k0, %eax 17873; NoVLX-NEXT: vzeroupper 17874; NoVLX-NEXT: retq 17875entry: 17876 %0 = bitcast <2 x i64> %__a to <2 x i64> 17877 %load = load <2 x i64>, ptr %__b 17878 %1 = bitcast <2 x i64> %load to <2 x i64> 17879 %2 = icmp ult <2 x i64> %0, %1 17880 %3 = bitcast i8 %__u to <8 x i1> 17881 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17882 %4 = and <2 x i1> %2, %extract.i 17883 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17884 %6 = bitcast <32 x i1> %5 to i32 17885 ret i32 %6 17886} 17887 17888 17889define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 17890; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: 17891; VLX: # %bb.0: # %entry 17892; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 17893; VLX-NEXT: kmovd %k0, %eax 17894; VLX-NEXT: retq 17895; 17896; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: 17897; NoVLX: # %bb.0: # %entry 17898; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17899; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 17900; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17901; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17902; NoVLX-NEXT: kmovw %k0, %eax 17903; NoVLX-NEXT: vzeroupper 17904; NoVLX-NEXT: retq 17905entry: 17906 %0 = bitcast <2 x i64> %__a to <2 x i64> 17907 %load = load i64, ptr %__b 17908 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17909 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17910 %2 = icmp ult <2 x i64> %0, %1 17911 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 
2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17912 %4 = bitcast <32 x i1> %3 to i32 17913 ret i32 %4 17914} 17915 17916define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 17917; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: 17918; VLX: # %bb.0: # %entry 17919; VLX-NEXT: kmovd %edi, %k1 17920; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} 17921; VLX-NEXT: kmovd %k0, %eax 17922; VLX-NEXT: retq 17923; 17924; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: 17925; NoVLX: # %bb.0: # %entry 17926; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17927; NoVLX-NEXT: kmovw %edi, %k1 17928; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 17929; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17930; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17931; NoVLX-NEXT: kmovw %k0, %eax 17932; NoVLX-NEXT: vzeroupper 17933; NoVLX-NEXT: retq 17934entry: 17935 %0 = bitcast <2 x i64> %__a to <2 x i64> 17936 %load = load i64, ptr %__b 17937 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 17938 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 17939 %2 = icmp ult <2 x i64> %0, %1 17940 %3 = bitcast i8 %__u to <8 x i1> 17941 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 17942 %4 = and <2 x i1> %extract.i, %2 17943 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17944 %6 = bitcast <32 x i1> %5 to i32 17945 ret i32 %6 17946} 17947 17948 17949define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 17950; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask: 17951; VLX: # %bb.0: # %entry 17952; VLX-NEXT: 
vpcmpltuq %xmm1, %xmm0, %k0 17953; VLX-NEXT: kmovq %k0, %rax 17954; VLX-NEXT: retq 17955; 17956; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask: 17957; NoVLX: # %bb.0: # %entry 17958; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 17959; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17960; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17961; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17962; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17963; NoVLX-NEXT: kmovw %k0, %eax 17964; NoVLX-NEXT: vzeroupper 17965; NoVLX-NEXT: retq 17966entry: 17967 %0 = bitcast <2 x i64> %__a to <2 x i64> 17968 %1 = bitcast <2 x i64> %__b to <2 x i64> 17969 %2 = icmp ult <2 x i64> %0, %1 17970 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17971 %4 = bitcast <64 x i1> %3 to i64 17972 ret i64 %4 17973} 17974 17975define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 17976; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: 17977; VLX: # %bb.0: # %entry 17978; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 17979; VLX-NEXT: kmovq %k0, %rax 17980; VLX-NEXT: retq 17981; 17982; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: 17983; NoVLX: # %bb.0: # %entry 17984; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 17985; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 17986; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 17987; NoVLX-NEXT: kshiftlw $14, %k0, %k0 17988; NoVLX-NEXT: kshiftrw $14, %k0, %k0 17989; NoVLX-NEXT: kmovw %k0, %eax 17990; NoVLX-NEXT: vzeroupper 17991; NoVLX-NEXT: retq 17992entry: 17993 %0 = bitcast <2 x i64> 
%__a to <2 x i64> 17994 %load = load <2 x i64>, ptr %__b 17995 %1 = bitcast <2 x i64> %load to <2 x i64> 17996 %2 = icmp ult <2 x i64> %0, %1 17997 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 17998 %4 = bitcast <64 x i1> %3 to i64 17999 ret i64 %4 18000} 18001 18002define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 18003; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: 18004; VLX: # %bb.0: # %entry 18005; VLX-NEXT: kmovd %edi, %k1 18006; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} 18007; VLX-NEXT: kmovq %k0, %rax 18008; VLX-NEXT: retq 18009; 18010; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: 18011; NoVLX: # %bb.0: # %entry 18012; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 18013; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18014; NoVLX-NEXT: kmovw %edi, %k1 18015; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18016; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18017; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18018; NoVLX-NEXT: kmovw %k0, %eax 18019; NoVLX-NEXT: vzeroupper 18020; NoVLX-NEXT: retq 18021entry: 18022 %0 = bitcast <2 x i64> %__a to <2 x i64> 18023 %1 = bitcast <2 x i64> %__b to <2 x i64> 18024 %2 = icmp ult <2 x i64> %0, %1 18025 %3 = bitcast i8 %__u to <8 x i1> 18026 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18027 %4 = and <2 x i1> %2, %extract.i 18028 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, 
i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18029 %6 = bitcast <64 x i1> %5 to i64 18030 ret i64 %6 18031} 18032 18033define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 18034; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: 18035; VLX: # %bb.0: # %entry 18036; VLX-NEXT: kmovd %edi, %k1 18037; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} 18038; VLX-NEXT: kmovq %k0, %rax 18039; VLX-NEXT: retq 18040; 18041; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: 18042; NoVLX: # %bb.0: # %entry 18043; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18044; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 18045; NoVLX-NEXT: kmovw %edi, %k1 18046; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18047; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18048; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18049; NoVLX-NEXT: kmovw %k0, %eax 18050; NoVLX-NEXT: vzeroupper 18051; NoVLX-NEXT: retq 18052entry: 18053 %0 = bitcast <2 x i64> %__a to <2 x i64> 18054 %load = load <2 x i64>, ptr %__b 18055 %1 = bitcast <2 x i64> %load to <2 x i64> 18056 %2 = icmp ult <2 x i64> %0, %1 18057 %3 = bitcast i8 %__u to <8 x i1> 18058 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18059 %4 = and <2 x i1> %2, %extract.i 18060 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18061 %6 = bitcast <64 x i1> %5 to i64 18062 ret i64 %6 18063} 18064 18065 18066define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 18067; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: 18068; VLX: # %bb.0: # %entry 18069; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 18070; VLX-NEXT: kmovq %k0, %rax 18071; VLX-NEXT: retq 18072; 18073; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: 18074; NoVLX: # %bb.0: # %entry 18075; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18076; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 18077; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18078; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18079; NoVLX-NEXT: kmovw %k0, %eax 18080; NoVLX-NEXT: vzeroupper 18081; NoVLX-NEXT: retq 18082entry: 18083 %0 = bitcast <2 x i64> %__a to <2 x i64> 18084 %load = load i64, ptr %__b 18085 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 18086 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 18087 %2 = icmp ult <2 x i64> %0, %1 18088 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18089 %4 = bitcast <64 x i1> %3 to i64 18090 ret i64 %4 18091} 18092 18093define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 18094; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: 
18095; VLX: # %bb.0: # %entry 18096; VLX-NEXT: kmovd %edi, %k1 18097; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} 18098; VLX-NEXT: kmovq %k0, %rax 18099; VLX-NEXT: retq 18100; 18101; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: 18102; NoVLX: # %bb.0: # %entry 18103; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 18104; NoVLX-NEXT: kmovw %edi, %k1 18105; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 18106; NoVLX-NEXT: kshiftlw $14, %k0, %k0 18107; NoVLX-NEXT: kshiftrw $14, %k0, %k0 18108; NoVLX-NEXT: kmovw %k0, %eax 18109; NoVLX-NEXT: vzeroupper 18110; NoVLX-NEXT: retq 18111entry: 18112 %0 = bitcast <2 x i64> %__a to <2 x i64> 18113 %load = load i64, ptr %__b 18114 %vec = insertelement <2 x i64> undef, i64 %load, i32 0 18115 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0> 18116 %2 = icmp ult <2 x i64> %0, %1 18117 %3 = bitcast i8 %__u to <8 x i1> 18118 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> 18119 %4 = and <2 x i1> %extract.i, %2 18120 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 18121 %6 = bitcast <64 x i1> %5 to i64 18122 ret i64 %6 18123} 18124 18125 18126define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18127; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: 18128; VLX: # %bb.0: # %entry 18129; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 18130; VLX-NEXT: kmovd %k0, %eax 18131; VLX-NEXT: # kill: def $al killed $al killed $eax 18132; VLX-NEXT: vzeroupper 18133; 
VLX-NEXT: retq 18134; 18135; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: 18136; NoVLX: # %bb.0: # %entry 18137; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18138; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18139; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18140; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18141; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18142; NoVLX-NEXT: kmovw %k0, %eax 18143; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18144; NoVLX-NEXT: vzeroupper 18145; NoVLX-NEXT: retq 18146entry: 18147 %0 = bitcast <4 x i64> %__a to <4 x i64> 18148 %1 = bitcast <4 x i64> %__b to <4 x i64> 18149 %2 = icmp ult <4 x i64> %0, %1 18150 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18151 %4 = bitcast <8 x i1> %3 to i8 18152 ret i8 %4 18153} 18154 18155define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 18156; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: 18157; VLX: # %bb.0: # %entry 18158; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 18159; VLX-NEXT: kmovd %k0, %eax 18160; VLX-NEXT: # kill: def $al killed $al killed $eax 18161; VLX-NEXT: vzeroupper 18162; VLX-NEXT: retq 18163; 18164; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: 18165; NoVLX: # %bb.0: # %entry 18166; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18167; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 18168; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18169; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18170; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18171; NoVLX-NEXT: kmovw %k0, %eax 18172; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18173; NoVLX-NEXT: vzeroupper 18174; NoVLX-NEXT: retq 18175entry: 18176 %0 = bitcast <4 x i64> %__a to <4 x i64> 18177 %load = load <4 x i64>, ptr %__b 18178 %1 = bitcast <4 x i64> %load to <4 x i64> 18179 %2 = icmp ult <4 x i64> %0, %1 18180 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 
7> 18181 %4 = bitcast <8 x i1> %3 to i8 18182 ret i8 %4 18183} 18184 18185define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18186; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: 18187; VLX: # %bb.0: # %entry 18188; VLX-NEXT: kmovd %edi, %k1 18189; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} 18190; VLX-NEXT: kmovd %k0, %eax 18191; VLX-NEXT: # kill: def $al killed $al killed $eax 18192; VLX-NEXT: vzeroupper 18193; VLX-NEXT: retq 18194; 18195; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: 18196; NoVLX: # %bb.0: # %entry 18197; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18198; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18199; NoVLX-NEXT: kmovw %edi, %k1 18200; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18201; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18202; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18203; NoVLX-NEXT: kmovw %k0, %eax 18204; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18205; NoVLX-NEXT: vzeroupper 18206; NoVLX-NEXT: retq 18207entry: 18208 %0 = bitcast <4 x i64> %__a to <4 x i64> 18209 %1 = bitcast <4 x i64> %__b to <4 x i64> 18210 %2 = icmp ult <4 x i64> %0, %1 18211 %3 = bitcast i8 %__u to <8 x i1> 18212 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18213 %4 = and <4 x i1> %2, %extract.i 18214 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18215 %6 = bitcast <8 x i1> %5 to i8 18216 ret i8 %6 18217} 18218 18219define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 18220; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: 18221; VLX: # %bb.0: # %entry 18222; VLX-NEXT: kmovd %edi, %k1 18223; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} 18224; VLX-NEXT: kmovd %k0, %eax 18225; VLX-NEXT: # kill: def $al killed $al killed $eax 18226; VLX-NEXT: vzeroupper 18227; 
VLX-NEXT: retq 18228; 18229; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: 18230; NoVLX: # %bb.0: # %entry 18231; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18232; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 18233; NoVLX-NEXT: kmovw %edi, %k1 18234; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18235; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18236; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18237; NoVLX-NEXT: kmovw %k0, %eax 18238; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18239; NoVLX-NEXT: vzeroupper 18240; NoVLX-NEXT: retq 18241entry: 18242 %0 = bitcast <4 x i64> %__a to <4 x i64> 18243 %load = load <4 x i64>, ptr %__b 18244 %1 = bitcast <4 x i64> %load to <4 x i64> 18245 %2 = icmp ult <4 x i64> %0, %1 18246 %3 = bitcast i8 %__u to <8 x i1> 18247 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18248 %4 = and <4 x i1> %2, %extract.i 18249 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18250 %6 = bitcast <8 x i1> %5 to i8 18251 ret i8 %6 18252} 18253 18254 18255define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 18256; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: 18257; VLX: # %bb.0: # %entry 18258; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 18259; VLX-NEXT: kmovd %k0, %eax 18260; VLX-NEXT: # kill: def $al killed $al killed $eax 18261; VLX-NEXT: vzeroupper 18262; VLX-NEXT: retq 18263; 18264; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: 18265; NoVLX: # %bb.0: # %entry 18266; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18267; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 18268; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18269; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18270; NoVLX-NEXT: kmovw %k0, %eax 18271; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18272; NoVLX-NEXT: vzeroupper 18273; NoVLX-NEXT: retq 18274entry: 18275 %0 = bitcast <4 x i64> %__a to <4 x i64> 18276 %load = 
load i64, ptr %__b 18277 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 18278 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 18279 %2 = icmp ult <4 x i64> %0, %1 18280 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18281 %4 = bitcast <8 x i1> %3 to i8 18282 ret i8 %4 18283} 18284 18285define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 18286; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: 18287; VLX: # %bb.0: # %entry 18288; VLX-NEXT: kmovd %edi, %k1 18289; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} 18290; VLX-NEXT: kmovd %k0, %eax 18291; VLX-NEXT: # kill: def $al killed $al killed $eax 18292; VLX-NEXT: vzeroupper 18293; VLX-NEXT: retq 18294; 18295; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: 18296; NoVLX: # %bb.0: # %entry 18297; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18298; NoVLX-NEXT: kmovw %edi, %k1 18299; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 18300; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18301; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18302; NoVLX-NEXT: kmovw %k0, %eax 18303; NoVLX-NEXT: # kill: def $al killed $al killed $eax 18304; NoVLX-NEXT: vzeroupper 18305; NoVLX-NEXT: retq 18306entry: 18307 %0 = bitcast <4 x i64> %__a to <4 x i64> 18308 %load = load i64, ptr %__b 18309 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 18310 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 18311 %2 = icmp ult <4 x i64> %0, %1 18312 %3 = bitcast i8 %__u to <8 x i1> 18313 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18314 %4 = and <4 x i1> %extract.i, %2 18315 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 18316 %6 = bitcast <8 x i1> %5 to i8 18317 ret i8 
%6 18318} 18319 18320 18321define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18322; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: 18323; VLX: # %bb.0: # %entry 18324; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 18325; VLX-NEXT: kmovd %k0, %eax 18326; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18327; VLX-NEXT: vzeroupper 18328; VLX-NEXT: retq 18329; 18330; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: 18331; NoVLX: # %bb.0: # %entry 18332; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18333; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18334; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18335; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18336; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18337; NoVLX-NEXT: kmovw %k0, %eax 18338; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18339; NoVLX-NEXT: vzeroupper 18340; NoVLX-NEXT: retq 18341entry: 18342 %0 = bitcast <4 x i64> %__a to <4 x i64> 18343 %1 = bitcast <4 x i64> %__b to <4 x i64> 18344 %2 = icmp ult <4 x i64> %0, %1 18345 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18346 %4 = bitcast <16 x i1> %3 to i16 18347 ret i16 %4 18348} 18349 18350define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 18351; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: 18352; VLX: # %bb.0: # %entry 18353; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 18354; VLX-NEXT: kmovd %k0, %eax 18355; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18356; VLX-NEXT: vzeroupper 18357; VLX-NEXT: retq 18358; 18359; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: 18360; NoVLX: # %bb.0: # %entry 18361; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18362; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 18363; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18364; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18365; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18366; 
NoVLX-NEXT: kmovw %k0, %eax 18367; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18368; NoVLX-NEXT: vzeroupper 18369; NoVLX-NEXT: retq 18370entry: 18371 %0 = bitcast <4 x i64> %__a to <4 x i64> 18372 %load = load <4 x i64>, ptr %__b 18373 %1 = bitcast <4 x i64> %load to <4 x i64> 18374 %2 = icmp ult <4 x i64> %0, %1 18375 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18376 %4 = bitcast <16 x i1> %3 to i16 18377 ret i16 %4 18378} 18379 18380define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18381; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: 18382; VLX: # %bb.0: # %entry 18383; VLX-NEXT: kmovd %edi, %k1 18384; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} 18385; VLX-NEXT: kmovd %k0, %eax 18386; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18387; VLX-NEXT: vzeroupper 18388; VLX-NEXT: retq 18389; 18390; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: 18391; NoVLX: # %bb.0: # %entry 18392; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18393; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18394; NoVLX-NEXT: kmovw %edi, %k1 18395; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18396; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18397; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18398; NoVLX-NEXT: kmovw %k0, %eax 18399; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18400; NoVLX-NEXT: vzeroupper 18401; NoVLX-NEXT: retq 18402entry: 18403 %0 = bitcast <4 x i64> %__a to <4 x i64> 18404 %1 = bitcast <4 x i64> %__b to <4 x i64> 18405 %2 = icmp ult <4 x i64> %0, %1 18406 %3 = bitcast i8 %__u to <8 x i1> 18407 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18408 %4 = and <4 x i1> %2, %extract.i 18409 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, 
i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18410 %6 = bitcast <16 x i1> %5 to i16 18411 ret i16 %6 18412} 18413 18414define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 18415; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: 18416; VLX: # %bb.0: # %entry 18417; VLX-NEXT: kmovd %edi, %k1 18418; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} 18419; VLX-NEXT: kmovd %k0, %eax 18420; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18421; VLX-NEXT: vzeroupper 18422; VLX-NEXT: retq 18423; 18424; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: 18425; NoVLX: # %bb.0: # %entry 18426; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18427; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 18428; NoVLX-NEXT: kmovw %edi, %k1 18429; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18430; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18431; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18432; NoVLX-NEXT: kmovw %k0, %eax 18433; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18434; NoVLX-NEXT: vzeroupper 18435; NoVLX-NEXT: retq 18436entry: 18437 %0 = bitcast <4 x i64> %__a to <4 x i64> 18438 %load = load <4 x i64>, ptr %__b 18439 %1 = bitcast <4 x i64> %load to <4 x i64> 18440 %2 = icmp ult <4 x i64> %0, %1 18441 %3 = bitcast i8 %__u to <8 x i1> 18442 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18443 %4 = and <4 x i1> %2, %extract.i 18444 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18445 %6 = bitcast <16 x i1> %5 to i16 18446 ret i16 %6 18447} 18448 18449 18450define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 18451; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: 18452; VLX: # %bb.0: # %entry 18453; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 18454; VLX-NEXT: 
kmovd %k0, %eax 18455; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18456; VLX-NEXT: vzeroupper 18457; VLX-NEXT: retq 18458; 18459; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: 18460; NoVLX: # %bb.0: # %entry 18461; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18462; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 18463; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18464; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18465; NoVLX-NEXT: kmovw %k0, %eax 18466; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 18467; NoVLX-NEXT: vzeroupper 18468; NoVLX-NEXT: retq 18469entry: 18470 %0 = bitcast <4 x i64> %__a to <4 x i64> 18471 %load = load i64, ptr %__b 18472 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 18473 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 18474 %2 = icmp ult <4 x i64> %0, %1 18475 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18476 %4 = bitcast <16 x i1> %3 to i16 18477 ret i16 %4 18478} 18479 18480define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 18481; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: 18482; VLX: # %bb.0: # %entry 18483; VLX-NEXT: kmovd %edi, %k1 18484; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} 18485; VLX-NEXT: kmovd %k0, %eax 18486; VLX-NEXT: # kill: def $ax killed $ax killed $eax 18487; VLX-NEXT: vzeroupper 18488; VLX-NEXT: retq 18489; 18490; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: 18491; NoVLX: # %bb.0: # %entry 18492; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18493; NoVLX-NEXT: kmovw %edi, %k1 18494; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 18495; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18496; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18497; NoVLX-NEXT: kmovw %k0, %eax 18498; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 
18499; NoVLX-NEXT: vzeroupper 18500; NoVLX-NEXT: retq 18501entry: 18502 %0 = bitcast <4 x i64> %__a to <4 x i64> 18503 %load = load i64, ptr %__b 18504 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 18505 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 18506 %2 = icmp ult <4 x i64> %0, %1 18507 %3 = bitcast i8 %__u to <8 x i1> 18508 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18509 %4 = and <4 x i1> %extract.i, %2 18510 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18511 %6 = bitcast <16 x i1> %5 to i16 18512 ret i16 %6 18513} 18514 18515 18516define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18517; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask: 18518; VLX: # %bb.0: # %entry 18519; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 18520; VLX-NEXT: kmovd %k0, %eax 18521; VLX-NEXT: vzeroupper 18522; VLX-NEXT: retq 18523; 18524; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask: 18525; NoVLX: # %bb.0: # %entry 18526; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18527; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18528; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18529; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18530; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18531; NoVLX-NEXT: kmovw %k0, %eax 18532; NoVLX-NEXT: vzeroupper 18533; NoVLX-NEXT: retq 18534entry: 18535 %0 = bitcast <4 x i64> %__a to <4 x i64> 18536 %1 = bitcast <4 x i64> %__b to <4 x i64> 18537 %2 = icmp ult <4 x i64> %0, %1 18538 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18539 %4 = bitcast <32 x 
i1> %3 to i32 18540 ret i32 %4 18541} 18542 18543define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 18544; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem: 18545; VLX: # %bb.0: # %entry 18546; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 18547; VLX-NEXT: kmovd %k0, %eax 18548; VLX-NEXT: vzeroupper 18549; VLX-NEXT: retq 18550; 18551; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem: 18552; NoVLX: # %bb.0: # %entry 18553; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18554; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 18555; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18556; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18557; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18558; NoVLX-NEXT: kmovw %k0, %eax 18559; NoVLX-NEXT: vzeroupper 18560; NoVLX-NEXT: retq 18561entry: 18562 %0 = bitcast <4 x i64> %__a to <4 x i64> 18563 %load = load <4 x i64>, ptr %__b 18564 %1 = bitcast <4 x i64> %load to <4 x i64> 18565 %2 = icmp ult <4 x i64> %0, %1 18566 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18567 %4 = bitcast <32 x i1> %3 to i32 18568 ret i32 %4 18569} 18570 18571define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18572; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: 18573; VLX: # %bb.0: # %entry 18574; VLX-NEXT: kmovd %edi, %k1 18575; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} 18576; VLX-NEXT: kmovd %k0, %eax 18577; VLX-NEXT: vzeroupper 18578; VLX-NEXT: retq 18579; 18580; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: 18581; NoVLX: # %bb.0: # %entry 18582; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18583; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18584; NoVLX-NEXT: kmovw %edi, %k1 18585; NoVLX-NEXT: 
vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18586; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18587; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18588; NoVLX-NEXT: kmovw %k0, %eax 18589; NoVLX-NEXT: vzeroupper 18590; NoVLX-NEXT: retq 18591entry: 18592 %0 = bitcast <4 x i64> %__a to <4 x i64> 18593 %1 = bitcast <4 x i64> %__b to <4 x i64> 18594 %2 = icmp ult <4 x i64> %0, %1 18595 %3 = bitcast i8 %__u to <8 x i1> 18596 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18597 %4 = and <4 x i1> %2, %extract.i 18598 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18599 %6 = bitcast <32 x i1> %5 to i32 18600 ret i32 %6 18601} 18602 18603define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 18604; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: 18605; VLX: # %bb.0: # %entry 18606; VLX-NEXT: kmovd %edi, %k1 18607; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} 18608; VLX-NEXT: kmovd %k0, %eax 18609; VLX-NEXT: vzeroupper 18610; VLX-NEXT: retq 18611; 18612; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: 18613; NoVLX: # %bb.0: # %entry 18614; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18615; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 18616; NoVLX-NEXT: kmovw %edi, %k1 18617; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18618; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18619; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18620; NoVLX-NEXT: kmovw %k0, %eax 18621; NoVLX-NEXT: vzeroupper 18622; NoVLX-NEXT: retq 18623entry: 18624 %0 = bitcast <4 x i64> %__a to <4 x i64> 18625 %load = load <4 x i64>, ptr %__b 18626 %1 = bitcast <4 x i64> %load to <4 x i64> 18627 %2 = icmp ult <4 x i64> %0, %1 18628 %3 = bitcast i8 %__u to <8 x i1> 18629 %extract.i = 
shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18630 %4 = and <4 x i1> %2, %extract.i 18631 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18632 %6 = bitcast <32 x i1> %5 to i32 18633 ret i32 %6 18634} 18635 18636 18637define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 18638; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: 18639; VLX: # %bb.0: # %entry 18640; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 18641; VLX-NEXT: kmovd %k0, %eax 18642; VLX-NEXT: vzeroupper 18643; VLX-NEXT: retq 18644; 18645; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: 18646; NoVLX: # %bb.0: # %entry 18647; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18648; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 18649; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18650; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18651; NoVLX-NEXT: kmovw %k0, %eax 18652; NoVLX-NEXT: vzeroupper 18653; NoVLX-NEXT: retq 18654entry: 18655 %0 = bitcast <4 x i64> %__a to <4 x i64> 18656 %load = load i64, ptr %__b 18657 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 18658 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 18659 %2 = icmp ult <4 x i64> %0, %1 18660 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18661 %4 = bitcast <32 x i1> %3 to i32 18662 ret i32 %4 18663} 18664 18665define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 
18666; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: 18667; VLX: # %bb.0: # %entry 18668; VLX-NEXT: kmovd %edi, %k1 18669; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} 18670; VLX-NEXT: kmovd %k0, %eax 18671; VLX-NEXT: vzeroupper 18672; VLX-NEXT: retq 18673; 18674; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: 18675; NoVLX: # %bb.0: # %entry 18676; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18677; NoVLX-NEXT: kmovw %edi, %k1 18678; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} 18679; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18680; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18681; NoVLX-NEXT: kmovw %k0, %eax 18682; NoVLX-NEXT: vzeroupper 18683; NoVLX-NEXT: retq 18684entry: 18685 %0 = bitcast <4 x i64> %__a to <4 x i64> 18686 %load = load i64, ptr %__b 18687 %vec = insertelement <4 x i64> undef, i64 %load, i32 0 18688 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 18689 %2 = icmp ult <4 x i64> %0, %1 18690 %3 = bitcast i8 %__u to <8 x i1> 18691 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18692 %4 = and <4 x i1> %extract.i, %2 18693 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18694 %6 = bitcast <32 x i1> %5 to i32 18695 ret i32 %6 18696} 18697 18698 18699define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18700; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask: 18701; VLX: # %bb.0: # %entry 18702; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 18703; VLX-NEXT: kmovq %k0, %rax 18704; VLX-NEXT: vzeroupper 18705; VLX-NEXT: retq 18706; 18707; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask: 18708; NoVLX: # %bb.0: # %entry 18709; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 
def $zmm1 18710; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18711; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18712; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18713; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18714; NoVLX-NEXT: kmovw %k0, %eax 18715; NoVLX-NEXT: vzeroupper 18716; NoVLX-NEXT: retq 18717entry: 18718 %0 = bitcast <4 x i64> %__a to <4 x i64> 18719 %1 = bitcast <4 x i64> %__b to <4 x i64> 18720 %2 = icmp ult <4 x i64> %0, %1 18721 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18722 %4 = bitcast <64 x i1> %3 to i64 18723 ret i64 %4 18724} 18725 18726define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 18727; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: 18728; VLX: # %bb.0: # %entry 18729; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 18730; VLX-NEXT: kmovq %k0, %rax 18731; VLX-NEXT: vzeroupper 18732; VLX-NEXT: retq 18733; 18734; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: 18735; NoVLX: # %bb.0: # %entry 18736; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18737; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 18738; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 18739; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18740; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18741; NoVLX-NEXT: kmovw %k0, %eax 18742; NoVLX-NEXT: vzeroupper 18743; NoVLX-NEXT: retq 18744entry: 18745 %0 = bitcast <4 x i64> %__a to <4 x i64> 18746 %load = load <4 x i64>, ptr %__b 18747 %1 = bitcast <4 x i64> %load to <4 x i64> 18748 %2 = icmp ult <4 x i64> %0, %1 18749 %3 = shufflevector <4 x i1> %2, <4 x i1> 
zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18750 %4 = bitcast <64 x i1> %3 to i64 18751 ret i64 %4 18752} 18753 18754define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 18755; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: 18756; VLX: # %bb.0: # %entry 18757; VLX-NEXT: kmovd %edi, %k1 18758; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} 18759; VLX-NEXT: kmovq %k0, %rax 18760; VLX-NEXT: vzeroupper 18761; VLX-NEXT: retq 18762; 18763; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: 18764; NoVLX: # %bb.0: # %entry 18765; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 18766; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18767; NoVLX-NEXT: kmovw %edi, %k1 18768; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18769; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18770; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18771; NoVLX-NEXT: kmovw %k0, %eax 18772; NoVLX-NEXT: vzeroupper 18773; NoVLX-NEXT: retq 18774entry: 18775 %0 = bitcast <4 x i64> %__a to <4 x i64> 18776 %1 = bitcast <4 x i64> %__b to <4 x i64> 18777 %2 = icmp ult <4 x i64> %0, %1 18778 %3 = bitcast i8 %__u to <8 x i1> 18779 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18780 %4 = and <4 x i1> %2, %extract.i 18781 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, 
i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 18782 %6 = bitcast <64 x i1> %5 to i64 18783 ret i64 %6 18784} 18785 18786define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 18787; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: 18788; VLX: # %bb.0: # %entry 18789; VLX-NEXT: kmovd %edi, %k1 18790; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} 18791; VLX-NEXT: kmovq %k0, %rax 18792; VLX-NEXT: vzeroupper 18793; VLX-NEXT: retq 18794; 18795; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: 18796; NoVLX: # %bb.0: # %entry 18797; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 18798; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 18799; NoVLX-NEXT: kmovw %edi, %k1 18800; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} 18801; NoVLX-NEXT: kshiftlw $12, %k0, %k0 18802; NoVLX-NEXT: kshiftrw $12, %k0, %k0 18803; NoVLX-NEXT: kmovw %k0, %eax 18804; NoVLX-NEXT: vzeroupper 18805; NoVLX-NEXT: retq 18806entry: 18807 %0 = bitcast <4 x i64> %__a to <4 x i64> 18808 %load = load <4 x i64>, ptr %__b 18809 %1 = bitcast <4 x i64> %load to <4 x i64> 18810 %2 = icmp ult <4 x i64> %0, %1 18811 %3 = bitcast i8 %__u to <8 x i1> 18812 %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 18813 %4 = and <4 x i1> %2, %extract.i 18814 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, 
i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; icmp ult of %__a against an i64 loaded from %__b and splatted to all 4 lanes.
; The <4 x i1> result is widened to 64 bits; shuffle indices 4-7 select the
; zeroinitializer operand, so the upper 60 bits are zero.
define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked variant of the broadcast compare: the <4 x i1> result is ANDed with
; elements 0-3 of %__u (viewed as <8 x i1>) before being zero-widened to 64 bits.
define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; icmp ult on two <8 x i64> register operands; the <8 x i1> result is
; zero-widened to 16 bits (indices 8-15 select zeroinitializer) and returned as i16.
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Full-vector memory form: the right-hand operand is a <8 x i64> loaded from %__b.
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked register form: the compare result is ANDed with %__u (as <8 x i1>)
; before being zero-widened to 16 bits.
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Masked full-vector memory form (right-hand operand loaded from %__b).
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; Broadcast memory form: the right-hand operand is an i64 loaded from %__b and
; splatted to all 8 lanes.
define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked broadcast memory form; here the `and` takes the %__u mask as its first operand.
define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}


; Same <8 x i64> unsigned-less-than compare, with the <8 x i1> result
; zero-widened to 32 bits and returned as i32.
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Full-vector memory form, result widened to 32 bits.
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked register form, result widened to 32 bits.
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Masked full-vector memory form, result widened to 32 bits.
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Broadcast memory form, result widened to 32 bits.
define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked broadcast memory form, result widened to 32 bits.
define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}


; Same <8 x i64> unsigned-less-than compare, with the <8 x i1> result
; zero-widened to 64 bits and returned as i64.
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Full-vector memory form, result widened to 64 bits.
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked register form, result widened to 64 bits.
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %1 = bitcast <8 x i64> %__b to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Masked full-vector memory form, result widened to 64 bits.
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x i64>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; Broadcast memory form, result widened to 64 bits.
define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked broadcast memory form, result widened to 64 bits.
define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x i64>
  %load = load i64, ptr %__b
  %vec = insertelement <8 x i64> undef, i64 %load, i32 0
  %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = icmp ult <8 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %3, %2
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}


; Intrinsic declaration; no call to it appears in this part of the file.
declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32)
; fcmp oeq on the <4 x float> views of two <2 x i64> arguments; the <4 x i1>
; result is zero-widened to 8 bits (indices 4-7 select zeroinitializer).
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %1 = bitcast <2 x i64> %__b to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Full-vector memory form: the right-hand operand is a <2 x i64> loaded from
; %__b and reinterpreted as <4 x float>.
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovaps (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Broadcast memory form: the right-hand operand is a float loaded from %__b and
; splatted to all 4 lanes.
define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load float, ptr %__b
  %vec = insertelement <4 x float> undef, float %load, i32 0
  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

; Masked register form: the compare result is ANDed with the i4 mask %__u
; (as <4 x i1>) before being zero-widened to 8 bits.
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %1 = bitcast <2 x i64> %__b to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Masked full-vector memory form.
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovaps (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

; Masked broadcast memory form.
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $al killed $al killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $al killed $al killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load float, ptr %__b
  %vec = insertelement <4 x float> undef, float %load, i32 0
  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}



; Same <4 x float> ordered-equal compare, with the <4 x i1> result zero-widened
; to 16 bits and returned as i16.
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %1 = bitcast <2 x i64> %__b to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Full-vector memory form, result widened to 16 bits.
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vmovaps (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load <2 x i64>, ptr %__b
  %1 = bitcast <2 x i64> %load to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Broadcast memory form, result widened to 16 bits.
define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load float, ptr %__b
  %vec = insertelement <4 x float> undef, float %load, i32 0
  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked register form, result widened to 16 bits.
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %1 = bitcast <2 x i64> %__b to <4 x float>
  %2 = fcmp oeq <4 x float> %0, %1
  %3 = bitcast i4 %__u to <4 x i1>
  %4 = and <4 x i1> %2, %3
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovaps (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 =
bitcast <2 x i64> %__a to <4 x float> 19696 %load = load <2 x i64>, ptr %__b 19697 %1 = bitcast <2 x i64> %load to <4 x float> 19698 %2 = fcmp oeq <4 x float> %0, %1 19699 %3 = bitcast i4 %__u to <4 x i1> 19700 %4 = and <4 x i1> %2, %3 19701 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19702 %6 = bitcast <16 x i1> %5 to i16 19703 ret i16 %6 19704} 19705 19706define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 19707; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b: 19708; VLX: # %bb.0: # %entry 19709; VLX-NEXT: kmovd %edi, %k1 19710; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} 19711; VLX-NEXT: kmovd %k0, %eax 19712; VLX-NEXT: # kill: def $ax killed $ax killed $eax 19713; VLX-NEXT: retq 19714; 19715; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b: 19716; NoVLX: # %bb.0: # %entry 19717; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19718; NoVLX-NEXT: kmovw %edi, %k1 19719; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} 19720; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19721; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19722; NoVLX-NEXT: kmovw %k0, %eax 19723; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 19724; NoVLX-NEXT: vzeroupper 19725; NoVLX-NEXT: retq 19726entry: 19727 %0 = bitcast <2 x i64> %__a to <4 x float> 19728 %load = load float, ptr %__b 19729 %vec = insertelement <4 x float> undef, float %load, i32 0 19730 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 19731 %2 = fcmp oeq <4 x float> %0, %1 19732 %3 = bitcast i4 %__u to <4 x i1> 19733 %4 = and <4 x i1> %2, %3 19734 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19735 %6 = bitcast <16 x i1> 
%5 to i16 19736 ret i16 %6 19737} 19738 19739 19740 19741define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 19742; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: 19743; VLX: # %bb.0: # %entry 19744; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 19745; VLX-NEXT: kmovd %k0, %eax 19746; VLX-NEXT: retq 19747; 19748; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: 19749; NoVLX: # %bb.0: # %entry 19750; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 19751; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19752; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 19753; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19754; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19755; NoVLX-NEXT: kmovw %k0, %eax 19756; NoVLX-NEXT: vzeroupper 19757; NoVLX-NEXT: retq 19758entry: 19759 %0 = bitcast <2 x i64> %__a to <4 x float> 19760 %1 = bitcast <2 x i64> %__b to <4 x float> 19761 %2 = fcmp oeq <4 x float> %0, %1 19762 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19763 %4 = bitcast <32 x i1> %3 to i32 19764 ret i32 %4 19765} 19766 19767define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 19768; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: 19769; VLX: # %bb.0: # %entry 19770; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 19771; VLX-NEXT: kmovd %k0, %eax 19772; VLX-NEXT: retq 19773; 19774; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: 19775; NoVLX: # %bb.0: # %entry 19776; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19777; NoVLX-NEXT: vmovaps (%rdi), %xmm1 19778; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 19779; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19780; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19781; NoVLX-NEXT: kmovw %k0, %eax 19782; NoVLX-NEXT: vzeroupper 19783; NoVLX-NEXT: retq 
19784entry: 19785 %0 = bitcast <2 x i64> %__a to <4 x float> 19786 %load = load <2 x i64>, ptr %__b 19787 %1 = bitcast <2 x i64> %load to <4 x float> 19788 %2 = fcmp oeq <4 x float> %0, %1 19789 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19790 %4 = bitcast <32 x i1> %3 to i32 19791 ret i32 %4 19792} 19793 19794define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 19795; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: 19796; VLX: # %bb.0: # %entry 19797; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 19798; VLX-NEXT: kmovd %k0, %eax 19799; VLX-NEXT: retq 19800; 19801; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: 19802; NoVLX: # %bb.0: # %entry 19803; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19804; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 19805; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19806; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19807; NoVLX-NEXT: kmovw %k0, %eax 19808; NoVLX-NEXT: vzeroupper 19809; NoVLX-NEXT: retq 19810entry: 19811 %0 = bitcast <2 x i64> %__a to <4 x float> 19812 %load = load float, ptr %__b 19813 %vec = insertelement <4 x float> undef, float %load, i32 0 19814 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 19815 %2 = fcmp oeq <4 x float> %0, %1 19816 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19817 %4 = bitcast <32 x i1> %3 to i32 19818 ret i32 %4 19819} 19820 19821define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x 
i64> %__a, <2 x i64> %__b) local_unnamed_addr { 19822; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask: 19823; VLX: # %bb.0: # %entry 19824; VLX-NEXT: kmovd %edi, %k1 19825; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1} 19826; VLX-NEXT: kmovd %k0, %eax 19827; VLX-NEXT: retq 19828; 19829; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask: 19830; NoVLX: # %bb.0: # %entry 19831; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 19832; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19833; NoVLX-NEXT: kmovw %edi, %k1 19834; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 19835; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19836; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19837; NoVLX-NEXT: kmovw %k0, %eax 19838; NoVLX-NEXT: vzeroupper 19839; NoVLX-NEXT: retq 19840entry: 19841 %0 = bitcast <2 x i64> %__a to <4 x float> 19842 %1 = bitcast <2 x i64> %__b to <4 x float> 19843 %2 = fcmp oeq <4 x float> %0, %1 19844 %3 = bitcast i4 %__u to <4 x i1> 19845 %4 = and <4 x i1> %2, %3 19846 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19847 %6 = bitcast <32 x i1> %5 to i32 19848 ret i32 %6 19849} 19850 19851define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 19852; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem: 19853; VLX: # %bb.0: # %entry 19854; VLX-NEXT: kmovd %edi, %k1 19855; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1} 19856; VLX-NEXT: kmovd %k0, %eax 19857; VLX-NEXT: retq 19858; 19859; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem: 19860; NoVLX: # %bb.0: # %entry 19861; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19862; NoVLX-NEXT: kmovw %edi, %k1 19863; NoVLX-NEXT: vmovaps (%rsi), %xmm1 19864; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 19865; 
NoVLX-NEXT: kshiftlw $12, %k0, %k0 19866; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19867; NoVLX-NEXT: kmovw %k0, %eax 19868; NoVLX-NEXT: vzeroupper 19869; NoVLX-NEXT: retq 19870entry: 19871 %0 = bitcast <2 x i64> %__a to <4 x float> 19872 %load = load <2 x i64>, ptr %__b 19873 %1 = bitcast <2 x i64> %load to <4 x float> 19874 %2 = fcmp oeq <4 x float> %0, %1 19875 %3 = bitcast i4 %__u to <4 x i1> 19876 %4 = and <4 x i1> %2, %3 19877 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19878 %6 = bitcast <32 x i1> %5 to i32 19879 ret i32 %6 19880} 19881 19882define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 19883; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b: 19884; VLX: # %bb.0: # %entry 19885; VLX-NEXT: kmovd %edi, %k1 19886; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} 19887; VLX-NEXT: kmovd %k0, %eax 19888; VLX-NEXT: retq 19889; 19890; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b: 19891; NoVLX: # %bb.0: # %entry 19892; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19893; NoVLX-NEXT: kmovw %edi, %k1 19894; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} 19895; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19896; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19897; NoVLX-NEXT: kmovw %k0, %eax 19898; NoVLX-NEXT: vzeroupper 19899; NoVLX-NEXT: retq 19900entry: 19901 %0 = bitcast <2 x i64> %__a to <4 x float> 19902 %load = load float, ptr %__b 19903 %vec = insertelement <4 x float> undef, float %load, i32 0 19904 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 19905 %2 = fcmp oeq <4 x float> %0, %1 19906 %3 = bitcast i4 %__u to <4 x i1> 19907 %4 = and <4 x i1> %2, %3 19908 %5 = shufflevector <4 x 
i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19909 %6 = bitcast <32 x i1> %5 to i32 19910 ret i32 %6 19911} 19912 19913 19914 19915define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 19916; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: 19917; VLX: # %bb.0: # %entry 19918; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 19919; VLX-NEXT: kmovq %k0, %rax 19920; VLX-NEXT: retq 19921; 19922; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: 19923; NoVLX: # %bb.0: # %entry 19924; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 19925; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19926; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 19927; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19928; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19929; NoVLX-NEXT: kmovw %k0, %eax 19930; NoVLX-NEXT: vzeroupper 19931; NoVLX-NEXT: retq 19932entry: 19933 %0 = bitcast <2 x i64> %__a to <4 x float> 19934 %1 = bitcast <2 x i64> %__b to <4 x float> 19935 %2 = fcmp oeq <4 x float> %0, %1 19936 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19937 %4 = bitcast <64 x i1> %3 to i64 19938 ret i64 %4 19939} 19940 19941define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 19942; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: 19943; VLX: # %bb.0: # 
%entry 19944; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 19945; VLX-NEXT: kmovq %k0, %rax 19946; VLX-NEXT: retq 19947; 19948; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: 19949; NoVLX: # %bb.0: # %entry 19950; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19951; NoVLX-NEXT: vmovaps (%rdi), %xmm1 19952; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 19953; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19954; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19955; NoVLX-NEXT: kmovw %k0, %eax 19956; NoVLX-NEXT: vzeroupper 19957; NoVLX-NEXT: retq 19958entry: 19959 %0 = bitcast <2 x i64> %__a to <4 x float> 19960 %load = load <2 x i64>, ptr %__b 19961 %1 = bitcast <2 x i64> %load to <4 x float> 19962 %2 = fcmp oeq <4 x float> %0, %1 19963 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19964 %4 = bitcast <64 x i1> %3 to i64 19965 ret i64 %4 19966} 19967 19968define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 19969; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: 19970; VLX: # %bb.0: # %entry 19971; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 19972; VLX-NEXT: kmovq %k0, %rax 19973; VLX-NEXT: retq 19974; 19975; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: 19976; NoVLX: # %bb.0: # %entry 19977; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 19978; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 19979; NoVLX-NEXT: kshiftlw $12, %k0, %k0 19980; NoVLX-NEXT: kshiftrw $12, %k0, %k0 19981; NoVLX-NEXT: kmovw %k0, %eax 19982; NoVLX-NEXT: vzeroupper 19983; NoVLX-NEXT: retq 
19984entry: 19985 %0 = bitcast <2 x i64> %__a to <4 x float> 19986 %load = load float, ptr %__b 19987 %vec = insertelement <4 x float> undef, float %load, i32 0 19988 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 19989 %2 = fcmp oeq <4 x float> %0, %1 19990 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 19991 %4 = bitcast <64 x i1> %3 to i64 19992 ret i64 %4 19993} 19994 19995define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 19996; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask: 19997; VLX: # %bb.0: # %entry 19998; VLX-NEXT: kmovd %edi, %k1 19999; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1} 20000; VLX-NEXT: kmovq %k0, %rax 20001; VLX-NEXT: retq 20002; 20003; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask: 20004; NoVLX: # %bb.0: # %entry 20005; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 20006; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20007; NoVLX-NEXT: kmovw %edi, %k1 20008; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20009; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20010; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20011; NoVLX-NEXT: kmovw %k0, %eax 20012; NoVLX-NEXT: vzeroupper 20013; NoVLX-NEXT: retq 20014entry: 20015 %0 = bitcast <2 x i64> %__a to <4 x float> 20016 %1 = bitcast <2 x i64> %__b to <4 x float> 20017 %2 = fcmp oeq <4 x float> %0, %1 20018 %3 = bitcast i4 %__u to <4 x i1> 20019 %4 = and <4 x i1> %2, %3 20020 %5 = shufflevector <4 x i1> %4, <4 x i1> 
zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20021 %6 = bitcast <64 x i1> %5 to i64 20022 ret i64 %6 20023} 20024 20025define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 20026; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem: 20027; VLX: # %bb.0: # %entry 20028; VLX-NEXT: kmovd %edi, %k1 20029; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1} 20030; VLX-NEXT: kmovq %k0, %rax 20031; VLX-NEXT: retq 20032; 20033; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem: 20034; NoVLX: # %bb.0: # %entry 20035; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20036; NoVLX-NEXT: kmovw %edi, %k1 20037; NoVLX-NEXT: vmovaps (%rsi), %xmm1 20038; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20039; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20040; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20041; NoVLX-NEXT: kmovw %k0, %eax 20042; NoVLX-NEXT: vzeroupper 20043; NoVLX-NEXT: retq 20044entry: 20045 %0 = bitcast <2 x i64> %__a to <4 x float> 20046 %load = load <2 x i64>, ptr %__b 20047 %1 = bitcast <2 x i64> %load to <4 x float> 20048 %2 = fcmp oeq <4 x float> %0, %1 20049 %3 = bitcast i4 %__u to <4 x i1> 20050 %4 = and <4 x i1> %2, %3 20051 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, 
i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20052 %6 = bitcast <64 x i1> %5 to i64 20053 ret i64 %6 20054} 20055 20056define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 20057; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b: 20058; VLX: # %bb.0: # %entry 20059; VLX-NEXT: kmovd %edi, %k1 20060; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1} 20061; VLX-NEXT: kmovq %k0, %rax 20062; VLX-NEXT: retq 20063; 20064; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b: 20065; NoVLX: # %bb.0: # %entry 20066; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 20067; NoVLX-NEXT: kmovw %edi, %k1 20068; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} 20069; NoVLX-NEXT: kshiftlw $12, %k0, %k0 20070; NoVLX-NEXT: kshiftrw $12, %k0, %k0 20071; NoVLX-NEXT: kmovw %k0, %eax 20072; NoVLX-NEXT: vzeroupper 20073; NoVLX-NEXT: retq 20074entry: 20075 %0 = bitcast <2 x i64> %__a to <4 x float> 20076 %load = load float, ptr %__b 20077 %vec = insertelement <4 x float> undef, float %load, i32 0 20078 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 20079 %2 = fcmp oeq <4 x float> %0, %1 20080 %3 = bitcast i4 %__u to <4 x i1> 20081 %4 = and <4 x i1> %2, %3 20082 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 20083 %6 = bitcast <64 x i1> %5 to i64 20084 ret i64 %6 20085} 20086 20087 
; Ordered-equal compare of two <8 x float> register operands whose 8-bit
; result is widened to a 16-bit mask and returned as i16.
; The widening shuffle takes lanes 8..15 from the zeroinitializer operand, so
; the upper 8 bits of the returned mask are zero.
; With the +avx512vl run line the expected code compares directly on ymm
; registers. With the +avx512f-only run line the expected code compares on zmm
; registers and then shifts the mask left and right by 8, which discards the
; bits produced by the widened upper lanes.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  ; Reinterpret both 256-bit integer arguments as eight floats.
  %0 = bitcast <4 x i64> %__a to <8 x float>
  %1 = bitcast <4 x i64> %__b to <8 x float>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Indices 0..7 pick the compare bits, indices 8..15 pick zeros from the
  ; second (zeroinitializer) operand.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Same compare and zero-extended i16 mask as the register/register variant,
; but the second operand is a full <4 x i64> vector loaded from %__b.
; With the +avx512vl run line the expected code folds the load into the ymm
; compare. With the +avx512f-only run line the expected code loads into ymm1
; with a separate vmovaps, compares on zmm registers, and clears the upper
; 8 mask bits with the shift pair.
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  ; The memory operand is loaded as integers and reinterpreted as floats.
  %load = load <4 x i64>, ptr %__b
  %1 = bitcast <4 x i64> %load to <8 x float>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Lanes 8..15 come from the zeroinitializer operand, so the upper half of the
  ; 16-bit mask is zero.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Broadcast variant: a single float is loaded from %__b and splatted to all
; eight lanes before the ordered-equal compare; the result is again widened to
; a zero-extended 16-bit mask.
; With the +avx512vl run line the expected code uses an embedded {1to8}
; broadcast memory operand on the ymm compare. With the +avx512f-only run line
; the expected code uses a {1to16} broadcast on the zmm compare and clears the
; upper 8 mask bits with the shift pair.
define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  ; Splat idiom: insert the scalar into lane 0, then shuffle with an all-zero
  ; index mask so every lane reads lane 0.
  %load = load float, ptr %__b
  %vec = insertelement <8 x float> undef, float %load, i32 0
  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x float> %0, %1
  ; Lanes 8..15 come from the zeroinitializer operand, so the upper half of the
  ; 16-bit mask is zero.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}
20178define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20179; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask: 20180; VLX: # %bb.0: # %entry 20181; VLX-NEXT: kmovd %edi, %k1 20182; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1} 20183; VLX-NEXT: kmovd %k0, %eax 20184; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20185; VLX-NEXT: vzeroupper 20186; VLX-NEXT: retq 20187; 20188; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask: 20189; NoVLX: # %bb.0: # %entry 20190; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20191; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20192; NoVLX-NEXT: kmovw %edi, %k1 20193; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20194; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20195; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20196; NoVLX-NEXT: kmovw %k0, %eax 20197; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20198; NoVLX-NEXT: vzeroupper 20199; NoVLX-NEXT: retq 20200entry: 20201 %0 = bitcast <4 x i64> %__a to <8 x float> 20202 %1 = bitcast <4 x i64> %__b to <8 x float> 20203 %2 = fcmp oeq <8 x float> %0, %1 20204 %3 = bitcast i8 %__u to <8 x i1> 20205 %4 = and <8 x i1> %2, %3 20206 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20207 %6 = bitcast <16 x i1> %5 to i16 20208 ret i16 %6 20209} 20210 20211define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 20212; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem: 20213; VLX: # %bb.0: # %entry 20214; VLX-NEXT: kmovd %edi, %k1 20215; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1} 20216; VLX-NEXT: kmovd %k0, %eax 20217; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20218; VLX-NEXT: vzeroupper 20219; VLX-NEXT: retq 20220; 20221; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem: 20222; NoVLX: # %bb.0: # 
%entry 20223; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20224; NoVLX-NEXT: vmovaps (%rsi), %ymm1 20225; NoVLX-NEXT: kmovw %edi, %k1 20226; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20227; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20228; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20229; NoVLX-NEXT: kmovw %k0, %eax 20230; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20231; NoVLX-NEXT: vzeroupper 20232; NoVLX-NEXT: retq 20233entry: 20234 %0 = bitcast <4 x i64> %__a to <8 x float> 20235 %load = load <4 x i64>, ptr %__b 20236 %1 = bitcast <4 x i64> %load to <8 x float> 20237 %2 = fcmp oeq <8 x float> %0, %1 20238 %3 = bitcast i8 %__u to <8 x i1> 20239 %4 = and <8 x i1> %2, %3 20240 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20241 %6 = bitcast <16 x i1> %5 to i16 20242 ret i16 %6 20243} 20244 20245define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 20246; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b: 20247; VLX: # %bb.0: # %entry 20248; VLX-NEXT: kmovd %edi, %k1 20249; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1} 20250; VLX-NEXT: kmovd %k0, %eax 20251; VLX-NEXT: # kill: def $ax killed $ax killed $eax 20252; VLX-NEXT: vzeroupper 20253; VLX-NEXT: retq 20254; 20255; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b: 20256; NoVLX: # %bb.0: # %entry 20257; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20258; NoVLX-NEXT: kmovw %edi, %k1 20259; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} 20260; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20261; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20262; NoVLX-NEXT: kmovw %k0, %eax 20263; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 20264; NoVLX-NEXT: vzeroupper 20265; NoVLX-NEXT: retq 20266entry: 20267 %0 = bitcast <4 x i64> %__a to <8 x float> 20268 %load = load float, ptr %__b 20269 %vec = 
insertelement <8 x float> undef, float %load, i32 0 20270 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20271 %2 = fcmp oeq <8 x float> %0, %1 20272 %3 = bitcast i8 %__u to <8 x i1> 20273 %4 = and <8 x i1> %2, %3 20274 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20275 %6 = bitcast <16 x i1> %5 to i16 20276 ret i16 %6 20277} 20278 20279 20280 20281define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20282; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: 20283; VLX: # %bb.0: # %entry 20284; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 20285; VLX-NEXT: kmovd %k0, %eax 20286; VLX-NEXT: vzeroupper 20287; VLX-NEXT: retq 20288; 20289; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: 20290; NoVLX: # %bb.0: # %entry 20291; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20292; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20293; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20294; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20295; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20296; NoVLX-NEXT: kmovw %k0, %eax 20297; NoVLX-NEXT: vzeroupper 20298; NoVLX-NEXT: retq 20299entry: 20300 %0 = bitcast <4 x i64> %__a to <8 x float> 20301 %1 = bitcast <4 x i64> %__b to <8 x float> 20302 %2 = fcmp oeq <8 x float> %0, %1 20303 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20304 %4 = bitcast <32 x i1> %3 to i32 20305 ret i32 %4 20306} 20307 20308define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 20309; VLX-LABEL: 
test_vcmpoeqps_v8i1_v32i1_mask_mem: 20310; VLX: # %bb.0: # %entry 20311; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0 20312; VLX-NEXT: kmovd %k0, %eax 20313; VLX-NEXT: vzeroupper 20314; VLX-NEXT: retq 20315; 20316; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem: 20317; NoVLX: # %bb.0: # %entry 20318; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20319; NoVLX-NEXT: vmovaps (%rdi), %ymm1 20320; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20321; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20322; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20323; NoVLX-NEXT: kmovw %k0, %eax 20324; NoVLX-NEXT: vzeroupper 20325; NoVLX-NEXT: retq 20326entry: 20327 %0 = bitcast <4 x i64> %__a to <8 x float> 20328 %load = load <4 x i64>, ptr %__b 20329 %1 = bitcast <4 x i64> %load to <8 x float> 20330 %2 = fcmp oeq <8 x float> %0, %1 20331 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20332 %4 = bitcast <32 x i1> %3 to i32 20333 ret i32 %4 20334} 20335 20336define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 20337; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: 20338; VLX: # %bb.0: # %entry 20339; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 20340; VLX-NEXT: kmovd %k0, %eax 20341; VLX-NEXT: vzeroupper 20342; VLX-NEXT: retq 20343; 20344; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: 20345; NoVLX: # %bb.0: # %entry 20346; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20347; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 20348; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20349; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20350; NoVLX-NEXT: kmovw %k0, %eax 20351; NoVLX-NEXT: vzeroupper 20352; NoVLX-NEXT: retq 20353entry: 20354 %0 = bitcast <4 x i64> %__a to <8 x float> 20355 %load = load float, ptr %__b 20356 
%vec = insertelement <8 x float> undef, float %load, i32 0 20357 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20358 %2 = fcmp oeq <8 x float> %0, %1 20359 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20360 %4 = bitcast <32 x i1> %3 to i32 20361 ret i32 %4 20362} 20363 20364define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20365; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask: 20366; VLX: # %bb.0: # %entry 20367; VLX-NEXT: kmovd %edi, %k1 20368; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1} 20369; VLX-NEXT: kmovd %k0, %eax 20370; VLX-NEXT: vzeroupper 20371; VLX-NEXT: retq 20372; 20373; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask: 20374; NoVLX: # %bb.0: # %entry 20375; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20376; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20377; NoVLX-NEXT: kmovw %edi, %k1 20378; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20379; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20380; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20381; NoVLX-NEXT: kmovw %k0, %eax 20382; NoVLX-NEXT: vzeroupper 20383; NoVLX-NEXT: retq 20384entry: 20385 %0 = bitcast <4 x i64> %__a to <8 x float> 20386 %1 = bitcast <4 x i64> %__b to <8 x float> 20387 %2 = fcmp oeq <8 x float> %0, %1 20388 %3 = bitcast i8 %__u to <8 x i1> 20389 %4 = and <8 x i1> %2, %3 20390 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, 
i32 14, i32 15> 20391 %6 = bitcast <32 x i1> %5 to i32 20392 ret i32 %6 20393} 20394 20395define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 20396; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem: 20397; VLX: # %bb.0: # %entry 20398; VLX-NEXT: kmovd %edi, %k1 20399; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1} 20400; VLX-NEXT: kmovd %k0, %eax 20401; VLX-NEXT: vzeroupper 20402; VLX-NEXT: retq 20403; 20404; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem: 20405; NoVLX: # %bb.0: # %entry 20406; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20407; NoVLX-NEXT: vmovaps (%rsi), %ymm1 20408; NoVLX-NEXT: kmovw %edi, %k1 20409; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20410; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20411; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20412; NoVLX-NEXT: kmovw %k0, %eax 20413; NoVLX-NEXT: vzeroupper 20414; NoVLX-NEXT: retq 20415entry: 20416 %0 = bitcast <4 x i64> %__a to <8 x float> 20417 %load = load <4 x i64>, ptr %__b 20418 %1 = bitcast <4 x i64> %load to <8 x float> 20419 %2 = fcmp oeq <8 x float> %0, %1 20420 %3 = bitcast i8 %__u to <8 x i1> 20421 %4 = and <8 x i1> %2, %3 20422 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20423 %6 = bitcast <32 x i1> %5 to i32 20424 ret i32 %6 20425} 20426 20427define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 20428; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b: 20429; VLX: # %bb.0: # %entry 20430; VLX-NEXT: kmovd %edi, %k1 20431; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1} 20432; VLX-NEXT: kmovd %k0, %eax 20433; VLX-NEXT: vzeroupper 20434; VLX-NEXT: retq 20435; 20436; 
NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b: 20437; NoVLX: # %bb.0: # %entry 20438; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20439; NoVLX-NEXT: kmovw %edi, %k1 20440; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} 20441; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20442; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20443; NoVLX-NEXT: kmovw %k0, %eax 20444; NoVLX-NEXT: vzeroupper 20445; NoVLX-NEXT: retq 20446entry: 20447 %0 = bitcast <4 x i64> %__a to <8 x float> 20448 %load = load float, ptr %__b 20449 %vec = insertelement <8 x float> undef, float %load, i32 0 20450 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20451 %2 = fcmp oeq <8 x float> %0, %1 20452 %3 = bitcast i8 %__u to <8 x i1> 20453 %4 = and <8 x i1> %2, %3 20454 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20455 %6 = bitcast <32 x i1> %5 to i32 20456 ret i32 %6 20457} 20458 20459 20460 20461define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20462; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: 20463; VLX: # %bb.0: # %entry 20464; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 20465; VLX-NEXT: kmovq %k0, %rax 20466; VLX-NEXT: vzeroupper 20467; VLX-NEXT: retq 20468; 20469; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: 20470; NoVLX: # %bb.0: # %entry 20471; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20472; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20473; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20474; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20475; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20476; NoVLX-NEXT: kmovw %k0, %eax 20477; NoVLX-NEXT: vzeroupper 20478; NoVLX-NEXT: retq 20479entry: 20480 %0 = bitcast <4 x i64> 
%__a to <8 x float> 20481 %1 = bitcast <4 x i64> %__b to <8 x float> 20482 %2 = fcmp oeq <8 x float> %0, %1 20483 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20484 %4 = bitcast <64 x i1> %3 to i64 20485 ret i64 %4 20486} 20487 20488define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 20489; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: 20490; VLX: # %bb.0: # %entry 20491; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0 20492; VLX-NEXT: kmovq %k0, %rax 20493; VLX-NEXT: vzeroupper 20494; VLX-NEXT: retq 20495; 20496; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: 20497; NoVLX: # %bb.0: # %entry 20498; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20499; NoVLX-NEXT: vmovaps (%rdi), %ymm1 20500; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20501; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20502; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20503; NoVLX-NEXT: kmovw %k0, %eax 20504; NoVLX-NEXT: vzeroupper 20505; NoVLX-NEXT: retq 20506entry: 20507 %0 = bitcast <4 x i64> %__a to <8 x float> 20508 %load = load <4 x i64>, ptr %__b 20509 %1 = bitcast <4 x i64> %load to <8 x float> 20510 %2 = fcmp oeq <8 x float> %0, %1 20511 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 
10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20512 %4 = bitcast <64 x i1> %3 to i64 20513 ret i64 %4 20514} 20515 20516define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 20517; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: 20518; VLX: # %bb.0: # %entry 20519; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 20520; VLX-NEXT: kmovq %k0, %rax 20521; VLX-NEXT: vzeroupper 20522; VLX-NEXT: retq 20523; 20524; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: 20525; NoVLX: # %bb.0: # %entry 20526; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20527; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 20528; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20529; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20530; NoVLX-NEXT: kmovw %k0, %eax 20531; NoVLX-NEXT: vzeroupper 20532; NoVLX-NEXT: retq 20533entry: 20534 %0 = bitcast <4 x i64> %__a to <8 x float> 20535 %load = load float, ptr %__b 20536 %vec = insertelement <8 x float> undef, float %load, i32 0 20537 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20538 %2 = fcmp oeq <8 x float> %0, %1 20539 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20540 %4 = bitcast <64 x i1> %3 to i64 20541 ret i64 %4 20542} 20543 20544define zeroext i64 
@test_masked_vcmpoeqps_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 20545; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask: 20546; VLX: # %bb.0: # %entry 20547; VLX-NEXT: kmovd %edi, %k1 20548; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 {%k1} 20549; VLX-NEXT: kmovq %k0, %rax 20550; VLX-NEXT: vzeroupper 20551; VLX-NEXT: retq 20552; 20553; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask: 20554; NoVLX: # %bb.0: # %entry 20555; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 20556; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20557; NoVLX-NEXT: kmovw %edi, %k1 20558; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20559; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20560; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20561; NoVLX-NEXT: kmovw %k0, %eax 20562; NoVLX-NEXT: vzeroupper 20563; NoVLX-NEXT: retq 20564entry: 20565 %0 = bitcast <4 x i64> %__a to <8 x float> 20566 %1 = bitcast <4 x i64> %__b to <8 x float> 20567 %2 = fcmp oeq <8 x float> %0, %1 20568 %3 = bitcast i8 %__u to <8 x i1> 20569 %4 = and <8 x i1> %2, %3 20570 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20571 %6 = bitcast <64 x i1> %5 to i64 20572 ret i64 %6 20573} 20574 20575define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 20576; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem: 20577; VLX: # %bb.0: # %entry 20578; VLX-NEXT: kmovd %edi, %k1 20579; VLX-NEXT: vcmpeqps (%rsi), %ymm0, %k0 {%k1} 
20580; VLX-NEXT: kmovq %k0, %rax 20581; VLX-NEXT: vzeroupper 20582; VLX-NEXT: retq 20583; 20584; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem: 20585; NoVLX: # %bb.0: # %entry 20586; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 20587; NoVLX-NEXT: vmovaps (%rsi), %ymm1 20588; NoVLX-NEXT: kmovw %edi, %k1 20589; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20590; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20591; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20592; NoVLX-NEXT: kmovw %k0, %eax 20593; NoVLX-NEXT: vzeroupper 20594; NoVLX-NEXT: retq 20595entry: 20596 %0 = bitcast <4 x i64> %__a to <8 x float> 20597 %load = load <4 x i64>, ptr %__b 20598 %1 = bitcast <4 x i64> %load to <8 x float> 20599 %2 = fcmp oeq <8 x float> %0, %1 20600 %3 = bitcast i8 %__u to <8 x i1> 20601 %4 = and <8 x i1> %2, %3 20602 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20603 %6 = bitcast <64 x i1> %5 to i64 20604 ret i64 %6 20605} 20606 20607define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 20608; VLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b: 20609; VLX: # %bb.0: # %entry 20610; VLX-NEXT: kmovd %edi, %k1 20611; VLX-NEXT: vcmpeqps (%rsi){1to8}, %ymm0, %k0 {%k1} 20612; VLX-NEXT: kmovq %k0, %rax 20613; VLX-NEXT: vzeroupper 20614; VLX-NEXT: retq 20615; 20616; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b: 20617; NoVLX: # %bb.0: # %entry 20618; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def 
$zmm0 20619; NoVLX-NEXT: kmovw %edi, %k1 20620; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} 20621; NoVLX-NEXT: kshiftlw $8, %k0, %k0 20622; NoVLX-NEXT: kshiftrw $8, %k0, %k0 20623; NoVLX-NEXT: kmovw %k0, %eax 20624; NoVLX-NEXT: vzeroupper 20625; NoVLX-NEXT: retq 20626entry: 20627 %0 = bitcast <4 x i64> %__a to <8 x float> 20628 %load = load float, ptr %__b 20629 %vec = insertelement <8 x float> undef, float %load, i32 0 20630 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20631 %2 = fcmp oeq <8 x float> %0, %1 20632 %3 = bitcast i8 %__u to <8 x i1> 20633 %4 = and <8 x i1> %2, %3 20634 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 20635 %6 = bitcast <64 x i1> %5 to i64 20636 ret i64 %6 20637} 20638 20639 20640 20641define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 20642; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask: 20643; VLX: # %bb.0: # %entry 20644; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20645; VLX-NEXT: kmovd %k0, %eax 20646; VLX-NEXT: vzeroupper 20647; VLX-NEXT: retq 20648; 20649; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask: 20650; NoVLX: # %bb.0: # %entry 20651; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20652; NoVLX-NEXT: kmovw %k0, %eax 20653; NoVLX-NEXT: vzeroupper 20654; NoVLX-NEXT: retq 20655entry: 20656 %0 = bitcast <8 x i64> %__a to <16 x float> 20657 %1 = bitcast <8 x i64> %__b to <16 x float> 20658 %2 = fcmp oeq <16 
x float> %0, %1 20659 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 20660 %4 = bitcast <32 x i1> %3 to i32 20661 ret i32 %4 20662} 20663 20664define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 20665; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: 20666; VLX: # %bb.0: # %entry 20667; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 20668; VLX-NEXT: kmovd %k0, %eax 20669; VLX-NEXT: vzeroupper 20670; VLX-NEXT: retq 20671; 20672; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: 20673; NoVLX: # %bb.0: # %entry 20674; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 20675; NoVLX-NEXT: kmovw %k0, %eax 20676; NoVLX-NEXT: vzeroupper 20677; NoVLX-NEXT: retq 20678entry: 20679 %0 = bitcast <8 x i64> %__a to <16 x float> 20680 %load = load <8 x i64>, ptr %__b 20681 %1 = bitcast <8 x i64> %load to <16 x float> 20682 %2 = fcmp oeq <16 x float> %0, %1 20683 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 20684 %4 = bitcast <32 x i1> %3 to i32 20685 ret i32 %4 20686} 20687 20688define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 20689; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b: 20690; VLX: # %bb.0: # %entry 20691; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 20692; VLX-NEXT: kmovd %k0, %eax 20693; VLX-NEXT: vzeroupper 20694; VLX-NEXT: retq 20695; 20696; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b: 20697; NoVLX: # %bb.0: # %entry 20698; NoVLX-NEXT: 
vcmpeqps (%rdi){1to16}, %zmm0, %k0 20699; NoVLX-NEXT: kmovw %k0, %eax 20700; NoVLX-NEXT: vzeroupper 20701; NoVLX-NEXT: retq 20702entry: 20703 %0 = bitcast <8 x i64> %__a to <16 x float> 20704 %load = load float, ptr %__b 20705 %vec = insertelement <16 x float> undef, float %load, i32 0 20706 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20707 %2 = fcmp oeq <16 x float> %0, %1 20708 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 20709 %4 = bitcast <32 x i1> %3 to i32 20710 ret i32 %4 20711} 20712 20713define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 20714; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask: 20715; VLX: # %bb.0: # %entry 20716; VLX-NEXT: kmovd %edi, %k1 20717; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20718; VLX-NEXT: kmovd %k0, %eax 20719; VLX-NEXT: vzeroupper 20720; VLX-NEXT: retq 20721; 20722; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask: 20723; NoVLX: # %bb.0: # %entry 20724; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20725; NoVLX-NEXT: kmovw %k0, %eax 20726; NoVLX-NEXT: andl %edi, %eax 20727; NoVLX-NEXT: vzeroupper 20728; NoVLX-NEXT: retq 20729entry: 20730 %0 = bitcast <8 x i64> %__a to <16 x float> 20731 %1 = bitcast <8 x i64> %__b to <16 x float> 20732 %2 = fcmp oeq <16 x float> %0, %1 20733 %3 = bitcast i16 %__u to <16 x i1> 20734 %4 = and <16 x i1> %2, %3 20735 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 
17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 20736 %6 = bitcast <32 x i1> %5 to i32 20737 ret i32 %6 20738} 20739 20740define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 20741; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem: 20742; VLX: # %bb.0: # %entry 20743; VLX-NEXT: kmovd %edi, %k1 20744; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} 20745; VLX-NEXT: kmovd %k0, %eax 20746; VLX-NEXT: vzeroupper 20747; VLX-NEXT: retq 20748; 20749; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem: 20750; NoVLX: # %bb.0: # %entry 20751; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 20752; NoVLX-NEXT: kmovw %k0, %eax 20753; NoVLX-NEXT: andl %edi, %eax 20754; NoVLX-NEXT: vzeroupper 20755; NoVLX-NEXT: retq 20756entry: 20757 %0 = bitcast <8 x i64> %__a to <16 x float> 20758 %load = load <8 x i64>, ptr %__b 20759 %1 = bitcast <8 x i64> %load to <16 x float> 20760 %2 = fcmp oeq <16 x float> %0, %1 20761 %3 = bitcast i16 %__u to <16 x i1> 20762 %4 = and <16 x i1> %2, %3 20763 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 20764 %6 = bitcast <32 x i1> %5 to i32 20765 ret i32 %6 20766} 20767 20768define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 20769; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b: 20770; VLX: # %bb.0: # %entry 20771; VLX-NEXT: kmovd %edi, %k1 20772; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} 20773; VLX-NEXT: kmovd %k0, %eax 20774; VLX-NEXT: vzeroupper 20775; VLX-NEXT: retq 20776; 20777; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b: 20778; NoVLX: 
# %bb.0: # %entry 20779; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 20780; NoVLX-NEXT: kmovw %k0, %eax 20781; NoVLX-NEXT: andl %edi, %eax 20782; NoVLX-NEXT: vzeroupper 20783; NoVLX-NEXT: retq 20784entry: 20785 %0 = bitcast <8 x i64> %__a to <16 x float> 20786 %load = load float, ptr %__b 20787 %vec = insertelement <16 x float> undef, float %load, i32 0 20788 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20789 %2 = fcmp oeq <16 x float> %0, %1 20790 %3 = bitcast i16 %__u to <16 x i1> 20791 %4 = and <16 x i1> %2, %3 20792 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 20793 %6 = bitcast <32 x i1> %5 to i32 20794 ret i32 %6 20795} 20796 20797 20798 20799define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 20800; CHECK-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask: 20801; CHECK: # %bb.0: # %entry 20802; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 20803; CHECK-NEXT: kmovw %k0, %eax 20804; CHECK-NEXT: vzeroupper 20805; CHECK-NEXT: retq 20806entry: 20807 %0 = bitcast <8 x i64> %__a to <16 x float> 20808 %1 = bitcast <8 x i64> %__b to <16 x float> 20809 %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) 20810 %3 = bitcast <16 x i1> %2 to i16 20811 %4 = zext i16 %3 to i32 20812 ret i32 %4 20813} 20814 20815define zeroext i32 @test_masked_vcmpoeqps_v16i1_v32i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> 
%__b) local_unnamed_addr { 20816; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask: 20817; VLX: # %bb.0: # %entry 20818; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 20819; VLX-NEXT: kmovd %k0, %eax 20820; VLX-NEXT: andl %edi, %eax 20821; VLX-NEXT: vzeroupper 20822; VLX-NEXT: retq 20823; 20824; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_sae_mask: 20825; NoVLX: # %bb.0: # %entry 20826; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 20827; NoVLX-NEXT: kmovw %k0, %eax 20828; NoVLX-NEXT: andl %edi, %eax 20829; NoVLX-NEXT: vzeroupper 20830; NoVLX-NEXT: retq 20831entry: 20832 %0 = bitcast <8 x i64> %__a to <16 x float> 20833 %1 = bitcast <8 x i64> %__b to <16 x float> 20834 %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) 20835 %3 = bitcast i16 %__u to <16 x i1> 20836 %4 = and <16 x i1> %2, %3 20837 %5 = bitcast <16 x i1> %4 to i16 20838 %6 = zext i16 %5 to i32 20839 ret i32 %6 20840} 20841 20842 20843 20844define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 20845; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: 20846; VLX: # %bb.0: # %entry 20847; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20848; VLX-NEXT: kmovq %k0, %rax 20849; VLX-NEXT: vzeroupper 20850; VLX-NEXT: retq 20851; 20852; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: 20853; NoVLX: # %bb.0: # %entry 20854; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20855; NoVLX-NEXT: kmovw %k0, %eax 20856; NoVLX-NEXT: vzeroupper 20857; NoVLX-NEXT: retq 20858entry: 20859 %0 = bitcast <8 x i64> %__a to <16 x float> 20860 %1 = bitcast <8 x i64> %__b to <16 x float> 20861 %2 = fcmp oeq <16 x float> %0, %1 20862 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 
15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 20863 %4 = bitcast <64 x i1> %3 to i64 20864 ret i64 %4 20865} 20866 20867define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 20868; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem: 20869; VLX: # %bb.0: # %entry 20870; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 20871; VLX-NEXT: kmovq %k0, %rax 20872; VLX-NEXT: vzeroupper 20873; VLX-NEXT: retq 20874; 20875; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem: 20876; NoVLX: # %bb.0: # %entry 20877; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 20878; NoVLX-NEXT: kmovw %k0, %eax 20879; NoVLX-NEXT: vzeroupper 20880; NoVLX-NEXT: retq 20881entry: 20882 %0 = bitcast <8 x i64> %__a to <16 x float> 20883 %load = load <8 x i64>, ptr %__b 20884 %1 = bitcast <8 x i64> %load to <16 x float> 20885 %2 = fcmp oeq <16 x float> %0, %1 20886 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 20887 %4 = bitcast <64 x i1> %3 to i64 20888 ret i64 %4 20889} 20890 20891define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr { 20892; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: 20893; VLX: # %bb.0: # %entry 20894; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 20895; 
VLX-NEXT: kmovq %k0, %rax 20896; VLX-NEXT: vzeroupper 20897; VLX-NEXT: retq 20898; 20899; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: 20900; NoVLX: # %bb.0: # %entry 20901; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 20902; NoVLX-NEXT: kmovw %k0, %eax 20903; NoVLX-NEXT: vzeroupper 20904; NoVLX-NEXT: retq 20905entry: 20906 %0 = bitcast <8 x i64> %__a to <16 x float> 20907 %load = load float, ptr %__b 20908 %vec = insertelement <16 x float> undef, float %load, i32 0 20909 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20910 %2 = fcmp oeq <16 x float> %0, %1 20911 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 20912 %4 = bitcast <64 x i1> %3 to i64 20913 ret i64 %4 20914} 20915 20916define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 20917; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask: 20918; VLX: # %bb.0: # %entry 20919; VLX-NEXT: kmovd %edi, %k1 20920; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1} 20921; VLX-NEXT: kmovq %k0, %rax 20922; VLX-NEXT: vzeroupper 20923; VLX-NEXT: retq 20924; 20925; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask: 20926; NoVLX: # %bb.0: # %entry 20927; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 20928; NoVLX-NEXT: kmovw %k0, %eax 20929; NoVLX-NEXT: andl %edi, %eax 20930; NoVLX-NEXT: vzeroupper 20931; NoVLX-NEXT: retq 20932entry: 20933 %0 = bitcast <8 x i64> %__a 
to <16 x float> 20934 %1 = bitcast <8 x i64> %__b to <16 x float> 20935 %2 = fcmp oeq <16 x float> %0, %1 20936 %3 = bitcast i16 %__u to <16 x i1> 20937 %4 = and <16 x i1> %2, %3 20938 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 20939 %6 = bitcast <64 x i1> %5 to i64 20940 ret i64 %6 20941} 20942 20943define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 20944; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem: 20945; VLX: # %bb.0: # %entry 20946; VLX-NEXT: kmovd %edi, %k1 20947; VLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1} 20948; VLX-NEXT: kmovq %k0, %rax 20949; VLX-NEXT: vzeroupper 20950; VLX-NEXT: retq 20951; 20952; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem: 20953; NoVLX: # %bb.0: # %entry 20954; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 20955; NoVLX-NEXT: kmovw %k0, %eax 20956; NoVLX-NEXT: andl %edi, %eax 20957; NoVLX-NEXT: vzeroupper 20958; NoVLX-NEXT: retq 20959entry: 20960 %0 = bitcast <8 x i64> %__a to <16 x float> 20961 %load = load <8 x i64>, ptr %__b 20962 %1 = bitcast <8 x i64> %load to <16 x float> 20963 %2 = fcmp oeq <16 x float> %0, %1 20964 %3 = bitcast i16 %__u to <16 x i1> 20965 %4 = and <16 x i1> %2, %3 20966 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 
17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 20967 %6 = bitcast <64 x i1> %5 to i64 20968 ret i64 %6 20969} 20970 20971define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 20972; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b: 20973; VLX: # %bb.0: # %entry 20974; VLX-NEXT: kmovd %edi, %k1 20975; VLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1} 20976; VLX-NEXT: kmovq %k0, %rax 20977; VLX-NEXT: vzeroupper 20978; VLX-NEXT: retq 20979; 20980; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b: 20981; NoVLX: # %bb.0: # %entry 20982; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 20983; NoVLX-NEXT: kmovw %k0, %eax 20984; NoVLX-NEXT: andl %edi, %eax 20985; NoVLX-NEXT: vzeroupper 20986; NoVLX-NEXT: retq 20987entry: 20988 %0 = bitcast <8 x i64> %__a to <16 x float> 20989 %load = load float, ptr %__b 20990 %vec = insertelement <16 x float> undef, float %load, i32 0 20991 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> 20992 %2 = fcmp oeq <16 x float> %0, %1 20993 %3 = bitcast i16 %__u to <16 x i1> 20994 %4 = and <16 x i1> %2, %3 20995 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0,i32 1,i32 2,i32 3,i32 4,i32 5,i32 6,i32 7,i32 8,i32 9,i32 10,i32 11,i32 12,i32 13,i32 14,i32 15,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31,i32 16,i32 17,i32 18,i32 19,i32 20,i32 21,i32 22,i32 23,i32 24,i32 25,i32 26,i32 27,i32 28,i32 29,i32 30,i32 31> 20996 %6 = bitcast <64 x i1> %5 to i64 20997 ret 
i64 %6 20998} 20999 21000 21001 21002define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 21003; CHECK-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask: 21004; CHECK: # %bb.0: # %entry 21005; CHECK-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 21006; CHECK-NEXT: kmovw %k0, %eax 21007; CHECK-NEXT: vzeroupper 21008; CHECK-NEXT: retq 21009entry: 21010 %0 = bitcast <8 x i64> %__a to <16 x float> 21011 %1 = bitcast <8 x i64> %__b to <16 x float> 21012 %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) 21013 %3 = bitcast <16 x i1> %2 to i16 21014 %4 = zext i16 %3 to i64 21015 ret i64 %4 21016} 21017 21018define zeroext i64 @test_masked_vcmpoeqps_v16i1_v64i1_sae_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 21019; VLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask: 21020; VLX: # %bb.0: # %entry 21021; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 21022; VLX-NEXT: kmovd %k0, %eax 21023; VLX-NEXT: andl %edi, %eax 21024; VLX-NEXT: vzeroupper 21025; VLX-NEXT: retq 21026; 21027; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_sae_mask: 21028; NoVLX: # %bb.0: # %entry 21029; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 21030; NoVLX-NEXT: kmovw %k0, %eax 21031; NoVLX-NEXT: andl %edi, %eax 21032; NoVLX-NEXT: vzeroupper 21033; NoVLX-NEXT: retq 21034entry: 21035 %0 = bitcast <8 x i64> %__a to <16 x float> 21036 %1 = bitcast <8 x i64> %__b to <16 x float> 21037 %2 = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) 21038 %3 = bitcast i16 %__u to <16 x i1> 21039 %4 = and <16 x i1> %2, %3 21040 %5 = bitcast <16 
x i1> %4 to i16 21041 %6 = zext i16 %5 to i64 21042 ret i64 %6 21043} 21044 21045 21046 21047declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32) 21048define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21049; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: 21050; VLX: # %bb.0: # %entry 21051; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 21052; VLX-NEXT: kmovb %k0, %eax 21053; VLX-NEXT: retq 21054; 21055; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: 21056; NoVLX: # %bb.0: # %entry 21057; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21058; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21059; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21060; NoVLX-NEXT: kmovw %k0, %eax 21061; NoVLX-NEXT: andl $3, %eax 21062; NoVLX-NEXT: vzeroupper 21063; NoVLX-NEXT: retq 21064entry: 21065 %0 = bitcast <2 x i64> %__a to <2 x double> 21066 %1 = bitcast <2 x i64> %__b to <2 x double> 21067 %2 = fcmp oeq <2 x double> %0, %1 21068 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 21069 %4 = bitcast <4 x i1> %3 to i4 21070 ret i4 %4 21071} 21072 21073define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21074; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: 21075; VLX: # %bb.0: # %entry 21076; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 21077; VLX-NEXT: kmovb %k0, %eax 21078; VLX-NEXT: retq 21079; 21080; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: 21081; NoVLX: # %bb.0: # %entry 21082; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21083; NoVLX-NEXT: vmovapd (%rdi), %xmm1 21084; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21085; NoVLX-NEXT: kmovw %k0, %eax 21086; NoVLX-NEXT: andl $3, %eax 21087; NoVLX-NEXT: vzeroupper 21088; NoVLX-NEXT: retq 21089entry: 21090 %0 = bitcast <2 x i64> %__a to <2 x double> 21091 %load = load <2 x i64>, ptr %__b 21092 %1 = bitcast <2 x i64> %load to <2 x double> 21093 %2 = fcmp oeq 
<2 x double> %0, %1 21094 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 21095 %4 = bitcast <4 x i1> %3 to i4 21096 ret i4 %4 21097} 21098 21099define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21100; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: 21101; VLX: # %bb.0: # %entry 21102; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 21103; VLX-NEXT: kmovb %k0, %eax 21104; VLX-NEXT: retq 21105; 21106; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: 21107; NoVLX: # %bb.0: # %entry 21108; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21109; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 21110; NoVLX-NEXT: kmovw %k0, %eax 21111; NoVLX-NEXT: andl $3, %eax 21112; NoVLX-NEXT: vzeroupper 21113; NoVLX-NEXT: retq 21114entry: 21115 %0 = bitcast <2 x i64> %__a to <2 x double> 21116 %load = load double, ptr %__b 21117 %vec = insertelement <2 x double> undef, double %load, i32 0 21118 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21119 %2 = fcmp oeq <2 x double> %0, %1 21120 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 21121 %4 = bitcast <4 x i1> %3 to i4 21122 ret i4 %4 21123} 21124 21125define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21126; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask: 21127; VLX: # %bb.0: # %entry 21128; VLX-NEXT: kmovd %edi, %k1 21129; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1} 21130; VLX-NEXT: kmovb %k0, %eax 21131; VLX-NEXT: retq 21132; 21133; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask: 21134; NoVLX: # %bb.0: # %entry 21135; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21136; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21137; NoVLX-NEXT: kmovw %edi, %k1 21138; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21139; NoVLX-NEXT: kmovw %k0, %eax 21140; NoVLX-NEXT: andl 
$3, %eax 21141; NoVLX-NEXT: vzeroupper 21142; NoVLX-NEXT: retq 21143entry: 21144 %0 = bitcast <2 x i64> %__a to <2 x double> 21145 %1 = bitcast <2 x i64> %__b to <2 x double> 21146 %2 = fcmp oeq <2 x double> %0, %1 21147 %3 = bitcast i2 %__u to <2 x i1> 21148 %4 = and <2 x i1> %2, %3 21149 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 21150 %6 = bitcast <4 x i1> %5 to i4 21151 ret i4 %6 21152} 21153 21154define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 21155; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem: 21156; VLX: # %bb.0: # %entry 21157; VLX-NEXT: kmovd %edi, %k1 21158; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1} 21159; VLX-NEXT: kmovb %k0, %eax 21160; VLX-NEXT: retq 21161; 21162; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem: 21163; NoVLX: # %bb.0: # %entry 21164; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21165; NoVLX-NEXT: kmovw %edi, %k1 21166; NoVLX-NEXT: vmovapd (%rsi), %xmm1 21167; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21168; NoVLX-NEXT: kmovw %k0, %eax 21169; NoVLX-NEXT: andl $3, %eax 21170; NoVLX-NEXT: vzeroupper 21171; NoVLX-NEXT: retq 21172entry: 21173 %0 = bitcast <2 x i64> %__a to <2 x double> 21174 %load = load <2 x i64>, ptr %__b 21175 %1 = bitcast <2 x i64> %load to <2 x double> 21176 %2 = fcmp oeq <2 x double> %0, %1 21177 %3 = bitcast i2 %__u to <2 x i1> 21178 %4 = and <2 x i1> %2, %3 21179 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 21180 %6 = bitcast <4 x i1> %5 to i4 21181 ret i4 %6 21182} 21183 21184define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 21185; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b: 21186; VLX: # %bb.0: # %entry 21187; VLX-NEXT: kmovd %edi, %k1 21188; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1} 21189; VLX-NEXT: kmovb %k0, %eax 
21190; VLX-NEXT: retq 21191; 21192; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b: 21193; NoVLX: # %bb.0: # %entry 21194; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21195; NoVLX-NEXT: kmovw %edi, %k1 21196; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 21197; NoVLX-NEXT: kmovw %k0, %eax 21198; NoVLX-NEXT: andl $3, %eax 21199; NoVLX-NEXT: vzeroupper 21200; NoVLX-NEXT: retq 21201entry: 21202 %0 = bitcast <2 x i64> %__a to <2 x double> 21203 %load = load double, ptr %__b 21204 %vec = insertelement <2 x double> undef, double %load, i32 0 21205 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21206 %2 = fcmp oeq <2 x double> %0, %1 21207 %3 = bitcast i2 %__u to <2 x i1> 21208 %4 = and <2 x i1> %2, %3 21209 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 21210 %6 = bitcast <4 x i1> %5 to i4 21211 ret i4 %6 21212} 21213 21214 21215 21216define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21217; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask: 21218; VLX: # %bb.0: # %entry 21219; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 21220; VLX-NEXT: kmovd %k0, %eax 21221; VLX-NEXT: # kill: def $al killed $al killed $eax 21222; VLX-NEXT: retq 21223; 21224; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask: 21225; NoVLX: # %bb.0: # %entry 21226; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21227; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21228; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21229; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21230; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21231; NoVLX-NEXT: kmovw %k0, %eax 21232; NoVLX-NEXT: # kill: def $al killed $al killed $eax 21233; NoVLX-NEXT: vzeroupper 21234; NoVLX-NEXT: retq 21235entry: 21236 %0 = bitcast <2 x i64> %__a to <2 x double> 21237 %1 = bitcast <2 x i64> %__b to <2 x double> 21238 %2 = fcmp oeq <2 x double> %0, %1 21239 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21240 %4 = bitcast <8 x i1> %3 to i8 21241 ret i8 %4 21242} 21243 21244define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21245; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem: 21246; VLX: # %bb.0: # %entry 21247; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 21248; VLX-NEXT: kmovd %k0, %eax 21249; VLX-NEXT: # kill: def $al killed $al killed $eax 21250; VLX-NEXT: retq 21251; 21252; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem: 21253; NoVLX: # %bb.0: # %entry 21254; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21255; NoVLX-NEXT: vmovapd (%rdi), %xmm1 21256; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21257; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21258; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21259; NoVLX-NEXT: kmovw %k0, %eax 21260; NoVLX-NEXT: # kill: def $al killed $al killed $eax 21261; NoVLX-NEXT: vzeroupper 21262; NoVLX-NEXT: retq 21263entry: 21264 %0 = bitcast <2 x i64> %__a to <2 x double> 21265 %load = load <2 x i64>, ptr %__b 21266 %1 = bitcast <2 x i64> %load to <2 x double> 21267 %2 = fcmp oeq <2 x double> %0, %1 21268 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21269 %4 = bitcast <8 x i1> %3 to i8 21270 ret i8 %4 21271} 21272 21273define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21274; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b: 21275; VLX: # %bb.0: # %entry 21276; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 21277; VLX-NEXT: kmovd %k0, %eax 21278; VLX-NEXT: # kill: def $al killed $al killed $eax 21279; VLX-NEXT: retq 21280; 21281; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b: 21282; NoVLX: # %bb.0: # %entry 21283; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21284; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 21285; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21286; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21287; NoVLX-NEXT: kmovw 
%k0, %eax 21288; NoVLX-NEXT: # kill: def $al killed $al killed $eax 21289; NoVLX-NEXT: vzeroupper 21290; NoVLX-NEXT: retq 21291entry: 21292 %0 = bitcast <2 x i64> %__a to <2 x double> 21293 %load = load double, ptr %__b 21294 %vec = insertelement <2 x double> undef, double %load, i32 0 21295 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21296 %2 = fcmp oeq <2 x double> %0, %1 21297 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21298 %4 = bitcast <8 x i1> %3 to i8 21299 ret i8 %4 21300} 21301 21302define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21303; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask: 21304; VLX: # %bb.0: # %entry 21305; VLX-NEXT: kmovd %edi, %k1 21306; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1} 21307; VLX-NEXT: kmovd %k0, %eax 21308; VLX-NEXT: # kill: def $al killed $al killed $eax 21309; VLX-NEXT: retq 21310; 21311; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask: 21312; NoVLX: # %bb.0: # %entry 21313; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21314; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21315; NoVLX-NEXT: kmovw %edi, %k1 21316; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21317; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21318; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21319; NoVLX-NEXT: kmovw %k0, %eax 21320; NoVLX-NEXT: # kill: def $al killed $al killed $eax 21321; NoVLX-NEXT: vzeroupper 21322; NoVLX-NEXT: retq 21323entry: 21324 %0 = bitcast <2 x i64> %__a to <2 x double> 21325 %1 = bitcast <2 x i64> %__b to <2 x double> 21326 %2 = fcmp oeq <2 x double> %0, %1 21327 %3 = bitcast i2 %__u to <2 x i1> 21328 %4 = and <2 x i1> %2, %3 21329 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21330 %6 = bitcast <8 x i1> %5 to i8 21331 ret i8 %6 21332} 21333 21334define zeroext 
i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 21335; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem: 21336; VLX: # %bb.0: # %entry 21337; VLX-NEXT: kmovd %edi, %k1 21338; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1} 21339; VLX-NEXT: kmovd %k0, %eax 21340; VLX-NEXT: # kill: def $al killed $al killed $eax 21341; VLX-NEXT: retq 21342; 21343; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem: 21344; NoVLX: # %bb.0: # %entry 21345; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21346; NoVLX-NEXT: kmovw %edi, %k1 21347; NoVLX-NEXT: vmovapd (%rsi), %xmm1 21348; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21349; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21350; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21351; NoVLX-NEXT: kmovw %k0, %eax 21352; NoVLX-NEXT: # kill: def $al killed $al killed $eax 21353; NoVLX-NEXT: vzeroupper 21354; NoVLX-NEXT: retq 21355entry: 21356 %0 = bitcast <2 x i64> %__a to <2 x double> 21357 %load = load <2 x i64>, ptr %__b 21358 %1 = bitcast <2 x i64> %load to <2 x double> 21359 %2 = fcmp oeq <2 x double> %0, %1 21360 %3 = bitcast i2 %__u to <2 x i1> 21361 %4 = and <2 x i1> %2, %3 21362 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21363 %6 = bitcast <8 x i1> %5 to i8 21364 ret i8 %6 21365} 21366 21367define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 21368; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b: 21369; VLX: # %bb.0: # %entry 21370; VLX-NEXT: kmovd %edi, %k1 21371; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1} 21372; VLX-NEXT: kmovd %k0, %eax 21373; VLX-NEXT: # kill: def $al killed $al killed $eax 21374; VLX-NEXT: retq 21375; 21376; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b: 21377; NoVLX: # %bb.0: # %entry 21378; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21379; NoVLX-NEXT: kmovw %edi, %k1 
21380; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 21381; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21382; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21383; NoVLX-NEXT: kmovw %k0, %eax 21384; NoVLX-NEXT: # kill: def $al killed $al killed $eax 21385; NoVLX-NEXT: vzeroupper 21386; NoVLX-NEXT: retq 21387entry: 21388 %0 = bitcast <2 x i64> %__a to <2 x double> 21389 %load = load double, ptr %__b 21390 %vec = insertelement <2 x double> undef, double %load, i32 0 21391 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21392 %2 = fcmp oeq <2 x double> %0, %1 21393 %3 = bitcast i2 %__u to <2 x i1> 21394 %4 = and <2 x i1> %2, %3 21395 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21396 %6 = bitcast <8 x i1> %5 to i8 21397 ret i8 %6 21398} 21399 21400 21401 21402define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21403; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask: 21404; VLX: # %bb.0: # %entry 21405; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 21406; VLX-NEXT: kmovd %k0, %eax 21407; VLX-NEXT: # kill: def $ax killed $ax killed $eax 21408; VLX-NEXT: retq 21409; 21410; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask: 21411; NoVLX: # %bb.0: # %entry 21412; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21413; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21414; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21415; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21416; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21417; NoVLX-NEXT: kmovw %k0, %eax 21418; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 21419; NoVLX-NEXT: vzeroupper 21420; NoVLX-NEXT: retq 21421entry: 21422 %0 = bitcast <2 x i64> %__a to <2 x double> 21423 %1 = bitcast <2 x i64> %__b to <2 x double> 21424 %2 = fcmp oeq <2 x double> %0, %1 21425 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3, i32 2, i32 3, i32 2, i32 3> 21426 %4 = bitcast <16 x i1> %3 to i16 21427 ret i16 %4 21428} 21429 21430define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21431; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem: 21432; VLX: # %bb.0: # %entry 21433; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 21434; VLX-NEXT: kmovd %k0, %eax 21435; VLX-NEXT: # kill: def $ax killed $ax killed $eax 21436; VLX-NEXT: retq 21437; 21438; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem: 21439; NoVLX: # %bb.0: # %entry 21440; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21441; NoVLX-NEXT: vmovapd (%rdi), %xmm1 21442; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21443; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21444; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21445; NoVLX-NEXT: kmovw %k0, %eax 21446; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 21447; NoVLX-NEXT: vzeroupper 21448; NoVLX-NEXT: retq 21449entry: 21450 %0 = bitcast <2 x i64> %__a to <2 x double> 21451 %load = load <2 x i64>, ptr %__b 21452 %1 = bitcast <2 x i64> %load to <2 x double> 21453 %2 = fcmp oeq <2 x double> %0, %1 21454 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21455 %4 = bitcast <16 x i1> %3 to i16 21456 ret i16 %4 21457} 21458 21459define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21460; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b: 21461; VLX: # %bb.0: # %entry 21462; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 21463; VLX-NEXT: kmovd %k0, %eax 21464; VLX-NEXT: # kill: def $ax killed $ax killed $eax 21465; VLX-NEXT: retq 21466; 21467; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b: 21468; NoVLX: # %bb.0: # %entry 21469; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21470; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 21471; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21472; 
NoVLX-NEXT: kshiftrw $14, %k0, %k0 21473; NoVLX-NEXT: kmovw %k0, %eax 21474; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 21475; NoVLX-NEXT: vzeroupper 21476; NoVLX-NEXT: retq 21477entry: 21478 %0 = bitcast <2 x i64> %__a to <2 x double> 21479 %load = load double, ptr %__b 21480 %vec = insertelement <2 x double> undef, double %load, i32 0 21481 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21482 %2 = fcmp oeq <2 x double> %0, %1 21483 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21484 %4 = bitcast <16 x i1> %3 to i16 21485 ret i16 %4 21486} 21487 21488define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21489; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask: 21490; VLX: # %bb.0: # %entry 21491; VLX-NEXT: kmovd %edi, %k1 21492; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1} 21493; VLX-NEXT: kmovd %k0, %eax 21494; VLX-NEXT: # kill: def $ax killed $ax killed $eax 21495; VLX-NEXT: retq 21496; 21497; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask: 21498; NoVLX: # %bb.0: # %entry 21499; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21500; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21501; NoVLX-NEXT: kmovw %edi, %k1 21502; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21503; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21504; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21505; NoVLX-NEXT: kmovw %k0, %eax 21506; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 21507; NoVLX-NEXT: vzeroupper 21508; NoVLX-NEXT: retq 21509entry: 21510 %0 = bitcast <2 x i64> %__a to <2 x double> 21511 %1 = bitcast <2 x i64> %__b to <2 x double> 21512 %2 = fcmp oeq <2 x double> %0, %1 21513 %3 = bitcast i2 %__u to <2 x i1> 21514 %4 = and <2 x i1> %2, %3 21515 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 
2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21516 %6 = bitcast <16 x i1> %5 to i16 21517 ret i16 %6 21518} 21519 21520define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 21521; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem: 21522; VLX: # %bb.0: # %entry 21523; VLX-NEXT: kmovd %edi, %k1 21524; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1} 21525; VLX-NEXT: kmovd %k0, %eax 21526; VLX-NEXT: # kill: def $ax killed $ax killed $eax 21527; VLX-NEXT: retq 21528; 21529; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem: 21530; NoVLX: # %bb.0: # %entry 21531; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21532; NoVLX-NEXT: kmovw %edi, %k1 21533; NoVLX-NEXT: vmovapd (%rsi), %xmm1 21534; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21535; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21536; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21537; NoVLX-NEXT: kmovw %k0, %eax 21538; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 21539; NoVLX-NEXT: vzeroupper 21540; NoVLX-NEXT: retq 21541entry: 21542 %0 = bitcast <2 x i64> %__a to <2 x double> 21543 %load = load <2 x i64>, ptr %__b 21544 %1 = bitcast <2 x i64> %load to <2 x double> 21545 %2 = fcmp oeq <2 x double> %0, %1 21546 %3 = bitcast i2 %__u to <2 x i1> 21547 %4 = and <2 x i1> %2, %3 21548 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21549 %6 = bitcast <16 x i1> %5 to i16 21550 ret i16 %6 21551} 21552 21553define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 21554; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b: 21555; VLX: # %bb.0: # %entry 21556; VLX-NEXT: kmovd %edi, %k1 21557; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1} 21558; VLX-NEXT: kmovd %k0, %eax 21559; VLX-NEXT: # kill: def 
$ax killed $ax killed $eax 21560; VLX-NEXT: retq 21561; 21562; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b: 21563; NoVLX: # %bb.0: # %entry 21564; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21565; NoVLX-NEXT: kmovw %edi, %k1 21566; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 21567; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21568; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21569; NoVLX-NEXT: kmovw %k0, %eax 21570; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 21571; NoVLX-NEXT: vzeroupper 21572; NoVLX-NEXT: retq 21573entry: 21574 %0 = bitcast <2 x i64> %__a to <2 x double> 21575 %load = load double, ptr %__b 21576 %vec = insertelement <2 x double> undef, double %load, i32 0 21577 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21578 %2 = fcmp oeq <2 x double> %0, %1 21579 %3 = bitcast i2 %__u to <2 x i1> 21580 %4 = and <2 x i1> %2, %3 21581 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21582 %6 = bitcast <16 x i1> %5 to i16 21583 ret i16 %6 21584} 21585 21586 21587 21588define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21589; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask: 21590; VLX: # %bb.0: # %entry 21591; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 21592; VLX-NEXT: kmovd %k0, %eax 21593; VLX-NEXT: retq 21594; 21595; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask: 21596; NoVLX: # %bb.0: # %entry 21597; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21598; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21599; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21600; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21601; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21602; NoVLX-NEXT: kmovw %k0, %eax 21603; NoVLX-NEXT: vzeroupper 21604; NoVLX-NEXT: retq 21605entry: 21606 %0 = bitcast <2 x i64> %__a to <2 x double> 21607 %1 = bitcast <2 x i64> %__b to <2 x 
double> 21608 %2 = fcmp oeq <2 x double> %0, %1 21609 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21610 %4 = bitcast <32 x i1> %3 to i32 21611 ret i32 %4 21612} 21613 21614define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21615; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem: 21616; VLX: # %bb.0: # %entry 21617; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 21618; VLX-NEXT: kmovd %k0, %eax 21619; VLX-NEXT: retq 21620; 21621; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem: 21622; NoVLX: # %bb.0: # %entry 21623; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21624; NoVLX-NEXT: vmovapd (%rdi), %xmm1 21625; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21626; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21627; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21628; NoVLX-NEXT: kmovw %k0, %eax 21629; NoVLX-NEXT: vzeroupper 21630; NoVLX-NEXT: retq 21631entry: 21632 %0 = bitcast <2 x i64> %__a to <2 x double> 21633 %load = load <2 x i64>, ptr %__b 21634 %1 = bitcast <2 x i64> %load to <2 x double> 21635 %2 = fcmp oeq <2 x double> %0, %1 21636 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21637 %4 = bitcast <32 x i1> %3 to i32 21638 ret i32 %4 21639} 21640 21641define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21642; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b: 21643; VLX: # %bb.0: # %entry 21644; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 21645; VLX-NEXT: kmovd %k0, %eax 21646; VLX-NEXT: retq 21647; 21648; 
NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b: 21649; NoVLX: # %bb.0: # %entry 21650; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21651; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 21652; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21653; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21654; NoVLX-NEXT: kmovw %k0, %eax 21655; NoVLX-NEXT: vzeroupper 21656; NoVLX-NEXT: retq 21657entry: 21658 %0 = bitcast <2 x i64> %__a to <2 x double> 21659 %load = load double, ptr %__b 21660 %vec = insertelement <2 x double> undef, double %load, i32 0 21661 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21662 %2 = fcmp oeq <2 x double> %0, %1 21663 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21664 %4 = bitcast <32 x i1> %3 to i32 21665 ret i32 %4 21666} 21667 21668define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21669; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask: 21670; VLX: # %bb.0: # %entry 21671; VLX-NEXT: kmovd %edi, %k1 21672; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1} 21673; VLX-NEXT: kmovd %k0, %eax 21674; VLX-NEXT: retq 21675; 21676; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask: 21677; NoVLX: # %bb.0: # %entry 21678; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21679; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21680; NoVLX-NEXT: kmovw %edi, %k1 21681; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21682; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21683; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21684; NoVLX-NEXT: kmovw %k0, %eax 21685; NoVLX-NEXT: vzeroupper 21686; NoVLX-NEXT: retq 21687entry: 21688 %0 = bitcast <2 x i64> %__a to <2 x double> 21689 %1 = bitcast <2 x i64> %__b to <2 x double> 21690 %2 = fcmp oeq 
<2 x double> %0, %1 21691 %3 = bitcast i2 %__u to <2 x i1> 21692 %4 = and <2 x i1> %2, %3 21693 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21694 %6 = bitcast <32 x i1> %5 to i32 21695 ret i32 %6 21696} 21697 21698define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 21699; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem: 21700; VLX: # %bb.0: # %entry 21701; VLX-NEXT: kmovd %edi, %k1 21702; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1} 21703; VLX-NEXT: kmovd %k0, %eax 21704; VLX-NEXT: retq 21705; 21706; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem: 21707; NoVLX: # %bb.0: # %entry 21708; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21709; NoVLX-NEXT: kmovw %edi, %k1 21710; NoVLX-NEXT: vmovapd (%rsi), %xmm1 21711; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21712; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21713; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21714; NoVLX-NEXT: kmovw %k0, %eax 21715; NoVLX-NEXT: vzeroupper 21716; NoVLX-NEXT: retq 21717entry: 21718 %0 = bitcast <2 x i64> %__a to <2 x double> 21719 %load = load <2 x i64>, ptr %__b 21720 %1 = bitcast <2 x i64> %load to <2 x double> 21721 %2 = fcmp oeq <2 x double> %0, %1 21722 %3 = bitcast i2 %__u to <2 x i1> 21723 %4 = and <2 x i1> %2, %3 21724 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21725 %6 = bitcast <32 x i1> %5 to i32 21726 ret i32 %6 21727} 21728 21729define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, 
<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21730; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b: 21731; VLX: # %bb.0: # %entry 21732; VLX-NEXT: kmovd %edi, %k1 21733; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1} 21734; VLX-NEXT: kmovd %k0, %eax 21735; VLX-NEXT: retq 21736; 21737; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b: 21738; NoVLX: # %bb.0: # %entry 21739; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21740; NoVLX-NEXT: kmovw %edi, %k1 21741; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 21742; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21743; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21744; NoVLX-NEXT: kmovw %k0, %eax 21745; NoVLX-NEXT: vzeroupper 21746; NoVLX-NEXT: retq 21747entry: 21748 %0 = bitcast <2 x i64> %__a to <2 x double> 21749 %load = load double, ptr %__b 21750 %vec = insertelement <2 x double> undef, double %load, i32 0 21751 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21752 %2 = fcmp oeq <2 x double> %0, %1 21753 %3 = bitcast i2 %__u to <2 x i1> 21754 %4 = and <2 x i1> %2, %3 21755 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21756 %6 = bitcast <32 x i1> %5 to i32 21757 ret i32 %6 21758} 21759 21760 21761 21762define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21763; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask: 21764; VLX: # %bb.0: # %entry 21765; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 21766; VLX-NEXT: kmovq %k0, %rax 21767; VLX-NEXT: retq 21768; 21769; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask: 21770; NoVLX: # %bb.0: # %entry 21771; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21772; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21773; NoVLX-NEXT: vcmpeqpd %zmm1, 
%zmm0, %k0 21774; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21775; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21776; NoVLX-NEXT: kmovw %k0, %eax 21777; NoVLX-NEXT: vzeroupper 21778; NoVLX-NEXT: retq 21779entry: 21780 %0 = bitcast <2 x i64> %__a to <2 x double> 21781 %1 = bitcast <2 x i64> %__b to <2 x double> 21782 %2 = fcmp oeq <2 x double> %0, %1 21783 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21784 %4 = bitcast <64 x i1> %3 to i64 21785 ret i64 %4 21786} 21787 21788define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21789; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem: 21790; VLX: # %bb.0: # %entry 21791; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 21792; VLX-NEXT: kmovq %k0, %rax 21793; VLX-NEXT: retq 21794; 21795; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem: 21796; NoVLX: # %bb.0: # %entry 21797; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21798; NoVLX-NEXT: vmovapd (%rdi), %xmm1 21799; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21800; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21801; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21802; NoVLX-NEXT: kmovw %k0, %eax 21803; NoVLX-NEXT: vzeroupper 21804; NoVLX-NEXT: retq 21805entry: 21806 %0 = bitcast <2 x i64> %__a to <2 x double> 21807 %load = load <2 x i64>, ptr %__b 21808 %1 = bitcast <2 x i64> %load to <2 x double> 21809 %2 = fcmp oeq <2 x double> %0, %1 21810 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 
3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21811 %4 = bitcast <64 x i1> %3 to i64 21812 ret i64 %4 21813} 21814 21815define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, ptr %__b) local_unnamed_addr { 21816; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b: 21817; VLX: # %bb.0: # %entry 21818; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 21819; VLX-NEXT: kmovq %k0, %rax 21820; VLX-NEXT: retq 21821; 21822; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b: 21823; NoVLX: # %bb.0: # %entry 21824; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21825; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 21826; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21827; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21828; NoVLX-NEXT: kmovw %k0, %eax 21829; NoVLX-NEXT: vzeroupper 21830; NoVLX-NEXT: retq 21831entry: 21832 %0 = bitcast <2 x i64> %__a to <2 x double> 21833 %load = load double, ptr %__b 21834 %vec = insertelement <2 x double> undef, double %load, i32 0 21835 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21836 %2 = fcmp oeq <2 x double> %0, %1 21837 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21838 %4 = bitcast <64 x i1> %3 to i64 21839 ret i64 %4 21840} 21841 
21842define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { 21843; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask: 21844; VLX: # %bb.0: # %entry 21845; VLX-NEXT: kmovd %edi, %k1 21846; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1} 21847; VLX-NEXT: kmovq %k0, %rax 21848; VLX-NEXT: retq 21849; 21850; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask: 21851; NoVLX: # %bb.0: # %entry 21852; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 21853; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21854; NoVLX-NEXT: kmovw %edi, %k1 21855; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21856; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21857; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21858; NoVLX-NEXT: kmovw %k0, %eax 21859; NoVLX-NEXT: vzeroupper 21860; NoVLX-NEXT: retq 21861entry: 21862 %0 = bitcast <2 x i64> %__a to <2 x double> 21863 %1 = bitcast <2 x i64> %__b to <2 x double> 21864 %2 = fcmp oeq <2 x double> %0, %1 21865 %3 = bitcast i2 %__u to <2 x i1> 21866 %4 = and <2 x i1> %2, %3 21867 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21868 %6 = bitcast <64 x i1> %5 to i64 21869 ret i64 %6 21870} 21871 21872define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 21873; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem: 21874; VLX: # %bb.0: # %entry 21875; VLX-NEXT: kmovd %edi, %k1 21876; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1} 21877; VLX-NEXT: kmovq %k0, %rax 21878; 
VLX-NEXT: retq 21879; 21880; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem: 21881; NoVLX: # %bb.0: # %entry 21882; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21883; NoVLX-NEXT: kmovw %edi, %k1 21884; NoVLX-NEXT: vmovapd (%rsi), %xmm1 21885; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 21886; NoVLX-NEXT: kshiftlw $14, %k0, %k0 21887; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21888; NoVLX-NEXT: kmovw %k0, %eax 21889; NoVLX-NEXT: vzeroupper 21890; NoVLX-NEXT: retq 21891entry: 21892 %0 = bitcast <2 x i64> %__a to <2 x double> 21893 %load = load <2 x i64>, ptr %__b 21894 %1 = bitcast <2 x i64> %load to <2 x double> 21895 %2 = fcmp oeq <2 x double> %0, %1 21896 %3 = bitcast i2 %__u to <2 x i1> 21897 %4 = and <2 x i1> %2, %3 21898 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21899 %6 = bitcast <64 x i1> %5 to i64 21900 ret i64 %6 21901} 21902 21903define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, ptr %__b) local_unnamed_addr { 21904; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b: 21905; VLX: # %bb.0: # %entry 21906; VLX-NEXT: kmovd %edi, %k1 21907; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1} 21908; VLX-NEXT: kmovq %k0, %rax 21909; VLX-NEXT: retq 21910; 21911; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b: 21912; NoVLX: # %bb.0: # %entry 21913; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 21914; NoVLX-NEXT: kmovw %edi, %k1 21915; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 21916; NoVLX-NEXT: kshiftlw $14, %k0, 
%k0 21917; NoVLX-NEXT: kshiftrw $14, %k0, %k0 21918; NoVLX-NEXT: kmovw %k0, %eax 21919; NoVLX-NEXT: vzeroupper 21920; NoVLX-NEXT: retq 21921entry: 21922 %0 = bitcast <2 x i64> %__a to <2 x double> 21923 %load = load double, ptr %__b 21924 %vec = insertelement <2 x double> undef, double %load, i32 0 21925 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 0, i32 0> 21926 %2 = fcmp oeq <2 x double> %0, %1 21927 %3 = bitcast i2 %__u to <2 x i1> 21928 %4 = and <2 x i1> %2, %3 21929 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> 21930 %6 = bitcast <64 x i1> %5 to i64 21931 ret i64 %6 21932} 21933 21934 21935 21936define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 21937; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask: 21938; VLX: # %bb.0: # %entry 21939; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 21940; VLX-NEXT: kmovd %k0, %eax 21941; VLX-NEXT: # kill: def $al killed $al killed $eax 21942; VLX-NEXT: vzeroupper 21943; VLX-NEXT: retq 21944; 21945; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask: 21946; NoVLX: # %bb.0: # %entry 21947; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 21948; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 21949; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21950; NoVLX-NEXT: kshiftlw $12, %k0, %k0 21951; NoVLX-NEXT: kshiftrw $12, %k0, %k0 21952; NoVLX-NEXT: kmovw %k0, %eax 21953; NoVLX-NEXT: # kill: def $al killed $al killed $eax 21954; NoVLX-NEXT: vzeroupper 21955; NoVLX-NEXT: retq 21956entry: 21957 %0 = bitcast <4 x 
i64> %__a to <4 x double> 21958 %1 = bitcast <4 x i64> %__b to <4 x double> 21959 %2 = fcmp oeq <4 x double> %0, %1 21960 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 21961 %4 = bitcast <8 x i1> %3 to i8 21962 ret i8 %4 21963} 21964 21965define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 21966; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem: 21967; VLX: # %bb.0: # %entry 21968; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 21969; VLX-NEXT: kmovd %k0, %eax 21970; VLX-NEXT: # kill: def $al killed $al killed $eax 21971; VLX-NEXT: vzeroupper 21972; VLX-NEXT: retq 21973; 21974; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem: 21975; NoVLX: # %bb.0: # %entry 21976; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 21977; NoVLX-NEXT: vmovapd (%rdi), %ymm1 21978; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 21979; NoVLX-NEXT: kshiftlw $12, %k0, %k0 21980; NoVLX-NEXT: kshiftrw $12, %k0, %k0 21981; NoVLX-NEXT: kmovw %k0, %eax 21982; NoVLX-NEXT: # kill: def $al killed $al killed $eax 21983; NoVLX-NEXT: vzeroupper 21984; NoVLX-NEXT: retq 21985entry: 21986 %0 = bitcast <4 x i64> %__a to <4 x double> 21987 %load = load <4 x i64>, ptr %__b 21988 %1 = bitcast <4 x i64> %load to <4 x double> 21989 %2 = fcmp oeq <4 x double> %0, %1 21990 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 21991 %4 = bitcast <8 x i1> %3 to i8 21992 ret i8 %4 21993} 21994 21995define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 21996; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: 21997; VLX: # %bb.0: # %entry 21998; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 21999; VLX-NEXT: kmovd %k0, %eax 22000; VLX-NEXT: # kill: def $al killed $al killed $eax 22001; VLX-NEXT: vzeroupper 22002; VLX-NEXT: retq 22003; 22004; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: 22005; 
NoVLX: # %bb.0: # %entry 22006; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22007; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 22008; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22009; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22010; NoVLX-NEXT: kmovw %k0, %eax 22011; NoVLX-NEXT: # kill: def $al killed $al killed $eax 22012; NoVLX-NEXT: vzeroupper 22013; NoVLX-NEXT: retq 22014entry: 22015 %0 = bitcast <4 x i64> %__a to <4 x double> 22016 %load = load double, ptr %__b 22017 %vec = insertelement <4 x double> undef, double %load, i32 0 22018 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22019 %2 = fcmp oeq <4 x double> %0, %1 22020 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 22021 %4 = bitcast <8 x i1> %3 to i8 22022 ret i8 %4 22023} 22024 22025define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22026; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask: 22027; VLX: # %bb.0: # %entry 22028; VLX-NEXT: kmovd %edi, %k1 22029; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1} 22030; VLX-NEXT: kmovd %k0, %eax 22031; VLX-NEXT: # kill: def $al killed $al killed $eax 22032; VLX-NEXT: vzeroupper 22033; VLX-NEXT: retq 22034; 22035; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask: 22036; NoVLX: # %bb.0: # %entry 22037; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22038; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22039; NoVLX-NEXT: kmovw %edi, %k1 22040; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22041; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22042; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22043; NoVLX-NEXT: kmovw %k0, %eax 22044; NoVLX-NEXT: # kill: def $al killed $al killed $eax 22045; NoVLX-NEXT: vzeroupper 22046; NoVLX-NEXT: retq 22047entry: 22048 %0 = bitcast <4 x i64> %__a to <4 x double> 22049 %1 = bitcast <4 x i64> %__b to <4 x double> 22050 %2 = fcmp oeq <4 x double> %0, %1 
22051 %3 = bitcast i4 %__u to <4 x i1> 22052 %4 = and <4 x i1> %2, %3 22053 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 22054 %6 = bitcast <8 x i1> %5 to i8 22055 ret i8 %6 22056} 22057 22058define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 22059; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem: 22060; VLX: # %bb.0: # %entry 22061; VLX-NEXT: kmovd %edi, %k1 22062; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1} 22063; VLX-NEXT: kmovd %k0, %eax 22064; VLX-NEXT: # kill: def $al killed $al killed $eax 22065; VLX-NEXT: vzeroupper 22066; VLX-NEXT: retq 22067; 22068; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem: 22069; NoVLX: # %bb.0: # %entry 22070; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22071; NoVLX-NEXT: kmovw %edi, %k1 22072; NoVLX-NEXT: vmovapd (%rsi), %ymm1 22073; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22074; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22075; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22076; NoVLX-NEXT: kmovw %k0, %eax 22077; NoVLX-NEXT: # kill: def $al killed $al killed $eax 22078; NoVLX-NEXT: vzeroupper 22079; NoVLX-NEXT: retq 22080entry: 22081 %0 = bitcast <4 x i64> %__a to <4 x double> 22082 %load = load <4 x i64>, ptr %__b 22083 %1 = bitcast <4 x i64> %load to <4 x double> 22084 %2 = fcmp oeq <4 x double> %0, %1 22085 %3 = bitcast i4 %__u to <4 x i1> 22086 %4 = and <4 x i1> %2, %3 22087 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 22088 %6 = bitcast <8 x i1> %5 to i8 22089 ret i8 %6 22090} 22091 22092define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 22093; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b: 22094; VLX: # %bb.0: # %entry 22095; VLX-NEXT: kmovd %edi, %k1 22096; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 
{%k1} 22097; VLX-NEXT: kmovd %k0, %eax 22098; VLX-NEXT: # kill: def $al killed $al killed $eax 22099; VLX-NEXT: vzeroupper 22100; VLX-NEXT: retq 22101; 22102; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b: 22103; NoVLX: # %bb.0: # %entry 22104; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22105; NoVLX-NEXT: kmovw %edi, %k1 22106; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 22107; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22108; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22109; NoVLX-NEXT: kmovw %k0, %eax 22110; NoVLX-NEXT: # kill: def $al killed $al killed $eax 22111; NoVLX-NEXT: vzeroupper 22112; NoVLX-NEXT: retq 22113entry: 22114 %0 = bitcast <4 x i64> %__a to <4 x double> 22115 %load = load double, ptr %__b 22116 %vec = insertelement <4 x double> undef, double %load, i32 0 22117 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22118 %2 = fcmp oeq <4 x double> %0, %1 22119 %3 = bitcast i4 %__u to <4 x i1> 22120 %4 = and <4 x i1> %2, %3 22121 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 22122 %6 = bitcast <8 x i1> %5 to i8 22123 ret i8 %6 22124} 22125 22126 22127 22128define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22129; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask: 22130; VLX: # %bb.0: # %entry 22131; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 22132; VLX-NEXT: kmovd %k0, %eax 22133; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22134; VLX-NEXT: vzeroupper 22135; VLX-NEXT: retq 22136; 22137; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask: 22138; NoVLX: # %bb.0: # %entry 22139; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22140; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22141; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22142; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22143; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22144; NoVLX-NEXT: kmovw %k0, %eax 22145; NoVLX-NEXT: # kill: def 
$ax killed $ax killed $eax 22146; NoVLX-NEXT: vzeroupper 22147; NoVLX-NEXT: retq 22148entry: 22149 %0 = bitcast <4 x i64> %__a to <4 x double> 22150 %1 = bitcast <4 x i64> %__b to <4 x double> 22151 %2 = fcmp oeq <4 x double> %0, %1 22152 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22153 %4 = bitcast <16 x i1> %3 to i16 22154 ret i16 %4 22155} 22156 22157define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 22158; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem: 22159; VLX: # %bb.0: # %entry 22160; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 22161; VLX-NEXT: kmovd %k0, %eax 22162; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22163; VLX-NEXT: vzeroupper 22164; VLX-NEXT: retq 22165; 22166; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem: 22167; NoVLX: # %bb.0: # %entry 22168; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22169; NoVLX-NEXT: vmovapd (%rdi), %ymm1 22170; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22171; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22172; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22173; NoVLX-NEXT: kmovw %k0, %eax 22174; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22175; NoVLX-NEXT: vzeroupper 22176; NoVLX-NEXT: retq 22177entry: 22178 %0 = bitcast <4 x i64> %__a to <4 x double> 22179 %load = load <4 x i64>, ptr %__b 22180 %1 = bitcast <4 x i64> %load to <4 x double> 22181 %2 = fcmp oeq <4 x double> %0, %1 22182 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22183 %4 = bitcast <16 x i1> %3 to i16 22184 ret i16 %4 22185} 22186 22187define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 22188; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: 22189; VLX: # %bb.0: # %entry 22190; VLX-NEXT: 
vcmpeqpd (%rdi){1to4}, %ymm0, %k0 22191; VLX-NEXT: kmovd %k0, %eax 22192; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22193; VLX-NEXT: vzeroupper 22194; VLX-NEXT: retq 22195; 22196; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: 22197; NoVLX: # %bb.0: # %entry 22198; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22199; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 22200; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22201; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22202; NoVLX-NEXT: kmovw %k0, %eax 22203; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22204; NoVLX-NEXT: vzeroupper 22205; NoVLX-NEXT: retq 22206entry: 22207 %0 = bitcast <4 x i64> %__a to <4 x double> 22208 %load = load double, ptr %__b 22209 %vec = insertelement <4 x double> undef, double %load, i32 0 22210 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22211 %2 = fcmp oeq <4 x double> %0, %1 22212 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22213 %4 = bitcast <16 x i1> %3 to i16 22214 ret i16 %4 22215} 22216 22217define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22218; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask: 22219; VLX: # %bb.0: # %entry 22220; VLX-NEXT: kmovd %edi, %k1 22221; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1} 22222; VLX-NEXT: kmovd %k0, %eax 22223; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22224; VLX-NEXT: vzeroupper 22225; VLX-NEXT: retq 22226; 22227; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask: 22228; NoVLX: # %bb.0: # %entry 22229; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22230; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22231; NoVLX-NEXT: kmovw %edi, %k1 22232; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22233; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22234; NoVLX-NEXT: kshiftrw 
$12, %k0, %k0 22235; NoVLX-NEXT: kmovw %k0, %eax 22236; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22237; NoVLX-NEXT: vzeroupper 22238; NoVLX-NEXT: retq 22239entry: 22240 %0 = bitcast <4 x i64> %__a to <4 x double> 22241 %1 = bitcast <4 x i64> %__b to <4 x double> 22242 %2 = fcmp oeq <4 x double> %0, %1 22243 %3 = bitcast i4 %__u to <4 x i1> 22244 %4 = and <4 x i1> %2, %3 22245 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22246 %6 = bitcast <16 x i1> %5 to i16 22247 ret i16 %6 22248} 22249 22250define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 22251; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem: 22252; VLX: # %bb.0: # %entry 22253; VLX-NEXT: kmovd %edi, %k1 22254; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1} 22255; VLX-NEXT: kmovd %k0, %eax 22256; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22257; VLX-NEXT: vzeroupper 22258; VLX-NEXT: retq 22259; 22260; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem: 22261; NoVLX: # %bb.0: # %entry 22262; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22263; NoVLX-NEXT: kmovw %edi, %k1 22264; NoVLX-NEXT: vmovapd (%rsi), %ymm1 22265; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22266; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22267; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22268; NoVLX-NEXT: kmovw %k0, %eax 22269; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22270; NoVLX-NEXT: vzeroupper 22271; NoVLX-NEXT: retq 22272entry: 22273 %0 = bitcast <4 x i64> %__a to <4 x double> 22274 %load = load <4 x i64>, ptr %__b 22275 %1 = bitcast <4 x i64> %load to <4 x double> 22276 %2 = fcmp oeq <4 x double> %0, %1 22277 %3 = bitcast i4 %__u to <4 x i1> 22278 %4 = and <4 x i1> %2, %3 22279 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, 
i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22280 %6 = bitcast <16 x i1> %5 to i16 22281 ret i16 %6 22282} 22283 22284define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 22285; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b: 22286; VLX: # %bb.0: # %entry 22287; VLX-NEXT: kmovd %edi, %k1 22288; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1} 22289; VLX-NEXT: kmovd %k0, %eax 22290; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22291; VLX-NEXT: vzeroupper 22292; VLX-NEXT: retq 22293; 22294; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b: 22295; NoVLX: # %bb.0: # %entry 22296; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22297; NoVLX-NEXT: kmovw %edi, %k1 22298; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 22299; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22300; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22301; NoVLX-NEXT: kmovw %k0, %eax 22302; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22303; NoVLX-NEXT: vzeroupper 22304; NoVLX-NEXT: retq 22305entry: 22306 %0 = bitcast <4 x i64> %__a to <4 x double> 22307 %load = load double, ptr %__b 22308 %vec = insertelement <4 x double> undef, double %load, i32 0 22309 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22310 %2 = fcmp oeq <4 x double> %0, %1 22311 %3 = bitcast i4 %__u to <4 x i1> 22312 %4 = and <4 x i1> %2, %3 22313 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22314 %6 = bitcast <16 x i1> %5 to i16 22315 ret i16 %6 22316} 22317 22318 22319 22320define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22321; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: 22322; VLX: # %bb.0: # %entry 22323; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 22324; VLX-NEXT: kmovd %k0, %eax 22325; VLX-NEXT: 
vzeroupper 22326; VLX-NEXT: retq 22327; 22328; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: 22329; NoVLX: # %bb.0: # %entry 22330; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22331; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22332; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22333; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22334; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22335; NoVLX-NEXT: kmovw %k0, %eax 22336; NoVLX-NEXT: vzeroupper 22337; NoVLX-NEXT: retq 22338entry: 22339 %0 = bitcast <4 x i64> %__a to <4 x double> 22340 %1 = bitcast <4 x i64> %__b to <4 x double> 22341 %2 = fcmp oeq <4 x double> %0, %1 22342 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22343 %4 = bitcast <32 x i1> %3 to i32 22344 ret i32 %4 22345} 22346 22347define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 22348; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: 22349; VLX: # %bb.0: # %entry 22350; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 22351; VLX-NEXT: kmovd %k0, %eax 22352; VLX-NEXT: vzeroupper 22353; VLX-NEXT: retq 22354; 22355; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: 22356; NoVLX: # %bb.0: # %entry 22357; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22358; NoVLX-NEXT: vmovapd (%rdi), %ymm1 22359; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22360; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22361; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22362; NoVLX-NEXT: kmovw %k0, %eax 22363; NoVLX-NEXT: vzeroupper 22364; NoVLX-NEXT: retq 22365entry: 22366 %0 = bitcast <4 x i64> %__a to <4 x double> 22367 %load = load <4 x i64>, ptr %__b 22368 %1 = bitcast <4 x i64> %load to <4 x double> 22369 %2 = fcmp oeq <4 x double> %0, %1 22370 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, 
i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22371 %4 = bitcast <32 x i1> %3 to i32 22372 ret i32 %4 22373} 22374 22375define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 22376; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: 22377; VLX: # %bb.0: # %entry 22378; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 22379; VLX-NEXT: kmovd %k0, %eax 22380; VLX-NEXT: vzeroupper 22381; VLX-NEXT: retq 22382; 22383; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: 22384; NoVLX: # %bb.0: # %entry 22385; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22386; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 22387; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22388; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22389; NoVLX-NEXT: kmovw %k0, %eax 22390; NoVLX-NEXT: vzeroupper 22391; NoVLX-NEXT: retq 22392entry: 22393 %0 = bitcast <4 x i64> %__a to <4 x double> 22394 %load = load double, ptr %__b 22395 %vec = insertelement <4 x double> undef, double %load, i32 0 22396 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22397 %2 = fcmp oeq <4 x double> %0, %1 22398 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22399 %4 = bitcast <32 x i1> %3 to i32 22400 ret i32 %4 22401} 22402 22403define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22404; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask: 22405; VLX: # %bb.0: # %entry 22406; VLX-NEXT: kmovd %edi, %k1 22407; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1} 22408; VLX-NEXT: kmovd %k0, 
%eax 22409; VLX-NEXT: vzeroupper 22410; VLX-NEXT: retq 22411; 22412; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask: 22413; NoVLX: # %bb.0: # %entry 22414; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22415; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22416; NoVLX-NEXT: kmovw %edi, %k1 22417; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22418; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22419; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22420; NoVLX-NEXT: kmovw %k0, %eax 22421; NoVLX-NEXT: vzeroupper 22422; NoVLX-NEXT: retq 22423entry: 22424 %0 = bitcast <4 x i64> %__a to <4 x double> 22425 %1 = bitcast <4 x i64> %__b to <4 x double> 22426 %2 = fcmp oeq <4 x double> %0, %1 22427 %3 = bitcast i4 %__u to <4 x i1> 22428 %4 = and <4 x i1> %2, %3 22429 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22430 %6 = bitcast <32 x i1> %5 to i32 22431 ret i32 %6 22432} 22433 22434define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 22435; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem: 22436; VLX: # %bb.0: # %entry 22437; VLX-NEXT: kmovd %edi, %k1 22438; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1} 22439; VLX-NEXT: kmovd %k0, %eax 22440; VLX-NEXT: vzeroupper 22441; VLX-NEXT: retq 22442; 22443; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem: 22444; NoVLX: # %bb.0: # %entry 22445; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22446; NoVLX-NEXT: kmovw %edi, %k1 22447; NoVLX-NEXT: vmovapd (%rsi), %ymm1 22448; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22449; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22450; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22451; NoVLX-NEXT: kmovw %k0, %eax 22452; NoVLX-NEXT: vzeroupper 22453; NoVLX-NEXT: retq 22454entry: 22455 %0 = 
bitcast <4 x i64> %__a to <4 x double> 22456 %load = load <4 x i64>, ptr %__b 22457 %1 = bitcast <4 x i64> %load to <4 x double> 22458 %2 = fcmp oeq <4 x double> %0, %1 22459 %3 = bitcast i4 %__u to <4 x i1> 22460 %4 = and <4 x i1> %2, %3 22461 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22462 %6 = bitcast <32 x i1> %5 to i32 22463 ret i32 %6 22464} 22465 22466define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 22467; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b: 22468; VLX: # %bb.0: # %entry 22469; VLX-NEXT: kmovd %edi, %k1 22470; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1} 22471; VLX-NEXT: kmovd %k0, %eax 22472; VLX-NEXT: vzeroupper 22473; VLX-NEXT: retq 22474; 22475; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b: 22476; NoVLX: # %bb.0: # %entry 22477; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22478; NoVLX-NEXT: kmovw %edi, %k1 22479; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 22480; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22481; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22482; NoVLX-NEXT: kmovw %k0, %eax 22483; NoVLX-NEXT: vzeroupper 22484; NoVLX-NEXT: retq 22485entry: 22486 %0 = bitcast <4 x i64> %__a to <4 x double> 22487 %load = load double, ptr %__b 22488 %vec = insertelement <4 x double> undef, double %load, i32 0 22489 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22490 %2 = fcmp oeq <4 x double> %0, %1 22491 %3 = bitcast i4 %__u to <4 x i1> 22492 %4 = and <4 x i1> %2, %3 22493 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, 
i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}



; Unmasked ordered-equal compare (fcmp oeq) of two <4 x double> values that
; arrive as <4 x i64>. The <4 x i1> result fills lanes 0-3 of a <64 x i1>
; vector; every other lane is selected from the zeroinitializer shuffle
; operand. The vector is returned as an i64 bitmask.
; Expected codegen: the VLX configuration compares on ymm and reads the mask
; with kmovq; the NoVLX configuration compares on zmm and expects a
; kshiftlw/kshiftrw $12 pair before kmovw.
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x double>
  %1 = bitcast <4 x i64> %__b to <4 x double>
  %2 = fcmp oeq <4 x double> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Memory-operand variant of the <4 x double> -> i64 mask test: the second
; operand is loaded as <4 x i64> from %__b before the fcmp oeq.
; Expected codegen: the VLX configuration folds the load into vcmpeqpd
; ((%rdi) operand); the NoVLX configuration loads it with vmovapd first.
define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
;
VLX-NEXT: retq 22534; 22535; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem: 22536; NoVLX: # %bb.0: # %entry 22537; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22538; NoVLX-NEXT: vmovapd (%rdi), %ymm1 22539; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 22540; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22541; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22542; NoVLX-NEXT: kmovw %k0, %eax 22543; NoVLX-NEXT: vzeroupper 22544; NoVLX-NEXT: retq 22545entry: 22546 %0 = bitcast <4 x i64> %__a to <4 x double> 22547 %load = load <4 x i64>, ptr %__b 22548 %1 = bitcast <4 x i64> %load to <4 x double> 22549 %2 = fcmp oeq <4 x double> %0, %1 22550 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22551 %4 = bitcast <64 x i1> %3 to i64 22552 ret i64 %4 22553} 22554 22555define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, ptr %__b) local_unnamed_addr { 22556; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: 22557; VLX: # %bb.0: # %entry 22558; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 22559; VLX-NEXT: kmovq %k0, %rax 22560; VLX-NEXT: vzeroupper 22561; VLX-NEXT: retq 22562; 22563; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: 22564; NoVLX: # %bb.0: # %entry 22565; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22566; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 22567; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22568; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22569; NoVLX-NEXT: kmovw %k0, %eax 22570; NoVLX-NEXT: vzeroupper 22571; NoVLX-NEXT: retq 22572entry: 22573 %0 = bitcast <4 x i64> %__a to <4 x double> 22574 
%load = load double, ptr %__b 22575 %vec = insertelement <4 x double> undef, double %load, i32 0 22576 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22577 %2 = fcmp oeq <4 x double> %0, %1 22578 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22579 %4 = bitcast <64 x i1> %3 to i64 22580 ret i64 %4 22581} 22582 22583define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 22584; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask: 22585; VLX: # %bb.0: # %entry 22586; VLX-NEXT: kmovd %edi, %k1 22587; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1} 22588; VLX-NEXT: kmovq %k0, %rax 22589; VLX-NEXT: vzeroupper 22590; VLX-NEXT: retq 22591; 22592; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask: 22593; NoVLX: # %bb.0: # %entry 22594; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 22595; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22596; NoVLX-NEXT: kmovw %edi, %k1 22597; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22598; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22599; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22600; NoVLX-NEXT: kmovw %k0, %eax 22601; NoVLX-NEXT: vzeroupper 22602; NoVLX-NEXT: retq 22603entry: 22604 %0 = bitcast <4 x i64> %__a to <4 x double> 22605 %1 = bitcast <4 x i64> %__b to <4 x double> 22606 %2 = fcmp oeq <4 x double> %0, %1 22607 %3 = bitcast i4 %__u to <4 x i1> 22608 %4 = and <4 x i1> %2, %3 22609 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22610 %6 = bitcast <64 x i1> %5 to i64 22611 ret i64 %6 22612} 22613 22614define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 22615; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem: 22616; VLX: # %bb.0: # %entry 22617; VLX-NEXT: kmovd %edi, %k1 22618; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1} 22619; VLX-NEXT: kmovq %k0, %rax 22620; VLX-NEXT: vzeroupper 22621; VLX-NEXT: retq 22622; 22623; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem: 22624; NoVLX: # %bb.0: # %entry 22625; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22626; NoVLX-NEXT: kmovw %edi, %k1 22627; NoVLX-NEXT: vmovapd (%rsi), %ymm1 22628; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22629; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22630; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22631; NoVLX-NEXT: kmovw %k0, %eax 22632; NoVLX-NEXT: vzeroupper 22633; NoVLX-NEXT: retq 22634entry: 22635 %0 = bitcast <4 x i64> %__a to <4 x double> 22636 %load = load <4 x i64>, ptr %__b 22637 %1 = bitcast <4 x i64> %load to <4 x double> 22638 %2 = fcmp oeq <4 x double> %0, %1 22639 %3 = bitcast i4 %__u to <4 x i1> 22640 %4 = and <4 x i1> %2, %3 22641 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, 
i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22642 %6 = bitcast <64 x i1> %5 to i64 22643 ret i64 %6 22644} 22645 22646define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, ptr %__b) local_unnamed_addr { 22647; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b: 22648; VLX: # %bb.0: # %entry 22649; VLX-NEXT: kmovd %edi, %k1 22650; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1} 22651; VLX-NEXT: kmovq %k0, %rax 22652; VLX-NEXT: vzeroupper 22653; VLX-NEXT: retq 22654; 22655; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b: 22656; NoVLX: # %bb.0: # %entry 22657; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 22658; NoVLX-NEXT: kmovw %edi, %k1 22659; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1} 22660; NoVLX-NEXT: kshiftlw $12, %k0, %k0 22661; NoVLX-NEXT: kshiftrw $12, %k0, %k0 22662; NoVLX-NEXT: kmovw %k0, %eax 22663; NoVLX-NEXT: vzeroupper 22664; NoVLX-NEXT: retq 22665entry: 22666 %0 = bitcast <4 x i64> %__a to <4 x double> 22667 %load = load double, ptr %__b 22668 %vec = insertelement <4 x double> undef, double %load, i32 0 22669 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0> 22670 %2 = fcmp oeq <4 x double> %0, %1 22671 %3 = bitcast i4 %__u to <4 x i1> 22672 %4 = and <4 x i1> %2, %3 22673 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7> 22674 %6 = bitcast <64 x i1> %5 to i64 22675 
ret i64 %6
}



; Unmasked ordered-equal compare (fcmp oeq) of two <8 x double> values that
; arrive as <8 x i64>. The <8 x i1> result fills lanes 0-7 of a <16 x i1>
; vector; lanes 8-15 are selected from the zeroinitializer shuffle operand.
; The vector is returned as an i16 bitmask.
; Expected codegen: both configurations use a single zmm vcmpeqpd and differ
; only in the mask move (kmovd with VLX, kmovw without).
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Memory-operand variant of the <8 x double> -> i16 mask test: the second
; operand is loaded as <8 x i64> from %__b before the fcmp oeq.
; Expected codegen: both configurations fold the load into vcmpeqpd as a
; (%rdi) operand.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1>
%2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Broadcast variant of the <8 x double> -> i16 mask test: a scalar double is
; loaded from %__b, inserted into lane 0 and splatted with an all-zero shuffle
; mask before the fcmp oeq.
; Expected codegen: both configurations fold the load into vcmpeqpd as a
; (%rdi){1to8} broadcast operand.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load double, ptr %__b
  %vec = insertelement <8 x double> undef, double %load, i32 0
  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Masked variant of the <8 x double> -> i16 mask test: the fcmp oeq result is
; ANDed with the i8 argument %__u (bitcast to <8 x i1>) before being widened
; to <16 x i1> with zeroinitializer lanes.
; Expected codegen: both configurations move %edi into %k1 and apply it as the
; {%k1} write mask of vcmpeqpd.
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL:
test_masked_vcmpoeqpd_v8i1_v16i1_mask: 22769; NoVLX: # %bb.0: # %entry 22770; NoVLX-NEXT: kmovw %edi, %k1 22771; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22772; NoVLX-NEXT: kmovw %k0, %eax 22773; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22774; NoVLX-NEXT: vzeroupper 22775; NoVLX-NEXT: retq 22776entry: 22777 %0 = bitcast <8 x i64> %__a to <8 x double> 22778 %1 = bitcast <8 x i64> %__b to <8 x double> 22779 %2 = fcmp oeq <8 x double> %0, %1 22780 %3 = bitcast i8 %__u to <8 x i1> 22781 %4 = and <8 x i1> %2, %3 22782 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 22783 %6 = bitcast <16 x i1> %5 to i16 22784 ret i16 %6 22785} 22786 22787define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 22788; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem: 22789; VLX: # %bb.0: # %entry 22790; VLX-NEXT: kmovd %edi, %k1 22791; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} 22792; VLX-NEXT: kmovd %k0, %eax 22793; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22794; VLX-NEXT: vzeroupper 22795; VLX-NEXT: retq 22796; 22797; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem: 22798; NoVLX: # %bb.0: # %entry 22799; NoVLX-NEXT: kmovw %edi, %k1 22800; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} 22801; NoVLX-NEXT: kmovw %k0, %eax 22802; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22803; NoVLX-NEXT: vzeroupper 22804; NoVLX-NEXT: retq 22805entry: 22806 %0 = bitcast <8 x i64> %__a to <8 x double> 22807 %load = load <8 x i64>, ptr %__b 22808 %1 = bitcast <8 x i64> %load to <8 x double> 22809 %2 = fcmp oeq <8 x double> %0, %1 22810 %3 = bitcast i8 %__u to <8 x i1> 22811 %4 = and <8 x i1> %2, %3 22812 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 
12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}

; Masked broadcast variant of the <8 x double> -> i16 mask test: a scalar
; double loaded from %__b is splatted to <8 x double>, compared with fcmp oeq,
; and the result is ANDed with %__u (bitcast to <8 x i1>) before widening.
; Expected codegen: both configurations use a (%rsi){1to8} broadcast operand
; together with the {%k1} write mask taken from %edi.
define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load double, ptr %__b
  %vec = insertelement <8 x double> undef, double %load, i32 0
  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <16 x i1> %5 to i16
  ret i16 %6
}



; SAE variant: calls @llvm.x86.avx512.mask.cmp.pd.512 with immediates i32 2
; and i32 8 and an all-true <8 x i1> mask, then bitcasts the result to i8 and
; zero-extends it to i16.
; Expected codegen: both configurations emit vcmplepd {sae} followed by a
; movzbl of the low byte.
; NOTE(review): the function name says "oeq" but the checks expect vcmplepd
; for predicate immediate 2 - presumably a naming leftover; confirm before
; relying on the name.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: # kill: def $ax killed $ax killed $eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
22859; 22860; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask: 22861; NoVLX: # %bb.0: # %entry 22862; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 22863; NoVLX-NEXT: kmovw %k0, %eax 22864; NoVLX-NEXT: movzbl %al, %eax 22865; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22866; NoVLX-NEXT: vzeroupper 22867; NoVLX-NEXT: retq 22868entry: 22869 %0 = bitcast <8 x i64> %__a to <8 x double> 22870 %1 = bitcast <8 x i64> %__b to <8 x double> 22871 %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) 22872 %3 = bitcast <8 x i1> %2 to i8 22873 %4 = zext i8 %3 to i16 22874 ret i16 %4 22875} 22876 22877define zeroext i16 @test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { 22878; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask: 22879; VLX: # %bb.0: # %entry 22880; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 22881; VLX-NEXT: kmovd %k0, %eax 22882; VLX-NEXT: andb %dil, %al 22883; VLX-NEXT: movzbl %al, %eax 22884; VLX-NEXT: # kill: def $ax killed $ax killed $eax 22885; VLX-NEXT: vzeroupper 22886; VLX-NEXT: retq 22887; 22888; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v16i1_sae_mask: 22889; NoVLX: # %bb.0: # %entry 22890; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 22891; NoVLX-NEXT: kmovw %k0, %eax 22892; NoVLX-NEXT: andb %dil, %al 22893; NoVLX-NEXT: movzbl %al, %eax 22894; NoVLX-NEXT: # kill: def $ax killed $ax killed $eax 22895; NoVLX-NEXT: vzeroupper 22896; NoVLX-NEXT: retq 22897entry: 22898 %0 = bitcast <8 x i64> %__a to <8 x double> 22899 %1 = bitcast <8 x i64> %__b to <8 x double> 22900 %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8) 22901 %3 = bitcast i8 %__u to <8 x i1> 22902 %4 = and <8 x i1> %2, %3 22903 %5 = bitcast <8 x i1> %4 to i8 22904 
%6 = zext i8 %5 to i16
  ret i16 %6
}



; Unmasked ordered-equal compare (fcmp oeq) of two <8 x double> values that
; arrive as <8 x i64>. The <8 x i1> result fills lanes 0-7 of a <32 x i1>
; vector; lanes 8-31 are selected from the zeroinitializer shuffle operand.
; The vector is returned as an i32 bitmask.
; Expected codegen: both configurations use a single zmm vcmpeqpd and differ
; only in the mask move (kmovd with VLX, kmovw without).
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Memory-operand variant of the <8 x double> -> i32 mask test: the second
; operand is loaded as <8 x i64> from %__b before the fcmp oeq.
; Expected codegen: both configurations fold the load into vcmpeqpd as a
; (%rdi) operand.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4,
i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Broadcast variant of the <8 x double> -> i32 mask test: a scalar double is
; loaded from %__b, inserted into lane 0 and splatted with an all-zero shuffle
; mask before the fcmp oeq.
; Expected codegen: both configurations fold the load into vcmpeqpd as a
; (%rdi){1to8} broadcast operand.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load double, ptr %__b
  %vec = insertelement <8 x double> undef, double %load, i32 0
  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

; Masked variant of the <8 x double> -> i32 mask test: the fcmp oeq result is
; ANDed with the i8 argument %__u (bitcast to <8 x i1>) before being widened
; to <32 x i1> with zeroinitializer lanes.
; Expected codegen: both configurations move %edi into %k1 and apply it as the
; {%k1} write mask of vcmpeqpd.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL:
test_masked_vcmpoeqpd_v8i1_v32i1_mask: 22992; NoVLX: # %bb.0: # %entry 22993; NoVLX-NEXT: kmovw %edi, %k1 22994; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1} 22995; NoVLX-NEXT: kmovw %k0, %eax 22996; NoVLX-NEXT: vzeroupper 22997; NoVLX-NEXT: retq 22998entry: 22999 %0 = bitcast <8 x i64> %__a to <8 x double> 23000 %1 = bitcast <8 x i64> %__b to <8 x double> 23001 %2 = fcmp oeq <8 x double> %0, %1 23002 %3 = bitcast i8 %__u to <8 x i1> 23003 %4 = and <8 x i1> %2, %3 23004 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 23005 %6 = bitcast <32 x i1> %5 to i32 23006 ret i32 %6 23007} 23008 23009define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr { 23010; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem: 23011; VLX: # %bb.0: # %entry 23012; VLX-NEXT: kmovd %edi, %k1 23013; VLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} 23014; VLX-NEXT: kmovd %k0, %eax 23015; VLX-NEXT: vzeroupper 23016; VLX-NEXT: retq 23017; 23018; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem: 23019; NoVLX: # %bb.0: # %entry 23020; NoVLX-NEXT: kmovw %edi, %k1 23021; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1} 23022; NoVLX-NEXT: kmovw %k0, %eax 23023; NoVLX-NEXT: vzeroupper 23024; NoVLX-NEXT: retq 23025entry: 23026 %0 = bitcast <8 x i64> %__a to <8 x double> 23027 %load = load <8 x i64>, ptr %__b 23028 %1 = bitcast <8 x i64> %load to <8 x double> 23029 %2 = fcmp oeq <8 x double> %0, %1 23030 %3 = bitcast i8 %__u to <8 x i1> 23031 %4 = and <8 x i1> %2, %3 23032 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 
10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

; Masked broadcast variant of the <8 x double> -> i32 mask test: a scalar
; double loaded from %__b is splatted to <8 x double>, compared with fcmp oeq,
; and the result is ANDed with %__u (bitcast to <8 x i1>) before widening.
; Expected codegen: both configurations use a (%rsi){1to8} broadcast operand
; together with the {%k1} write mask taken from %edi.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load double, ptr %__b
  %vec = insertelement <8 x double> undef, double %load, i32 0
  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}



; SAE variant: calls @llvm.x86.avx512.mask.cmp.pd.512 with immediates i32 2
; and i32 8 and an all-true <8 x i1> mask, then bitcasts the result to i8 and
; zero-extends it to i32.
; Expected codegen: both configurations emit vcmplepd {sae}; the VLX
; configuration reads the mask with kmovb, the NoVLX configuration with kmovw
; followed by movzbl.
; NOTE(review): the function name says "oeq" but the checks expect vcmplepd
; for predicate immediate 2 - presumably a naming leftover; confirm before
; relying on the name.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovb %k0, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
  %3 = bitcast <8 x i1> %2 to i8
  %4 = zext i8 %3 to i32
  ret i32 %4
}

; Masked SAE variant: the intrinsic @llvm.x86.avx512.mask.cmp.pd.512 is still
; called with an all-true mask (immediates i32 2 and i32 8); the i8 argument
; %__u is applied afterwards with an IR-level `and` on <8 x i1>, and the
; result is zero-extended from i8 to i32.
; Expected codegen: both configurations emit an unmasked vcmplepd {sae} and
; then apply the mask in a GPR (andb %dil, %al) before movzbl, rather than
; using a {%k1} write mask.
; NOTE(review): the function name says "oeq" but the checks expect vcmplepd
; for predicate immediate 2 - presumably a naming leftover; confirm before
; relying on the name.
define zeroext i32 @test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: andb %dil, %al
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_sae_mask:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: andb %dil, %al
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = bitcast <8 x i1> %4 to i8
  %6 = zext i8 %5 to i32
  ret i32 %6
}



; Unmasked ordered-equal compare (fcmp oeq) of two <8 x double> values,
; returned as an i64 bitmask: the <8 x i1> result fills lanes 0-7 of a
; <64 x i1> vector whose other lanes come from a zeroinitializer operand.
; Expected codegen: both configurations use a single zmm vcmpeqpd; the VLX
; configuration reads the mask with kmovq, the NoVLX configuration with kmovw.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Unmasked oeq compare of %__a against a full <8 x i64> vector loaded from
; %__b (both bitcast to <8 x double>). The <8 x i1> result is concatenated
; with zeroinitializer lanes up to 64 bits and returned as i64. The expected
; code uses the memory-operand form of vcmpeqpd.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Same as the _mem variant, but the second operand is a single double loaded
; from %__b and splatted to all eight lanes (insertelement + shufflevector
; with an all-zero index mask). The expected code uses the {1to8} broadcast
; memory operand.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load double, ptr %__b
  %vec = insertelement <8 x double> undef, double %load, i32 0
  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Masked register-register variant: the oeq compare result is ANDed with
; %__u (bitcast from i8 to <8 x i1>) before being widened to 64 bits. The
; expected code folds the AND into a {%k1} write-mask on the compare.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Masked variant with the second operand loaded as a full vector from %__b.
; The mask %__u arrives in %edi, so the pointer is in %rsi in the expected
; code, which uses the memory-operand compare under a {%k1} write-mask.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load <8 x i64>, ptr %__b
  %1 = bitcast <8 x i64> %load to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

; Masked variant with a scalar double loaded from %__b and splatted to all
; eight lanes. The expected code combines the {1to8} broadcast operand with
; a {%k1} write-mask on the compare.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, ptr %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load double, ptr %__b
  %vec = insertelement <8 x double> undef, double %load, i32 0
  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}



; Compare through @llvm.x86.avx512.mask.cmp.pd.512 with predicate immediate 2,
; an all-true mask and a final operand of 8; the expected assembly carries
; {sae}. The <8 x i1> result is bitcast to i8 and zero-extended to i64.
; NOTE(review): the function name says "oeq", but immediate 2 is printed as
; the "le" predicate (vcmplepd) in the expected output.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovb %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzbl %al, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
  %3 = bitcast <8 x i1> %2 to i8
  %4 = zext i8 %3 to i64
  ret i64 %4
}

; Masked form of the {sae} compare above: the intrinsic is still called with
; an all-true mask and the result is ANDed with %__u in IR afterwards. The
; expected code performs that AND on general-purpose registers (andb) rather
; than as a write-mask on the compare.
define zeroext i64 @test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
; VLX:       # %bb.0: # %entry
; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    andb %dil, %al
; VLX-NEXT:    movzbl %al, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_sae_mask:
; NoVLX:       # %bb.0: # %entry
; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    andb %dil, %al
; NoVLX-NEXT:    movzbl %al, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
  %3 = bitcast i8 %__u to <8 x i1>
  %4 = and <8 x i1> %2, %3
  %5 = bitcast <8 x i1> %4 to i8
  %6 = zext i8 %5 to i64
  ret i64 %6
}

; Test that we understand that cmpps with rounding control ({sae} here) zeros the upper bits of the mask register.
; The <16 x i1> intrinsic result is round-tripped through i16 and then widened
; to <32 x i1> by shuffling with zeroinitializer. The expected code moves %k0
; straight to %eax after the compare, with no separate instruction to clear
; the upper 16 bits.
define i32 @test_cmpm_rnd_zero(<16 x float> %a, <16 x float> %b) {
; VLX-LABEL: test_cmpm_rnd_zero:
; VLX:       # %bb.0:
; VLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_cmpm_rnd_zero:
; NoVLX:       # %bb.0:
; NoVLX-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
  %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
  %1 = bitcast <16 x i1> %res to i16
  %cast = bitcast i16 %1 to <16 x i1>
  %shuffle = shufflevector <16 x i1> %cast, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %cast2 = bitcast <32 x i1> %shuffle to i32
  ret i32 %cast2
}

; Builds an <8 x i1> whose lower four lanes come from zeroinitializer
; (shuffle indices 4-7) and whose upper four lanes are the <4 x i1> result of
; `icmp ne %a, 0` (indices 0-3). The expected code produces this by shifting
; the test mask left by 4 (kshiftlb with the VLX feature set, kshiftlw
; otherwise).
define i8 @mask_zero_lower(<4 x i32> %a) {
; VLX-LABEL: mask_zero_lower:
; VLX:       # %bb.0:
; VLX-NEXT:    vptestmd %xmm0, %xmm0, %k0
; VLX-NEXT:    kshiftlb $4, %k0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: def $al killed $al killed $eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: mask_zero_lower:
; NoVLX:       # %bb.0:
; NoVLX-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $4, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: def $al killed $al killed $eax
; NoVLX-NEXT:    vzeroupper
; NoVLX-NEXT:    retq
  %cmp = icmp ne <4 x i32> %a, zeroinitializer
  %concat = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  %cast = bitcast <8 x i1> %concat to i8
  ret i8 %cast
}