; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl,-evex512 --show-mc-encoding| FileCheck %s

; 256-bit

define <4 x i64> @vpaddq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq256_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <4 x i64> @vpaddq256_fold_test(<4 x i64> %i, ptr %j) nounwind {
; CHECK-LABEL: vpaddq256_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load <4 x i64>, ptr %j, align 4
  %x = add <4 x i64> %i, %tmp
  ret <4 x i64> %x
}

define <4 x i64> @vpaddq256_broadcast_test(<4 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq256_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xd4,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <4 x i64> %i, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %x
}

define <4 x i64> @vpaddq256_broadcast2_test(<4 x i64> %i, ptr %j.ptr) nounwind {
; CHECK-LABEL: vpaddq256_broadcast2_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xd4,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %j = load i64, ptr %j.ptr
  %j.0 = insertelement <4 x i64> undef, i64 %j, i32 0
  %j.v = shufflevector <4 x i64> %j.0, <4 x i64> undef, <4 x i32> zeroinitializer
  %x = add <4 x i64> %i, %j.v
  ret <4 x i64> %x
}

define <8 x i32> @vpaddd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd256_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <8 x i32> @vpaddd256_fold_test(<8 x i32> %i, ptr %j) nounwind {
; CHECK-LABEL: vpaddd256_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load <8 x i32>, ptr %j, align 4
  %x = add <8 x i32> %i, %tmp
  ret <8 x i32> %x
}

define <8 x i32> @vpaddd256_broadcast_test(<8 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd256_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <8 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i32> %x
}

define <8 x i32> @vpaddd256_mask_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_mask_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x28,0x27,0xca]
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

define <8 x i32> @vpaddd256_maskz_test(<8 x i32> %i, <8 x i32> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_maskz_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x28,0x27,0xca]
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

define <8 x i32> @vpaddd256_mask_fold_test(<8 x i32> %i, ptr %j.ptr, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_mask_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %j = load <8 x i32>, ptr %j.ptr
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

define <8 x i32> @vpaddd256_mask_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_mask_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %i
  ret <8 x i32> %r
}

define <8 x i32> @vpaddd256_maskz_fold_test(<8 x i32> %i, ptr %j.ptr, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_maskz_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %j = load <8 x i32>, ptr %j.ptr
  %x = add <8 x i32> %i, %j
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

define <8 x i32> @vpaddd256_maskz_broadcast_test(<8 x i32> %i, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd256_maskz_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0x75,0x28,0x27,0xc9]
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = add <8 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %r = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
  ret <8 x i32> %r
}

define <4 x i64> @vpsubq256_test(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq256_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @vpsubd256_test(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd256_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <8 x i32> @vpmulld256_test(<8 x i32> %i, <8 x i32> %j) {
; CHECK-LABEL: vpmulld256_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmulld %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x40,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <4 x double> @test_vaddpd_256(<4 x double> %y, <4 x double> %x) {
; CHECK-LABEL: test_vaddpd_256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
entry:
  %add.i = fadd <4 x double> %x, %y
  ret <4 x double> %add.i
}

define <4 x double> @test_fold_vaddpd_256(<4 x double> %y) {
; CHECK-LABEL: test_fold_vaddpd_256:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
entry:
  %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 4.500000e+00, double 5.600000e+00>
  ret <4 x double> %add.i
}

define <8 x float> @test_broadcast_vaddpd_256(<8 x float> %a) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = fadd <8 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <8 x float> %b
}

define <8 x float> @test_mask_vaddps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vaddps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fadd <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

define <8 x float> @test_mask_vmulps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmulps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vmulps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x59,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fmul <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

define <8 x float> @test_mask_vminps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vminps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vminps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5d,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x float> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
  %r = select <8 x i1> %mask, <8 x float> %min, <8 x float> %dst
  ret <8 x float> %r
}

define <8 x float> @test_mask_vmaxps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmaxps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vmaxps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x float> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x float> %i, <8 x float> %j
  %r = select <8 x i1> %mask, <8 x float> %max, <8 x float> %dst
  ret <8 x float> %r
}

define <8 x float> @test_mask_vsubps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vsubps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vsubps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5c,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fsub <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

define <8 x float> @test_mask_vdivps_256(<8 x float> %dst, <8 x float> %i, <8 x float> %j, <8 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vdivps_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0x65,0x28,0x27,0xcb]
; CHECK-NEXT:    vdivps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x29,0x5e,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %x = fdiv <8 x float> %i, %j
  %r = select <8 x i1> %mask, <8 x float> %x, <8 x float> %dst
  ret <8 x float> %r
}

define <4 x double> @test_mask_vmulpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmulpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vmulpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x59,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fmul <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_mask_vminpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vminpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vminpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5d,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp olt <4 x double> %i, %j
  %min = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
  %r = select <4 x i1> %mask, <4 x double> %min, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_mask_vmaxpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmaxpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vmaxpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <4 x double> %i, %j
  %max = select <4 x i1> %cmp_res, <4 x double> %i, <4 x double> %j
  %r = select <4 x i1> %mask, <4 x double> %max, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_mask_vsubpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vsubpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vsubpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5c,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fsub <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_mask_vdivpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vdivpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vdivpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x5e,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fdiv <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_mask_vaddpd_256(<4 x double> %dst, <4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm3, %ymm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x28,0x27,0xcb]
; CHECK-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fadd <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_maskz_vaddpd_256(<4 x double> %i, <4 x double> %j, <4 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_maskz_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %x = fadd <4 x double> %i, %j
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

define <4 x double> @test_mask_fold_vaddpd_256(<4 x double> %dst, <4 x double> %i, ptr %j, <4 x i64> %mask1) nounwind {
; CHECK-LABEL: test_mask_fold_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
; CHECK-NEXT:    vaddpd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x29,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load <4 x double>, ptr %j
  %x = fadd <4 x double> %i, %tmp
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %dst
  ret <4 x double> %r
}

define <4 x double> @test_maskz_fold_vaddpd_256(<4 x double> %i, ptr %j, <4 x i64> %mask1) nounwind {
; CHECK-LABEL: test_maskz_fold_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vaddpd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load <4 x double>, ptr %j
  %x = fadd <4 x double> %i, %tmp
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

define <4 x double> @test_broadcast2_vaddpd_256(<4 x double> %i, ptr %j) nounwind {
; CHECK-LABEL: test_broadcast2_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load double, ptr %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  ret <4 x double> %x
}

define <4 x double> @test_mask_broadcast_vaddpd_256(<4 x double> %dst, <4 x double> %i, ptr %j, <4 x i64> %mask1) nounwind {
; CHECK-LABEL: test_mask_broadcast_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovapd %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc1]
; CHECK-NEXT:    vptestmq %ymm2, %ymm2, %k1 ## encoding: [0x62,0xf2,0xed,0x28,0x27,0xca]
; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x39,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load double, ptr %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> %i
  ret <4 x double> %r
}

define <4 x double> @test_maskz_broadcast_vaddpd_256(<4 x double> %i, ptr %j, <4 x i64> %mask1) nounwind {
; CHECK-LABEL: test_maskz_broadcast_vaddpd_256:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %ymm1, %ymm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x28,0x27,0xc9]
; CHECK-NEXT:    vaddpd (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i64> %mask1, zeroinitializer
  %tmp = load double, ptr %j
  %b = insertelement <4 x double> undef, double %tmp, i32 0
  %c = shufflevector <4 x double> %b, <4 x double> undef, <4 x i32> zeroinitializer
  %x = fadd <4 x double> %c, %i
  %r = select <4 x i1> %mask, <4 x double> %x, <4 x double> zeroinitializer
  ret <4 x double> %r
}

; 128-bit

define <2 x i64> @vpaddq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq128_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <2 x i64> @vpaddq128_fold_test(<2 x i64> %i, ptr %j) nounwind {
; CHECK-LABEL: vpaddq128_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load <2 x i64>, ptr %j, align 4
  %x = add <2 x i64> %i, %tmp
  ret <2 x i64> %x
}

define <2 x i64> @vpaddq128_broadcast2_test(<2 x i64> %i, ptr %j) nounwind {
; CHECK-LABEL: vpaddq128_broadcast2_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xd4,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load i64, ptr %j
  %j.0 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <2 x i64> %j.0, i64 %tmp, i32 1
  %x = add <2 x i64> %i, %j.1
  ret <2 x i64> %x
}

define <4 x i32> @vpaddd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd128_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <4 x i32> @vpaddd128_fold_test(<4 x i32> %i, ptr %j) nounwind {
; CHECK-LABEL: vpaddd128_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load <4 x i32>, ptr %j, align 4
  %x = add <4 x i32> %i, %tmp
  ret <4 x i32> %x
}

define <4 x i32> @vpaddd128_broadcast_test(<4 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd128_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = add <4 x i32> %i, <i32 6, i32 6, i32 6, i32 6>
  ret <4 x i32> %x
}

define <4 x i32> @vpaddd128_mask_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_mask_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x08,0x27,0xca]
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

define <4 x i32> @vpaddd128_maskz_test(<4 x i32> %i, <4 x i32> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_maskz_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0x6d,0x08,0x27,0xca]
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

define <4 x i32> @vpaddd128_mask_fold_test(<4 x i32> %i, ptr %j.ptr, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_mask_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %j = load <4 x i32>, ptr %j.ptr
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

define <4 x i32> @vpaddd128_mask_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_mask_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, <i32 7, i32 7, i32 7, i32 7>
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %i
  ret <4 x i32> %r
}

define <4 x i32> @vpaddd128_maskz_fold_test(<4 x i32> %i, ptr %j.ptr, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_maskz_fold_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %j = load <4 x i32>, ptr %j.ptr
  %x = add <4 x i32> %i, %j
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

define <4 x i32> @vpaddd128_maskz_broadcast_test(<4 x i32> %i, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd128_maskz_broadcast_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0x75,0x08,0x27,0xc9]
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = add <4 x i32> %i, <i32 8, i32 8, i32 8, i32 8>
  %r = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
  ret <4 x i32> %r
}

define <2 x i64> @vpsubq128_test(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq128_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @vpsubd128_test(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd128_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <4 x i32> @vpmulld128_test(<4 x i32> %i, <4 x i32> %j) {
; CHECK-LABEL: vpmulld128_test:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpmulld %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x40,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %x = mul <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <2 x double> @test_vaddpd_128(<2 x double> %y, <2 x double> %x) {
; CHECK-LABEL: test_vaddpd_128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
entry:
  %add.i = fadd <2 x double> %x, %y
  ret <2 x double> %add.i
}

define <2 x double> @test_fold_vaddpd_128(<2 x double> %y) {
; CHECK-LABEL: test_fold_vaddpd_128:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 4, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
entry:
  %add.i = fadd <2 x double> %y, <double 4.500000e+00, double 3.400000e+00>
  ret <2 x double> %add.i
}

define <4 x float> @test_broadcast_vaddpd_128(<4 x float> %a) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0x05,A,A,A,A]
; CHECK-NEXT:    ## fixup A - offset: 6, value: {{\.?LCPI[0-9]+_[0-9]+}}-4, kind: reloc_riprel_4byte
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = fadd <4 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <4 x float> %b
}

define <4 x float> @test_mask_vaddps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vaddps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vaddps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fadd <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

define <4 x float> @test_mask_vmulps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmulps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vmulps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x59,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fmul <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

define <4 x float> @test_mask_vminps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vminps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vminps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5d,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <4 x float> %i, %j
  %min = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
  %r = select <4 x i1> %mask, <4 x float> %min, <4 x float> %dst
  ret <4 x float> %r
}

define <4 x float> @test_mask_vmaxps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmaxps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vmaxps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <4 x float> %i, %j
  %max = select <4 x i1> %cmp_res, <4 x float> %i, <4 x float> %j
  %r = select <4 x i1> %mask, <4 x float> %max, <4 x float> %dst
  ret <4 x float> %r
}

define <4 x float> @test_mask_vsubps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vsubps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vsubps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5c,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fsub <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}


define <4 x float> @test_mask_vdivps_128(<4 x float> %dst, <4 x float> %i, <4 x float> %j, <4 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vdivps_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmd %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0x65,0x08,0x27,0xcb]
; CHECK-NEXT:    vdivps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x74,0x09,0x5e,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <4 x i32> %mask1, zeroinitializer
  %x = fdiv <4 x float> %i, %j
  %r = select <4 x i1> %mask, <4 x float> %x, <4 x float> %dst
  ret <4 x float> %r
}

define <2 x double> @test_mask_vmulpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmulpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
; CHECK-NEXT:    vmulpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x59,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fmul <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

define <2 x double> @test_mask_vminpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vminpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
; CHECK-NEXT:    vminpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5d,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp olt <2 x double> %i, %j
  %min = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
  %r = select <2 x i1> %mask, <2 x double> %min, <2 x double> %dst
  ret <2 x double> %r
}

define <2 x double> @test_mask_vmaxpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vmaxpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
; CHECK-NEXT:    vmaxpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5f,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <2 x double> %i, %j
  %max = select <2 x i1> %cmp_res, <2 x double> %i, <2 x double> %j
  %r = select <2 x i1> %mask, <2 x double> %max, <2 x double> %dst
  ret <2 x double> %r
}

define <2 x double> @test_mask_vsubpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vsubpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
; CHECK-NEXT:    vsubpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5c,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fsub <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

define <2 x double> @test_mask_vdivpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vdivpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
; CHECK-NEXT:    vdivpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x5e,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fdiv <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

define <2 x double> @test_mask_vaddpd_128(<2 x double> %dst, <2 x double> %i, <2 x double> %j, <2 x i64> %mask1) nounwind readnone {
; CHECK-LABEL: test_mask_vaddpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm3, %xmm3, %k1 ## encoding: [0x62,0xf2,0xe5,0x08,0x27,0xcb]
; CHECK-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fadd <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

define <2 x double> @test_maskz_vaddpd_128(<2 x double> %i, <2 x double> %j,
; CHECK-LABEL: test_maskz_vaddpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
                                           <2 x i64> %mask1) nounwind readnone {
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %x = fadd <2 x double> %i, %j
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}

define <2 x double> @test_mask_fold_vaddpd_128(<2 x double> %dst, <2 x double> %i, ptr %j, <2 x i64> %mask1) nounwind {
; CHECK-LABEL: test_mask_fold_vaddpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
; CHECK-NEXT:    vaddpd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x09,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load <2 x double>, ptr %j
  %x = fadd <2 x double> %i, %tmp
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %dst
  ret <2 x double> %r
}

define <2 x double> @test_maskz_fold_vaddpd_128(<2 x double> %i, ptr %j, <2 x i64> %mask1) nounwind {
; CHECK-LABEL: test_maskz_fold_vaddpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vaddpd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load <2 x double>, ptr %j
  %x = fadd <2 x double> %i, %tmp
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}

define <2 x double> @test_broadcast2_vaddpd_128(<2 x double> %i, ptr %j) nounwind {
; CHECK-LABEL: test_broadcast2_vaddpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vaddpd (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %tmp = load double, ptr %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  ret <2 x double> %x
}

define <2 x double> @test_mask_broadcast_vaddpd_128(<2 x double> %dst, <2 x double> %i, ptr %j, <2 x i64> %mask1) nounwind {
; CHECK-LABEL: test_mask_broadcast_vaddpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovapd %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-NEXT:    vptestmq %xmm2, %xmm2, %k1 ## encoding: [0x62,0xf2,0xed,0x08,0x27,0xca]
; CHECK-NEXT:    vaddpd (%rdi){1to2}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf5,0x19,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load double, ptr %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> %i
  ret <2 x double> %r
}

define <2 x double> @test_maskz_broadcast_vaddpd_128(<2 x double> %i, ptr %j, <2 x i64> %mask1) nounwind {
; CHECK-LABEL: test_maskz_broadcast_vaddpd_128:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmq %xmm1, %xmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x08,0x27,0xc9]
; CHECK-NEXT:    vaddpd (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x58,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %mask = icmp ne <2 x i64> %mask1, zeroinitializer
  %tmp = load double, ptr %j
  %j.0 = insertelement <2 x double> undef, double %tmp, i64 0
  %j.1 = insertelement <2 x double> %j.0, double %tmp, i64 1
  %x = fadd <2 x double> %j.1, %i
  %r = select <2 x i1> %mask, <2 x double> %x, <2 x double> zeroinitializer
  ret <2 x double> %r
}