; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX

; Codegen expectations for vector arithmetic and logic operations under five
; AVX-512 feature combinations. The shared prefix is used wherever all five
; runs are expected to emit the same assembly; the per-feature prefixes are
; used where the expected instruction selection differs.

; Packed floating-point add and subtract on zmm registers, in register,
; constant-pool and memory-operand forms.

define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: addpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %x, %y
  ret <8 x double> %add.i
}

define <8 x double> @addpd512fold(<8 x double> %y) {
; CHECK-LABEL: addpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.800000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %add.i
}

define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: addps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %x, %y
  ret <16 x float> %add.i
}

define <16 x float> @addps512fold(<16 x float> %y) {
; CHECK-LABEL: addps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %add.i = fadd <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 4.500000e+00, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %add.i
}

define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: subpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <8 x double> %x, %y
  ret <8 x double> %sub.i
}

define <8 x double> @subpd512fold(<8 x double> %y, ptr %x) {
; CHECK-LABEL: subpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubpd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <8 x double>, ptr %x, align 8
  %sub.i = fsub <8 x double> %y, %tmp2
  ret <8 x double> %sub.i
}

define <16 x float> @subps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: subps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %sub.i = fsub <16 x float> %x, %y
  ret <16 x float> %sub.i
}

define <16 x float> @subps512fold(<16 x float> %y, ptr %x) {
; CHECK-LABEL: subps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsubps (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %tmp2 = load <16 x float>, ptr %x, align 4
  %sub.i = fsub <16 x float> %y, %tmp2
  ret <16 x float> %sub.i
}

; Vector i64 multiply at 512, 256 and 128 bits. The runs without avx512dq
; expect a vpmuludq/shift/add expansion. The avx512dq-only run expects
; vpmullq on zmm registers even for the narrower types, while the run that
; also enables avx512vl expects vpmullq at the native width.

define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) {
; AVX512F-LABEL: imulq512:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512F-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512F-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512F-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512F-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512F-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512F-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq512:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512VL-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512VL-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512VL-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512VL-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512VL-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512VL-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq512:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %zmm1, %zmm2
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm2, %zmm2
; AVX512BW-NEXT:    vpsrlq $32, %zmm0, %zmm3
; AVX512BW-NEXT:    vpmuludq %zmm3, %zmm1, %zmm3
; AVX512BW-NEXT:    vpaddq %zmm2, %zmm3, %zmm2
; AVX512BW-NEXT:    vpsllq $32, %zmm2, %zmm2
; AVX512BW-NEXT:    vpmuludq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq512:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq512:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; SKX-NEXT:    retq
  %z = mul <8 x i64>%x, %y
  ret <8 x i64>%z
}

define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) {
; AVX512F-LABEL: imulq256:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512F-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512F-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512F-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512F-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq256:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512VL-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512VL-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512VL-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512VL-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq256:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %ymm1, %ymm2
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm2, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm0, %ymm3
; AVX512BW-NEXT:    vpmuludq %ymm3, %ymm1, %ymm3
; AVX512BW-NEXT:    vpaddq %ymm2, %ymm3, %ymm2
; AVX512BW-NEXT:    vpsllq $32, %ymm2, %ymm2
; AVX512BW-NEXT:    vpmuludq %ymm0, %ymm1, %ymm0
; AVX512BW-NEXT:    vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq256:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq256:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %ymm0, %ymm1, %ymm0
; SKX-NEXT:    retq
  %z = mul <4 x i64>%x, %y
  ret <4 x i64>%z
}

define <4 x i64> @imulq256_bcast(<4 x i64> %x) {
; AVX512F-LABEL: imulq256_bcast:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1337,1337,1337,1337]
; AVX512F-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2
; AVX512F-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX512F-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512F-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq256_bcast:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1337,1337,1337,1337]
; AVX512VL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX512VL-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512VL-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq256_bcast:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [1337,1337,1337,1337]
; AVX512BW-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2
; AVX512BW-NEXT:    vpsrlq $32, %ymm0, %ymm0
; AVX512BW-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT:    vpsllq $32, %ymm0, %ymm0
; AVX512BW-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq256_bcast:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT:    vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq256_bcast:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; SKX-NEXT:    retq
  %z = mul <4 x i64> %x, <i64 1337, i64 1337, i64 1337, i64 1337>
  ret <4 x i64>%z
}

define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) {
; AVX512F-LABEL: imulq128:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512F-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512F-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512F-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512F-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512F-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq128:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512VL-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512VL-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512VL-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512VL-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512VL-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq128:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpsrlq $32, %xmm1, %xmm2
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm2, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm0, %xmm3
; AVX512BW-NEXT:    vpmuludq %xmm3, %xmm1, %xmm3
; AVX512BW-NEXT:    vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT:    vpsllq $32, %xmm2, %xmm2
; AVX512BW-NEXT:    vpmuludq %xmm0, %xmm1, %xmm0
; AVX512BW-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq128:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vpmullq %zmm0, %zmm1, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq128:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq %xmm0, %xmm1, %xmm0
; SKX-NEXT:    retq
  %z = mul <2 x i64>%x, %y
  ret <2 x i64>%z
}

define <2 x i64> @imulq128_bcast(<2 x i64> %x) {
; AVX512F-LABEL: imulq128_bcast:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [8086,8086]
; AVX512F-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; AVX512F-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX512F-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX512F-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512F-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: imulq128_bcast:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [8086,8086]
; AVX512VL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; AVX512VL-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX512VL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512VL-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: imulq128_bcast:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [8086,8086]
; AVX512BW-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2
; AVX512BW-NEXT:    vpsrlq $32, %xmm0, %xmm0
; AVX512BW-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT:    vpsllq $32, %xmm0, %xmm0
; AVX512BW-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: imulq128_bcast:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT:    vpmovsxwq {{.*#+}} xmm1 = [8086,8086]
; AVX512DQ-NEXT:    vpmullq %zmm1, %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: imulq128_bcast:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmullq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; SKX-NEXT:    retq
  %z = mul <2 x i64> %x, <i64 8086, i64 8086>
  ret <2 x i64>%z
}

; Packed floating-point multiply and divide on zmm registers, in register
; and constant-pool forms.

define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: mulpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %x, %y
  ret <8 x double> %mul.i
}

define <8 x double> @mulpd512fold(<8 x double> %y) {
; CHECK-LABEL: mulpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %mul.i
}

define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: mulps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %x, %y
  ret <16 x float> %mul.i
}

define <16 x float> @mulps512fold(<16 x float> %y) {
; CHECK-LABEL: mulps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %mul.i = fmul <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %mul.i
}

define <8 x double> @divpd512(<8 x double> %y, <8 x double> %x) {
; CHECK-LABEL: divpd512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %x, %y
  ret <8 x double> %div.i
}

define <8 x double> @divpd512fold(<8 x double> %y) {
; CHECK-LABEL: divpd512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <8 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00, double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
  ret <8 x double> %div.i
}

define <16 x float> @divps512(<16 x float> %y, <16 x float> %x) {
; CHECK-LABEL: divps512:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivps %zmm0, %zmm1, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %x, %y
  ret <16 x float> %div.i
}

define <16 x float> @divps512fold(<16 x float> %y) {
; CHECK-LABEL: divps512fold:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vdivps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; CHECK-NEXT:    retq
entry:
  %div.i = fdiv <16 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 4.500000e+00, float 0x4002666660000000, float 0x3FF3333340000000>
  ret <16 x float> %div.i
}

; Vector integer add, subtract and multiply on zmm registers, covering
; register, folded-load, embedded-broadcast, merge-masked and zero-masked
; forms.

define <8 x i64> @vpaddq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpaddq_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_fold_test(<8 x i64> %i, ptr %j) nounwind {
; CHECK-LABEL: vpaddq_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <8 x i64>, ptr %j, align 4
  %x = add <8 x i64> %i, %tmp
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast_test(<8 x i64> %i) nounwind {
; CHECK-LABEL: vpaddq_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <8 x i64> %i, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %x
}

define <8 x i64> @vpaddq_broadcast2_test(<8 x i64> %i, ptr %j) nounwind {
; CHECK-LABEL: vpaddq_broadcast2_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load i64, ptr %j
  %j.0 = insertelement <8 x i64> undef, i64 %tmp, i32 0
  %j.1 = insertelement <8 x i64> %j.0, i64 %tmp, i32 1
  %j.2 = insertelement <8 x i64> %j.1, i64 %tmp, i32 2
  %j.3 = insertelement <8 x i64> %j.2, i64 %tmp, i32 3
  %j.4 = insertelement <8 x i64> %j.3, i64 %tmp, i32 4
  %j.5 = insertelement <8 x i64> %j.4, i64 %tmp, i32 5
  %j.6 = insertelement <8 x i64> %j.5, i64 %tmp, i32 6
  %j.7 = insertelement <8 x i64> %j.6, i64 %tmp, i32 7
  %x = add <8 x i64> %i, %j.7
  ret <8 x i64> %x
}

define <16 x i32> @vpaddd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpaddd_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_fold_test(<16 x i32> %i, ptr %j) nounwind {
; CHECK-LABEL: vpaddd_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0
; CHECK-NEXT:    retq
  %tmp = load <16 x i32>, ptr %j, align 4
  %x = add <16 x i32> %i, %tmp
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_broadcast_test(<16 x i32> %i) nounwind {
; CHECK-LABEL: vpaddd_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = add <16 x i32> %i, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i32> %x
}

define <16 x i32> @vpaddd_mask_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_test(<16 x i32> %i, <16 x i32> %j, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm2, %zmm2, %k1
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_fold_test(<16 x i32> %i, ptr %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, ptr %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_mask_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_mask_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %i
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_fold_test(<16 x i32> %i, ptr %j.ptr, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_fold_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %j = load <16 x i32>, ptr %j.ptr
  %x = add <16 x i32> %i, %j
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <16 x i32> @vpaddd_maskz_broadcast_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: vpaddd_maskz_broadcast_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = add <16 x i32> %i, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  %r = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
  ret <16 x i32> %r
}

define <8 x i64> @vpsubq_test(<8 x i64> %i, <8 x i64> %j) nounwind readnone {
; CHECK-LABEL: vpsubq_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <8 x i64> %i, %j
  ret <8 x i64> %x
}

define <16 x i32> @vpsubd_test(<16 x i32> %i, <16 x i32> %j) nounwind readnone {
; CHECK-LABEL: vpsubd_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = sub <16 x i32> %i, %j
  ret <16 x i32> %x
}

define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) {
; CHECK-LABEL: vpmulld_test:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = mul <16 x i32> %i, %j
  ret <16 x i32> %x
}

; Square root through readnone sqrtf/sqrt calls and through the llvm.sqrt
; intrinsics (scalar and 512-bit vector); each is expected to become a
; single vsqrt instruction.

declare float @sqrtf(float) readnone
define float @sqrtA(float %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtA:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %conv1 = tail call float @sqrtf(float %a) nounwind readnone
  ret float %conv1
}

declare double @sqrt(double) readnone
define double @sqrtB(double %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: sqrtB:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
entry:
  %call = tail call double @sqrt(double %a) nounwind readnone
  ret double %call
}

declare float @llvm.sqrt.f32(float)
define float @sqrtC(float %a) nounwind {
; CHECK-LABEL: sqrtC:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
define <16 x float> @sqrtD(<16 x float> %a) nounwind {
; CHECK-LABEL: sqrtD:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a)
  ret <16 x float> %b
}

declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
define <8 x double> @sqrtE(<8 x double> %a) nounwind {
; CHECK-LABEL: sqrtE:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a)
  ret <8 x double> %b
}

; Splat constants and scalar loads folded as embedded-broadcast operands,
; plus bitwise or/and. For the bitwise operations the runs with avx512dq
; expect the floating-point domain mnemonics (vorpd, vandps, vandpd) and the
; other runs expect the integer ones (vporq, vpandd, vpandq).

define <16 x float> @fadd_broadcast(<16 x float> %a) nounwind {
; CHECK-LABEL: fadd_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = fadd <16 x float> %a, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
  ret <16 x float> %b
}

define <8 x i64> @addq_broadcast(<8 x i64> %a) nounwind {
; CHECK-LABEL: addq_broadcast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %b = add <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
; AVX512F-LABEL: orq_broadcast:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: orq_broadcast:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: orq_broadcast:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: orq_broadcast:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: orq_broadcast:
; SKX:       # %bb.0:
; SKX-NEXT:    vorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
  %b = or <8 x i64> %a, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %b
}

define <16 x i32> @andd512fold(<16 x i32> %y, ptr %x) {
; AVX512F-LABEL: andd512fold:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpandd (%rdi), %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: andd512fold:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpandd (%rdi), %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: andd512fold:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpandd (%rdi), %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: andd512fold:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vandps (%rdi), %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: andd512fold:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vandps (%rdi), %zmm0, %zmm0
; SKX-NEXT:    retq
entry:
  %a = load <16 x i32>, ptr %x, align 4
  %b = and <16 x i32> %y, %a
  ret <16 x i32> %b
}

define <8 x i64> @andqbrst(<8 x i64> %p1, ptr %ap) {
; AVX512F-LABEL: andqbrst:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: andqbrst:
; AVX512VL:       # %bb.0: # %entry
; AVX512VL-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: andqbrst:
; AVX512BW:       # %bb.0: # %entry
; AVX512BW-NEXT:    vpandq (%rdi){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: andqbrst:
; AVX512DQ:       # %bb.0: # %entry
; AVX512DQ-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: andqbrst:
; SKX:       # %bb.0: # %entry
; SKX-NEXT:    vandpd (%rdi){1to8}, %zmm0, %zmm0
; SKX-NEXT:    retq
entry:
  %a = load i64, ptr %ap, align 8
  %b = insertelement <8 x i64> undef, i64 %a, i32 0
  %c = shufflevector <8 x i64> %b, <8 x i64> undef, <8 x i32> zeroinitializer
  %d = and <8 x i64> %p1, %c
  ret <8 x i64>%d
}

; Merge-masked floating-point operations. Each function selects between the
; computed result and %dst using (%mask1 != 0) as the per-lane condition.
; Min and max are written as an fcmp followed by a select. The assertions
; sit between the two halves of each multi-line signature because the
; generator places them after the first line of the define.

define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vaddps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vaddps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fadd <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmulps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vmulps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fmul <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vminps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vminps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <16 x float> %i, %j
  %min = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %min, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vminpd:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vminpd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestmd %ymm3, %ymm3, %k1
; AVX512VL-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vminpd:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512BW-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vminpd:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512DQ-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vminpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1
; SKX-NEXT:    vminpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp olt <8 x double> %i, %j
  %min = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %min, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vmaxps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vmaxps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <16 x float> %i, %j
  %max = select <16 x i1> %cmp_res, <16 x float> %i, <16 x float> %j
  %r = select <16 x i1> %mask, <16 x float> %max, <16 x float> %dst
  ret <16 x float> %r
}

define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i,
; AVX512F-LABEL: test_mask_vmaxpd:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512F-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512F-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: test_mask_vmaxpd:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vptestmd %ymm3, %ymm3, %k1
; AVX512VL-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512VL-NEXT:    retq
;
; AVX512BW-LABEL: test_mask_vmaxpd:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512BW-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512BW-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; AVX512DQ-LABEL: test_mask_vmaxpd:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    # kill: def $ymm3 killed $ymm3 def $zmm3
; AVX512DQ-NEXT:    vptestmd %zmm3, %zmm3, %k1
; AVX512DQ-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; AVX512DQ-NEXT:    retq
;
; SKX-LABEL: test_mask_vmaxpd:
; SKX:       # %bb.0:
; SKX-NEXT:    vptestmd %ymm3, %ymm3, %k1
; SKX-NEXT:    vmaxpd %zmm2, %zmm1, %zmm0 {%k1}
; SKX-NEXT:    retq
                                     <8 x double> %j, <8 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <8 x i32> %mask1, zeroinitializer
  %cmp_res = fcmp ogt <8 x double> %i, %j
  %max = select <8 x i1> %cmp_res, <8 x double> %i, <8 x double> %j
  %r = select <8 x i1> %mask, <8 x double> %max, <8 x double> %dst
  ret <8 x double> %r
}

define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i,
; CHECK-LABEL: test_mask_vsubps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vptestmd %zmm3, %zmm3, %k1
; CHECK-NEXT:    vsubps %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
                                     <16 x float> %j, <16 x i32> %mask1)
                                     nounwind readnone {
  %mask = icmp ne <16 x i32> %mask1, zeroinitializer
  %x = fsub <16 x float> %i, %j
  %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst
  ret <16 x float> %r
}

define <16 x float> @test_mask_vdivps(<16 x
float> %dst, <16 x float> %i, 916; CHECK-LABEL: test_mask_vdivps: 917; CHECK: # %bb.0: 918; CHECK-NEXT: vptestmd %zmm3, %zmm3, %k1 919; CHECK-NEXT: vdivps %zmm2, %zmm1, %zmm0 {%k1} 920; CHECK-NEXT: retq 921 <16 x float> %j, <16 x i32> %mask1) 922 nounwind readnone { 923 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 924 %x = fdiv <16 x float> %i, %j 925 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %dst 926 ret <16 x float> %r 927} 928 929define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, 930; CHECK-LABEL: test_mask_vaddpd: 931; CHECK: # %bb.0: 932; CHECK-NEXT: vptestmq %zmm3, %zmm3, %k1 933; CHECK-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} 934; CHECK-NEXT: retq 935 <8 x double> %j, <8 x i64> %mask1) 936 nounwind readnone { 937 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 938 %x = fadd <8 x double> %i, %j 939 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst 940 ret <8 x double> %r 941} 942 943define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, 944; CHECK-LABEL: test_maskz_vaddpd: 945; CHECK: # %bb.0: 946; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1 947; CHECK-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} 948; CHECK-NEXT: retq 949 <8 x i64> %mask1) nounwind readnone { 950 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 951 %x = fadd <8 x double> %i, %j 952 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> zeroinitializer 953 ret <8 x double> %r 954} 955 956define <8 x double> @test_mask_fold_vaddpd(<8 x double> %dst, <8 x double> %i, 957; CHECK-LABEL: test_mask_fold_vaddpd: 958; CHECK: # %bb.0: 959; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1 960; CHECK-NEXT: vaddpd (%rdi), %zmm1, %zmm0 {%k1} 961; CHECK-NEXT: retq 962 ptr %j, <8 x i64> %mask1) 963 nounwind { 964 %mask = icmp ne <8 x i64> %mask1, zeroinitializer 965 %tmp = load <8 x double>, ptr %j, align 8 966 %x = fadd <8 x double> %i, %tmp 967 %r = select <8 x i1> %mask, <8 x double> %x, <8 x double> %dst 968 ret <8 x double> %r 969} 970 
; Zero-masked 8 x double add whose second operand is a full vector loaded from
; %j; inactive lanes are set to zero.
define <8 x double> @test_maskz_fold_vaddpd(<8 x double> %i, ptr %j, <8 x i64> %mask1) nounwind {
; CHECK-LABEL: test_maskz_fold_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vaddpd (%rdi), %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %active = icmp ne <8 x i64> %mask1, zeroinitializer
  %loaded = load <8 x double>, ptr %j, align 8
  %sum = fadd <8 x double> %i, %loaded
  %zeroed = select <8 x i1> %active, <8 x double> %sum, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}

; Unmasked add of %i with a scalar double loaded from %j and splatted to all
; eight lanes (insertelement into lane 0 + all-zero shuffle mask).
define <8 x double> @test_broadcast_vaddpd(<8 x double> %i, ptr %j) nounwind {
; CHECK-LABEL: test_broadcast_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0
; CHECK-NEXT: retq
  %scalar = load double, ptr %j
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %sum = fadd <8 x double> %splat, %i
  ret <8 x double> %sum
}

; Merge-masked add of %i with a splatted scalar loaded from %j.
; NOTE(review): the pass-through operand of the select is %i, not %dst, so
; %dst is never read; the recorded assembly (copy of %zmm1 into %zmm0) matches
; that. Confirm whether %dst was the intended pass-through before changing it.
define <8 x double> @test_mask_broadcast_vaddpd(<8 x double> %dst, <8 x double> %i, ptr %j, <8 x i64> %mask1) nounwind {
; CHECK-LABEL: test_mask_broadcast_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: vptestmq %zmm2, %zmm2, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %active = icmp ne <8 x i64> %mask1, zeroinitializer
  %scalar = load double, ptr %j
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %sum = fadd <8 x double> %splat, %i
  %merged = select <8 x i1> %active, <8 x double> %sum, <8 x double> %i
  ret <8 x double> %merged
}

; Zero-masked add of %i with a splatted scalar loaded from %j; inactive lanes
; are set to zero.
define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, ptr %j, <8 x i64> %mask1) nounwind {
; CHECK-LABEL: test_maskz_broadcast_vaddpd:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
; CHECK-NEXT: vaddpd (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT: retq
  %active = icmp ne <8 x i64> %mask1, zeroinitializer
  %scalar = load double, ptr %j
  %lane0 = insertelement <8 x double> undef, double %scalar, i32 0
  %splat = shufflevector <8 x double> %lane0, <8 x double> undef, <8 x i32> zeroinitializer
  %sum = fadd <8 x double> %splat, %i
  %zeroed = select <8 x i1> %active, <8 x double> %sum, <8 x double> zeroinitializer
  ret <8 x double> %zeroed
}

; 16 x float negation spelled as (-0.0 - %a); the expected assembly is a single
; XOR against a broadcast constant, with the mnemonic differing per run
; configuration.
define <16 x float> @test_fxor(<16 x float> %a) {
; AVX512F-LABEL: test_fxor:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor:
; SKX: # %bb.0:
; SKX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq

  %neg = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <16 x float> %neg
}

; 8 x float (256-bit) variant of the (-0.0 - %a) negation; expectations are
; recorded per run configuration.
define <8 x float> @test_fxor_8f32(<8 x float> %a) {
; AVX512F-LABEL: test_fxor_8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512F-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_fxor_8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpxord {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: test_fxor_8f32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512BW-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: test_fxor_8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vbroadcastss {{.*#+}} ymm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; AVX512DQ-NEXT: vxorps %ymm1, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: test_fxor_8f32:
; SKX: # %bb.0:
; SKX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT: retq
  %neg = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
  ret <8 x float> %neg
}

; Absolute value of an 8 x double vector through the llvm.fabs intrinsic; the
; expected assembly is a single AND against a broadcast constant.
define <8 x double> @fabs_v8f64(<8 x double> %p) {
; AVX512F-LABEL: fabs_v8f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v8f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v8f64:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v8f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v8f64:
; SKX: # %bb.0:
; SKX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; SKX-NEXT: retq
  %abs = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %abs
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

; Absolute value of a 16 x float vector through the llvm.fabs intrinsic; the
; expected assembly is a single AND against a broadcast constant.
define <16 x float> @fabs_v16f32(<16 x float> %p) {
; AVX512F-LABEL: fabs_v16f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fabs_v16f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: fabs_v16f32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: fabs_v16f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-NEXT: retq
;
; SKX-LABEL: fabs_v16f32:
; SKX: # %bb.0:
; SKX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; SKX-NEXT: retq
  %abs = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %abs
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)

; Masked increment: lanes whose %mask1 element is non-zero receive %i + 1, the
; rest keep %i. The expected assembly subtracts an all-ones vector under mask.
define <16 x i32> @masked_inc_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: masked_inc_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; CHECK-NEXT: vpsubd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %active = icmp ne <16 x i32> %mask1, zeroinitializer
  %plus1 = add <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %merged = select <16 x i1> %active, <16 x i32> %plus1, <16 x i32> %i
  ret <16 x i32> %merged
}

; Masked decrement: lanes whose %mask1 element is non-zero receive %i - 1, the
; rest keep %i. The expected assembly adds an all-ones vector under mask.
define <16 x i32> @masked_dec_test(<16 x i32> %i, <16 x i32> %mask1) nounwind readnone {
; CHECK-LABEL: masked_dec_test:
; CHECK: # %bb.0:
; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
; CHECK-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1
; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT: retq
  %active = icmp ne <16 x i32> %mask1, zeroinitializer
  %minus1 = sub <16 x i32> %i, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %merged = select <16 x i1> %active, <16 x i32> %minus1, <16 x i32> %i
  ret <16 x i32> %merged
}
