; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=ALL,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX512,AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX512,AVX512FP16

;
; vXf32
;

define float @test_v1f32(<1 x float> %a0) {
; ALL-LABEL: test_v1f32:
; ALL: # %bb.0:
; ALL-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v1f32(<1 x float> %a0)
  ret float %1
}

define float @test_v2f32(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v2f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v2f32(<2 x float> %a0)
  ret float %1
}

define float @test_v3f32(<3 x float> %a0) {
; SSE2-LABEL: test_v3f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[1,1]
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: minss %xmm2, %xmm1
; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: minss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v3f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: minss %xmm2, %xmm1
; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE41-NEXT: minss %xmm0, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v3f32:
; AVX: # %bb.0:
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm1
; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vminss %xmm0, %xmm1, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v3f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vminss %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v3f32(<3 x float> %a0)
  ret float %1
}

define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a0)
  ret float %1
}

define float @test_v8f32(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32:
; SSE2: # %bb.0:
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v8f32:
; SSE41: # %bb.0:
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v8f32(<8 x float> %a0)
  ret float %1
}

define float @test_v16f32(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32:
; SSE2: # %bb.0:
; SSE2-NEXT: minps %xmm3, %xmm1
; SSE2-NEXT: minps %xmm2, %xmm0
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE2-NEXT: minss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: test_v16f32:
; SSE41: # %bb.0:
; SSE41-NEXT: minps %xmm3, %xmm1
; SSE41-NEXT: minps %xmm2, %xmm0
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minss %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: test_v16f32:
; AVX: # %bb.0:
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call nnan float @llvm.vector.reduce.fmin.v16f32(<16 x float> %a0)
  ret float %1
}

;
; vXf64
;

define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v2f64(<2 x double> %a0)
  ret double %1
}

define double @test_v4f64(<4 x double> %a0) {
; SSE-LABEL: test_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v4f64(<4 x double> %a0)
  ret double %1
}

define double @test_v8f64(<8 x double> %a0) {
; SSE-LABEL: test_v8f64:
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v8f64:
; AVX: # %bb.0:
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v8f64(<8 x double> %a0)
  ret double %1
}

define double @test_v16f64(<16 x double> %a0) {
; SSE-LABEL: test_v16f64:
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm6, %xmm2
; SSE-NEXT: minpd %xmm4, %xmm0
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: minpd %xmm7, %xmm3
; SSE-NEXT: minpd %xmm5, %xmm1
; SSE-NEXT: minpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v16f64:
; AVX: # %bb.0:
; AVX-NEXT: vminpd %ymm3, %ymm1, %ymm1
; AVX-NEXT: vminpd %ymm2, %ymm0, %ymm0
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: test_v16f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  %1 = call nnan double @llvm.vector.reduce.fmin.v16f64(<16 x double> %a0)
  ret double %1
}

define half @test_v2f16(<2 x half> %a0) nounwind {
; SSE-LABEL: test_v2f16:
; SSE: # %bb.0:
; SSE-NEXT: pushq %rbp
; SSE-NEXT: pushq %rbx
; SSE-NEXT: subq $40, %rsp
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT: psrld $16, %xmm0
; SSE-NEXT: pextrw $0, %xmm0, %ebx
; SSE-NEXT: pextrw $0, %xmm1, %ebp
; SSE-NEXT: callq __extendhfsf2@PLT
; SSE-NEXT: movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; SSE-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT: callq __extendhfsf2@PLT
; SSE-NEXT: ucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; SSE-NEXT: cmovbl %ebp, %ebx
; SSE-NEXT: pinsrw $0, %ebx, %xmm0
; SSE-NEXT: addq $40, %rsp
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %rbp
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2f16:
; AVX: # %bb.0:
; AVX-NEXT: pushq %rbp
; AVX-NEXT: pushq %rbx
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: vmovdqa %xmm0, %xmm1
; AVX-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX-NEXT: vpextrw $0, %xmm0, %ebx
; AVX-NEXT: vpextrw $0, %xmm1, %ebp
; AVX-NEXT: callq __extendhfsf2@PLT
; AVX-NEXT: vmovd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVX-NEXT: callq __extendhfsf2@PLT
; AVX-NEXT: vucomiss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; AVX-NEXT: cmovbl %ebp, %ebx
; AVX-NEXT: vpinsrw $0, %ebx, %xmm0, %xmm0
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: popq %rbx
; AVX-NEXT: popq %rbp
; AVX-NEXT: retq
;
; AVX512F-LABEL: test_v2f16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm2
; AVX512F-NEXT: vcvtph2ps %xmm1, %xmm3
; AVX512F-NEXT: xorl %eax, %eax
; AVX512F-NEXT: vucomiss %xmm3, %xmm2
; AVX512F-NEXT: sbbl %eax, %eax
; AVX512F-NEXT: kmovd %eax, %k1
; AVX512F-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; AVX512F-NEXT: vmovdqa %xmm1, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: test_v2f16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm2
; AVX512VL-NEXT: vcvtph2ps %xmm1, %ymm3
; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1
; AVX512VL-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512FP16-LABEL: test_v2f16:
; AVX512FP16: # %bb.0:
; AVX512FP16-NEXT: vpsrld $16, %xmm0, %xmm1
; AVX512FP16-NEXT: vcmpltph %xmm1, %xmm0, %k1
; AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
; AVX512FP16-NEXT: vmovaps %xmm1, %xmm0
; AVX512FP16-NEXT: retq
  %1 = call nnan half @llvm.vector.reduce.fmin.v2f16(<2 x half> %a0)
  ret half %1
}

declare float @llvm.vector.reduce.fmin.v1f32(<1 x float>)
declare float @llvm.vector.reduce.fmin.v2f32(<2 x float>)
declare float @llvm.vector.reduce.fmin.v3f32(<3 x float>)
declare float @llvm.vector.reduce.fmin.v4f32(<4 x float>)
declare float @llvm.vector.reduce.fmin.v8f32(<8 x float>)
declare float @llvm.vector.reduce.fmin.v16f32(<16 x float>)

declare double @llvm.vector.reduce.fmin.v2f64(<2 x double>)
declare double @llvm.vector.reduce.fmin.v4f64(<4 x double>)
declare double @llvm.vector.reduce.fmin.v8f64(<8 x double>)
declare double @llvm.vector.reduce.fmin.v16f64(<16 x double>)

declare half @llvm.vector.reduce.fmin.v2f16(<2 x half>)