; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=X64

; Tests for strict (constrained) vector FP compares on f16 vectors.
; Naming convention: *_q tests use the quiet intrinsic
; @llvm.experimental.constrained.fcmp.* (quiet compares: vcmp*_oq/uq, vucomish);
; *_s tests use the signaling intrinsic @llvm.experimental.constrained.fcmps.*
; (signaling compares: vcmp*_os/us, vcomish).

define <8 x i16> @test_v8f16_oeq_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_oeq_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpeqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_oeq_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeqph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"oeq",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ogt_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ogt_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpgt_oqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ogt_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmplt_oqph %xmm2, %xmm3, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ogt",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_oge_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_oge_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpge_oqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_oge_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmple_oqph %xmm2, %xmm3, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"oge",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_olt_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_olt_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmplt_oqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_olt_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmplt_oqph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"olt",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ole_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ole_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmple_oqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ole_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmple_oqph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ole",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_one_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_one_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpneq_oqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_one_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmpneq_oqph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"one",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ord_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ord_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpordph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ord_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmpordph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ord",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ueq_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ueq_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpeq_uqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ueq_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeq_uqph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ueq",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ugt_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ugt_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpnle_uqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ugt_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmpnle_uqph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ugt",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_uge_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_uge_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpnlt_uqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_uge_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmpnlt_uqph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"uge",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ult_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ult_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpnge_uqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ult_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmpnle_uqph %xmm2, %xmm3, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ult",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ule_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ule_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpngt_uqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ule_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmpnlt_uqph %xmm2, %xmm3, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ule",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_une_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_une_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpneqph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_une_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmpneqph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"une",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_uno_q(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_uno_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpunordph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_uno_q:
; X64:       # %bb.0:
; X64-NEXT:    vcmpunordph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"uno",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_oeq_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_oeq_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpeq_osph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_oeq_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeq_osph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"oeq",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ogt_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ogt_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpgtph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ogt_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpltph %xmm2, %xmm3, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ogt",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_oge_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_oge_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpgeph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_oge_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpleph %xmm2, %xmm3, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"oge",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_olt_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_olt_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpltph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_olt_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpltph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"olt",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ole_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ole_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpleph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ole_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpleph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ole",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_one_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_one_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpneq_osph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_one_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpneq_osph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"one",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ord_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ord_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpord_sph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ord_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpord_sph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ord",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ueq_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ueq_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpeq_usph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ueq_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpeq_usph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ueq",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ugt_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ugt_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpnleph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ugt_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpnleph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ugt",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_uge_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_uge_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpnltph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_uge_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpnltph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"uge",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ult_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ult_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpngeph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ult_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpnleph %xmm2, %xmm3, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ult",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_ule_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_ule_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpngtph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_ule_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpnltph %xmm2, %xmm3, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"ule",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_une_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_une_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpneq_usph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_une_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpneq_usph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"une",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <8 x i16> @test_v8f16_uno_s(<8 x i16> %a, <8 x i16> %b, <8 x half> %f1, <8 x half> %f2) #0 {
; X86-LABEL: test_v8f16_uno_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcmpunord_sph 8(%ebp), %xmm2, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v8f16_uno_s:
; X64:       # %bb.0:
; X64-NEXT:    vcmpunord_sph %xmm3, %xmm2, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(
                                               <8 x half> %f1, <8 x half> %f2, metadata !"uno",
                                               metadata !"fpexcept.strict") #0
  %res = select <8 x i1> %cond, <8 x i16> %a, <8 x i16> %b
  ret <8 x i16> %res
}

define <2 x i16> @test_v2f16_oeq_q(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <2 x half> %f2) #0 {
; X86-LABEL: test_v2f16_oeq_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vucomish 8(%ebp), %xmm2
; X86-NEXT:    setnp %al
; X86-NEXT:    sete %cl
; X86-NEXT:    testb %al, %cl
; X86-NEXT:    setne %al
; X86-NEXT:    kmovd %eax, %k0
; X86-NEXT:    kshiftlb $7, %k0, %k0
; X86-NEXT:    kshiftrb $7, %k0, %k0
; X86-NEXT:    vpsrld $16, %xmm2, %xmm2
; X86-NEXT:    vucomish 10(%ebp), %xmm2
; X86-NEXT:    setnp %al
; X86-NEXT:    sete %cl
; X86-NEXT:    testb %al, %cl
; X86-NEXT:    setne %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kshiftlb $7, %k1, %k1
; X86-NEXT:    kshiftrb $6, %k1, %k1
; X86-NEXT:    korb %k1, %k0, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v2f16_oeq_q:
; X64:       # %bb.0:
; X64-NEXT:    vucomish %xmm3, %xmm2
; X64-NEXT:    setnp %al
; X64-NEXT:    sete %cl
; X64-NEXT:    testb %al, %cl
; X64-NEXT:    setne %al
; X64-NEXT:    kmovd %eax, %k0
; X64-NEXT:    kshiftlb $7, %k0, %k0
; X64-NEXT:    kshiftrb $7, %k0, %k0
; X64-NEXT:    vpsrld $16, %xmm3, %xmm3
; X64-NEXT:    vpsrld $16, %xmm2, %xmm2
; X64-NEXT:    vucomish %xmm3, %xmm2
; X64-NEXT:    setnp %al
; X64-NEXT:    sete %cl
; X64-NEXT:    testb %al, %cl
; X64-NEXT:    setne %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kshiftlb $7, %k1, %k1
; X64-NEXT:    kshiftrb $6, %k1, %k1
; X64-NEXT:    korb %k1, %k0, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f16(
                                               <2 x half> %f1, <2 x half> %f2, metadata !"oeq",
                                               metadata !"fpexcept.strict") #0
  %res = select <2 x i1> %cond, <2 x i16> %a, <2 x i16> %b
  ret <2 x i16> %res
}

; Renamed from test_v2f16_ogt_q: this test calls the signaling intrinsic
; (constrained.fcmps, lowered to vcomish), so it takes the _s suffix used by
; the signaling v8f16 tests above.
define <2 x i16> @test_v2f16_ogt_s(<2 x i16> %a, <2 x i16> %b, <2 x half> %f1, <2 x half> %f2) #0 {
; X86-LABEL: test_v2f16_ogt_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vcomish 8(%ebp), %xmm2
; X86-NEXT:    seta %al
; X86-NEXT:    kmovd %eax, %k0
; X86-NEXT:    kshiftlb $7, %k0, %k0
; X86-NEXT:    kshiftrb $7, %k0, %k0
; X86-NEXT:    vpsrld $16, %xmm2, %xmm2
; X86-NEXT:    vcomish 10(%ebp), %xmm2
; X86-NEXT:    seta %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kshiftlb $7, %k1, %k1
; X86-NEXT:    kshiftrb $6, %k1, %k1
; X86-NEXT:    korb %k1, %k0, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v2f16_ogt_s:
; X64:       # %bb.0:
; X64-NEXT:    vcomish %xmm3, %xmm2
; X64-NEXT:    seta %al
; X64-NEXT:    kmovd %eax, %k0
; X64-NEXT:    kshiftlb $7, %k0, %k0
; X64-NEXT:    kshiftrb $7, %k0, %k0
; X64-NEXT:    vpsrld $16, %xmm3, %xmm3
; X64-NEXT:    vpsrld $16, %xmm2, %xmm2
; X64-NEXT:    vcomish %xmm3, %xmm2
; X64-NEXT:    seta %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kshiftlb $7, %k1, %k1
; X64-NEXT:    kshiftrb $6, %k1, %k1
; X64-NEXT:    korb %k1, %k0, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f16(
                                               <2 x half> %f1, <2 x half> %f2, metadata !"ogt",
                                               metadata !"fpexcept.strict") #0
  %res = select <2 x i1> %cond, <2 x i16> %a, <2 x i16> %b
  ret <2 x i16> %res
}

define <4 x i16> @test_v4f16_oge_q(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <4 x half> %f2) #0 {
; X86-LABEL: test_v4f16_oge_q:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vucomish 8(%ebp), %xmm2
; X86-NEXT:    setae %al
; X86-NEXT:    kmovd %eax, %k0
; X86-NEXT:    kshiftlb $7, %k0, %k0
; X86-NEXT:    kshiftrb $7, %k0, %k0
; X86-NEXT:    vpsrld $16, %xmm2, %xmm3
; X86-NEXT:    vucomish 10(%ebp), %xmm3
; X86-NEXT:    setae %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kshiftlb $7, %k1, %k1
; X86-NEXT:    kshiftrb $6, %k1, %k1
; X86-NEXT:    korb %k1, %k0, %k0
; X86-NEXT:    movb $-5, %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kandb %k1, %k0, %k0
; X86-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
; X86-NEXT:    vucomish 12(%ebp), %xmm3
; X86-NEXT:    setae %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kshiftlb $7, %k1, %k1
; X86-NEXT:    kshiftrb $5, %k1, %k1
; X86-NEXT:    korb %k1, %k0, %k0
; X86-NEXT:    movb $-9, %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kandb %k1, %k0, %k0
; X86-NEXT:    vpsrlq $48, %xmm2, %xmm2
; X86-NEXT:    vucomish 14(%ebp), %xmm2
; X86-NEXT:    setae %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kshiftlb $7, %k1, %k1
; X86-NEXT:    kshiftrb $4, %k1, %k1
; X86-NEXT:    korb %k1, %k0, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v4f16_oge_q:
; X64:       # %bb.0:
; X64-NEXT:    vucomish %xmm3, %xmm2
; X64-NEXT:    setae %al
; X64-NEXT:    kmovd %eax, %k0
; X64-NEXT:    kshiftlb $7, %k0, %k0
; X64-NEXT:    kshiftrb $7, %k0, %k0
; X64-NEXT:    vpsrld $16, %xmm3, %xmm4
; X64-NEXT:    vpsrld $16, %xmm2, %xmm5
; X64-NEXT:    vucomish %xmm4, %xmm5
; X64-NEXT:    setae %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kshiftlb $7, %k1, %k1
; X64-NEXT:    kshiftrb $6, %k1, %k1
; X64-NEXT:    korb %k1, %k0, %k0
; X64-NEXT:    movb $-5, %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kandb %k1, %k0, %k0
; X64-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
; X64-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
; X64-NEXT:    vucomish %xmm4, %xmm5
; X64-NEXT:    setae %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kshiftlb $7, %k1, %k1
; X64-NEXT:    kshiftrb $5, %k1, %k1
; X64-NEXT:    korb %k1, %k0, %k0
; X64-NEXT:    movb $-9, %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kandb %k1, %k0, %k0
; X64-NEXT:    vpsrlq $48, %xmm3, %xmm3
; X64-NEXT:    vpsrlq $48, %xmm2, %xmm2
; X64-NEXT:    vucomish %xmm3, %xmm2
; X64-NEXT:    setae %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kshiftlb $7, %k1, %k1
; X64-NEXT:    kshiftrb $4, %k1, %k1
; X64-NEXT:    korb %k1, %k0, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f16(
                                               <4 x half> %f1, <4 x half> %f2, metadata !"oge",
                                               metadata !"fpexcept.strict") #0
  %res = select <4 x i1> %cond, <4 x i16> %a, <4 x i16> %b
  ret <4 x i16> %res
}

; Renamed from test_v4f16_olt_q: this test calls the signaling intrinsic
; (constrained.fcmps, lowered to vcomish), so it takes the _s suffix used by
; the signaling v8f16 tests above.
define <4 x i16> @test_v4f16_olt_s(<4 x i16> %a, <4 x i16> %b, <4 x half> %f1, <4 x half> %f2) #0 {
; X86-LABEL: test_v4f16_olt_s:
; X86:       # %bb.0:
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $16, %esp
; X86-NEXT:    vmovsh 8(%ebp), %xmm3
; X86-NEXT:    vcomish %xmm2, %xmm3
; X86-NEXT:    seta %al
; X86-NEXT:    kmovd %eax, %k0
; X86-NEXT:    kshiftlb $7, %k0, %k0
; X86-NEXT:    kshiftrb $7, %k0, %k0
; X86-NEXT:    vpsrld $16, %xmm2, %xmm3
; X86-NEXT:    vmovsh 10(%ebp), %xmm4
; X86-NEXT:    vcomish %xmm3, %xmm4
; X86-NEXT:    seta %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kshiftlb $7, %k1, %k1
; X86-NEXT:    kshiftrb $6, %k1, %k1
; X86-NEXT:    korb %k1, %k0, %k0
; X86-NEXT:    movb $-5, %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kandb %k1, %k0, %k0
; X86-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3]
; X86-NEXT:    vmovsh 12(%ebp), %xmm4
; X86-NEXT:    vcomish %xmm3, %xmm4
; X86-NEXT:    seta %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kshiftlb $7, %k1, %k1
; X86-NEXT:    kshiftrb $5, %k1, %k1
; X86-NEXT:    korb %k1, %k0, %k0
; X86-NEXT:    movb $-9, %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kandb %k1, %k0, %k0
; X86-NEXT:    vpsrlq $48, %xmm2, %xmm2
; X86-NEXT:    vmovsh 14(%ebp), %xmm3
; X86-NEXT:    vcomish %xmm2, %xmm3
; X86-NEXT:    seta %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    kshiftlb $7, %k1, %k1
; X86-NEXT:    kshiftrb $4, %k1, %k1
; X86-NEXT:    korb %k1, %k0, %k1
; X86-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: test_v4f16_olt_s:
; X64:       # %bb.0:
; X64-NEXT:    vcomish %xmm2, %xmm3
; X64-NEXT:    seta %al
; X64-NEXT:    kmovd %eax, %k0
; X64-NEXT:    kshiftlb $7, %k0, %k0
; X64-NEXT:    kshiftrb $7, %k0, %k0
; X64-NEXT:    vpsrld $16, %xmm2, %xmm4
; X64-NEXT:    vpsrld $16, %xmm3, %xmm5
; X64-NEXT:    vcomish %xmm4, %xmm5
; X64-NEXT:    seta %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kshiftlb $7, %k1, %k1
; X64-NEXT:    kshiftrb $6, %k1, %k1
; X64-NEXT:    korb %k1, %k0, %k0
; X64-NEXT:    movb $-5, %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kandb %k1, %k0, %k0
; X64-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm2[1,1,3,3]
; X64-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm3[1,1,3,3]
; X64-NEXT:    vcomish %xmm4, %xmm5
; X64-NEXT:    seta %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kshiftlb $7, %k1, %k1
; X64-NEXT:    kshiftrb $5, %k1, %k1
; X64-NEXT:    korb %k1, %k0, %k0
; X64-NEXT:    movb $-9, %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kandb %k1, %k0, %k0
; X64-NEXT:    vpsrlq $48, %xmm2, %xmm2
; X64-NEXT:    vpsrlq $48, %xmm3, %xmm3
; X64-NEXT:    vcomish %xmm2, %xmm3
; X64-NEXT:    seta %al
; X64-NEXT:    kmovd %eax, %k1
; X64-NEXT:    kshiftlb $7, %k1, %k1
; X64-NEXT:    kshiftrb $4, %k1, %k1
; X64-NEXT:    korb %k1, %k0, %k1
; X64-NEXT:    vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %cond = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f16(
                                               <4 x half> %f1, <4 x half> %f2, metadata !"olt",
                                               metadata !"fpexcept.strict") #0
  %res = select <4 x i1> %cond, <4 x i16> %a, <4 x i16> %b
  ret <4 x i16> %res
}

attributes #0 = { strictfp nounwind }

declare <2 x i1> @llvm.experimental.constrained.fcmp.v2f16(<2 x half>, <2 x half>, metadata, metadata)
declare <2 x i1> @llvm.experimental.constrained.fcmps.v2f16(<2 x half>, <2 x half>, metadata, metadata)
declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f16(<4 x half>, <4 x half>, metadata, metadata)
declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f16(<4 x half>, <4 x half>, metadata, metadata)
declare <8 x i1> @llvm.experimental.constrained.fcmp.v8f16(<8 x half>, <8 x half>, metadata, metadata)
declare <8 x i1> @llvm.experimental.constrained.fcmps.v8f16(<8 x half>, <8 x half>, metadata, metadata)