; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefix=SSE-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefix=SSE-64
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 | FileCheck %s --check-prefixes=AVX,AVX-64
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX512F,AVX512F-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=AVX512F,AVX512F-64
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VL,AVX512VL-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VL,AVX512VL-64
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=AVX512DQ,AVX512DQ-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq -O3 | FileCheck %s --check-prefixes=AVX512DQ,AVX512DQ-64
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VLDQ,AVX512VLDQ-32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=avx512dq,avx512vl -O3 | FileCheck %s --check-prefixes=AVX512VLDQ,AVX512VLDQ-64

declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata)
declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata)
declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata)
declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double>, metadata)
declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double>, metadata)
declare <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float>, metadata)
declare <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float>, metadata)
declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f64(<2 x double>, metadata)
declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f64(<2 x double>, metadata)
declare <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float>, metadata)
declare <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float>, metadata)
declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double>, metadata)
declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double>, metadata)
declare <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float>, metadata)
declare <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f32(<2 x float>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata)
declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata)
declare <4 x i16> @llvm.experimental.constrained.fptosi.v4i16.v4f32(<4 x float>, metadata)
declare <4 x i16> @llvm.experimental.constrained.fptoui.v4i16.v4f32(<4 x float>, metadata)
declare <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float>, metadata)
declare <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float>, metadata)
declare <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float>, metadata)
declare <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float>, metadata)

define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movhps %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %eax
; SSE-32-NEXT:    orl $3072, %eax # imm = 0xC00
; SSE-32-NEXT:    movw %ax, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-32-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %xmm1
; SSE-64-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-64-NEXT:    cvttsd2si %xmm0, %rax
; SSE-64-NEXT:    movq %rax, %xmm0
; SSE-64-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-64-NEXT:    movdqa %xmm1, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX-32:       # %bb.0:
; AVX-32-NEXT:    pushl %ebp
; AVX-32-NEXT:    .cfi_def_cfa_offset 8
; AVX-32-NEXT:    .cfi_offset %ebp, -8
; AVX-32-NEXT:    movl %esp, %ebp
; AVX-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX-32-NEXT:    andl $-8, %esp
; AVX-32-NEXT:    subl $16, %esp
; AVX-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX-32-NEXT:    vmovhps %xmm0, (%esp)
; AVX-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX-32-NEXT:    fldl (%esp)
; AVX-32-NEXT:    fisttpll (%esp)
; AVX-32-NEXT:    wait
; AVX-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT:    movl %ebp, %esp
; AVX-32-NEXT:    popl %ebp
; AVX-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX-32-NEXT:    retl
;
; AVX-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX-64:       # %bb.0:
; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-64-NEXT:    vmovq %rax, %xmm1
; AVX-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX-64-NEXT:    vmovq %rax, %xmm0
; AVX-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-64-NEXT:    retq
;
; AVX512F-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512F-32:       # %bb.0:
; AVX512F-32-NEXT:    pushl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
; AVX512F-32-NEXT:    movl %esp, %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512F-32-NEXT:    andl $-8, %esp
; AVX512F-32-NEXT:    subl $16, %esp
; AVX512F-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    vmovhps %xmm0, (%esp)
; AVX512F-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512F-32-NEXT:    fldl (%esp)
; AVX512F-32-NEXT:    fisttpll (%esp)
; AVX512F-32-NEXT:    wait
; AVX512F-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512F-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512F-32-NEXT:    movl %ebp, %esp
; AVX512F-32-NEXT:    popl %ebp
; AVX512F-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512F-32-NEXT:    retl
;
; AVX512F-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512F-64:       # %bb.0:
; AVX512F-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm1
; AVX512F-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX512F-64-NEXT:    vmovq %rax, %xmm0
; AVX512F-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-64-NEXT:    retq
;
; AVX512VL-32-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512VL-32:       # %bb.0:
; AVX512VL-32-NEXT:    pushl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_offset 8
; AVX512VL-32-NEXT:    .cfi_offset %ebp, -8
; AVX512VL-32-NEXT:    movl %esp, %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa_register %ebp
; AVX512VL-32-NEXT:    andl $-8, %esp
; AVX512VL-32-NEXT:    subl $16, %esp
; AVX512VL-32-NEXT:    vmovlps %xmm0, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    vmovhps %xmm0, (%esp)
; AVX512VL-32-NEXT:    fldl {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fisttpll {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT:    fldl (%esp)
; AVX512VL-32-NEXT:    fisttpll (%esp)
; AVX512VL-32-NEXT:    wait
; AVX512VL-32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512VL-32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $2, (%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512VL-32-NEXT:    movl %ebp, %esp
; AVX512VL-32-NEXT:    popl %ebp
; AVX512VL-32-NEXT:    .cfi_def_cfa %esp, 4
; AVX512VL-32-NEXT:    retl
;
; AVX512VL-64-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512VL-64:       # %bb.0:
; AVX512VL-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm1
; AVX512VL-64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-64-NEXT:    vcvttsd2si %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvttpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i64:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttpd2qq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> %a,
                                              metadata !"fpexcept.strict") #0
  ret <2 x i64> %ret
}

define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    pushl %ebp
; SSE-32-NEXT:    .cfi_def_cfa_offset 8
; SSE-32-NEXT:    .cfi_offset %ebp, -8
; SSE-32-NEXT:    movl %esp, %ebp
; SSE-32-NEXT:    .cfi_def_cfa_register %ebp
; SSE-32-NEXT:    andl $-8, %esp
; SSE-32-NEXT:    subl $24, %esp
; SSE-32-NEXT:    movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0]
; SSE-32-NEXT:    comisd %xmm1, %xmm0
; SSE-32-NEXT:    movapd %xmm1, %xmm2
; SSE-32-NEXT:    jae .LBB1_2
; SSE-32-NEXT:  # %bb.1:
; SSE-32-NEXT:    xorpd %xmm2, %xmm2
; SSE-32-NEXT:  .LBB1_2:
; SSE-32-NEXT:    movapd %xmm0, %xmm3
; SSE-32-NEXT:    subsd %xmm2, %xmm3
; SSE-32-NEXT:    movsd %xmm3, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %al
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; SSE-32-NEXT:    orl $3072, %ecx # imm = 0xC00
; SSE-32-NEXT:    movw %cx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-32-NEXT:    comisd %xmm1, %xmm0
; SSE-32-NEXT:    jae .LBB1_4
; SSE-32-NEXT:  # %bb.3:
; SSE-32-NEXT:    xorpd %xmm1, %xmm1
; SSE-32-NEXT:  .LBB1_4:
; SSE-32-NEXT:    subsd %xmm1, %xmm0
; SSE-32-NEXT:    movsd %xmm0, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    setae %cl
; SSE-32-NEXT:    fldl {{[0-9]+}}(%esp)
; SSE-32-NEXT:    wait
; SSE-32-NEXT:    fnstcw (%esp)
; SSE-32-NEXT:    movzwl (%esp), %edx
; SSE-32-NEXT:    orl $3072, %edx # imm = 0xC00
; SSE-32-NEXT:    movw %dx, {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT:    fldcw (%esp)
; SSE-32-NEXT:    movzbl %al, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-32-NEXT:    movzbl %cl, %eax
; SSE-32-NEXT:    shll $31, %eax
; SSE-32-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT:    movd %eax, %xmm1
; SSE-32-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-32-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-32-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-32-NEXT:    movl %ebp, %esp
; SSE-32-NEXT:    popl %ebp
; SSE-32-NEXT:    .cfi_def_cfa %esp, 4
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movsd {{.*#+}} xmm3 = [9.2233720368547758E+18,0.0E+0]
; SSE-64-NEXT:    comisd %xmm3, %xmm0
; SSE-64-NEXT:    xorpd %xmm2, %xmm2
; SSE-64-NEXT:    xorpd %xmm1, %xmm1
; SSE-64-NEXT:    jb .LBB1_2
; SSE-64-NEXT:  # %bb.1:
; SSE-64-NEXT:    movapd %xmm3, %xmm1
; SSE-64-NEXT:  .LBB1_2:
; SSE-64-NEXT:    movapd %xmm0, %xmm4
; SSE-64-NEXT:    subsd %xmm1, %xmm4
; SSE-64-NEXT:    cvttsd2si %xmm4,
%rax 294; SSE-64-NEXT: setae %cl 295; SSE-64-NEXT: movzbl %cl, %ecx 296; SSE-64-NEXT: shlq $63, %rcx 297; SSE-64-NEXT: xorq %rax, %rcx 298; SSE-64-NEXT: movq %rcx, %xmm1 299; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 300; SSE-64-NEXT: comisd %xmm3, %xmm0 301; SSE-64-NEXT: jb .LBB1_4 302; SSE-64-NEXT: # %bb.3: 303; SSE-64-NEXT: movapd %xmm3, %xmm2 304; SSE-64-NEXT: .LBB1_4: 305; SSE-64-NEXT: subsd %xmm2, %xmm0 306; SSE-64-NEXT: cvttsd2si %xmm0, %rax 307; SSE-64-NEXT: setae %cl 308; SSE-64-NEXT: movzbl %cl, %ecx 309; SSE-64-NEXT: shlq $63, %rcx 310; SSE-64-NEXT: xorq %rax, %rcx 311; SSE-64-NEXT: movq %rcx, %xmm0 312; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 313; SSE-64-NEXT: movdqa %xmm1, %xmm0 314; SSE-64-NEXT: retq 315; 316; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64: 317; AVX-32: # %bb.0: 318; AVX-32-NEXT: pushl %ebp 319; AVX-32-NEXT: .cfi_def_cfa_offset 8 320; AVX-32-NEXT: .cfi_offset %ebp, -8 321; AVX-32-NEXT: movl %esp, %ebp 322; AVX-32-NEXT: .cfi_def_cfa_register %ebp 323; AVX-32-NEXT: andl $-8, %esp 324; AVX-32-NEXT: subl $16, %esp 325; AVX-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] 326; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 327; AVX-32-NEXT: vcomisd %xmm1, %xmm2 328; AVX-32-NEXT: vmovapd %xmm1, %xmm3 329; AVX-32-NEXT: jae .LBB1_2 330; AVX-32-NEXT: # %bb.1: 331; AVX-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3 332; AVX-32-NEXT: .LBB1_2: 333; AVX-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2 334; AVX-32-NEXT: vmovsd %xmm2, (%esp) 335; AVX-32-NEXT: fldl (%esp) 336; AVX-32-NEXT: fisttpll (%esp) 337; AVX-32-NEXT: wait 338; AVX-32-NEXT: setae %al 339; AVX-32-NEXT: movzbl %al, %eax 340; AVX-32-NEXT: shll $31, %eax 341; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 342; AVX-32-NEXT: vcomisd %xmm1, %xmm0 343; AVX-32-NEXT: jae .LBB1_4 344; AVX-32-NEXT: # %bb.3: 345; AVX-32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 346; AVX-32-NEXT: .LBB1_4: 347; AVX-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0 348; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) 349; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) 350; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 351; AVX-32-NEXT: wait 352; AVX-32-NEXT: setae %cl 353; AVX-32-NEXT: movzbl %cl, %ecx 354; AVX-32-NEXT: shll $31, %ecx 355; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 356; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 357; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 358; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 359; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 360; AVX-32-NEXT: movl %ebp, %esp 361; AVX-32-NEXT: popl %ebp 362; AVX-32-NEXT: .cfi_def_cfa %esp, 4 363; AVX-32-NEXT: retl 364; 365; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64: 366; AVX-64: # %bb.0: 367; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 368; AVX-64-NEXT: vcomisd %xmm1, %xmm0 369; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2 370; AVX-64-NEXT: vxorpd %xmm3, %xmm3, %xmm3 371; AVX-64-NEXT: jb .LBB1_2 372; AVX-64-NEXT: # %bb.1: 373; AVX-64-NEXT: vmovapd %xmm1, %xmm3 374; AVX-64-NEXT: .LBB1_2: 375; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3 376; AVX-64-NEXT: vcvttsd2si %xmm3, %rax 377; AVX-64-NEXT: setae %cl 378; AVX-64-NEXT: movzbl %cl, %ecx 379; AVX-64-NEXT: shlq $63, %rcx 380; AVX-64-NEXT: xorq %rax, %rcx 381; AVX-64-NEXT: vmovq %rcx, %xmm3 382; AVX-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 383; AVX-64-NEXT: vcomisd %xmm1, %xmm0 384; AVX-64-NEXT: jb .LBB1_4 385; AVX-64-NEXT: # %bb.3: 386; AVX-64-NEXT: vmovapd %xmm1, %xmm2 387; AVX-64-NEXT: .LBB1_4: 388; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0 389; AVX-64-NEXT: vcvttsd2si %xmm0, %rax 390; AVX-64-NEXT: setae 
%cl 391; AVX-64-NEXT: movzbl %cl, %ecx 392; AVX-64-NEXT: shlq $63, %rcx 393; AVX-64-NEXT: xorq %rax, %rcx 394; AVX-64-NEXT: vmovq %rcx, %xmm0 395; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 396; AVX-64-NEXT: retq 397; 398; AVX512F-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64: 399; AVX512F-32: # %bb.0: 400; AVX512F-32-NEXT: pushl %ebp 401; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 402; AVX512F-32-NEXT: .cfi_offset %ebp, -8 403; AVX512F-32-NEXT: movl %esp, %ebp 404; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp 405; AVX512F-32-NEXT: andl $-8, %esp 406; AVX512F-32-NEXT: subl $16, %esp 407; AVX512F-32-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] 408; AVX512F-32-NEXT: vmovsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0] 409; AVX512F-32-NEXT: xorl %eax, %eax 410; AVX512F-32-NEXT: vcomisd %xmm2, %xmm1 411; AVX512F-32-NEXT: setae %al 412; AVX512F-32-NEXT: kmovw %eax, %k1 413; AVX512F-32-NEXT: vmovsd %xmm2, %xmm2, %xmm3 {%k1} {z} 414; AVX512F-32-NEXT: vsubsd %xmm3, %xmm1, %xmm1 415; AVX512F-32-NEXT: vmovsd %xmm1, (%esp) 416; AVX512F-32-NEXT: xorl %ecx, %ecx 417; AVX512F-32-NEXT: vcomisd %xmm2, %xmm0 418; AVX512F-32-NEXT: setae %cl 419; AVX512F-32-NEXT: kmovw %ecx, %k1 420; AVX512F-32-NEXT: vmovsd %xmm2, %xmm2, %xmm1 {%k1} {z} 421; AVX512F-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0 422; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) 423; AVX512F-32-NEXT: fldl (%esp) 424; AVX512F-32-NEXT: fisttpll (%esp) 425; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) 426; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 427; AVX512F-32-NEXT: wait 428; AVX512F-32-NEXT: shll $31, %eax 429; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 430; AVX512F-32-NEXT: shll $31, %ecx 431; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 432; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 433; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 434; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 435; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 436; AVX512F-32-NEXT: movl %ebp, %esp 437; AVX512F-32-NEXT: popl %ebp 438; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4 439; AVX512F-32-NEXT: retl 440; 441; AVX512F-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64: 442; AVX512F-64: # %bb.0: 443; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax 444; AVX512F-64-NEXT: vmovq %rax, %xmm1 445; AVX512F-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 446; AVX512F-64-NEXT: vcvttsd2usi %xmm0, %rax 447; AVX512F-64-NEXT: vmovq %rax, %xmm0 448; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 449; AVX512F-64-NEXT: retq 450; 451; AVX512VL-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64: 452; AVX512VL-32: # %bb.0: 453; AVX512VL-32-NEXT: pushl %ebp 454; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 455; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 456; AVX512VL-32-NEXT: movl %esp, %ebp 457; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 458; AVX512VL-32-NEXT: andl $-8, %esp 459; AVX512VL-32-NEXT: subl $16, %esp 460; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] 461; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0] 462; AVX512VL-32-NEXT: xorl %eax, %eax 463; AVX512VL-32-NEXT: vcomisd %xmm2, %xmm1 464; AVX512VL-32-NEXT: setae %al 465; AVX512VL-32-NEXT: kmovw %eax, %k1 466; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm2, %xmm3 {%k1} {z} 467; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm1, %xmm1 468; AVX512VL-32-NEXT: vmovsd %xmm1, (%esp) 469; AVX512VL-32-NEXT: xorl %ecx, %ecx 470; AVX512VL-32-NEXT: vcomisd %xmm2, %xmm0 471; AVX512VL-32-NEXT: setae %cl 472; AVX512VL-32-NEXT: kmovw %ecx, %k1 473; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm2, %xmm1 {%k1} {z} 474; AVX512VL-32-NEXT: 
vsubsd %xmm1, %xmm0, %xmm0 475; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) 476; AVX512VL-32-NEXT: fldl (%esp) 477; AVX512VL-32-NEXT: fisttpll (%esp) 478; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 479; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 480; AVX512VL-32-NEXT: wait 481; AVX512VL-32-NEXT: shll $31, %eax 482; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 483; AVX512VL-32-NEXT: shll $31, %ecx 484; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 485; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 486; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 487; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 488; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 489; AVX512VL-32-NEXT: movl %ebp, %esp 490; AVX512VL-32-NEXT: popl %ebp 491; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 492; AVX512VL-32-NEXT: retl 493; 494; AVX512VL-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64: 495; AVX512VL-64: # %bb.0: 496; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax 497; AVX512VL-64-NEXT: vmovq %rax, %xmm1 498; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 499; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax 500; AVX512VL-64-NEXT: vmovq %rax, %xmm0 501; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 502; AVX512VL-64-NEXT: retq 503; 504; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64: 505; AVX512DQ: # %bb.0: 506; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 507; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 508; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 509; AVX512DQ-NEXT: vzeroupper 510; AVX512DQ-NEXT: ret{{[l|q]}} 511; 512; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i64: 513; AVX512VLDQ: # %bb.0: 514; AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0 515; AVX512VLDQ-NEXT: ret{{[l|q]}} 516 %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %a, 517 metadata !"fpexcept.strict") #0 518 ret <2 x i64> %ret 519} 520 521define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 { 522; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64: 523; SSE-32: # %bb.0: 524; SSE-32-NEXT: pushl %ebp 525; SSE-32-NEXT: .cfi_def_cfa_offset 8 526; SSE-32-NEXT: .cfi_offset %ebp, -8 527; SSE-32-NEXT: movl %esp, %ebp 528; SSE-32-NEXT: .cfi_def_cfa_register %ebp 529; SSE-32-NEXT: andl $-8, %esp 530; SSE-32-NEXT: subl $24, %esp 531; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 532; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 533; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 534; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 535; SSE-32-NEXT: wait 536; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) 537; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 538; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 539; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp) 540; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 541; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 542; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 543; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 544; SSE-32-NEXT: wait 545; SSE-32-NEXT: fnstcw (%esp) 546; SSE-32-NEXT: movzwl (%esp), %eax 547; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 548; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp) 549; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 550; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 551; SSE-32-NEXT: fldcw (%esp) 552; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 553; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 554; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 555; SSE-32-NEXT: movl %ebp, %esp 556; SSE-32-NEXT: popl %ebp 557; SSE-32-NEXT: .cfi_def_cfa %esp, 4 558; SSE-32-NEXT: retl 559; 560; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64: 561; SSE-64: # %bb.0: 562; SSE-64-NEXT: cvttss2si 
%xmm0, %rax 563; SSE-64-NEXT: movq %rax, %xmm1 564; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 565; SSE-64-NEXT: cvttss2si %xmm0, %rax 566; SSE-64-NEXT: movq %rax, %xmm0 567; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 568; SSE-64-NEXT: movdqa %xmm1, %xmm0 569; SSE-64-NEXT: retq 570; 571; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64: 572; AVX-32: # %bb.0: 573; AVX-32-NEXT: pushl %ebp 574; AVX-32-NEXT: .cfi_def_cfa_offset 8 575; AVX-32-NEXT: .cfi_offset %ebp, -8 576; AVX-32-NEXT: movl %esp, %ebp 577; AVX-32-NEXT: .cfi_def_cfa_register %ebp 578; AVX-32-NEXT: andl $-8, %esp 579; AVX-32-NEXT: subl $16, %esp 580; AVX-32-NEXT: vmovss %xmm0, (%esp) 581; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 582; AVX-32-NEXT: flds (%esp) 583; AVX-32-NEXT: fisttpll (%esp) 584; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 585; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 586; AVX-32-NEXT: wait 587; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 588; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 589; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 590; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 591; AVX-32-NEXT: movl %ebp, %esp 592; AVX-32-NEXT: popl %ebp 593; AVX-32-NEXT: .cfi_def_cfa %esp, 4 594; AVX-32-NEXT: retl 595; 596; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64: 597; AVX-64: # %bb.0: 598; AVX-64-NEXT: vcvttss2si %xmm0, %rax 599; AVX-64-NEXT: vmovq %rax, %xmm1 600; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 601; AVX-64-NEXT: vcvttss2si %xmm0, %rax 602; AVX-64-NEXT: vmovq %rax, %xmm0 603; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 604; AVX-64-NEXT: retq 605; 606; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64: 607; AVX512F-32: # %bb.0: 608; AVX512F-32-NEXT: pushl %ebp 609; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 610; AVX512F-32-NEXT: .cfi_offset %ebp, -8 611; AVX512F-32-NEXT: movl %esp, %ebp 612; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp 613; AVX512F-32-NEXT: andl $-8, %esp 614; AVX512F-32-NEXT: subl $16, %esp 615; AVX512F-32-NEXT: vmovd %xmm0, (%esp) 616; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 617; AVX512F-32-NEXT: flds (%esp) 618; AVX512F-32-NEXT: fisttpll (%esp) 619; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) 620; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 621; AVX512F-32-NEXT: wait 622; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 623; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 624; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 625; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 626; AVX512F-32-NEXT: movl %ebp, %esp 627; AVX512F-32-NEXT: popl %ebp 628; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4 629; AVX512F-32-NEXT: retl 630; 631; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64: 632; AVX512F-64: # %bb.0: 633; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax 634; AVX512F-64-NEXT: vmovq %rax, %xmm1 635; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 636; AVX512F-64-NEXT: vcvttss2si %xmm0, %rax 637; AVX512F-64-NEXT: vmovq %rax, %xmm0 638; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 639; AVX512F-64-NEXT: retq 640; 641; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64: 642; AVX512VL-32: # %bb.0: 643; AVX512VL-32-NEXT: pushl %ebp 644; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 645; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 646; AVX512VL-32-NEXT: movl %esp, %ebp 647; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 648; AVX512VL-32-NEXT: andl $-8, %esp 649; AVX512VL-32-NEXT: subl $16, %esp 650; AVX512VL-32-NEXT: vmovd %xmm0, 
(%esp) 651; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 652; AVX512VL-32-NEXT: flds (%esp) 653; AVX512VL-32-NEXT: fisttpll (%esp) 654; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 655; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 656; AVX512VL-32-NEXT: wait 657; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 658; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 659; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 660; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 661; AVX512VL-32-NEXT: movl %ebp, %esp 662; AVX512VL-32-NEXT: popl %ebp 663; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 664; AVX512VL-32-NEXT: retl 665; 666; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64: 667; AVX512VL-64: # %bb.0: 668; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax 669; AVX512VL-64-NEXT: vmovq %rax, %xmm1 670; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 671; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax 672; AVX512VL-64-NEXT: vmovq %rax, %xmm0 673; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 674; AVX512VL-64-NEXT: retq 675; 676; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64: 677; AVX512DQ: # %bb.0: 678; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 679; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 680; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 681; AVX512DQ-NEXT: vzeroupper 682; AVX512DQ-NEXT: ret{{[l|q]}} 683; 684; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64: 685; AVX512VLDQ: # %bb.0: 686; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0 687; AVX512VLDQ-NEXT: ret{{[l|q]}} 688 %ret = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %a, 689 metadata !"fpexcept.strict") #0 690 ret <2 x i64> %ret 691} 692 693define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64_load128(ptr %x) strictfp { 694; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 695; SSE-32: # %bb.0: 696; SSE-32-NEXT: pushl %ebp 697; SSE-32-NEXT: .cfi_def_cfa_offset 8 698; SSE-32-NEXT: .cfi_offset %ebp, -8 699; SSE-32-NEXT: movl %esp, %ebp 700; SSE-32-NEXT: .cfi_def_cfa_register %ebp 701; SSE-32-NEXT: andl $-8, %esp 702; SSE-32-NEXT: subl $24, %esp 703; SSE-32-NEXT: movl 8(%ebp), %eax 704; SSE-32-NEXT: movaps (%eax), %xmm0 705; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 706; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 707; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 708; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 709; SSE-32-NEXT: wait 710; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) 711; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 712; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 713; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp) 714; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 715; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 716; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 717; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 718; SSE-32-NEXT: wait 719; SSE-32-NEXT: fnstcw (%esp) 720; SSE-32-NEXT: movzwl (%esp), %eax 721; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 722; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp) 723; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 724; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 725; SSE-32-NEXT: fldcw (%esp) 726; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 727; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 728; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 729; SSE-32-NEXT: movl %ebp, %esp 730; SSE-32-NEXT: popl %ebp 731; SSE-32-NEXT: .cfi_def_cfa %esp, 4 732; SSE-32-NEXT: retl 733; 734; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 735; SSE-64: # %bb.0: 736; SSE-64-NEXT: movaps (%rdi), %xmm1 737; SSE-64-NEXT: cvttss2si 
%xmm1, %rax 738; SSE-64-NEXT: movq %rax, %xmm0 739; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 740; SSE-64-NEXT: cvttss2si %xmm1, %rax 741; SSE-64-NEXT: movq %rax, %xmm1 742; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 743; SSE-64-NEXT: retq 744; 745; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 746; AVX-32: # %bb.0: 747; AVX-32-NEXT: pushl %ebp 748; AVX-32-NEXT: .cfi_def_cfa_offset 8 749; AVX-32-NEXT: .cfi_offset %ebp, -8 750; AVX-32-NEXT: movl %esp, %ebp 751; AVX-32-NEXT: .cfi_def_cfa_register %ebp 752; AVX-32-NEXT: andl $-8, %esp 753; AVX-32-NEXT: subl $16, %esp 754; AVX-32-NEXT: movl 8(%ebp), %eax 755; AVX-32-NEXT: vmovaps (%eax), %xmm0 756; AVX-32-NEXT: vmovss %xmm0, (%esp) 757; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 758; AVX-32-NEXT: flds (%esp) 759; AVX-32-NEXT: fisttpll (%esp) 760; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 761; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 762; AVX-32-NEXT: wait 763; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 764; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 765; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 766; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 767; AVX-32-NEXT: movl %ebp, %esp 768; AVX-32-NEXT: popl %ebp 769; AVX-32-NEXT: .cfi_def_cfa %esp, 4 770; AVX-32-NEXT: retl 771; 772; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 773; AVX-64: # %bb.0: 774; AVX-64-NEXT: vcvttss2si 4(%rdi), %rax 775; AVX-64-NEXT: vmovq %rax, %xmm0 776; AVX-64-NEXT: vcvttss2si (%rdi), %rax 777; AVX-64-NEXT: vmovq %rax, %xmm1 778; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 779; AVX-64-NEXT: retq 780; 781; AVX512F-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 782; AVX512F-32: # %bb.0: 783; AVX512F-32-NEXT: pushl %ebp 784; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 785; AVX512F-32-NEXT: .cfi_offset %ebp, -8 786; AVX512F-32-NEXT: movl %esp, %ebp 787; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp 788; AVX512F-32-NEXT: andl $-8, %esp 789; AVX512F-32-NEXT: subl $16, %esp 790; AVX512F-32-NEXT: movl 8(%ebp), %eax 791; AVX512F-32-NEXT: vmovdqa (%eax), %xmm0 792; AVX512F-32-NEXT: vmovd %xmm0, (%esp) 793; AVX512F-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 794; AVX512F-32-NEXT: flds (%esp) 795; AVX512F-32-NEXT: fisttpll (%esp) 796; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) 797; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 798; AVX512F-32-NEXT: wait 799; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 800; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 801; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 802; AVX512F-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 803; AVX512F-32-NEXT: movl %ebp, %esp 804; AVX512F-32-NEXT: popl %ebp 805; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4 806; AVX512F-32-NEXT: retl 807; 808; AVX512F-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 809; AVX512F-64: # %bb.0: 810; AVX512F-64-NEXT: vcvttss2si 4(%rdi), %rax 811; AVX512F-64-NEXT: vmovq %rax, %xmm0 812; AVX512F-64-NEXT: vcvttss2si (%rdi), %rax 813; AVX512F-64-NEXT: vmovq %rax, %xmm1 814; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 815; AVX512F-64-NEXT: retq 816; 817; AVX512VL-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 818; AVX512VL-32: # %bb.0: 819; AVX512VL-32-NEXT: pushl %ebp 820; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 821; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 822; AVX512VL-32-NEXT: movl %esp, %ebp 823; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 824; AVX512VL-32-NEXT: andl $-8, %esp 825; AVX512VL-32-NEXT: subl 
$16, %esp 826; AVX512VL-32-NEXT: movl 8(%ebp), %eax 827; AVX512VL-32-NEXT: vmovdqa (%eax), %xmm0 828; AVX512VL-32-NEXT: vmovd %xmm0, (%esp) 829; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 830; AVX512VL-32-NEXT: flds (%esp) 831; AVX512VL-32-NEXT: fisttpll (%esp) 832; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 833; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 834; AVX512VL-32-NEXT: wait 835; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 836; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 837; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 838; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 839; AVX512VL-32-NEXT: movl %ebp, %esp 840; AVX512VL-32-NEXT: popl %ebp 841; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 842; AVX512VL-32-NEXT: retl 843; 844; AVX512VL-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 845; AVX512VL-64: # %bb.0: 846; AVX512VL-64-NEXT: vcvttss2si 4(%rdi), %rax 847; AVX512VL-64-NEXT: vmovq %rax, %xmm0 848; AVX512VL-64-NEXT: vcvttss2si (%rdi), %rax 849; AVX512VL-64-NEXT: vmovq %rax, %xmm1 850; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 851; AVX512VL-64-NEXT: retq 852; 853; AVX512DQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 854; AVX512DQ-32: # %bb.0: 855; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax 856; AVX512DQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 857; AVX512DQ-32-NEXT: vcvttps2qq %ymm0, %zmm0 858; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 859; AVX512DQ-32-NEXT: vzeroupper 860; AVX512DQ-32-NEXT: retl 861; 862; AVX512DQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 863; AVX512DQ-64: # %bb.0: 864; AVX512DQ-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 865; AVX512DQ-64-NEXT: vcvttps2qq %ymm0, %zmm0 866; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 867; AVX512DQ-64-NEXT: vzeroupper 868; AVX512DQ-64-NEXT: retq 869; 870; AVX512VLDQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 871; AVX512VLDQ-32: # %bb.0: 872; AVX512VLDQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax 873; AVX512VLDQ-32-NEXT: vcvttps2qq (%eax), %xmm0 874; AVX512VLDQ-32-NEXT: retl 875; 876; AVX512VLDQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128: 877; AVX512VLDQ-64: # %bb.0: 878; AVX512VLDQ-64-NEXT: vcvttps2qq (%rdi), %xmm0 879; AVX512VLDQ-64-NEXT: retq 880 %a = load <4 x float>, ptr %x 881 %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> 882 %c = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0 883 ret <2 x i64> %c 884} 885 886define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 { 887; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64: 888; SSE-32: # %bb.0: 889; SSE-32-NEXT: pushl %ebp 890; SSE-32-NEXT: .cfi_def_cfa_offset 8 891; SSE-32-NEXT: .cfi_offset %ebp, -8 892; SSE-32-NEXT: movl %esp, %ebp 893; SSE-32-NEXT: .cfi_def_cfa_register %ebp 894; SSE-32-NEXT: andl $-8, %esp 895; SSE-32-NEXT: subl $24, %esp 896; SSE-32-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 897; SSE-32-NEXT: comiss %xmm1, %xmm0 898; SSE-32-NEXT: movaps %xmm1, %xmm2 899; SSE-32-NEXT: jae .LBB4_2 900; SSE-32-NEXT: # %bb.1: 901; SSE-32-NEXT: xorps %xmm2, %xmm2 902; SSE-32-NEXT: .LBB4_2: 903; SSE-32-NEXT: movaps %xmm0, %xmm3 904; SSE-32-NEXT: subss %xmm2, %xmm3 905; SSE-32-NEXT: movss %xmm3, {{[0-9]+}}(%esp) 906; SSE-32-NEXT: setae %al 907; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 908; SSE-32-NEXT: wait 909; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) 910; SSE-32-NEXT: 
movzwl {{[0-9]+}}(%esp), %ecx 911; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00 912; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp) 913; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 914; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 915; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 916; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 917; SSE-32-NEXT: comiss %xmm1, %xmm0 918; SSE-32-NEXT: jae .LBB4_4 919; SSE-32-NEXT: # %bb.3: 920; SSE-32-NEXT: xorps %xmm1, %xmm1 921; SSE-32-NEXT: .LBB4_4: 922; SSE-32-NEXT: subss %xmm1, %xmm0 923; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 924; SSE-32-NEXT: setae %cl 925; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 926; SSE-32-NEXT: wait 927; SSE-32-NEXT: fnstcw (%esp) 928; SSE-32-NEXT: movzwl (%esp), %edx 929; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00 930; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp) 931; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 932; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 933; SSE-32-NEXT: fldcw (%esp) 934; SSE-32-NEXT: movzbl %al, %eax 935; SSE-32-NEXT: shll $31, %eax 936; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 937; SSE-32-NEXT: movd %eax, %xmm1 938; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 939; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 940; SSE-32-NEXT: movzbl %cl, %eax 941; SSE-32-NEXT: shll $31, %eax 942; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 943; SSE-32-NEXT: movd %eax, %xmm1 944; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 945; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 946; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 947; SSE-32-NEXT: movl %ebp, %esp 948; SSE-32-NEXT: popl %ebp 949; SSE-32-NEXT: .cfi_def_cfa %esp, 4 950; SSE-32-NEXT: retl 951; 952; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64: 953; SSE-64: # %bb.0: 954; SSE-64-NEXT: movss {{.*#+}} xmm3 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 955; SSE-64-NEXT: comiss %xmm3, %xmm0 956; SSE-64-NEXT: xorps %xmm2, %xmm2 957; SSE-64-NEXT: xorps %xmm1, %xmm1 958; SSE-64-NEXT: jb .LBB4_2 959; SSE-64-NEXT: # %bb.1: 960; SSE-64-NEXT: movaps %xmm3, %xmm1 961; SSE-64-NEXT: .LBB4_2: 962; SSE-64-NEXT: movaps %xmm0, %xmm4 963; SSE-64-NEXT: subss %xmm1, %xmm4 964; SSE-64-NEXT: cvttss2si %xmm4, %rax 965; SSE-64-NEXT: setae %cl 966; SSE-64-NEXT: movzbl %cl, %ecx 967; SSE-64-NEXT: shlq $63, %rcx 968; SSE-64-NEXT: xorq %rax, %rcx 969; SSE-64-NEXT: movq %rcx, %xmm1 970; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 971; SSE-64-NEXT: comiss %xmm3, %xmm0 972; SSE-64-NEXT: jb .LBB4_4 973; SSE-64-NEXT: # %bb.3: 974; SSE-64-NEXT: movaps %xmm3, %xmm2 975; SSE-64-NEXT: .LBB4_4: 976; SSE-64-NEXT: subss %xmm2, %xmm0 977; SSE-64-NEXT: cvttss2si %xmm0, %rax 978; SSE-64-NEXT: setae %cl 979; SSE-64-NEXT: movzbl %cl, %ecx 980; SSE-64-NEXT: shlq $63, %rcx 981; SSE-64-NEXT: xorq %rax, %rcx 982; SSE-64-NEXT: movq %rcx, %xmm0 983; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 984; SSE-64-NEXT: movdqa %xmm1, %xmm0 985; SSE-64-NEXT: retq 986; 987; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64: 988; AVX-32: # %bb.0: 989; AVX-32-NEXT: pushl %ebp 990; AVX-32-NEXT: .cfi_def_cfa_offset 8 991; AVX-32-NEXT: .cfi_offset %ebp, -8 992; AVX-32-NEXT: movl %esp, %ebp 993; AVX-32-NEXT: .cfi_def_cfa_register %ebp 994; AVX-32-NEXT: andl $-8, %esp 995; AVX-32-NEXT: subl $16, %esp 996; AVX-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 997; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 998; AVX-32-NEXT: vcomiss %xmm1, %xmm2 999; AVX-32-NEXT: vmovaps %xmm1, %xmm3 1000; AVX-32-NEXT: jae .LBB4_2 1001; AVX-32-NEXT: # %bb.1: 1002; AVX-32-NEXT: 
vxorps %xmm3, %xmm3, %xmm3 1003; AVX-32-NEXT: .LBB4_2: 1004; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2 1005; AVX-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 1006; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 1007; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1008; AVX-32-NEXT: wait 1009; AVX-32-NEXT: setae %al 1010; AVX-32-NEXT: movzbl %al, %eax 1011; AVX-32-NEXT: shll $31, %eax 1012; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 1013; AVX-32-NEXT: vcomiss %xmm1, %xmm0 1014; AVX-32-NEXT: jae .LBB4_4 1015; AVX-32-NEXT: # %bb.3: 1016; AVX-32-NEXT: vxorps %xmm1, %xmm1, %xmm1 1017; AVX-32-NEXT: .LBB4_4: 1018; AVX-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 1019; AVX-32-NEXT: vmovss %xmm0, (%esp) 1020; AVX-32-NEXT: flds (%esp) 1021; AVX-32-NEXT: fisttpll (%esp) 1022; AVX-32-NEXT: wait 1023; AVX-32-NEXT: setae %cl 1024; AVX-32-NEXT: movzbl %cl, %ecx 1025; AVX-32-NEXT: shll $31, %ecx 1026; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 1027; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1028; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1029; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 1030; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1031; AVX-32-NEXT: movl %ebp, %esp 1032; AVX-32-NEXT: popl %ebp 1033; AVX-32-NEXT: .cfi_def_cfa %esp, 4 1034; AVX-32-NEXT: retl 1035; 1036; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64: 1037; AVX-64: # %bb.0: 1038; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 1039; AVX-64-NEXT: vcomiss %xmm1, %xmm0 1040; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2 1041; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3 1042; AVX-64-NEXT: jb .LBB4_2 1043; AVX-64-NEXT: # %bb.1: 1044; AVX-64-NEXT: vmovaps %xmm1, %xmm3 1045; AVX-64-NEXT: .LBB4_2: 1046; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3 1047; AVX-64-NEXT: vcvttss2si %xmm3, %rax 1048; AVX-64-NEXT: setae %cl 1049; AVX-64-NEXT: movzbl %cl, %ecx 1050; AVX-64-NEXT: shlq $63, %rcx 1051; AVX-64-NEXT: xorq %rax, %rcx 1052; AVX-64-NEXT: vmovq %rcx, %xmm3 1053; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1054; AVX-64-NEXT: vcomiss %xmm1, %xmm0 1055; AVX-64-NEXT: jb .LBB4_4 1056; AVX-64-NEXT: # %bb.3: 1057; AVX-64-NEXT: vmovaps %xmm1, %xmm2 1058; AVX-64-NEXT: .LBB4_4: 1059; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0 1060; AVX-64-NEXT: vcvttss2si %xmm0, %rax 1061; AVX-64-NEXT: setae %cl 1062; AVX-64-NEXT: movzbl %cl, %ecx 1063; AVX-64-NEXT: shlq $63, %rcx 1064; AVX-64-NEXT: xorq %rax, %rcx 1065; AVX-64-NEXT: vmovq %rcx, %xmm0 1066; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 1067; AVX-64-NEXT: retq 1068; 1069; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64: 1070; AVX512F-32: # %bb.0: 1071; AVX512F-32-NEXT: pushl %ebp 1072; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 1073; AVX512F-32-NEXT: .cfi_offset %ebp, -8 1074; AVX512F-32-NEXT: movl %esp, %ebp 1075; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp 1076; AVX512F-32-NEXT: andl $-8, %esp 1077; AVX512F-32-NEXT: subl $16, %esp 1078; AVX512F-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1079; AVX512F-32-NEXT: vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 1080; AVX512F-32-NEXT: xorl %eax, %eax 1081; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1 1082; AVX512F-32-NEXT: setae %al 1083; AVX512F-32-NEXT: kmovw %eax, %k1 1084; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z} 1085; AVX512F-32-NEXT: vsubss %xmm3, %xmm1, %xmm1 1086; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) 1087; AVX512F-32-NEXT: xorl %ecx, %ecx 1088; AVX512F-32-NEXT: vcomiss %xmm2, %xmm0 1089; AVX512F-32-NEXT: setae %cl 1090; AVX512F-32-NEXT: kmovw %ecx, %k1 1091; AVX512F-32-NEXT: vmovss %xmm2, 
%xmm2, %xmm1 {%k1} {z} 1092; AVX512F-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 1093; AVX512F-32-NEXT: vmovss %xmm0, (%esp) 1094; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) 1095; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1096; AVX512F-32-NEXT: flds (%esp) 1097; AVX512F-32-NEXT: fisttpll (%esp) 1098; AVX512F-32-NEXT: wait 1099; AVX512F-32-NEXT: shll $31, %eax 1100; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 1101; AVX512F-32-NEXT: shll $31, %ecx 1102; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 1103; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1104; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1105; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 1106; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1107; AVX512F-32-NEXT: movl %ebp, %esp 1108; AVX512F-32-NEXT: popl %ebp 1109; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4 1110; AVX512F-32-NEXT: retl 1111; 1112; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64: 1113; AVX512F-64: # %bb.0: 1114; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax 1115; AVX512F-64-NEXT: vmovq %rax, %xmm1 1116; AVX512F-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1117; AVX512F-64-NEXT: vcvttss2usi %xmm0, %rax 1118; AVX512F-64-NEXT: vmovq %rax, %xmm0 1119; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1120; AVX512F-64-NEXT: retq 1121; 1122; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64: 1123; AVX512VL-32: # %bb.0: 1124; AVX512VL-32-NEXT: pushl %ebp 1125; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 1126; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 1127; AVX512VL-32-NEXT: movl %esp, %ebp 1128; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 1129; AVX512VL-32-NEXT: andl $-8, %esp 1130; AVX512VL-32-NEXT: subl $16, %esp 1131; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1132; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 1133; AVX512VL-32-NEXT: xorl %eax, %eax 1134; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1 1135; AVX512VL-32-NEXT: setae %al 1136; AVX512VL-32-NEXT: kmovw %eax, %k1 1137; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z} 1138; AVX512VL-32-NEXT: vsubss %xmm3, %xmm1, %xmm1 1139; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) 1140; AVX512VL-32-NEXT: xorl %ecx, %ecx 1141; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm0 1142; AVX512VL-32-NEXT: setae %cl 1143; AVX512VL-32-NEXT: kmovw %ecx, %k1 1144; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm1 {%k1} {z} 1145; AVX512VL-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 1146; AVX512VL-32-NEXT: vmovss %xmm0, (%esp) 1147; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 1148; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1149; AVX512VL-32-NEXT: flds (%esp) 1150; AVX512VL-32-NEXT: fisttpll (%esp) 1151; AVX512VL-32-NEXT: wait 1152; AVX512VL-32-NEXT: shll $31, %eax 1153; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 1154; AVX512VL-32-NEXT: shll $31, %ecx 1155; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 1156; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1157; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1158; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 1159; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1160; AVX512VL-32-NEXT: movl %ebp, %esp 1161; AVX512VL-32-NEXT: popl %ebp 1162; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 1163; AVX512VL-32-NEXT: retl 1164; 1165; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64: 1166; AVX512VL-64: # %bb.0: 1167; AVX512VL-64-NEXT: vcvttss2usi %xmm0, %rax 1168; AVX512VL-64-NEXT: vmovq %rax, %xmm1 1169; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 1170; AVX512VL-64-NEXT: vcvttss2usi %xmm0, 
%rax 1171; AVX512VL-64-NEXT: vmovq %rax, %xmm0 1172; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1173; AVX512VL-64-NEXT: retq 1174; 1175; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64: 1176; AVX512DQ: # %bb.0: 1177; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1178; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 1179; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1180; AVX512DQ-NEXT: vzeroupper 1181; AVX512DQ-NEXT: ret{{[l|q]}} 1182; 1183; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64: 1184; AVX512VLDQ: # %bb.0: 1185; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0 1186; AVX512VLDQ-NEXT: ret{{[l|q]}} 1187 %ret = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %a, 1188 metadata !"fpexcept.strict") #0 1189 ret <2 x i64> %ret 1190} 1191 1192define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64_load128(ptr %x) strictfp { 1193; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1194; SSE-32: # %bb.0: 1195; SSE-32-NEXT: pushl %ebp 1196; SSE-32-NEXT: .cfi_def_cfa_offset 8 1197; SSE-32-NEXT: .cfi_offset %ebp, -8 1198; SSE-32-NEXT: movl %esp, %ebp 1199; SSE-32-NEXT: .cfi_def_cfa_register %ebp 1200; SSE-32-NEXT: andl $-8, %esp 1201; SSE-32-NEXT: subl $24, %esp 1202; SSE-32-NEXT: movl 8(%ebp), %eax 1203; SSE-32-NEXT: movaps (%eax), %xmm0 1204; SSE-32-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 1205; SSE-32-NEXT: comiss %xmm1, %xmm0 1206; SSE-32-NEXT: movaps %xmm1, %xmm2 1207; SSE-32-NEXT: jae .LBB5_2 1208; SSE-32-NEXT: # %bb.1: 1209; SSE-32-NEXT: xorps %xmm2, %xmm2 1210; SSE-32-NEXT: .LBB5_2: 1211; SSE-32-NEXT: movaps %xmm0, %xmm3 1212; SSE-32-NEXT: subss %xmm2, %xmm3 1213; SSE-32-NEXT: movss %xmm3, {{[0-9]+}}(%esp) 1214; SSE-32-NEXT: setae %al 1215; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 1216; SSE-32-NEXT: wait 1217; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) 1218; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx 1219; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00 1220; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp) 1221; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 1222; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 1223; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 1224; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1225; SSE-32-NEXT: comiss %xmm1, %xmm0 1226; SSE-32-NEXT: jae .LBB5_4 1227; SSE-32-NEXT: # %bb.3: 1228; SSE-32-NEXT: xorps %xmm1, %xmm1 1229; SSE-32-NEXT: .LBB5_4: 1230; SSE-32-NEXT: subss %xmm1, %xmm0 1231; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 1232; SSE-32-NEXT: setae %cl 1233; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 1234; SSE-32-NEXT: wait 1235; SSE-32-NEXT: fnstcw (%esp) 1236; SSE-32-NEXT: movzwl (%esp), %edx 1237; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00 1238; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp) 1239; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 1240; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 1241; SSE-32-NEXT: fldcw (%esp) 1242; SSE-32-NEXT: movzbl %al, %eax 1243; SSE-32-NEXT: shll $31, %eax 1244; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 1245; SSE-32-NEXT: movd %eax, %xmm1 1246; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1247; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1248; SSE-32-NEXT: movzbl %cl, %eax 1249; SSE-32-NEXT: shll $31, %eax 1250; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 1251; SSE-32-NEXT: movd %eax, %xmm1 1252; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 1253; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1254; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 1255; SSE-32-NEXT: movl %ebp, %esp 1256; SSE-32-NEXT: popl %ebp 1257; SSE-32-NEXT: 
.cfi_def_cfa %esp, 4 1258; SSE-32-NEXT: retl 1259; 1260; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1261; SSE-64: # %bb.0: 1262; SSE-64-NEXT: movaps (%rdi), %xmm1 1263; SSE-64-NEXT: movss {{.*#+}} xmm3 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 1264; SSE-64-NEXT: comiss %xmm3, %xmm1 1265; SSE-64-NEXT: xorps %xmm2, %xmm2 1266; SSE-64-NEXT: xorps %xmm0, %xmm0 1267; SSE-64-NEXT: jb .LBB5_2 1268; SSE-64-NEXT: # %bb.1: 1269; SSE-64-NEXT: movaps %xmm3, %xmm0 1270; SSE-64-NEXT: .LBB5_2: 1271; SSE-64-NEXT: movaps %xmm1, %xmm4 1272; SSE-64-NEXT: subss %xmm0, %xmm4 1273; SSE-64-NEXT: cvttss2si %xmm4, %rax 1274; SSE-64-NEXT: setae %cl 1275; SSE-64-NEXT: movzbl %cl, %ecx 1276; SSE-64-NEXT: shlq $63, %rcx 1277; SSE-64-NEXT: xorq %rax, %rcx 1278; SSE-64-NEXT: movq %rcx, %xmm0 1279; SSE-64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 1280; SSE-64-NEXT: comiss %xmm3, %xmm1 1281; SSE-64-NEXT: jb .LBB5_4 1282; SSE-64-NEXT: # %bb.3: 1283; SSE-64-NEXT: movaps %xmm3, %xmm2 1284; SSE-64-NEXT: .LBB5_4: 1285; SSE-64-NEXT: subss %xmm2, %xmm1 1286; SSE-64-NEXT: cvttss2si %xmm1, %rax 1287; SSE-64-NEXT: setae %cl 1288; SSE-64-NEXT: movzbl %cl, %ecx 1289; SSE-64-NEXT: shlq $63, %rcx 1290; SSE-64-NEXT: xorq %rax, %rcx 1291; SSE-64-NEXT: movq %rcx, %xmm1 1292; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1293; SSE-64-NEXT: retq 1294; 1295; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1296; AVX-32: # %bb.0: 1297; AVX-32-NEXT: pushl %ebp 1298; AVX-32-NEXT: .cfi_def_cfa_offset 8 1299; AVX-32-NEXT: .cfi_offset %ebp, -8 1300; AVX-32-NEXT: movl %esp, %ebp 1301; AVX-32-NEXT: .cfi_def_cfa_register %ebp 1302; AVX-32-NEXT: andl $-8, %esp 1303; AVX-32-NEXT: subl $16, %esp 1304; AVX-32-NEXT: movl 8(%ebp), %eax 1305; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1306; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 1307; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 1308; AVX-32-NEXT: vcomiss %xmm1, %xmm2 1309; AVX-32-NEXT: vmovaps %xmm1, %xmm3 1310; AVX-32-NEXT: jae .LBB5_2 1311; AVX-32-NEXT: # %bb.1: 1312; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3 1313; AVX-32-NEXT: .LBB5_2: 1314; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2 1315; AVX-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 1316; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 1317; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1318; AVX-32-NEXT: wait 1319; AVX-32-NEXT: setae %al 1320; AVX-32-NEXT: movzbl %al, %eax 1321; AVX-32-NEXT: shll $31, %eax 1322; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 1323; AVX-32-NEXT: vcomiss %xmm1, %xmm0 1324; AVX-32-NEXT: jae .LBB5_4 1325; AVX-32-NEXT: # %bb.3: 1326; AVX-32-NEXT: vxorps %xmm1, %xmm1, %xmm1 1327; AVX-32-NEXT: .LBB5_4: 1328; AVX-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 1329; AVX-32-NEXT: vmovss %xmm0, (%esp) 1330; AVX-32-NEXT: flds (%esp) 1331; AVX-32-NEXT: fisttpll (%esp) 1332; AVX-32-NEXT: wait 1333; AVX-32-NEXT: setae %cl 1334; AVX-32-NEXT: movzbl %cl, %ecx 1335; AVX-32-NEXT: shll $31, %ecx 1336; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 1337; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1338; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1339; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 1340; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1341; AVX-32-NEXT: movl %ebp, %esp 1342; AVX-32-NEXT: popl %ebp 1343; AVX-32-NEXT: .cfi_def_cfa %esp, 4 1344; AVX-32-NEXT: retl 1345; 1346; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1347; AVX-64: # %bb.0: 1348; AVX-64-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1349; AVX-64-NEXT: vmovss {{.*#+}} xmm3 = 
mem[0],zero,zero,zero 1350; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 1351; AVX-64-NEXT: vcomiss %xmm1, %xmm3 1352; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2 1353; AVX-64-NEXT: vxorps %xmm4, %xmm4, %xmm4 1354; AVX-64-NEXT: jb .LBB5_2 1355; AVX-64-NEXT: # %bb.1: 1356; AVX-64-NEXT: vmovaps %xmm1, %xmm4 1357; AVX-64-NEXT: .LBB5_2: 1358; AVX-64-NEXT: vsubss %xmm4, %xmm3, %xmm3 1359; AVX-64-NEXT: vcvttss2si %xmm3, %rax 1360; AVX-64-NEXT: setae %cl 1361; AVX-64-NEXT: movzbl %cl, %ecx 1362; AVX-64-NEXT: shlq $63, %rcx 1363; AVX-64-NEXT: xorq %rax, %rcx 1364; AVX-64-NEXT: vmovq %rcx, %xmm3 1365; AVX-64-NEXT: vcomiss %xmm1, %xmm0 1366; AVX-64-NEXT: jb .LBB5_4 1367; AVX-64-NEXT: # %bb.3: 1368; AVX-64-NEXT: vmovaps %xmm1, %xmm2 1369; AVX-64-NEXT: .LBB5_4: 1370; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0 1371; AVX-64-NEXT: vcvttss2si %xmm0, %rax 1372; AVX-64-NEXT: setae %cl 1373; AVX-64-NEXT: movzbl %cl, %ecx 1374; AVX-64-NEXT: shlq $63, %rcx 1375; AVX-64-NEXT: xorq %rax, %rcx 1376; AVX-64-NEXT: vmovq %rcx, %xmm0 1377; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 1378; AVX-64-NEXT: retq 1379; 1380; AVX512F-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1381; AVX512F-32: # %bb.0: 1382; AVX512F-32-NEXT: pushl %ebp 1383; AVX512F-32-NEXT: .cfi_def_cfa_offset 8 1384; AVX512F-32-NEXT: .cfi_offset %ebp, -8 1385; AVX512F-32-NEXT: movl %esp, %ebp 1386; AVX512F-32-NEXT: .cfi_def_cfa_register %ebp 1387; AVX512F-32-NEXT: andl $-8, %esp 1388; AVX512F-32-NEXT: subl $16, %esp 1389; AVX512F-32-NEXT: movl 8(%ebp), %eax 1390; AVX512F-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1391; AVX512F-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1392; AVX512F-32-NEXT: vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 1393; AVX512F-32-NEXT: xorl %eax, %eax 1394; AVX512F-32-NEXT: vcomiss %xmm2, %xmm1 1395; AVX512F-32-NEXT: setae %al 1396; AVX512F-32-NEXT: kmovw %eax, %k1 1397; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z} 1398; AVX512F-32-NEXT: vsubss %xmm3, %xmm1, %xmm1 1399; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) 1400; AVX512F-32-NEXT: xorl %ecx, %ecx 1401; AVX512F-32-NEXT: vcomiss %xmm2, %xmm0 1402; AVX512F-32-NEXT: setae %cl 1403; AVX512F-32-NEXT: kmovw %ecx, %k1 1404; AVX512F-32-NEXT: vmovss %xmm2, %xmm2, %xmm1 {%k1} {z} 1405; AVX512F-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 1406; AVX512F-32-NEXT: vmovss %xmm0, (%esp) 1407; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) 1408; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1409; AVX512F-32-NEXT: flds (%esp) 1410; AVX512F-32-NEXT: fisttpll (%esp) 1411; AVX512F-32-NEXT: wait 1412; AVX512F-32-NEXT: shll $31, %eax 1413; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 1414; AVX512F-32-NEXT: shll $31, %ecx 1415; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 1416; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1417; AVX512F-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1418; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 1419; AVX512F-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1420; AVX512F-32-NEXT: movl %ebp, %esp 1421; AVX512F-32-NEXT: popl %ebp 1422; AVX512F-32-NEXT: .cfi_def_cfa %esp, 4 1423; AVX512F-32-NEXT: retl 1424; 1425; AVX512F-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1426; AVX512F-64: # %bb.0: 1427; AVX512F-64-NEXT: vcvttss2usi 4(%rdi), %rax 1428; AVX512F-64-NEXT: vmovq %rax, %xmm0 1429; AVX512F-64-NEXT: vcvttss2usi (%rdi), %rax 1430; AVX512F-64-NEXT: vmovq %rax, %xmm1 1431; AVX512F-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1432; AVX512F-64-NEXT: retq 1433; 
1434; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1435; AVX512VL-32: # %bb.0: 1436; AVX512VL-32-NEXT: pushl %ebp 1437; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 1438; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 1439; AVX512VL-32-NEXT: movl %esp, %ebp 1440; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 1441; AVX512VL-32-NEXT: andl $-8, %esp 1442; AVX512VL-32-NEXT: subl $16, %esp 1443; AVX512VL-32-NEXT: movl 8(%ebp), %eax 1444; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1445; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1446; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 1447; AVX512VL-32-NEXT: xorl %eax, %eax 1448; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1 1449; AVX512VL-32-NEXT: setae %al 1450; AVX512VL-32-NEXT: kmovw %eax, %k1 1451; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm3 {%k1} {z} 1452; AVX512VL-32-NEXT: vsubss %xmm3, %xmm1, %xmm1 1453; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) 1454; AVX512VL-32-NEXT: xorl %ecx, %ecx 1455; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm0 1456; AVX512VL-32-NEXT: setae %cl 1457; AVX512VL-32-NEXT: kmovw %ecx, %k1 1458; AVX512VL-32-NEXT: vmovss %xmm2, %xmm2, %xmm1 {%k1} {z} 1459; AVX512VL-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 1460; AVX512VL-32-NEXT: vmovss %xmm0, (%esp) 1461; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 1462; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1463; AVX512VL-32-NEXT: flds (%esp) 1464; AVX512VL-32-NEXT: fisttpll (%esp) 1465; AVX512VL-32-NEXT: wait 1466; AVX512VL-32-NEXT: shll $31, %eax 1467; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 1468; AVX512VL-32-NEXT: shll $31, %ecx 1469; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 1470; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1471; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1472; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 1473; AVX512VL-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1474; AVX512VL-32-NEXT: movl %ebp, %esp 1475; AVX512VL-32-NEXT: popl %ebp 1476; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 1477; AVX512VL-32-NEXT: retl 1478; 1479; AVX512VL-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1480; AVX512VL-64: # %bb.0: 1481; AVX512VL-64-NEXT: vcvttss2usi 4(%rdi), %rax 1482; AVX512VL-64-NEXT: vmovq %rax, %xmm0 1483; AVX512VL-64-NEXT: vcvttss2usi (%rdi), %rax 1484; AVX512VL-64-NEXT: vmovq %rax, %xmm1 1485; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1486; AVX512VL-64-NEXT: retq 1487; 1488; AVX512DQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1489; AVX512DQ-32: # %bb.0: 1490; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1491; AVX512DQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1492; AVX512DQ-32-NEXT: vcvttps2uqq %ymm0, %zmm0 1493; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1494; AVX512DQ-32-NEXT: vzeroupper 1495; AVX512DQ-32-NEXT: retl 1496; 1497; AVX512DQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1498; AVX512DQ-64: # %bb.0: 1499; AVX512DQ-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 1500; AVX512DQ-64-NEXT: vcvttps2uqq %ymm0, %zmm0 1501; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1502; AVX512DQ-64-NEXT: vzeroupper 1503; AVX512DQ-64-NEXT: retq 1504; 1505; AVX512VLDQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1506; AVX512VLDQ-32: # %bb.0: 1507; AVX512VLDQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax 1508; AVX512VLDQ-32-NEXT: vcvttps2uqq (%eax), %xmm0 1509; AVX512VLDQ-32-NEXT: retl 1510; 1511; AVX512VLDQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128: 1512; AVX512VLDQ-64: # %bb.0: 
1513; AVX512VLDQ-64-NEXT: vcvttps2uqq (%rdi), %xmm0 1514; AVX512VLDQ-64-NEXT: retq 1515 %a = load <4 x float>, ptr %x 1516 %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> 1517 %c = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float> %b, metadata !"fpexcept.strict") #0 1518 ret <2 x i64> %c 1519} 1520 1521define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 { 1522; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i32: 1523; SSE-32: # %bb.0: 1524; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 1525; SSE-32-NEXT: retl 1526; 1527; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i32: 1528; SSE-64: # %bb.0: 1529; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 1530; SSE-64-NEXT: retq 1531; 1532; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i32: 1533; AVX: # %bb.0: 1534; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 1535; AVX-NEXT: ret{{[l|q]}} 1536; 1537; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i32: 1538; AVX512F: # %bb.0: 1539; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 1540; AVX512F-NEXT: ret{{[l|q]}} 1541; 1542; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i32: 1543; AVX512VL: # %bb.0: 1544; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 1545; AVX512VL-NEXT: ret{{[l|q]}} 1546; 1547; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i32: 1548; AVX512DQ: # %bb.0: 1549; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 1550; AVX512DQ-NEXT: ret{{[l|q]}} 1551; 1552; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i32: 1553; AVX512VLDQ: # %bb.0: 1554; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 1555; AVX512VLDQ-NEXT: ret{{[l|q]}} 1556 %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double> %a, 1557 metadata !"fpexcept.strict") #0 1558 ret <2 x i32> %ret 1559} 1560 1561define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 { 1562; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i32: 1563; SSE-32: # %bb.0: 1564; SSE-32-NEXT: movsd {{.*#+}} xmm3 = [2.147483648E+9,0.0E+0] 1565; SSE-32-NEXT: comisd %xmm3, %xmm0 1566; SSE-32-NEXT: xorpd %xmm2, %xmm2 1567; SSE-32-NEXT: xorpd %xmm1, %xmm1 1568; SSE-32-NEXT: jb .LBB7_2 1569; SSE-32-NEXT: # %bb.1: 1570; SSE-32-NEXT: movapd %xmm3, %xmm1 1571; SSE-32-NEXT: .LBB7_2: 1572; SSE-32-NEXT: setae %al 1573; SSE-32-NEXT: movzbl %al, %eax 1574; SSE-32-NEXT: shll $31, %eax 1575; SSE-32-NEXT: movapd %xmm0, %xmm4 1576; SSE-32-NEXT: subsd %xmm1, %xmm4 1577; SSE-32-NEXT: cvttsd2si %xmm4, %ecx 1578; SSE-32-NEXT: xorl %eax, %ecx 1579; SSE-32-NEXT: movd %ecx, %xmm1 1580; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 1581; SSE-32-NEXT: comisd %xmm3, %xmm0 1582; SSE-32-NEXT: jb .LBB7_4 1583; SSE-32-NEXT: # %bb.3: 1584; SSE-32-NEXT: movapd %xmm3, %xmm2 1585; SSE-32-NEXT: .LBB7_4: 1586; SSE-32-NEXT: setae %al 1587; SSE-32-NEXT: movzbl %al, %eax 1588; SSE-32-NEXT: shll $31, %eax 1589; SSE-32-NEXT: subsd %xmm2, %xmm0 1590; SSE-32-NEXT: cvttsd2si %xmm0, %ecx 1591; SSE-32-NEXT: xorl %eax, %ecx 1592; SSE-32-NEXT: movd %ecx, %xmm0 1593; SSE-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1594; SSE-32-NEXT: movdqa %xmm1, %xmm0 1595; SSE-32-NEXT: retl 1596; 1597; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i32: 1598; SSE-64: # %bb.0: 1599; SSE-64-NEXT: cvttsd2si %xmm0, %rax 1600; SSE-64-NEXT: movd %eax, %xmm1 1601; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 1602; SSE-64-NEXT: cvttsd2si %xmm0, %rax 1603; SSE-64-NEXT: movd %eax, %xmm0 1604; SSE-64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1605; SSE-64-NEXT: movdqa %xmm1, %xmm0 1606; SSE-64-NEXT: retq 1607; 1608; AVX-32-LABEL: 
strict_vector_fptoui_v2f64_to_v2i32: 1609; AVX-32: # %bb.0: 1610; AVX-32-NEXT: pushl %ebp 1611; AVX-32-NEXT: .cfi_def_cfa_offset 8 1612; AVX-32-NEXT: .cfi_offset %ebp, -8 1613; AVX-32-NEXT: movl %esp, %ebp 1614; AVX-32-NEXT: .cfi_def_cfa_register %ebp 1615; AVX-32-NEXT: andl $-8, %esp 1616; AVX-32-NEXT: subl $16, %esp 1617; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 1618; AVX-32-NEXT: vmovhps %xmm0, (%esp) 1619; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) 1620; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1621; AVX-32-NEXT: fldl (%esp) 1622; AVX-32-NEXT: fisttpll (%esp) 1623; AVX-32-NEXT: wait 1624; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1625; AVX-32-NEXT: vpinsrd $1, (%esp), %xmm0, %xmm0 1626; AVX-32-NEXT: movl %ebp, %esp 1627; AVX-32-NEXT: popl %ebp 1628; AVX-32-NEXT: .cfi_def_cfa %esp, 4 1629; AVX-32-NEXT: retl 1630; 1631; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i32: 1632; AVX-64: # %bb.0: 1633; AVX-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0] 1634; AVX-64-NEXT: vcvttsd2si %xmm1, %rax 1635; AVX-64-NEXT: vcvttsd2si %xmm0, %rcx 1636; AVX-64-NEXT: vmovd %ecx, %xmm0 1637; AVX-64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1638; AVX-64-NEXT: retq 1639; 1640; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i32: 1641; AVX512F: # %bb.0: 1642; AVX512F-NEXT: vmovaps %xmm0, %xmm0 1643; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 1644; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1645; AVX512F-NEXT: vzeroupper 1646; AVX512F-NEXT: ret{{[l|q]}} 1647; 1648; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i32: 1649; AVX512VL: # %bb.0: 1650; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 1651; AVX512VL-NEXT: ret{{[l|q]}} 1652; 1653; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32: 1654; AVX512DQ: # %bb.0: 1655; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 1656; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 1657; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1658; AVX512DQ-NEXT: vzeroupper 1659; AVX512DQ-NEXT: ret{{[l|q]}} 1660; 1661; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i32: 1662; AVX512VLDQ: # %bb.0: 1663; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 1664; AVX512VLDQ-NEXT: ret{{[l|q]}} 1665 %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double> %a, 1666 metadata !"fpexcept.strict") #0 1667 ret <2 x i32> %ret 1668} 1669 1670define <2 x i32> @strict_vector_fptosi_v2f32_to_v2i32(<2 x float> %a) #0 { 1671; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i32: 1672; SSE-32: # %bb.0: 1673; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1674; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 1675; SSE-32-NEXT: retl 1676; 1677; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i32: 1678; SSE-64: # %bb.0: 1679; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1680; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 1681; SSE-64-NEXT: retq 1682; 1683; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i32: 1684; AVX: # %bb.0: 1685; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1686; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 1687; AVX-NEXT: ret{{[l|q]}} 1688; 1689; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i32: 1690; AVX512F: # %bb.0: 1691; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1692; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 1693; AVX512F-NEXT: ret{{[l|q]}} 1694; 1695; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i32: 1696; AVX512VL: # %bb.0: 1697; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1698; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 1699; AVX512VL-NEXT: ret{{[l|q]}} 1700; 1701; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32: 1702; AVX512DQ: # %bb.0: 1703; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = 
xmm0[0],zero 1704; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 1705; AVX512DQ-NEXT: ret{{[l|q]}} 1706; 1707; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i32: 1708; AVX512VLDQ: # %bb.0: 1709; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1710; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 1711; AVX512VLDQ-NEXT: ret{{[l|q]}} 1712 %ret = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float> %a, 1713 metadata !"fpexcept.strict") #0 1714 ret <2 x i32> %ret 1715} 1716 1717define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 { 1718; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i32: 1719; SSE-32: # %bb.0: 1720; SSE-32-NEXT: movss {{.*#+}} xmm3 = [2.14748365E+9,0.0E+0,0.0E+0,0.0E+0] 1721; SSE-32-NEXT: comiss %xmm3, %xmm0 1722; SSE-32-NEXT: xorps %xmm2, %xmm2 1723; SSE-32-NEXT: xorps %xmm1, %xmm1 1724; SSE-32-NEXT: jb .LBB9_2 1725; SSE-32-NEXT: # %bb.1: 1726; SSE-32-NEXT: movaps %xmm3, %xmm1 1727; SSE-32-NEXT: .LBB9_2: 1728; SSE-32-NEXT: setae %al 1729; SSE-32-NEXT: movzbl %al, %eax 1730; SSE-32-NEXT: shll $31, %eax 1731; SSE-32-NEXT: movaps %xmm0, %xmm4 1732; SSE-32-NEXT: subss %xmm1, %xmm4 1733; SSE-32-NEXT: cvttss2si %xmm4, %ecx 1734; SSE-32-NEXT: xorl %eax, %ecx 1735; SSE-32-NEXT: movd %ecx, %xmm1 1736; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1737; SSE-32-NEXT: comiss %xmm3, %xmm0 1738; SSE-32-NEXT: jb .LBB9_4 1739; SSE-32-NEXT: # %bb.3: 1740; SSE-32-NEXT: movaps %xmm3, %xmm2 1741; SSE-32-NEXT: .LBB9_4: 1742; SSE-32-NEXT: setae %al 1743; SSE-32-NEXT: movzbl %al, %eax 1744; SSE-32-NEXT: shll $31, %eax 1745; SSE-32-NEXT: subss %xmm2, %xmm0 1746; SSE-32-NEXT: cvttss2si %xmm0, %ecx 1747; SSE-32-NEXT: xorl %eax, %ecx 1748; SSE-32-NEXT: movd %ecx, %xmm0 1749; SSE-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1750; SSE-32-NEXT: movdqa %xmm1, %xmm0 1751; SSE-32-NEXT: retl 1752; 1753; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i32: 1754; SSE-64: # %bb.0: 1755; SSE-64-NEXT: cvttss2si %xmm0, %rax 1756; SSE-64-NEXT: movd %eax, %xmm1 1757; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 1758; SSE-64-NEXT: cvttss2si %xmm0, %rax 1759; SSE-64-NEXT: movd %eax, %xmm0 1760; SSE-64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1761; SSE-64-NEXT: movdqa %xmm1, %xmm0 1762; SSE-64-NEXT: retq 1763; 1764; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i32: 1765; AVX-32: # %bb.0: 1766; AVX-32-NEXT: pushl %ebp 1767; AVX-32-NEXT: .cfi_def_cfa_offset 8 1768; AVX-32-NEXT: .cfi_offset %ebp, -8 1769; AVX-32-NEXT: movl %esp, %ebp 1770; AVX-32-NEXT: .cfi_def_cfa_register %ebp 1771; AVX-32-NEXT: andl $-8, %esp 1772; AVX-32-NEXT: subl $16, %esp 1773; AVX-32-NEXT: vmovss %xmm0, (%esp) 1774; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 1775; AVX-32-NEXT: flds (%esp) 1776; AVX-32-NEXT: fisttpll (%esp) 1777; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 1778; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 1779; AVX-32-NEXT: wait 1780; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1781; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 1782; AVX-32-NEXT: movl %ebp, %esp 1783; AVX-32-NEXT: popl %ebp 1784; AVX-32-NEXT: .cfi_def_cfa %esp, 4 1785; AVX-32-NEXT: retl 1786; 1787; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i32: 1788; AVX-64: # %bb.0: 1789; AVX-64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 1790; AVX-64-NEXT: vcvttss2si %xmm1, %rax 1791; AVX-64-NEXT: vcvttss2si %xmm0, %rcx 1792; AVX-64-NEXT: vmovd %ecx, %xmm0 1793; AVX-64-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1794; AVX-64-NEXT: retq 1795; 1796; AVX512F-LABEL: 
strict_vector_fptoui_v2f32_to_v2i32: 1797; AVX512F: # %bb.0: 1798; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1799; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0 1800; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1801; AVX512F-NEXT: vzeroupper 1802; AVX512F-NEXT: ret{{[l|q]}} 1803; 1804; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i32: 1805; AVX512VL: # %bb.0: 1806; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1807; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0 1808; AVX512VL-NEXT: ret{{[l|q]}} 1809; 1810; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32: 1811; AVX512DQ: # %bb.0: 1812; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1813; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0 1814; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1815; AVX512DQ-NEXT: vzeroupper 1816; AVX512DQ-NEXT: ret{{[l|q]}} 1817; 1818; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i32: 1819; AVX512VLDQ: # %bb.0: 1820; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1821; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0 1822; AVX512VLDQ-NEXT: ret{{[l|q]}} 1823 %ret = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float> %a, 1824 metadata !"fpexcept.strict") #0 1825 ret <2 x i32> %ret 1826} 1827 1828define <2 x i16> @strict_vector_fptosi_v2f64_to_v2i16(<2 x double> %a) #0 { 1829; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16: 1830; SSE-32: # %bb.0: 1831; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 1832; SSE-32-NEXT: packssdw %xmm0, %xmm0 1833; SSE-32-NEXT: retl 1834; 1835; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16: 1836; SSE-64: # %bb.0: 1837; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 1838; SSE-64-NEXT: packssdw %xmm0, %xmm0 1839; SSE-64-NEXT: retq 1840; 1841; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16: 1842; AVX: # %bb.0: 1843; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 1844; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1845; AVX-NEXT: ret{{[l|q]}} 1846; 1847; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16: 1848; AVX512F: # %bb.0: 1849; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 1850; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1851; AVX512F-NEXT: ret{{[l|q]}} 1852; 1853; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16: 1854; AVX512VL: # %bb.0: 1855; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 1856; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1857; AVX512VL-NEXT: ret{{[l|q]}} 1858; 1859; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16: 1860; AVX512DQ: # %bb.0: 1861; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 1862; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1863; AVX512DQ-NEXT: ret{{[l|q]}} 1864; 1865; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16: 1866; AVX512VLDQ: # %bb.0: 1867; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 1868; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1869; AVX512VLDQ-NEXT: ret{{[l|q]}} 1870 %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a, 1871 metadata !"fpexcept.strict") #0 1872 ret <2 x i16> %ret 1873} 1874 1875define <2 x i16> @strict_vector_fptoui_v2f64_to_v2i16(<2 x double> %a) #0 { 1876; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i16: 1877; SSE-32: # %bb.0: 1878; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 1879; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1880; SSE-32-NEXT: retl 1881; 1882; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i16: 1883; SSE-64: # %bb.0: 1884; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 1885; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1886; SSE-64-NEXT: retq 1887; 1888; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16: 1889; AVX: # %bb.0: 1890; AVX-NEXT: 
vcvttpd2dq %xmm0, %xmm0 1891; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1892; AVX-NEXT: ret{{[l|q]}} 1893; 1894; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16: 1895; AVX512F: # %bb.0: 1896; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 1897; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1898; AVX512F-NEXT: ret{{[l|q]}} 1899; 1900; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16: 1901; AVX512VL: # %bb.0: 1902; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 1903; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1904; AVX512VL-NEXT: ret{{[l|q]}} 1905; 1906; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16: 1907; AVX512DQ: # %bb.0: 1908; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 1909; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1910; AVX512DQ-NEXT: ret{{[l|q]}} 1911; 1912; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16: 1913; AVX512VLDQ: # %bb.0: 1914; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 1915; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1916; AVX512VLDQ-NEXT: ret{{[l|q]}} 1917 %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a, 1918 metadata !"fpexcept.strict") #0 1919 ret <2 x i16> %ret 1920} 1921 1922define <2 x i16> @strict_vector_fptosi_v2f32_to_v2i16(<2 x float> %a) #0 { 1923; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i16: 1924; SSE-32: # %bb.0: 1925; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1926; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 1927; SSE-32-NEXT: packssdw %xmm0, %xmm0 1928; SSE-32-NEXT: retl 1929; 1930; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16: 1931; SSE-64: # %bb.0: 1932; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1933; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 1934; SSE-64-NEXT: packssdw %xmm0, %xmm0 1935; SSE-64-NEXT: retq 1936; 1937; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16: 1938; AVX: # %bb.0: 1939; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1940; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 1941; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1942; AVX-NEXT: ret{{[l|q]}} 1943; 1944; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16: 1945; AVX512F: # %bb.0: 1946; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1947; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 1948; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1949; AVX512F-NEXT: ret{{[l|q]}} 1950; 1951; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16: 1952; AVX512VL: # %bb.0: 1953; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1954; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 1955; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1956; AVX512VL-NEXT: ret{{[l|q]}} 1957; 1958; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16: 1959; AVX512DQ: # %bb.0: 1960; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1961; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 1962; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1963; AVX512DQ-NEXT: ret{{[l|q]}} 1964; 1965; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16: 1966; AVX512VLDQ: # %bb.0: 1967; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1968; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 1969; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 1970; AVX512VLDQ-NEXT: ret{{[l|q]}} 1971 %ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a, 1972 metadata !"fpexcept.strict") #0 1973 ret <2 x i16> %ret 1974} 1975 1976define <2 x i16> @strict_vector_fptoui_v2f32_to_v2i16(<2 x float> %a) #0 { 1977; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i16: 1978; SSE-32: # %bb.0: 1979; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1980; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 1981; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 
1982; SSE-32-NEXT: retl 1983; 1984; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i16: 1985; SSE-64: # %bb.0: 1986; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 1987; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 1988; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1989; SSE-64-NEXT: retq 1990; 1991; AVX-LABEL: strict_vector_fptoui_v2f32_to_v2i16: 1992; AVX: # %bb.0: 1993; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 1994; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 1995; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 1996; AVX-NEXT: ret{{[l|q]}} 1997; 1998; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16: 1999; AVX512F: # %bb.0: 2000; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2001; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 2002; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2003; AVX512F-NEXT: ret{{[l|q]}} 2004; 2005; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16: 2006; AVX512VL: # %bb.0: 2007; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2008; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 2009; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2010; AVX512VL-NEXT: ret{{[l|q]}} 2011; 2012; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16: 2013; AVX512DQ: # %bb.0: 2014; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2015; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 2016; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2017; AVX512DQ-NEXT: ret{{[l|q]}} 2018; 2019; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16: 2020; AVX512VLDQ: # %bb.0: 2021; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2022; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2023; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2024; AVX512VLDQ-NEXT: ret{{[l|q]}} 2025 %ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a, 2026 metadata !"fpexcept.strict") #0 2027 ret <2 x i16> %ret 2028} 2029 2030define <2 x i8> @strict_vector_fptosi_v2f64_to_v2i8(<2 x double> %a) #0 { 2031; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8: 2032; SSE-32: # %bb.0: 2033; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 2034; SSE-32-NEXT: packssdw %xmm0, %xmm0 2035; SSE-32-NEXT: packsswb %xmm0, %xmm0 2036; SSE-32-NEXT: retl 2037; 2038; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8: 2039; SSE-64: # %bb.0: 2040; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 2041; SSE-64-NEXT: packssdw %xmm0, %xmm0 2042; SSE-64-NEXT: packsswb %xmm0, %xmm0 2043; SSE-64-NEXT: retq 2044; 2045; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8: 2046; AVX: # %bb.0: 2047; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 2048; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2049; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2050; AVX-NEXT: ret{{[l|q]}} 2051; 2052; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8: 2053; AVX512F: # %bb.0: 2054; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 2055; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2056; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2057; AVX512F-NEXT: ret{{[l|q]}} 2058; 2059; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8: 2060; AVX512VL: # %bb.0: 2061; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 2062; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2063; AVX512VL-NEXT: ret{{[l|q]}} 2064; 2065; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8: 2066; AVX512DQ: # %bb.0: 2067; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2068; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2069; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2070; AVX512DQ-NEXT: ret{{[l|q]}} 2071; 2072; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8: 2073; AVX512VLDQ: # %bb.0: 2074; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2075; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2076; AVX512VLDQ-NEXT: 
ret{{[l|q]}} 2077 %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f64(<2 x double> %a, 2078 metadata !"fpexcept.strict") #0 2079 ret <2 x i8> %ret 2080} 2081 2082define <2 x i8> @strict_vector_fptoui_v2f64_to_v2i8(<2 x double> %a) #0 { 2083; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8: 2084; SSE-32: # %bb.0: 2085; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0 2086; SSE-32-NEXT: packuswb %xmm0, %xmm0 2087; SSE-32-NEXT: packuswb %xmm0, %xmm0 2088; SSE-32-NEXT: retl 2089; 2090; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8: 2091; SSE-64: # %bb.0: 2092; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0 2093; SSE-64-NEXT: packuswb %xmm0, %xmm0 2094; SSE-64-NEXT: packuswb %xmm0, %xmm0 2095; SSE-64-NEXT: retq 2096; 2097; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8: 2098; AVX: # %bb.0: 2099; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 2100; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2101; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2102; AVX-NEXT: ret{{[l|q]}} 2103; 2104; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8: 2105; AVX512F: # %bb.0: 2106; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 2107; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2108; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2109; AVX512F-NEXT: ret{{[l|q]}} 2110; 2111; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8: 2112; AVX512VL: # %bb.0: 2113; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 2114; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2115; AVX512VL-NEXT: ret{{[l|q]}} 2116; 2117; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8: 2118; AVX512DQ: # %bb.0: 2119; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2120; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2121; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2122; AVX512DQ-NEXT: ret{{[l|q]}} 2123; 2124; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8: 2125; AVX512VLDQ: # %bb.0: 2126; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2127; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2128; AVX512VLDQ-NEXT: ret{{[l|q]}} 2129 %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f64(<2 x double> %a, 2130 metadata !"fpexcept.strict") #0 2131 ret <2 x i8> %ret 2132} 2133 2134define <2 x i8> @strict_vector_fptosi_v2f32_to_v2i8(<2 x float> %a) #0 { 2135; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i8: 2136; SSE-32: # %bb.0: 2137; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 2138; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 2139; SSE-32-NEXT: packssdw %xmm0, %xmm0 2140; SSE-32-NEXT: packsswb %xmm0, %xmm0 2141; SSE-32-NEXT: retl 2142; 2143; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8: 2144; SSE-64: # %bb.0: 2145; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 2146; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 2147; SSE-64-NEXT: packssdw %xmm0, %xmm0 2148; SSE-64-NEXT: packsswb %xmm0, %xmm0 2149; SSE-64-NEXT: retq 2150; 2151; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8: 2152; AVX: # %bb.0: 2153; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2154; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 2155; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2156; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2157; AVX-NEXT: ret{{[l|q]}} 2158; 2159; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8: 2160; AVX512F: # %bb.0: 2161; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2162; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 2163; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2164; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2165; AVX512F-NEXT: ret{{[l|q]}} 2166; 2167; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8: 2168; AVX512VL: # %bb.0: 2169; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2170; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 2171; AVX512VL-NEXT: vpmovdb 
%xmm0, %xmm0 2172; AVX512VL-NEXT: ret{{[l|q]}} 2173; 2174; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8: 2175; AVX512DQ: # %bb.0: 2176; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2177; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 2178; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2179; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2180; AVX512DQ-NEXT: ret{{[l|q]}} 2181; 2182; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8: 2183; AVX512VLDQ: # %bb.0: 2184; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2185; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2186; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2187; AVX512VLDQ-NEXT: ret{{[l|q]}} 2188 %ret = call <2 x i8> @llvm.experimental.constrained.fptosi.v2i8.v2f32(<2 x float> %a, 2189 metadata !"fpexcept.strict") #0 2190 ret <2 x i8> %ret 2191} 2192 2193define <2 x i8> @strict_vector_fptoui_v2f32_to_v2i8(<2 x float> %a) #0 { 2194; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i8: 2195; SSE-32: # %bb.0: 2196; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 2197; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0 2198; SSE-32-NEXT: packuswb %xmm0, %xmm0 2199; SSE-32-NEXT: packuswb %xmm0, %xmm0 2200; SSE-32-NEXT: retl 2201; 2202; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i8: 2203; SSE-64: # %bb.0: 2204; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 2205; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0 2206; SSE-64-NEXT: packuswb %xmm0, %xmm0 2207; SSE-64-NEXT: packuswb %xmm0, %xmm0 2208; SSE-64-NEXT: retq 2209; 2210; AVX-LABEL: strict_vector_fptoui_v2f32_to_v2i8: 2211; AVX: # %bb.0: 2212; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2213; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 2214; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2215; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2216; AVX-NEXT: ret{{[l|q]}} 2217; 2218; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8: 2219; AVX512F: # %bb.0: 2220; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2221; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 2222; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2223; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2224; AVX512F-NEXT: ret{{[l|q]}} 2225; 2226; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8: 2227; AVX512VL: # %bb.0: 2228; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2229; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 2230; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2231; AVX512VL-NEXT: ret{{[l|q]}} 2232; 2233; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8: 2234; AVX512DQ: # %bb.0: 2235; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2236; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 2237; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2238; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2239; AVX512DQ-NEXT: ret{{[l|q]}} 2240; 2241; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8: 2242; AVX512VLDQ: # %bb.0: 2243; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2244; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2245; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2246; AVX512VLDQ-NEXT: ret{{[l|q]}} 2247 %ret = call <2 x i8> @llvm.experimental.constrained.fptoui.v2i8.v2f32(<2 x float> %a, 2248 metadata !"fpexcept.strict") #0 2249 ret <2 x i8> %ret 2250} 2251 2252define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 { 2253; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i1: 2254; SSE-32: # %bb.0: 2255; SSE-32-NEXT: pushl %ebp 2256; SSE-32-NEXT: .cfi_def_cfa_offset 8 2257; SSE-32-NEXT: .cfi_offset %ebp, -8 2258; SSE-32-NEXT: movl %esp, %ebp 2259; SSE-32-NEXT: .cfi_def_cfa_register %ebp 2260; SSE-32-NEXT: andl $-8, %esp 2261; SSE-32-NEXT: subl $24, %esp 2262; SSE-32-NEXT: movhps %xmm0, 
{{[0-9]+}}(%esp) 2263; SSE-32-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 2264; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) 2265; SSE-32-NEXT: wait 2266; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) 2267; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 2268; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 2269; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp) 2270; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2271; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 2272; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2273; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) 2274; SSE-32-NEXT: wait 2275; SSE-32-NEXT: fnstcw (%esp) 2276; SSE-32-NEXT: movzwl (%esp), %eax 2277; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 2278; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp) 2279; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2280; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 2281; SSE-32-NEXT: fldcw (%esp) 2282; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 2283; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2284; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2285; SSE-32-NEXT: movl %ebp, %esp 2286; SSE-32-NEXT: popl %ebp 2287; SSE-32-NEXT: .cfi_def_cfa %esp, 4 2288; SSE-32-NEXT: retl 2289; 2290; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i1: 2291; SSE-64: # %bb.0: 2292; SSE-64-NEXT: cvttsd2si %xmm0, %rax 2293; SSE-64-NEXT: movq %rax, %xmm1 2294; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 2295; SSE-64-NEXT: cvttsd2si %xmm0, %rax 2296; SSE-64-NEXT: movq %rax, %xmm0 2297; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 2298; SSE-64-NEXT: movdqa %xmm1, %xmm0 2299; SSE-64-NEXT: retq 2300; 2301; AVX-32-LABEL: strict_vector_fptosi_v2f64_to_v2i1: 2302; AVX-32: # %bb.0: 2303; AVX-32-NEXT: pushl %ebp 2304; AVX-32-NEXT: .cfi_def_cfa_offset 8 2305; AVX-32-NEXT: .cfi_offset %ebp, -8 2306; AVX-32-NEXT: movl %esp, %ebp 2307; AVX-32-NEXT: .cfi_def_cfa_register %ebp 2308; AVX-32-NEXT: andl $-8, %esp 2309; AVX-32-NEXT: subl $16, %esp 2310; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 2311; AVX-32-NEXT: vmovhps %xmm0, (%esp) 2312; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) 2313; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 2314; AVX-32-NEXT: fldl (%esp) 2315; AVX-32-NEXT: fisttpll (%esp) 2316; AVX-32-NEXT: wait 2317; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2318; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 2319; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 2320; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 2321; AVX-32-NEXT: movl %ebp, %esp 2322; AVX-32-NEXT: popl %ebp 2323; AVX-32-NEXT: .cfi_def_cfa %esp, 4 2324; AVX-32-NEXT: retl 2325; 2326; AVX-64-LABEL: strict_vector_fptosi_v2f64_to_v2i1: 2327; AVX-64: # %bb.0: 2328; AVX-64-NEXT: vcvttsd2si %xmm0, %rax 2329; AVX-64-NEXT: vmovq %rax, %xmm1 2330; AVX-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 2331; AVX-64-NEXT: vcvttsd2si %xmm0, %rax 2332; AVX-64-NEXT: vmovq %rax, %xmm0 2333; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2334; AVX-64-NEXT: retq 2335; 2336; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i1: 2337; AVX512F: # %bb.0: 2338; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 2339; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0 2340; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 2341; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2342; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2343; AVX512F-NEXT: vzeroupper 2344; AVX512F-NEXT: ret{{[l|q]}} 2345; 2346; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i1: 2347; AVX512VL: # %bb.0: 2348; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 2349; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 2350; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 2351; AVX512VL-NEXT: vpcmpeqd %xmm0, 
%xmm0, %xmm0 2352; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 2353; AVX512VL-NEXT: ret{{[l|q]}} 2354; 2355; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i1: 2356; AVX512DQ: # %bb.0: 2357; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2358; AVX512DQ-NEXT: vpslld $31, %xmm0, %xmm0 2359; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 2360; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 2361; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2362; AVX512DQ-NEXT: vzeroupper 2363; AVX512DQ-NEXT: ret{{[l|q]}} 2364; 2365; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i1: 2366; AVX512VLDQ: # %bb.0: 2367; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2368; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 2369; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0 2370; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0 2371; AVX512VLDQ-NEXT: ret{{[l|q]}} 2372 %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f64(<2 x double> %a, 2373 metadata !"fpexcept.strict") #0 2374 ret <2 x i1> %ret 2375} 2376 2377define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 { 2378; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i1: 2379; SSE-32: # %bb.0: 2380; SSE-32-NEXT: pushl %ebp 2381; SSE-32-NEXT: .cfi_def_cfa_offset 8 2382; SSE-32-NEXT: .cfi_offset %ebp, -8 2383; SSE-32-NEXT: movl %esp, %ebp 2384; SSE-32-NEXT: .cfi_def_cfa_register %ebp 2385; SSE-32-NEXT: andl $-8, %esp 2386; SSE-32-NEXT: subl $24, %esp 2387; SSE-32-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 2388; SSE-32-NEXT: comisd %xmm1, %xmm0 2389; SSE-32-NEXT: movapd %xmm1, %xmm2 2390; SSE-32-NEXT: jae .LBB19_2 2391; SSE-32-NEXT: # %bb.1: 2392; SSE-32-NEXT: xorpd %xmm2, %xmm2 2393; SSE-32-NEXT: .LBB19_2: 2394; SSE-32-NEXT: movapd %xmm0, %xmm3 2395; SSE-32-NEXT: subsd %xmm2, %xmm3 2396; SSE-32-NEXT: movsd %xmm3, {{[0-9]+}}(%esp) 2397; SSE-32-NEXT: setae %al 2398; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) 2399; SSE-32-NEXT: wait 2400; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) 2401; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx 2402; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00 2403; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp) 2404; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2405; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 2406; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2407; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 2408; SSE-32-NEXT: comisd %xmm1, %xmm0 2409; SSE-32-NEXT: jae .LBB19_4 2410; SSE-32-NEXT: # %bb.3: 2411; SSE-32-NEXT: xorpd %xmm1, %xmm1 2412; SSE-32-NEXT: .LBB19_4: 2413; SSE-32-NEXT: subsd %xmm1, %xmm0 2414; SSE-32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) 2415; SSE-32-NEXT: setae %cl 2416; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) 2417; SSE-32-NEXT: wait 2418; SSE-32-NEXT: fnstcw (%esp) 2419; SSE-32-NEXT: movzwl (%esp), %edx 2420; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00 2421; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp) 2422; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2423; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 2424; SSE-32-NEXT: fldcw (%esp) 2425; SSE-32-NEXT: movzbl %al, %eax 2426; SSE-32-NEXT: shll $31, %eax 2427; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 2428; SSE-32-NEXT: movd %eax, %xmm1 2429; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2430; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2431; SSE-32-NEXT: movzbl %cl, %eax 2432; SSE-32-NEXT: shll $31, %eax 2433; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 2434; SSE-32-NEXT: movd %eax, %xmm1 2435; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 2436; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2437; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2438; SSE-32-NEXT: movl 
%ebp, %esp 2439; SSE-32-NEXT: popl %ebp 2440; SSE-32-NEXT: .cfi_def_cfa %esp, 4 2441; SSE-32-NEXT: retl 2442; 2443; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1: 2444; SSE-64: # %bb.0: 2445; SSE-64-NEXT: movsd {{.*#+}} xmm3 = [9.2233720368547758E+18,0.0E+0] 2446; SSE-64-NEXT: comisd %xmm3, %xmm0 2447; SSE-64-NEXT: xorpd %xmm2, %xmm2 2448; SSE-64-NEXT: xorpd %xmm1, %xmm1 2449; SSE-64-NEXT: jb .LBB19_2 2450; SSE-64-NEXT: # %bb.1: 2451; SSE-64-NEXT: movapd %xmm3, %xmm1 2452; SSE-64-NEXT: .LBB19_2: 2453; SSE-64-NEXT: movapd %xmm0, %xmm4 2454; SSE-64-NEXT: subsd %xmm1, %xmm4 2455; SSE-64-NEXT: cvttsd2si %xmm4, %rax 2456; SSE-64-NEXT: setae %cl 2457; SSE-64-NEXT: movzbl %cl, %ecx 2458; SSE-64-NEXT: shlq $63, %rcx 2459; SSE-64-NEXT: xorq %rax, %rcx 2460; SSE-64-NEXT: movq %rcx, %xmm1 2461; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 2462; SSE-64-NEXT: comisd %xmm3, %xmm0 2463; SSE-64-NEXT: jb .LBB19_4 2464; SSE-64-NEXT: # %bb.3: 2465; SSE-64-NEXT: movapd %xmm3, %xmm2 2466; SSE-64-NEXT: .LBB19_4: 2467; SSE-64-NEXT: subsd %xmm2, %xmm0 2468; SSE-64-NEXT: cvttsd2si %xmm0, %rax 2469; SSE-64-NEXT: setae %cl 2470; SSE-64-NEXT: movzbl %cl, %ecx 2471; SSE-64-NEXT: shlq $63, %rcx 2472; SSE-64-NEXT: xorq %rax, %rcx 2473; SSE-64-NEXT: movq %rcx, %xmm0 2474; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 2475; SSE-64-NEXT: movdqa %xmm1, %xmm0 2476; SSE-64-NEXT: retq 2477; 2478; AVX-32-LABEL: strict_vector_fptoui_v2f64_to_v2i1: 2479; AVX-32: # %bb.0: 2480; AVX-32-NEXT: pushl %ebp 2481; AVX-32-NEXT: .cfi_def_cfa_offset 8 2482; AVX-32-NEXT: .cfi_offset %ebp, -8 2483; AVX-32-NEXT: movl %esp, %ebp 2484; AVX-32-NEXT: .cfi_def_cfa_register %ebp 2485; AVX-32-NEXT: andl $-8, %esp 2486; AVX-32-NEXT: subl $16, %esp 2487; AVX-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] 2488; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 2489; AVX-32-NEXT: vcomisd %xmm1, %xmm2 2490; AVX-32-NEXT: vmovapd %xmm1, %xmm3 2491; AVX-32-NEXT: jae .LBB19_2 2492; AVX-32-NEXT: # %bb.1: 2493; AVX-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3 2494; AVX-32-NEXT: .LBB19_2: 2495; AVX-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2 2496; AVX-32-NEXT: vmovsd %xmm2, (%esp) 2497; AVX-32-NEXT: fldl (%esp) 2498; AVX-32-NEXT: fisttpll (%esp) 2499; AVX-32-NEXT: wait 2500; AVX-32-NEXT: setae %al 2501; AVX-32-NEXT: movzbl %al, %eax 2502; AVX-32-NEXT: shll $31, %eax 2503; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 2504; AVX-32-NEXT: vcomisd %xmm1, %xmm0 2505; AVX-32-NEXT: jae .LBB19_4 2506; AVX-32-NEXT: # %bb.3: 2507; AVX-32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 2508; AVX-32-NEXT: .LBB19_4: 2509; AVX-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0 2510; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) 2511; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) 2512; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 2513; AVX-32-NEXT: wait 2514; AVX-32-NEXT: setae %cl 2515; AVX-32-NEXT: movzbl %cl, %ecx 2516; AVX-32-NEXT: shll $31, %ecx 2517; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 2518; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2519; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 2520; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 2521; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 2522; AVX-32-NEXT: movl %ebp, %esp 2523; AVX-32-NEXT: popl %ebp 2524; AVX-32-NEXT: .cfi_def_cfa %esp, 4 2525; AVX-32-NEXT: retl 2526; 2527; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1: 2528; AVX-64: # %bb.0: 2529; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 2530; AVX-64-NEXT: vcomisd %xmm1, %xmm0 2531; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2 2532; AVX-64-NEXT: vxorpd %xmm3, %xmm3, 
%xmm3 2533; AVX-64-NEXT: jb .LBB19_2 2534; AVX-64-NEXT: # %bb.1: 2535; AVX-64-NEXT: vmovapd %xmm1, %xmm3 2536; AVX-64-NEXT: .LBB19_2: 2537; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3 2538; AVX-64-NEXT: vcvttsd2si %xmm3, %rax 2539; AVX-64-NEXT: setae %cl 2540; AVX-64-NEXT: movzbl %cl, %ecx 2541; AVX-64-NEXT: shlq $63, %rcx 2542; AVX-64-NEXT: xorq %rax, %rcx 2543; AVX-64-NEXT: vmovq %rcx, %xmm3 2544; AVX-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 2545; AVX-64-NEXT: vcomisd %xmm1, %xmm0 2546; AVX-64-NEXT: jb .LBB19_4 2547; AVX-64-NEXT: # %bb.3: 2548; AVX-64-NEXT: vmovapd %xmm1, %xmm2 2549; AVX-64-NEXT: .LBB19_4: 2550; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0 2551; AVX-64-NEXT: vcvttsd2si %xmm0, %rax 2552; AVX-64-NEXT: setae %cl 2553; AVX-64-NEXT: movzbl %cl, %ecx 2554; AVX-64-NEXT: shlq $63, %rcx 2555; AVX-64-NEXT: xorq %rax, %rcx 2556; AVX-64-NEXT: vmovq %rcx, %xmm0 2557; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 2558; AVX-64-NEXT: retq 2559; 2560; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i1: 2561; AVX512F: # %bb.0: 2562; AVX512F-NEXT: vmovaps %xmm0, %xmm0 2563; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 2564; AVX512F-NEXT: vpslld $31, %ymm0, %ymm0 2565; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 2566; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2567; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2568; AVX512F-NEXT: vzeroupper 2569; AVX512F-NEXT: ret{{[l|q]}} 2570; 2571; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i1: 2572; AVX512VL: # %bb.0: 2573; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 2574; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0 2575; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1 2576; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 2577; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 2578; AVX512VL-NEXT: ret{{[l|q]}} 2579; 2580; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1: 2581; AVX512DQ: # %bb.0: 2582; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 2583; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 2584; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0 2585; AVX512DQ-NEXT: vpmovd2m %zmm0, %k0 2586; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 2587; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2588; AVX512DQ-NEXT: vzeroupper 2589; AVX512DQ-NEXT: ret{{[l|q]}} 2590; 2591; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i1: 2592; AVX512VLDQ: # %bb.0: 2593; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 2594; AVX512VLDQ-NEXT: vpslld $31, %xmm0, %xmm0 2595; AVX512VLDQ-NEXT: vpmovd2m %xmm0, %k0 2596; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0 2597; AVX512VLDQ-NEXT: ret{{[l|q]}} 2598 %ret = call <2 x i1> @llvm.experimental.constrained.fptoui.v2i1.v2f64(<2 x double> %a, 2599 metadata !"fpexcept.strict") #0 2600 ret <2 x i1> %ret 2601} 2602 2603define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 { 2604; SSE-32-LABEL: strict_vector_fptosi_v2f32_to_v2i1: 2605; SSE-32: # %bb.0: 2606; SSE-32-NEXT: pushl %ebp 2607; SSE-32-NEXT: .cfi_def_cfa_offset 8 2608; SSE-32-NEXT: .cfi_offset %ebp, -8 2609; SSE-32-NEXT: movl %esp, %ebp 2610; SSE-32-NEXT: .cfi_def_cfa_register %ebp 2611; SSE-32-NEXT: andl $-8, %esp 2612; SSE-32-NEXT: subl $24, %esp 2613; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 2614; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 2615; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 2616; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 2617; SSE-32-NEXT: wait 2618; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) 2619; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax 2620; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 2621; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp) 2622; SSE-32-NEXT: fldcw 
{{[0-9]+}}(%esp) 2623; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 2624; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2625; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 2626; SSE-32-NEXT: wait 2627; SSE-32-NEXT: fnstcw (%esp) 2628; SSE-32-NEXT: movzwl (%esp), %eax 2629; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 2630; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp) 2631; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2632; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 2633; SSE-32-NEXT: fldcw (%esp) 2634; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 2635; SSE-32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 2636; SSE-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2637; SSE-32-NEXT: movl %ebp, %esp 2638; SSE-32-NEXT: popl %ebp 2639; SSE-32-NEXT: .cfi_def_cfa %esp, 4 2640; SSE-32-NEXT: retl 2641; 2642; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i1: 2643; SSE-64: # %bb.0: 2644; SSE-64-NEXT: cvttss2si %xmm0, %rax 2645; SSE-64-NEXT: movq %rax, %xmm1 2646; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 2647; SSE-64-NEXT: cvttss2si %xmm0, %rax 2648; SSE-64-NEXT: movq %rax, %xmm0 2649; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 2650; SSE-64-NEXT: movdqa %xmm1, %xmm0 2651; SSE-64-NEXT: retq 2652; 2653; AVX-32-LABEL: strict_vector_fptosi_v2f32_to_v2i1: 2654; AVX-32: # %bb.0: 2655; AVX-32-NEXT: pushl %ebp 2656; AVX-32-NEXT: .cfi_def_cfa_offset 8 2657; AVX-32-NEXT: .cfi_offset %ebp, -8 2658; AVX-32-NEXT: movl %esp, %ebp 2659; AVX-32-NEXT: .cfi_def_cfa_register %ebp 2660; AVX-32-NEXT: andl $-8, %esp 2661; AVX-32-NEXT: subl $16, %esp 2662; AVX-32-NEXT: vmovss %xmm0, (%esp) 2663; AVX-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 2664; AVX-32-NEXT: flds (%esp) 2665; AVX-32-NEXT: fisttpll (%esp) 2666; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 2667; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 2668; AVX-32-NEXT: wait 2669; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2670; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 2671; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 2672; AVX-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 2673; AVX-32-NEXT: movl %ebp, %esp 2674; AVX-32-NEXT: popl %ebp 2675; AVX-32-NEXT: .cfi_def_cfa %esp, 4 2676; AVX-32-NEXT: retl 2677; 2678; AVX-64-LABEL: strict_vector_fptosi_v2f32_to_v2i1: 2679; AVX-64: # %bb.0: 2680; AVX-64-NEXT: vcvttss2si %xmm0, %rax 2681; AVX-64-NEXT: vmovq %rax, %xmm1 2682; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2683; AVX-64-NEXT: vcvttss2si %xmm0, %rax 2684; AVX-64-NEXT: vmovq %rax, %xmm0 2685; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2686; AVX-64-NEXT: retq 2687; 2688; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i1: 2689; AVX512F: # %bb.0: 2690; AVX512F-NEXT: vcvttss2si %xmm0, %eax 2691; AVX512F-NEXT: andl $1, %eax 2692; AVX512F-NEXT: kmovw %eax, %k0 2693; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2694; AVX512F-NEXT: vcvttss2si %xmm0, %eax 2695; AVX512F-NEXT: kmovw %eax, %k1 2696; AVX512F-NEXT: kshiftlw $1, %k1, %k1 2697; AVX512F-NEXT: korw %k1, %k0, %k1 2698; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2699; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2700; AVX512F-NEXT: vzeroupper 2701; AVX512F-NEXT: ret{{[l|q]}} 2702; 2703; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i1: 2704; AVX512VL: # %bb.0: 2705; AVX512VL-NEXT: vcvttss2si %xmm0, %eax 2706; AVX512VL-NEXT: andl $1, %eax 2707; AVX512VL-NEXT: kmovw %eax, %k0 2708; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2709; AVX512VL-NEXT: vcvttss2si %xmm0, %eax 2710; AVX512VL-NEXT: kmovw %eax, %k1 2711; AVX512VL-NEXT: kshiftlw $1, %k1, 
%k1 2712; AVX512VL-NEXT: korw %k1, %k0, %k1 2713; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 2714; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 2715; AVX512VL-NEXT: ret{{[l|q]}} 2716; 2717; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i1: 2718; AVX512DQ: # %bb.0: 2719; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2720; AVX512DQ-NEXT: vcvttss2si %xmm1, %eax 2721; AVX512DQ-NEXT: kmovw %eax, %k0 2722; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0 2723; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax 2724; AVX512DQ-NEXT: kmovw %eax, %k1 2725; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 2726; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1 2727; AVX512DQ-NEXT: korw %k0, %k1, %k0 2728; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 2729; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2730; AVX512DQ-NEXT: vzeroupper 2731; AVX512DQ-NEXT: ret{{[l|q]}} 2732; 2733; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i1: 2734; AVX512VLDQ: # %bb.0: 2735; AVX512VLDQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2736; AVX512VLDQ-NEXT: vcvttss2si %xmm1, %eax 2737; AVX512VLDQ-NEXT: kmovw %eax, %k0 2738; AVX512VLDQ-NEXT: kshiftlb $1, %k0, %k0 2739; AVX512VLDQ-NEXT: vcvttss2si %xmm0, %eax 2740; AVX512VLDQ-NEXT: kmovw %eax, %k1 2741; AVX512VLDQ-NEXT: kshiftlb $7, %k1, %k1 2742; AVX512VLDQ-NEXT: kshiftrb $7, %k1, %k1 2743; AVX512VLDQ-NEXT: korw %k0, %k1, %k0 2744; AVX512VLDQ-NEXT: vpmovm2q %k0, %xmm0 2745; AVX512VLDQ-NEXT: ret{{[l|q]}} 2746 %ret = call <2 x i1> @llvm.experimental.constrained.fptosi.v2i1.v2f32(<2 x float> %a, 2747 metadata !"fpexcept.strict") #0 2748 ret <2 x i1> %ret 2749} 2750 2751define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 { 2752; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i1: 2753; SSE-32: # %bb.0: 2754; SSE-32-NEXT: pushl %ebp 2755; SSE-32-NEXT: .cfi_def_cfa_offset 8 2756; SSE-32-NEXT: .cfi_offset %ebp, -8 2757; SSE-32-NEXT: movl %esp, %ebp 2758; SSE-32-NEXT: .cfi_def_cfa_register %ebp 2759; SSE-32-NEXT: andl $-8, %esp 2760; SSE-32-NEXT: subl $24, %esp 2761; SSE-32-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 2762; SSE-32-NEXT: comiss %xmm1, %xmm0 2763; SSE-32-NEXT: movaps %xmm1, %xmm2 2764; SSE-32-NEXT: jae .LBB21_2 2765; SSE-32-NEXT: # %bb.1: 2766; SSE-32-NEXT: xorps %xmm2, %xmm2 2767; SSE-32-NEXT: .LBB21_2: 2768; SSE-32-NEXT: movaps %xmm0, %xmm3 2769; SSE-32-NEXT: subss %xmm2, %xmm3 2770; SSE-32-NEXT: movss %xmm3, {{[0-9]+}}(%esp) 2771; SSE-32-NEXT: setae %al 2772; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 2773; SSE-32-NEXT: wait 2774; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) 2775; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx 2776; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00 2777; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp) 2778; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2779; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 2780; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2781; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 2782; SSE-32-NEXT: comiss %xmm1, %xmm0 2783; SSE-32-NEXT: jae .LBB21_4 2784; SSE-32-NEXT: # %bb.3: 2785; SSE-32-NEXT: xorps %xmm1, %xmm1 2786; SSE-32-NEXT: .LBB21_4: 2787; SSE-32-NEXT: subss %xmm1, %xmm0 2788; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 2789; SSE-32-NEXT: setae %cl 2790; SSE-32-NEXT: flds {{[0-9]+}}(%esp) 2791; SSE-32-NEXT: wait 2792; SSE-32-NEXT: fnstcw (%esp) 2793; SSE-32-NEXT: movzwl (%esp), %edx 2794; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00 2795; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp) 2796; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 2797; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) 2798; SSE-32-NEXT: fldcw (%esp) 2799; SSE-32-NEXT: movzbl %al, %eax 2800; 
SSE-32-NEXT: shll $31, %eax 2801; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 2802; SSE-32-NEXT: movd %eax, %xmm1 2803; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2804; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2805; SSE-32-NEXT: movzbl %cl, %eax 2806; SSE-32-NEXT: shll $31, %eax 2807; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 2808; SSE-32-NEXT: movd %eax, %xmm1 2809; SSE-32-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 2810; SSE-32-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2811; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 2812; SSE-32-NEXT: movl %ebp, %esp 2813; SSE-32-NEXT: popl %ebp 2814; SSE-32-NEXT: .cfi_def_cfa %esp, 4 2815; SSE-32-NEXT: retl 2816; 2817; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1: 2818; SSE-64: # %bb.0: 2819; SSE-64-NEXT: movss {{.*#+}} xmm3 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 2820; SSE-64-NEXT: comiss %xmm3, %xmm0 2821; SSE-64-NEXT: xorps %xmm2, %xmm2 2822; SSE-64-NEXT: xorps %xmm1, %xmm1 2823; SSE-64-NEXT: jb .LBB21_2 2824; SSE-64-NEXT: # %bb.1: 2825; SSE-64-NEXT: movaps %xmm3, %xmm1 2826; SSE-64-NEXT: .LBB21_2: 2827; SSE-64-NEXT: movaps %xmm0, %xmm4 2828; SSE-64-NEXT: subss %xmm1, %xmm4 2829; SSE-64-NEXT: cvttss2si %xmm4, %rax 2830; SSE-64-NEXT: setae %cl 2831; SSE-64-NEXT: movzbl %cl, %ecx 2832; SSE-64-NEXT: shlq $63, %rcx 2833; SSE-64-NEXT: xorq %rax, %rcx 2834; SSE-64-NEXT: movq %rcx, %xmm1 2835; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 2836; SSE-64-NEXT: comiss %xmm3, %xmm0 2837; SSE-64-NEXT: jb .LBB21_4 2838; SSE-64-NEXT: # %bb.3: 2839; SSE-64-NEXT: movaps %xmm3, %xmm2 2840; SSE-64-NEXT: .LBB21_4: 2841; SSE-64-NEXT: subss %xmm2, %xmm0 2842; SSE-64-NEXT: cvttss2si %xmm0, %rax 2843; SSE-64-NEXT: setae %cl 2844; SSE-64-NEXT: movzbl %cl, %ecx 2845; SSE-64-NEXT: shlq $63, %rcx 2846; SSE-64-NEXT: xorq %rax, %rcx 2847; SSE-64-NEXT: movq %rcx, %xmm0 2848; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 2849; SSE-64-NEXT: movdqa %xmm1, %xmm0 2850; SSE-64-NEXT: retq 2851; 2852; AVX-32-LABEL: strict_vector_fptoui_v2f32_to_v2i1: 2853; AVX-32: # %bb.0: 2854; AVX-32-NEXT: pushl %ebp 2855; AVX-32-NEXT: .cfi_def_cfa_offset 8 2856; AVX-32-NEXT: .cfi_offset %ebp, -8 2857; AVX-32-NEXT: movl %esp, %ebp 2858; AVX-32-NEXT: .cfi_def_cfa_register %ebp 2859; AVX-32-NEXT: andl $-8, %esp 2860; AVX-32-NEXT: subl $16, %esp 2861; AVX-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 2862; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 2863; AVX-32-NEXT: vcomiss %xmm1, %xmm2 2864; AVX-32-NEXT: vmovaps %xmm1, %xmm3 2865; AVX-32-NEXT: jae .LBB21_2 2866; AVX-32-NEXT: # %bb.1: 2867; AVX-32-NEXT: vxorps %xmm3, %xmm3, %xmm3 2868; AVX-32-NEXT: .LBB21_2: 2869; AVX-32-NEXT: vsubss %xmm3, %xmm2, %xmm2 2870; AVX-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 2871; AVX-32-NEXT: flds {{[0-9]+}}(%esp) 2872; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) 2873; AVX-32-NEXT: wait 2874; AVX-32-NEXT: setae %al 2875; AVX-32-NEXT: movzbl %al, %eax 2876; AVX-32-NEXT: shll $31, %eax 2877; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 2878; AVX-32-NEXT: vcomiss %xmm1, %xmm0 2879; AVX-32-NEXT: jae .LBB21_4 2880; AVX-32-NEXT: # %bb.3: 2881; AVX-32-NEXT: vxorps %xmm1, %xmm1, %xmm1 2882; AVX-32-NEXT: .LBB21_4: 2883; AVX-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 2884; AVX-32-NEXT: vmovss %xmm0, (%esp) 2885; AVX-32-NEXT: flds (%esp) 2886; AVX-32-NEXT: fisttpll (%esp) 2887; AVX-32-NEXT: wait 2888; AVX-32-NEXT: setae %cl 2889; AVX-32-NEXT: movzbl %cl, %ecx 2890; AVX-32-NEXT: shll $31, %ecx 2891; AVX-32-NEXT: xorl 
{{[0-9]+}}(%esp), %ecx 2892; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2893; AVX-32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 2894; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 2895; AVX-32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 2896; AVX-32-NEXT: movl %ebp, %esp 2897; AVX-32-NEXT: popl %ebp 2898; AVX-32-NEXT: .cfi_def_cfa %esp, 4 2899; AVX-32-NEXT: retl 2900; 2901; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1: 2902; AVX-64: # %bb.0: 2903; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 2904; AVX-64-NEXT: vcomiss %xmm1, %xmm0 2905; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2 2906; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3 2907; AVX-64-NEXT: jb .LBB21_2 2908; AVX-64-NEXT: # %bb.1: 2909; AVX-64-NEXT: vmovaps %xmm1, %xmm3 2910; AVX-64-NEXT: .LBB21_2: 2911; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3 2912; AVX-64-NEXT: vcvttss2si %xmm3, %rax 2913; AVX-64-NEXT: setae %cl 2914; AVX-64-NEXT: movzbl %cl, %ecx 2915; AVX-64-NEXT: shlq $63, %rcx 2916; AVX-64-NEXT: xorq %rax, %rcx 2917; AVX-64-NEXT: vmovq %rcx, %xmm3 2918; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2919; AVX-64-NEXT: vcomiss %xmm1, %xmm0 2920; AVX-64-NEXT: jb .LBB21_4 2921; AVX-64-NEXT: # %bb.3: 2922; AVX-64-NEXT: vmovaps %xmm1, %xmm2 2923; AVX-64-NEXT: .LBB21_4: 2924; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0 2925; AVX-64-NEXT: vcvttss2si %xmm0, %rax 2926; AVX-64-NEXT: setae %cl 2927; AVX-64-NEXT: movzbl %cl, %ecx 2928; AVX-64-NEXT: shlq $63, %rcx 2929; AVX-64-NEXT: xorq %rax, %rcx 2930; AVX-64-NEXT: vmovq %rcx, %xmm0 2931; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 2932; AVX-64-NEXT: retq 2933; 2934; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i1: 2935; AVX512F: # %bb.0: 2936; AVX512F-NEXT: vcvttss2si %xmm0, %eax 2937; AVX512F-NEXT: andl $1, %eax 2938; AVX512F-NEXT: kmovw %eax, %k0 2939; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2940; AVX512F-NEXT: vcvttss2si %xmm0, %eax 2941; AVX512F-NEXT: kmovw %eax, %k1 2942; AVX512F-NEXT: kshiftlw $1, %k1, %k1 2943; AVX512F-NEXT: korw %k1, %k0, %k1 2944; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2945; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2946; AVX512F-NEXT: vzeroupper 2947; AVX512F-NEXT: ret{{[l|q]}} 2948; 2949; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i1: 2950; AVX512VL: # %bb.0: 2951; AVX512VL-NEXT: vcvttss2si %xmm0, %eax 2952; AVX512VL-NEXT: andl $1, %eax 2953; AVX512VL-NEXT: kmovw %eax, %k0 2954; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2955; AVX512VL-NEXT: vcvttss2si %xmm0, %eax 2956; AVX512VL-NEXT: kmovw %eax, %k1 2957; AVX512VL-NEXT: kshiftlw $1, %k1, %k1 2958; AVX512VL-NEXT: korw %k1, %k0, %k1 2959; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 2960; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z} 2961; AVX512VL-NEXT: ret{{[l|q]}} 2962; 2963; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i1: 2964; AVX512DQ: # %bb.0: 2965; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2966; AVX512DQ-NEXT: vcvttss2si %xmm1, %eax 2967; AVX512DQ-NEXT: kmovw %eax, %k0 2968; AVX512DQ-NEXT: kshiftlb $1, %k0, %k0 2969; AVX512DQ-NEXT: vcvttss2si %xmm0, %eax 2970; AVX512DQ-NEXT: kmovw %eax, %k1 2971; AVX512DQ-NEXT: kshiftlb $7, %k1, %k1 2972; AVX512DQ-NEXT: kshiftrb $7, %k1, %k1 2973; AVX512DQ-NEXT: korw %k0, %k1, %k0 2974; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 2975; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2976; AVX512DQ-NEXT: vzeroupper 2977; AVX512DQ-NEXT: ret{{[l|q]}} 2978; 2979; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i1: 2980; AVX512VLDQ: # 
define <4 x i32> @strict_vector_fptosi_v4f32_to_v4i32(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i32> %ret
}

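; Strict fptoui of <4 x float> to <4 x i32>: targets without a native unsigned
; conversion expand it in the checks below by subtracting 2^31 from lanes that
; are >= 2^31 (cmpltps/andnps or vblendvps select), converting with
; (v)cvttps2dq, and XORing the integer sign bit back into those lanes.
; AVX512F/AVX512DQ widen to zmm so they can use vcvttps2udq; AVX512VL/VLDQ use
; the 128-bit vcvttps2udq directly.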
define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    movaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE-32-NEXT:    movaps %xmm0, %xmm2
; SSE-32-NEXT:    cmpltps %xmm1, %xmm2
; SSE-32-NEXT:    movaps %xmm2, %xmm3
; SSE-32-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
; SSE-32-NEXT:    andnps %xmm1, %xmm2
; SSE-32-NEXT:    subps %xmm2, %xmm0
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    xorps %xmm3, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    movaps {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE-64-NEXT:    movaps %xmm0, %xmm2
; SSE-64-NEXT:    cmpltps %xmm1, %xmm2
; SSE-64-NEXT:    movaps %xmm2, %xmm3
; SSE-64-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE-64-NEXT:    andnps %xmm1, %xmm2
; SSE-64-NEXT:    subps %xmm2, %xmm0
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    xorps %xmm3, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX-NEXT:    vcmpltps %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vxorps %xmm3, %xmm3, %xmm3
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
; AVX-NEXT:    vblendvps %xmm2, %xmm3, %xmm4, %xmm4
; AVX-NEXT:    vblendvps %xmm2, %xmm3, %xmm1, %xmm1
; AVX-NEXT:    vsubps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm4, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovaps %xmm0, %xmm0
; AVX512F-NEXT:    vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2udq %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vmovaps %xmm0, %xmm0
; AVX512DQ-NEXT:    vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i32:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i32> %ret
}

define <4 x i8> @strict_vector_fptosi_v4f32_to_v4i8(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    packssdw %xmm0, %xmm0
; SSE-32-NEXT:    packsswb %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    packssdw %xmm0, %xmm0
; SSE-64-NEXT:    packsswb %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i8:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i8> @llvm.experimental.constrained.fptosi.v4i8.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i8> %ret
}

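; Strict fptoui of <4 x float> to <4 x i8>: the checks below expect the value
; to be converted to <4 x i32> with (v)cvttps2dq first and then truncated to
; bytes, via pack instructions on SSE/AVX/AVX512F/AVX512DQ and via vpmovdb
; when AVX512VL is available.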
define <4 x i8> @strict_vector_fptoui_v4f32_to_v4i8(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    packuswb %xmm0, %xmm0
; SSE-32-NEXT:    packuswb %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    packuswb %xmm0, %xmm0
; SSE-64-NEXT:    packuswb %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i8:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovdb %xmm0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i8> @llvm.experimental.constrained.fptoui.v4i8.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i8> %ret
}

define <4 x i1> @strict_vector_fptosi_v4f32_to_v4i1(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v4f32_to_v4i1:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovd2m %xmm0, %k0
; AVX512VLDQ-NEXT:    vpmovm2d %k0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i1> @llvm.experimental.constrained.fptosi.v4i1.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i1> %ret
}

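; Strict fptoui of <4 x float> to <4 x i1>: after the (v)cvttps2dq conversion,
; the AVX512 variants in the checks below shift bit 0 into the sign position
; (vpslld $31), form a mask register with vptestmd or vpmovd2m, and expand it
; back to a vector; plain SSE/AVX simply keep the truncated i32 values.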
define <4 x i1> @strict_vector_fptoui_v4f32_to_v4i1(<4 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; SSE-32:       # %bb.0:
; SSE-32-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-32-NEXT:    retl
;
; SSE-64-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; SSE-64:       # %bb.0:
; SSE-64-NEXT:    cvttps2dq %xmm0, %xmm0
; SSE-64-NEXT:    retq
;
; AVX-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX:       # %bb.0:
; AVX-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX-NEXT:    ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512F-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT:    vzeroupper
; AVX512F-NEXT:    ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VL-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512VL-NEXT:    vptestmd %xmm0, %xmm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512DQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v4f32_to_v4i1:
; AVX512VLDQ:       # %bb.0:
; AVX512VLDQ-NEXT:    vcvttps2dq %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX512VLDQ-NEXT:    vpmovd2m %xmm0, %k0
; AVX512VLDQ-NEXT:    vpmovm2d %k0, %xmm0
; AVX512VLDQ-NEXT:    ret{{[l|q]}}
  %ret = call <4 x i1> @llvm.experimental.constrained.fptoui.v4i1.v4f32(<4 x float> %a,
                                              metadata !"fpexcept.strict") #0
  ret <4 x i1> %ret
}

attributes #0 = { strictfp }