; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VL-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,AVX512DQ

declare <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f64(<8 x double>, metadata)
declare <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f64(<8 x double>, metadata)
declare <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f32(<8 x float>, metadata)
declare <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f32(<8 x float>, metadata)
declare <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f64(<8 x double>, metadata)
declare <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f64(<8 x double>, metadata)
declare <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f64(<8 x double>, metadata)
declare <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f64(<8 x double>, metadata)
declare <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f64(<8 x double>, metadata)
declare <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f64(<8 x double>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f64(<8 x double>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f64(<8 x double>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f32(<8 x float>, metadata)
declare <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f32(<8 x float>, metadata)

declare <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f32(<16 x float>, metadata)
23declare <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f32(<16 x float>, metadata) 24declare <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f32(<16 x float>, metadata) 25declare <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float>, metadata) 26declare <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f32(<16 x float>, metadata) 27declare <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f32(<16 x float>, metadata) 28declare <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f32(<16 x float>, metadata) 29declare <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f32(<16 x float>, metadata) 30 31define <8 x i64> @strict_vector_fptosi_v8f64_to_v8i64(<8 x double> %a) #0 { 32; AVX512VL-32-LABEL: strict_vector_fptosi_v8f64_to_v8i64: 33; AVX512VL-32: # %bb.0: 34; AVX512VL-32-NEXT: pushl %ebp 35; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 36; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 37; AVX512VL-32-NEXT: movl %esp, %ebp 38; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 39; AVX512VL-32-NEXT: andl $-8, %esp 40; AVX512VL-32-NEXT: subl $64, %esp 41; AVX512VL-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1 42; AVX512VL-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) 43; AVX512VL-32-NEXT: vmovhps %xmm1, {{[0-9]+}}(%esp) 44; AVX512VL-32-NEXT: vextractf32x4 $3, %zmm0, %xmm1 45; AVX512VL-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) 46; AVX512VL-32-NEXT: vmovhps %xmm1, (%esp) 47; AVX512VL-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 48; AVX512VL-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp) 49; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm0 50; AVX512VL-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) 51; AVX512VL-32-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp) 52; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 53; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 54; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 55; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 56; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 57; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 58; AVX512VL-32-NEXT: 
fldl (%esp) 59; AVX512VL-32-NEXT: fisttpll (%esp) 60; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 61; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 62; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 63; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 64; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 65; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 66; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 67; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 68; AVX512VL-32-NEXT: wait 69; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 70; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 71; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 72; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 73; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 74; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 75; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1 76; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 77; AVX512VL-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 78; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 79; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 80; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 81; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 82; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero 83; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2 84; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2 85; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2 86; AVX512VL-32-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 87; AVX512VL-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 88; AVX512VL-32-NEXT: movl %ebp, %esp 89; AVX512VL-32-NEXT: popl %ebp 90; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 91; AVX512VL-32-NEXT: retl 92; 93; AVX512VL-64-LABEL: strict_vector_fptosi_v8f64_to_v8i64: 94; AVX512VL-64: # %bb.0: 95; AVX512VL-64-NEXT: vextractf32x4 $3, %zmm0, %xmm1 96; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax 97; 
AVX512VL-64-NEXT: vmovq %rax, %xmm2 98; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] 99; AVX512VL-64-NEXT: vcvttsd2si %xmm1, %rax 100; AVX512VL-64-NEXT: vmovq %rax, %xmm1 101; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 102; AVX512VL-64-NEXT: vextractf32x4 $2, %zmm0, %xmm2 103; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax 104; AVX512VL-64-NEXT: vmovq %rax, %xmm3 105; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] 106; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax 107; AVX512VL-64-NEXT: vmovq %rax, %xmm2 108; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 109; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 110; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm2 111; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax 112; AVX512VL-64-NEXT: vmovq %rax, %xmm3 113; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] 114; AVX512VL-64-NEXT: vcvttsd2si %xmm2, %rax 115; AVX512VL-64-NEXT: vmovq %rax, %xmm2 116; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 117; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax 118; AVX512VL-64-NEXT: vmovq %rax, %xmm3 119; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 120; AVX512VL-64-NEXT: vcvttsd2si %xmm0, %rax 121; AVX512VL-64-NEXT: vmovq %rax, %xmm0 122; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 123; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 124; AVX512VL-64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 125; AVX512VL-64-NEXT: retq 126; 127; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i64: 128; AVX512DQ: # %bb.0: 129; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 130; AVX512DQ-NEXT: ret{{[l|q]}} 131 %ret = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f64(<8 x double> %a, 132 metadata !"fpexcept.strict") #0 133 ret <8 x i64> %ret 134} 135 136define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 { 137; AVX512VL-32-LABEL: strict_vector_fptoui_v8f64_to_v8i64: 138; AVX512VL-32: # %bb.0: 139; AVX512VL-32-NEXT: pushl %ebp 140; 
AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 141; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 142; AVX512VL-32-NEXT: movl %esp, %ebp 143; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 144; AVX512VL-32-NEXT: pushl %ebx 145; AVX512VL-32-NEXT: pushl %edi 146; AVX512VL-32-NEXT: pushl %esi 147; AVX512VL-32-NEXT: andl $-8, %esp 148; AVX512VL-32-NEXT: subl $80, %esp 149; AVX512VL-32-NEXT: .cfi_offset %esi, -20 150; AVX512VL-32-NEXT: .cfi_offset %edi, -16 151; AVX512VL-32-NEXT: .cfi_offset %ebx, -12 152; AVX512VL-32-NEXT: vextractf32x4 $3, %zmm0, %xmm2 153; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0] 154; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 155; AVX512VL-32-NEXT: xorl %eax, %eax 156; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3 157; AVX512VL-32-NEXT: setae %al 158; AVX512VL-32-NEXT: kmovw %eax, %k1 159; AVX512VL-32-NEXT: movl %eax, %esi 160; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm4 {%k1} {z} 161; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3 162; AVX512VL-32-NEXT: vmovsd %xmm3, (%esp) 163; AVX512VL-32-NEXT: xorl %ebx, %ebx 164; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2 165; AVX512VL-32-NEXT: setae %bl 166; AVX512VL-32-NEXT: kmovw %ebx, %k1 167; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z} 168; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2 169; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) 170; AVX512VL-32-NEXT: vextractf32x4 $2, %zmm0, %xmm2 171; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0] 172; AVX512VL-32-NEXT: xorl %eax, %eax 173; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3 174; AVX512VL-32-NEXT: setae %al 175; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 176; AVX512VL-32-NEXT: kmovw %eax, %k1 177; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm4 {%k1} {z} 178; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3 179; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp) 180; AVX512VL-32-NEXT: xorl %edx, %edx 181; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2 182; AVX512VL-32-NEXT: setae %dl 183; AVX512VL-32-NEXT: kmovw %edx, 
%k1 184; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z} 185; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2 186; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) 187; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm2 188; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0] 189; AVX512VL-32-NEXT: xorl %eax, %eax 190; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3 191; AVX512VL-32-NEXT: setae %al 192; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 193; AVX512VL-32-NEXT: kmovw %eax, %k1 194; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm4 {%k1} {z} 195; AVX512VL-32-NEXT: vsubsd %xmm4, %xmm3, %xmm3 196; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp) 197; AVX512VL-32-NEXT: xorl %ecx, %ecx 198; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2 199; AVX512VL-32-NEXT: setae %cl 200; AVX512VL-32-NEXT: kmovw %ecx, %k1 201; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z} 202; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2 203; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) 204; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] 205; AVX512VL-32-NEXT: xorl %eax, %eax 206; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2 207; AVX512VL-32-NEXT: setae %al 208; AVX512VL-32-NEXT: kmovw %eax, %k1 209; AVX512VL-32-NEXT: movl %eax, %edi 210; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm3 {%k1} {z} 211; AVX512VL-32-NEXT: vsubsd %xmm3, %xmm2, %xmm2 212; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) 213; AVX512VL-32-NEXT: xorl %eax, %eax 214; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm0 215; AVX512VL-32-NEXT: setae %al 216; AVX512VL-32-NEXT: kmovw %eax, %k1 217; AVX512VL-32-NEXT: vmovsd %xmm1, %xmm1, %xmm1 {%k1} {z} 218; AVX512VL-32-NEXT: vsubsd %xmm1, %xmm0, %xmm0 219; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) 220; AVX512VL-32-NEXT: fldl (%esp) 221; AVX512VL-32-NEXT: fisttpll (%esp) 222; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 223; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 224; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 225; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 226; 
AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 227; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 228; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 229; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 230; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 231; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 232; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 233; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 234; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) 235; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 236; AVX512VL-32-NEXT: wait 237; AVX512VL-32-NEXT: shll $31, %ebx 238; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx 239; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 240; AVX512VL-32-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 241; AVX512VL-32-NEXT: shll $31, %esi 242; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %esi 243; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 244; AVX512VL-32-NEXT: vpinsrd $3, %esi, %xmm0, %xmm0 245; AVX512VL-32-NEXT: shll $31, %edx 246; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx 247; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 248; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1 249; AVX512VL-32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload 250; AVX512VL-32-NEXT: shll $31, %edx 251; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx 252; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 253; AVX512VL-32-NEXT: vpinsrd $3, %edx, %xmm1, %xmm1 254; AVX512VL-32-NEXT: shll $31, %ecx 255; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 256; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero 257; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 258; AVX512VL-32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 259; AVX512VL-32-NEXT: shll $31, %ecx 260; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 261; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2 262; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm2 263; AVX512VL-32-NEXT: shll $31, %eax 264; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 265; 
AVX512VL-32-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero 266; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3 267; AVX512VL-32-NEXT: shll $31, %edi 268; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edi 269; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3 270; AVX512VL-32-NEXT: vpinsrd $3, %edi, %xmm3, %xmm3 271; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 272; AVX512VL-32-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm1 273; AVX512VL-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 274; AVX512VL-32-NEXT: leal -12(%ebp), %esp 275; AVX512VL-32-NEXT: popl %esi 276; AVX512VL-32-NEXT: popl %edi 277; AVX512VL-32-NEXT: popl %ebx 278; AVX512VL-32-NEXT: popl %ebp 279; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 280; AVX512VL-32-NEXT: retl 281; 282; AVX512VL-64-LABEL: strict_vector_fptoui_v8f64_to_v8i64: 283; AVX512VL-64: # %bb.0: 284; AVX512VL-64-NEXT: vextractf32x4 $3, %zmm0, %xmm1 285; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax 286; AVX512VL-64-NEXT: vmovq %rax, %xmm2 287; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] 288; AVX512VL-64-NEXT: vcvttsd2usi %xmm1, %rax 289; AVX512VL-64-NEXT: vmovq %rax, %xmm1 290; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 291; AVX512VL-64-NEXT: vextractf32x4 $2, %zmm0, %xmm2 292; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax 293; AVX512VL-64-NEXT: vmovq %rax, %xmm3 294; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] 295; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax 296; AVX512VL-64-NEXT: vmovq %rax, %xmm2 297; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 298; AVX512VL-64-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 299; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm2 300; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax 301; AVX512VL-64-NEXT: vmovq %rax, %xmm3 302; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] 303; AVX512VL-64-NEXT: vcvttsd2usi %xmm2, %rax 304; AVX512VL-64-NEXT: vmovq %rax, %xmm2 305; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 306; AVX512VL-64-NEXT: vcvttsd2usi 
%xmm0, %rax 307; AVX512VL-64-NEXT: vmovq %rax, %xmm3 308; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 309; AVX512VL-64-NEXT: vcvttsd2usi %xmm0, %rax 310; AVX512VL-64-NEXT: vmovq %rax, %xmm0 311; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 312; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 313; AVX512VL-64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 314; AVX512VL-64-NEXT: retq 315; 316; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i64: 317; AVX512DQ: # %bb.0: 318; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 319; AVX512DQ-NEXT: ret{{[l|q]}} 320 %ret = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f64(<8 x double> %a, 321 metadata !"fpexcept.strict") #0 322 ret <8 x i64> %ret 323} 324 325define <8 x i64> @strict_vector_fptosi_v8f32_to_v8i64(<8 x float> %a) #0 { 326; AVX512VL-32-LABEL: strict_vector_fptosi_v8f32_to_v8i64: 327; AVX512VL-32: # %bb.0: 328; AVX512VL-32-NEXT: pushl %ebp 329; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 330; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 331; AVX512VL-32-NEXT: movl %esp, %ebp 332; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 333; AVX512VL-32-NEXT: andl $-8, %esp 334; AVX512VL-32-NEXT: subl $64, %esp 335; AVX512VL-32-NEXT: vextracti128 $1, %ymm0, %xmm1 336; AVX512VL-32-NEXT: vmovd %xmm1, {{[0-9]+}}(%esp) 337; AVX512VL-32-NEXT: vextractps $1, %xmm1, {{[0-9]+}}(%esp) 338; AVX512VL-32-NEXT: vextractps $2, %xmm1, {{[0-9]+}}(%esp) 339; AVX512VL-32-NEXT: vextractps $3, %xmm1, (%esp) 340; AVX512VL-32-NEXT: vmovd %xmm0, {{[0-9]+}}(%esp) 341; AVX512VL-32-NEXT: vextractps $1, %xmm0, {{[0-9]+}}(%esp) 342; AVX512VL-32-NEXT: vextractps $2, %xmm0, {{[0-9]+}}(%esp) 343; AVX512VL-32-NEXT: vextractps $3, %xmm0, {{[0-9]+}}(%esp) 344; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 345; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 346; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 347; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 348; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 349; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 350; 
AVX512VL-32-NEXT: flds (%esp) 351; AVX512VL-32-NEXT: fisttpll (%esp) 352; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 353; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 354; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 355; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 356; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 357; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 358; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 359; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 360; AVX512VL-32-NEXT: wait 361; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 362; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 363; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 364; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 365; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 366; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 367; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm1, %xmm1 368; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 369; AVX512VL-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 370; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 371; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1 372; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 373; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm1, %xmm1 374; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero 375; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm2, %xmm2 376; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2 377; AVX512VL-32-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm2, %xmm2 378; AVX512VL-32-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 379; AVX512VL-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 380; AVX512VL-32-NEXT: movl %ebp, %esp 381; AVX512VL-32-NEXT: popl %ebp 382; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 383; AVX512VL-32-NEXT: retl 384; 385; AVX512VL-64-LABEL: strict_vector_fptosi_v8f32_to_v8i64: 386; AVX512VL-64: # %bb.0: 387; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1 388; 
AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3] 389; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax 390; AVX512VL-64-NEXT: vmovq %rax, %xmm2 391; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0] 392; AVX512VL-64-NEXT: vcvttss2si %xmm3, %rax 393; AVX512VL-64-NEXT: vmovq %rax, %xmm3 394; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 395; AVX512VL-64-NEXT: vcvttss2si %xmm1, %rax 396; AVX512VL-64-NEXT: vmovq %rax, %xmm3 397; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3] 398; AVX512VL-64-NEXT: vcvttss2si %xmm1, %rax 399; AVX512VL-64-NEXT: vmovq %rax, %xmm1 400; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0] 401; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 402; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3] 403; AVX512VL-64-NEXT: vcvttss2si %xmm2, %rax 404; AVX512VL-64-NEXT: vmovq %rax, %xmm2 405; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0] 406; AVX512VL-64-NEXT: vcvttss2si %xmm3, %rax 407; AVX512VL-64-NEXT: vmovq %rax, %xmm3 408; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 409; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax 410; AVX512VL-64-NEXT: vmovq %rax, %xmm3 411; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 412; AVX512VL-64-NEXT: vcvttss2si %xmm0, %rax 413; AVX512VL-64-NEXT: vmovq %rax, %xmm0 414; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 415; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 416; AVX512VL-64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 417; AVX512VL-64-NEXT: retq 418; 419; AVX512DQ-LABEL: strict_vector_fptosi_v8f32_to_v8i64: 420; AVX512DQ: # %bb.0: 421; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 422; AVX512DQ-NEXT: ret{{[l|q]}} 423 %ret = call <8 x i64> @llvm.experimental.constrained.fptosi.v8i64.v8f32(<8 x float> %a, 424 metadata !"fpexcept.strict") #0 425 ret <8 x i64> %ret 426} 427 428define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 { 429; AVX512VL-32-LABEL: strict_vector_fptoui_v8f32_to_v8i64: 430; 
AVX512VL-32: # %bb.0: 431; AVX512VL-32-NEXT: pushl %ebp 432; AVX512VL-32-NEXT: .cfi_def_cfa_offset 8 433; AVX512VL-32-NEXT: .cfi_offset %ebp, -8 434; AVX512VL-32-NEXT: movl %esp, %ebp 435; AVX512VL-32-NEXT: .cfi_def_cfa_register %ebp 436; AVX512VL-32-NEXT: pushl %ebx 437; AVX512VL-32-NEXT: pushl %edi 438; AVX512VL-32-NEXT: pushl %esi 439; AVX512VL-32-NEXT: andl $-8, %esp 440; AVX512VL-32-NEXT: subl $80, %esp 441; AVX512VL-32-NEXT: .cfi_offset %esi, -20 442; AVX512VL-32-NEXT: .cfi_offset %edi, -16 443; AVX512VL-32-NEXT: .cfi_offset %ebx, -12 444; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm2 445; AVX512VL-32-NEXT: vshufps {{.*#+}} xmm3 = xmm2[3,3,3,3] 446; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 447; AVX512VL-32-NEXT: xorl %eax, %eax 448; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3 449; AVX512VL-32-NEXT: setae %al 450; AVX512VL-32-NEXT: kmovw %eax, %k1 451; AVX512VL-32-NEXT: movl %eax, %esi 452; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm4 {%k1} {z} 453; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm3 454; AVX512VL-32-NEXT: vmovss %xmm3, (%esp) 455; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0] 456; AVX512VL-32-NEXT: xorl %ebx, %ebx 457; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3 458; AVX512VL-32-NEXT: setae %bl 459; AVX512VL-32-NEXT: kmovw %ebx, %k1 460; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm4 {%k1} {z} 461; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm3 462; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) 463; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm2[1,1,3,3] 464; AVX512VL-32-NEXT: xorl %eax, %eax 465; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3 466; AVX512VL-32-NEXT: setae %al 467; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 468; AVX512VL-32-NEXT: kmovw %eax, %k1 469; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm4 {%k1} {z} 470; AVX512VL-32-NEXT: vsubss %xmm4, %xmm3, %xmm3 471; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) 472; AVX512VL-32-NEXT: xorl %edx, %edx 473; AVX512VL-32-NEXT: vcomiss 
%xmm1, %xmm2 474; AVX512VL-32-NEXT: setae %dl 475; AVX512VL-32-NEXT: kmovw %edx, %k1 476; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z} 477; AVX512VL-32-NEXT: vsubss %xmm3, %xmm2, %xmm2 478; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 479; AVX512VL-32-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3] 480; AVX512VL-32-NEXT: xorl %eax, %eax 481; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2 482; AVX512VL-32-NEXT: setae %al 483; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 484; AVX512VL-32-NEXT: kmovw %eax, %k1 485; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z} 486; AVX512VL-32-NEXT: vsubss %xmm3, %xmm2, %xmm2 487; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 488; AVX512VL-32-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] 489; AVX512VL-32-NEXT: xorl %ecx, %ecx 490; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2 491; AVX512VL-32-NEXT: setae %cl 492; AVX512VL-32-NEXT: kmovw %ecx, %k1 493; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z} 494; AVX512VL-32-NEXT: vsubss %xmm3, %xmm2, %xmm2 495; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 496; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] 497; AVX512VL-32-NEXT: xorl %eax, %eax 498; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2 499; AVX512VL-32-NEXT: setae %al 500; AVX512VL-32-NEXT: kmovw %eax, %k1 501; AVX512VL-32-NEXT: movl %eax, %edi 502; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm3 {%k1} {z} 503; AVX512VL-32-NEXT: vsubss %xmm3, %xmm2, %xmm2 504; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) 505; AVX512VL-32-NEXT: xorl %eax, %eax 506; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm0 507; AVX512VL-32-NEXT: setae %al 508; AVX512VL-32-NEXT: kmovw %eax, %k1 509; AVX512VL-32-NEXT: vmovss %xmm1, %xmm1, %xmm1 {%k1} {z} 510; AVX512VL-32-NEXT: vsubss %xmm1, %xmm0, %xmm0 511; AVX512VL-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) 512; AVX512VL-32-NEXT: flds (%esp) 513; AVX512VL-32-NEXT: fisttpll (%esp) 514; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 515; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 516; 
AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 517; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 518; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 519; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 520; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 521; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 522; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 523; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 524; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 525; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 526; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) 527; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) 528; AVX512VL-32-NEXT: wait 529; AVX512VL-32-NEXT: shll $31, %ebx 530; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ebx 531; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 532; AVX512VL-32-NEXT: vpinsrd $1, %ebx, %xmm0, %xmm0 533; AVX512VL-32-NEXT: shll $31, %esi 534; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %esi 535; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 536; AVX512VL-32-NEXT: vpinsrd $3, %esi, %xmm0, %xmm0 537; AVX512VL-32-NEXT: shll $31, %edx 538; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx 539; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 540; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm1, %xmm1 541; AVX512VL-32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload 542; AVX512VL-32-NEXT: shll $31, %edx 543; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx 544; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm1, %xmm1 545; AVX512VL-32-NEXT: vpinsrd $3, %edx, %xmm1, %xmm1 546; AVX512VL-32-NEXT: shll $31, %ecx 547; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 548; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero 549; AVX512VL-32-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 550; AVX512VL-32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 551; AVX512VL-32-NEXT: shll $31, %ecx 552; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 553; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm2, %xmm2 554; AVX512VL-32-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm2 555; 
AVX512VL-32-NEXT: shll $31, %eax 556; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax 557; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero 558; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm3, %xmm3 559; AVX512VL-32-NEXT: shll $31, %edi 560; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edi 561; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm3, %xmm3 562; AVX512VL-32-NEXT: vpinsrd $3, %edi, %xmm3, %xmm3 563; AVX512VL-32-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 564; AVX512VL-32-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm1 565; AVX512VL-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 566; AVX512VL-32-NEXT: leal -12(%ebp), %esp 567; AVX512VL-32-NEXT: popl %esi 568; AVX512VL-32-NEXT: popl %edi 569; AVX512VL-32-NEXT: popl %ebx 570; AVX512VL-32-NEXT: popl %ebp 571; AVX512VL-32-NEXT: .cfi_def_cfa %esp, 4 572; AVX512VL-32-NEXT: retl 573; 574; AVX512VL-64-LABEL: strict_vector_fptoui_v8f32_to_v8i64: 575; AVX512VL-64: # %bb.0: 576; AVX512VL-64-NEXT: vextractf128 $1, %ymm0, %xmm1 577; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3] 578; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax 579; AVX512VL-64-NEXT: vmovq %rax, %xmm2 580; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0] 581; AVX512VL-64-NEXT: vcvttss2usi %xmm3, %rax 582; AVX512VL-64-NEXT: vmovq %rax, %xmm3 583; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 584; AVX512VL-64-NEXT: vcvttss2usi %xmm1, %rax 585; AVX512VL-64-NEXT: vmovq %rax, %xmm3 586; AVX512VL-64-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3] 587; AVX512VL-64-NEXT: vcvttss2usi %xmm1, %rax 588; AVX512VL-64-NEXT: vmovq %rax, %xmm1 589; AVX512VL-64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0] 590; AVX512VL-64-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 591; AVX512VL-64-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3] 592; AVX512VL-64-NEXT: vcvttss2usi %xmm2, %rax 593; AVX512VL-64-NEXT: vmovq %rax, %xmm2 594; AVX512VL-64-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0] 595; AVX512VL-64-NEXT: vcvttss2usi %xmm3, %rax 596; AVX512VL-64-NEXT: vmovq %rax, 
%xmm3
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm3
; AVX512VL-64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-64-NEXT:    vcvttss2usi %xmm0, %rax
; AVX512VL-64-NEXT:    vmovq %rax, %xmm0
; AVX512VL-64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512VL-64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512VL-64-NEXT:    retq
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f32_to_v8i64:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i64> @llvm.experimental.constrained.fptoui.v8i64.v8f32(<8 x float> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i64> %ret
}

; Strict fptosi v8f64 -> v8i32: a single vcvttpd2dq on all configurations.
define <8 x i32> @strict_vector_fptosi_v8f64_to_v8i32(<8 x double> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v8f64_to_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i32> @llvm.experimental.constrained.fptosi.v8i32.v8f64(<8 x double> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i32> %ret
}

; Strict fptoui v8f64 -> v8i32: AVX-512 has a native unsigned conversion
; (vcvttpd2udq), so no scalarization is needed.
define <8 x i32> @strict_vector_fptoui_v8f64_to_v8i32(<8 x double> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v8f64_to_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i32> @llvm.experimental.constrained.fptoui.v8i32.v8f64(<8 x double> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i32> %ret
}

; Strict fptosi v8f64 -> v8i16: convert to i32 first, then truncate with
; vpmovdw.  The AVX512DQ run (no VL) must use the ZMM form of vpmovdw.
define <8 x i16> @strict_vector_fptosi_v8f64_to_v8i16(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptosi_v8f64_to_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i16> @llvm.experimental.constrained.fptosi.v8i16.v8f64(<8 x double> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i16> %ret
}

; Strict fptoui v8f64 -> v8i16: same lowering as the signed case above
; (i16 fits in the signed i32 intermediate range).
define <8 x i16> @strict_vector_fptoui_v8f64_to_v8i16(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptoui_v8f64_to_v8i16:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i16:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i16> @llvm.experimental.constrained.fptoui.v8i16.v8f64(<8 x double> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i16> %ret
}

; Strict fptosi v8f64 -> v8i8: convert to i32, then truncate with vpmovdb.
define <8 x i8> @strict_vector_fptosi_v8f64_to_v8i8(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptosi_v8f64_to_v8i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vpmovdb %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i8> @llvm.experimental.constrained.fptosi.v8i8.v8f64(<8 x double> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i8> %ret
}

; Strict fptoui v8f64 -> v8i8: same lowering as the signed case above.
define <8 x i8> @strict_vector_fptoui_v8f64_to_v8i8(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptoui_v8f64_to_v8i8:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vpmovdb %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i8:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i8> @llvm.experimental.constrained.fptoui.v8i8.v8f64(<8 x double> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i8> %ret
}

; Strict fptosi v8f64 -> v8i1: convert to i32, test against zero into a mask
; register, then rematerialize the i1 vector as an i16 vector.
define <8 x i1> @strict_vector_fptosi_v8f64_to_v8i1(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptosi_v8f64_to_v8i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v8f64_to_v8i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i1> @llvm.experimental.constrained.fptosi.v8i1.v8f64(<8 x double> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i1> %ret
}

; Strict fptoui v8f64 -> v8i1: as the signed case, plus a vpslld $31 to move
; bit 0 of each converted element into the sign bit before forming the mask.
define <8 x i1> @strict_vector_fptoui_v8f64_to_v8i1(<8 x double> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptoui_v8f64_to_v8i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512VL-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512VL-NEXT:    vptestmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v8f64_to_v8i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttpd2dq %zmm0, %ymm0
; AVX512DQ-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
; AVX512DQ-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <8 x i1> @llvm.experimental.constrained.fptoui.v8i1.v8f64(<8 x double> %a,
                                metadata !"fpexcept.strict") #0
  ret <8 x i1> %ret
}

; Strict fptosi v16f32 -> v16i32: single vcvttps2dq.
define <16 x i32> @strict_vector_fptosi_v16f32_to_v16i32(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f32_to_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i32> @llvm.experimental.constrained.fptosi.v16i32.v16f32(<16 x float> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i32> %ret
}

; Strict fptoui v16f32 -> v16i32: single native unsigned vcvttps2udq.
define <16 x i32> @strict_vector_fptoui_v16f32_to_v16i32(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i32> @llvm.experimental.constrained.fptoui.v16i32.v16f32(<16 x float> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i32> %ret
}

; Strict fptosi v16f32 -> v16i16: convert to i32, truncate with vpmovdw.
define <16 x i16> @strict_vector_fptosi_v16f32_to_v16i16(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f32_to_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i16> @llvm.experimental.constrained.fptosi.v16i16.v16f32(<16 x float> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i16> %ret
}

; Strict fptoui v16f32 -> v16i16: unsigned convert to i32, then truncate.
define <16 x i16> @strict_vector_fptoui_v16f32_to_v16i16(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm0
; CHECK-NEXT:    vpmovdw %zmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i16> @llvm.experimental.constrained.fptoui.v16i16.v16f32(<16 x float> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i16> %ret
}

; Strict fptosi v16f32 -> v16i8: convert to i32, truncate with vpmovdb.
define <16 x i8> @strict_vector_fptosi_v16f32_to_v16i8(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptosi_v16f32_to_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i8> @llvm.experimental.constrained.fptosi.v16i8.v16f32(<16 x float> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i8> %ret
}

; NOTE(review): the fptoui-to-i8 lowering below emits the *signed*
; vcvttps2dq rather than vcvttps2udq — presumably valid because any value
; outside the unsigned i8 range makes the fptoui result poison anyway;
; confirm on the next check regeneration.
define <16 x i8> @strict_vector_fptoui_v16f32_to_v16i8(<16 x float> %a) #0 {
; CHECK-LABEL: strict_vector_fptoui_v16f32_to_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    vpmovdb %zmm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i8> @llvm.experimental.constrained.fptoui.v16i8.v16f32(<16 x float> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i8> %ret
}

; Strict fptosi v16f32 -> v16i1: convert to i32, compare into a mask register
; (vptestmd on VL, vpmovd2m on DQ), then rematerialize as an i8 vector.
define <16 x i1> @strict_vector_fptosi_v16f32_to_v16i1(<16 x float> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptosi_v16f32_to_v16i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %zmm0, %zmm0
; AVX512VL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v16f32_to_v16i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i1> @llvm.experimental.constrained.fptosi.v16i1.v16f32(<16 x float> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i1> %ret
}

; Strict fptoui v16f32 -> v16i1: as the signed case, plus vpslld $31 to move
; bit 0 into the sign bit before forming the mask.
define <16 x i1> @strict_vector_fptoui_v16f32_to_v16i1(<16 x float> %a) #0 {
; AVX512VL-LABEL: strict_vector_fptoui_v16f32_to_v16i1:
; AVX512VL:       # %bb.0:
; AVX512VL-NEXT:    vcvttps2dq %zmm0, %zmm0
; AVX512VL-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512VL-NEXT:    vptestmd %zmm0, %zmm0, %k1
; AVX512VL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT:    vzeroupper
; AVX512VL-NEXT:    ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v16f32_to_v16i1:
; AVX512DQ:       # %bb.0:
; AVX512DQ-NEXT:    vcvttps2dq %zmm0, %zmm0
; AVX512DQ-NEXT:    vpslld $31, %zmm0, %zmm0
; AVX512DQ-NEXT:    vpmovd2m %zmm0, %k0
; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
; AVX512DQ-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT:    vzeroupper
; AVX512DQ-NEXT:    ret{{[l|q]}}
  %ret = call <16 x i1> @llvm.experimental.constrained.fptoui.v16i1.v16f32(<16 x float> %a,
                                metadata !"fpexcept.strict") #0
  ret <16 x i1> %ret
}

; strictfp: all calls above must honor FP exception semantics
; ("fpexcept.strict"), so conversions may not be speculated or reordered.
attributes #0 = { strictfp }