1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,VEX,AVX1 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,VEX,AVX2 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VL 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX,AVX512,AVX512DQ 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512VLDQ 9; 10; 32-bit tests to make sure we're not doing anything stupid. 11; RUN: llc < %s -mtriple=i686-unknown-unknown 12; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse 13; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 14 15; 16; Double to Signed Integer 17; 18 19define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) { 20; SSE-LABEL: fptosi_2f64_to_2i64: 21; SSE: # %bb.0: 22; SSE-NEXT: cvttsd2si %xmm0, %rax 23; SSE-NEXT: movq %rax, %xmm1 24; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 25; SSE-NEXT: cvttsd2si %xmm0, %rax 26; SSE-NEXT: movq %rax, %xmm0 27; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 28; SSE-NEXT: movdqa %xmm1, %xmm0 29; SSE-NEXT: retq 30; 31; VEX-LABEL: fptosi_2f64_to_2i64: 32; VEX: # %bb.0: 33; VEX-NEXT: vcvttsd2si %xmm0, %rax 34; VEX-NEXT: vmovq %rax, %xmm1 35; VEX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 36; VEX-NEXT: vcvttsd2si %xmm0, %rax 37; VEX-NEXT: vmovq %rax, %xmm0 38; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 39; VEX-NEXT: retq 40; 41; AVX512F-LABEL: fptosi_2f64_to_2i64: 42; AVX512F: # %bb.0: 43; AVX512F-NEXT: vcvttsd2si %xmm0, %rax 44; 
AVX512F-NEXT: vmovq %rax, %xmm1 45; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 46; AVX512F-NEXT: vcvttsd2si %xmm0, %rax 47; AVX512F-NEXT: vmovq %rax, %xmm0 48; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 49; AVX512F-NEXT: retq 50; 51; AVX512VL-LABEL: fptosi_2f64_to_2i64: 52; AVX512VL: # %bb.0: 53; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax 54; AVX512VL-NEXT: vmovq %rax, %xmm1 55; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 56; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax 57; AVX512VL-NEXT: vmovq %rax, %xmm0 58; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 59; AVX512VL-NEXT: retq 60; 61; AVX512DQ-LABEL: fptosi_2f64_to_2i64: 62; AVX512DQ: # %bb.0: 63; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 64; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 65; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 66; AVX512DQ-NEXT: vzeroupper 67; AVX512DQ-NEXT: retq 68; 69; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64: 70; AVX512VLDQ: # %bb.0: 71; AVX512VLDQ-NEXT: vcvttpd2qq %xmm0, %xmm0 72; AVX512VLDQ-NEXT: retq 73 %cvt = fptosi <2 x double> %a to <2 x i64> 74 ret <2 x i64> %cvt 75} 76 77define <4 x i32> @fptosi_2f64_to_4i32(<2 x double> %a) { 78; SSE-LABEL: fptosi_2f64_to_4i32: 79; SSE: # %bb.0: 80; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 81; SSE-NEXT: retq 82; 83; AVX-LABEL: fptosi_2f64_to_4i32: 84; AVX: # %bb.0: 85; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 86; AVX-NEXT: retq 87 %cvt = fptosi <2 x double> %a to <2 x i32> 88 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 89 ret <4 x i32> %ext 90} 91 92define <2 x i32> @fptosi_2f64_to_2i32(<2 x double> %a) { 93; SSE-LABEL: fptosi_2f64_to_2i32: 94; SSE: # %bb.0: 95; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 96; SSE-NEXT: retq 97; 98; AVX-LABEL: fptosi_2f64_to_2i32: 99; AVX: # %bb.0: 100; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 101; AVX-NEXT: retq 102 %cvt = fptosi <2 x double> %a to <2 x i32> 103 ret <2 x i32> %cvt 104} 105 106define <4 x i32> @fptosi_4f64_to_2i32(<2 x 
double> %a) { 107; SSE-LABEL: fptosi_4f64_to_2i32: 108; SSE: # %bb.0: 109; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 110; SSE-NEXT: retq 111; 112; AVX-LABEL: fptosi_4f64_to_2i32: 113; AVX: # %bb.0: 114; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 115; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 116; AVX-NEXT: vzeroupper 117; AVX-NEXT: retq 118 %ext = shufflevector <2 x double> %a, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 119 %cvt = fptosi <4 x double> %ext to <4 x i32> 120 ret <4 x i32> %cvt 121} 122 123define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) { 124; SSE-LABEL: fptosi_4f64_to_4i64: 125; SSE: # %bb.0: 126; SSE-NEXT: cvttsd2si %xmm0, %rax 127; SSE-NEXT: movq %rax, %xmm2 128; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 129; SSE-NEXT: cvttsd2si %xmm0, %rax 130; SSE-NEXT: movq %rax, %xmm0 131; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0] 132; SSE-NEXT: cvttsd2si %xmm1, %rax 133; SSE-NEXT: movq %rax, %xmm3 134; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] 135; SSE-NEXT: cvttsd2si %xmm1, %rax 136; SSE-NEXT: movq %rax, %xmm0 137; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0] 138; SSE-NEXT: movdqa %xmm2, %xmm0 139; SSE-NEXT: movdqa %xmm3, %xmm1 140; SSE-NEXT: retq 141; 142; AVX1-LABEL: fptosi_4f64_to_4i64: 143; AVX1: # %bb.0: 144; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 145; AVX1-NEXT: vcvttsd2si %xmm1, %rax 146; AVX1-NEXT: vmovq %rax, %xmm2 147; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] 148; AVX1-NEXT: vcvttsd2si %xmm1, %rax 149; AVX1-NEXT: vmovq %rax, %xmm1 150; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 151; AVX1-NEXT: vcvttsd2si %xmm0, %rax 152; AVX1-NEXT: vmovq %rax, %xmm2 153; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 154; AVX1-NEXT: vcvttsd2si %xmm0, %rax 155; AVX1-NEXT: vmovq %rax, %xmm0 156; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 157; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 158; AVX1-NEXT: retq 159; 160; AVX2-LABEL: fptosi_4f64_to_4i64: 161; AVX2: # %bb.0: 162; AVX2-NEXT: 
vextractf128 $1, %ymm0, %xmm1 163; AVX2-NEXT: vcvttsd2si %xmm1, %rax 164; AVX2-NEXT: vmovq %rax, %xmm2 165; AVX2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] 166; AVX2-NEXT: vcvttsd2si %xmm1, %rax 167; AVX2-NEXT: vmovq %rax, %xmm1 168; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 169; AVX2-NEXT: vcvttsd2si %xmm0, %rax 170; AVX2-NEXT: vmovq %rax, %xmm2 171; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 172; AVX2-NEXT: vcvttsd2si %xmm0, %rax 173; AVX2-NEXT: vmovq %rax, %xmm0 174; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 175; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 176; AVX2-NEXT: retq 177; 178; AVX512F-LABEL: fptosi_4f64_to_4i64: 179; AVX512F: # %bb.0: 180; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1 181; AVX512F-NEXT: vcvttsd2si %xmm1, %rax 182; AVX512F-NEXT: vmovq %rax, %xmm2 183; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] 184; AVX512F-NEXT: vcvttsd2si %xmm1, %rax 185; AVX512F-NEXT: vmovq %rax, %xmm1 186; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 187; AVX512F-NEXT: vcvttsd2si %xmm0, %rax 188; AVX512F-NEXT: vmovq %rax, %xmm2 189; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 190; AVX512F-NEXT: vcvttsd2si %xmm0, %rax 191; AVX512F-NEXT: vmovq %rax, %xmm0 192; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 193; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 194; AVX512F-NEXT: retq 195; 196; AVX512VL-LABEL: fptosi_4f64_to_4i64: 197; AVX512VL: # %bb.0: 198; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1 199; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax 200; AVX512VL-NEXT: vmovq %rax, %xmm2 201; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] 202; AVX512VL-NEXT: vcvttsd2si %xmm1, %rax 203; AVX512VL-NEXT: vmovq %rax, %xmm1 204; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 205; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax 206; AVX512VL-NEXT: vmovq %rax, %xmm2 207; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 208; AVX512VL-NEXT: vcvttsd2si %xmm0, %rax 209; AVX512VL-NEXT: vmovq %rax, %xmm0 210; AVX512VL-NEXT: 
vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 211; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 212; AVX512VL-NEXT: retq 213; 214; AVX512DQ-LABEL: fptosi_4f64_to_4i64: 215; AVX512DQ: # %bb.0: 216; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 217; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 218; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 219; AVX512DQ-NEXT: retq 220; 221; AVX512VLDQ-LABEL: fptosi_4f64_to_4i64: 222; AVX512VLDQ: # %bb.0: 223; AVX512VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0 224; AVX512VLDQ-NEXT: retq 225 %cvt = fptosi <4 x double> %a to <4 x i64> 226 ret <4 x i64> %cvt 227} 228 229define <4 x i32> @fptosi_4f64_to_4i32(<4 x double> %a) { 230; SSE-LABEL: fptosi_4f64_to_4i32: 231; SSE: # %bb.0: 232; SSE-NEXT: cvttpd2dq %xmm1, %xmm1 233; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 234; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 235; SSE-NEXT: retq 236; 237; AVX-LABEL: fptosi_4f64_to_4i32: 238; AVX: # %bb.0: 239; AVX-NEXT: vcvttpd2dq %ymm0, %xmm0 240; AVX-NEXT: vzeroupper 241; AVX-NEXT: retq 242 %cvt = fptosi <4 x double> %a to <4 x i32> 243 ret <4 x i32> %cvt 244} 245 246; 247; Double to Unsigned Integer 248; 249 250define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) { 251; SSE-LABEL: fptoui_2f64_to_2i64: 252; SSE: # %bb.0: 253; SSE-NEXT: movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0] 254; SSE-NEXT: movapd %xmm0, %xmm1 255; SSE-NEXT: subsd %xmm2, %xmm1 256; SSE-NEXT: cvttsd2si %xmm1, %rax 257; SSE-NEXT: cvttsd2si %xmm0, %rcx 258; SSE-NEXT: movq %rcx, %rdx 259; SSE-NEXT: sarq $63, %rdx 260; SSE-NEXT: andq %rax, %rdx 261; SSE-NEXT: orq %rcx, %rdx 262; SSE-NEXT: movq %rdx, %xmm1 263; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] 264; SSE-NEXT: cvttsd2si %xmm0, %rax 265; SSE-NEXT: subsd %xmm2, %xmm0 266; SSE-NEXT: cvttsd2si %xmm0, %rcx 267; SSE-NEXT: movq %rax, %rdx 268; SSE-NEXT: sarq $63, %rdx 269; SSE-NEXT: andq %rcx, %rdx 270; SSE-NEXT: orq %rax, %rdx 271; SSE-NEXT: movq %rdx, %xmm0 272; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = 
xmm1[0],xmm0[0] 273; SSE-NEXT: movdqa %xmm1, %xmm0 274; SSE-NEXT: retq 275; 276; VEX-LABEL: fptoui_2f64_to_2i64: 277; VEX: # %bb.0: 278; VEX-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 279; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm2 280; VEX-NEXT: vcvttsd2si %xmm2, %rax 281; VEX-NEXT: vcvttsd2si %xmm0, %rcx 282; VEX-NEXT: movq %rcx, %rdx 283; VEX-NEXT: sarq $63, %rdx 284; VEX-NEXT: andq %rax, %rdx 285; VEX-NEXT: orq %rcx, %rdx 286; VEX-NEXT: vmovq %rdx, %xmm2 287; VEX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 288; VEX-NEXT: vsubsd %xmm1, %xmm0, %xmm1 289; VEX-NEXT: vcvttsd2si %xmm1, %rax 290; VEX-NEXT: vcvttsd2si %xmm0, %rcx 291; VEX-NEXT: movq %rcx, %rdx 292; VEX-NEXT: sarq $63, %rdx 293; VEX-NEXT: andq %rax, %rdx 294; VEX-NEXT: orq %rcx, %rdx 295; VEX-NEXT: vmovq %rdx, %xmm0 296; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 297; VEX-NEXT: retq 298; 299; AVX512F-LABEL: fptoui_2f64_to_2i64: 300; AVX512F: # %bb.0: 301; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax 302; AVX512F-NEXT: vmovq %rax, %xmm1 303; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 304; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax 305; AVX512F-NEXT: vmovq %rax, %xmm0 306; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 307; AVX512F-NEXT: retq 308; 309; AVX512VL-LABEL: fptoui_2f64_to_2i64: 310; AVX512VL: # %bb.0: 311; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax 312; AVX512VL-NEXT: vmovq %rax, %xmm1 313; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 314; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax 315; AVX512VL-NEXT: vmovq %rax, %xmm0 316; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 317; AVX512VL-NEXT: retq 318; 319; AVX512DQ-LABEL: fptoui_2f64_to_2i64: 320; AVX512DQ: # %bb.0: 321; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 322; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 323; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 324; AVX512DQ-NEXT: vzeroupper 325; AVX512DQ-NEXT: retq 326; 327; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64: 328; AVX512VLDQ: # %bb.0: 329; 
AVX512VLDQ-NEXT: vcvttpd2uqq %xmm0, %xmm0 330; AVX512VLDQ-NEXT: retq 331 %cvt = fptoui <2 x double> %a to <2 x i64> 332 ret <2 x i64> %cvt 333} 334 335define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) { 336; SSE-LABEL: fptoui_2f64_to_4i32: 337; SSE: # %bb.0: 338; SSE-NEXT: cvttpd2dq %xmm0, %xmm1 339; SSE-NEXT: movapd %xmm1, %xmm2 340; SSE-NEXT: psrad $31, %xmm2 341; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 342; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 343; SSE-NEXT: andpd %xmm2, %xmm0 344; SSE-NEXT: orpd %xmm1, %xmm0 345; SSE-NEXT: retq 346; 347; VEX-LABEL: fptoui_2f64_to_4i32: 348; VEX: # %bb.0: 349; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1 350; VEX-NEXT: vpsrad $31, %xmm1, %xmm2 351; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 352; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 353; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0 354; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0 355; VEX-NEXT: retq 356; 357; AVX512F-LABEL: fptoui_2f64_to_4i32: 358; AVX512F: # %bb.0: 359; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 360; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 361; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 362; AVX512F-NEXT: vzeroupper 363; AVX512F-NEXT: retq 364; 365; AVX512VL-LABEL: fptoui_2f64_to_4i32: 366; AVX512VL: # %bb.0: 367; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 368; AVX512VL-NEXT: retq 369; 370; AVX512DQ-LABEL: fptoui_2f64_to_4i32: 371; AVX512DQ: # %bb.0: 372; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 373; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 374; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 375; AVX512DQ-NEXT: vzeroupper 376; AVX512DQ-NEXT: retq 377; 378; AVX512VLDQ-LABEL: fptoui_2f64_to_4i32: 379; AVX512VLDQ: # %bb.0: 380; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 381; AVX512VLDQ-NEXT: retq 382 %cvt = fptoui <2 x double> %a to <2 x i32> 383 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 384 ret <4 x i32> %ext 385} 386 387define <4 x i32> @fptoui_2f64_to_2i32(<2 x 
double> %a) { 388; SSE-LABEL: fptoui_2f64_to_2i32: 389; SSE: # %bb.0: 390; SSE-NEXT: cvttpd2dq %xmm0, %xmm1 391; SSE-NEXT: movapd %xmm1, %xmm2 392; SSE-NEXT: psrad $31, %xmm2 393; SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 394; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 395; SSE-NEXT: andpd %xmm2, %xmm0 396; SSE-NEXT: orpd %xmm1, %xmm0 397; SSE-NEXT: retq 398; 399; VEX-LABEL: fptoui_2f64_to_2i32: 400; VEX: # %bb.0: 401; VEX-NEXT: vcvttpd2dq %xmm0, %xmm1 402; VEX-NEXT: vpsrad $31, %xmm1, %xmm2 403; VEX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 404; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 405; VEX-NEXT: vandpd %xmm2, %xmm0, %xmm0 406; VEX-NEXT: vorpd %xmm0, %xmm1, %xmm0 407; VEX-NEXT: retq 408; 409; AVX512F-LABEL: fptoui_2f64_to_2i32: 410; AVX512F: # %bb.0: 411; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 412; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 413; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 414; AVX512F-NEXT: vzeroupper 415; AVX512F-NEXT: retq 416; 417; AVX512VL-LABEL: fptoui_2f64_to_2i32: 418; AVX512VL: # %bb.0: 419; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm0 420; AVX512VL-NEXT: retq 421; 422; AVX512DQ-LABEL: fptoui_2f64_to_2i32: 423; AVX512DQ: # %bb.0: 424; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 425; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 426; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 427; AVX512DQ-NEXT: vzeroupper 428; AVX512DQ-NEXT: retq 429; 430; AVX512VLDQ-LABEL: fptoui_2f64_to_2i32: 431; AVX512VLDQ: # %bb.0: 432; AVX512VLDQ-NEXT: vcvttpd2udq %xmm0, %xmm0 433; AVX512VLDQ-NEXT: retq 434 %cvt = fptoui <2 x double> %a to <2 x i32> 435 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 436 ret <4 x i32> %ext 437} 438 439define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) { 440; SSE-LABEL: fptoui_4f64_to_2i32: 441; SSE: # %bb.0: 442; SSE-NEXT: cvttpd2dq %xmm0, %xmm1 443; SSE-NEXT: movapd %xmm1, %xmm2 444; SSE-NEXT: psrad $31, %xmm2 445; 
SSE-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 446; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 447; SSE-NEXT: andpd %xmm2, %xmm0 448; SSE-NEXT: orpd %xmm1, %xmm0 449; SSE-NEXT: retq 450; 451; AVX1-LABEL: fptoui_4f64_to_2i32: 452; AVX1: # %bb.0: 453; AVX1-NEXT: vmovapd %xmm0, %xmm0 454; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm1 455; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 456; AVX1-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 457; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0 458; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0 459; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0 460; AVX1-NEXT: vzeroupper 461; AVX1-NEXT: retq 462; 463; AVX2-LABEL: fptoui_4f64_to_2i32: 464; AVX2: # %bb.0: 465; AVX2-NEXT: vmovapd %xmm0, %xmm0 466; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 467; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1 468; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1 469; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0 470; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2 471; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1 472; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0 473; AVX2-NEXT: vzeroupper 474; AVX2-NEXT: retq 475; 476; AVX512F-LABEL: fptoui_4f64_to_2i32: 477; AVX512F: # %bb.0: 478; AVX512F-NEXT: vmovaps %xmm0, %xmm0 479; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 480; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 481; AVX512F-NEXT: vzeroupper 482; AVX512F-NEXT: retq 483; 484; AVX512VL-LABEL: fptoui_4f64_to_2i32: 485; AVX512VL: # %bb.0: 486; AVX512VL-NEXT: vmovaps %xmm0, %xmm0 487; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0 488; AVX512VL-NEXT: vzeroupper 489; AVX512VL-NEXT: retq 490; 491; AVX512DQ-LABEL: fptoui_4f64_to_2i32: 492; AVX512DQ: # %bb.0: 493; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 494; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 495; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 496; AVX512DQ-NEXT: vzeroupper 497; AVX512DQ-NEXT: retq 498; 499; AVX512VLDQ-LABEL: fptoui_4f64_to_2i32: 500; AVX512VLDQ: # %bb.0: 501; AVX512VLDQ-NEXT: vmovaps %xmm0, %xmm0 502; 
AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0 503; AVX512VLDQ-NEXT: vzeroupper 504; AVX512VLDQ-NEXT: retq 505 %ext = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 506 %cvt = fptoui <4 x double> %ext to <4 x i32> 507 ret <4 x i32> %cvt 508} 509 510define <4 x i64> @fptoui_4f64_to_4i64(<4 x double> %a) { 511; SSE-LABEL: fptoui_4f64_to_4i64: 512; SSE: # %bb.0: 513; SSE-NEXT: movapd %xmm0, %xmm2 514; SSE-NEXT: movsd {{.*#+}} xmm3 = [9.2233720368547758E+18,0.0E+0] 515; SSE-NEXT: subsd %xmm3, %xmm0 516; SSE-NEXT: cvttsd2si %xmm0, %rax 517; SSE-NEXT: cvttsd2si %xmm2, %rcx 518; SSE-NEXT: movq %rcx, %rdx 519; SSE-NEXT: sarq $63, %rdx 520; SSE-NEXT: andq %rax, %rdx 521; SSE-NEXT: orq %rcx, %rdx 522; SSE-NEXT: movq %rdx, %xmm0 523; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1] 524; SSE-NEXT: cvttsd2si %xmm2, %rax 525; SSE-NEXT: subsd %xmm3, %xmm2 526; SSE-NEXT: cvttsd2si %xmm2, %rcx 527; SSE-NEXT: movq %rax, %rdx 528; SSE-NEXT: sarq $63, %rdx 529; SSE-NEXT: andq %rcx, %rdx 530; SSE-NEXT: orq %rax, %rdx 531; SSE-NEXT: movq %rdx, %xmm2 532; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 533; SSE-NEXT: movapd %xmm1, %xmm2 534; SSE-NEXT: subsd %xmm3, %xmm2 535; SSE-NEXT: cvttsd2si %xmm2, %rax 536; SSE-NEXT: cvttsd2si %xmm1, %rcx 537; SSE-NEXT: movq %rcx, %rdx 538; SSE-NEXT: sarq $63, %rdx 539; SSE-NEXT: andq %rax, %rdx 540; SSE-NEXT: orq %rcx, %rdx 541; SSE-NEXT: movq %rdx, %xmm2 542; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] 543; SSE-NEXT: cvttsd2si %xmm1, %rax 544; SSE-NEXT: subsd %xmm3, %xmm1 545; SSE-NEXT: cvttsd2si %xmm1, %rcx 546; SSE-NEXT: movq %rax, %rdx 547; SSE-NEXT: sarq $63, %rdx 548; SSE-NEXT: andq %rcx, %rdx 549; SSE-NEXT: orq %rax, %rdx 550; SSE-NEXT: movq %rdx, %xmm1 551; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] 552; SSE-NEXT: movdqa %xmm2, %xmm1 553; SSE-NEXT: retq 554; 555; AVX1-LABEL: fptoui_4f64_to_4i64: 556; AVX1: # %bb.0: 557; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 558; AVX1-NEXT: vmovsd 
{{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 559; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm3 560; AVX1-NEXT: vcvttsd2si %xmm3, %rax 561; AVX1-NEXT: vcvttsd2si %xmm2, %rcx 562; AVX1-NEXT: movq %rcx, %rdx 563; AVX1-NEXT: sarq $63, %rdx 564; AVX1-NEXT: andq %rax, %rdx 565; AVX1-NEXT: orq %rcx, %rdx 566; AVX1-NEXT: vmovq %rdx, %xmm3 567; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] 568; AVX1-NEXT: vsubsd %xmm1, %xmm2, %xmm4 569; AVX1-NEXT: vcvttsd2si %xmm4, %rax 570; AVX1-NEXT: vcvttsd2si %xmm2, %rcx 571; AVX1-NEXT: movq %rcx, %rdx 572; AVX1-NEXT: sarq $63, %rdx 573; AVX1-NEXT: andq %rax, %rdx 574; AVX1-NEXT: orq %rcx, %rdx 575; AVX1-NEXT: vmovq %rdx, %xmm2 576; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 577; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm3 578; AVX1-NEXT: vcvttsd2si %xmm3, %rax 579; AVX1-NEXT: vcvttsd2si %xmm0, %rcx 580; AVX1-NEXT: movq %rcx, %rdx 581; AVX1-NEXT: sarq $63, %rdx 582; AVX1-NEXT: andq %rax, %rdx 583; AVX1-NEXT: orq %rcx, %rdx 584; AVX1-NEXT: vmovq %rdx, %xmm3 585; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 586; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm1 587; AVX1-NEXT: vcvttsd2si %xmm1, %rax 588; AVX1-NEXT: vcvttsd2si %xmm0, %rcx 589; AVX1-NEXT: movq %rcx, %rdx 590; AVX1-NEXT: sarq $63, %rdx 591; AVX1-NEXT: andq %rax, %rdx 592; AVX1-NEXT: orq %rcx, %rdx 593; AVX1-NEXT: vmovq %rdx, %xmm0 594; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 595; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 596; AVX1-NEXT: retq 597; 598; AVX2-LABEL: fptoui_4f64_to_4i64: 599; AVX2: # %bb.0: 600; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 601; AVX2-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 602; AVX2-NEXT: vsubsd %xmm1, %xmm2, %xmm3 603; AVX2-NEXT: vcvttsd2si %xmm3, %rax 604; AVX2-NEXT: vcvttsd2si %xmm2, %rcx 605; AVX2-NEXT: movq %rcx, %rdx 606; AVX2-NEXT: sarq $63, %rdx 607; AVX2-NEXT: andq %rax, %rdx 608; AVX2-NEXT: orq %rcx, %rdx 609; AVX2-NEXT: vmovq %rdx, %xmm3 610; AVX2-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] 611; AVX2-NEXT: vsubsd 
%xmm1, %xmm2, %xmm4 612; AVX2-NEXT: vcvttsd2si %xmm4, %rax 613; AVX2-NEXT: vcvttsd2si %xmm2, %rcx 614; AVX2-NEXT: movq %rcx, %rdx 615; AVX2-NEXT: sarq $63, %rdx 616; AVX2-NEXT: andq %rax, %rdx 617; AVX2-NEXT: orq %rcx, %rdx 618; AVX2-NEXT: vmovq %rdx, %xmm2 619; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 620; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm3 621; AVX2-NEXT: vcvttsd2si %xmm3, %rax 622; AVX2-NEXT: vcvttsd2si %xmm0, %rcx 623; AVX2-NEXT: movq %rcx, %rdx 624; AVX2-NEXT: sarq $63, %rdx 625; AVX2-NEXT: andq %rax, %rdx 626; AVX2-NEXT: orq %rcx, %rdx 627; AVX2-NEXT: vmovq %rdx, %xmm3 628; AVX2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 629; AVX2-NEXT: vsubsd %xmm1, %xmm0, %xmm1 630; AVX2-NEXT: vcvttsd2si %xmm1, %rax 631; AVX2-NEXT: vcvttsd2si %xmm0, %rcx 632; AVX2-NEXT: movq %rcx, %rdx 633; AVX2-NEXT: sarq $63, %rdx 634; AVX2-NEXT: andq %rax, %rdx 635; AVX2-NEXT: orq %rcx, %rdx 636; AVX2-NEXT: vmovq %rdx, %xmm0 637; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0] 638; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 639; AVX2-NEXT: retq 640; 641; AVX512F-LABEL: fptoui_4f64_to_4i64: 642; AVX512F: # %bb.0: 643; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1 644; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax 645; AVX512F-NEXT: vmovq %rax, %xmm2 646; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] 647; AVX512F-NEXT: vcvttsd2usi %xmm1, %rax 648; AVX512F-NEXT: vmovq %rax, %xmm1 649; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 650; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax 651; AVX512F-NEXT: vmovq %rax, %xmm2 652; AVX512F-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 653; AVX512F-NEXT: vcvttsd2usi %xmm0, %rax 654; AVX512F-NEXT: vmovq %rax, %xmm0 655; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 656; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 657; AVX512F-NEXT: retq 658; 659; AVX512VL-LABEL: fptoui_4f64_to_4i64: 660; AVX512VL: # %bb.0: 661; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1 662; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax 663; AVX512VL-NEXT: 
vmovq %rax, %xmm2 664; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0] 665; AVX512VL-NEXT: vcvttsd2usi %xmm1, %rax 666; AVX512VL-NEXT: vmovq %rax, %xmm1 667; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 668; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax 669; AVX512VL-NEXT: vmovq %rax, %xmm2 670; AVX512VL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] 671; AVX512VL-NEXT: vcvttsd2usi %xmm0, %rax 672; AVX512VL-NEXT: vmovq %rax, %xmm0 673; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 674; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 675; AVX512VL-NEXT: retq 676; 677; AVX512DQ-LABEL: fptoui_4f64_to_4i64: 678; AVX512DQ: # %bb.0: 679; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 680; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 681; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 682; AVX512DQ-NEXT: retq 683; 684; AVX512VLDQ-LABEL: fptoui_4f64_to_4i64: 685; AVX512VLDQ: # %bb.0: 686; AVX512VLDQ-NEXT: vcvttpd2uqq %ymm0, %ymm0 687; AVX512VLDQ-NEXT: retq 688 %cvt = fptoui <4 x double> %a to <4 x i64> 689 ret <4 x i64> %cvt 690} 691 692define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) { 693; SSE-LABEL: fptoui_4f64_to_4i32: 694; SSE: # %bb.0: 695; SSE-NEXT: movapd {{.*#+}} xmm2 = [2.147483648E+9,2.147483648E+9] 696; SSE-NEXT: cvttpd2dq %xmm1, %xmm3 697; SSE-NEXT: subpd %xmm2, %xmm1 698; SSE-NEXT: cvttpd2dq %xmm1, %xmm1 699; SSE-NEXT: movapd %xmm3, %xmm4 700; SSE-NEXT: psrad $31, %xmm4 701; SSE-NEXT: pand %xmm1, %xmm4 702; SSE-NEXT: por %xmm3, %xmm4 703; SSE-NEXT: cvttpd2dq %xmm0, %xmm1 704; SSE-NEXT: subpd %xmm2, %xmm0 705; SSE-NEXT: cvttpd2dq %xmm0, %xmm2 706; SSE-NEXT: movapd %xmm1, %xmm0 707; SSE-NEXT: psrad $31, %xmm0 708; SSE-NEXT: pand %xmm2, %xmm0 709; SSE-NEXT: por %xmm1, %xmm0 710; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] 711; SSE-NEXT: retq 712; 713; AVX1-LABEL: fptoui_4f64_to_4i32: 714; AVX1: # %bb.0: 715; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm1 716; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2 717; AVX1-NEXT: vsubpd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 718; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0 719; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0 720; AVX1-NEXT: vorpd %xmm0, %xmm1, %xmm0 721; AVX1-NEXT: vzeroupper 722; AVX1-NEXT: retq 723; 724; AVX2-LABEL: fptoui_4f64_to_4i32: 725; AVX2: # %bb.0: 726; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 727; AVX2-NEXT: vsubpd %ymm1, %ymm0, %ymm1 728; AVX2-NEXT: vcvttpd2dq %ymm1, %xmm1 729; AVX2-NEXT: vcvttpd2dq %ymm0, %xmm0 730; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2 731; AVX2-NEXT: vandpd %xmm2, %xmm1, %xmm1 732; AVX2-NEXT: vorpd %xmm1, %xmm0, %xmm0 733; AVX2-NEXT: vzeroupper 734; AVX2-NEXT: retq 735; 736; AVX512F-LABEL: fptoui_4f64_to_4i32: 737; AVX512F: # %bb.0: 738; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 739; AVX512F-NEXT: vcvttpd2udq %zmm0, %ymm0 740; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 741; AVX512F-NEXT: vzeroupper 742; AVX512F-NEXT: retq 743; 744; AVX512VL-LABEL: fptoui_4f64_to_4i32: 745; AVX512VL: # %bb.0: 746; AVX512VL-NEXT: vcvttpd2udq %ymm0, %xmm0 747; AVX512VL-NEXT: vzeroupper 748; AVX512VL-NEXT: retq 749; 750; AVX512DQ-LABEL: fptoui_4f64_to_4i32: 751; AVX512DQ: # %bb.0: 752; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 753; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0 754; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 755; AVX512DQ-NEXT: vzeroupper 756; AVX512DQ-NEXT: retq 757; 758; AVX512VLDQ-LABEL: fptoui_4f64_to_4i32: 759; AVX512VLDQ: # %bb.0: 760; AVX512VLDQ-NEXT: vcvttpd2udq %ymm0, %xmm0 761; AVX512VLDQ-NEXT: vzeroupper 762; AVX512VLDQ-NEXT: retq 763 %cvt = fptoui <4 x double> %a to <4 x i32> 764 ret <4 x i32> %cvt 765} 766 767; 768; Float to Signed Integer 769; 770 771define <2 x i32> @fptosi_2f32_to_2i32(<2 x float> %a) { 772; SSE-LABEL: fptosi_2f32_to_2i32: 773; SSE: # %bb.0: 774; SSE-NEXT: cvttps2dq %xmm0, %xmm0 775; SSE-NEXT: retq 776; 777; AVX-LABEL: fptosi_2f32_to_2i32: 778; AVX: # %bb.0: 779; AVX-NEXT: 
vcvttps2dq %xmm0, %xmm0 780; AVX-NEXT: retq 781 %cvt = fptosi <2 x float> %a to <2 x i32> 782 ret <2 x i32> %cvt 783} 784 785define <4 x i32> @fptosi_4f32_to_4i32(<4 x float> %a) { 786; SSE-LABEL: fptosi_4f32_to_4i32: 787; SSE: # %bb.0: 788; SSE-NEXT: cvttps2dq %xmm0, %xmm0 789; SSE-NEXT: retq 790; 791; AVX-LABEL: fptosi_4f32_to_4i32: 792; AVX: # %bb.0: 793; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 794; AVX-NEXT: retq 795 %cvt = fptosi <4 x float> %a to <4 x i32> 796 ret <4 x i32> %cvt 797} 798 799define <2 x i64> @fptosi_2f32_to_2i64(<4 x float> %a) { 800; SSE-LABEL: fptosi_2f32_to_2i64: 801; SSE: # %bb.0: 802; SSE-NEXT: cvttss2si %xmm0, %rax 803; SSE-NEXT: movq %rax, %xmm1 804; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 805; SSE-NEXT: cvttss2si %xmm0, %rax 806; SSE-NEXT: movq %rax, %xmm0 807; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 808; SSE-NEXT: movdqa %xmm1, %xmm0 809; SSE-NEXT: retq 810; 811; VEX-LABEL: fptosi_2f32_to_2i64: 812; VEX: # %bb.0: 813; VEX-NEXT: vcvttss2si %xmm0, %rax 814; VEX-NEXT: vmovq %rax, %xmm1 815; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 816; VEX-NEXT: vcvttss2si %xmm0, %rax 817; VEX-NEXT: vmovq %rax, %xmm0 818; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 819; VEX-NEXT: retq 820; 821; AVX512F-LABEL: fptosi_2f32_to_2i64: 822; AVX512F: # %bb.0: 823; AVX512F-NEXT: vcvttss2si %xmm0, %rax 824; AVX512F-NEXT: vmovq %rax, %xmm1 825; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 826; AVX512F-NEXT: vcvttss2si %xmm0, %rax 827; AVX512F-NEXT: vmovq %rax, %xmm0 828; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 829; AVX512F-NEXT: retq 830; 831; AVX512VL-LABEL: fptosi_2f32_to_2i64: 832; AVX512VL: # %bb.0: 833; AVX512VL-NEXT: vcvttss2si %xmm0, %rax 834; AVX512VL-NEXT: vmovq %rax, %xmm1 835; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 836; AVX512VL-NEXT: vcvttss2si %xmm0, %rax 837; AVX512VL-NEXT: vmovq %rax, %xmm0 838; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 839; AVX512VL-NEXT: 
retq 840; 841; AVX512DQ-LABEL: fptosi_2f32_to_2i64: 842; AVX512DQ: # %bb.0: 843; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 844; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 845; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 846; AVX512DQ-NEXT: vzeroupper 847; AVX512DQ-NEXT: retq 848; 849; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64: 850; AVX512VLDQ: # %bb.0: 851; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %xmm0 852; AVX512VLDQ-NEXT: retq 853 %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1> 854 %cvt = fptosi <2 x float> %shuf to <2 x i64> 855 ret <2 x i64> %cvt 856} 857 858define <2 x i64> @fptosi_4f32_to_2i64(<4 x float> %a) { 859; SSE-LABEL: fptosi_4f32_to_2i64: 860; SSE: # %bb.0: 861; SSE-NEXT: cvttss2si %xmm0, %rax 862; SSE-NEXT: movq %rax, %xmm1 863; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 864; SSE-NEXT: cvttss2si %xmm0, %rax 865; SSE-NEXT: movq %rax, %xmm0 866; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] 867; SSE-NEXT: movdqa %xmm1, %xmm0 868; SSE-NEXT: retq 869; 870; VEX-LABEL: fptosi_4f32_to_2i64: 871; VEX: # %bb.0: 872; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 873; VEX-NEXT: vcvttss2si %xmm1, %rax 874; VEX-NEXT: vcvttss2si %xmm0, %rcx 875; VEX-NEXT: vmovq %rcx, %xmm0 876; VEX-NEXT: vmovq %rax, %xmm1 877; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 878; VEX-NEXT: retq 879; 880; AVX512F-LABEL: fptosi_4f32_to_2i64: 881; AVX512F: # %bb.0: 882; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 883; AVX512F-NEXT: vcvttss2si %xmm1, %rax 884; AVX512F-NEXT: vcvttss2si %xmm0, %rcx 885; AVX512F-NEXT: vmovq %rcx, %xmm0 886; AVX512F-NEXT: vmovq %rax, %xmm1 887; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 888; AVX512F-NEXT: retq 889; 890; AVX512VL-LABEL: fptosi_4f32_to_2i64: 891; AVX512VL: # %bb.0: 892; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 893; AVX512VL-NEXT: vcvttss2si %xmm1, %rax 894; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx 895; AVX512VL-NEXT: vmovq %rcx, %xmm0 
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_4f32_to_2i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_4f32_to_2i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512VLDQ-NEXT: vzeroupper
; AVX512VLDQ-NEXT: retq
  %cvt = fptosi <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

; Signed conversion of <8 x float> to <8 x i32> with a single fptosi.
; Per the checks below, the SSE run converts the two 128-bit halves with
; separate cvttps2dq instructions, while the shared AVX prefix (listed on every
; AVX and AVX512 RUN line) expects one 256-bit vcvttps2dq.
define <8 x i32> @fptosi_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_8i32:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: cvttps2dq %xmm1, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_8f32_to_8i32:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX-NEXT: retq
  %cvt = fptosi <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

; Narrows an <8 x float> argument to its low four elements (shuffle mask
; 0,1,2,3) and converts those to <4 x i64> with fptosi.
; The SSE, AVX1, AVX2, AVX512F and AVX512VL checks expect four scalar
; (v)cvttss2si conversions whose results are paired with (v)punpcklqdq and, on
; the AVX runs, joined by a 128-bit insert. The AVX512DQ checks expect a packed
; vcvttps2qq from ymm to zmm, and the AVX512VLDQ checks a vcvttps2qq from xmm
; to ymm.
define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_4f32_to_4i64:
; SSE: # %bb.0:
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptosi_4f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT: vcvttss2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX1-NEXT: vcvttss2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vcvttss2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vcvttss2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptosi_4f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX2-NEXT: vcvttss2si %xmm2, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vcvttss2si %xmm0, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vcvttss2si %xmm0, %rax
; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptosi_4f32_to_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvttss2si %xmm1, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-NEXT: vcvttss2si %xmm2, %rax
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT: vcvttss2si %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2si %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_4f32_to_4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-NEXT: vcvttss2si %xmm2, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2si %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_4f32_to_4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_4f32_to_4i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
; AVX512VLDQ-NEXT: retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptosi <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

; Converts the full <8 x float> to <8 x i64> with fptosi and then keeps only
; result elements 0..3 via shufflevector.
; The checks for the runs without AVX512DQ contain just four scalar
; (v)cvttss2si conversions. Both the AVX512DQ and the AVX512VLDQ checks expect
; the same packed vcvttps2qq from ymm to zmm.
; In the AVX512F and AVX512VL checks all four conversions come first (into
; rax, rcx, rdx, rsi) and the vmovq/vpunpcklqdq merges follow, unlike the
; interleaved AVX1 and AVX2 sequences.
define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptosi_8f32_to_4i64:
; SSE: # %bb.0:
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptosi_8f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT: vcvttss2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX1-NEXT: vcvttss2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vcvttss2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vcvttss2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptosi_8f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vmovq %rax, %xmm1
; AVX2-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX2-NEXT: vcvttss2si %xmm2, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX2-NEXT: vcvttss2si %xmm0, %rax
; AVX2-NEXT: vmovq %rax, %xmm2
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vcvttss2si %xmm0, %rax
; AVX2-NEXT: vmovq %rax, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptosi_8f32_to_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2si %xmm1, %rax
; AVX512F-NEXT: vcvttss2si %xmm0, %rcx
; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512F-NEXT: vcvttss2si %xmm1, %rdx
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvttss2si %xmm0, %rsi
; AVX512F-NEXT: vmovq %rsi, %xmm0
; AVX512F-NEXT: vmovq %rdx, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vmovq %rcx, %xmm1
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_8f32_to_4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2si %xmm1, %rax
; AVX512VL-NEXT: vcvttss2si %xmm0, %rcx
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512VL-NEXT: vcvttss2si %xmm1, %rdx
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvttss2si %xmm0, %rsi
; AVX512VL-NEXT: vmovq %rsi, %xmm0
; AVX512VL-NEXT: vmovq %rdx, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vmovq %rcx, %xmm1
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptosi_8f32_to_4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_8f32_to_4i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2qq %ymm0, %zmm0
; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VLDQ-NEXT: retq
  %cvt = fptosi <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Float to Unsigned Integer
;

; Unsigned conversion of <2 x float> to <2 x i32> with fptoui.
; The SSE, AVX1 and AVX2 checks expect it to be built from two signed
; (v)cvttps2dq conversions - one of the input, one of the input minus a
; constant (shown as 2.14748365E+9 in the AVX2 broadcast, an LCPI-labelled
; RIP-relative operand in the SSE and AVX1 checks) - merged with a psrad $31
; mask, pand and por.
; All AVX512 runs expect vcvttps2udq, on zmm (with kill annotations and
; vzeroupper) for the runs without VL and directly on xmm for the runs with VL.
define <2 x i32> @fptoui_2f32_to_2i32(<2 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i32:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_2f32_to_2i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_2f32_to_2i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f32_to_2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f32_to_2i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_2f32_to_2i32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <2 x float> %a to <2 x i32>
  ret <2 x i32> %cvt
}

; Unsigned conversion of <4 x float> to <4 x i32> with fptoui.
; The SSE, AVX1 and AVX2 checks expect two signed (v)cvttps2dq conversions
; (input, and input minus a constant) merged with a psrad $31 mask, pand and
; por. All AVX512 runs expect vcvttps2udq, on zmm for the runs without VL and
; on xmm for the runs with VL.
define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i32:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm1
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrad $31, %xmm2
; SSE-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: por %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_4f32_to_4i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm1
; AVX1-NEXT: vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpor %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_4f32_to_4i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX2-NEXT: vsubps %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vcvttps2dq %xmm1, %xmm1
; AVX2-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm2
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_4f32_to_4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_4f32_to_4i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2udq %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_4f32_to_4i32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_4f32_to_4i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2udq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %cvt
}

; Takes elements 0 and 1 of a <4 x float> argument and converts them to
; <2 x i64> with fptoui.
; The SSE and VEX checks expect, for each element, a (v)cvttss2si of the value
; and a (v)cvttss2si of the value minus 9.22337203E+18, combined with
; sarq $63 / andq / orq. The AVX512F and AVX512VL checks expect scalar
; vcvttss2usi instead. The AVX512DQ checks expect a packed vcvttps2uqq from
; ymm to zmm, and the AVX512VLDQ checks a vcvttps2uqq from xmm to xmm.
define <2 x i64> @fptoui_2f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_2f32_to_2i64:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: subss %xmm2, %xmm1
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: movq %rcx, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rax, %rdx
; SSE-NEXT: orq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: subss %xmm2, %xmm0
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: movq %rax, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rcx, %rdx
; SSE-NEXT: orq %rax, %rdx
; SSE-NEXT: movq %rdx, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: fptoui_2f32_to_2i64:
; VEX: # %bb.0:
; VEX-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2
; VEX-NEXT: vcvttss2si %xmm2, %rax
; VEX-NEXT: vcvttss2si %xmm0, %rcx
; VEX-NEXT: movq %rcx, %rdx
; VEX-NEXT: sarq $63, %rdx
; VEX-NEXT: andq %rax, %rdx
; VEX-NEXT: orq %rcx, %rdx
; VEX-NEXT: vmovq %rdx, %xmm2
; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm1
; VEX-NEXT: vcvttss2si %xmm1, %rax
; VEX-NEXT: vcvttss2si %xmm0, %rcx
; VEX-NEXT: movq %rcx, %rdx
; VEX-NEXT: sarq $63, %rdx
; VEX-NEXT: andq %rax, %rdx
; VEX-NEXT: orq %rcx, %rdx
; VEX-NEXT: vmovq %rdx, %xmm0
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f32_to_2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f32_to_2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_2f32_to_2i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %xmm0
; AVX512VLDQ-NEXT: retq
  %shuf = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %cvt = fptoui <2 x float> %shuf to <2 x i64>
  ret <2 x i64> %cvt
}

; Converts a whole <4 x float> to <4 x i64> with fptoui and keeps only result
; elements 0 and 1.
; In the SSE, VEX, AVX512F and AVX512VL checks only two elements are
; converted (via the cvttss2si/sarq/andq/orq sequence on SSE and VEX, via
; vcvttss2usi on AVX512F and AVX512VL). The AVX512DQ checks expect a packed
; vcvttps2uqq from ymm to zmm; the AVX512VLDQ checks expect a vcvttps2uqq from
; xmm to ymm followed by vzeroupper.
define <2 x i64> @fptoui_4f32_to_2i64(<4 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_2i64:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: subss %xmm2, %xmm1
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: movq %rcx, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rax, %rdx
; SSE-NEXT: orq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: subss %xmm2, %xmm0
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: movq %rax, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rcx, %rdx
; SSE-NEXT: orq %rax, %rdx
; SSE-NEXT: movq %rdx, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: fptoui_4f32_to_2i64:
; VEX: # %bb.0:
; VEX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; VEX-NEXT: vmovss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; VEX-NEXT: vsubss %xmm2, %xmm1, %xmm3
; VEX-NEXT: vcvttss2si %xmm3, %rax
; VEX-NEXT: vcvttss2si %xmm1, %rcx
; VEX-NEXT: movq %rcx, %rdx
; VEX-NEXT: sarq $63, %rdx
; VEX-NEXT: andq %rax, %rdx
; VEX-NEXT: orq %rcx, %rdx
; VEX-NEXT: vsubss %xmm2, %xmm0, %xmm1
; VEX-NEXT: vcvttss2si %xmm1, %rax
; VEX-NEXT: vcvttss2si %xmm0, %rcx
; VEX-NEXT: movq %rcx, %rsi
; VEX-NEXT: sarq $63, %rsi
; VEX-NEXT: andq %rax, %rsi
; VEX-NEXT: orq %rcx, %rsi
; VEX-NEXT: vmovq %rsi, %xmm0
; VEX-NEXT: vmovq %rdx, %xmm1
; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_4f32_to_2i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512F-NEXT: vmovq %rcx, %xmm0
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_4f32_to_2i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512VL-NEXT: vmovq %rcx, %xmm0
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_4f32_to_2i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_4f32_to_2i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512VLDQ-NEXT: vzeroupper
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <4 x float> %a to <4 x i64>
  %shuf = shufflevector <4 x i64> %cvt, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuf
}

; Unsigned conversion of <8 x float> to <8 x i32> with fptoui.
; On the SSE run each 128-bit half gets its own cvttps2dq / subps / cvttps2dq /
; psrad $31 / pand / por sequence, both using the constant loaded into xmm2.
; The AVX1 checks merge the two 256-bit conversions with vorps and vblendvps,
; the AVX2 checks with vpsrad $31, vpand and vpor. The AVX512 runs expect
; vcvttps2udq, on zmm for the runs without VL and on ymm for the runs with VL.
define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_8i32:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; SSE-NEXT: cvttps2dq %xmm0, %xmm3
; SSE-NEXT: subps %xmm2, %xmm0
; SSE-NEXT: cvttps2dq %xmm0, %xmm4
; SSE-NEXT: movdqa %xmm3, %xmm0
; SSE-NEXT: psrad $31, %xmm0
; SSE-NEXT: pand %xmm4, %xmm0
; SSE-NEXT: por %xmm3, %xmm0
; SSE-NEXT: cvttps2dq %xmm1, %xmm3
; SSE-NEXT: subps %xmm2, %xmm1
; SSE-NEXT: cvttps2dq %xmm1, %xmm2
; SSE-NEXT: movdqa %xmm3, %xmm1
; SSE-NEXT: psrad $31, %xmm1
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: por %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_8f32_to_8i32:
; AVX1: # %bb.0:
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm1
; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT: vblendvps %ymm1, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_8f32_to_8i32:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm1 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9]
; AVX2-NEXT: vsubps %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1
; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0
; AVX2-NEXT: vpsrad $31, %ymm0, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_8f32_to_8i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_8f32_to_8i32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_8f32_to_8i32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_8f32_to_8i32:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2udq %ymm0, %ymm0
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <8 x float> %a to <8 x i32>
  ret <8 x i32> %cvt
}

; Narrows an <8 x float> argument to its low four elements (shuffle mask
; 0,1,2,3) and converts those to <4 x i64> with fptoui.
; The SSE, AVX1 and AVX2 checks expect four per-element sequences, each a
; (v)cvttss2si of the value and of the value minus 9.22337203E+18 combined
; with sarq $63 / andq / orq. The AVX512F and AVX512VL checks expect four
; scalar vcvttss2usi conversions. The AVX512DQ checks expect a packed
; vcvttps2uqq from ymm to zmm, and the AVX512VLDQ checks a vcvttps2uqq from
; xmm to ymm.
define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_4f32_to_4i64:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: movq %rcx, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rax, %rdx
; SSE-NEXT: orq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: subss %xmm1, %xmm3
; SSE-NEXT: cvttss2si %xmm3, %rcx
; SSE-NEXT: movq %rax, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rcx, %rdx
; SSE-NEXT: orq %rax, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: subss %xmm1, %xmm3
; SSE-NEXT: cvttss2si %xmm3, %rcx
; SSE-NEXT: movq %rax, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rcx, %rdx
; SSE-NEXT: orq %rax, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: movq %rax, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rcx, %rdx
; SSE-NEXT: orq %rax, %rdx
; SSE-NEXT: movq %rdx, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_4f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: vcvttss2si %xmm2, %rcx
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: andq %rax, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX1-NEXT: vcvttss2si %xmm4, %rax
; AVX1-NEXT: vcvttss2si %xmm3, %rcx
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: andq %rax, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: andq %rax, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vcvttss2si %xmm1, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: andq %rax, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_4f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: vcvttss2si %xmm2, %rcx
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm2
; AVX2-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: vcvttss2si %xmm3, %rcx
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rcx
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rcx
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_4f32_to_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-NEXT: vmovq %rax, %xmm1
; AVX512F-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512F-NEXT: vcvttss2usi %xmm2, %rax
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_4f32_to_4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm1
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-NEXT: vcvttss2usi %xmm2, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_4f32_to_4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_4f32_to_4i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %xmm0, %ymm0
; AVX512VLDQ-NEXT: retq
  %shuf = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cvt = fptoui <4 x float> %shuf to <4 x i64>
  ret <4 x i64> %cvt
}

; Converts the full <8 x float> to <8 x i64> with fptoui and then keeps only
; result elements 0..3 via shufflevector.
; The SSE, AVX1 and AVX2 checks contain four per-element
; cvttss2si/sarq/andq/orq sequences, and the AVX512F and AVX512VL checks four
; scalar vcvttss2usi conversions. Both the AVX512DQ and the AVX512VLDQ checks
; expect the same packed vcvttps2uqq from ymm to zmm.
define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) {
; SSE-LABEL: fptoui_8f32_to_4i64:
; SSE: # %bb.0:
; SSE-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm1, %xmm2
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: movq %rcx, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rax, %rdx
; SSE-NEXT: orq %rcx, %rdx
; SSE-NEXT: movq %rdx, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[1,1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: subss %xmm1, %xmm3
; SSE-NEXT: cvttss2si %xmm3, %rcx
; SSE-NEXT: movq %rax, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rcx, %rdx
; SSE-NEXT: orq %rax, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm0[3,3]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: subss %xmm1, %xmm3
; SSE-NEXT: cvttss2si %xmm3, %rcx
; SSE-NEXT: movq %rax, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rcx, %rdx
; SSE-NEXT: orq %rax, %rdx
; SSE-NEXT: movq %rdx, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: subss %xmm1, %xmm0
; SSE-NEXT: cvttss2si %xmm0, %rcx
; SSE-NEXT: movq %rax, %rdx
; SSE-NEXT: sarq $63, %rdx
; SSE-NEXT: andq %rcx, %rdx
; SSE-NEXT: orq %rax, %rdx
; SSE-NEXT: movq %rdx, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: fptoui_8f32_to_4i64:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX1-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: vcvttss2si %xmm2, %rcx
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: andq %rax, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX1-NEXT: vcvttss2si %xmm4, %rax
; AVX1-NEXT: vcvttss2si %xmm3, %rcx
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: andq %rax, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vcvttss2si %xmm3, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: andq %rax, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm3
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vcvttss2si %xmm1, %rax
; AVX1-NEXT: vcvttss2si %xmm0, %rcx
; AVX1-NEXT: movq %rcx, %rdx
; AVX1-NEXT: sarq $63, %rdx
; AVX1-NEXT: andq %rax, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: fptoui_8f32_to_4i64:
; AVX2: # %bb.0:
; AVX2-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX2-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0]
; AVX2-NEXT: vsubss %xmm1, %xmm2, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: vcvttss2si %xmm2, %rcx
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm2
; AVX2-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX2-NEXT: vsubss %xmm1, %xmm3, %xmm4
; AVX2-NEXT: vcvttss2si %xmm4, %rax
; AVX2-NEXT: vcvttss2si %xmm3, %rcx
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm3
; AVX2-NEXT: vcvttss2si %xmm3, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rcx
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm3
; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX2-NEXT: vsubss %xmm1, %xmm0, %xmm1
; AVX2-NEXT: vcvttss2si %xmm1, %rax
; AVX2-NEXT: vcvttss2si %xmm0, %rcx
; AVX2-NEXT: movq %rcx, %rdx
; AVX2-NEXT: sarq $63, %rdx
; AVX2-NEXT: andq %rax, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm0
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: fptoui_8f32_to_4i64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rax
; AVX512F-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512F-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512F-NEXT: vcvttss2usi %xmm1, %rdx
; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512F-NEXT: vcvttss2usi %xmm0, %rsi
; AVX512F-NEXT: vmovq %rsi, %xmm0
; AVX512F-NEXT: vmovq %rdx, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vmovq %rcx, %xmm1
; AVX512F-NEXT: vmovq %rax, %xmm2
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_8f32_to_4i64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rax
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rcx
; AVX512VL-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512VL-NEXT: vcvttss2usi %xmm1, %rdx
; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; AVX512VL-NEXT: vcvttss2usi %xmm0, %rsi
; AVX512VL-NEXT: vmovq %rsi, %xmm0
; AVX512VL-NEXT: vmovq %rdx, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vmovq %rcx, %xmm1
; AVX512VL-NEXT: vmovq %rax, %xmm2
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_8f32_to_4i64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_8f32_to_4i64:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttps2uqq %ymm0, %zmm0
; AVX512VLDQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512VLDQ-NEXT: retq
  %cvt = fptoui <8 x float> %a to <8 x i64>
  %shuf = shufflevector <8 x i64> %cvt, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuf
}

;
; Constant Folding
;

define <2
x i64> @fptosi_2f64_to_2i64_const() { 1894; SSE-LABEL: fptosi_2f64_to_2i64_const: 1895; SSE: # %bb.0: 1896; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615] 1897; SSE-NEXT: retq 1898; 1899; VEX-LABEL: fptosi_2f64_to_2i64_const: 1900; VEX: # %bb.0: 1901; VEX-NEXT: vmovaps {{.*#+}} xmm0 = [1,18446744073709551615] 1902; VEX-NEXT: retq 1903; 1904; AVX512-LABEL: fptosi_2f64_to_2i64_const: 1905; AVX512: # %bb.0: 1906; AVX512-NEXT: vpmovsxbq {{.*#+}} xmm0 = [1,18446744073709551615] 1907; AVX512-NEXT: retq 1908 %cvt = fptosi <2 x double> <double 1.0, double -1.0> to <2 x i64> 1909 ret <2 x i64> %cvt 1910} 1911 1912define <4 x i32> @fptosi_2f64_to_2i32_const() { 1913; SSE-LABEL: fptosi_2f64_to_2i32_const: 1914; SSE: # %bb.0: 1915; SSE-NEXT: movsd {{.*#+}} xmm0 = [4294967295,1,0,0] 1916; SSE-NEXT: retq 1917; 1918; VEX-LABEL: fptosi_2f64_to_2i32_const: 1919; VEX: # %bb.0: 1920; VEX-NEXT: vmovsd {{.*#+}} xmm0 = [4294967295,1,0,0] 1921; VEX-NEXT: retq 1922; 1923; AVX512-LABEL: fptosi_2f64_to_2i32_const: 1924; AVX512: # %bb.0: 1925; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [4294967295,1,0,0] 1926; AVX512-NEXT: retq 1927 %cvt = fptosi <2 x double> <double -1.0, double 1.0> to <2 x i32> 1928 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1929 ret <4 x i32> %ext 1930} 1931 1932define <4 x i64> @fptosi_4f64_to_4i64_const() { 1933; SSE-LABEL: fptosi_4f64_to_4i64_const: 1934; SSE: # %bb.0: 1935; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615] 1936; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,18446744073709551613] 1937; SSE-NEXT: retq 1938; 1939; VEX-LABEL: fptosi_4f64_to_4i64_const: 1940; VEX: # %bb.0: 1941; VEX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613] 1942; VEX-NEXT: retq 1943; 1944; AVX512-LABEL: fptosi_4f64_to_4i64_const: 1945; AVX512: # %bb.0: 1946; AVX512-NEXT: vpmovsxbq {{.*#+}} ymm0 = [1,18446744073709551615,2,18446744073709551613] 1947; AVX512-NEXT: retq 1948 %cvt = fptosi <4 x 
double> <double 1.0, double -1.0, double 2.0, double -3.0> to <4 x i64> 1949 ret <4 x i64> %cvt 1950} 1951 1952define <4 x i32> @fptosi_4f64_to_4i32_const() { 1953; SSE-LABEL: fptosi_4f64_to_4i32_const: 1954; SSE: # %bb.0: 1955; SSE-NEXT: movaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3] 1956; SSE-NEXT: retq 1957; 1958; VEX-LABEL: fptosi_4f64_to_4i32_const: 1959; VEX: # %bb.0: 1960; VEX-NEXT: vmovaps {{.*#+}} xmm0 = [4294967295,1,4294967294,3] 1961; VEX-NEXT: retq 1962; 1963; AVX512-LABEL: fptosi_4f64_to_4i32_const: 1964; AVX512: # %bb.0: 1965; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [4294967295,1,4294967294,3] 1966; AVX512-NEXT: retq 1967 %cvt = fptosi <4 x double> <double -1.0, double 1.0, double -2.0, double 3.0> to <4 x i32> 1968 ret <4 x i32> %cvt 1969} 1970 1971define <2 x i64> @fptoui_2f64_to_2i64_const() { 1972; SSE-LABEL: fptoui_2f64_to_2i64_const: 1973; SSE: # %bb.0: 1974; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4] 1975; SSE-NEXT: retq 1976; 1977; VEX-LABEL: fptoui_2f64_to_2i64_const: 1978; VEX: # %bb.0: 1979; VEX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4] 1980; VEX-NEXT: retq 1981; 1982; AVX512-LABEL: fptoui_2f64_to_2i64_const: 1983; AVX512: # %bb.0: 1984; AVX512-NEXT: vpmovsxbq {{.*#+}} xmm0 = [2,4] 1985; AVX512-NEXT: retq 1986 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i64> 1987 ret <2 x i64> %cvt 1988} 1989 1990define <4 x i32> @fptoui_2f64_to_2i32_const(<2 x double> %a) { 1991; SSE-LABEL: fptoui_2f64_to_2i32_const: 1992; SSE: # %bb.0: 1993; SSE-NEXT: movsd {{.*#+}} xmm0 = [2,4,0,0] 1994; SSE-NEXT: retq 1995; 1996; VEX-LABEL: fptoui_2f64_to_2i32_const: 1997; VEX: # %bb.0: 1998; VEX-NEXT: vmovsd {{.*#+}} xmm0 = [2,4,0,0] 1999; VEX-NEXT: retq 2000; 2001; AVX512-LABEL: fptoui_2f64_to_2i32_const: 2002; AVX512: # %bb.0: 2003; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [2,4,0,0] 2004; AVX512-NEXT: retq 2005 %cvt = fptoui <2 x double> <double 2.0, double 4.0> to <2 x i32> 2006 %ext = shufflevector <2 x i32> %cvt, <2 x i32> undef, <4 x i32> <i32 0, i32 1, 
i32 undef, i32 undef> 2007 ret <4 x i32> %ext 2008} 2009 2010define <4 x i64> @fptoui_4f64_to_4i64_const(<4 x double> %a) { 2011; SSE-LABEL: fptoui_4f64_to_4i64_const: 2012; SSE: # %bb.0: 2013; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4] 2014; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,8] 2015; SSE-NEXT: retq 2016; 2017; VEX-LABEL: fptoui_4f64_to_4i64_const: 2018; VEX: # %bb.0: 2019; VEX-NEXT: vmovaps {{.*#+}} ymm0 = [2,4,6,8] 2020; VEX-NEXT: retq 2021; 2022; AVX512-LABEL: fptoui_4f64_to_4i64_const: 2023; AVX512: # %bb.0: 2024; AVX512-NEXT: vpmovsxbq {{.*#+}} ymm0 = [2,4,6,8] 2025; AVX512-NEXT: retq 2026 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i64> 2027 ret <4 x i64> %cvt 2028} 2029 2030define <4 x i32> @fptoui_4f64_to_4i32_const(<4 x double> %a) { 2031; SSE-LABEL: fptoui_4f64_to_4i32_const: 2032; SSE: # %bb.0: 2033; SSE-NEXT: movaps {{.*#+}} xmm0 = [2,4,6,8] 2034; SSE-NEXT: retq 2035; 2036; VEX-LABEL: fptoui_4f64_to_4i32_const: 2037; VEX: # %bb.0: 2038; VEX-NEXT: vmovaps {{.*#+}} xmm0 = [2,4,6,8] 2039; VEX-NEXT: retq 2040; 2041; AVX512-LABEL: fptoui_4f64_to_4i32_const: 2042; AVX512: # %bb.0: 2043; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [2,4,6,8] 2044; AVX512-NEXT: retq 2045 %cvt = fptoui <4 x double> <double 2.0, double 4.0, double 6.0, double 8.0> to <4 x i32> 2046 ret <4 x i32> %cvt 2047} 2048 2049define <4 x i32> @fptosi_4f32_to_4i32_const() { 2050; SSE-LABEL: fptosi_4f32_to_4i32_const: 2051; SSE: # %bb.0: 2052; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3] 2053; SSE-NEXT: retq 2054; 2055; VEX-LABEL: fptosi_4f32_to_4i32_const: 2056; VEX: # %bb.0: 2057; VEX-NEXT: vmovaps {{.*#+}} xmm0 = [1,4294967295,2,3] 2058; VEX-NEXT: retq 2059; 2060; AVX512-LABEL: fptosi_4f32_to_4i32_const: 2061; AVX512: # %bb.0: 2062; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [1,4294967295,2,3] 2063; AVX512-NEXT: retq 2064 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i32> 2065 ret <4 x i32> %cvt 2066} 2067 2068define <4 x 
i64> @fptosi_4f32_to_4i64_const() { 2069; SSE-LABEL: fptosi_4f32_to_4i64_const: 2070; SSE: # %bb.0: 2071; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,18446744073709551615] 2072; SSE-NEXT: movaps {{.*#+}} xmm1 = [2,3] 2073; SSE-NEXT: retq 2074; 2075; VEX-LABEL: fptosi_4f32_to_4i64_const: 2076; VEX: # %bb.0: 2077; VEX-NEXT: vmovaps {{.*#+}} ymm0 = [1,18446744073709551615,2,3] 2078; VEX-NEXT: retq 2079; 2080; AVX512-LABEL: fptosi_4f32_to_4i64_const: 2081; AVX512: # %bb.0: 2082; AVX512-NEXT: vpmovsxbq {{.*#+}} ymm0 = [1,18446744073709551615,2,3] 2083; AVX512-NEXT: retq 2084 %cvt = fptosi <4 x float> <float 1.0, float -1.0, float 2.0, float 3.0> to <4 x i64> 2085 ret <4 x i64> %cvt 2086} 2087 2088define <8 x i32> @fptosi_8f32_to_8i32_const(<8 x float> %a) { 2089; SSE-LABEL: fptosi_8f32_to_8i32_const: 2090; SSE: # %bb.0: 2091; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,4294967295,2,3] 2092; SSE-NEXT: movaps {{.*#+}} xmm1 = [6,4294967288,2,4294967295] 2093; SSE-NEXT: retq 2094; 2095; VEX-LABEL: fptosi_8f32_to_8i32_const: 2096; VEX: # %bb.0: 2097; VEX-NEXT: vmovaps {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295] 2098; VEX-NEXT: retq 2099; 2100; AVX512-LABEL: fptosi_8f32_to_8i32_const: 2101; AVX512: # %bb.0: 2102; AVX512-NEXT: vpmovsxbd {{.*#+}} ymm0 = [1,4294967295,2,3,6,4294967288,2,4294967295] 2103; AVX512-NEXT: retq 2104 %cvt = fptosi <8 x float> <float 1.0, float -1.0, float 2.0, float 3.0, float 6.0, float -8.0, float 2.0, float -1.0> to <8 x i32> 2105 ret <8 x i32> %cvt 2106} 2107 2108define <4 x i32> @fptoui_4f32_to_4i32_const(<4 x float> %a) { 2109; SSE-LABEL: fptoui_4f32_to_4i32_const: 2110; SSE: # %bb.0: 2111; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6] 2112; SSE-NEXT: retq 2113; 2114; VEX-LABEL: fptoui_4f32_to_4i32_const: 2115; VEX: # %bb.0: 2116; VEX-NEXT: vmovaps {{.*#+}} xmm0 = [1,2,4,6] 2117; VEX-NEXT: retq 2118; 2119; AVX512-LABEL: fptoui_4f32_to_4i32_const: 2120; AVX512: # %bb.0: 2121; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm0 = [1,2,4,6] 2122; AVX512-NEXT: retq 
2123 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 6.0> to <4 x i32> 2124 ret <4 x i32> %cvt 2125} 2126 2127define <4 x i64> @fptoui_4f32_to_4i64_const() { 2128; SSE-LABEL: fptoui_4f32_to_4i64_const: 2129; SSE: # %bb.0: 2130; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2] 2131; SSE-NEXT: movaps {{.*#+}} xmm1 = [4,8] 2132; SSE-NEXT: retq 2133; 2134; VEX-LABEL: fptoui_4f32_to_4i64_const: 2135; VEX: # %bb.0: 2136; VEX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8] 2137; VEX-NEXT: retq 2138; 2139; AVX512-LABEL: fptoui_4f32_to_4i64_const: 2140; AVX512: # %bb.0: 2141; AVX512-NEXT: vpmovsxbq {{.*#+}} ymm0 = [1,2,4,8] 2142; AVX512-NEXT: retq 2143 %cvt = fptoui <4 x float> <float 1.0, float 2.0, float 4.0, float 8.0> to <4 x i64> 2144 ret <4 x i64> %cvt 2145} 2146 2147define <8 x i32> @fptoui_8f32_to_8i32_const(<8 x float> %a) { 2148; SSE-LABEL: fptoui_8f32_to_8i32_const: 2149; SSE: # %bb.0: 2150; SSE-NEXT: movaps {{.*#+}} xmm0 = [1,2,4,6] 2151; SSE-NEXT: movaps {{.*#+}} xmm1 = [8,6,4,1] 2152; SSE-NEXT: retq 2153; 2154; VEX-LABEL: fptoui_8f32_to_8i32_const: 2155; VEX: # %bb.0: 2156; VEX-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1] 2157; VEX-NEXT: retq 2158; 2159; AVX512-LABEL: fptoui_8f32_to_8i32_const: 2160; AVX512: # %bb.0: 2161; AVX512-NEXT: vpmovsxbd {{.*#+}} ymm0 = [1,2,4,6,8,6,4,1] 2162; AVX512-NEXT: retq 2163 %cvt = fptoui <8 x float> <float 1.0, float 2.0, float 4.0, float 6.0, float 8.0, float 6.0, float 4.0, float 1.0> to <8 x i32> 2164 ret <8 x i32> %cvt 2165} 2166 2167define <4 x i32> @fptosi_2f80_to_4i32(<2 x x86_fp80> %a) nounwind { 2168; SSE-LABEL: fptosi_2f80_to_4i32: 2169; SSE: # %bb.0: 2170; SSE-NEXT: fldt {{[0-9]+}}(%rsp) 2171; SSE-NEXT: fldt {{[0-9]+}}(%rsp) 2172; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp) 2173; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax 2174; SSE-NEXT: orl $3072, %eax # imm = 0xC00 2175; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp) 2176; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp) 2177; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp) 2178; SSE-NEXT: fldcw 
-{{[0-9]+}}(%rsp) 2179; SSE-NEXT: fnstcw -{{[0-9]+}}(%rsp) 2180; SSE-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax 2181; SSE-NEXT: orl $3072, %eax # imm = 0xC00 2182; SSE-NEXT: movw %ax, -{{[0-9]+}}(%rsp) 2183; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp) 2184; SSE-NEXT: fistpl -{{[0-9]+}}(%rsp) 2185; SSE-NEXT: fldcw -{{[0-9]+}}(%rsp) 2186; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2187; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2188; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2189; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 2190; SSE-NEXT: retq 2191; 2192; AVX-LABEL: fptosi_2f80_to_4i32: 2193; AVX: # %bb.0: 2194; AVX-NEXT: fldt {{[0-9]+}}(%rsp) 2195; AVX-NEXT: fldt {{[0-9]+}}(%rsp) 2196; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp) 2197; AVX-NEXT: fisttpl -{{[0-9]+}}(%rsp) 2198; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2199; AVX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2200; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2201; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2202; AVX-NEXT: retq 2203 %cvt = fptosi <2 x x86_fp80> %a to <2 x i32> 2204 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2205 ret <4 x i32> %ext 2206} 2207 2208define <4 x i32> @fptosi_2f128_to_4i32(<2 x fp128> %a) nounwind { 2209; SSE-LABEL: fptosi_2f128_to_4i32: 2210; SSE: # %bb.0: 2211; SSE-NEXT: pushq %rbx 2212; SSE-NEXT: subq $16, %rsp 2213; SSE-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill 2214; SSE-NEXT: callq __fixtfsi@PLT 2215; SSE-NEXT: movl %eax, %ebx 2216; SSE-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 2217; SSE-NEXT: callq __fixtfsi@PLT 2218; SSE-NEXT: movd %eax, %xmm0 2219; SSE-NEXT: movd %ebx, %xmm1 2220; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2221; SSE-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero 2222; SSE-NEXT: addq $16, %rsp 2223; SSE-NEXT: popq %rbx 2224; SSE-NEXT: retq 2225; 2226; AVX-LABEL: fptosi_2f128_to_4i32: 2227; AVX: # %bb.0: 
2228; AVX-NEXT: pushq %rbx 2229; AVX-NEXT: subq $16, %rsp 2230; AVX-NEXT: vmovaps %xmm1, (%rsp) # 16-byte Spill 2231; AVX-NEXT: callq __fixtfsi@PLT 2232; AVX-NEXT: movl %eax, %ebx 2233; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 2234; AVX-NEXT: callq __fixtfsi@PLT 2235; AVX-NEXT: vmovd %eax, %xmm0 2236; AVX-NEXT: vmovd %ebx, %xmm1 2237; AVX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2238; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 2239; AVX-NEXT: addq $16, %rsp 2240; AVX-NEXT: popq %rbx 2241; AVX-NEXT: retq 2242 %cvt = fptosi <2 x fp128> %a to <2 x i32> 2243 %ext = shufflevector <2 x i32> %cvt, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2244 ret <4 x i32> %ext 2245} 2246 2247define <2 x i8> @fptosi_2f32_to_2i8(<2 x float> %a) { 2248; SSE-LABEL: fptosi_2f32_to_2i8: 2249; SSE: # %bb.0: 2250; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2251; SSE-NEXT: packssdw %xmm0, %xmm0 2252; SSE-NEXT: packsswb %xmm0, %xmm0 2253; SSE-NEXT: retq 2254; 2255; VEX-LABEL: fptosi_2f32_to_2i8: 2256; VEX: # %bb.0: 2257; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 2258; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2259; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2260; VEX-NEXT: retq 2261; 2262; AVX512F-LABEL: fptosi_2f32_to_2i8: 2263; AVX512F: # %bb.0: 2264; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 2265; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2266; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2267; AVX512F-NEXT: retq 2268; 2269; AVX512VL-LABEL: fptosi_2f32_to_2i8: 2270; AVX512VL: # %bb.0: 2271; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 2272; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2273; AVX512VL-NEXT: retq 2274; 2275; AVX512DQ-LABEL: fptosi_2f32_to_2i8: 2276; AVX512DQ: # %bb.0: 2277; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 2278; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2279; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2280; AVX512DQ-NEXT: retq 2281; 2282; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8: 2283; AVX512VLDQ: # %bb.0: 2284; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 
2285; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2286; AVX512VLDQ-NEXT: retq 2287 %cvt = fptosi <2 x float> %a to <2 x i8> 2288 ret <2 x i8> %cvt 2289} 2290 2291define <2 x i16> @fptosi_2f32_to_2i16(<2 x float> %a) { 2292; SSE-LABEL: fptosi_2f32_to_2i16: 2293; SSE: # %bb.0: 2294; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2295; SSE-NEXT: packssdw %xmm0, %xmm0 2296; SSE-NEXT: retq 2297; 2298; AVX-LABEL: fptosi_2f32_to_2i16: 2299; AVX: # %bb.0: 2300; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 2301; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2302; AVX-NEXT: retq 2303 %cvt = fptosi <2 x float> %a to <2 x i16> 2304 ret <2 x i16> %cvt 2305} 2306 2307define <2 x i8> @fptoui_2f32_to_2i8(<2 x float> %a) { 2308; SSE-LABEL: fptoui_2f32_to_2i8: 2309; SSE: # %bb.0: 2310; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2311; SSE-NEXT: packuswb %xmm0, %xmm0 2312; SSE-NEXT: packuswb %xmm0, %xmm0 2313; SSE-NEXT: retq 2314; 2315; VEX-LABEL: fptoui_2f32_to_2i8: 2316; VEX: # %bb.0: 2317; VEX-NEXT: vcvttps2dq %xmm0, %xmm0 2318; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2319; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2320; VEX-NEXT: retq 2321; 2322; AVX512F-LABEL: fptoui_2f32_to_2i8: 2323; AVX512F: # %bb.0: 2324; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0 2325; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2326; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2327; AVX512F-NEXT: retq 2328; 2329; AVX512VL-LABEL: fptoui_2f32_to_2i8: 2330; AVX512VL: # %bb.0: 2331; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0 2332; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2333; AVX512VL-NEXT: retq 2334; 2335; AVX512DQ-LABEL: fptoui_2f32_to_2i8: 2336; AVX512DQ: # %bb.0: 2337; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0 2338; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2339; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2340; AVX512DQ-NEXT: retq 2341; 2342; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8: 2343; AVX512VLDQ: # %bb.0: 2344; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0 2345; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2346; AVX512VLDQ-NEXT: retq 2347 %cvt = fptoui <2 x float> %a to <2 x 
i8> 2348 ret <2 x i8> %cvt 2349} 2350 2351define <2 x i16> @fptoui_2f32_to_2i16(<2 x float> %a) { 2352; SSE-LABEL: fptoui_2f32_to_2i16: 2353; SSE: # %bb.0: 2354; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2355; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2356; SSE-NEXT: retq 2357; 2358; AVX-LABEL: fptoui_2f32_to_2i16: 2359; AVX: # %bb.0: 2360; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 2361; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2362; AVX-NEXT: retq 2363 %cvt = fptoui <2 x float> %a to <2 x i16> 2364 ret <2 x i16> %cvt 2365} 2366 2367define <2 x i8> @fptosi_2f64_to_2i8(<2 x double> %a) { 2368; SSE-LABEL: fptosi_2f64_to_2i8: 2369; SSE: # %bb.0: 2370; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2371; SSE-NEXT: packssdw %xmm0, %xmm0 2372; SSE-NEXT: packsswb %xmm0, %xmm0 2373; SSE-NEXT: retq 2374; 2375; VEX-LABEL: fptosi_2f64_to_2i8: 2376; VEX: # %bb.0: 2377; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 2378; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2379; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2380; VEX-NEXT: retq 2381; 2382; AVX512F-LABEL: fptosi_2f64_to_2i8: 2383; AVX512F: # %bb.0: 2384; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 2385; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2386; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2387; AVX512F-NEXT: retq 2388; 2389; AVX512VL-LABEL: fptosi_2f64_to_2i8: 2390; AVX512VL: # %bb.0: 2391; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 2392; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2393; AVX512VL-NEXT: retq 2394; 2395; AVX512DQ-LABEL: fptosi_2f64_to_2i8: 2396; AVX512DQ: # %bb.0: 2397; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2398; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2399; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 2400; AVX512DQ-NEXT: retq 2401; 2402; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8: 2403; AVX512VLDQ: # %bb.0: 2404; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2405; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2406; AVX512VLDQ-NEXT: retq 2407 %cvt = fptosi <2 x double> %a to <2 x i8> 2408 ret <2 x i8> %cvt 2409} 2410 2411define <2 x i16> @fptosi_2f64_to_2i16(<2 x 
double> %a) { 2412; SSE-LABEL: fptosi_2f64_to_2i16: 2413; SSE: # %bb.0: 2414; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2415; SSE-NEXT: packssdw %xmm0, %xmm0 2416; SSE-NEXT: retq 2417; 2418; AVX-LABEL: fptosi_2f64_to_2i16: 2419; AVX: # %bb.0: 2420; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 2421; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0 2422; AVX-NEXT: retq 2423 %cvt = fptosi <2 x double> %a to <2 x i16> 2424 ret <2 x i16> %cvt 2425} 2426 2427define <2 x i8> @fptoui_2f64_to_2i8(<2 x double> %a) { 2428; SSE-LABEL: fptoui_2f64_to_2i8: 2429; SSE: # %bb.0: 2430; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2431; SSE-NEXT: packuswb %xmm0, %xmm0 2432; SSE-NEXT: packuswb %xmm0, %xmm0 2433; SSE-NEXT: retq 2434; 2435; VEX-LABEL: fptoui_2f64_to_2i8: 2436; VEX: # %bb.0: 2437; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0 2438; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2439; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2440; VEX-NEXT: retq 2441; 2442; AVX512F-LABEL: fptoui_2f64_to_2i8: 2443; AVX512F: # %bb.0: 2444; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0 2445; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2446; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2447; AVX512F-NEXT: retq 2448; 2449; AVX512VL-LABEL: fptoui_2f64_to_2i8: 2450; AVX512VL: # %bb.0: 2451; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0 2452; AVX512VL-NEXT: vpmovdb %xmm0, %xmm0 2453; AVX512VL-NEXT: retq 2454; 2455; AVX512DQ-LABEL: fptoui_2f64_to_2i8: 2456; AVX512DQ: # %bb.0: 2457; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2458; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2459; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 2460; AVX512DQ-NEXT: retq 2461; 2462; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8: 2463; AVX512VLDQ: # %bb.0: 2464; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0 2465; AVX512VLDQ-NEXT: vpmovdb %xmm0, %xmm0 2466; AVX512VLDQ-NEXT: retq 2467 %cvt = fptoui <2 x double> %a to <2 x i8> 2468 ret <2 x i8> %cvt 2469} 2470 2471define <2 x i16> @fptoui_2f64_to_2i16(<2 x double> %a) { 2472; SSE-LABEL: fptoui_2f64_to_2i16: 2473; SSE: # %bb.0: 2474; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 
2475; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2476; SSE-NEXT: retq 2477; 2478; AVX-LABEL: fptoui_2f64_to_2i16: 2479; AVX: # %bb.0: 2480; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 2481; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 2482; AVX-NEXT: retq 2483 %cvt = fptoui <2 x double> %a to <2 x i16> 2484 ret <2 x i16> %cvt 2485} 2486 2487define <8 x i16> @fptosi_8f64_to_8i16(<8 x double> %a) { 2488; SSE-LABEL: fptosi_8f64_to_8i16: 2489; SSE: # %bb.0: 2490; SSE-NEXT: cvttpd2dq %xmm3, %xmm3 2491; SSE-NEXT: cvttpd2dq %xmm2, %xmm2 2492; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2493; SSE-NEXT: cvttpd2dq %xmm1, %xmm1 2494; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2495; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2496; SSE-NEXT: packssdw %xmm2, %xmm0 2497; SSE-NEXT: retq 2498; 2499; VEX-LABEL: fptosi_8f64_to_8i16: 2500; VEX: # %bb.0: 2501; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1 2502; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0 2503; VEX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 2504; VEX-NEXT: vzeroupper 2505; VEX-NEXT: retq 2506; 2507; AVX512F-LABEL: fptosi_8f64_to_8i16: 2508; AVX512F: # %bb.0: 2509; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0 2510; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 2511; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2512; AVX512F-NEXT: vzeroupper 2513; AVX512F-NEXT: retq 2514; 2515; AVX512VL-LABEL: fptosi_8f64_to_8i16: 2516; AVX512VL: # %bb.0: 2517; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0 2518; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 2519; AVX512VL-NEXT: vzeroupper 2520; AVX512VL-NEXT: retq 2521; 2522; AVX512DQ-LABEL: fptosi_8f64_to_8i16: 2523; AVX512DQ: # %bb.0: 2524; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2525; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 2526; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2527; AVX512DQ-NEXT: vzeroupper 2528; AVX512DQ-NEXT: retq 2529; 2530; AVX512VLDQ-LABEL: fptosi_8f64_to_8i16: 2531; AVX512VLDQ: # %bb.0: 2532; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2533; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0 2534; AVX512VLDQ-NEXT: 
vzeroupper 2535; AVX512VLDQ-NEXT: retq 2536 %cvt = fptosi <8 x double> %a to <8 x i16> 2537 ret <8 x i16> %cvt 2538} 2539 2540define <8 x i16> @fptoui_8f64_to_8i16(<8 x double> %a) { 2541; SSE-LABEL: fptoui_8f64_to_8i16: 2542; SSE: # %bb.0: 2543; SSE-NEXT: cvttpd2dq %xmm3, %xmm3 2544; SSE-NEXT: cvttpd2dq %xmm2, %xmm2 2545; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2546; SSE-NEXT: pslld $16, %xmm2 2547; SSE-NEXT: psrad $16, %xmm2 2548; SSE-NEXT: cvttpd2dq %xmm1, %xmm1 2549; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 2550; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2551; SSE-NEXT: pslld $16, %xmm0 2552; SSE-NEXT: psrad $16, %xmm0 2553; SSE-NEXT: packssdw %xmm2, %xmm0 2554; SSE-NEXT: retq 2555; 2556; VEX-LABEL: fptoui_8f64_to_8i16: 2557; VEX: # %bb.0: 2558; VEX-NEXT: vcvttpd2dq %ymm1, %xmm1 2559; VEX-NEXT: vcvttpd2dq %ymm0, %xmm0 2560; VEX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 2561; VEX-NEXT: vzeroupper 2562; VEX-NEXT: retq 2563; 2564; AVX512F-LABEL: fptoui_8f64_to_8i16: 2565; AVX512F: # %bb.0: 2566; AVX512F-NEXT: vcvttpd2dq %zmm0, %ymm0 2567; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 2568; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2569; AVX512F-NEXT: vzeroupper 2570; AVX512F-NEXT: retq 2571; 2572; AVX512VL-LABEL: fptoui_8f64_to_8i16: 2573; AVX512VL: # %bb.0: 2574; AVX512VL-NEXT: vcvttpd2dq %zmm0, %ymm0 2575; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 2576; AVX512VL-NEXT: vzeroupper 2577; AVX512VL-NEXT: retq 2578; 2579; AVX512DQ-LABEL: fptoui_8f64_to_8i16: 2580; AVX512DQ: # %bb.0: 2581; AVX512DQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2582; AVX512DQ-NEXT: vpmovdw %zmm0, %ymm0 2583; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2584; AVX512DQ-NEXT: vzeroupper 2585; AVX512DQ-NEXT: retq 2586; 2587; AVX512VLDQ-LABEL: fptoui_8f64_to_8i16: 2588; AVX512VLDQ: # %bb.0: 2589; AVX512VLDQ-NEXT: vcvttpd2dq %zmm0, %ymm0 2590; AVX512VLDQ-NEXT: vpmovdw %ymm0, %xmm0 2591; AVX512VLDQ-NEXT: vzeroupper 2592; AVX512VLDQ-NEXT: retq 2593 %cvt = fptoui <8 x double> %a to <8 x i16> 
2594 ret <8 x i16> %cvt 2595} 2596 2597define <16 x i8> @fptosi_16f32_to_16i8(<16 x float> %a) { 2598; SSE-LABEL: fptosi_16f32_to_16i8: 2599; SSE: # %bb.0: 2600; SSE-NEXT: cvttps2dq %xmm3, %xmm3 2601; SSE-NEXT: cvttps2dq %xmm2, %xmm2 2602; SSE-NEXT: packssdw %xmm3, %xmm2 2603; SSE-NEXT: cvttps2dq %xmm1, %xmm1 2604; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2605; SSE-NEXT: packssdw %xmm1, %xmm0 2606; SSE-NEXT: packsswb %xmm2, %xmm0 2607; SSE-NEXT: retq 2608; 2609; AVX1-LABEL: fptosi_16f32_to_16i8: 2610; AVX1: # %bb.0: 2611; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1 2612; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2613; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 2614; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0 2615; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2616; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 2617; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 2618; AVX1-NEXT: vzeroupper 2619; AVX1-NEXT: retq 2620; 2621; AVX2-LABEL: fptosi_16f32_to_16i8: 2622; AVX2: # %bb.0: 2623; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1 2624; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2625; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 2626; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0 2627; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 2628; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 2629; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 2630; AVX2-NEXT: vzeroupper 2631; AVX2-NEXT: retq 2632; 2633; AVX512-LABEL: fptosi_16f32_to_16i8: 2634; AVX512: # %bb.0: 2635; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0 2636; AVX512-NEXT: vpmovdb %zmm0, %xmm0 2637; AVX512-NEXT: vzeroupper 2638; AVX512-NEXT: retq 2639 %cvt = fptosi <16 x float> %a to <16 x i8> 2640 ret <16 x i8> %cvt 2641} 2642 2643define <16 x i8> @fptoui_16f32_to_16i8(<16 x float> %a) { 2644; SSE-LABEL: fptoui_16f32_to_16i8: 2645; SSE: # %bb.0: 2646; SSE-NEXT: cvttps2dq %xmm3, %xmm3 2647; SSE-NEXT: cvttps2dq %xmm2, %xmm2 2648; SSE-NEXT: packssdw %xmm3, %xmm2 2649; SSE-NEXT: cvttps2dq %xmm1, %xmm1 2650; SSE-NEXT: cvttps2dq %xmm0, %xmm0 2651; SSE-NEXT: packssdw %xmm1, %xmm0 2652; SSE-NEXT: packuswb %xmm2, %xmm0 
2653; SSE-NEXT: retq 2654; 2655; AVX1-LABEL: fptoui_16f32_to_16i8: 2656; AVX1: # %bb.0: 2657; AVX1-NEXT: vcvttps2dq %ymm1, %ymm1 2658; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 2659; AVX1-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 2660; AVX1-NEXT: vcvttps2dq %ymm0, %ymm0 2661; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 2662; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 2663; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 2664; AVX1-NEXT: vzeroupper 2665; AVX1-NEXT: retq 2666; 2667; AVX2-LABEL: fptoui_16f32_to_16i8: 2668; AVX2: # %bb.0: 2669; AVX2-NEXT: vcvttps2dq %ymm1, %ymm1 2670; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 2671; AVX2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1 2672; AVX2-NEXT: vcvttps2dq %ymm0, %ymm0 2673; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 2674; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 2675; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 2676; AVX2-NEXT: vzeroupper 2677; AVX2-NEXT: retq 2678; 2679; AVX512-LABEL: fptoui_16f32_to_16i8: 2680; AVX512: # %bb.0: 2681; AVX512-NEXT: vcvttps2dq %zmm0, %zmm0 2682; AVX512-NEXT: vpmovdb %zmm0, %xmm0 2683; AVX512-NEXT: vzeroupper 2684; AVX512-NEXT: retq 2685 %cvt = fptoui <16 x float> %a to <16 x i8> 2686 ret <16 x i8> %cvt 2687} 2688 2689define <2 x i64> @fptosi_2f32_to_2i64_load(ptr %x) { 2690; SSE-LABEL: fptosi_2f32_to_2i64_load: 2691; SSE: # %bb.0: 2692; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 2693; SSE-NEXT: cvttss2si %xmm1, %rax 2694; SSE-NEXT: movq %rax, %xmm0 2695; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 2696; SSE-NEXT: cvttss2si %xmm1, %rax 2697; SSE-NEXT: movq %rax, %xmm1 2698; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2699; SSE-NEXT: retq 2700; 2701; VEX-LABEL: fptosi_2f32_to_2i64_load: 2702; VEX: # %bb.0: 2703; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2704; VEX-NEXT: vcvttss2si %xmm0, %rax 2705; VEX-NEXT: vmovq %rax, %xmm1 2706; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2707; VEX-NEXT: vcvttss2si %xmm0, %rax 2708; VEX-NEXT: vmovq %rax, %xmm0 2709; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = 
xmm1[0],xmm0[0] 2710; VEX-NEXT: retq 2711; 2712; AVX512F-LABEL: fptosi_2f32_to_2i64_load: 2713; AVX512F: # %bb.0: 2714; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2715; AVX512F-NEXT: vcvttss2si %xmm0, %rax 2716; AVX512F-NEXT: vmovq %rax, %xmm1 2717; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2718; AVX512F-NEXT: vcvttss2si %xmm0, %rax 2719; AVX512F-NEXT: vmovq %rax, %xmm0 2720; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2721; AVX512F-NEXT: retq 2722; 2723; AVX512VL-LABEL: fptosi_2f32_to_2i64_load: 2724; AVX512VL: # %bb.0: 2725; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2726; AVX512VL-NEXT: vcvttss2si %xmm0, %rax 2727; AVX512VL-NEXT: vmovq %rax, %xmm1 2728; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2729; AVX512VL-NEXT: vcvttss2si %xmm0, %rax 2730; AVX512VL-NEXT: vmovq %rax, %xmm0 2731; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2732; AVX512VL-NEXT: retq 2733; 2734; AVX512DQ-LABEL: fptosi_2f32_to_2i64_load: 2735; AVX512DQ: # %bb.0: 2736; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2737; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 2738; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2739; AVX512DQ-NEXT: vzeroupper 2740; AVX512DQ-NEXT: retq 2741; 2742; AVX512VLDQ-LABEL: fptosi_2f32_to_2i64_load: 2743; AVX512VLDQ: # %bb.0: 2744; AVX512VLDQ-NEXT: vcvttps2qq (%rdi), %xmm0 2745; AVX512VLDQ-NEXT: retq 2746 %a = load <2 x float>, ptr %x 2747 %b = fptosi <2 x float> %a to <2 x i64> 2748 ret <2 x i64> %b 2749} 2750 2751define <2 x i64> @fptoui_2f32_to_2i64_load(ptr %x) { 2752; SSE-LABEL: fptoui_2f32_to_2i64_load: 2753; SSE: # %bb.0: 2754; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 2755; SSE-NEXT: movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 2756; SSE-NEXT: movaps %xmm1, %xmm0 2757; SSE-NEXT: subss %xmm2, %xmm0 2758; SSE-NEXT: cvttss2si %xmm0, %rax 2759; SSE-NEXT: cvttss2si %xmm1, %rcx 2760; SSE-NEXT: movq %rcx, %rdx 2761; SSE-NEXT: sarq $63, %rdx 2762; SSE-NEXT: andq %rax, %rdx 
2763; SSE-NEXT: orq %rcx, %rdx 2764; SSE-NEXT: movq %rdx, %xmm0 2765; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,1,1] 2766; SSE-NEXT: cvttss2si %xmm1, %rax 2767; SSE-NEXT: subss %xmm2, %xmm1 2768; SSE-NEXT: cvttss2si %xmm1, %rcx 2769; SSE-NEXT: movq %rax, %rdx 2770; SSE-NEXT: sarq $63, %rdx 2771; SSE-NEXT: andq %rcx, %rdx 2772; SSE-NEXT: orq %rax, %rdx 2773; SSE-NEXT: movq %rdx, %xmm1 2774; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2775; SSE-NEXT: retq 2776; 2777; VEX-LABEL: fptoui_2f32_to_2i64_load: 2778; VEX: # %bb.0: 2779; VEX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2780; VEX-NEXT: vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 2781; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm2 2782; VEX-NEXT: vcvttss2si %xmm2, %rax 2783; VEX-NEXT: vcvttss2si %xmm0, %rcx 2784; VEX-NEXT: movq %rcx, %rdx 2785; VEX-NEXT: sarq $63, %rdx 2786; VEX-NEXT: andq %rax, %rdx 2787; VEX-NEXT: orq %rcx, %rdx 2788; VEX-NEXT: vmovq %rdx, %xmm2 2789; VEX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2790; VEX-NEXT: vsubss %xmm1, %xmm0, %xmm1 2791; VEX-NEXT: vcvttss2si %xmm1, %rax 2792; VEX-NEXT: vcvttss2si %xmm0, %rcx 2793; VEX-NEXT: movq %rcx, %rdx 2794; VEX-NEXT: sarq $63, %rdx 2795; VEX-NEXT: andq %rax, %rdx 2796; VEX-NEXT: orq %rcx, %rdx 2797; VEX-NEXT: vmovq %rdx, %xmm0 2798; VEX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] 2799; VEX-NEXT: retq 2800; 2801; AVX512F-LABEL: fptoui_2f32_to_2i64_load: 2802; AVX512F: # %bb.0: 2803; AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2804; AVX512F-NEXT: vcvttss2usi %xmm0, %rax 2805; AVX512F-NEXT: vmovq %rax, %xmm1 2806; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2807; AVX512F-NEXT: vcvttss2usi %xmm0, %rax 2808; AVX512F-NEXT: vmovq %rax, %xmm0 2809; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2810; AVX512F-NEXT: retq 2811; 2812; AVX512VL-LABEL: fptoui_2f32_to_2i64_load: 2813; AVX512VL: # %bb.0: 2814; AVX512VL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2815; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax 2816; 
AVX512VL-NEXT: vmovq %rax, %xmm1 2817; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] 2818; AVX512VL-NEXT: vcvttss2usi %xmm0, %rax 2819; AVX512VL-NEXT: vmovq %rax, %xmm0 2820; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2821; AVX512VL-NEXT: retq 2822; 2823; AVX512DQ-LABEL: fptoui_2f32_to_2i64_load: 2824; AVX512DQ: # %bb.0: 2825; AVX512DQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 2826; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 2827; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2828; AVX512DQ-NEXT: vzeroupper 2829; AVX512DQ-NEXT: retq 2830; 2831; AVX512VLDQ-LABEL: fptoui_2f32_to_2i64_load: 2832; AVX512VLDQ: # %bb.0: 2833; AVX512VLDQ-NEXT: vcvttps2uqq (%rdi), %xmm0 2834; AVX512VLDQ-NEXT: retq 2835 %a = load <2 x float>, ptr %x 2836 %b = fptoui <2 x float> %a to <2 x i64> 2837 ret <2 x i64> %b 2838} 2839