; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=X86-SSE2
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86-AVX,AVX512-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=i686-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X86-AVX,AVX512-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64-AVX-i32,X64-AVX1-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i32,AVX512-i32
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i32,AVX512-i32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64-AVX-i64,X64-AVX1-i64
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512-i64
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=X64-AVX-i64,AVX512DQ-i64

define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; X86-SSE2-LABEL: lrint_v1f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtss2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v1f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtss2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v1f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtss2si %xmm0, %eax
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX-i64-LABEL: lrint_v1f32:
; X64-AVX-i64:       # %bb.0:
; X64-AVX-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX-i64-NEXT:    retq
  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x)
  ret <1 x iXLen> %a
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float>)

define <2 x iXLen> @lrint_v2f32(<2 x float> %x) {
; X86-SSE2-LABEL: lrint_v2f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v2f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtps2dq %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v2f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtps2dq %xmm0, %xmm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v2f32:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v2f32:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v2f32:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtps2qq %xmm0, %xmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float> %x)
  ret <2 x iXLen> %a
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f32(<2 x float>)
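
; For i32 elements, f32 lrint selects a single packed cvtps2dq/vcvtps2dq.
; For i64 elements there is no packed conversion before AVX512DQ (vcvtps2qq),
; so each lane is rounded with scalar vcvtss2si, moved back with vmovq, and
; repacked with vpunpcklqdq, as the checks above and below show.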
define <4 x iXLen> @lrint_v4f32(<4 x float> %x) {
; X86-SSE2-LABEL: lrint_v4f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtps2dq %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v4f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtps2dq %xmm0, %xmm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v4f32:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v4f32:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v4f32:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtps2qq %xmm0, %ymm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float> %x)
  ret <4 x iXLen> %a
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f32(<4 x float>)
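
; 256-bit results: plain AVX already converts all eight floats with a single
; vcvtps2dq on %ymm0, while the scalarized i64 path repeats the per-lane
; sequence for both 128-bit halves via vextractf128/vinsertf128.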
define <8 x iXLen> @lrint_v8f32(<8 x float> %x) {
; X86-SSE2-LABEL: lrint_v8f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
; X86-SSE2-NEXT:    cvtps2dq %xmm1, %xmm1
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v8f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtps2dq %ymm0, %ymm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v8f32:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtps2dq %ymm0, %ymm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v8f32:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm3, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm2
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm0
; X64-AVX1-i64-NEXT:    vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm3, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X64-AVX1-i64-NEXT:    vcvtss2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; X64-AVX1-i64-NEXT:    vmovaps %ymm2, %ymm0
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v8f32:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-i64-NEXT:    vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512-i64-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512-i64-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtss2si %xmm3, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-i64-NEXT:    vcvtss2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512-i64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v8f32:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtps2qq %ymm0, %zmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float> %x)
  ret <8 x iXLen> %a
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f32(<8 x float>)
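
; No assertions are generated for the v16 case below; presumably no common set
; of check prefixes covers every RUN line for a result this wide.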
define <16 x iXLen> @lrint_v16iXLen_v16f32(<16 x float> %x) {
  %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float> %x)
  ret <16 x iXLen> %a
}
declare <16 x iXLen> @llvm.lrint.v16iXLen.v16f32(<16 x float>)

define <1 x iXLen> @lrint_v1f64(<1 x double> %x) {
; X86-SSE2-LABEL: lrint_v1f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtsd2si {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v1f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtsd2si {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v1f64:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %eax
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX-i64-LABEL: lrint_v1f64:
; X64-AVX-i64:       # %bb.0:
; X64-AVX-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX-i64-NEXT:    retq
  %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double> %x)
  ret <1 x iXLen> %a
}
declare <1 x iXLen> @llvm.lrint.v1iXLen.v1f64(<1 x double>)
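
; v2f64 with i32 elements is narrowing: both lanes go through scalar
; cvtsd2si/vcvtsd2si and are reassembled with movd/punpckldq (SSE2) or
; vpinsrd (AVX). AVX512DQ again has a direct packed form, vcvtpd2qq.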
define <2 x iXLen> @lrint_v2f64(<2 x double> %x) {
; X86-SSE2-LABEL: lrint_v2f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v2f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; X86-AVX-NEXT:    vcvtsd2si %xmm1, %eax
; X86-AVX-NEXT:    vcvtsd2si %xmm0, %ecx
; X86-AVX-NEXT:    vmovd %ecx, %xmm0
; X86-AVX-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v2f64:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1,0]
; X64-AVX-i32-NEXT:    vcvtsd2si %xmm1, %eax
; X64-AVX-i32-NEXT:    vcvtsd2si %xmm0, %ecx
; X64-AVX-i32-NEXT:    vmovd %ecx, %xmm0
; X64-AVX-i32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v2f64:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v2f64:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v2f64:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtpd2qq %xmm0, %xmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double> %x)
  ret <2 x iXLen> %a
}
declare <2 x iXLen> @llvm.lrint.v2iXLen.v2f64(<2 x double>)
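
; With four doubles the i32 result fits in one xmm, so AVX uses a single
; vcvtpd2dq %ymm0 (plus vzeroupper). The i64 result needs a full ymm and is
; scalarized unless packed vcvtpd2qq is available.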
define <4 x iXLen> @lrint_v4f64(<4 x double> %x) {
; X86-SSE2-LABEL: lrint_v4f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm2
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm0, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-AVX-LABEL: lrint_v4f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X86-AVX-NEXT:    vzeroupper
; X86-AVX-NEXT:    retl
;
; X64-AVX-i32-LABEL: lrint_v4f64:
; X64-AVX-i32:       # %bb.0:
; X64-AVX-i32-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X64-AVX-i32-NEXT:    vzeroupper
; X64-AVX-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v4f64:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v4f64:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v4f64:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtpd2qq %ymm0, %ymm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double> %x)
  ret <4 x iXLen> %a
}
declare <4 x iXLen> @llvm.lrint.v4iXLen.v4f64(<4 x double>)
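
; v8f64 is passed in four 128-bit registers on i686; the fourth part arrives
; on the stack, so the SSE2 code realigns the frame (andl $-16, %esp) and
; loads it with movapd 8(%ebp).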
define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
; X86-SSE2-LABEL: lrint_v8f64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pushl %ebp
; X86-SSE2-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE2-NEXT:    .cfi_offset %ebp, -8
; X86-SSE2-NEXT:    movl %esp, %ebp
; X86-SSE2-NEXT:    .cfi_def_cfa_register %ebp
; X86-SSE2-NEXT:    andl $-16, %esp
; X86-SSE2-NEXT:    subl $16, %esp
; X86-SSE2-NEXT:    movapd %xmm0, %xmm3
; X86-SSE2-NEXT:    movapd 8(%ebp), %xmm4
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm5
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm1, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
; X86-SSE2-NEXT:    cvtsd2si %xmm3, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm3, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
; X86-SSE2-NEXT:    cvtsd2si %xmm4, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm3
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm4 = xmm4[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm4, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X86-SSE2-NEXT:    cvtsd2si %xmm2, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm1
; X86-SSE2-NEXT:    unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; X86-SSE2-NEXT:    cvtsd2si %xmm2, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm2
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; X86-SSE2-NEXT:    movl %ebp, %esp
; X86-SSE2-NEXT:    popl %ebp
; X86-SSE2-NEXT:    .cfi_def_cfa %esp, 4
; X86-SSE2-NEXT:    retl
;
; X86-AVX1-LABEL: lrint_v8f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X86-AVX1-NEXT:    vcvtpd2dq %ymm1, %xmm1
; X86-AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; AVX512-i32-LABEL: lrint_v8f64:
; AVX512-i32:       # %bb.0:
; AVX512-i32-NEXT:    vcvtpd2dq %zmm0, %ymm0
; AVX512-i32-NEXT:    ret{{[l|q]}}
;
; X64-AVX1-i32-LABEL: lrint_v8f64:
; X64-AVX1-i32:       # %bb.0:
; X64-AVX1-i32-NEXT:    vcvtpd2dq %ymm0, %xmm0
; X64-AVX1-i32-NEXT:    vcvtpd2dq %ymm1, %xmm1
; X64-AVX1-i32-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX1-i32-NEXT:    retq
;
; X64-AVX1-i64-LABEL: lrint_v8f64:
; X64-AVX1-i64:       # %bb.0:
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm0, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm0
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; X64-AVX1-i64-NEXT:    vextractf128 $1, %ymm1, %xmm2
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm2, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm2
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm3
; X64-AVX1-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; X64-AVX1-i64-NEXT:    vcvtsd2si %xmm1, %rax
; X64-AVX1-i64-NEXT:    vmovq %rax, %xmm1
; X64-AVX1-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; X64-AVX1-i64-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; X64-AVX1-i64-NEXT:    retq
;
; AVX512-i64-LABEL: lrint_v8f64:
; AVX512-i64:       # %bb.0:
; AVX512-i64-NEXT:    vextractf32x4 $3, %zmm0, %xmm1
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm1, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm1
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-i64-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512-i64-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm2, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm2
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm3
; AVX512-i64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-i64-NEXT:    vcvtsd2si %xmm0, %rax
; AVX512-i64-NEXT:    vmovq %rax, %xmm0
; AVX512-i64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512-i64-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512-i64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-i64-NEXT:    retq
;
; AVX512DQ-i64-LABEL: lrint_v8f64:
; AVX512DQ-i64:       # %bb.0:
; AVX512DQ-i64-NEXT:    vcvtpd2qq %zmm0, %zmm0
; AVX512DQ-i64-NEXT:    retq
  %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double> %x)
  ret <8 x iXLen> %a
}
declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)