; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512dq,avx512vl | FileCheck %s --check-prefixes=AVX512DQ
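
; The checks below cover lowering of the llvm.llrint.* intrinsics. Without
; AVX512DQ the conversions are scalarized through (v)cvtss2si / (v)cvtsd2si and
; the 64-bit results are repacked with movq/punpcklqdq; with AVX512DQ+AVX512VL
; the packed vcvtps2qq / vcvtpd2qq forms are selected instead.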
define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) {
; SSE-LABEL: llrint_v1i64_v1f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: llrint_v1i64_v1f32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtss2si %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512DQ-LABEL: llrint_v1i64_v1f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvtss2si %xmm0, %rax
; AVX512DQ-NEXT: retq
  %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x)
  ret <1 x i64> %a
}
declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>)

define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) {
; SSE-LABEL: llrint_v2i64_v2f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE-NEXT: cvtss2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: llrint_v2i64_v2f32:
; AVX: # %bb.0:
; AVX-NEXT: vcvtss2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX-NEXT: vcvtss2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
;
; AVX512DQ-LABEL: llrint_v2i64_v2f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvtps2qq %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %a = call <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float> %x)
  ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>)

define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) {
; SSE-LABEL: llrint_v4i64_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: cvtss2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; SSE-NEXT: cvtss2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
; SSE-NEXT: cvtss2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvtss2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: llrint_v4i64_v4f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT: vcvtss2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX1-NEXT: vcvtss2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vcvtss2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vcvtss2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: llrint_v4i64_v4f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX512-NEXT: vcvtss2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512-NEXT: vcvtss2si %xmm2, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT: vcvtss2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT: vcvtss2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm0
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512DQ-LABEL: llrint_v4i64_v4f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvtps2qq %xmm0, %ymm0
; AVX512DQ-NEXT: retq
  %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
  ret <4 x i64> %a
}
declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>)

define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) {
; SSE-LABEL: llrint_v8i64_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: cvtss2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[1,1]
; SSE-NEXT: cvtss2si %xmm3, %rax
; SSE-NEXT: movq %rax, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; SSE-NEXT: movaps %xmm2, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm2[3,3]
; SSE-NEXT: cvtss2si %xmm3, %rax
; SSE-NEXT: movq %rax, %xmm3
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: cvtss2si %xmm2, %rax
; SSE-NEXT: movq %rax, %xmm4
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm3[0]
; SSE-NEXT: cvtss2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm2
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[1,1]
; SSE-NEXT: cvtss2si %xmm3, %rax
; SSE-NEXT: movq %rax, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; SSE-NEXT: movaps %xmm1, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,3],xmm1[3,3]
; SSE-NEXT: cvtss2si %xmm3, %rax
; SSE-NEXT: movq %rax, %xmm5
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: cvtss2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
; SSE-NEXT: movdqa %xmm4, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: llrint_v8i64_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT: vcvtss2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX1-NEXT: vcvtss2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vcvtss2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX1-NEXT: vcvtss2si %xmm3, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[3,3,3,3]
; AVX1-NEXT: vcvtss2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX1-NEXT: vcvtss2si %xmm3, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX1-NEXT: vcvtss2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX1-NEXT: vcvtss2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: llrint_v8i64_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512-NEXT: vcvtss2si %xmm2, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512-NEXT: vcvtss2si %xmm3, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT: vcvtss2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT: vcvtss2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512-NEXT: vcvtss2si %xmm2, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-NEXT: vcvtss2si %xmm3, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT: vcvtss2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT: vcvtss2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm0
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: retq
;
; AVX512DQ-LABEL: llrint_v8i64_v8f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvtps2qq %ymm0, %zmm0
; AVX512DQ-NEXT: retq
  %a = call <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float> %x)
  ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>)
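
; The <16 x i64> result below does not fit in SSE return registers, so the SSE
; lowering returns it in memory through the incoming pointer in %rdi (also
; copied to %rax); the AVX targets return it in four ymm or two zmm registers,
; and AVX512DQ converts each 256-bit source half with a single vcvtps2qq.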
define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) {
; SSE-LABEL: llrint_v16i64_v16f32:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: cvtss2si %xmm0, %rcx
; SSE-NEXT: movq %rcx, %xmm4
; SSE-NEXT: movaps %xmm0, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[1,1]
; SSE-NEXT: cvtss2si %xmm5, %rcx
; SSE-NEXT: movq %rcx, %xmm5
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm5[0]
; SSE-NEXT: movaps %xmm0, %xmm5
; SSE-NEXT: shufps {{.*#+}} xmm5 = xmm5[3,3],xmm0[3,3]
; SSE-NEXT: cvtss2si %xmm5, %rcx
; SSE-NEXT: movq %rcx, %xmm5
; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvtss2si %xmm0, %rcx
; SSE-NEXT: movq %rcx, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm5[0]
; SSE-NEXT: cvtss2si %xmm1, %rcx
; SSE-NEXT: movq %rcx, %xmm5
; SSE-NEXT: movaps %xmm1, %xmm6
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[1,1],xmm1[1,1]
; SSE-NEXT: cvtss2si %xmm6, %rcx
; SSE-NEXT: movq %rcx, %xmm6
; SSE-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
; SSE-NEXT: movaps %xmm1, %xmm6
; SSE-NEXT: shufps {{.*#+}} xmm6 = xmm6[3,3],xmm1[3,3]
; SSE-NEXT: cvtss2si %xmm6, %rcx
; SSE-NEXT: movq %rcx, %xmm6
; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: cvtss2si %xmm1, %rcx
; SSE-NEXT: movq %rcx, %xmm1
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm6[0]
; SSE-NEXT: cvtss2si %xmm2, %rcx
; SSE-NEXT: movq %rcx, %xmm6
; SSE-NEXT: movaps %xmm2, %xmm7
; SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[1,1],xmm2[1,1]
; SSE-NEXT: cvtss2si %xmm7, %rcx
; SSE-NEXT: movq %rcx, %xmm7
; SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm7[0]
; SSE-NEXT: movaps %xmm2, %xmm7
; SSE-NEXT: shufps {{.*#+}} xmm7 = xmm7[3,3],xmm2[3,3]
; SSE-NEXT: cvtss2si %xmm7, %rcx
; SSE-NEXT: movq %rcx, %xmm7
; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: cvtss2si %xmm2, %rcx
; SSE-NEXT: movq %rcx, %xmm2
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm7[0]
; SSE-NEXT: cvtss2si %xmm3, %rcx
; SSE-NEXT: movq %rcx, %xmm7
; SSE-NEXT: movaps %xmm3, %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[1,1],xmm3[1,1]
; SSE-NEXT: cvtss2si %xmm8, %rcx
; SSE-NEXT: movq %rcx, %xmm8
; SSE-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm8[0]
; SSE-NEXT: movaps %xmm3, %xmm8
; SSE-NEXT: shufps {{.*#+}} xmm8 = xmm8[3,3],xmm3[3,3]
; SSE-NEXT: cvtss2si %xmm8, %rcx
; SSE-NEXT: movq %rcx, %xmm8
; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: cvtss2si %xmm3, %rcx
; SSE-NEXT: movq %rcx, %xmm3
; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm8[0]
; SSE-NEXT: movdqa %xmm3, 112(%rdi)
; SSE-NEXT: movdqa %xmm7, 96(%rdi)
; SSE-NEXT: movdqa %xmm2, 80(%rdi)
; SSE-NEXT: movdqa %xmm6, 64(%rdi)
; SSE-NEXT: movdqa %xmm1, 48(%rdi)
; SSE-NEXT: movdqa %xmm5, 32(%rdi)
; SSE-NEXT: movdqa %xmm0, 16(%rdi)
; SSE-NEXT: movdqa %xmm4, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: llrint_v16i64_v16f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps %ymm0, %ymm2
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm2[3,3,3,3]
; AVX1-NEXT: vcvtss2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX1-NEXT: vcvtss2si %xmm3, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT: vcvtss2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vmovshdup {{.*#+}} xmm4 = xmm2[1,1,3,3]
; AVX1-NEXT: vcvtss2si %xmm4, %rax
; AVX1-NEXT: vmovq %rax, %xmm4
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm2[3,3,3,3]
; AVX1-NEXT: vcvtss2si %xmm3, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vshufpd {{.*#+}} xmm4 = xmm2[1,0]
; AVX1-NEXT: vcvtss2si %xmm4, %rax
; AVX1-NEXT: vmovq %rax, %xmm4
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX1-NEXT: vcvtss2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm4
; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX1-NEXT: vcvtss2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm4
; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX1-NEXT: vcvtss2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX1-NEXT: vcvtss2si %xmm3, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vcvtss2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
; AVX1-NEXT: vcvtss2si %xmm5, %rax
; AVX1-NEXT: vmovq %rax, %xmm5
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
; AVX1-NEXT: vcvtss2si %xmm3, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vshufpd {{.*#+}} xmm5 = xmm1[1,0]
; AVX1-NEXT: vcvtss2si %xmm5, %rax
; AVX1-NEXT: vmovq %rax, %xmm5
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm5[0],xmm3[0]
; AVX1-NEXT: vcvtss2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm5
; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX1-NEXT: vcvtss2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm3
; AVX1-NEXT: vmovaps %ymm4, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: llrint_v16i64_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vshufps {{.*#+}} xmm2 = xmm1[3,3,3,3]
; AVX512-NEXT: vcvtss2si %xmm2, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm1[1,0]
; AVX512-NEXT: vcvtss2si %xmm3, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT: vcvtss2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT: vcvtss2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX512-NEXT: vshufps {{.*#+}} xmm2 = xmm0[3,3,3,3]
; AVX512-NEXT: vcvtss2si %xmm2, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512-NEXT: vcvtss2si %xmm3, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT: vcvtss2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; AVX512-NEXT: vcvtss2si %xmm4, %rax
; AVX512-NEXT: vmovq %rax, %xmm4
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm2
; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vshufps {{.*#+}} xmm3 = xmm1[3,3,3,3]
; AVX512-NEXT: vcvtss2si %xmm3, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vshufpd {{.*#+}} xmm4 = xmm1[1,0]
; AVX512-NEXT: vcvtss2si %xmm4, %rax
; AVX512-NEXT: vmovq %rax, %xmm4
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX512-NEXT: vcvtss2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm4
; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3]
; AVX512-NEXT: vcvtss2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0]
; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; AVX512-NEXT: vshufps {{.*#+}} xmm3 = xmm0[3,3,3,3]
; AVX512-NEXT: vcvtss2si %xmm3, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vshufpd {{.*#+}} xmm4 = xmm0[1,0]
; AVX512-NEXT: vcvtss2si %xmm4, %rax
; AVX512-NEXT: vmovq %rax, %xmm4
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX512-NEXT: vcvtss2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm4
; AVX512-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
; AVX512-NEXT: vcvtss2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm0
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
; AVX512-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm1
; AVX512-NEXT: vmovdqa64 %zmm2, %zmm0
; AVX512-NEXT: retq
;
; AVX512DQ-LABEL: llrint_v16i64_v16f32:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvtps2qq %ymm0, %zmm2
; AVX512DQ-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512DQ-NEXT: vcvtps2qq %ymm0, %zmm1
; AVX512DQ-NEXT: vmovaps %zmm2, %zmm0
; AVX512DQ-NEXT: retq
  %a = call <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float> %x)
  ret <16 x i64> %a
}
declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>)
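
; The f64 variants below follow the same pattern as the f32 ones, using
; cvtsd2si / vcvtsd2si per element and the packed vcvtpd2qq form when
; AVX512DQ+AVX512VL is available.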
define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) {
; SSE-LABEL: llrint_v1i64_v1f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %rax
; SSE-NEXT: retq
;
; AVX-LABEL: llrint_v1i64_v1f64:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsd2si %xmm0, %rax
; AVX-NEXT: retq
;
; AVX512DQ-LABEL: llrint_v1i64_v1f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvtsd2si %xmm0, %rax
; AVX512DQ-NEXT: retq
  %a = call <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double> %x)
  ret <1 x i64> %a
}
declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>)

define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) {
; SSE-LABEL: llrint_v2i64_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm1
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvtsd2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: movdqa %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: llrint_v2i64_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsd2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm1
; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX-NEXT: vcvtsd2si %xmm0, %rax
; AVX-NEXT: vmovq %rax, %xmm0
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT: retq
;
; AVX512DQ-LABEL: llrint_v2i64_v2f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvtpd2qq %xmm0, %xmm0
; AVX512DQ-NEXT: retq
  %a = call <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double> %x)
  ret <2 x i64> %a
}
declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>)

define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) {
; SSE-LABEL: llrint_v4i64_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvtsd2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE-NEXT: cvtsd2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm3
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: cvtsd2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
; SSE-NEXT: movdqa %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm3, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: llrint_v4i64_v4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vcvtsd2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT: vcvtsd2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vcvtsd2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vcvtsd2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: llrint_v4i64_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT: vcvtsd2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vcvtsd2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT: vcvtsd2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vcvtsd2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm0
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
;
; AVX512DQ-LABEL: llrint_v4i64_v4f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvtpd2qq %ymm0, %ymm0
; AVX512DQ-NEXT: retq
  %a = call <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double> %x)
  ret <4 x i64> %a
}
declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>)
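
; For <8 x i64> from <8 x double>, the AVX512F lowering still scalarizes but
; pulls the upper 128-bit pieces out of the zmm source with vextractf32x4,
; whereas AVX512DQ handles the whole vector with one vcvtpd2qq on zmm0.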
define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
; SSE-LABEL: llrint_v8i64_v8f64:
; SSE: # %bb.0:
; SSE-NEXT: cvtsd2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm4
; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE-NEXT: cvtsd2si %xmm0, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0]
; SSE-NEXT: cvtsd2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm5
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE-NEXT: cvtsd2si %xmm1, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0]
; SSE-NEXT: cvtsd2si %xmm2, %rax
; SSE-NEXT: movq %rax, %xmm6
; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE-NEXT: cvtsd2si %xmm2, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm0[0]
; SSE-NEXT: cvtsd2si %xmm3, %rax
; SSE-NEXT: movq %rax, %xmm7
; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1,1]
; SSE-NEXT: cvtsd2si %xmm3, %rax
; SSE-NEXT: movq %rax, %xmm0
; SSE-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm0[0]
; SSE-NEXT: movdqa %xmm4, %xmm0
; SSE-NEXT: movdqa %xmm5, %xmm1
; SSE-NEXT: movdqa %xmm6, %xmm2
; SSE-NEXT: movdqa %xmm7, %xmm3
; SSE-NEXT: retq
;
; AVX1-LABEL: llrint_v8i64_v8f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vcvtsd2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX1-NEXT: vcvtsd2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vcvtsd2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vcvtsd2si %xmm0, %rax
; AVX1-NEXT: vmovq %rax, %xmm0
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vcvtsd2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX1-NEXT: vcvtsd2si %xmm2, %rax
; AVX1-NEXT: vmovq %rax, %xmm2
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vcvtsd2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm3
; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT: vcvtsd2si %xmm1, %rax
; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm3[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: llrint_v8i64_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; AVX512-NEXT: vcvtsd2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512-NEXT: vcvtsd2si %xmm1, %rax
; AVX512-NEXT: vmovq %rax, %xmm1
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; AVX512-NEXT: vcvtsd2si %xmm2, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vcvtsd2si %xmm2, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512-NEXT: vcvtsd2si %xmm2, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX512-NEXT: vcvtsd2si %xmm2, %rax
; AVX512-NEXT: vmovq %rax, %xmm2
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX512-NEXT: vcvtsd2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm3
; AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vcvtsd2si %xmm0, %rax
; AVX512-NEXT: vmovq %rax, %xmm0
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT: retq
;
; AVX512DQ-LABEL: llrint_v8i64_v8f64:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvtpd2qq %zmm0, %zmm0
; AVX512DQ-NEXT: retq
  %a = call <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double> %x)
  ret <8 x i64> %a
}
declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)