; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

define void @fptrunc_frommem2(ptr %in, ptr %out) {
; X86-SSE-LABEL: fptrunc_frommem2:
; X86-SSE:       # %bb.0: # %entry
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    cvtpd2ps (%ecx), %xmm0
; X86-SSE-NEXT:    movlpd %xmm0, (%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: fptrunc_frommem2:
; X86-AVX:       # %bb.0: # %entry
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vcvtpd2psx (%ecx), %xmm0
; X86-AVX-NEXT:    vmovlpd %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    movlpd %xmm0, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    vmovlpd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, ptr %in
  %1 = fptrunc <2 x double> %0 to <2 x float>
  store <2 x float> %1, ptr %out, align 1
  ret void
}

define void @fptrunc_frommem4(ptr %in, ptr %out) {
; X86-SSE-LABEL: fptrunc_frommem4:
; X86-SSE:       # %bb.0: # %entry
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X86-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X86-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X86-SSE-NEXT:    movupd %xmm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: fptrunc_frommem4:
; X86-AVX:       # %bb.0: # %entry
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X86-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem4:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem4:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, ptr %in
  %1 = fptrunc <4 x double> %0 to <4 x float>
  store <4 x float> %1, ptr %out, align 1
  ret void
}

define void @fptrunc_frommem8(ptr %in, ptr %out) {
; X86-SSE-LABEL: fptrunc_frommem8:
; X86-SSE:       # %bb.0: # %entry
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X86-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X86-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X86-SSE-NEXT:    cvtpd2ps 48(%ecx), %xmm0
; X86-SSE-NEXT:    cvtpd2ps 32(%ecx), %xmm2
; X86-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X86-SSE-NEXT:    movupd %xmm2, 16(%eax)
; X86-SSE-NEXT:    movupd %xmm1, (%eax)
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: fptrunc_frommem8:
; X86-AVX:       # %bb.0: # %entry
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X86-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
; X86-AVX-NEXT:    vmovupd %xmm1, 16(%eax)
; X86-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem8:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    cvtpd2ps 48(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps 32(%rdi), %xmm2
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm2, 16(%rsi)
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem8:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
; X64-AVX-NEXT:    vmovupd %xmm1, 16(%rsi)
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <8 x double>, ptr %in
  %1 = fptrunc <8 x double> %0 to <8 x float>
  store <8 x float> %1, ptr %out, align 1
  ret void
}

define <4 x float> @fptrunc_frommem2_zext(ptr %ld) {
; X86-SSE-LABEL: fptrunc_frommem2_zext:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    cvtpd2ps (%eax), %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: fptrunc_frommem2_zext:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX-NEXT:    vcvtpd2psx (%eax), %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2_zext:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2_zext:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    retq
  %arg = load <2 x double>, ptr %ld, align 16
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

define <4 x float> @fptrunc_fromreg2_zext(<2 x double> %arg) {
; X86-SSE-LABEL: fptrunc_fromreg2_zext:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: fptrunc_fromreg2_zext:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromreg2_zext:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromreg2_zext:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

; FIXME: For exact truncations we should be able to fold this.
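; All four doubles below are exactly representable as floats, so the fptrunc
; is exact and could in principle be folded to a <4 x float> constant.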
define <4 x float> @fptrunc_fromconst() {
; X86-SSE-LABEL: fptrunc_fromconst:
; X86-SSE:       # %bb.0: # %entry
; X86-SSE-NEXT:    cvtpd2ps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT:    cvtpd2ps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE-NEXT:    retl
;
; X86-AVX-LABEL: fptrunc_fromconst:
; X86-AVX:       # %bb.0: # %entry
; X86-AVX-NEXT:    vcvtpd2psy {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromconst:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE-NEXT:    cvtpd2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromconst:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-AVX-NEXT:    retq
entry:
  %0 = insertelement <4 x double> undef, double 1.0, i32 0
  %1 = insertelement <4 x double> %0, double -2.0, i32 1
  %2 = insertelement <4 x double> %1, double +4.0, i32 2
  %3 = insertelement <4 x double> %2, double -0.0, i32 3
  %4 = fptrunc <4 x double> %3 to <4 x float>
  ret <4 x float> %4
}