; xref: /llvm-project/llvm/test/CodeGen/X86/prefer-fpext-splat.ll (revision 139bcda542514b7a064fe9225014ec4268bb2b65)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown                              | FileCheck %s --check-prefixes=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx                  | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2                 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl    | FileCheck %s --check-prefixes=AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16,+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512FP16

define <2 x double> @prefer_f32_v2f64(ptr %p) nounwind {
; SSE-LABEL: prefer_f32_v2f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    cvtps2pd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: prefer_f32_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    vcvtps2pd %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: prefer_f32_v2f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvtps2pd (%rdi){1to2}, %xmm0
; AVX512-NEXT:    retq
entry:
  ; Load one float, splat it into both <2 x float> lanes, then fpext to
  ; <2 x double>. AVX512 folds this to a broadcast-memory-operand convert.
  %0 = load float, ptr %p, align 4
  %vecinit.i = insertelement <2 x float> undef, float %0, i64 0
  %vecinit3.i = shufflevector <2 x float> %vecinit.i, <2 x float> poison, <2 x i32> zeroinitializer
  %conv.i = fpext <2 x float> %vecinit3.i to <2 x double>
  ret <2 x double> %conv.i
}
define <4 x double> @prefer_f32_v4f64(ptr %p) nounwind {
; SSE-LABEL: prefer_f32_v4f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    cvtps2pd %xmm0, %xmm0
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: prefer_f32_v4f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    vcvtps2pd %xmm0, %ymm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: prefer_f32_v4f64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vcvtps2pd (%rdi){1to4}, %ymm0
; AVX512-NEXT:    retq
entry:
  ; Load one float, splat it across <4 x float>, then fpext to <4 x double>.
  ; AVX512 folds load+splat+extend into a single {1to4} broadcast convert.
  %0 = load float, ptr %p, align 4
  %vecinit.i = insertelement <4 x float> undef, float %0, i64 0
  %vecinit3.i = shufflevector <4 x float> %vecinit.i, <4 x float> poison, <4 x i32> zeroinitializer
  %conv.i = fpext <4 x float> %vecinit3.i to <4 x double>
  ret <4 x double> %conv.i
}
define <4 x float> @prefer_f16_v4f32(ptr %p) nounwind {
; SSE-LABEL: prefer_f16_v4f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    pinsrw $0, (%rdi), %xmm0
; SSE-NEXT:    callq __extendhfsf2@PLT
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: prefer_f16_v4f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    pushq %rax
; AVX1-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; AVX1-NEXT:    callq __extendhfsf2@PLT
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    popq %rax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: prefer_f16_v4f32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    pushq %rax
; AVX2-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; AVX2-NEXT:    callq __extendhfsf2@PLT
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    popq %rax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: prefer_f16_v4f32:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512FP16-LABEL: prefer_f16_v4f32:
; AVX512FP16:       # %bb.0: # %entry
; AVX512FP16-NEXT:    vcvtph2psx (%rdi){1to4}, %xmm0
; AVX512FP16-NEXT:    retq
entry:
  ; Load one half, splat it across <4 x half>, then fpext to <4 x float>.
  ; Without native f16 support this goes through the __extendhfsf2 libcall;
  ; AVX512FP16 folds everything into one {1to4} broadcast convert.
  %0 = load half, ptr %p, align 4
  %vecinit.i = insertelement <4 x half> undef, half %0, i64 0
  %vecinit3.i = shufflevector <4 x half> %vecinit.i, <4 x half> poison, <4 x i32> zeroinitializer
  %conv.i = fpext <4 x half> %vecinit3.i to <4 x float>
  ret <4 x float> %conv.i
}
define <8 x float> @prefer_f16_v8f32(ptr %p) nounwind {
; SSE-LABEL: prefer_f16_v8f32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    pinsrw $0, (%rdi), %xmm0
; SSE-NEXT:    callq __extendhfsf2@PLT
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: prefer_f16_v8f32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    pushq %rax
; AVX1-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; AVX1-NEXT:    callq __extendhfsf2@PLT
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    popq %rax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: prefer_f16_v8f32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    pushq %rax
; AVX2-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; AVX2-NEXT:    callq __extendhfsf2@PLT
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    popq %rax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: prefer_f16_v8f32:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX512F-NEXT:    vcvtph2ps %xmm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512FP16-LABEL: prefer_f16_v8f32:
; AVX512FP16:       # %bb.0: # %entry
; AVX512FP16-NEXT:    vcvtph2psx (%rdi){1to8}, %ymm0
; AVX512FP16-NEXT:    retq
entry:
  ; Load one half, splat it across <8 x half>, then fpext to <8 x float>.
  ; AVX512FP16 folds load+splat+extend into one {1to8} broadcast convert.
  %0 = load half, ptr %p, align 4
  %vecinit.i = insertelement <8 x half> undef, half %0, i64 0
  %vecinit3.i = shufflevector <8 x half> %vecinit.i, <8 x half> poison, <8 x i32> zeroinitializer
  %conv.i = fpext <8 x half> %vecinit3.i to <8 x float>
  ret <8 x float> %conv.i
}
define <2 x double> @prefer_f16_v2f64(ptr %p) nounwind {
; SSE-LABEL: prefer_f16_v2f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    pinsrw $0, (%rdi), %xmm0
; SSE-NEXT:    callq __extendhfsf2@PLT
; SSE-NEXT:    cvtss2sd %xmm0, %xmm0
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: prefer_f16_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; AVX-NEXT:    callq __extendhfsf2@PLT
; AVX-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    popq %rax
; AVX-NEXT:    retq
;
; AVX512F-LABEL: prefer_f16_v2f64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX512F-NEXT:    vcvtps2pd %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512FP16-LABEL: prefer_f16_v2f64:
; AVX512FP16:       # %bb.0: # %entry
; AVX512FP16-NEXT:    vcvtph2pd (%rdi){1to2}, %xmm0
; AVX512FP16-NEXT:    retq
entry:
  ; Load one half, splat it into <2 x half>, then fpext to <2 x double>.
  ; AVX512FP16 has a direct half->double {1to2} broadcast convert.
  %0 = load half, ptr %p, align 4
  %vecinit.i = insertelement <2 x half> undef, half %0, i64 0
  %vecinit3.i = shufflevector <2 x half> %vecinit.i, <2 x half> poison, <2 x i32> zeroinitializer
  %conv.i = fpext <2 x half> %vecinit3.i to <2 x double>
  ret <2 x double> %conv.i
}
define <4 x double> @prefer_f16_v4f64(ptr %p) nounwind {
; SSE-LABEL: prefer_f16_v4f64:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    pinsrw $0, (%rdi), %xmm0
; SSE-NEXT:    callq __extendhfsf2@PLT
; SSE-NEXT:    cvtss2sd %xmm0, %xmm0
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX1-LABEL: prefer_f16_v4f64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    pushq %rax
; AVX1-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; AVX1-NEXT:    callq __extendhfsf2@PLT
; AVX1-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    popq %rax
; AVX1-NEXT:    retq
;
; AVX2-LABEL: prefer_f16_v4f64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    pushq %rax
; AVX2-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
; AVX2-NEXT:    callq __extendhfsf2@PLT
; AVX2-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    popq %rax
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: prefer_f16_v4f64:
; AVX512F:       # %bb.0: # %entry
; AVX512F-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX512F-NEXT:    vcvtph2ps %xmm0, %xmm0
; AVX512F-NEXT:    vcvtps2pd %xmm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512FP16-LABEL: prefer_f16_v4f64:
; AVX512FP16:       # %bb.0: # %entry
; AVX512FP16-NEXT:    vcvtph2pd (%rdi){1to4}, %ymm0
; AVX512FP16-NEXT:    retq
entry:
  ; Load one half, splat it across <4 x half>, then fpext to <4 x double>.
  ; AVX512FP16 has a direct half->double {1to4} broadcast convert.
  %0 = load half, ptr %p, align 4
  %vecinit.i = insertelement <4 x half> undef, half %0, i64 0
  %vecinit3.i = shufflevector <4 x half> %vecinit.i, <4 x half> poison, <4 x i32> zeroinitializer
  %conv.i = fpext <4 x half> %vecinit3.i to <4 x double>
  ret <4 x double> %conv.i
}