xref: /llvm-project/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll (revision 032014ef103157bfd8403418538e25f3f58efa9d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
4; RUN: FileCheck %s --check-prefix=CHECK-P8
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
6; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
7; RUN: FileCheck %s --check-prefix=CHECK-P9
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
9; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
10; RUN: FileCheck %s --check-prefix=CHECK-BE
11
; test2elt: reinterprets the i32 argument as <2 x i16>, converts both lanes to
; float with uitofp (unsigned), and returns the <2 x float> result bitcast to
; i64. The autogenerated assertions cover the three llc invocations listed at
; the top of the file (pwr8 powerpc64le, pwr9 powerpc64le, pwr9 powerpc64).
; In the expected output the P8 sequence isolates the halfwords in GPRs
; (clrldi/rldicl/clrlwi), while the P9 and BE sequences use vextractuh.
12define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
13; CHECK-P8-LABEL: test2elt:
14; CHECK-P8:       # %bb.0: # %entry
15; CHECK-P8-NEXT:    mtfprd f0, r3
16; CHECK-P8-NEXT:    mffprd r3, f0
17; CHECK-P8-NEXT:    clrldi r4, r3, 48
18; CHECK-P8-NEXT:    rldicl r3, r3, 48, 48
19; CHECK-P8-NEXT:    clrlwi r4, r4, 16
20; CHECK-P8-NEXT:    clrlwi r3, r3, 16
21; CHECK-P8-NEXT:    mtfprwz f0, r4
22; CHECK-P8-NEXT:    mtfprwz f1, r3
23; CHECK-P8-NEXT:    xscvuxdsp f0, f0
24; CHECK-P8-NEXT:    xscvuxdsp f1, f1
25; CHECK-P8-NEXT:    xscvdpspn vs0, f0
26; CHECK-P8-NEXT:    xscvdpspn vs1, f1
27; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
28; CHECK-P8-NEXT:    xxswapd vs0, vs0
29; CHECK-P8-NEXT:    mffprd r3, f0
30; CHECK-P8-NEXT:    blr
31;
32; CHECK-P9-LABEL: test2elt:
33; CHECK-P9:       # %bb.0: # %entry
34; CHECK-P9-NEXT:    mtvsrws v2, r3
35; CHECK-P9-NEXT:    vextractuh v3, v2, 14
36; CHECK-P9-NEXT:    vextractuh v2, v2, 12
37; CHECK-P9-NEXT:    xscvuxdsp f0, v3
38; CHECK-P9-NEXT:    xscvuxdsp f1, v2
39; CHECK-P9-NEXT:    xscvdpspn vs0, f0
40; CHECK-P9-NEXT:    xscvdpspn vs1, f1
41; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
42; CHECK-P9-NEXT:    mfvsrld r3, vs0
43; CHECK-P9-NEXT:    blr
44;
45; CHECK-BE-LABEL: test2elt:
46; CHECK-BE:       # %bb.0: # %entry
47; CHECK-BE-NEXT:    mtvsrws v2, r3
48; CHECK-BE-NEXT:    vextractuh v3, v2, 2
49; CHECK-BE-NEXT:    vextractuh v2, v2, 0
50; CHECK-BE-NEXT:    xscvuxdsp f0, v3
51; CHECK-BE-NEXT:    xscvdpspn v3, f0
52; CHECK-BE-NEXT:    xscvuxdsp f0, v2
53; CHECK-BE-NEXT:    xscvdpspn v2, f0
54; CHECK-BE-NEXT:    vmrgow v2, v2, v3
55; CHECK-BE-NEXT:    mfvsrd r3, v2
56; CHECK-BE-NEXT:    blr
57entry:
  ; The vector travels in and out as a plain integer (i32 in, i64 out); the
  ; bitcasts only reinterpret the bits, the uitofp is the operation under test.
58  %0 = bitcast i32 %a.coerce to <2 x i16>
59  %1 = uitofp <2 x i16> %0 to <2 x float>
60  %2 = bitcast <2 x float> %1 to i64
61  ret i64 %2
62}
63
; test4elt: reinterprets the i64 argument as <4 x i16>, converts the lanes to
; float with uitofp (unsigned), and returns the <4 x float> value directly.
; The P8 and P9 expectations are the same four-instruction sequence
; (mtvsrd, xxlxor, vmrghh, xvcvuxwsp); the BE expectation instead loads a
; constant-pool entry (.LCPI1_0) and uses xxperm before the conversion.
64define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 {
65; CHECK-P8-LABEL: test4elt:
66; CHECK-P8:       # %bb.0: # %entry
67; CHECK-P8-NEXT:    mtvsrd v2, r3
68; CHECK-P8-NEXT:    xxlxor v3, v3, v3
69; CHECK-P8-NEXT:    vmrghh v2, v3, v2
70; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
71; CHECK-P8-NEXT:    blr
72;
73; CHECK-P9-LABEL: test4elt:
74; CHECK-P9:       # %bb.0: # %entry
75; CHECK-P9-NEXT:    mtvsrd v2, r3
76; CHECK-P9-NEXT:    xxlxor v3, v3, v3
77; CHECK-P9-NEXT:    vmrghh v2, v3, v2
78; CHECK-P9-NEXT:    xvcvuxwsp v2, v2
79; CHECK-P9-NEXT:    blr
80;
81; CHECK-BE-LABEL: test4elt:
82; CHECK-BE:       # %bb.0: # %entry
83; CHECK-BE-NEXT:    mtfprd f0, r3
84; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
85; CHECK-BE-NEXT:    xxlxor vs2, vs2, vs2
86; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
87; CHECK-BE-NEXT:    lxv vs1, 0(r3)
88; CHECK-BE-NEXT:    xxperm vs0, vs2, vs1
89; CHECK-BE-NEXT:    xvcvuxwsp v2, vs0
90; CHECK-BE-NEXT:    blr
91entry:
  ; The bitcast only reinterprets the 64 argument bits as four i16 lanes; the
  ; uitofp is the operation under test.
92  %0 = bitcast i64 %a.coerce to <4 x i16>
93  %1 = uitofp <4 x i16> %0 to <4 x float>
94  ret <4 x float> %1
95}
96
; test8elt: converts the <8 x i16> argument %a to <8 x float> with uitofp
; (unsigned) and stores the result through the sret pointer %agg.result with
; 32-byte alignment. Every expected sequence contains two xvcvuxwsp
; conversions and two 16-byte stores (offsets 0 and 16). The P8 expectation
; stores with xxswapd + stxvd2x; the P9 and BE expectations use stxv.
97define void @test8elt(ptr noalias nocapture sret(<8 x float>) %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
98; CHECK-P8-LABEL: test8elt:
99; CHECK-P8:       # %bb.0: # %entry
100; CHECK-P8-NEXT:    xxlxor v3, v3, v3
101; CHECK-P8-NEXT:    li r4, 16
102; CHECK-P8-NEXT:    vmrglh v4, v3, v2
103; CHECK-P8-NEXT:    vmrghh v2, v3, v2
104; CHECK-P8-NEXT:    xvcvuxwsp vs0, v4
105; CHECK-P8-NEXT:    xvcvuxwsp vs1, v2
106; CHECK-P8-NEXT:    xxswapd vs1, vs1
107; CHECK-P8-NEXT:    xxswapd vs0, vs0
108; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
109; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
110; CHECK-P8-NEXT:    blr
111;
112; CHECK-P9-LABEL: test8elt:
113; CHECK-P9:       # %bb.0: # %entry
114; CHECK-P9-NEXT:    xxlxor v3, v3, v3
115; CHECK-P9-NEXT:    vmrglh v4, v3, v2
116; CHECK-P9-NEXT:    vmrghh v2, v3, v2
117; CHECK-P9-NEXT:    xvcvuxwsp vs0, v4
118; CHECK-P9-NEXT:    xvcvuxwsp vs1, v2
119; CHECK-P9-NEXT:    stxv vs1, 16(r3)
120; CHECK-P9-NEXT:    stxv vs0, 0(r3)
121; CHECK-P9-NEXT:    blr
122;
123; CHECK-BE-LABEL: test8elt:
124; CHECK-BE:       # %bb.0: # %entry
125; CHECK-BE-NEXT:    xxlxor v3, v3, v3
126; CHECK-BE-NEXT:    vmrghh v4, v3, v2
127; CHECK-BE-NEXT:    vmrglh v2, v3, v2
128; CHECK-BE-NEXT:    xvcvuxwsp vs0, v4
129; CHECK-BE-NEXT:    xvcvuxwsp vs1, v2
130; CHECK-BE-NEXT:    stxv vs1, 16(r3)
131; CHECK-BE-NEXT:    stxv vs0, 0(r3)
132; CHECK-BE-NEXT:    blr
133entry:
  ; The 32-byte result is wider than the 16-byte stores in the expected
  ; output, which is why each sequence converts and stores two halves.
134  %0 = uitofp <8 x i16> %a to <8 x float>
135  store <8 x float> %0, ptr %agg.result, align 32
136  ret void
137}
138
; test16elt: loads a <16 x i16> vector (align 32) from the unnamed second
; pointer argument, converts it to <16 x float> with uitofp (unsigned), and
; stores the result through the sret pointer %agg.result (align 64).
; Every expected sequence contains four xvcvuxwsp conversions and four
; 16-byte stores (offsets 0, 16, 32, 48). The P8 and P9 expectations load two
; constant-pool entries (.LCPI3_0, .LCPI3_1) as vperm controls; the BE
; expectation loads only .LCPI3_0 and uses vmrglh for the other two quarters.
139define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
140; CHECK-P8-LABEL: test16elt:
141; CHECK-P8:       # %bb.0: # %entry
142; CHECK-P8-NEXT:    li r5, 16
143; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
144; CHECK-P8-NEXT:    xxlxor v0, v0, v0
145; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
146; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
147; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_1@toc@l
148; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
149; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
150; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
151; CHECK-P8-NEXT:    xxswapd v4, vs1
152; CHECK-P8-NEXT:    xxswapd v2, vs0
153; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
154; CHECK-P8-NEXT:    li r4, 48
155; CHECK-P8-NEXT:    xxswapd v3, vs2
156; CHECK-P8-NEXT:    vperm v1, v0, v2, v3
157; CHECK-P8-NEXT:    vperm v3, v0, v4, v3
158; CHECK-P8-NEXT:    xvcvuxwsp vs2, v1
159; CHECK-P8-NEXT:    xvcvuxwsp vs1, v3
160; CHECK-P8-NEXT:    xxswapd v5, vs0
161; CHECK-P8-NEXT:    vperm v4, v0, v4, v5
162; CHECK-P8-NEXT:    vperm v2, v0, v2, v5
163; CHECK-P8-NEXT:    xvcvuxwsp vs0, v4
164; CHECK-P8-NEXT:    xvcvuxwsp vs3, v2
165; CHECK-P8-NEXT:    xxswapd vs2, vs2
166; CHECK-P8-NEXT:    xxswapd vs1, vs1
167; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
168; CHECK-P8-NEXT:    li r4, 32
169; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
170; CHECK-P8-NEXT:    xxswapd vs3, vs3
171; CHECK-P8-NEXT:    xxswapd vs0, vs0
172; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
173; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
174; CHECK-P8-NEXT:    blr
175;
176; CHECK-P9-LABEL: test16elt:
177; CHECK-P9:       # %bb.0: # %entry
178; CHECK-P9-NEXT:    lxv v2, 16(r4)
179; CHECK-P9-NEXT:    lxv v3, 0(r4)
180; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
181; CHECK-P9-NEXT:    xxlxor v5, v5, v5
182; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0@toc@l
183; CHECK-P9-NEXT:    lxv v4, 0(r4)
184; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
185; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_1@toc@l
186; CHECK-P9-NEXT:    vperm v0, v5, v3, v4
187; CHECK-P9-NEXT:    xvcvuxwsp vs0, v0
188; CHECK-P9-NEXT:    lxv v0, 0(r4)
189; CHECK-P9-NEXT:    vperm v3, v5, v3, v0
190; CHECK-P9-NEXT:    stxv vs0, 0(r3)
191; CHECK-P9-NEXT:    xvcvuxwsp vs1, v3
192; CHECK-P9-NEXT:    vperm v3, v5, v2, v4
193; CHECK-P9-NEXT:    vperm v2, v5, v2, v0
194; CHECK-P9-NEXT:    xvcvuxwsp vs2, v3
195; CHECK-P9-NEXT:    xvcvuxwsp vs3, v2
196; CHECK-P9-NEXT:    stxv vs1, 16(r3)
197; CHECK-P9-NEXT:    stxv vs3, 48(r3)
198; CHECK-P9-NEXT:    stxv vs2, 32(r3)
199; CHECK-P9-NEXT:    blr
200;
201; CHECK-BE-LABEL: test16elt:
202; CHECK-BE:       # %bb.0: # %entry
203; CHECK-BE-NEXT:    lxv v2, 16(r4)
204; CHECK-BE-NEXT:    lxv v3, 0(r4)
205; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
206; CHECK-BE-NEXT:    xxlxor v5, v5, v5
207; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
208; CHECK-BE-NEXT:    lxv v4, 0(r4)
209; CHECK-BE-NEXT:    vperm v0, v5, v3, v4
210; CHECK-BE-NEXT:    vperm v4, v5, v2, v4
211; CHECK-BE-NEXT:    vmrglh v3, v5, v3
212; CHECK-BE-NEXT:    vmrglh v2, v5, v2
213; CHECK-BE-NEXT:    xvcvuxwsp vs0, v0
214; CHECK-BE-NEXT:    xvcvuxwsp vs1, v4
215; CHECK-BE-NEXT:    stxv vs1, 32(r3)
216; CHECK-BE-NEXT:    xvcvuxwsp vs2, v3
217; CHECK-BE-NEXT:    xvcvuxwsp vs3, v2
218; CHECK-BE-NEXT:    stxv vs3, 48(r3)
219; CHECK-BE-NEXT:    stxv vs2, 16(r3)
220; CHECK-BE-NEXT:    stxv vs0, 0(r3)
221; CHECK-BE-NEXT:    blr
222entry:
  ; %0 is the unnamed second parameter (the source pointer), so the first
  ; unnamed instruction result in this function is %1.
223  %a = load <16 x i16>, ptr %0, align 32
224  %1 = uitofp <16 x i16> %a to <16 x float>
225  store <16 x float> %1, ptr %agg.result, align 64
226  ret void
227}
228
; test2elt_signed: signed counterpart of the <2 x i16> conversion test.
; Reinterprets the i32 argument as <2 x i16>, converts both lanes to float
; with sitofp (signed), and returns the <2 x float> result bitcast to i64.
; The expected output uses the signed scalar conversion xscvsxdsp in all
; three configurations, preceded by a halfword sign extension: extsh in the
; P8 sequence, vextsh2d in the P9 and BE sequences.
229define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
230; CHECK-P8-LABEL: test2elt_signed:
231; CHECK-P8:       # %bb.0: # %entry
232; CHECK-P8-NEXT:    mtfprd f0, r3
233; CHECK-P8-NEXT:    mffprd r3, f0
234; CHECK-P8-NEXT:    clrldi r4, r3, 48
235; CHECK-P8-NEXT:    rldicl r3, r3, 48, 48
236; CHECK-P8-NEXT:    extsh r4, r4
237; CHECK-P8-NEXT:    extsh r3, r3
238; CHECK-P8-NEXT:    mtfprwa f0, r4
239; CHECK-P8-NEXT:    mtfprwa f1, r3
240; CHECK-P8-NEXT:    xscvsxdsp f0, f0
241; CHECK-P8-NEXT:    xscvsxdsp f1, f1
242; CHECK-P8-NEXT:    xscvdpspn vs0, f0
243; CHECK-P8-NEXT:    xscvdpspn vs1, f1
244; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
245; CHECK-P8-NEXT:    xxswapd vs0, vs0
246; CHECK-P8-NEXT:    mffprd r3, f0
247; CHECK-P8-NEXT:    blr
248;
249; CHECK-P9-LABEL: test2elt_signed:
250; CHECK-P9:       # %bb.0: # %entry
251; CHECK-P9-NEXT:    mtvsrws v2, r3
252; CHECK-P9-NEXT:    vextractuh v3, v2, 14
253; CHECK-P9-NEXT:    vextractuh v2, v2, 12
254; CHECK-P9-NEXT:    vextsh2d v3, v3
255; CHECK-P9-NEXT:    vextsh2d v2, v2
256; CHECK-P9-NEXT:    xscvsxdsp f0, v3
257; CHECK-P9-NEXT:    xscvsxdsp f1, v2
258; CHECK-P9-NEXT:    xscvdpspn vs0, f0
259; CHECK-P9-NEXT:    xscvdpspn vs1, f1
260; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
261; CHECK-P9-NEXT:    mfvsrld r3, vs0
262; CHECK-P9-NEXT:    blr
263;
264; CHECK-BE-LABEL: test2elt_signed:
265; CHECK-BE:       # %bb.0: # %entry
266; CHECK-BE-NEXT:    mtvsrws v2, r3
267; CHECK-BE-NEXT:    vextractuh v3, v2, 2
268; CHECK-BE-NEXT:    vextractuh v2, v2, 0
269; CHECK-BE-NEXT:    vextsh2d v3, v3
270; CHECK-BE-NEXT:    vextsh2d v2, v2
271; CHECK-BE-NEXT:    xscvsxdsp f0, v3
272; CHECK-BE-NEXT:    xscvdpspn v3, f0
273; CHECK-BE-NEXT:    xscvsxdsp f0, v2
274; CHECK-BE-NEXT:    xscvdpspn v2, f0
275; CHECK-BE-NEXT:    vmrgow v2, v2, v3
276; CHECK-BE-NEXT:    mfvsrd r3, v2
277; CHECK-BE-NEXT:    blr
278entry:
  ; The vector travels in and out as a plain integer (i32 in, i64 out); the
  ; bitcasts only reinterpret the bits, the sitofp is the operation under test.
279  %0 = bitcast i32 %a.coerce to <2 x i16>
280  %1 = sitofp <2 x i16> %0 to <2 x float>
281  %2 = bitcast <2 x float> %1 to i64
282  ret i64 %2
283}
284
; test4elt_signed: signed counterpart of the <4 x i16> conversion test.
; Reinterprets the i64 argument as <4 x i16>, converts the lanes to float with
; sitofp (signed), and returns the <4 x float> value directly.
; The P9 and BE expectations are identical (mtvsrd, vmrghh, vextsh2w,
; xvcvsxwsp). The P8 expectation has no vextsh2w; it uses a
; vspltisw 8 / vadduwm / vslw / vsraw sequence in its place -- presumably a
; shift-left/arithmetic-shift-right sign extension; confirm against the ISA.
285define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 {
286; CHECK-P8-LABEL: test4elt_signed:
287; CHECK-P8:       # %bb.0: # %entry
288; CHECK-P8-NEXT:    mtvsrd v3, r3
289; CHECK-P8-NEXT:    vspltisw v2, 8
290; CHECK-P8-NEXT:    vadduwm v2, v2, v2
291; CHECK-P8-NEXT:    vmrghh v3, v3, v3
292; CHECK-P8-NEXT:    vslw v3, v3, v2
293; CHECK-P8-NEXT:    vsraw v2, v3, v2
294; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
295; CHECK-P8-NEXT:    blr
296;
297; CHECK-P9-LABEL: test4elt_signed:
298; CHECK-P9:       # %bb.0: # %entry
299; CHECK-P9-NEXT:    mtvsrd v2, r3
300; CHECK-P9-NEXT:    vmrghh v2, v2, v2
301; CHECK-P9-NEXT:    vextsh2w v2, v2
302; CHECK-P9-NEXT:    xvcvsxwsp v2, v2
303; CHECK-P9-NEXT:    blr
304;
305; CHECK-BE-LABEL: test4elt_signed:
306; CHECK-BE:       # %bb.0: # %entry
307; CHECK-BE-NEXT:    mtvsrd v2, r3
308; CHECK-BE-NEXT:    vmrghh v2, v2, v2
309; CHECK-BE-NEXT:    vextsh2w v2, v2
310; CHECK-BE-NEXT:    xvcvsxwsp v2, v2
311; CHECK-BE-NEXT:    blr
312entry:
  ; The bitcast only reinterprets the 64 argument bits as four i16 lanes; the
  ; sitofp is the operation under test.
313  %0 = bitcast i64 %a.coerce to <4 x i16>
314  %1 = sitofp <4 x i16> %0 to <4 x float>
315  ret <4 x float> %1
316}
317
; test8elt_signed: signed counterpart of the <8 x i16> conversion test.
; Converts the <8 x i16> argument %a to <8 x float> with sitofp (signed) and
; stores the result through the sret pointer %agg.result with 32-byte
; alignment. Every expected sequence contains two xvcvsxwsp conversions and
; two 16-byte stores (offsets 0 and 16). The P9 and BE expectations widen the
; halfwords with vextsh2w; the P8 expectation uses a
; vspltisw 8 / vadduwm / vslw / vsraw sequence in its place.
318define void @test8elt_signed(ptr noalias nocapture sret(<8 x float>) %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
319; CHECK-P8-LABEL: test8elt_signed:
320; CHECK-P8:       # %bb.0: # %entry
321; CHECK-P8-NEXT:    vspltisw v3, 8
322; CHECK-P8-NEXT:    vmrglh v4, v2, v2
323; CHECK-P8-NEXT:    li r4, 16
324; CHECK-P8-NEXT:    vadduwm v3, v3, v3
325; CHECK-P8-NEXT:    vmrghh v2, v2, v2
326; CHECK-P8-NEXT:    vslw v4, v4, v3
327; CHECK-P8-NEXT:    vslw v2, v2, v3
328; CHECK-P8-NEXT:    vsraw v4, v4, v3
329; CHECK-P8-NEXT:    vsraw v2, v2, v3
330; CHECK-P8-NEXT:    xvcvsxwsp vs0, v4
331; CHECK-P8-NEXT:    xvcvsxwsp vs1, v2
332; CHECK-P8-NEXT:    xxswapd vs1, vs1
333; CHECK-P8-NEXT:    xxswapd vs0, vs0
334; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
335; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
336; CHECK-P8-NEXT:    blr
337;
338; CHECK-P9-LABEL: test8elt_signed:
339; CHECK-P9:       # %bb.0: # %entry
340; CHECK-P9-NEXT:    vmrglh v3, v2, v2
341; CHECK-P9-NEXT:    vmrghh v2, v2, v2
342; CHECK-P9-NEXT:    vextsh2w v3, v3
343; CHECK-P9-NEXT:    vextsh2w v2, v2
344; CHECK-P9-NEXT:    xvcvsxwsp vs0, v3
345; CHECK-P9-NEXT:    xvcvsxwsp vs1, v2
346; CHECK-P9-NEXT:    stxv vs1, 16(r3)
347; CHECK-P9-NEXT:    stxv vs0, 0(r3)
348; CHECK-P9-NEXT:    blr
349;
350; CHECK-BE-LABEL: test8elt_signed:
351; CHECK-BE:       # %bb.0: # %entry
352; CHECK-BE-NEXT:    vmrghh v3, v2, v2
353; CHECK-BE-NEXT:    vmrglh v2, v2, v2
354; CHECK-BE-NEXT:    vextsh2w v3, v3
355; CHECK-BE-NEXT:    vextsh2w v2, v2
356; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
357; CHECK-BE-NEXT:    xvcvsxwsp vs1, v2
358; CHECK-BE-NEXT:    stxv vs1, 16(r3)
359; CHECK-BE-NEXT:    stxv vs0, 0(r3)
360; CHECK-BE-NEXT:    blr
361entry:
  ; The 32-byte result is wider than the 16-byte stores in the expected
  ; output, which is why each sequence converts and stores two halves.
362  %0 = sitofp <8 x i16> %a to <8 x float>
363  store <8 x float> %0, ptr %agg.result, align 32
364  ret void
365}
366
; test16elt_signed: signed counterpart of the <16 x i16> conversion test.
; Loads a <16 x i16> vector (align 32) from the unnamed second pointer
; argument, converts it to <16 x float> with sitofp (signed), and stores the
; result through the sret pointer %agg.result (align 64).
; Every expected sequence contains four xvcvsxwsp conversions and four
; 16-byte stores (offsets 0, 16, 32, 48). None of the expected sequences
; references a constant-pool entry. The P9 and BE expectations widen with
; vmrglh/vmrghh + vextsh2w; the P8 expectation uses vslw/vsraw instead.
367define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.result, ptr nocapture readonly) local_unnamed_addr #3 {
368; CHECK-P8-LABEL: test16elt_signed:
369; CHECK-P8:       # %bb.0: # %entry
370; CHECK-P8-NEXT:    li r5, 16
371; CHECK-P8-NEXT:    vspltisw v2, 8
372; CHECK-P8-NEXT:    lxvd2x vs0, r4, r5
373; CHECK-P8-NEXT:    vadduwm v2, v2, v2
374; CHECK-P8-NEXT:    xxswapd v3, vs0
375; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
376; CHECK-P8-NEXT:    li r4, 48
377; CHECK-P8-NEXT:    vmrghh v5, v3, v3
378; CHECK-P8-NEXT:    vmrglh v3, v3, v3
379; CHECK-P8-NEXT:    vslw v3, v3, v2
380; CHECK-P8-NEXT:    vslw v5, v5, v2
381; CHECK-P8-NEXT:    vsraw v3, v3, v2
382; CHECK-P8-NEXT:    xvcvsxwsp vs3, v3
383; CHECK-P8-NEXT:    xxswapd v4, vs0
384; CHECK-P8-NEXT:    vmrglh v0, v4, v4
385; CHECK-P8-NEXT:    vmrghh v4, v4, v4
386; CHECK-P8-NEXT:    vslw v0, v0, v2
387; CHECK-P8-NEXT:    vslw v4, v4, v2
388; CHECK-P8-NEXT:    vsraw v0, v0, v2
389; CHECK-P8-NEXT:    vsraw v4, v4, v2
390; CHECK-P8-NEXT:    vsraw v2, v5, v2
391; CHECK-P8-NEXT:    xvcvsxwsp vs2, v2
392; CHECK-P8-NEXT:    xvcvsxwsp vs0, v0
393; CHECK-P8-NEXT:    xvcvsxwsp vs1, v4
394; CHECK-P8-NEXT:    xxswapd vs3, vs3
395; CHECK-P8-NEXT:    xxswapd vs2, vs2
396; CHECK-P8-NEXT:    xxswapd vs1, vs1
397; CHECK-P8-NEXT:    xxswapd vs0, vs0
398; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
399; CHECK-P8-NEXT:    li r4, 32
400; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
401; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
402; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
403; CHECK-P8-NEXT:    blr
404;
405; CHECK-P9-LABEL: test16elt_signed:
406; CHECK-P9:       # %bb.0: # %entry
407; CHECK-P9-NEXT:    lxv v3, 0(r4)
408; CHECK-P9-NEXT:    lxv v2, 16(r4)
409; CHECK-P9-NEXT:    vmrglh v4, v3, v3
410; CHECK-P9-NEXT:    vmrghh v3, v3, v3
411; CHECK-P9-NEXT:    vextsh2w v3, v3
412; CHECK-P9-NEXT:    vextsh2w v4, v4
413; CHECK-P9-NEXT:    xvcvsxwsp vs1, v3
414; CHECK-P9-NEXT:    vmrglh v3, v2, v2
415; CHECK-P9-NEXT:    vmrghh v2, v2, v2
416; CHECK-P9-NEXT:    xvcvsxwsp vs0, v4
417; CHECK-P9-NEXT:    vextsh2w v3, v3
418; CHECK-P9-NEXT:    vextsh2w v2, v2
419; CHECK-P9-NEXT:    xvcvsxwsp vs2, v3
420; CHECK-P9-NEXT:    xvcvsxwsp vs3, v2
421; CHECK-P9-NEXT:    stxv vs1, 16(r3)
422; CHECK-P9-NEXT:    stxv vs0, 0(r3)
423; CHECK-P9-NEXT:    stxv vs3, 48(r3)
424; CHECK-P9-NEXT:    stxv vs2, 32(r3)
425; CHECK-P9-NEXT:    blr
426;
427; CHECK-BE-LABEL: test16elt_signed:
428; CHECK-BE:       # %bb.0: # %entry
429; CHECK-BE-NEXT:    lxv v3, 0(r4)
430; CHECK-BE-NEXT:    lxv v2, 16(r4)
431; CHECK-BE-NEXT:    vmrghh v4, v3, v3
432; CHECK-BE-NEXT:    vmrglh v3, v3, v3
433; CHECK-BE-NEXT:    vextsh2w v3, v3
434; CHECK-BE-NEXT:    vextsh2w v4, v4
435; CHECK-BE-NEXT:    xvcvsxwsp vs1, v3
436; CHECK-BE-NEXT:    vmrghh v3, v2, v2
437; CHECK-BE-NEXT:    vmrglh v2, v2, v2
438; CHECK-BE-NEXT:    xvcvsxwsp vs0, v4
439; CHECK-BE-NEXT:    vextsh2w v3, v3
440; CHECK-BE-NEXT:    vextsh2w v2, v2
441; CHECK-BE-NEXT:    xvcvsxwsp vs2, v3
442; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2
443; CHECK-BE-NEXT:    stxv vs1, 16(r3)
444; CHECK-BE-NEXT:    stxv vs0, 0(r3)
445; CHECK-BE-NEXT:    stxv vs3, 48(r3)
446; CHECK-BE-NEXT:    stxv vs2, 32(r3)
447; CHECK-BE-NEXT:    blr
448entry:
  ; %0 is the unnamed second parameter (the source pointer), so the first
  ; unnamed instruction result in this function is %1.
449  %a = load <16 x i16>, ptr %0, align 32
450  %1 = sitofp <16 x i16> %a to <16 x float>
451  store <16 x float> %1, ptr %agg.result, align 64
452  ret void
453}
454