xref: /llvm-project/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll (revision 032014ef103157bfd8403418538e25f3f58efa9d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
4; RUN: FileCheck %s --check-prefix=CHECK-P8
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
6; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
7; RUN: FileCheck %s --check-prefix=CHECK-P9
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
9; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
10; RUN: FileCheck %s --check-prefix=CHECK-BE
11
; test2elt: unsigned conversion of two i8 lanes to two floats.
; The i16 argument %a.coerce is bitcast to <2 x i8>, converted with uitofp to
; <2 x float>, and that vector is bitcast to i64 and returned.
; The P8 / P9 / BE check lines below hold the expected llc output for the three
; RUN configurations at the top of the file (powerpc64le pwr8, powerpc64le pwr9,
; powerpc64 pwr9). They are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py rather than editing by hand.
12define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
13; CHECK-P8-LABEL: test2elt:
14; CHECK-P8:       # %bb.0: # %entry
15; CHECK-P8-NEXT:    mtfprd f0, r3
16; CHECK-P8-NEXT:    mffprd r3, f0
17; CHECK-P8-NEXT:    clrldi r4, r3, 56
18; CHECK-P8-NEXT:    rldicl r3, r3, 56, 56
19; CHECK-P8-NEXT:    clrlwi r4, r4, 24
20; CHECK-P8-NEXT:    clrlwi r3, r3, 24
21; CHECK-P8-NEXT:    mtfprwz f0, r4
22; CHECK-P8-NEXT:    mtfprwz f1, r3
23; CHECK-P8-NEXT:    xscvuxdsp f0, f0
24; CHECK-P8-NEXT:    xscvuxdsp f1, f1
25; CHECK-P8-NEXT:    xscvdpspn vs0, f0
26; CHECK-P8-NEXT:    xscvdpspn vs1, f1
27; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
28; CHECK-P8-NEXT:    xxswapd vs0, vs0
29; CHECK-P8-NEXT:    mffprd r3, f0
30; CHECK-P8-NEXT:    blr
31;
32; CHECK-P9-LABEL: test2elt:
33; CHECK-P9:       # %bb.0: # %entry
34; CHECK-P9-NEXT:    mtfprd f0, r3
35; CHECK-P9-NEXT:    xxswapd v2, vs0
36; CHECK-P9-NEXT:    vextractub v3, v2, 15
37; CHECK-P9-NEXT:    vextractub v2, v2, 14
38; CHECK-P9-NEXT:    xscvuxdsp f0, v3
39; CHECK-P9-NEXT:    xscvuxdsp f1, v2
40; CHECK-P9-NEXT:    xscvdpspn vs0, f0
41; CHECK-P9-NEXT:    xscvdpspn vs1, f1
42; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
43; CHECK-P9-NEXT:    mfvsrld r3, vs0
44; CHECK-P9-NEXT:    blr
45;
46; CHECK-BE-LABEL: test2elt:
47; CHECK-BE:       # %bb.0: # %entry
48; CHECK-BE-NEXT:    sldi r3, r3, 48
49; CHECK-BE-NEXT:    mtvsrd v2, r3
50; CHECK-BE-NEXT:    vextractub v3, v2, 2
51; CHECK-BE-NEXT:    vextractub v2, v2, 0
52; CHECK-BE-NEXT:    xscvuxdsp f0, v3
53; CHECK-BE-NEXT:    xscvdpspn v3, f0
54; CHECK-BE-NEXT:    xscvuxdsp f0, v2
55; CHECK-BE-NEXT:    xscvdpspn v2, f0
56; CHECK-BE-NEXT:    vmrgow v2, v2, v3
57; CHECK-BE-NEXT:    mfvsrd r3, v2
58; CHECK-BE-NEXT:    blr
59entry:
; Reinterpret the 16-bit argument as two i8 lanes.
60  %0 = bitcast i16 %a.coerce to <2 x i8>
; Unsigned integer-to-float conversion of each lane.
61  %1 = uitofp <2 x i8> %0 to <2 x float>
; Repackage the two floats as one i64 for the return value.
62  %2 = bitcast <2 x float> %1 to i64
63  ret i64 %2
64}
65
; test4elt: unsigned conversion of four i8 lanes to four floats.
; The i32 argument %a.coerce is bitcast to <4 x i8>, converted with uitofp to
; <4 x float>, and returned as a vector.
; The P8 / P9 / BE check lines below hold the expected llc output for the three
; RUN configurations at the top of the file. They are autogenerated; regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
66define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
67; CHECK-P8-LABEL: test4elt:
68; CHECK-P8:       # %bb.0: # %entry
69; CHECK-P8-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
70; CHECK-P8-NEXT:    mtvsrwz v3, r3
71; CHECK-P8-NEXT:    xxlxor v4, v4, v4
72; CHECK-P8-NEXT:    addi r4, r4, .LCPI1_0@toc@l
73; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
74; CHECK-P8-NEXT:    xxswapd v2, vs0
75; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
76; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
77; CHECK-P8-NEXT:    blr
78;
79; CHECK-P9-LABEL: test4elt:
80; CHECK-P9:       # %bb.0: # %entry
81; CHECK-P9-NEXT:    mtfprwz f0, r3
82; CHECK-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
83; CHECK-P9-NEXT:    xxlxor vs2, vs2, vs2
84; CHECK-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
85; CHECK-P9-NEXT:    lxv vs1, 0(r3)
86; CHECK-P9-NEXT:    xxperm vs0, vs2, vs1
87; CHECK-P9-NEXT:    xvcvuxwsp v2, vs0
88; CHECK-P9-NEXT:    blr
89;
90; CHECK-BE-LABEL: test4elt:
91; CHECK-BE:       # %bb.0: # %entry
92; CHECK-BE-NEXT:    mtfprwz f0, r3
93; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
94; CHECK-BE-NEXT:    xxlxor vs2, vs2, vs2
95; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
96; CHECK-BE-NEXT:    lxv vs1, 0(r3)
97; CHECK-BE-NEXT:    xxperm vs0, vs2, vs1
98; CHECK-BE-NEXT:    xvcvuxwsp v2, vs0
99; CHECK-BE-NEXT:    blr
100entry:
; Reinterpret the 32-bit argument as four i8 lanes.
101  %0 = bitcast i32 %a.coerce to <4 x i8>
; Unsigned integer-to-float conversion of each lane.
102  %1 = uitofp <4 x i8> %0 to <4 x float>
103  ret <4 x float> %1
104}
105
; test8elt: unsigned conversion of eight i8 lanes to eight floats.
; The i64 argument %a.coerce is bitcast to <8 x i8> and converted with uitofp to
; <8 x float>. The result is not returned by value: it is stored through the
; sret pointer %agg.result with 32-byte alignment.
; The P8 / P9 / BE check lines below hold the expected llc output for the three
; RUN configurations at the top of the file. They are autogenerated; regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
106define void @test8elt(ptr noalias nocapture sret(<8 x float>) %agg.result, i64 %a.coerce) local_unnamed_addr #2 {
107; CHECK-P8-LABEL: test8elt:
108; CHECK-P8:       # %bb.0: # %entry
109; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
110; CHECK-P8-NEXT:    mtvsrd v4, r4
111; CHECK-P8-NEXT:    xxlxor v5, v5, v5
112; CHECK-P8-NEXT:    li r4, 16
113; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
114; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
115; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_1@toc@ha
116; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_1@toc@l
117; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
118; CHECK-P8-NEXT:    xxswapd v2, vs0
119; CHECK-P8-NEXT:    vperm v2, v5, v4, v2
120; CHECK-P8-NEXT:    xvcvuxwsp vs0, v2
121; CHECK-P8-NEXT:    xxswapd v3, vs1
122; CHECK-P8-NEXT:    vperm v3, v5, v4, v3
123; CHECK-P8-NEXT:    xvcvuxwsp vs1, v3
124; CHECK-P8-NEXT:    xxswapd vs0, vs0
125; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
126; CHECK-P8-NEXT:    xxswapd vs1, vs1
127; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
128; CHECK-P8-NEXT:    blr
129;
130; CHECK-P9-LABEL: test8elt:
131; CHECK-P9:       # %bb.0: # %entry
132; CHECK-P9-NEXT:    mtvsrd v2, r4
133; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
134; CHECK-P9-NEXT:    xxlxor v4, v4, v4
135; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0@toc@l
136; CHECK-P9-NEXT:    lxv v3, 0(r4)
137; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
138; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_1@toc@l
139; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
140; CHECK-P9-NEXT:    xvcvuxwsp vs0, v3
141; CHECK-P9-NEXT:    lxv v3, 0(r4)
142; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
143; CHECK-P9-NEXT:    stxv vs0, 0(r3)
144; CHECK-P9-NEXT:    xvcvuxwsp vs1, v2
145; CHECK-P9-NEXT:    stxv vs1, 16(r3)
146; CHECK-P9-NEXT:    blr
147;
148; CHECK-BE-LABEL: test8elt:
149; CHECK-BE:       # %bb.0: # %entry
150; CHECK-BE-NEXT:    mtvsrd v2, r4
151; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
152; CHECK-BE-NEXT:    xxlxor v4, v4, v4
153; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
154; CHECK-BE-NEXT:    lxv v3, 0(r4)
155; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
156; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_1@toc@l
157; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
158; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
159; CHECK-BE-NEXT:    lxv v3, 0(r4)
160; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
161; CHECK-BE-NEXT:    stxv vs0, 0(r3)
162; CHECK-BE-NEXT:    xvcvuxwsp vs1, v2
163; CHECK-BE-NEXT:    stxv vs1, 16(r3)
164; CHECK-BE-NEXT:    blr
165entry:
; Reinterpret the 64-bit argument as eight i8 lanes.
166  %0 = bitcast i64 %a.coerce to <8 x i8>
; Unsigned integer-to-float conversion of each lane.
167  %1 = uitofp <8 x i8> %0 to <8 x float>
; Write the 32-byte result into the caller-provided sret slot.
168  store <8 x float> %1, ptr %agg.result, align 32
169  ret void
170}
171
; test16elt: unsigned conversion of sixteen i8 lanes to sixteen floats.
; Unlike the smaller variants, the input is passed directly as a <16 x i8>
; vector (%a), so no bitcast is needed. The uitofp result, <16 x float>, is
; stored through the sret pointer %agg.result with 64-byte alignment.
; The P8 / P9 / BE check lines below hold the expected llc output for the three
; RUN configurations at the top of the file. They are autogenerated; regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
172define void @test16elt(ptr noalias nocapture sret(<16 x float>) %agg.result, <16 x i8> %a) local_unnamed_addr #3 {
173; CHECK-P8-LABEL: test16elt:
174; CHECK-P8:       # %bb.0: # %entry
175; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
176; CHECK-P8-NEXT:    xxlxor v1, v1, v1
177; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
178; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
179; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
180; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_1@toc@l
181; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
182; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
183; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3@toc@l
184; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
185; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
186; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_2@toc@l
187; CHECK-P8-NEXT:    xxswapd v4, vs0
188; CHECK-P8-NEXT:    vperm v4, v1, v2, v4
189; CHECK-P8-NEXT:    xvcvuxwsp vs0, v4
190; CHECK-P8-NEXT:    xxswapd v5, vs1
191; CHECK-P8-NEXT:    vperm v5, v1, v2, v5
192; CHECK-P8-NEXT:    xvcvuxwsp vs1, v5
193; CHECK-P8-NEXT:    xxswapd v3, vs2
194; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
195; CHECK-P8-NEXT:    li r4, 48
196; CHECK-P8-NEXT:    vperm v3, v1, v2, v3
197; CHECK-P8-NEXT:    xxswapd vs0, vs0
198; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
199; CHECK-P8-NEXT:    xxswapd v0, vs2
200; CHECK-P8-NEXT:    xvcvuxwsp vs2, v3
201; CHECK-P8-NEXT:    xxswapd vs1, vs1
202; CHECK-P8-NEXT:    vperm v2, v1, v2, v0
203; CHECK-P8-NEXT:    xvcvuxwsp vs3, v2
204; CHECK-P8-NEXT:    xxswapd vs2, vs2
205; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
206; CHECK-P8-NEXT:    li r4, 32
207; CHECK-P8-NEXT:    xxswapd vs3, vs3
208; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
209; CHECK-P8-NEXT:    li r4, 16
210; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
211; CHECK-P8-NEXT:    blr
212;
213; CHECK-P9-LABEL: test16elt:
214; CHECK-P9:       # %bb.0: # %entry
215; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
216; CHECK-P9-NEXT:    xxlxor v4, v4, v4
217; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0@toc@l
218; CHECK-P9-NEXT:    lxv v3, 0(r4)
219; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
220; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_1@toc@l
221; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
222; CHECK-P9-NEXT:    xvcvuxwsp vs0, v3
223; CHECK-P9-NEXT:    lxv v3, 0(r4)
224; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
225; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_2@toc@l
226; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
227; CHECK-P9-NEXT:    stxv vs0, 0(r3)
228; CHECK-P9-NEXT:    xvcvuxwsp vs1, v3
229; CHECK-P9-NEXT:    lxv v3, 0(r4)
230; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
231; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_3@toc@l
232; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
233; CHECK-P9-NEXT:    stxv vs1, 16(r3)
234; CHECK-P9-NEXT:    xvcvuxwsp vs2, v3
235; CHECK-P9-NEXT:    lxv v3, 0(r4)
236; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
237; CHECK-P9-NEXT:    stxv vs2, 32(r3)
238; CHECK-P9-NEXT:    xvcvuxwsp vs3, v2
239; CHECK-P9-NEXT:    stxv vs3, 48(r3)
240; CHECK-P9-NEXT:    blr
241;
242; CHECK-BE-LABEL: test16elt:
243; CHECK-BE:       # %bb.0: # %entry
244; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
245; CHECK-BE-NEXT:    xxlxor v4, v4, v4
246; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
247; CHECK-BE-NEXT:    lxv v3, 0(r4)
248; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
249; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1@toc@l
250; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
251; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
252; CHECK-BE-NEXT:    lxv v3, 0(r4)
253; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
254; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_2@toc@l
255; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
256; CHECK-BE-NEXT:    stxv vs0, 0(r3)
257; CHECK-BE-NEXT:    xvcvuxwsp vs1, v3
258; CHECK-BE-NEXT:    lxv v3, 0(r4)
259; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
260; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_3@toc@l
261; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
262; CHECK-BE-NEXT:    stxv vs1, 16(r3)
263; CHECK-BE-NEXT:    xvcvuxwsp vs2, v3
264; CHECK-BE-NEXT:    lxv v3, 0(r4)
265; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
266; CHECK-BE-NEXT:    stxv vs2, 32(r3)
267; CHECK-BE-NEXT:    xvcvuxwsp vs3, v2
268; CHECK-BE-NEXT:    stxv vs3, 48(r3)
269; CHECK-BE-NEXT:    blr
270entry:
; Unsigned integer-to-float conversion of all sixteen lanes.
271  %0 = uitofp <16 x i8> %a to <16 x float>
; Write the 64-byte result into the caller-provided sret slot.
272  store <16 x float> %0, ptr %agg.result, align 64
273  ret void
274}
275
; test2elt_signed: signed conversion of two i8 lanes to two floats.
; Same shape as test2elt, but the <2 x i8> value obtained from %a.coerce is
; converted with sitofp instead of uitofp. The <2 x float> result is bitcast to
; i64 and returned.
; The P8 / P9 / BE check lines below hold the expected llc output for the three
; RUN configurations at the top of the file. They are autogenerated; regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
276define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
277; CHECK-P8-LABEL: test2elt_signed:
278; CHECK-P8:       # %bb.0: # %entry
279; CHECK-P8-NEXT:    mtfprd f0, r3
280; CHECK-P8-NEXT:    mffprd r3, f0
281; CHECK-P8-NEXT:    clrldi r4, r3, 56
282; CHECK-P8-NEXT:    rldicl r3, r3, 56, 56
283; CHECK-P8-NEXT:    extsb r4, r4
284; CHECK-P8-NEXT:    extsb r3, r3
285; CHECK-P8-NEXT:    mtfprwa f0, r4
286; CHECK-P8-NEXT:    mtfprwa f1, r3
287; CHECK-P8-NEXT:    xscvsxdsp f0, f0
288; CHECK-P8-NEXT:    xscvsxdsp f1, f1
289; CHECK-P8-NEXT:    xscvdpspn vs0, f0
290; CHECK-P8-NEXT:    xscvdpspn vs1, f1
291; CHECK-P8-NEXT:    xxmrghw vs0, vs1, vs0
292; CHECK-P8-NEXT:    xxswapd vs0, vs0
293; CHECK-P8-NEXT:    mffprd r3, f0
294; CHECK-P8-NEXT:    blr
295;
296; CHECK-P9-LABEL: test2elt_signed:
297; CHECK-P9:       # %bb.0: # %entry
298; CHECK-P9-NEXT:    mtfprd f0, r3
299; CHECK-P9-NEXT:    xxswapd v2, vs0
300; CHECK-P9-NEXT:    vextractub v3, v2, 15
301; CHECK-P9-NEXT:    vextractub v2, v2, 14
302; CHECK-P9-NEXT:    vextsh2d v3, v3
303; CHECK-P9-NEXT:    vextsh2d v2, v2
304; CHECK-P9-NEXT:    xscvsxdsp f0, v3
305; CHECK-P9-NEXT:    xscvsxdsp f1, v2
306; CHECK-P9-NEXT:    xscvdpspn vs0, f0
307; CHECK-P9-NEXT:    xscvdpspn vs1, f1
308; CHECK-P9-NEXT:    xxmrghw vs0, vs1, vs0
309; CHECK-P9-NEXT:    mfvsrld r3, vs0
310; CHECK-P9-NEXT:    blr
311;
312; CHECK-BE-LABEL: test2elt_signed:
313; CHECK-BE:       # %bb.0: # %entry
314; CHECK-BE-NEXT:    sldi r3, r3, 48
315; CHECK-BE-NEXT:    mtvsrd v2, r3
316; CHECK-BE-NEXT:    vextractub v3, v2, 2
317; CHECK-BE-NEXT:    vextractub v2, v2, 0
318; CHECK-BE-NEXT:    vextsh2d v3, v3
319; CHECK-BE-NEXT:    vextsh2d v2, v2
320; CHECK-BE-NEXT:    xscvsxdsp f0, v3
321; CHECK-BE-NEXT:    xscvdpspn v3, f0
322; CHECK-BE-NEXT:    xscvsxdsp f0, v2
323; CHECK-BE-NEXT:    xscvdpspn v2, f0
324; CHECK-BE-NEXT:    vmrgow v2, v2, v3
325; CHECK-BE-NEXT:    mfvsrd r3, v2
326; CHECK-BE-NEXT:    blr
327entry:
; Reinterpret the 16-bit argument as two i8 lanes.
328  %0 = bitcast i16 %a.coerce to <2 x i8>
; Signed integer-to-float conversion of each lane.
329  %1 = sitofp <2 x i8> %0 to <2 x float>
; Repackage the two floats as one i64 for the return value.
330  %2 = bitcast <2 x float> %1 to i64
331  ret i64 %2
332}
333
; test4elt_signed: signed conversion of four i8 lanes to four floats.
; Same shape as test4elt, but the <4 x i8> value obtained from %a.coerce is
; converted with sitofp instead of uitofp. The <4 x float> result is returned as
; a vector.
; The P8 / P9 / BE check lines below hold the expected llc output for the three
; RUN configurations at the top of the file. They are autogenerated; regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
334define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 {
335; CHECK-P8-LABEL: test4elt_signed:
336; CHECK-P8:       # %bb.0: # %entry
337; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
338; CHECK-P8-NEXT:    mtvsrwz v4, r3
339; CHECK-P8-NEXT:    vspltisw v3, 12
340; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_0@toc@l
341; CHECK-P8-NEXT:    vadduwm v3, v3, v3
342; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
343; CHECK-P8-NEXT:    xxswapd v2, vs0
344; CHECK-P8-NEXT:    vperm v2, v4, v4, v2
345; CHECK-P8-NEXT:    vslw v2, v2, v3
346; CHECK-P8-NEXT:    vsraw v2, v2, v3
347; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
348; CHECK-P8-NEXT:    blr
349;
350; CHECK-P9-LABEL: test4elt_signed:
351; CHECK-P9:       # %bb.0: # %entry
352; CHECK-P9-NEXT:    mtvsrwz v2, r3
353; CHECK-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
354; CHECK-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
355; CHECK-P9-NEXT:    lxv vs0, 0(r3)
356; CHECK-P9-NEXT:    xxperm v2, v2, vs0
357; CHECK-P9-NEXT:    vextsb2w v2, v2
358; CHECK-P9-NEXT:    xvcvsxwsp v2, v2
359; CHECK-P9-NEXT:    blr
360;
361; CHECK-BE-LABEL: test4elt_signed:
362; CHECK-BE:       # %bb.0: # %entry
363; CHECK-BE-NEXT:    mtvsrwz v2, r3
364; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
365; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
366; CHECK-BE-NEXT:    lxv vs0, 0(r3)
367; CHECK-BE-NEXT:    xxperm v2, v2, vs0
368; CHECK-BE-NEXT:    vextsb2w v2, v2
369; CHECK-BE-NEXT:    xvcvsxwsp v2, v2
370; CHECK-BE-NEXT:    blr
371entry:
; Reinterpret the 32-bit argument as four i8 lanes.
372  %0 = bitcast i32 %a.coerce to <4 x i8>
; Signed integer-to-float conversion of each lane.
373  %1 = sitofp <4 x i8> %0 to <4 x float>
374  ret <4 x float> %1
375}
376
; test8elt_signed: signed conversion of eight i8 lanes to eight floats.
; Same shape as test8elt, but the <8 x i8> value obtained from %a.coerce is
; converted with sitofp instead of uitofp. The <8 x float> result is stored
; through the sret pointer %agg.result with 32-byte alignment.
; The P8 / P9 / BE check lines below hold the expected llc output for the three
; RUN configurations at the top of the file. They are autogenerated; regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
377define void @test8elt_signed(ptr noalias nocapture sret(<8 x float>) %agg.result, i64 %a.coerce) local_unnamed_addr #2 {
378; CHECK-P8-LABEL: test8elt_signed:
379; CHECK-P8:       # %bb.0: # %entry
380; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
381; CHECK-P8-NEXT:    mtvsrd v5, r4
382; CHECK-P8-NEXT:    vspltisw v3, 12
383; CHECK-P8-NEXT:    li r4, 16
384; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_0@toc@l
385; CHECK-P8-NEXT:    vadduwm v3, v3, v3
386; CHECK-P8-NEXT:    lxvd2x vs0, 0, r5
387; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_1@toc@ha
388; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_1@toc@l
389; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
390; CHECK-P8-NEXT:    xxswapd v2, vs0
391; CHECK-P8-NEXT:    vperm v2, v5, v5, v2
392; CHECK-P8-NEXT:    vslw v2, v2, v3
393; CHECK-P8-NEXT:    vsraw v2, v2, v3
394; CHECK-P8-NEXT:    xvcvsxwsp vs0, v2
395; CHECK-P8-NEXT:    xxswapd v4, vs1
396; CHECK-P8-NEXT:    vperm v4, v5, v5, v4
397; CHECK-P8-NEXT:    vslw v2, v4, v3
398; CHECK-P8-NEXT:    vsraw v2, v2, v3
399; CHECK-P8-NEXT:    xvcvsxwsp vs1, v2
400; CHECK-P8-NEXT:    xxswapd vs0, vs0
401; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
402; CHECK-P8-NEXT:    xxswapd vs1, vs1
403; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
404; CHECK-P8-NEXT:    blr
405;
406; CHECK-P9-LABEL: test8elt_signed:
407; CHECK-P9:       # %bb.0: # %entry
408; CHECK-P9-NEXT:    mtvsrd v2, r4
409; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
410; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_0@toc@l
411; CHECK-P9-NEXT:    lxv v3, 0(r4)
412; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
413; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_1@toc@l
414; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
415; CHECK-P9-NEXT:    vextsb2w v3, v3
416; CHECK-P9-NEXT:    xvcvsxwsp vs0, v3
417; CHECK-P9-NEXT:    lxv v3, 0(r4)
418; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
419; CHECK-P9-NEXT:    stxv vs0, 0(r3)
420; CHECK-P9-NEXT:    vextsb2w v2, v2
421; CHECK-P9-NEXT:    xvcvsxwsp vs1, v2
422; CHECK-P9-NEXT:    stxv vs1, 16(r3)
423; CHECK-P9-NEXT:    blr
424;
425; CHECK-BE-LABEL: test8elt_signed:
426; CHECK-BE:       # %bb.0: # %entry
427; CHECK-BE-NEXT:    mtvsrd v2, r4
428; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
429; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0@toc@l
430; CHECK-BE-NEXT:    lxv v3, 0(r4)
431; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
432; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_1@toc@l
433; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
434; CHECK-BE-NEXT:    vextsb2w v3, v3
435; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
436; CHECK-BE-NEXT:    lxv v3, 0(r4)
437; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
438; CHECK-BE-NEXT:    stxv vs0, 0(r3)
439; CHECK-BE-NEXT:    vextsb2w v2, v2
440; CHECK-BE-NEXT:    xvcvsxwsp vs1, v2
441; CHECK-BE-NEXT:    stxv vs1, 16(r3)
442; CHECK-BE-NEXT:    blr
443entry:
; Reinterpret the 64-bit argument as eight i8 lanes.
444  %0 = bitcast i64 %a.coerce to <8 x i8>
; Signed integer-to-float conversion of each lane.
445  %1 = sitofp <8 x i8> %0 to <8 x float>
; Write the 32-byte result into the caller-provided sret slot.
446  store <8 x float> %1, ptr %agg.result, align 32
447  ret void
448}
449
; test16elt_signed: signed conversion of sixteen i8 lanes to sixteen floats.
; Same shape as test16elt: the input arrives directly as a <16 x i8> vector
; (%a), but is converted with sitofp instead of uitofp. The <16 x float> result
; is stored through the sret pointer %agg.result with 64-byte alignment.
; The P8 / P9 / BE check lines below hold the expected llc output for the three
; RUN configurations at the top of the file. They are autogenerated; regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
450define void @test16elt_signed(ptr noalias nocapture sret(<16 x float>) %agg.result, <16 x i8> %a) local_unnamed_addr #3 {
451; CHECK-P8-LABEL: test16elt_signed:
452; CHECK-P8:       # %bb.0: # %entry
453; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
454; CHECK-P8-NEXT:    vspltisw v3, 12
455; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_0@toc@l
456; CHECK-P8-NEXT:    vadduwm v3, v3, v3
457; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
458; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_1@toc@ha
459; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_1@toc@l
460; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
461; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
462; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_3@toc@l
463; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
464; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
465; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_2@toc@l
466; CHECK-P8-NEXT:    xxswapd v5, vs0
467; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
468; CHECK-P8-NEXT:    vslw v5, v5, v3
469; CHECK-P8-NEXT:    vsraw v5, v5, v3
470; CHECK-P8-NEXT:    xvcvsxwsp vs0, v5
471; CHECK-P8-NEXT:    xxswapd v0, vs1
472; CHECK-P8-NEXT:    vperm v0, v2, v2, v0
473; CHECK-P8-NEXT:    vslw v0, v0, v3
474; CHECK-P8-NEXT:    vsraw v0, v0, v3
475; CHECK-P8-NEXT:    xvcvsxwsp vs1, v0
476; CHECK-P8-NEXT:    xxswapd v4, vs2
477; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
478; CHECK-P8-NEXT:    li r4, 48
479; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
480; CHECK-P8-NEXT:    vslw v4, v4, v3
481; CHECK-P8-NEXT:    xxswapd v1, vs2
482; CHECK-P8-NEXT:    xxswapd vs0, vs0
483; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
484; CHECK-P8-NEXT:    vperm v2, v2, v2, v1
485; CHECK-P8-NEXT:    vslw v2, v2, v3
486; CHECK-P8-NEXT:    vsraw v2, v2, v3
487; CHECK-P8-NEXT:    vsraw v3, v4, v3
488; CHECK-P8-NEXT:    xvcvsxwsp vs2, v3
489; CHECK-P8-NEXT:    xvcvsxwsp vs3, v2
490; CHECK-P8-NEXT:    xxswapd vs1, vs1
491; CHECK-P8-NEXT:    xxswapd vs2, vs2
492; CHECK-P8-NEXT:    xxswapd vs3, vs3
493; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
494; CHECK-P8-NEXT:    li r4, 32
495; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
496; CHECK-P8-NEXT:    li r4, 16
497; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
498; CHECK-P8-NEXT:    blr
499;
500; CHECK-P9-LABEL: test16elt_signed:
501; CHECK-P9:       # %bb.0: # %entry
502; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
503; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_0@toc@l
504; CHECK-P9-NEXT:    lxv v3, 0(r4)
505; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_1@toc@ha
506; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_1@toc@l
507; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
508; CHECK-P9-NEXT:    vextsb2w v3, v3
509; CHECK-P9-NEXT:    xvcvsxwsp vs0, v3
510; CHECK-P9-NEXT:    lxv v3, 0(r4)
511; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
512; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_2@toc@l
513; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
514; CHECK-P9-NEXT:    stxv vs0, 0(r3)
515; CHECK-P9-NEXT:    vextsb2w v3, v3
516; CHECK-P9-NEXT:    xvcvsxwsp vs1, v3
517; CHECK-P9-NEXT:    lxv v3, 0(r4)
518; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
519; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_3@toc@l
520; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
521; CHECK-P9-NEXT:    stxv vs1, 16(r3)
522; CHECK-P9-NEXT:    vextsb2w v3, v3
523; CHECK-P9-NEXT:    xvcvsxwsp vs2, v3
524; CHECK-P9-NEXT:    lxv v3, 0(r4)
525; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
526; CHECK-P9-NEXT:    stxv vs2, 32(r3)
527; CHECK-P9-NEXT:    vextsb2w v2, v2
528; CHECK-P9-NEXT:    xvcvsxwsp vs3, v2
529; CHECK-P9-NEXT:    stxv vs3, 48(r3)
530; CHECK-P9-NEXT:    blr
531;
532; CHECK-BE-LABEL: test16elt_signed:
533; CHECK-BE:       # %bb.0: # %entry
534; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
535; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0@toc@l
536; CHECK-BE-NEXT:    lxv v3, 0(r4)
537; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_1@toc@ha
538; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_1@toc@l
539; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
540; CHECK-BE-NEXT:    vextsb2w v3, v3
541; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
542; CHECK-BE-NEXT:    lxv v3, 0(r4)
543; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
544; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_2@toc@l
545; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
546; CHECK-BE-NEXT:    stxv vs0, 0(r3)
547; CHECK-BE-NEXT:    vextsb2w v3, v3
548; CHECK-BE-NEXT:    xvcvsxwsp vs1, v3
549; CHECK-BE-NEXT:    lxv v3, 0(r4)
550; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
551; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_3@toc@l
552; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
553; CHECK-BE-NEXT:    stxv vs1, 16(r3)
554; CHECK-BE-NEXT:    vextsb2w v3, v3
555; CHECK-BE-NEXT:    xvcvsxwsp vs2, v3
556; CHECK-BE-NEXT:    lxv v3, 0(r4)
557; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
558; CHECK-BE-NEXT:    stxv vs2, 32(r3)
559; CHECK-BE-NEXT:    vextsb2w v2, v2
560; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2
561; CHECK-BE-NEXT:    stxv vs3, 48(r3)
562; CHECK-BE-NEXT:    blr
563entry:
; Signed integer-to-float conversion of all sixteen lanes.
564  %0 = sitofp <16 x i8> %a to <16 x float>
; Write the 64-byte result into the caller-provided sret slot.
565  store <16 x float> %0, ptr %agg.result, align 64
566  ret void
567}
568