xref: /llvm-project/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll (revision b922a3621116b404d868af8b74cab25ab78555be)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
3; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8
4
5; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
6; RUN:   -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9
7
8; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx \
9; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8-BE
10
11; RUN: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -verify-machineinstrs -mcpu=pwr9 -mattr=+vsx \
12; RUN:   -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9-BE
13
; load_swap00: loads two <2 x i64> vectors and returns a shuffle whose mask
; <1, 0> picks only lanes of %v1, in reversed order (%v2 does not reach the
; result).
; Expected output recorded below: the little-endian runs (pwr8 and pwr9) emit a
; lone lxvd2x from r3 with no swap; the big-endian runs emit the load
; (lxvd2x on pwr8, lxv on pwr9) followed by xxswapd.
14define <2 x i64> @load_swap00(ptr %vp1, ptr %vp2) {
15; CHECK-P8-LABEL: load_swap00:
16; CHECK-P8:       # %bb.0:
17; CHECK-P8-NEXT:    lxvd2x v2, 0, r3
18; CHECK-P8-NEXT:    blr
19;
20; CHECK-P9-LABEL: load_swap00:
21; CHECK-P9:       # %bb.0:
22; CHECK-P9-NEXT:    lxvd2x v2, 0, r3
23; CHECK-P9-NEXT:    blr
24;
25; CHECK-P8-BE-LABEL: load_swap00:
26; CHECK-P8-BE:       # %bb.0:
27; CHECK-P8-BE-NEXT:    lxvd2x v2, 0, r3
28; CHECK-P8-BE-NEXT:    xxswapd v2, v2
29; CHECK-P8-BE-NEXT:    blr
30;
31; CHECK-P9-BE-LABEL: load_swap00:
32; CHECK-P9-BE:       # %bb.0:
33; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
34; CHECK-P9-BE-NEXT:    xxswapd v2, v2
35; CHECK-P9-BE-NEXT:    blr
36  %v1 = load <2 x i64>, ptr %vp1
37  %v2 = load <2 x i64>, ptr %vp2
38  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
39  ret <2 x i64> %v3
40}
41
; load_swap01: same shape as load_swap00, but the mask <3, 2> picks only lanes
; of the second shuffle operand (%v2), in reversed order.
; Expected output recorded below: the little-endian runs emit a lone lxvd2x
; from r4; the big-endian runs emit the load (lxvd2x on pwr8, lxv on pwr9)
; followed by xxswapd.
42define <2 x i64> @load_swap01(ptr %vp1, ptr %vp2) {
43; CHECK-P8-LABEL: load_swap01:
44; CHECK-P8:       # %bb.0:
45; CHECK-P8-NEXT:    lxvd2x v2, 0, r4
46; CHECK-P8-NEXT:    blr
47;
48; CHECK-P9-LABEL: load_swap01:
49; CHECK-P9:       # %bb.0:
50; CHECK-P9-NEXT:    lxvd2x v2, 0, r4
51; CHECK-P9-NEXT:    blr
52;
53; CHECK-P8-BE-LABEL: load_swap01:
54; CHECK-P8-BE:       # %bb.0:
55; CHECK-P8-BE-NEXT:    lxvd2x v2, 0, r4
56; CHECK-P8-BE-NEXT:    xxswapd v2, v2
57; CHECK-P8-BE-NEXT:    blr
58;
59; CHECK-P9-BE-LABEL: load_swap01:
60; CHECK-P9-BE:       # %bb.0:
61; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
62; CHECK-P9-BE-NEXT:    xxswapd v2, v2
63; CHECK-P9-BE-NEXT:    blr
64  %v1 = load <2 x i64>, ptr %vp1
65  %v2 = load <2 x i64>, ptr %vp2
66  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
67  ret <2 x i64> %v3
68}
69
; load_swap10: loads two <4 x i32> vectors and returns a shuffle whose mask
; <3, 2, 1, 0> picks only lanes of %v1, in fully reversed order.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single lxvw4x from r3. The other three runs load the data, load a second
; vector through the TOC from a .LCPI label, and combine them with vperm
; (pwr8) or xxperm (pwr9 big-endian); pwr8 little-endian also has an xxswapd
; after each lxvd2x.
70define <4 x i32> @load_swap10(ptr %vp1, ptr %vp2) {
71; CHECK-P8-LABEL: load_swap10:
72; CHECK-P8:       # %bb.0:
73; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
74; CHECK-P8-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
75; CHECK-P8-NEXT:    addi r3, r3, .LCPI2_0@toc@l
76; CHECK-P8-NEXT:    xxswapd v2, vs0
77; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
78; CHECK-P8-NEXT:    xxswapd v3, vs0
79; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
80; CHECK-P8-NEXT:    blr
81;
82; CHECK-P9-LABEL: load_swap10:
83; CHECK-P9:       # %bb.0:
84; CHECK-P9-NEXT:    lxvw4x v2, 0, r3
85; CHECK-P9-NEXT:    blr
86;
87; CHECK-P8-BE-LABEL: load_swap10:
88; CHECK-P8-BE:       # %bb.0:
89; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
90; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
91; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
92; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
93; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
94; CHECK-P8-BE-NEXT:    blr
95;
96; CHECK-P9-BE-LABEL: load_swap10:
97; CHECK-P9-BE:       # %bb.0:
98; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
99; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
100; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI2_0@toc@l
101; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
102; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
103; CHECK-P9-BE-NEXT:    blr
104  %v1 = load <4 x i32>, ptr %vp1
105  %v2 = load <4 x i32>, ptr %vp2
106  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
107  ret <4 x i32> %v3
108}
109
; load_swap11: <4 x i32> variant whose mask <7, 6, 5, 4> picks only lanes of
; the second shuffle operand (%v2), in fully reversed order.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single lxvw4x from r4. The other three runs load the data from r4, load a
; second vector through the TOC from a .LCPI label, and combine them with
; vperm (pwr8) or xxperm (pwr9 big-endian).
110define <4 x i32> @load_swap11(ptr %vp1, ptr %vp2) {
111; CHECK-P8-LABEL: load_swap11:
112; CHECK-P8:       # %bb.0:
113; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
114; CHECK-P8-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
115; CHECK-P8-NEXT:    addi r3, r3, .LCPI3_0@toc@l
116; CHECK-P8-NEXT:    xxswapd v2, vs0
117; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
118; CHECK-P8-NEXT:    xxswapd v3, vs0
119; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
120; CHECK-P8-NEXT:    blr
121;
122; CHECK-P9-LABEL: load_swap11:
123; CHECK-P9:       # %bb.0:
124; CHECK-P9-NEXT:    lxvw4x v2, 0, r4
125; CHECK-P9-NEXT:    blr
126;
127; CHECK-P8-BE-LABEL: load_swap11:
128; CHECK-P8-BE:       # %bb.0:
129; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
130; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
131; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
132; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
133; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
134; CHECK-P8-BE-NEXT:    blr
135;
136; CHECK-P9-BE-LABEL: load_swap11:
137; CHECK-P9-BE:       # %bb.0:
138; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
139; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
140; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI3_0@toc@l
141; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
142; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
143; CHECK-P9-BE-NEXT:    blr
144  %v1 = load <4 x i32>, ptr %vp1
145  %v2 = load <4 x i32>, ptr %vp2
146  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
147  ret <4 x i32> %v3
148}
149
; load_swap20: loads two <8 x i16> vectors and returns a shuffle whose mask
; <7 .. 0> picks only lanes of %v1, in fully reversed order.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single lxvh8x from r3. The other three runs load the data, load a second
; vector through the TOC from a .LCPI label, and combine them with vperm
; (pwr8) or xxperm (pwr9 big-endian).
150define <8 x i16> @load_swap20(ptr %vp1, ptr %vp2){
151; CHECK-P8-LABEL: load_swap20:
152; CHECK-P8:       # %bb.0:
153; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
154; CHECK-P8-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
155; CHECK-P8-NEXT:    addi r3, r3, .LCPI4_0@toc@l
156; CHECK-P8-NEXT:    xxswapd v2, vs0
157; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
158; CHECK-P8-NEXT:    xxswapd v3, vs0
159; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
160; CHECK-P8-NEXT:    blr
161;
162; CHECK-P9-LABEL: load_swap20:
163; CHECK-P9:       # %bb.0:
164; CHECK-P9-NEXT:    lxvh8x v2, 0, r3
165; CHECK-P9-NEXT:    blr
166;
167; CHECK-P8-BE-LABEL: load_swap20:
168; CHECK-P8-BE:       # %bb.0:
169; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
170; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
171; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
172; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
173; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
174; CHECK-P8-BE-NEXT:    blr
175;
176; CHECK-P9-BE-LABEL: load_swap20:
177; CHECK-P9-BE:       # %bb.0:
178; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
179; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
180; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
181; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
182; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
183; CHECK-P9-BE-NEXT:    blr
184  %v1 = load <8 x i16>, ptr %vp1
185  %v2 = load <8 x i16>, ptr %vp2
186  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
187  ret <8 x i16> %v3
188}
189
; load_swap21: <8 x i16> variant whose mask <15 .. 8> picks only lanes of the
; second shuffle operand (%v2), in fully reversed order.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single lxvh8x from r4. The other three runs load the data from r4, load a
; second vector through the TOC from a .LCPI label, and combine them with
; vperm (pwr8) or xxperm (pwr9 big-endian).
190define <8 x i16> @load_swap21(ptr %vp1, ptr %vp2){
191; CHECK-P8-LABEL: load_swap21:
192; CHECK-P8:       # %bb.0:
193; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
194; CHECK-P8-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
195; CHECK-P8-NEXT:    addi r3, r3, .LCPI5_0@toc@l
196; CHECK-P8-NEXT:    xxswapd v2, vs0
197; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
198; CHECK-P8-NEXT:    xxswapd v3, vs0
199; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
200; CHECK-P8-NEXT:    blr
201;
202; CHECK-P9-LABEL: load_swap21:
203; CHECK-P9:       # %bb.0:
204; CHECK-P9-NEXT:    lxvh8x v2, 0, r4
205; CHECK-P9-NEXT:    blr
206;
207; CHECK-P8-BE-LABEL: load_swap21:
208; CHECK-P8-BE:       # %bb.0:
209; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
210; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
211; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
212; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
213; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
214; CHECK-P8-BE-NEXT:    blr
215;
216; CHECK-P9-BE-LABEL: load_swap21:
217; CHECK-P9-BE:       # %bb.0:
218; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
219; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
220; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
221; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
222; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
223; CHECK-P9-BE-NEXT:    blr
224  %v1 = load <8 x i16>, ptr %vp1
225  %v2 = load <8 x i16>, ptr %vp2
226  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
227  ret <8 x i16> %v3
228}
229
; load_swap30: loads two <16 x i8> vectors and returns a shuffle whose mask
; <15 .. 0> picks only lanes of %v1, in fully reversed order.
; Expected output recorded below: pwr9 little-endian collapses to a single
; lxvb16x from r3, and pwr9 big-endian to lxv followed by xxbrq. Both pwr8
; runs load the data, load a second vector through the TOC from a .LCPI
; label, and combine them with vperm.
230define <16 x i8> @load_swap30(ptr %vp1, ptr %vp2){
231; CHECK-P8-LABEL: load_swap30:
232; CHECK-P8:       # %bb.0:
233; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
234; CHECK-P8-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
235; CHECK-P8-NEXT:    addi r3, r3, .LCPI6_0@toc@l
236; CHECK-P8-NEXT:    xxswapd v2, vs0
237; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
238; CHECK-P8-NEXT:    xxswapd v3, vs0
239; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
240; CHECK-P8-NEXT:    blr
241;
242; CHECK-P9-LABEL: load_swap30:
243; CHECK-P9:       # %bb.0:
244; CHECK-P9-NEXT:    lxvb16x v2, 0, r3
245; CHECK-P9-NEXT:    blr
246;
247; CHECK-P8-BE-LABEL: load_swap30:
248; CHECK-P8-BE:       # %bb.0:
249; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
250; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
251; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI6_0@toc@l
252; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
253; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
254; CHECK-P8-BE-NEXT:    blr
255;
256; CHECK-P9-BE-LABEL: load_swap30:
257; CHECK-P9-BE:       # %bb.0:
258; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
259; CHECK-P9-BE-NEXT:    xxbrq v2, vs0
260; CHECK-P9-BE-NEXT:    blr
261  %v1 = load <16 x i8>, ptr %vp1
262  %v2 = load <16 x i8>, ptr %vp2
263  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
264  ret <16 x i8> %v3
265}
266
; load_swap31: <16 x i8> variant whose mask <31 .. 16> picks only lanes of the
; second shuffle operand (%v2), in fully reversed order.
; Expected output recorded below: pwr9 little-endian collapses to a single
; lxvb16x from r4, and pwr9 big-endian to lxv followed by xxbrq. Both pwr8
; runs load the data from r4, load a second vector through the TOC from a
; .LCPI label, and combine them with vperm.
267define <16 x i8> @load_swap31(ptr %vp1, ptr %vp2){
268; CHECK-P8-LABEL: load_swap31:
269; CHECK-P8:       # %bb.0:
270; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
271; CHECK-P8-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
272; CHECK-P8-NEXT:    addi r3, r3, .LCPI7_0@toc@l
273; CHECK-P8-NEXT:    xxswapd v2, vs0
274; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
275; CHECK-P8-NEXT:    xxswapd v3, vs0
276; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
277; CHECK-P8-NEXT:    blr
278;
279; CHECK-P9-LABEL: load_swap31:
280; CHECK-P9:       # %bb.0:
281; CHECK-P9-NEXT:    lxvb16x v2, 0, r4
282; CHECK-P9-NEXT:    blr
283;
284; CHECK-P8-BE-LABEL: load_swap31:
285; CHECK-P8-BE:       # %bb.0:
286; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI7_0@toc@ha
287; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
288; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI7_0@toc@l
289; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
290; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
291; CHECK-P8-BE-NEXT:    blr
292;
293; CHECK-P9-BE-LABEL: load_swap31:
294; CHECK-P9-BE:       # %bb.0:
295; CHECK-P9-BE-NEXT:    lxv vs0, 0(r4)
296; CHECK-P9-BE-NEXT:    xxbrq v2, vs0
297; CHECK-P9-BE-NEXT:    blr
298  %v1 = load <16 x i8>, ptr %vp1
299  %v2 = load <16 x i8>, ptr %vp2
300  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
301  ret <16 x i8> %v3
302}
303
; load_swap40: loads two <2 x double> vectors and returns a shuffle whose mask
; <3, 2> picks only lanes of the second operand (%v2), in reversed order.
; Expected output recorded below: the little-endian runs emit a lone lxvd2x
; from r4; the big-endian runs load into vs0 (lxvd2x on pwr8, lxv on pwr9) and
; then xxswapd into v2.
; NOTE(review): the other *0-suffixed load tests in this file select from the
; first operand, and there is no load_swap41 counterpart here - confirm
; whether a first-operand <1, 0> double test was intended as well.
304define <2 x double> @load_swap40(ptr %vp1, ptr %vp2) {
305; CHECK-P8-LABEL: load_swap40:
306; CHECK-P8:       # %bb.0:
307; CHECK-P8-NEXT:    lxvd2x v2, 0, r4
308; CHECK-P8-NEXT:    blr
309;
310; CHECK-P9-LABEL: load_swap40:
311; CHECK-P9:       # %bb.0:
312; CHECK-P9-NEXT:    lxvd2x v2, 0, r4
313; CHECK-P9-NEXT:    blr
314;
315; CHECK-P8-BE-LABEL: load_swap40:
316; CHECK-P8-BE:       # %bb.0:
317; CHECK-P8-BE-NEXT:    lxvd2x vs0, 0, r4
318; CHECK-P8-BE-NEXT:    xxswapd v2, vs0
319; CHECK-P8-BE-NEXT:    blr
320;
321; CHECK-P9-BE-LABEL: load_swap40:
322; CHECK-P9-BE:       # %bb.0:
323; CHECK-P9-BE-NEXT:    lxv vs0, 0(r4)
324; CHECK-P9-BE-NEXT:    xxswapd v2, vs0
325; CHECK-P9-BE-NEXT:    blr
326  %v1 = load <2 x double>, ptr %vp1
327  %v2 = load <2 x double>, ptr %vp2
328  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
329  ret <2 x double> %v3
330}
331
; load_swap50: loads two <4 x float> vectors and returns a shuffle whose mask
; <3, 2, 1, 0> picks only lanes of %v1, in fully reversed order.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single lxvw4x from r3. The other three runs load the data, load a second
; vector through the TOC from a .LCPI label, and combine them with vperm
; (pwr8) or xxperm (pwr9 big-endian).
332define <4 x float> @load_swap50(ptr %vp1, ptr %vp2) {
333; CHECK-P8-LABEL: load_swap50:
334; CHECK-P8:       # %bb.0:
335; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
336; CHECK-P8-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
337; CHECK-P8-NEXT:    addi r3, r3, .LCPI9_0@toc@l
338; CHECK-P8-NEXT:    xxswapd v2, vs0
339; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
340; CHECK-P8-NEXT:    xxswapd v3, vs0
341; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
342; CHECK-P8-NEXT:    blr
343;
344; CHECK-P9-LABEL: load_swap50:
345; CHECK-P9:       # %bb.0:
346; CHECK-P9-NEXT:    lxvw4x v2, 0, r3
347; CHECK-P9-NEXT:    blr
348;
349; CHECK-P8-BE-LABEL: load_swap50:
350; CHECK-P8-BE:       # %bb.0:
351; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
352; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
353; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
354; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
355; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
356; CHECK-P8-BE-NEXT:    blr
357;
358; CHECK-P9-BE-LABEL: load_swap50:
359; CHECK-P9-BE:       # %bb.0:
360; CHECK-P9-BE-NEXT:    lxv v2, 0(r3)
361; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
362; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI9_0@toc@l
363; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
364; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
365; CHECK-P9-BE-NEXT:    blr
366  %v1 = load <4 x float>, ptr %vp1
367  %v2 = load <4 x float>, ptr %vp2
368  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
369  ret <4 x float> %v3
370}
371
; load_swap51: <4 x float> variant whose mask <7, 6, 5, 4> picks only lanes of
; the second shuffle operand (%v2), in fully reversed order.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single lxvw4x from r4. The other three runs load the data from r4, load a
; second vector through the TOC from a .LCPI label, and combine them with
; vperm (pwr8) or xxperm (pwr9 big-endian).
372define <4 x float> @load_swap51(ptr %vp1, ptr %vp2) {
373; CHECK-P8-LABEL: load_swap51:
374; CHECK-P8:       # %bb.0:
375; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
376; CHECK-P8-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
377; CHECK-P8-NEXT:    addi r3, r3, .LCPI10_0@toc@l
378; CHECK-P8-NEXT:    xxswapd v2, vs0
379; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
380; CHECK-P8-NEXT:    xxswapd v3, vs0
381; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
382; CHECK-P8-NEXT:    blr
383;
384; CHECK-P9-LABEL: load_swap51:
385; CHECK-P9:       # %bb.0:
386; CHECK-P9-NEXT:    lxvw4x v2, 0, r4
387; CHECK-P9-NEXT:    blr
388;
389; CHECK-P8-BE-LABEL: load_swap51:
390; CHECK-P8-BE:       # %bb.0:
391; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
392; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r4
393; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI10_0@toc@l
394; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
395; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
396; CHECK-P8-BE-NEXT:    blr
397;
398; CHECK-P9-BE-LABEL: load_swap51:
399; CHECK-P9-BE:       # %bb.0:
400; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
401; CHECK-P9-BE-NEXT:    lxv v2, 0(r4)
402; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI10_0@toc@l
403; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
404; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
405; CHECK-P9-BE-NEXT:    blr
406  %v1 = load <4 x float>, ptr %vp1
407  %v2 = load <4 x float>, ptr %vp2
408  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
409  ret <4 x float> %v3
410}
411
; swap_store00: shuffles two <2 x i64> arguments with mask <1, 0> (only lanes
; of %v1, in reversed order) and stores the result through %vp.
; Expected output recorded below: the little-endian runs emit a lone stxvd2x
; of v2 to r7; the big-endian runs emit xxswapd into vs0 followed by the store
; (stxvd2x on pwr8, stxv on pwr9).
412define void @swap_store00(<2 x i64> %v1, <2 x i64> %v2, ptr %vp) {
413; CHECK-P8-LABEL: swap_store00:
414; CHECK-P8:       # %bb.0:
415; CHECK-P8-NEXT:    stxvd2x v2, 0, r7
416; CHECK-P8-NEXT:    blr
417;
418; CHECK-P9-LABEL: swap_store00:
419; CHECK-P9:       # %bb.0:
420; CHECK-P9-NEXT:    stxvd2x v2, 0, r7
421; CHECK-P9-NEXT:    blr
422;
423; CHECK-P8-BE-LABEL: swap_store00:
424; CHECK-P8-BE:       # %bb.0:
425; CHECK-P8-BE-NEXT:    xxswapd vs0, v2
426; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
427; CHECK-P8-BE-NEXT:    blr
428;
429; CHECK-P9-BE-LABEL: swap_store00:
430; CHECK-P9-BE:       # %bb.0:
431; CHECK-P9-BE-NEXT:    xxswapd vs0, v2
432; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
433; CHECK-P9-BE-NEXT:    blr
434  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 1, i32 0>
435  store <2 x i64> %v3, ptr %vp
436  ret void
437}
438
; swap_store01: shuffles two <2 x i64> arguments with mask <3, 2> (only lanes
; of the second operand %v2, in reversed order) and stores the result through
; %vp.
; Expected output recorded below: the little-endian runs emit a lone stxvd2x
; of v3 to r7; the big-endian runs emit xxswapd into vs0 followed by the store
; (stxvd2x on pwr8, stxv on pwr9).
439define void @swap_store01(<2 x i64> %v1, <2 x i64> %v2, ptr %vp) {
440; CHECK-P8-LABEL: swap_store01:
441; CHECK-P8:       # %bb.0:
442; CHECK-P8-NEXT:    stxvd2x v3, 0, r7
443; CHECK-P8-NEXT:    blr
444;
445; CHECK-P9-LABEL: swap_store01:
446; CHECK-P9:       # %bb.0:
447; CHECK-P9-NEXT:    stxvd2x v3, 0, r7
448; CHECK-P9-NEXT:    blr
449;
450; CHECK-P8-BE-LABEL: swap_store01:
451; CHECK-P8-BE:       # %bb.0:
452; CHECK-P8-BE-NEXT:    xxswapd vs0, v3
453; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
454; CHECK-P8-BE-NEXT:    blr
455;
456; CHECK-P9-BE-LABEL: swap_store01:
457; CHECK-P9-BE:       # %bb.0:
458; CHECK-P9-BE-NEXT:    xxswapd vs0, v3
459; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
460; CHECK-P9-BE-NEXT:    blr
461  %v3 = shufflevector <2 x i64> %v1, <2 x i64> %v2, <2 x i32> <i32 3, i32 2>
462  store <2 x i64> %v3, ptr %vp
463  ret void
464}
465
; swap_store10: shuffles two <4 x i32> arguments with mask <3, 2, 1, 0> (only
; lanes of %v1, fully reversed) and stores the result through %vp.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single stxvw4x of v2 to r7. The other three runs load a vector through
; the TOC from a .LCPI label, apply vperm (pwr8) or xxperm (pwr9 big-endian),
; and then store; pwr8 little-endian also has an xxswapd before its stxvd2x.
466define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, ptr %vp) {
467; CHECK-P8-LABEL: swap_store10:
468; CHECK-P8:       # %bb.0:
469; CHECK-P8-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
470; CHECK-P8-NEXT:    addi r3, r3, .LCPI13_0@toc@l
471; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
472; CHECK-P8-NEXT:    xxswapd v3, vs0
473; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
474; CHECK-P8-NEXT:    xxswapd vs0, v2
475; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
476; CHECK-P8-NEXT:    blr
477;
478; CHECK-P9-LABEL: swap_store10:
479; CHECK-P9:       # %bb.0:
480; CHECK-P9-NEXT:    stxvw4x v2, 0, r7
481; CHECK-P9-NEXT:    blr
482;
483; CHECK-P8-BE-LABEL: swap_store10:
484; CHECK-P8-BE:       # %bb.0:
485; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
486; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI13_0@toc@l
487; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
488; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
489; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
490; CHECK-P8-BE-NEXT:    blr
491;
492; CHECK-P9-BE-LABEL: swap_store10:
493; CHECK-P9-BE:       # %bb.0:
494; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
495; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI13_0@toc@l
496; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
497; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
498; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
499; CHECK-P9-BE-NEXT:    blr
500  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
501  store <4 x i32> %v3, ptr %vp
502  ret void
503}
504
; swap_store11: shuffles two <4 x i32> arguments with mask <7, 6, 5, 4> (only
; lanes of the second operand %v2, fully reversed) and stores the result
; through %vp.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single stxvw4x of v3 to r7. The other three runs load a vector through
; the TOC from a .LCPI label, apply vperm (pwr8) or xxperm (pwr9 big-endian)
; to v3, and then store; pwr8 little-endian also has an xxswapd before its
; stxvd2x.
505define void @swap_store11(<4 x i32> %v1, <4 x i32> %v2, ptr %vp) {
506; CHECK-P8-LABEL: swap_store11:
507; CHECK-P8:       # %bb.0:
508; CHECK-P8-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
509; CHECK-P8-NEXT:    addi r3, r3, .LCPI14_0@toc@l
510; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
511; CHECK-P8-NEXT:    xxswapd v2, vs0
512; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
513; CHECK-P8-NEXT:    xxswapd vs0, v2
514; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
515; CHECK-P8-NEXT:    blr
516;
517; CHECK-P9-LABEL: swap_store11:
518; CHECK-P9:       # %bb.0:
519; CHECK-P9-NEXT:    stxvw4x v3, 0, r7
520; CHECK-P9-NEXT:    blr
521;
522; CHECK-P8-BE-LABEL: swap_store11:
523; CHECK-P8-BE:       # %bb.0:
524; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
525; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI14_0@toc@l
526; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
527; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
528; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
529; CHECK-P8-BE-NEXT:    blr
530;
531; CHECK-P9-BE-LABEL: swap_store11:
532; CHECK-P9-BE:       # %bb.0:
533; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
534; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI14_0@toc@l
535; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
536; CHECK-P9-BE-NEXT:    xxperm v3, v3, vs0
537; CHECK-P9-BE-NEXT:    stxv v3, 0(r7)
538; CHECK-P9-BE-NEXT:    blr
539  %v3 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
540  store <4 x i32> %v3, ptr %vp
541  ret void
542}
543
; swap_store20: shuffles two <8 x i16> arguments with mask <7 .. 0> (only
; lanes of %v1, fully reversed) and stores the result through %vp.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single stxvh8x of v2 to r7. The other three runs load a vector through
; the TOC from a .LCPI label, apply vperm (pwr8) or xxperm (pwr9 big-endian),
; and then store; pwr8 little-endian also has an xxswapd before its stxvd2x.
544define void @swap_store20(<8 x i16> %v1, <8 x i16> %v2, ptr %vp) {
545; CHECK-P8-LABEL: swap_store20:
546; CHECK-P8:       # %bb.0:
547; CHECK-P8-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
548; CHECK-P8-NEXT:    addi r3, r3, .LCPI15_0@toc@l
549; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
550; CHECK-P8-NEXT:    xxswapd v3, vs0
551; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
552; CHECK-P8-NEXT:    xxswapd vs0, v2
553; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
554; CHECK-P8-NEXT:    blr
555;
556; CHECK-P9-LABEL: swap_store20:
557; CHECK-P9:       # %bb.0:
558; CHECK-P9-NEXT:    stxvh8x v2, 0, r7
559; CHECK-P9-NEXT:    blr
560;
561; CHECK-P8-BE-LABEL: swap_store20:
562; CHECK-P8-BE:       # %bb.0:
563; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
564; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI15_0@toc@l
565; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
566; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
567; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
568; CHECK-P8-BE-NEXT:    blr
569;
570; CHECK-P9-BE-LABEL: swap_store20:
571; CHECK-P9-BE:       # %bb.0:
572; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
573; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI15_0@toc@l
574; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
575; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
576; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
577; CHECK-P9-BE-NEXT:    blr
578  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
579  store <8 x i16> %v3, ptr %vp
580  ret void
581}
582
; swap_store21: shuffles two <8 x i16> arguments with mask <15 .. 8> (only
; lanes of the second operand %v2, fully reversed) and stores the result
; through %vp.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single stxvh8x of v3 to r7. The other three runs load a vector through
; the TOC from a .LCPI label, apply vperm (pwr8) or xxperm (pwr9 big-endian)
; to v3, and then store; pwr8 little-endian also has an xxswapd before its
; stxvd2x.
583define void @swap_store21(<8 x i16> %v1, <8 x i16> %v2, ptr %vp) {
584; CHECK-P8-LABEL: swap_store21:
585; CHECK-P8:       # %bb.0:
586; CHECK-P8-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
587; CHECK-P8-NEXT:    addi r3, r3, .LCPI16_0@toc@l
588; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
589; CHECK-P8-NEXT:    xxswapd v2, vs0
590; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
591; CHECK-P8-NEXT:    xxswapd vs0, v2
592; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
593; CHECK-P8-NEXT:    blr
594;
595; CHECK-P9-LABEL: swap_store21:
596; CHECK-P9:       # %bb.0:
597; CHECK-P9-NEXT:    stxvh8x v3, 0, r7
598; CHECK-P9-NEXT:    blr
599;
600; CHECK-P8-BE-LABEL: swap_store21:
601; CHECK-P8-BE:       # %bb.0:
602; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
603; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI16_0@toc@l
604; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
605; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
606; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
607; CHECK-P8-BE-NEXT:    blr
608;
609; CHECK-P9-BE-LABEL: swap_store21:
610; CHECK-P9-BE:       # %bb.0:
611; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI16_0@toc@ha
612; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI16_0@toc@l
613; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
614; CHECK-P9-BE-NEXT:    xxperm v3, v3, vs0
615; CHECK-P9-BE-NEXT:    stxv v3, 0(r7)
616; CHECK-P9-BE-NEXT:    blr
617  %v3 = shufflevector <8 x i16> %v1, <8 x i16> %v2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
618  store <8 x i16> %v3, ptr %vp
619  ret void
620}
621
; swap_store30: shuffles two <16 x i8> arguments with mask <15 .. 0> (only
; lanes of %v1, fully reversed) and stores the result through %vp.
; Expected output recorded below: pwr9 little-endian collapses to a single
; stxvb16x of v2 to r7, and pwr9 big-endian to xxbrq followed by stxv. Both
; pwr8 runs load a vector through the TOC from a .LCPI label, apply vperm,
; and then store; pwr8 little-endian also has an xxswapd before its stxvd2x.
622define void @swap_store30(<16 x i8> %v1, <16 x i8> %v2, ptr %vp) {
623; CHECK-P8-LABEL: swap_store30:
624; CHECK-P8:       # %bb.0:
625; CHECK-P8-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
626; CHECK-P8-NEXT:    addi r3, r3, .LCPI17_0@toc@l
627; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
628; CHECK-P8-NEXT:    xxswapd v3, vs0
629; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
630; CHECK-P8-NEXT:    xxswapd vs0, v2
631; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
632; CHECK-P8-NEXT:    blr
633;
634; CHECK-P9-LABEL: swap_store30:
635; CHECK-P9:       # %bb.0:
636; CHECK-P9-NEXT:    stxvb16x v2, 0, r7
637; CHECK-P9-NEXT:    blr
638;
639; CHECK-P8-BE-LABEL: swap_store30:
640; CHECK-P8-BE:       # %bb.0:
641; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI17_0@toc@ha
642; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI17_0@toc@l
643; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
644; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
645; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
646; CHECK-P8-BE-NEXT:    blr
647;
648; CHECK-P9-BE-LABEL: swap_store30:
649; CHECK-P9-BE:       # %bb.0:
650; CHECK-P9-BE-NEXT:    xxbrq vs0, v2
651; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
652; CHECK-P9-BE-NEXT:    blr
653  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
654  store <16 x i8> %v3, ptr %vp
655  ret void
656}
657
; swap_store31: shuffles two <16 x i8> arguments with mask <31 .. 16> (only
; lanes of the second operand %v2, fully reversed) and stores the result
; through %vp.
; Expected output recorded below: pwr9 little-endian collapses to a single
; stxvb16x of v3 to r7, and pwr9 big-endian to xxbrq of v3 followed by stxv.
; Both pwr8 runs load a vector through the TOC from a .LCPI label, apply
; vperm to v3, and then store; pwr8 little-endian also has an xxswapd before
; its stxvd2x.
658define void @swap_store31(<16 x i8> %v1, <16 x i8> %v2, ptr %vp) {
659; CHECK-P8-LABEL: swap_store31:
660; CHECK-P8:       # %bb.0:
661; CHECK-P8-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
662; CHECK-P8-NEXT:    addi r3, r3, .LCPI18_0@toc@l
663; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
664; CHECK-P8-NEXT:    xxswapd v2, vs0
665; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
666; CHECK-P8-NEXT:    xxswapd vs0, v2
667; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
668; CHECK-P8-NEXT:    blr
669;
670; CHECK-P9-LABEL: swap_store31:
671; CHECK-P9:       # %bb.0:
672; CHECK-P9-NEXT:    stxvb16x v3, 0, r7
673; CHECK-P9-NEXT:    blr
674;
675; CHECK-P8-BE-LABEL: swap_store31:
676; CHECK-P8-BE:       # %bb.0:
677; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI18_0@toc@ha
678; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI18_0@toc@l
679; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
680; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
681; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
682; CHECK-P8-BE-NEXT:    blr
683;
684; CHECK-P9-BE-LABEL: swap_store31:
685; CHECK-P9-BE:       # %bb.0:
686; CHECK-P9-BE-NEXT:    xxbrq vs0, v3
687; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
688; CHECK-P9-BE-NEXT:    blr
689  %v3 = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16>
690  store <16 x i8> %v3, ptr %vp
691  ret void
692}
693
; swap_store40: shuffles two <2 x double> arguments with mask <1, 0> (only
; lanes of %v1, in reversed order) and stores the result through %vp.
; Expected output recorded below: the little-endian runs emit a lone stxvd2x
; of v2 to r7; the big-endian runs emit xxswapd into vs0 followed by the store
; (stxvd2x on pwr8, stxv on pwr9).
694define void @swap_store40(<2 x double> %v1, <2 x double> %v2, ptr %vp) {
695; CHECK-P8-LABEL: swap_store40:
696; CHECK-P8:       # %bb.0:
697; CHECK-P8-NEXT:    stxvd2x v2, 0, r7
698; CHECK-P8-NEXT:    blr
699;
700; CHECK-P9-LABEL: swap_store40:
701; CHECK-P9:       # %bb.0:
702; CHECK-P9-NEXT:    stxvd2x v2, 0, r7
703; CHECK-P9-NEXT:    blr
704;
705; CHECK-P8-BE-LABEL: swap_store40:
706; CHECK-P8-BE:       # %bb.0:
707; CHECK-P8-BE-NEXT:    xxswapd vs0, v2
708; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
709; CHECK-P8-BE-NEXT:    blr
710;
711; CHECK-P9-BE-LABEL: swap_store40:
712; CHECK-P9-BE:       # %bb.0:
713; CHECK-P9-BE-NEXT:    xxswapd vs0, v2
714; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
715; CHECK-P9-BE-NEXT:    blr
716  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 1, i32 0>
717  store <2 x double> %v3, ptr %vp
718  ret void
719}
720
; swap_store41: shuffles two <2 x double> arguments with mask <3, 2> (only
; lanes of the second operand %v2, in reversed order) and stores the result
; through %vp.
; Expected output recorded below: the little-endian runs emit a lone stxvd2x
; of v3 to r7; the big-endian runs emit xxswapd into vs0 followed by the store
; (stxvd2x on pwr8, stxv on pwr9).
721define void @swap_store41(<2 x double> %v1, <2 x double> %v2, ptr %vp) {
722; CHECK-P8-LABEL: swap_store41:
723; CHECK-P8:       # %bb.0:
724; CHECK-P8-NEXT:    stxvd2x v3, 0, r7
725; CHECK-P8-NEXT:    blr
726;
727; CHECK-P9-LABEL: swap_store41:
728; CHECK-P9:       # %bb.0:
729; CHECK-P9-NEXT:    stxvd2x v3, 0, r7
730; CHECK-P9-NEXT:    blr
731;
732; CHECK-P8-BE-LABEL: swap_store41:
733; CHECK-P8-BE:       # %bb.0:
734; CHECK-P8-BE-NEXT:    xxswapd vs0, v3
735; CHECK-P8-BE-NEXT:    stxvd2x vs0, 0, r7
736; CHECK-P8-BE-NEXT:    blr
737;
738; CHECK-P9-BE-LABEL: swap_store41:
739; CHECK-P9-BE:       # %bb.0:
740; CHECK-P9-BE-NEXT:    xxswapd vs0, v3
741; CHECK-P9-BE-NEXT:    stxv vs0, 0(r7)
742; CHECK-P9-BE-NEXT:    blr
743  %v3 = shufflevector <2 x double> %v1, <2 x double> %v2, <2 x i32> <i32 3, i32 2>
744  store <2 x double> %v3, ptr %vp
745  ret void
746}
747
; swap_store50: shuffles two <4 x float> arguments with mask <3, 2, 1, 0>
; (only lanes of %v1, fully reversed) and stores the result through %vp.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single stxvw4x of v2 to r7. The other three runs load a vector through
; the TOC from a .LCPI label, apply vperm (pwr8) or xxperm (pwr9 big-endian),
; and then store; pwr8 little-endian also has an xxswapd before its stxvd2x.
748define void @swap_store50(<4 x float> %v1, <4 x float> %v2, ptr %vp) {
749; CHECK-P8-LABEL: swap_store50:
750; CHECK-P8:       # %bb.0:
751; CHECK-P8-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
752; CHECK-P8-NEXT:    addi r3, r3, .LCPI21_0@toc@l
753; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
754; CHECK-P8-NEXT:    xxswapd v3, vs0
755; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
756; CHECK-P8-NEXT:    xxswapd vs0, v2
757; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
758; CHECK-P8-NEXT:    blr
759;
760; CHECK-P9-LABEL: swap_store50:
761; CHECK-P9:       # %bb.0:
762; CHECK-P9-NEXT:    stxvw4x v2, 0, r7
763; CHECK-P9-NEXT:    blr
764;
765; CHECK-P8-BE-LABEL: swap_store50:
766; CHECK-P8-BE:       # %bb.0:
767; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
768; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI21_0@toc@l
769; CHECK-P8-BE-NEXT:    lxvw4x v3, 0, r3
770; CHECK-P8-BE-NEXT:    vperm v2, v2, v2, v3
771; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
772; CHECK-P8-BE-NEXT:    blr
773;
774; CHECK-P9-BE-LABEL: swap_store50:
775; CHECK-P9-BE:       # %bb.0:
776; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI21_0@toc@ha
777; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI21_0@toc@l
778; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
779; CHECK-P9-BE-NEXT:    xxperm v2, v2, vs0
780; CHECK-P9-BE-NEXT:    stxv v2, 0(r7)
781; CHECK-P9-BE-NEXT:    blr
782  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
783  store <4 x float> %v3, ptr %vp
784  ret void
785}
786
; swap_store51: shuffles two <4 x float> arguments with mask <7, 6, 5, 4>
; (only lanes of the second operand %v2, fully reversed) and stores the result
; through %vp.
; Expected output recorded below: only the pwr9 little-endian run collapses to
; a single stxvw4x of v3 to r7. The other three runs load a vector through
; the TOC from a .LCPI label, apply vperm (pwr8) or xxperm (pwr9 big-endian)
; to v3, and then store; pwr8 little-endian also has an xxswapd before its
; stxvd2x.
787define void @swap_store51(<4 x float> %v1, <4 x float> %v2, ptr %vp) {
788; CHECK-P8-LABEL: swap_store51:
789; CHECK-P8:       # %bb.0:
790; CHECK-P8-NEXT:    addis r3, r2, .LCPI22_0@toc@ha
791; CHECK-P8-NEXT:    addi r3, r3, .LCPI22_0@toc@l
792; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
793; CHECK-P8-NEXT:    xxswapd v2, vs0
794; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
795; CHECK-P8-NEXT:    xxswapd vs0, v2
796; CHECK-P8-NEXT:    stxvd2x vs0, 0, r7
797; CHECK-P8-NEXT:    blr
798;
799; CHECK-P9-LABEL: swap_store51:
800; CHECK-P9:       # %bb.0:
801; CHECK-P9-NEXT:    stxvw4x v3, 0, r7
802; CHECK-P9-NEXT:    blr
803;
804; CHECK-P8-BE-LABEL: swap_store51:
805; CHECK-P8-BE:       # %bb.0:
806; CHECK-P8-BE-NEXT:    addis r3, r2, .LCPI22_0@toc@ha
807; CHECK-P8-BE-NEXT:    addi r3, r3, .LCPI22_0@toc@l
808; CHECK-P8-BE-NEXT:    lxvw4x v2, 0, r3
809; CHECK-P8-BE-NEXT:    vperm v2, v3, v3, v2
810; CHECK-P8-BE-NEXT:    stxvw4x v2, 0, r7
811; CHECK-P8-BE-NEXT:    blr
812;
813; CHECK-P9-BE-LABEL: swap_store51:
814; CHECK-P9-BE:       # %bb.0:
815; CHECK-P9-BE-NEXT:    addis r3, r2, .LCPI22_0@toc@ha
816; CHECK-P9-BE-NEXT:    addi r3, r3, .LCPI22_0@toc@l
817; CHECK-P9-BE-NEXT:    lxv vs0, 0(r3)
818; CHECK-P9-BE-NEXT:    xxperm v3, v3, vs0
819; CHECK-P9-BE-NEXT:    stxv v3, 0(r7)
820; CHECK-P9-BE-NEXT:    blr
821  %v3 = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
822  store <4 x float> %v3, ptr %vp
823  ret void
824}
825