xref: /llvm-project/llvm/test/CodeGen/SystemZ/store-replicated-vals.ll (revision a65ccc1b9fe740c9f65d9cf2b627de50278aad56)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
3;
4; Test storing of replicated values using vector replicate type instructions.
5
6;; Replicated registers
7
; Replicate a loaded byte into both bytes of an i16: zext, then multiply by
; 257 (0x0101). Expected: a replicating byte load (vlrepb) followed by a
; single halfword element store (vsteh).
8define void @fun_2x1b(ptr %Src, ptr %Dst) {
9; CHECK-LABEL: fun_2x1b:
10; CHECK:       # %bb.0:
11; CHECK-NEXT:    vlrepb %v0, 0(%r2)
12; CHECK-NEXT:    vsteh %v0, 0(%r3), 0
13; CHECK-NEXT:    br %r14
14 %i = load i8, ptr %Src
15 %ZE = zext i8 %i to i16
16 %Val = mul i16 %ZE, 257
17 store i16 %Val, ptr %Dst
18 ret void
19}
20
21; Test multiple stores of the same value: a byte replicated four times
; (multiply by 16843009 = 0x01010101) is stored to both %Dst and %Dst2.
; Expected: one vlrepb whose result feeds two vstef stores.
22define void @fun_4x1b(ptr %Src, ptr %Dst, ptr %Dst2) {
23; CHECK-LABEL: fun_4x1b:
24; CHECK:       # %bb.0:
25; CHECK-NEXT:    vlrepb %v0, 0(%r2)
26; CHECK-NEXT:    vstef %v0, 0(%r3), 0
27; CHECK-NEXT:    vstef %v0, 0(%r4), 0
28; CHECK-NEXT:    br %r14
29 %i = load i8, ptr %Src
30 %ZE = zext i8 %i to i32
31 %Val = mul i32 %ZE, 16843009
32 store i32 %Val, ptr %Dst
33 store i32 %Val, ptr %Dst2
34 ret void
35}
36
; Replicate a loaded byte into all eight bytes of an i64 (multiply by
; 72340172838076673 = 0x0101010101010101). Expected: vlrepb followed by a
; doubleword element store (vsteg).
37define void @fun_8x1b(ptr %Src, ptr %Dst) {
38; CHECK-LABEL: fun_8x1b:
39; CHECK:       # %bb.0:
40; CHECK-NEXT:    vlrepb %v0, 0(%r2)
41; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
42; CHECK-NEXT:    br %r14
43 %i = load i8, ptr %Src
44 %ZE = zext i8 %i to i64
45 %Val = mul i64 %ZE, 72340172838076673
46 store i64 %Val, ptr %Dst
47 ret void
48}
49
50; A second, truncated store of the same value: the replicated i64 is stored
; to %Dst and its i32 truncation to %Dst2. Expected: both stores (vsteg and
; vstef) use the same replicated vector register.
51define void @fun_8x1b_4x1b(ptr %Src, ptr %Dst, ptr %Dst2) {
52; CHECK-LABEL: fun_8x1b_4x1b:
53; CHECK:       # %bb.0:
54; CHECK-NEXT:    vlrepb %v0, 0(%r2)
55; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
56; CHECK-NEXT:    vstef %v0, 0(%r4), 0
57; CHECK-NEXT:    br %r14
58 %i = load i8, ptr %Src
59 %ZE = zext i8 %i to i64
60 %Val = mul i64 %ZE, 72340172838076673
61 store i64 %Val, ptr %Dst
62 %TrVal = trunc i64 %Val to i32
63 store i32 %TrVal, ptr %Dst2
64 ret void
65}
66
; Replicate a loaded halfword into both halves of an i32 (multiply by
; 65537 = 0x00010001). Expected: a replicating halfword load (vlreph) and a
; word element store (vstef).
67define void @fun_2x2b(ptr %Src, ptr %Dst) {
68; CHECK-LABEL: fun_2x2b:
69; CHECK:       # %bb.0:
70; CHECK-NEXT:    vlreph %v0, 0(%r2)
71; CHECK-NEXT:    vstef %v0, 0(%r3), 0
72; CHECK-NEXT:    br %r14
73 %i = load i16, ptr %Src
74 %ZE = zext i16 %i to i32
75 %Val = mul i32 %ZE, 65537
76 store i32 %Val, ptr %Dst
77 ret void
78}
79
; Replicate a loaded halfword four times into an i64 (multiply by
; 281479271743489 = 0x0001000100010001). Expected: vlreph followed by vsteg.
80define void @fun_4x2b(ptr %Src, ptr %Dst) {
81; CHECK-LABEL: fun_4x2b:
82; CHECK:       # %bb.0:
83; CHECK-NEXT:    vlreph %v0, 0(%r2)
84; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
85; CHECK-NEXT:    br %r14
86 %i = load i16, ptr %Src
87 %ZE = zext i16 %i to i64
88 %Val = mul i64 %ZE, 281479271743489
89 store i64 %Val, ptr %Dst
90 ret void
91}
92
; Replicate a loaded word into both halves of an i64 (multiply by
; 4294967297 = 0x0000000100000001). Expected: a replicating word load
; (vlrepf) followed by vsteg.
93define void @fun_2x4b(ptr %Src, ptr %Dst) {
94; CHECK-LABEL: fun_2x4b:
95; CHECK:       # %bb.0:
96; CHECK-NEXT:    vlrepf %v0, 0(%r2)
97; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
98; CHECK-NEXT:    br %r14
99 %i = load i32, ptr %Src
100 %ZE = zext i32 %i to i64
101 %Val = mul i64 %ZE, 4294967297
102 store i64 %Val, ptr %Dst
103 ret void
104}
105
106;; Replicated registers already in a vector.
107
108; Test multiple stores of the same value: a byte-replicated i64 is splatted
; into a <2 x i64> (insertelement + zero-mask shufflevector) and stored to
; both %Dst and %Dst2. Expected: one vlrepb feeding two full vector stores.
109define void @fun_2Eltsx8x1b(ptr %Src, ptr %Dst, ptr %Dst2) {
110; CHECK-LABEL: fun_2Eltsx8x1b:
111; CHECK:       # %bb.0:
112; CHECK-NEXT:    vlrepb %v0, 0(%r2)
113; CHECK-NEXT:    vst %v0, 0(%r3), 3
114; CHECK-NEXT:    vst %v0, 0(%r4), 3
115; CHECK-NEXT:    br %r14
116 %i = load i8, ptr %Src
117 %ZE = zext i8 %i to i64
118 %Mul = mul i64 %ZE, 72340172838076673
119 %tmp = insertelement <2 x i64> undef, i64 %Mul, i32 0
120 %Val = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
121 store <2 x i64> %Val, ptr %Dst
122 store <2 x i64> %Val, ptr %Dst2
123 ret void
124}
125
; Splat of a halfword-replicated i32 (multiply by 65537 = 0x00010001) into a
; <4 x i32>. Expected: vlreph followed by one full vector store (vst).
126define void @fun_4Eltsx2x2b(ptr %Src, ptr %Dst) {
127; CHECK-LABEL: fun_4Eltsx2x2b:
128; CHECK:       # %bb.0:
129; CHECK-NEXT:    vlreph %v0, 0(%r2)
130; CHECK-NEXT:    vst %v0, 0(%r3), 3
131; CHECK-NEXT:    br %r14
132 %i = load i16, ptr %Src
133 %ZE = zext i16 %i to i32
134 %Mul = mul i32 %ZE, 65537
135 %tmp = insertelement <4 x i32> undef, i32 %Mul, i32 0
136 %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
137 store <4 x i32> %Val, ptr %Dst
138 ret void
139}
140
; Same splat with a <6 x i32> (24 bytes) result. Expected: the one vlreph
; result is stored with a vst at offset 0 plus a vsteg at offset 16.
141define void @fun_6Eltsx2x2b(ptr %Src, ptr %Dst) {
142; CHECK-LABEL: fun_6Eltsx2x2b:
143; CHECK:       # %bb.0:
144; CHECK-NEXT:    vlreph %v0, 0(%r2)
145; CHECK-NEXT:    vsteg %v0, 16(%r3), 0
146; CHECK-NEXT:    vst %v0, 0(%r3), 4
147; CHECK-NEXT:    br %r14
148 %i = load i16, ptr %Src
149 %ZE = zext i16 %i to i32
150 %Mul = mul i32 %ZE, 65537
151 %tmp = insertelement <6 x i32> undef, i32 %Mul, i32 0
152 %Val = shufflevector <6 x i32> %tmp, <6 x i32> undef, <6 x i32> zeroinitializer
153 store <6 x i32> %Val, ptr %Dst
154 ret void
155}
156
; Splat of a word-replicated i64 (multiply by 4294967297 =
; 0x0000000100000001) into a <2 x i64>. Expected: vlrepf followed by vst.
157define void @fun_2Eltsx2x4b(ptr %Src, ptr %Dst) {
158; CHECK-LABEL: fun_2Eltsx2x4b:
159; CHECK:       # %bb.0:
160; CHECK-NEXT:    vlrepf %v0, 0(%r2)
161; CHECK-NEXT:    vst %v0, 0(%r3), 3
162; CHECK-NEXT:    br %r14
163 %i = load i32, ptr %Src
164 %ZE = zext i32 %i to i64
165 %Mul = mul i64 %ZE, 4294967297
166 %tmp = insertelement <2 x i64> undef, i64 %Mul, i32 0
167 %Val = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
168 store <2 x i64> %Val, ptr %Dst
169 ret void
170}
171
; Same splat with a <5 x i64> (40 bytes) result. Expected: the one vlrepf
; result is stored with vst at offsets 0 and 16 plus a vsteg at offset 32.
172define void @fun_5Eltsx2x4b(ptr %Src, ptr %Dst) {
173; CHECK-LABEL: fun_5Eltsx2x4b:
174; CHECK:       # %bb.0:
175; CHECK-NEXT:    vlrepf %v0, 0(%r2)
176; CHECK-NEXT:    vsteg %v0, 32(%r3), 0
177; CHECK-NEXT:    vst %v0, 16(%r3), 4
178; CHECK-NEXT:    vst %v0, 0(%r3), 4
179; CHECK-NEXT:    br %r14
180 %i = load i32, ptr %Src
181 %ZE = zext i32 %i to i64
182 %Mul = mul i64 %ZE, 4294967297
183 %tmp = insertelement <5 x i64> undef, i64 %Mul, i32 0
184 %Val = shufflevector <5 x i64> %tmp, <5 x i64> undef, <5 x i32> zeroinitializer
185 store <5 x i64> %Val, ptr %Dst
186 ret void
187}
188
189; Test replicating an incoming argument: the i8 value arrives as a function
; argument rather than from a load, so no replicating load is available.
; Expected: the register is moved into a vector (vlvgp), replicated with
; vrepb, and stored with vsteg.
190define void @fun_8x1b_arg(i8 %Arg, ptr %Dst) {
191; CHECK-LABEL: fun_8x1b_arg:
192; CHECK:       # %bb.0:
193; CHECK-NEXT:    vlvgp %v0, %r2, %r2
194; CHECK-NEXT:    vrepb %v0, %v0, 7
195; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
196; CHECK-NEXT:    br %r14
197 %ZE = zext i8 %Arg to i64
198 %Val = mul i64 %ZE, 72340172838076673
199 store i64 %Val, ptr %Dst
200 ret void
201}
202
203; A replication of a non-local value (ISD::AssertZext case): the i32 is
; loaded in the entry block, while the zext, multiply and splat that
; replicate it are in %bb2. Expected: vlvgp + vrepf and a full vector store.
204define void @fun_nonlocalval() {
205; CHECK-LABEL: fun_nonlocalval:
206; CHECK:       # %bb.0:
207; CHECK-NEXT:    lhi %r0, 0
208; CHECK-NEXT:    ciblh %r0, 0, 0(%r14)
209; CHECK-NEXT:  .LBB13_1: # %bb2
210; CHECK-NEXT:    llgf %r0, 0(%r1)
211; CHECK-NEXT:    vlvgp %v0, %r0, %r0
212; CHECK-NEXT:    vrepf %v0, %v0, 1
213; CHECK-NEXT:    vst %v0, 0(%r1), 3
214; CHECK-NEXT:    br %r14
215  %i = load i32, ptr undef, align 4
216  br i1 undef, label %bb2, label %bb7
217
218bb2:                                              ; preds = %0 (entry)
219  %i3 = zext i32 %i to i64
220  %i4 = mul nuw i64 %i3, 4294967297
221  %i5 = insertelement <2 x i64> poison, i64 %i4, i64 0
222  %i6 = shufflevector <2 x i64> %i5, <2 x i64> poison, <2 x i32> zeroinitializer
223  store <2 x i64> %i6, ptr undef, align 8
224  ret void
225
226bb7:
227  ret void
228}
229
230;; Replicated immediates
231
232; Some cases where a scalar instruction is better
; Store of an all-zero i64. Expected: a single scalar store-immediate
; (mvghi) with no vector instructions.
233define void @fun_8x1i_zero(ptr %Dst) {
234; CHECK-LABEL: fun_8x1i_zero:
235; CHECK:       # %bb.0:
236; CHECK-NEXT:    mvghi 0(%r2), 0
237; CHECK-NEXT:    br %r14
238 store i64 0, ptr %Dst
239 ret void
240}
241
; Store of i32 -1 (all bits set). Expected: a single scalar store-immediate
; (mvhi) with no vector instructions.
242define void @fun_4x1i_minus1(ptr %Dst) {
243; CHECK-LABEL: fun_4x1i_minus1:
244; CHECK:       # %bb.0:
245; CHECK-NEXT:    mvhi 0(%r2), -1
246; CHECK-NEXT:    br %r14
247 store i32 -1, ptr %Dst
248 ret void
249}
250
; Same all-ones i32, written as the unsigned literal 4294967295
; (0xFFFFFFFF). Expected: the same mvhi with immediate -1.
251define void @fun_4x1i_allones(ptr %Dst) {
252; CHECK-LABEL: fun_4x1i_allones:
253; CHECK:       # %bb.0:
254; CHECK-NEXT:    mvhi 0(%r2), -1
255; CHECK-NEXT:    br %r14
256 store i32 4294967295, ptr %Dst
257 ret void
258}
259
; Store of the plain i16 immediate 1. Expected: a single scalar
; store-immediate (mvhhi) with no vector instructions.
260define void @fun_2i(ptr %Dst) {
261; CHECK-LABEL: fun_2i:
262; CHECK:       # %bb.0:
263; CHECK-NEXT:    mvhhi 0(%r2), 1
264; CHECK-NEXT:    br %r14
265 store i16 1, ptr %Dst
266 ret void
267}
268
; Store of the i32 immediate 65537 (0x00010001), i.e. the halfword 1 twice.
; Expected: a replicate-immediate (vrepih ... 1) followed by vstef.
269define void @fun_2x2i(ptr %Dst) {
270; CHECK-LABEL: fun_2x2i:
271; CHECK:       # %bb.0:
272; CHECK-NEXT:    vrepih %v0, 1
273; CHECK-NEXT:    vstef %v0, 0(%r2), 0
274; CHECK-NEXT:    br %r14
275 store i32 65537, ptr %Dst
276 ret void
277}
278
; Store of the i64 immediate 281479271743489 (0x0001000100010001), i.e. the
; halfword 1 four times. Expected: vrepih ... 1 followed by vsteg.
279define void @fun_4x2i(ptr %Dst) {
280; CHECK-LABEL: fun_4x2i:
281; CHECK:       # %bb.0:
282; CHECK-NEXT:    vrepih %v0, 1
283; CHECK-NEXT:    vsteg %v0, 0(%r2), 0
284; CHECK-NEXT:    br %r14
285 store i64 281479271743489, ptr %Dst
286 ret void
287}
288
; Store of the i64 immediate 4294967297 (0x0000000100000001), i.e. the word
; 1 twice. Expected: vrepif ... 1 followed by vsteg.
289define void @fun_2x4i(ptr %Dst) {
290; CHECK-LABEL: fun_2x4i:
291; CHECK:       # %bb.0:
292; CHECK-NEXT:    vrepif %v0, 1
293; CHECK-NEXT:    vsteg %v0, 0(%r2), 0
294; CHECK-NEXT:    br %r14
295 store i64 4294967297, ptr %Dst
296 ret void
297}
298
299; Store a replicated immediate twice using the same vector: the i32
; 50529027 (0x03030303) goes to both %Dst and %Dst2. Expected: one
; vrepib ... 3 feeding two vstef stores.
300define void @fun_4x1i(ptr %Dst, ptr %Dst2) {
301; CHECK-LABEL: fun_4x1i:
302; CHECK:       # %bb.0:
303; CHECK-NEXT:    vrepib %v0, 3
304; CHECK-NEXT:    vstef %v0, 0(%r2), 0
305; CHECK-NEXT:    vstef %v0, 0(%r3), 0
306; CHECK-NEXT:    br %r14
307 store i32 50529027, ptr %Dst
308 store i32 50529027, ptr %Dst2
309 ret void
310}
311
; Same with the i64 immediate 72340172838076673 (0x0101010101010101) stored
; to two destinations. Expected: one vrepib ... 1 feeding two vsteg stores.
312define void @fun_8x1i(ptr %Dst, ptr %Dst2) {
313; CHECK-LABEL: fun_8x1i:
314; CHECK:       # %bb.0:
315; CHECK-NEXT:    vrepib %v0, 1
316; CHECK-NEXT:    vsteg %v0, 0(%r2), 0
317; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
318; CHECK-NEXT:    br %r14
319 store i64 72340172838076673, ptr %Dst
320 store i64 72340172838076673, ptr %Dst2
321 ret void
322}
323
324; Similar, but with vectors: a <4 x i32> splat and a <2 x i32> splat of
; 50529027 (0x03030303) are stored to %Dst and %Dst2. Expected: one
; vrepib ... 3 serves both the full vst and the 8-byte vsteg.
325define void @fun_4Eltsx4x1i_2Eltsx4x1i(ptr %Dst, ptr %Dst2) {
326; CHECK-LABEL: fun_4Eltsx4x1i_2Eltsx4x1i:
327; CHECK:       # %bb.0:
328; CHECK-NEXT:    vrepib %v0, 3
329; CHECK-NEXT:    vst %v0, 0(%r2), 3
330; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
331; CHECK-NEXT:    br %r14
332 %tmp = insertelement <4 x i32> undef, i32 50529027, i32 0
333 %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
334 store <4 x i32> %Val, ptr %Dst
335 %tmp2 = insertelement <2 x i32> undef, i32 50529027, i32 0
336 %Val2 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
337 store <2 x i32> %Val2, ptr %Dst2
338 ret void
339}
340
341; Same, but the 64-bit store is a scalar i64 store in the IR: the immediate
; 217020518514230019 (0x0303030303030303) has the same byte pattern as the
; <4 x i32> splat. Expected: one vrepib ... 3 serves both vst and vsteg.
342define void @fun_4Eltsx4x1i_8x1i(ptr %Dst, ptr %Dst2) {
343; CHECK-LABEL: fun_4Eltsx4x1i_8x1i:
344; CHECK:       # %bb.0:
345; CHECK-NEXT:    vrepib %v0, 3
346; CHECK-NEXT:    vst %v0, 0(%r2), 3
347; CHECK-NEXT:    vsteg %v0, 0(%r3), 0
348; CHECK-NEXT:    br %r14
349 %tmp = insertelement <4 x i32> undef, i32 50529027, i32 0
350 %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
351 store <4 x i32> %Val, ptr %Dst
352 store i64 217020518514230019, ptr %Dst2
353 ret void
354}
355
; Splat of the immediate 4294967297 (0x0000000100000001) into a <3 x i64>
; (24 bytes). Expected: one vrepif ... 1 stored with a vst at offset 0 plus
; a vsteg at offset 16.
356define void @fun_3Eltsx2x4i(ptr %Dst) {
357; CHECK-LABEL: fun_3Eltsx2x4i:
358; CHECK:       # %bb.0:
359; CHECK-NEXT:    vrepif %v0, 1
360; CHECK-NEXT:    vsteg %v0, 16(%r2), 0
361; CHECK-NEXT:    vst %v0, 0(%r2), 4
362; CHECK-NEXT:    br %r14
363 %tmp = insertelement <3 x i64> undef, i64 4294967297, i32 0
364 %Val = shufflevector <3 x i64> %tmp, <3 x i64> undef, <3 x i32> zeroinitializer
365 store <3 x i64> %Val, ptr %Dst
366 ret void
367}
368
; Store of an i128 immediate made of the byte 1 repeated sixteen times, as
; the function name and the expected vrepib ... 1 indicate. Expected: one
; vrepib followed by a full vector store (vst).
369define void @fun_16x1i(ptr %Dst) {
370; CHECK-LABEL: fun_16x1i:
371; CHECK:       # %bb.0:
372; CHECK-NEXT:    vrepib %v0, 1
373; CHECK-NEXT:    vst %v0, 0(%r2), 3
374; CHECK-NEXT:    br %r14
375 store i128 1334440654591915542993625911497130241, ptr %Dst
376 ret void
377}
378