xref: /llvm-project/llvm/test/CodeGen/PowerPC/P10-stack-alignment.ll (revision a51712751c184ebe056718c938d2526693a31564)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
4; RUN:   FileCheck %s --check-prefix=CHECK-LE
5; RUN: llc -verify-machineinstrs -mcpu=ppc -mtriple=powerpc64-unknown-linux-gnu \
6; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
7; RUN:   FileCheck %s --check-prefix=CHECK-BE
8; RUN: opt --passes='sroa,loop-vectorize,loop-unroll,instcombine' -S \
9; RUN: -vectorizer-maximize-bandwidth --mtriple=powerpc64le-- -mcpu=pwr10 < %s | \
10; RUN: FileCheck %s --check-prefix=CHECK-OPT
11
12target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512"
13
; test_32byte_vector: a 32-byte <8 x i32> stack object that requests 32-byte
; alignment. The object is filled with the constants 1..8, its address is
; passed to the external @test, and element 0 is reloaded and returned.
;
; Both llc runs expect a dynamically realigned frame:
;   * r30 is spilled to -16(r1) and then receives a copy of the incoming r1,
;   * `clrldi r0, r1, 59` keeps only the low 5 bits of r1,
;   * `subfic` subtracts that value from the negated frame size (-96 in the
;     little-endian run, -192 in the big-endian run), and
;   * `stdux r1, r1, r0` stores the old r1 at r1+r0 and updates r1 to that
;     address, which is therefore a multiple of 32.
; The epilogue restores r1 from r30 rather than adding a constant back.
14define dso_local signext i32 @test_32byte_vector() nounwind {
15; CHECK-LE-LABEL: test_32byte_vector:
16; CHECK-LE:       # %bb.0: # %entry
17; CHECK-LE-NEXT:    mflr r0
18; CHECK-LE-NEXT:    std r30, -16(r1)
19; CHECK-LE-NEXT:    mr r30, r1
20; CHECK-LE-NEXT:    std r0, 16(r1)
21; CHECK-LE-NEXT:    clrldi r0, r1, 59
22; CHECK-LE-NEXT:    subfic r0, r0, -96
23; CHECK-LE-NEXT:    stdux r1, r1, r0
24; CHECK-LE-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
25; CHECK-LE-NEXT:    addi r3, r3, .LCPI0_0@toc@l
26; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
27; CHECK-LE-NEXT:    addi r3, r1, 48
28; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
29; CHECK-LE-NEXT:    addis r3, r2, .LCPI0_1@toc@ha
30; CHECK-LE-NEXT:    addi r3, r3, .LCPI0_1@toc@l
31; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
32; CHECK-LE-NEXT:    addi r3, r1, 32
33; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
34; CHECK-LE-NEXT:    bl test
35; CHECK-LE-NEXT:    nop
36; CHECK-LE-NEXT:    lwa r3, 32(r1)
37; CHECK-LE-NEXT:    mr r1, r30
38; CHECK-LE-NEXT:    ld r0, 16(r1)
39; CHECK-LE-NEXT:    ld r30, -16(r1)
40; CHECK-LE-NEXT:    mtlr r0
41; CHECK-LE-NEXT:    blr
42;
43; CHECK-BE-LABEL: test_32byte_vector:
44; CHECK-BE:       # %bb.0: # %entry
45; CHECK-BE-NEXT:    mflr r0
46; CHECK-BE-NEXT:    std r30, -16(r1)
47; CHECK-BE-NEXT:    std r0, 16(r1)
48; CHECK-BE-NEXT:    clrldi r0, r1, 59
49; CHECK-BE-NEXT:    mr r30, r1
50; CHECK-BE-NEXT:    subfic r0, r0, -192
51; CHECK-BE-NEXT:    stdux r1, r1, r0
52; CHECK-BE-NEXT:    lis r3, -8192
53; CHECK-BE-NEXT:    li r4, 5
54; CHECK-BE-NEXT:    lis r5, -16384
55; CHECK-BE-NEXT:    lis r6, -32768
56; CHECK-BE-NEXT:    ori r3, r3, 1
57; CHECK-BE-NEXT:    rldic r4, r4, 32, 29
58; CHECK-BE-NEXT:    ori r5, r5, 1
59; CHECK-BE-NEXT:    ori r6, r6, 1
60; CHECK-BE-NEXT:    rldic r3, r3, 3, 29
61; CHECK-BE-NEXT:    ori r4, r4, 6
62; CHECK-BE-NEXT:    rldic r5, r5, 2, 30
63; CHECK-BE-NEXT:    rldic r6, r6, 1, 31
64; CHECK-BE-NEXT:    std r3, 152(r1)
65; CHECK-BE-NEXT:    addi r3, r1, 128
66; CHECK-BE-NEXT:    std r4, 144(r1)
67; CHECK-BE-NEXT:    std r5, 136(r1)
68; CHECK-BE-NEXT:    std r6, 128(r1)
69; CHECK-BE-NEXT:    bl test
70; CHECK-BE-NEXT:    nop
71; CHECK-BE-NEXT:    lwa r3, 128(r1)
72; CHECK-BE-NEXT:    mr r1, r30
73; CHECK-BE-NEXT:    ld r0, 16(r1)
74; CHECK-BE-NEXT:    ld r30, -16(r1)
75; CHECK-BE-NEXT:    mtlr r0
76; CHECK-BE-NEXT:    blr
77entry:
  ; Stack object under test: 32 bytes wide and 32-byte aligned.
78  %a = alloca <8 x i32>, align 32
79  call void @llvm.lifetime.start.p0(i64 32, ptr %a)
80  store <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>, ptr %a, align 32
  ; The address of the slot is handed to an external callee, then the slot is
  ; read back after the call.
81  call void @test(ptr %a)
82  %0 = load <8 x i32>, ptr %a, align 32
83  %vecext = extractelement <8 x i32> %0, i32 0
84  call void @llvm.lifetime.end.p0(i64 32, ptr %a)
85  ret i32 %vecext
86}
87
; test_32byte_aligned_vector: a 16-byte <4 x i32> stack object that is
; nevertheless declared with 32-byte alignment. It is filled with the
; constants 1..4, its address is passed to the external @test1, and element 0
; is reloaded and returned.
;
; Both llc runs expect the same realignment sequence as a 32-byte-wide object
; would need: r1 is copied to r30, `clrldi r0, r1, 59` extracts the low 5 bits
; of r1, `subfic` subtracts them from the negated frame size (-64 in the
; little-endian run, -160 in the big-endian run) and `stdux` installs the new,
; 32-byte-aligned r1. The epilogue restores r1 from r30.
88define dso_local signext i32 @test_32byte_aligned_vector() nounwind {
89; CHECK-LE-LABEL: test_32byte_aligned_vector:
90; CHECK-LE:       # %bb.0: # %entry
91; CHECK-LE-NEXT:    mflr r0
92; CHECK-LE-NEXT:    std r30, -16(r1)
93; CHECK-LE-NEXT:    mr r30, r1
94; CHECK-LE-NEXT:    std r0, 16(r1)
95; CHECK-LE-NEXT:    clrldi r0, r1, 59
96; CHECK-LE-NEXT:    subfic r0, r0, -64
97; CHECK-LE-NEXT:    stdux r1, r1, r0
98; CHECK-LE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
99; CHECK-LE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
100; CHECK-LE-NEXT:    lxvd2x vs0, 0, r3
101; CHECK-LE-NEXT:    addi r3, r1, 32
102; CHECK-LE-NEXT:    stxvd2x vs0, 0, r3
103; CHECK-LE-NEXT:    bl test1
104; CHECK-LE-NEXT:    nop
105; CHECK-LE-NEXT:    lwa r3, 32(r1)
106; CHECK-LE-NEXT:    mr r1, r30
107; CHECK-LE-NEXT:    ld r0, 16(r1)
108; CHECK-LE-NEXT:    ld r30, -16(r1)
109; CHECK-LE-NEXT:    mtlr r0
110; CHECK-LE-NEXT:    blr
111;
112; CHECK-BE-LABEL: test_32byte_aligned_vector:
113; CHECK-BE:       # %bb.0: # %entry
114; CHECK-BE-NEXT:    mflr r0
115; CHECK-BE-NEXT:    std r30, -16(r1)
116; CHECK-BE-NEXT:    std r0, 16(r1)
117; CHECK-BE-NEXT:    clrldi r0, r1, 59
118; CHECK-BE-NEXT:    mr r30, r1
119; CHECK-BE-NEXT:    subfic r0, r0, -160
120; CHECK-BE-NEXT:    stdux r1, r1, r0
121; CHECK-BE-NEXT:    lis r3, -16384
122; CHECK-BE-NEXT:    lis r4, -32768
123; CHECK-BE-NEXT:    ori r3, r3, 1
124; CHECK-BE-NEXT:    ori r4, r4, 1
125; CHECK-BE-NEXT:    rldic r3, r3, 2, 30
126; CHECK-BE-NEXT:    rldic r4, r4, 1, 31
127; CHECK-BE-NEXT:    std r3, 136(r1)
128; CHECK-BE-NEXT:    addi r3, r1, 128
129; CHECK-BE-NEXT:    std r4, 128(r1)
130; CHECK-BE-NEXT:    bl test1
131; CHECK-BE-NEXT:    nop
132; CHECK-BE-NEXT:    lwa r3, 128(r1)
133; CHECK-BE-NEXT:    mr r1, r30
134; CHECK-BE-NEXT:    ld r0, 16(r1)
135; CHECK-BE-NEXT:    ld r30, -16(r1)
136; CHECK-BE-NEXT:    mtlr r0
137; CHECK-BE-NEXT:    blr
138entry:
  ; Stack object under test: only 16 bytes wide, but over-aligned to 32 bytes.
139  %a = alloca <4 x i32>, align 32
140  call void @llvm.lifetime.start.p0(i64 16, ptr %a)
141  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr %a, align 32
  ; The address of the slot is handed to an external callee, then the slot is
  ; read back after the call.
142  call void @test1(ptr %a)
143  %0 = load <4 x i32>, ptr %a, align 32
144  %vecext = extractelement <4 x i32> %0, i32 0
145  call void @llvm.lifetime.end.p0(i64 16, ptr %a)
146  ret i32 %vecext
147}
148
149
; Source buffer for @test_Array: 64 zero-initialised bytes with alignment 1.
150@Arr1 = dso_local global [64 x i8] zeroinitializer, align 1
151
; test_Array: widens the 64 bytes of @Arr1 into a local [64 x i16] array that
; is only 2-byte aligned, using a scalar loop whose induction variable lives
; in the stack slot %i, and then passes the array to the external @test_arr.
;
; Three things are checked:
;   * the opt run (sroa, loop-vectorize, loop-unroll, instcombine) must leave
;     the %Arr2 alloca at `align 2` and must emit a `store <16 x i16>` whose
;     alignment is still 2. The stored value and the destination pointer are
;     captured under separate FileCheck variable names because they are
;     different operands;
;   * both llc runs must allocate a fixed-size frame with a single `stdu`
;     (-176 bytes little-endian, -256 bytes big-endian) and release it with a
;     matching `addi`, with no clrldi/subfic/stdux realignment sequence.
152define dso_local void @test_Array() nounwind {
153; CHECK-OPT-LABEL: @test_Array(
154; CHECK-OPT-NEXT: entry:
155; CHECK-OPT-NEXT: %Arr2 = alloca [64 x i16], align 2
156; CHECK-OPT: store <16 x i16> [[VEC:%.*]], ptr [[PTR:%.*]], align 2
157; CHECK-LE-LABEL: test_Array:
158; CHECK-LE:       # %bb.0: # %entry
159; CHECK-LE-NEXT:    mflr r0
160; CHECK-LE-NEXT:    stdu r1, -176(r1)
161; CHECK-LE-NEXT:    li r3, 0
162; CHECK-LE-NEXT:    addis r4, r2, Arr1@toc@ha
163; CHECK-LE-NEXT:    li r6, 65
164; CHECK-LE-NEXT:    std r0, 192(r1)
165; CHECK-LE-NEXT:    addi r5, r1, 46
166; CHECK-LE-NEXT:    stw r3, 44(r1)
167; CHECK-LE-NEXT:    addi r4, r4, Arr1@toc@l
168; CHECK-LE-NEXT:    mtctr r6
169; CHECK-LE-NEXT:    addi r4, r4, -1
170; CHECK-LE-NEXT:    bdz .LBB2_2
171; CHECK-LE-NEXT:    .p2align 5
172; CHECK-LE-NEXT:  .LBB2_1: # %for.body
173; CHECK-LE-NEXT:    #
174; CHECK-LE-NEXT:    lbz r6, 1(r4)
175; CHECK-LE-NEXT:    addi r4, r4, 1
176; CHECK-LE-NEXT:    addi r3, r3, 1
177; CHECK-LE-NEXT:    sth r6, 2(r5)
178; CHECK-LE-NEXT:    addi r5, r5, 2
179; CHECK-LE-NEXT:    bdnz .LBB2_1
180; CHECK-LE-NEXT:  .LBB2_2: # %for.cond.cleanup
181; CHECK-LE-NEXT:    addi r3, r1, 48
182; CHECK-LE-NEXT:    bl test_arr
183; CHECK-LE-NEXT:    nop
184; CHECK-LE-NEXT:    addi r1, r1, 176
185; CHECK-LE-NEXT:    ld r0, 16(r1)
186; CHECK-LE-NEXT:    mtlr r0
187; CHECK-LE-NEXT:    blr
188;
189; CHECK-BE-LABEL: test_Array:
190; CHECK-BE:       # %bb.0: # %entry
191; CHECK-BE-NEXT:    mflr r0
192; CHECK-BE-NEXT:    stdu r1, -256(r1)
193; CHECK-BE-NEXT:    addis r5, r2, Arr1@toc@ha
194; CHECK-BE-NEXT:    li r3, 0
195; CHECK-BE-NEXT:    addi r5, r5, Arr1@toc@l
196; CHECK-BE-NEXT:    std r0, 272(r1)
197; CHECK-BE-NEXT:    addi r4, r1, 126
198; CHECK-BE-NEXT:    li r6, 65
199; CHECK-BE-NEXT:    stw r3, 124(r1)
200; CHECK-BE-NEXT:    addi r5, r5, -1
201; CHECK-BE-NEXT:    mtctr r6
202; CHECK-BE-NEXT:    bdz .LBB2_2
203; CHECK-BE-NEXT:  .LBB2_1: # %for.body
204; CHECK-BE-NEXT:    #
205; CHECK-BE-NEXT:    lbz r6, 1(r5)
206; CHECK-BE-NEXT:    addi r5, r5, 1
207; CHECK-BE-NEXT:    addi r3, r3, 1
208; CHECK-BE-NEXT:    sth r6, 2(r4)
209; CHECK-BE-NEXT:    addi r4, r4, 2
210; CHECK-BE-NEXT:    bdnz .LBB2_1
211; CHECK-BE-NEXT:  .LBB2_2: # %for.cond.cleanup
212; CHECK-BE-NEXT:    addi r3, r1, 128
213; CHECK-BE-NEXT:    bl test_arr
214; CHECK-BE-NEXT:    nop
215; CHECK-BE-NEXT:    addi r1, r1, 256
216; CHECK-BE-NEXT:    ld r0, 16(r1)
217; CHECK-BE-NEXT:    mtlr r0
218; CHECK-BE-NEXT:    blr
219entry:
  ; Destination array (128 bytes, 2-byte aligned) and the loop counter slot.
220  %Arr2 = alloca [64 x i16], align 2
221  %i = alloca i32, align 4
222  call void @llvm.lifetime.start.p0(i64 128, ptr %Arr2)
223  call void @llvm.lifetime.start.p0(i64 4, ptr %i)
224  store i32 0, ptr %i, align 4
225  br label %for.cond
226
  ; Loop header: continue while the counter reloaded from %i is below 64.
227for.cond:                                         ; preds = %for.inc, %entry
228  %0 = load i32, ptr %i, align 4
229  %cmp = icmp slt i32 %0, 64
230  br i1 %cmp, label %for.body, label %for.cond.cleanup
231
232for.cond.cleanup:                                 ; preds = %for.cond
233  call void @llvm.lifetime.end.p0(i64 4, ptr %i)
234  br label %for.end
235
  ; Loop body: Arr2[i] = zext(Arr1[i]); the counter is reloaded from memory
  ; for each index computation.
236for.body:                                         ; preds = %for.cond
237  %1 = load i32, ptr %i, align 4
238  %idxprom = sext i32 %1 to i64
239  %arrayidx = getelementptr inbounds [64 x i8], ptr @Arr1, i64 0, i64 %idxprom
240  %2 = load i8, ptr %arrayidx, align 1
241  %conv = zext i8 %2 to i16
242  %3 = load i32, ptr %i, align 4
243  %idxprom1 = sext i32 %3 to i64
244  %arrayidx2 = getelementptr inbounds [64 x i16], ptr %Arr2, i64 0, i64 %idxprom1
245  store i16 %conv, ptr %arrayidx2, align 2
246  br label %for.inc
247
  ; Loop latch: increment the counter in its stack slot.
248for.inc:                                          ; preds = %for.body
249  %4 = load i32, ptr %i, align 4
250  %inc = add nsw i32 %4, 1
251  store i32 %inc, ptr %i, align 4
252  br label %for.cond
253
  ; After the loop the filled array is passed to the external callee.
254for.end:                                          ; preds = %for.cond.cleanup
255  call void @test_arr(ptr %Arr2)
256  call void @llvm.lifetime.end.p0(i64 128, ptr %Arr2)
257  ret void
258}
259
; Lifetime markers wrapped around each alloca's use in the functions of this
; file; the first operand is the size in bytes passed at every call site.
260declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) nounwind
261
; External callees with no body in this file. Each test function passes the
; address of its stack object to one of them.
262declare void @test(ptr) nounwind
263declare void @test1(ptr) nounwind
264declare void @test_arr(ptr)
265
266declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) nounwind
267