xref: /llvm-project/llvm/test/CodeGen/X86/stack-folding-lwp.ll (revision 1ec3ad9ed85292c4b3be04b1b09f7541928cc3f0)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+lwp < %s | FileCheck %s
3
4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-unknown-unknown"
6
7; Stack reload folding tests.
8;
9; By including a nop call with sideeffects we can force a partial register spill of the
10; relevant registers and check that the reload is correctly folded into the instruction.
11
; stack_fold_lwpins_u32: reload-folding test for the 32-bit lwpins intrinsic.
; The inline-asm nop names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as
; clobbered, so both incoming arguments (%edi and %esi) are spilled to the
; stack before it. After the nop the expected output reloads one 4-byte slot
; into %eax and uses the other slot directly as the memory operand of lwpins
; (the "Folded Reload" line). The immediate 2814 is printed as 0xAFE, and the
; i8 return value is materialised with setb %al.
; NOTE(review): both slots are 4 bytes here, so the assertions alone do not
; show which argument ends up folded - presumably %a1; confirm.
12define i8 @stack_fold_lwpins_u32(i32 %a0, i32 %a1) {
13; CHECK-LABEL: stack_fold_lwpins_u32:
14; CHECK:       # %bb.0:
15; CHECK-NEXT:    pushq %rbp
16; CHECK-NEXT:    .cfi_def_cfa_offset 16
17; CHECK-NEXT:    pushq %r15
18; CHECK-NEXT:    .cfi_def_cfa_offset 24
19; CHECK-NEXT:    pushq %r14
20; CHECK-NEXT:    .cfi_def_cfa_offset 32
21; CHECK-NEXT:    pushq %r13
22; CHECK-NEXT:    .cfi_def_cfa_offset 40
23; CHECK-NEXT:    pushq %r12
24; CHECK-NEXT:    .cfi_def_cfa_offset 48
25; CHECK-NEXT:    pushq %rbx
26; CHECK-NEXT:    .cfi_def_cfa_offset 56
27; CHECK-NEXT:    .cfi_offset %rbx, -56
28; CHECK-NEXT:    .cfi_offset %r12, -48
29; CHECK-NEXT:    .cfi_offset %r13, -40
30; CHECK-NEXT:    .cfi_offset %r14, -32
31; CHECK-NEXT:    .cfi_offset %r15, -24
32; CHECK-NEXT:    .cfi_offset %rbp, -16
33; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
34; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
35; CHECK-NEXT:    #APP
36; CHECK-NEXT:    nop
37; CHECK-NEXT:    #NO_APP
38; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
39; CHECK-NEXT:    lwpins $2814, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
40; CHECK-NEXT:    # imm = 0xAFE
41; CHECK-NEXT:    setb %al
42; CHECK-NEXT:    popq %rbx
43; CHECK-NEXT:    .cfi_def_cfa_offset 48
44; CHECK-NEXT:    popq %r12
45; CHECK-NEXT:    .cfi_def_cfa_offset 40
46; CHECK-NEXT:    popq %r13
47; CHECK-NEXT:    .cfi_def_cfa_offset 32
48; CHECK-NEXT:    popq %r14
49; CHECK-NEXT:    .cfi_def_cfa_offset 24
50; CHECK-NEXT:    popq %r15
51; CHECK-NEXT:    .cfi_def_cfa_offset 16
52; CHECK-NEXT:    popq %rbp
53; CHECK-NEXT:    .cfi_def_cfa_offset 8
54; CHECK-NEXT:    retq
55  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
56  %2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 2814)
57  ret i8 %2
58}
; Intrinsic under test: takes (i32, i32, i32) and returns i8.
59declare i8 @llvm.x86.lwpins32(i32, i32, i32)
60
; stack_fold_lwpins_u64: reload-folding test for the 64-bit lwpins intrinsic.
; The inline-asm nop names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as
; clobbered, so %rdi (the i64 %a0) is spilled as 8 bytes and %esi (the i32
; %a1) as 4 bytes before it. After the nop the expected output reloads the
; 8-byte slot into %rax, while the 4-byte slot is used directly as the memory
; operand of lwpins (the "4-byte Folded Reload" line). The immediate 2814 is
; printed as 0xAFE, and the i8 return value is materialised with setb %al.
61define i8 @stack_fold_lwpins_u64(i64 %a0, i32 %a1) {
62; CHECK-LABEL: stack_fold_lwpins_u64:
63; CHECK:       # %bb.0:
64; CHECK-NEXT:    pushq %rbp
65; CHECK-NEXT:    .cfi_def_cfa_offset 16
66; CHECK-NEXT:    pushq %r15
67; CHECK-NEXT:    .cfi_def_cfa_offset 24
68; CHECK-NEXT:    pushq %r14
69; CHECK-NEXT:    .cfi_def_cfa_offset 32
70; CHECK-NEXT:    pushq %r13
71; CHECK-NEXT:    .cfi_def_cfa_offset 40
72; CHECK-NEXT:    pushq %r12
73; CHECK-NEXT:    .cfi_def_cfa_offset 48
74; CHECK-NEXT:    pushq %rbx
75; CHECK-NEXT:    .cfi_def_cfa_offset 56
76; CHECK-NEXT:    .cfi_offset %rbx, -56
77; CHECK-NEXT:    .cfi_offset %r12, -48
78; CHECK-NEXT:    .cfi_offset %r13, -40
79; CHECK-NEXT:    .cfi_offset %r14, -32
80; CHECK-NEXT:    .cfi_offset %r15, -24
81; CHECK-NEXT:    .cfi_offset %rbp, -16
82; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
83; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
84; CHECK-NEXT:    #APP
85; CHECK-NEXT:    nop
86; CHECK-NEXT:    #NO_APP
87; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
88; CHECK-NEXT:    lwpins $2814, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 4-byte Folded Reload
89; CHECK-NEXT:    # imm = 0xAFE
90; CHECK-NEXT:    setb %al
91; CHECK-NEXT:    popq %rbx
92; CHECK-NEXT:    .cfi_def_cfa_offset 48
93; CHECK-NEXT:    popq %r12
94; CHECK-NEXT:    .cfi_def_cfa_offset 40
95; CHECK-NEXT:    popq %r13
96; CHECK-NEXT:    .cfi_def_cfa_offset 32
97; CHECK-NEXT:    popq %r14
98; CHECK-NEXT:    .cfi_def_cfa_offset 24
99; CHECK-NEXT:    popq %r15
100; CHECK-NEXT:    .cfi_def_cfa_offset 16
101; CHECK-NEXT:    popq %rbp
102; CHECK-NEXT:    .cfi_def_cfa_offset 8
103; CHECK-NEXT:    retq
104  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
105  %2 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2814)
106  ret i8 %2
107}
; Intrinsic under test: takes (i64, i32, i32) and returns i8.
108declare i8 @llvm.x86.lwpins64(i64, i32, i32)
109
; stack_fold_lwpval_u32: reload-folding test for the 32-bit lwpval intrinsic.
; The inline-asm nop names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as
; clobbered, so both incoming arguments (%edi and %esi) are spilled to the
; stack before it. After the nop the expected output reloads one 4-byte slot
; into %eax and uses the other slot directly as the memory operand of lwpval
; (the "Folded Reload" line). The immediate 2814 is printed as 0xAFE. The
; function returns void, so no result is materialised after the instruction.
; NOTE(review): both slots are 4 bytes here, so the assertions alone do not
; show which argument ends up folded - presumably %a1; confirm.
110define void @stack_fold_lwpval_u32(i32 %a0, i32 %a1) {
111; CHECK-LABEL: stack_fold_lwpval_u32:
112; CHECK:       # %bb.0:
113; CHECK-NEXT:    pushq %rbp
114; CHECK-NEXT:    .cfi_def_cfa_offset 16
115; CHECK-NEXT:    pushq %r15
116; CHECK-NEXT:    .cfi_def_cfa_offset 24
117; CHECK-NEXT:    pushq %r14
118; CHECK-NEXT:    .cfi_def_cfa_offset 32
119; CHECK-NEXT:    pushq %r13
120; CHECK-NEXT:    .cfi_def_cfa_offset 40
121; CHECK-NEXT:    pushq %r12
122; CHECK-NEXT:    .cfi_def_cfa_offset 48
123; CHECK-NEXT:    pushq %rbx
124; CHECK-NEXT:    .cfi_def_cfa_offset 56
125; CHECK-NEXT:    .cfi_offset %rbx, -56
126; CHECK-NEXT:    .cfi_offset %r12, -48
127; CHECK-NEXT:    .cfi_offset %r13, -40
128; CHECK-NEXT:    .cfi_offset %r14, -32
129; CHECK-NEXT:    .cfi_offset %r15, -24
130; CHECK-NEXT:    .cfi_offset %rbp, -16
131; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
132; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
133; CHECK-NEXT:    #APP
134; CHECK-NEXT:    nop
135; CHECK-NEXT:    #NO_APP
136; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
137; CHECK-NEXT:    lwpval $2814, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
138; CHECK-NEXT:    # imm = 0xAFE
139; CHECK-NEXT:    popq %rbx
140; CHECK-NEXT:    .cfi_def_cfa_offset 48
141; CHECK-NEXT:    popq %r12
142; CHECK-NEXT:    .cfi_def_cfa_offset 40
143; CHECK-NEXT:    popq %r13
144; CHECK-NEXT:    .cfi_def_cfa_offset 32
145; CHECK-NEXT:    popq %r14
146; CHECK-NEXT:    .cfi_def_cfa_offset 24
147; CHECK-NEXT:    popq %r15
148; CHECK-NEXT:    .cfi_def_cfa_offset 16
149; CHECK-NEXT:    popq %rbp
150; CHECK-NEXT:    .cfi_def_cfa_offset 8
151; CHECK-NEXT:    retq
152  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
153  tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 2814)
154  ret void
155}
; Intrinsic under test: takes (i32, i32, i32) and returns nothing.
156declare void @llvm.x86.lwpval32(i32, i32, i32)
157
; stack_fold_lwpval_u64: reload-folding test for the 64-bit lwpval intrinsic.
; The inline-asm nop names rax, rbx, rcx, rdx, rsi, rdi, rbp and r8-r15 as
; clobbered, so %rdi (the i64 %a0) is spilled as 8 bytes and %esi (the i32
; %a1) as 4 bytes before it. After the nop the expected output reloads the
; 8-byte slot into %rax, while the 4-byte slot is used directly as the memory
; operand of lwpval (the "4-byte Folded Reload" line). The immediate 2814 is
; printed as 0xAFE. The function returns void, so no result is materialised
; after the instruction.
158define void @stack_fold_lwpval_u64(i64 %a0, i32 %a1) {
159; CHECK-LABEL: stack_fold_lwpval_u64:
160; CHECK:       # %bb.0:
161; CHECK-NEXT:    pushq %rbp
162; CHECK-NEXT:    .cfi_def_cfa_offset 16
163; CHECK-NEXT:    pushq %r15
164; CHECK-NEXT:    .cfi_def_cfa_offset 24
165; CHECK-NEXT:    pushq %r14
166; CHECK-NEXT:    .cfi_def_cfa_offset 32
167; CHECK-NEXT:    pushq %r13
168; CHECK-NEXT:    .cfi_def_cfa_offset 40
169; CHECK-NEXT:    pushq %r12
170; CHECK-NEXT:    .cfi_def_cfa_offset 48
171; CHECK-NEXT:    pushq %rbx
172; CHECK-NEXT:    .cfi_def_cfa_offset 56
173; CHECK-NEXT:    .cfi_offset %rbx, -56
174; CHECK-NEXT:    .cfi_offset %r12, -48
175; CHECK-NEXT:    .cfi_offset %r13, -40
176; CHECK-NEXT:    .cfi_offset %r14, -32
177; CHECK-NEXT:    .cfi_offset %r15, -24
178; CHECK-NEXT:    .cfi_offset %rbp, -16
179; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
180; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
181; CHECK-NEXT:    #APP
182; CHECK-NEXT:    nop
183; CHECK-NEXT:    #NO_APP
184; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
185; CHECK-NEXT:    lwpval $2814, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 4-byte Folded Reload
186; CHECK-NEXT:    # imm = 0xAFE
187; CHECK-NEXT:    popq %rbx
188; CHECK-NEXT:    .cfi_def_cfa_offset 48
189; CHECK-NEXT:    popq %r12
190; CHECK-NEXT:    .cfi_def_cfa_offset 40
191; CHECK-NEXT:    popq %r13
192; CHECK-NEXT:    .cfi_def_cfa_offset 32
193; CHECK-NEXT:    popq %r14
194; CHECK-NEXT:    .cfi_def_cfa_offset 24
195; CHECK-NEXT:    popq %r15
196; CHECK-NEXT:    .cfi_def_cfa_offset 16
197; CHECK-NEXT:    popq %rbp
198; CHECK-NEXT:    .cfi_def_cfa_offset 8
199; CHECK-NEXT:    retq
200  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
201  tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 2814)
202  ret void
203}
; Intrinsic under test: takes (i64, i32, i32) and returns nothing.
204declare void @llvm.x86.lwpval64(i64, i32, i32)
205