xref: /llvm-project/llvm/test/CodeGen/X86/AMX/amx-zero-config.ll (revision 0eb17a9d8672c3503c76a808b0773235b042f5a9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 | FileCheck %s --check-prefix=SSE2
5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f -O0 | FileCheck %s --check-prefix=AVX512-O0
6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx2 -O0 | FileCheck %s --check-prefix=AVX2-O0
7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -O0 | FileCheck %s --check-prefix=SSE2-O0
8
; @foo obtains a zeroed AMX tile from llvm.x86.tilezero.internal and writes it
; to %buf with llvm.x86.tilestored64.internal.
;
; The assertions inside this function are autogenerated and cover the six llc
; invocations at the top of the file: three feature levels (avx512f, avx2, and
; amx-int8 alone), each at the default optimization level and at -O0.
;
; What the checked output shows:
;  * Default level: the stack area later passed to ldtilecfg is first cleared
;    with vector stores (one zmm store, two ymm stores, or four xmm stores,
;    depending on the feature level), then the bytes/words 1, 8 and 32 are
;    written into it before ldtilecfg, tilezero, tilestored and tilerelease.
;  * -O0: the frame is realigned (andq $-1024 / subq $3072), the tile is stored
;    to a stack slot and reloaded with tileloadd, and ldtilecfg is issued a
;    second time before the reload.
9define void @foo(ptr %buf) nounwind {
10; AVX512-LABEL: foo:
11; AVX512:       # %bb.0: # %entry
12; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
13; AVX512-NEXT:    vmovups %zmm0, -{{[0-9]+}}(%rsp)
14; AVX512-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
15; AVX512-NEXT:    movb $8, -{{[0-9]+}}(%rsp)
16; AVX512-NEXT:    movw $32, -{{[0-9]+}}(%rsp)
17; AVX512-NEXT:    ldtilecfg -{{[0-9]+}}(%rsp)
18; AVX512-NEXT:    movw $32, %ax
19; AVX512-NEXT:    movw $8, %cx
20; AVX512-NEXT:    tilezero %tmm0
21; AVX512-NEXT:    movl $1024, %edx # imm = 0x400
22; AVX512-NEXT:    tilestored %tmm0, (%rdi,%rdx)
23; AVX512-NEXT:    tilerelease
24; AVX512-NEXT:    vzeroupper
25; AVX512-NEXT:    retq
26;
27; AVX2-LABEL: foo:
28; AVX2:       # %bb.0: # %entry
29; AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
30; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
31; AVX2-NEXT:    vmovups %ymm0, -{{[0-9]+}}(%rsp)
32; AVX2-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
33; AVX2-NEXT:    movb $8, -{{[0-9]+}}(%rsp)
34; AVX2-NEXT:    movw $32, -{{[0-9]+}}(%rsp)
35; AVX2-NEXT:    ldtilecfg -{{[0-9]+}}(%rsp)
36; AVX2-NEXT:    movw $32, %ax
37; AVX2-NEXT:    movw $8, %cx
38; AVX2-NEXT:    tilezero %tmm0
39; AVX2-NEXT:    movl $1024, %edx # imm = 0x400
40; AVX2-NEXT:    tilestored %tmm0, (%rdi,%rdx)
41; AVX2-NEXT:    tilerelease
42; AVX2-NEXT:    vzeroupper
43; AVX2-NEXT:    retq
44;
45; SSE2-LABEL: foo:
46; SSE2:       # %bb.0: # %entry
47; SSE2-NEXT:    xorps %xmm0, %xmm0
48; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
49; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
50; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
51; SSE2-NEXT:    movups %xmm0, -{{[0-9]+}}(%rsp)
52; SSE2-NEXT:    movb $1, -{{[0-9]+}}(%rsp)
53; SSE2-NEXT:    movb $8, -{{[0-9]+}}(%rsp)
54; SSE2-NEXT:    movw $32, -{{[0-9]+}}(%rsp)
55; SSE2-NEXT:    ldtilecfg -{{[0-9]+}}(%rsp)
56; SSE2-NEXT:    movw $32, %ax
57; SSE2-NEXT:    movw $8, %cx
58; SSE2-NEXT:    tilezero %tmm0
59; SSE2-NEXT:    movl $1024, %edx # imm = 0x400
60; SSE2-NEXT:    tilestored %tmm0, (%rdi,%rdx)
61; SSE2-NEXT:    tilerelease
62; SSE2-NEXT:    retq
63;
64; AVX512-O0-LABEL: foo:
65; AVX512-O0:       # %bb.0: # %entry
66; AVX512-O0-NEXT:    pushq %rbp
67; AVX512-O0-NEXT:    movq %rsp, %rbp
68; AVX512-O0-NEXT:    andq $-1024, %rsp # imm = 0xFC00
69; AVX512-O0-NEXT:    subq $3072, %rsp # imm = 0xC00
70; AVX512-O0-NEXT:    vxorps %xmm0, %xmm0, %xmm0
71; AVX512-O0-NEXT:    vmovups %zmm0, {{[0-9]+}}(%rsp)
72; AVX512-O0-NEXT:    movb $1, {{[0-9]+}}(%rsp)
73; AVX512-O0-NEXT:    movw $32, %cx
74; AVX512-O0-NEXT:    movw $8, %ax
75; AVX512-O0-NEXT:    # implicit-def: $al
76; AVX512-O0-NEXT:    movb %al, {{[0-9]+}}(%rsp)
77; AVX512-O0-NEXT:    movw %cx, {{[0-9]+}}(%rsp)
78; AVX512-O0-NEXT:    ldtilecfg {{[0-9]+}}(%rsp)
79; AVX512-O0-NEXT:    tilezero %tmm0
80; AVX512-O0-NEXT:    movl $64, %esi
81; AVX512-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
82; AVX512-O0-NEXT:    movw $32, %cx
83; AVX512-O0-NEXT:    movw $8, %ax
84; AVX512-O0-NEXT:    tilestored %tmm0, (%rdx,%rsi)
85; AVX512-O0-NEXT:    movl $64, %esi
86; AVX512-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
87; AVX512-O0-NEXT:    movw $32, %cx
88; AVX512-O0-NEXT:    movw $8, %ax
89; AVX512-O0-NEXT:    # implicit-def: $al
90; AVX512-O0-NEXT:    movb %al, {{[0-9]+}}(%rsp)
91; AVX512-O0-NEXT:    movw %cx, {{[0-9]+}}(%rsp)
92; AVX512-O0-NEXT:    ldtilecfg {{[0-9]+}}(%rsp)
93; AVX512-O0-NEXT:    tileloadd (%rdx,%rsi), %tmm0
94; AVX512-O0-NEXT:    movl $1024, %edx # imm = 0x400
95; AVX512-O0-NEXT:    movw $32, %cx
96; AVX512-O0-NEXT:    movw $8, %ax
97; AVX512-O0-NEXT:    tilestored %tmm0, (%rdi,%rdx)
98; AVX512-O0-NEXT:    movq %rbp, %rsp
99; AVX512-O0-NEXT:    popq %rbp
100; AVX512-O0-NEXT:    tilerelease
101; AVX512-O0-NEXT:    vzeroupper
102; AVX512-O0-NEXT:    retq
103;
104; AVX2-O0-LABEL: foo:
105; AVX2-O0:       # %bb.0: # %entry
106; AVX2-O0-NEXT:    pushq %rbp
107; AVX2-O0-NEXT:    movq %rsp, %rbp
108; AVX2-O0-NEXT:    andq $-1024, %rsp # imm = 0xFC00
109; AVX2-O0-NEXT:    subq $3072, %rsp # imm = 0xC00
110; AVX2-O0-NEXT:    vxorps %xmm0, %xmm0, %xmm0
111; AVX2-O0-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
112; AVX2-O0-NEXT:    vmovups %ymm0, {{[0-9]+}}(%rsp)
113; AVX2-O0-NEXT:    movb $1, {{[0-9]+}}(%rsp)
114; AVX2-O0-NEXT:    movw $32, %cx
115; AVX2-O0-NEXT:    movw $8, %ax
116; AVX2-O0-NEXT:    # implicit-def: $al
117; AVX2-O0-NEXT:    movb %al, {{[0-9]+}}(%rsp)
118; AVX2-O0-NEXT:    movw %cx, {{[0-9]+}}(%rsp)
119; AVX2-O0-NEXT:    ldtilecfg {{[0-9]+}}(%rsp)
120; AVX2-O0-NEXT:    tilezero %tmm0
121; AVX2-O0-NEXT:    movl $64, %esi
122; AVX2-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
123; AVX2-O0-NEXT:    movw $32, %cx
124; AVX2-O0-NEXT:    movw $8, %ax
125; AVX2-O0-NEXT:    tilestored %tmm0, (%rdx,%rsi)
126; AVX2-O0-NEXT:    movl $64, %esi
127; AVX2-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
128; AVX2-O0-NEXT:    movw $32, %cx
129; AVX2-O0-NEXT:    movw $8, %ax
130; AVX2-O0-NEXT:    # implicit-def: $al
131; AVX2-O0-NEXT:    movb %al, {{[0-9]+}}(%rsp)
132; AVX2-O0-NEXT:    movw %cx, {{[0-9]+}}(%rsp)
133; AVX2-O0-NEXT:    ldtilecfg {{[0-9]+}}(%rsp)
134; AVX2-O0-NEXT:    tileloadd (%rdx,%rsi), %tmm0
135; AVX2-O0-NEXT:    movl $1024, %edx # imm = 0x400
136; AVX2-O0-NEXT:    movw $32, %cx
137; AVX2-O0-NEXT:    movw $8, %ax
138; AVX2-O0-NEXT:    tilestored %tmm0, (%rdi,%rdx)
139; AVX2-O0-NEXT:    movq %rbp, %rsp
140; AVX2-O0-NEXT:    popq %rbp
141; AVX2-O0-NEXT:    tilerelease
142; AVX2-O0-NEXT:    vzeroupper
143; AVX2-O0-NEXT:    retq
144;
145; SSE2-O0-LABEL: foo:
146; SSE2-O0:       # %bb.0: # %entry
147; SSE2-O0-NEXT:    pushq %rbp
148; SSE2-O0-NEXT:    movq %rsp, %rbp
149; SSE2-O0-NEXT:    andq $-1024, %rsp # imm = 0xFC00
150; SSE2-O0-NEXT:    subq $3072, %rsp # imm = 0xC00
151; SSE2-O0-NEXT:    xorps %xmm0, %xmm0
152; SSE2-O0-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp)
153; SSE2-O0-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp)
154; SSE2-O0-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp)
155; SSE2-O0-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp)
156; SSE2-O0-NEXT:    movb $1, {{[0-9]+}}(%rsp)
157; SSE2-O0-NEXT:    movw $32, %cx
158; SSE2-O0-NEXT:    movw $8, %ax
159; SSE2-O0-NEXT:    # implicit-def: $al
160; SSE2-O0-NEXT:    movb %al, {{[0-9]+}}(%rsp)
161; SSE2-O0-NEXT:    movw %cx, {{[0-9]+}}(%rsp)
162; SSE2-O0-NEXT:    ldtilecfg {{[0-9]+}}(%rsp)
163; SSE2-O0-NEXT:    tilezero %tmm0
164; SSE2-O0-NEXT:    movl $64, %esi
165; SSE2-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
166; SSE2-O0-NEXT:    movw $32, %cx
167; SSE2-O0-NEXT:    movw $8, %ax
168; SSE2-O0-NEXT:    tilestored %tmm0, (%rdx,%rsi)
169; SSE2-O0-NEXT:    movl $64, %esi
170; SSE2-O0-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
171; SSE2-O0-NEXT:    movw $32, %cx
172; SSE2-O0-NEXT:    movw $8, %ax
173; SSE2-O0-NEXT:    # implicit-def: $al
174; SSE2-O0-NEXT:    movb %al, {{[0-9]+}}(%rsp)
175; SSE2-O0-NEXT:    movw %cx, {{[0-9]+}}(%rsp)
176; SSE2-O0-NEXT:    ldtilecfg {{[0-9]+}}(%rsp)
177; SSE2-O0-NEXT:    tileloadd (%rdx,%rsi), %tmm0
178; SSE2-O0-NEXT:    movl $1024, %edx # imm = 0x400
179; SSE2-O0-NEXT:    movw $32, %cx
180; SSE2-O0-NEXT:    movw $8, %ax
181; SSE2-O0-NEXT:    tilestored %tmm0, (%rdi,%rdx)
182; SSE2-O0-NEXT:    movq %rbp, %rsp
183; SSE2-O0-NEXT:    popq %rbp
184; SSE2-O0-NEXT:    tilerelease
185; SSE2-O0-NEXT:    retq
186entry:
  ; Request a zeroed tile. The two i16 operands (8 and 32) show up in the
  ; checked output as a byte store of 8 and a word store of 32 into the
  ; ldtilecfg area -- presumably rows and bytes-per-row of the tile shape
  ; (TODO confirm against the AMX intrinsic documentation).
187  %t = call x86_amx @llvm.x86.tilezero.internal(i16 8, i16 32)
  ; Store the tile through %buf. The i64 operand 1024 is what the checked
  ; output loads into the index register of tilestored (%rdx), i.e. presumably
  ; the stride between rows in bytes -- verify before relying on it.
188  call void @llvm.x86.tilestored64.internal(i16 8, i16 32, ptr %buf, i64 1024, x86_amx %t)
189  ret void
190}
191
; AMX intrinsics called by @foo.
;  * llvm.x86.tilezero.internal: takes two i16 operands and returns an x86_amx
;    value.
;  * llvm.x86.tilestored64.internal: takes two i16 operands, a pointer, an i64
;    and the x86_amx value to store; returns nothing.
; NOTE(review): the i16 pair is presumably the tile shape (rows, bytes per row)
; and the i64 the row stride -- confirm against the LLVM AMX intrinsic docs.
192declare x86_amx @llvm.x86.tilezero.internal(i16, i16)
193declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx)
194