// xref: /llvm-project/clang/test/CodeGen/X86/amx_transpose_api.c (revision 813f7c3820d00349fe23bfc6ba26159764541540)
// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512f \
// RUN: -target-feature +amx-transpose -target-feature +amx-bf16 -target-feature +amx-fp16 -target-feature +amx-complex \
// RUN: -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK

#include <immintrin.h>

// 2048-byte global buffer; passed as the third argument of the
// __tile_2rpntlvw* calls in this file.
char buf[2048];
// Value passed as the final argument of those same calls.
#define STRIDE 32

// Second 2048-byte global buffer; no test visible in this file references it.
char buf2[2048];

12c72a751dSPhoebe Wang void test_tile_2rpntlvwz0(__tile1024i dst0, __tile1024i dst1) {
13c72a751dSPhoebe Wang   //CHECK-LABEL: @test_tile_2rpntlvwz0
14c72a751dSPhoebe Wang   //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0.internal
15c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
16c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
17c72a751dSPhoebe Wang   //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
18c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
19c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
20c72a751dSPhoebe Wang   //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
21c72a751dSPhoebe Wang   __tile_2rpntlvwz0(&dst0, &dst1, buf, STRIDE);
22c72a751dSPhoebe Wang }
23c72a751dSPhoebe Wang 
24c72a751dSPhoebe Wang void test_tile_2rpntlvwz0t1(__tile1024i dst0, __tile1024i dst1) {
25c72a751dSPhoebe Wang   //CHECK-LABEL: @test_tile_2rpntlvwz0t1
26c72a751dSPhoebe Wang   //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz0t1.internal
27c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
28c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
29c72a751dSPhoebe Wang   //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
30c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
31c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
32c72a751dSPhoebe Wang   //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
33c72a751dSPhoebe Wang   __tile_2rpntlvwz0t1(&dst0, &dst1, buf, STRIDE);
34c72a751dSPhoebe Wang }
35c72a751dSPhoebe Wang 
36c72a751dSPhoebe Wang void test_tile_2rpntlvwz1(__tile1024i dst0, __tile1024i dst1) {
37c72a751dSPhoebe Wang   //CHECK-LABEL: @test_tile_2rpntlvwz1
38c72a751dSPhoebe Wang   //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1.internal
39c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
40c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
41c72a751dSPhoebe Wang   //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
42c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
43c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
44c72a751dSPhoebe Wang   //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
45c72a751dSPhoebe Wang   __tile_2rpntlvwz1(&dst0, &dst1, buf, STRIDE);
46c72a751dSPhoebe Wang }
47c72a751dSPhoebe Wang 
48c72a751dSPhoebe Wang void test_tile_2rpntlvwz1t1(__tile1024i dst0, __tile1024i dst1) {
49c72a751dSPhoebe Wang   //CHECK-LABEL: @test_tile_2rpntlvwz1t1
50c72a751dSPhoebe Wang   //CHECK: call { x86_amx, x86_amx } @llvm.x86.t2rpntlvwz1t1.internal
51c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 0
52c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
53c72a751dSPhoebe Wang   //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
54c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = extractvalue { x86_amx, x86_amx } {{%.*}}, 1
55c72a751dSPhoebe Wang   //CHECK-NEXT: {{%.*}} = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
56c72a751dSPhoebe Wang   //CHECK-NEXT: store <256 x i32> {{%.*}}, ptr {{%.*}}
57c72a751dSPhoebe Wang   __tile_2rpntlvwz1t1(&dst0, &dst1, buf, STRIDE);
58c72a751dSPhoebe Wang }
59c72a751dSPhoebe Wang 
60c72a751dSPhoebe Wang void test_tile_transposed(__tile1024i dst, __tile1024i src) {
61c72a751dSPhoebe Wang   //CHECK-LABEL: @test_tile_transposed
62c72a751dSPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
63c72a751dSPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.ttransposed.internal
64c72a751dSPhoebe Wang   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
65c72a751dSPhoebe Wang   __tile_transposed(&dst, src);
66c72a751dSPhoebe Wang }
67*813f7c38SPhoebe Wang 
68*813f7c38SPhoebe Wang void test_tile_tdpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
69*813f7c38SPhoebe Wang   //CHECK-LABEL: @test_tile_tdpbf16ps
70*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
71*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.ttdpbf16ps.internal
72*813f7c38SPhoebe Wang   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
73*813f7c38SPhoebe Wang   __tile_tdpbf16ps(&c, a, b);
74*813f7c38SPhoebe Wang }
75*813f7c38SPhoebe Wang 
76*813f7c38SPhoebe Wang void test_tile_tdpfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
77*813f7c38SPhoebe Wang   //CHECK-LABEL: @test_tile_tdpfp16ps
78*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
79*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.ttdpfp16ps.internal
80*813f7c38SPhoebe Wang   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
81*813f7c38SPhoebe Wang   __tile_tdpfp16ps(&c, a, b);
82*813f7c38SPhoebe Wang }
83*813f7c38SPhoebe Wang 
84*813f7c38SPhoebe Wang void test_tile_tcmmimfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
85*813f7c38SPhoebe Wang   //CHECK-LABEL: @test_tile_tcmmimfp16ps
86*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
87*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.ttcmmimfp16ps.internal
88*813f7c38SPhoebe Wang   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
89*813f7c38SPhoebe Wang   __tile_tcmmimfp16ps(&c, a, b);
90*813f7c38SPhoebe Wang }
91*813f7c38SPhoebe Wang 
92*813f7c38SPhoebe Wang void test_tile_tcmmrlfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
93*813f7c38SPhoebe Wang   //CHECK-LABEL: @test_tile_tcmmrlfp16ps
94*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
95*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.ttcmmrlfp16ps.internal
96*813f7c38SPhoebe Wang   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
97*813f7c38SPhoebe Wang   __tile_tcmmrlfp16ps(&c, a, b);
98*813f7c38SPhoebe Wang }
99*813f7c38SPhoebe Wang 
100*813f7c38SPhoebe Wang void test_tile_conjtcmmimfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
101*813f7c38SPhoebe Wang   //CHECK-LABEL: @test_tile_conjtcmmimfp16ps
102*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
103*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.tconjtcmmimfp16ps.internal
104*813f7c38SPhoebe Wang   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
105*813f7c38SPhoebe Wang   __tile_conjtcmmimfp16ps(&c, a, b);
106*813f7c38SPhoebe Wang }
107*813f7c38SPhoebe Wang 
108*813f7c38SPhoebe Wang void test_tile_conjtfp16(__tile1024i dst, __tile1024i src) {
109*813f7c38SPhoebe Wang   //CHECK-LABEL: @test_tile_conjtfp16
110*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
111*813f7c38SPhoebe Wang   //CHECK-DAG: call x86_amx @llvm.x86.tconjtfp16.internal
112*813f7c38SPhoebe Wang   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
113*813f7c38SPhoebe Wang   __tile_conjtfp16(&dst, src);
114*813f7c38SPhoebe Wang }
115