xref: /llvm-project/clang/test/CodeGen/X86/amx_api.c (revision b43c97c2ddfe9e922bb044de01312adb81591a48)
1 // RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown  -target-feature +avx512f  -target-feature +amx-int8  \
2 // RUN: -target-feature +amx-bf16 -target-feature +amx-fp16 -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK
3 
4 #include <immintrin.h>
5 
// 1024-byte buffer: source of the tile loads on the cond != 0 path of
// test_api and destination of its final tile store.
6 char buf[1024];
// Value passed as the stride argument to every __tile_loadd and
// __tile_stored call in test_api.
7 #define STRIDE 32
8 
// 1024-byte buffer: source of the tile loads on the cond == 0 path of
// test_api.
9 char buf2[1024];
10 
11 // Example code and integration test for the __tile1024i API.
//
// Builds three tiles from the row/col arguments, loads each of them from buf
// (cond != 0) or from buf2 (cond == 0), calls __tile_dpbssd(&c, a, b) and
// stores c to buf. Because the loads sit in both arms of the if/else, every
// tile reaches the dot-product call from two different control-flow paths.
//
// The DAG-style FileCheck directives inside the function require each listed
// call to appear in the IR emitted for this function without fixing the
// order among them.
12 void test_api(int cond, short row, short col) {
13   //CHECK-LABEL: @test_api
14   //CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
15   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
16   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
17   //CHECK-DAG: call x86_amx @llvm.x86.tdpbssd.internal
18   //CHECK-DAG: call void @llvm.x86.tilestored64.internal
     // NOTE(review): the two initializers are presumably the tile's row count
     // and its column width; confirm against the __tile1024i definition in
     // amxintrin.h.
19   __tile1024i a = {row, 8};
20   __tile1024i b = {8, col};
21   __tile1024i c = {row, col};
22 
     // Same three loads in both arms; only the source buffer differs.
23   if (cond) {
24     __tile_loadd(&a, buf, STRIDE);
25     __tile_loadd(&b, buf, STRIDE);
26     __tile_loadd(&c, buf, STRIDE);
27   } else {
28     __tile_loadd(&a, buf2, STRIDE);
29     __tile_loadd(&b, buf2, STRIDE);
30     __tile_loadd(&c, buf2, STRIDE);
31   }
     // c is passed by address to the dot product and then written out to buf.
32   __tile_dpbssd(&c, a, b);
33   __tile_stored(buf, STRIDE, c);
34 }
35 
// Covers __tile_dpbssd when all three tiles arrive as by-value parameters:
// c is passed by address, a and b by value. The emitted IR must contain a
// vector-to-tile cast, a call to llvm.x86.tdpbssd.internal and a
// tile-to-vector cast; the DAG-style directives leave their relative order
// open.
36 void test_tile_dpbssd(__tile1024i a, __tile1024i b, __tile1024i c) {
37   //CHECK-LABEL: @test_tile_dpbssd
38   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
39   //CHECK-DAG: call x86_amx @llvm.x86.tdpbssd.internal
40   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
41   __tile_dpbssd(&c, a, b);
42 }
43 
// Covers __tile_dpbsud when all three tiles arrive as by-value parameters:
// c is passed by address, a and b by value. The emitted IR must contain a
// vector-to-tile cast, a call to llvm.x86.tdpbsud.internal and a
// tile-to-vector cast; the DAG-style directives leave their relative order
// open.
44 void test_tile_dpbsud(__tile1024i a, __tile1024i b, __tile1024i c) {
45   //CHECK-LABEL: @test_tile_dpbsud
46   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
47   //CHECK-DAG: call x86_amx @llvm.x86.tdpbsud.internal
48   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
49   __tile_dpbsud(&c, a, b);
50 }
51 
// Covers __tile_dpbusd when all three tiles arrive as by-value parameters:
// c is passed by address, a and b by value. The emitted IR must contain a
// vector-to-tile cast, a call to llvm.x86.tdpbusd.internal and a
// tile-to-vector cast; the DAG-style directives leave their relative order
// open.
52 void test_tile_dpbusd(__tile1024i a, __tile1024i b, __tile1024i c) {
53   //CHECK-LABEL: @test_tile_dpbusd
54   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
55   //CHECK-DAG: call x86_amx @llvm.x86.tdpbusd.internal
56   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
57   __tile_dpbusd(&c, a, b);
58 }
59 
// Covers __tile_dpbuud when all three tiles arrive as by-value parameters:
// c is passed by address, a and b by value. The emitted IR must contain a
// vector-to-tile cast, a call to llvm.x86.tdpbuud.internal and a
// tile-to-vector cast; the DAG-style directives leave their relative order
// open.
60 void test_tile_dpbuud(__tile1024i a, __tile1024i b, __tile1024i c) {
61   //CHECK-LABEL: @test_tile_dpbuud
62   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
63   //CHECK-DAG: call x86_amx @llvm.x86.tdpbuud.internal
64   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
65   __tile_dpbuud(&c, a, b);
66 }
67 
// Covers __tile_dpbf16ps (the lit command at the top of the file enables
// +amx-bf16) when all three tiles arrive as by-value parameters. The emitted
// IR must contain a vector-to-tile cast, a call to
// llvm.x86.tdpbf16ps.internal and a tile-to-vector cast; the DAG-style
// directives leave their relative order open.
//
// Note the argument order: here a is the tile passed by address and b, c are
// passed by value, whereas the int8 tests in this file pass &c, a, b.
68 void test_tile_dpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
69   //CHECK-LABEL: @test_tile_dpbf16ps
70   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
71   //CHECK-DAG: call x86_amx @llvm.x86.tdpbf16ps.internal
72   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
73   __tile_dpbf16ps(&a, b, c);
74 }
75 
// Covers __tile_dpfp16ps (the lit command at the top of the file enables
// +amx-fp16) when all three tiles arrive as by-value parameters. The emitted
// IR must contain a vector-to-tile cast, a call to
// llvm.x86.tdpfp16ps.internal and a tile-to-vector cast; the DAG-style
// directives leave their relative order open.
//
// Note the argument order: here a is the tile passed by address and b, c are
// passed by value, whereas the int8 tests in this file pass &c, a, b.
76 void test_tile_dpfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
77   //CHECK-LABEL: @test_tile_dpfp16ps
78   //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
79   //CHECK-DAG: call x86_amx @llvm.x86.tdpfp16ps.internal
80   //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
81   __tile_dpfp16ps(&a, b, c);
82 }
83