// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512f -target-feature +amx-int8 \
// RUN: -target-feature +amx-bf16 -target-feature +amx-fp16 -emit-llvm -o - -Werror -pedantic | FileCheck %s --check-prefixes=CHECK

// Codegen test for the C-level AMX tile wrappers pulled in via <immintrin.h>.
// The lit invocation above emits LLVM IR and pipes it to FileCheck; the
// directive comments inside each function name the intrinsic calls that are
// expected to appear in that function's IR. The DAG-style directives do not
// impose an order among themselves.

#include <immintrin.h>

// Memory operand for the tile loads on the taken branch of test_api, and the
// destination of its final tile store.
char buf[1024];
// Stride argument handed to every __tile_loadd / __tile_stored call below
// (presumably a byte stride between tile rows -- confirm against the header).
#define STRIDE 32

// Memory operand for the tile loads on the else branch of test_api.
char buf2[1024];

// This is example code and an integration test.
// Declares three tiles shaped from the runtime row/col arguments, loads all
// three from buf when cond is non-zero and from buf2 otherwise, feeds them to
// __tile_dpbssd with c as the pointer argument, and stores c to buf.
void test_api(int cond, short row, short col) {
  //CHECK-LABEL: @test_api
  //CHECK-DAG: call x86_amx @llvm.x86.tileloadd64.internal
  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
  //CHECK-DAG: call x86_amx @llvm.x86.tdpbssd.internal
  //CHECK-DAG: call void @llvm.x86.tilestored64.internal
  // Only the two leading members of each tile are initialized here; they are
  // presumably the tile's row count and column width -- confirm against the
  // __tile1024i definition.
  __tile1024i a = {row, 8};
  __tile1024i b = {8, col};
  __tile1024i c = {row, col};

  // Both arms load the same three tiles and differ only in the source buffer,
  // so the tile values reach the multiply below through a control-flow merge.
  if (cond) {
    __tile_loadd(&a, buf, STRIDE);
    __tile_loadd(&b, buf, STRIDE);
    __tile_loadd(&c, buf, STRIDE);
  } else {
    __tile_loadd(&a, buf2, STRIDE);
    __tile_loadd(&b, buf2, STRIDE);
    __tile_loadd(&c, buf2, STRIDE);
  }
  __tile_dpbssd(&c, a, b);
  __tile_stored(buf, STRIDE, c);
}

// Calls __tile_dpbssd on by-value tile parameters; the IR is expected to
// contain the tdpbssd internal intrinsic together with casts between the
// <256 x i32> vector form and the x86_amx tile type in both directions.
void test_tile_dpbssd(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbssd
  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
  //CHECK-DAG: call x86_amx @llvm.x86.tdpbssd.internal
  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
  __tile_dpbssd(&c, a, b);
}

// Same shape as test_tile_dpbssd, for __tile_dpbsud and the tdpbsud internal
// intrinsic.
void test_tile_dpbsud(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbsud
  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
  //CHECK-DAG: call x86_amx @llvm.x86.tdpbsud.internal
  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
  __tile_dpbsud(&c, a, b);
}

// Same shape as test_tile_dpbssd, for __tile_dpbusd and the tdpbusd internal
// intrinsic.
void test_tile_dpbusd(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbusd
  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
  //CHECK-DAG: call x86_amx @llvm.x86.tdpbusd.internal
  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
  __tile_dpbusd(&c, a, b);
}

// Same shape as test_tile_dpbssd, for __tile_dpbuud and the tdpbuud internal
// intrinsic.
void test_tile_dpbuud(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbuud
  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
  //CHECK-DAG: call x86_amx @llvm.x86.tdpbuud.internal
  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
  __tile_dpbuud(&c, a, b);
}

// Calls __tile_dpbf16ps and expects the tdpbf16ps internal intrinsic plus the
// vector/tile casts. Note the argument order: unlike the integer tests above,
// this call passes &a as the pointer argument and b, c by value.
void test_tile_dpbf16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpbf16ps
  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
  //CHECK-DAG: call x86_amx @llvm.x86.tdpbf16ps.internal
  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
  __tile_dpbf16ps(&a, b, c);
}

// Calls __tile_dpfp16ps and expects the tdpfp16ps internal intrinsic plus the
// vector/tile casts. Uses the same (&a, b, c) argument order as
// test_tile_dpbf16ps.
void test_tile_dpfp16ps(__tile1024i a, __tile1024i b, __tile1024i c) {
  //CHECK-LABEL: @test_tile_dpfp16ps
  //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}})
  //CHECK-DAG: call x86_amx @llvm.x86.tdpfp16ps.internal
  //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}})
  __tile_dpfp16ps(&a, b, c);
}