xref: /llvm-project/mlir/test/Dialect/ArmSME/tile-zero-masks.mlir (revision e37d6d2a74d76fdc95f5c5d625e282ce600aad55)
1// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(convert-arm-sme-to-llvm,canonicalize))" | FileCheck %s
2
3// This test verifies that the tile mask operand of the zero intrinsic selects
4// the correct tiles. Both integer and floating-point datatypes are checked.
5
6// -----
7
8// CHECK-LABEL: zero_za_b
9func.func @zero_za_b() {
  // Zeroing a tile of 8-bit elements is expected to lower to one zero
  // intrinsic whose mask has all eight low bits set (255 == 0b11111111).
10  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 255 : i32}> : () -> ()
11  %tile_b = arm_sme.zero : vector<[16]x[16]xi8>
  // Give the zeroed tile a user.
12  "test.some_use"(%tile_b) : (vector<[16]x[16]xi8>) -> ()
13  return
14}
15
16// -----
17
18// CHECK-LABEL: zero_za_h
19func.func @zero_za_h() {
  // Two tiles of 16-bit elements, one integer (i16) and one floating-point
  // (f16). The expected masks are 85 (0b01010101) and 170 (0b10101010): they
  // share no bits and together cover all eight mask bits.
20  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 85 : i32}> : () -> ()
21  %tile_h0 = arm_sme.zero : vector<[8]x[8]xi16>
  // A non-zero op placed between the two zeroes so that each one is expected
  // to produce its own intrinsic.
22  "test.prevent_zero_merge"() : () -> ()
23  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 170 : i32}> : () -> ()
24  %tile_h1 = arm_sme.zero : vector<[8]x[8]xf16>
  // Give both zeroed tiles a user.
25  "test.some_use"(%tile_h0) : (vector<[8]x[8]xi16>) -> ()
26  "test.some_use"(%tile_h1) : (vector<[8]x[8]xf16>) -> ()
27  return
28}
29
30// -----
31
32// CHECK-LABEL: zero_za_s
33func.func @zero_za_s() {
  // Four tiles of 32-bit elements (three i32, one f32), each separated by a
  // non-zero op so that every zero is expected to produce its own intrinsic.
  // Expected masks, in order:
  //    17 == 0b00010001
  //    34 == 0b00100010
  //    68 == 0b01000100
  //   136 == 0b10001000
  // The patterns below include the opening quote of the op name so they match
  // the generic-form output exactly, like the other checks in this file.
34  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 17 : i32}> : () -> ()
35  %zero_za0s = arm_sme.zero : vector<[4]x[4]xi32>
36  "test.prevent_zero_merge"() : () -> ()
37  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 34 : i32}> : () -> ()
38  %zero_za1s = arm_sme.zero : vector<[4]x[4]xi32>
39  "test.prevent_zero_merge"() : () -> ()
40  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 68 : i32}> : () -> ()
41  %zero_za2s = arm_sme.zero : vector<[4]x[4]xi32>
42  "test.prevent_zero_merge"() : () -> ()
43  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 136 : i32}> : () -> ()
44  %zero_za3s = arm_sme.zero : vector<[4]x[4]xf32>
  // Give every zeroed tile a user.
45  "test.some_use"(%zero_za0s) : (vector<[4]x[4]xi32>) -> ()
46  "test.some_use"(%zero_za1s) : (vector<[4]x[4]xi32>) -> ()
47  "test.some_use"(%zero_za2s) : (vector<[4]x[4]xi32>) -> ()
48  "test.some_use"(%zero_za3s) : (vector<[4]x[4]xf32>) -> ()
49  return
50}
51
52// -----
53
54// CHECK-LABEL: zero_za_d
55func.func @zero_za_d() {
  // Eight tiles of 64-bit elements (seven i64, one f64), each separated by a
  // non-zero op so that every zero is expected to produce its own intrinsic.
  // The expected masks are the single-bit values 1, 2, 4, ..., 128, in order.
56  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 1 : i32}> : () -> ()
57  %tile_d0 = arm_sme.zero : vector<[2]x[2]xi64>
58  "test.prevent_zero_merge"() : () -> ()
59  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 2 : i32}> : () -> ()
60  %tile_d1 = arm_sme.zero : vector<[2]x[2]xi64>
61  "test.prevent_zero_merge"() : () -> ()
62  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 4 : i32}> : () -> ()
63  %tile_d2 = arm_sme.zero : vector<[2]x[2]xi64>
64  "test.prevent_zero_merge"() : () -> ()
65  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 8 : i32}> : () -> ()
66  %tile_d3 = arm_sme.zero : vector<[2]x[2]xi64>
67  "test.prevent_zero_merge"() : () -> ()
68  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 16 : i32}> : () -> ()
69  %tile_d4 = arm_sme.zero : vector<[2]x[2]xi64>
70  "test.prevent_zero_merge"() : () -> ()
71  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 32 : i32}> : () -> ()
72  %tile_d5 = arm_sme.zero : vector<[2]x[2]xi64>
73  "test.prevent_zero_merge"() : () -> ()
74  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 64 : i32}> : () -> ()
75  %tile_d6 = arm_sme.zero : vector<[2]x[2]xi64>
76  "test.prevent_zero_merge"() : () -> ()
77  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 128 : i32}> : () -> ()
78  %tile_d7 = arm_sme.zero : vector<[2]x[2]xf64>
  // Give every zeroed tile a user.
79  "test.some_use"(%tile_d0) : (vector<[2]x[2]xi64>) -> ()
80  "test.some_use"(%tile_d1) : (vector<[2]x[2]xi64>) -> ()
81  "test.some_use"(%tile_d2) : (vector<[2]x[2]xi64>) -> ()
82  "test.some_use"(%tile_d3) : (vector<[2]x[2]xi64>) -> ()
83  "test.some_use"(%tile_d4) : (vector<[2]x[2]xi64>) -> ()
84  "test.some_use"(%tile_d5) : (vector<[2]x[2]xi64>) -> ()
85  "test.some_use"(%tile_d6) : (vector<[2]x[2]xi64>) -> ()
86  "test.some_use"(%tile_d7) : (vector<[2]x[2]xf64>) -> ()
87  return
88}
89
90// -----
91
92// CHECK-LABEL: merge_consecutive_tile_zero_ops
93func.func @merge_consecutive_tile_zero_ops() {
  // Four zeroes of 32-bit element tiles with no other op between them.
  // Exactly one zero intrinsic is expected (the surrounding CHECK-NOT lines
  // reject any others). Its mask, 255, is the bitwise OR of the individual
  // 32-bit tile masks 17, 34, 68 and 136 checked in @zero_za_s.
94  // CHECK-NOT: arm_sme.intr.zero
95  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 255 : i32}> : () -> ()
96  // CHECK-NOT: arm_sme.intr.zero
97  %tile_s0 = arm_sme.zero : vector<[4]x[4]xi32>
98  %tile_s1 = arm_sme.zero : vector<[4]x[4]xi32>
99  %tile_s2 = arm_sme.zero : vector<[4]x[4]xi32>
100  %tile_s3 = arm_sme.zero : vector<[4]x[4]xf32>
  // Give every zeroed tile a user.
101  "test.some_use"(%tile_s0) : (vector<[4]x[4]xi32>) -> ()
102  "test.some_use"(%tile_s1) : (vector<[4]x[4]xi32>) -> ()
103  "test.some_use"(%tile_s2) : (vector<[4]x[4]xi32>) -> ()
104  "test.some_use"(%tile_s3) : (vector<[4]x[4]xf32>) -> ()
105  return
106}
107
108// -----
109
110/// arm_sme.intr.zero intrinsics are not merged when there is an op other than
111/// arm_sme.intr.zero between them.
112
113// CHECK-LABEL: merge_consecutive_tile_zero_ops_with_barrier
114func.func @merge_consecutive_tile_zero_ops_with_barrier() {
  // Two pairs of 32-bit element tile zeroes with a non-zero op between the
  // pairs. Each pair is expected to become one intrinsic, and the two
  // intrinsics must not be combined:
  //   first pair:   51 == 17 | 34   (0b00110011)
  //   second pair: 204 == 68 | 136  (0b11001100)
  // The CHECK-NOT lines reject any additional zero intrinsic around them.
115  // CHECK-NOT: arm_sme.intr.zero
116  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 51 : i32}> : () -> ()
117  // CHECK-NOT: arm_sme.intr.zero
118  %tile_s0 = arm_sme.zero : vector<[4]x[4]xi32>
119  %tile_s1 = arm_sme.zero : vector<[4]x[4]xi32>
  // The op separating the two pairs.
120  "test.prevent_zero_merge"() : () -> ()
121  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 204 : i32}> : () -> ()
122  // CHECK-NOT: arm_sme.intr.zero
123  %tile_s2 = arm_sme.zero : vector<[4]x[4]xi32>
124  %tile_s3 = arm_sme.zero : vector<[4]x[4]xf32>
  // Give every zeroed tile a user.
125  "test.some_use"(%tile_s0) : (vector<[4]x[4]xi32>) -> ()
126  "test.some_use"(%tile_s1) : (vector<[4]x[4]xi32>) -> ()
127  "test.some_use"(%tile_s2) : (vector<[4]x[4]xi32>) -> ()
128  "test.some_use"(%tile_s3) : (vector<[4]x[4]xf32>) -> ()
129  return
130}
131