// xref: /llvm-project/mlir/test/Dialect/AMDGPU/canonicalize.mlir (revision e7790fbed32b729ad59cea4b77d152514605cb0e)
// RUN: mlir-opt %s -split-input-file -canonicalize="test-convergence" | FileCheck %s

// CHECK-LABEL: func @known_oob_load
// A bounds-checked buffer load at constant index 4 from a 4-element memref is
// statically out of bounds, so canonicalization folds it to the constant 0.0.
func.func @known_oob_load(%arg0: memref<4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32] : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// CHECK-LABEL: func @known_oob_load_2d
// Out-of-bounds in the leading (row) dimension of a 4x4 memref: row index 4
// is past the end, so the bounds-checked load folds to 0.0.
func.func @known_oob_load_2d(%arg0: memref<4x4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %c0_i32 = arith.constant 0 : i32
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32, %c0_i32] : memref<4x4xf32>, i32, i32 -> f32
  func.return %0 : f32
}

// -----

// CHECK-LABEL: func @known_oob_load_2d_on_last
// Out-of-bounds via the trailing dimension: index 16 in the last dimension
// of a 4x4 memref exceeds the whole buffer, so the load folds to 0.0.
func.func @known_oob_load_2d_on_last(%arg0: memref<4x4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %c0_i32 = arith.constant 0 : i32
  %c16_i32 = arith.constant 16 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c0_i32, %c16_i32] : memref<4x4xf32>, i32, i32 -> f32
  func.return %0 : f32
}

// -----

// CHECK-LABEL: func @known_oob_load_index
// The static indexOffset attribute (4) alone pushes the access past the end
// of the 4-element memref, so the bounds-checked load folds to 0.0.
func.func @known_oob_load_index(%arg0: memref<4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %c0_i32 = arith.constant 0 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true, indexOffset = 4 : i32} %arg0[%c0_i32] : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// CHECK-LABEL: func @known_oob_load_sgproffset
// Constant index 2 plus constant sgprOffset 2 totals 4, one past the end of
// the 4-element memref, so the bounds-checked load folds to 0.0.
func.func @known_oob_load_sgproffset(%arg0: memref<4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %c2_i32 = arith.constant 2 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c2_i32] sgprOffset %c2_i32 : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// CHECK-LABEL: func @unknown_load
// The index is a function argument, so bounds cannot be decided statically:
// the load must be preserved.
func.func @unknown_load(%arg0: memref<4xf32>, %arg1: i32) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%arg1] sgprOffset %c4_i32 : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// CHECK-LABEL: func @unknown_load_sgproffset
// The sgprOffset is a function argument, so the total offset is unknown and
// the load must be preserved even though the index itself is constant.
func.func @unknown_load_sgproffset(%arg0: memref<4xf32>, %arg1: i32) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32] sgprOffset %arg1 : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// CHECK-LABEL: func @unranked
// The memref has a dynamic size (memref<?xf32>), so even a constant index
// cannot be proven out of bounds and the load must be preserved.
func.func @unranked(%arg0: memref<?xf32>) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32] : memref<?xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// CHECK-LABEL: func @no_oob_check
// With boundsCheck = false there is no hardware OOB-returns-zero guarantee,
// so the load is not folded even though the index is statically OOB.
func.func @no_oob_check(%arg0: memref<4xf32>) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = false} %arg0[%c4_i32] : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// CHECK-LABEL: func @in_bounds_overall
// Index 15 exceeds the trailing dimension's extent (4), but the overall
// linearized offset (15) is still inside the 16-element buffer, so the load
// is preserved.
func.func @in_bounds_overall(%arg0: memref<4x4xf32>) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %c0_i32 = arith.constant 0 : i32
  %c15_i32 = arith.constant 15 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c0_i32, %c15_i32] : memref<4x4xf32>, i32, i32 -> f32
  func.return %0 : f32
}

// -----

// CHECK-LABEL: func @dead_store
// A bounds-checked store at a statically out-of-bounds index can never write
// anything, so canonicalization erases it entirely.
func.func @dead_store(%arg0: memref<4xf32>, %arg1: f32) {
  // CHECK-NOT: amdgpu.raw_buffer_store
  %c4_i32 = arith.constant 4 : i32
  amdgpu.raw_buffer_store {boundsCheck = true} %arg1 -> %arg0[%c4_i32] : f32 -> memref<4xf32>, i32
  func.return
}

// -----

// CHECK-LABEL: func @dead_atomic_add
// Likewise, a bounds-checked atomic fadd at a statically out-of-bounds index
// has no effect and is erased.
func.func @dead_atomic_add(%arg0: memref<4xf32>, %arg1: f32) {
  // CHECK-NOT: amdgpu.raw_buffer_atomic_fadd
  %c4_i32 = arith.constant 4 : i32
  amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %arg1 -> %arg0[%c4_i32] : f32 -> memref<4xf32>, i32
  func.return
}
133