// RUN: mlir-opt %s -split-input-file -canonicalize="test-convergence" | FileCheck %s

// Canonicalization tests for the AMDGPU raw buffer operations.
//
// The first group of cases expects a bounds-checked load whose constant
// offset is outside the statically shaped buffer to be replaced by a zero
// constant. The second group expects the load to be left alone because the
// offset or the buffer size is not statically known, because bounds checking
// is disabled, or because the access is in bounds. The last two cases expect
// bounds-checked out-of-bounds writes to be erased.
//
// NOTE(review): the exact rule for combining indices, `indexOffset` and
// `sgprOffset` lives in the op's canonicalization patterns, which are not
// part of this file; the per-test remarks below describe what each test
// appears to exercise and should be confirmed against that implementation.

// Constant index 4 into a 4-element buffer: the load is expected to fold to
// 0.0.
// CHECK-LABEL: func @known_oob_load
func.func @known_oob_load(%arg0: memref<4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32] : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// Same as above for a 4x4 buffer, with the out-of-range constant (4) in the
// leading index position.
// CHECK-LABEL: func @known_oob_load_2d
func.func @known_oob_load_2d(%arg0: memref<4x4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %c0_i32 = arith.constant 0 : i32
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32, %c0_i32] : memref<4x4xf32>, i32, i32 -> f32
  func.return %0 : f32
}

// -----

// 4x4 buffer with the constant 16 in the trailing index position; presumably
// the linearized offset (16) is what puts the access past the 16 elements.
// CHECK-LABEL: func @known_oob_load_2d_on_last
func.func @known_oob_load_2d_on_last(%arg0: memref<4x4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %c0_i32 = arith.constant 0 : i32
  %c16_i32 = arith.constant 16 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c0_i32, %c16_i32] : memref<4x4xf32>, i32, i32 -> f32
  func.return %0 : f32
}

// -----

// The index operand is 0, but the `indexOffset` attribute is 4; the load is
// still expected to fold, so the attribute appears to take part in the
// bounds computation.
// CHECK-LABEL: func @known_oob_load_index
func.func @known_oob_load_index(%arg0: memref<4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %c0_i32 = arith.constant 0 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true, indexOffset = 4 : i32} %arg0[%c0_i32] : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// Index 2 and sgprOffset 2 are each below the buffer size of 4; the load is
// expected to fold, presumably because the two are added (2 + 2 = 4).
// CHECK-LABEL: func @known_oob_load_sgproffset
func.func @known_oob_load_sgproffset(%arg0: memref<4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %c2_i32 = arith.constant 2 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c2_i32] sgprOffset %c2_i32 : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// The index is a function argument rather than a constant, so the load is
// expected to survive even though the constant sgprOffset is 4.
// CHECK-LABEL: func @unknown_load
func.func @unknown_load(%arg0: memref<4xf32>, %arg1: i32) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%arg1] sgprOffset %c4_i32 : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// The sgprOffset is a function argument rather than a constant, so the load
// is expected to survive even though the constant index is 4.
// CHECK-LABEL: func @unknown_load_sgproffset
func.func @unknown_load_sgproffset(%arg0: memref<4xf32>, %arg1: i32) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32] sgprOffset %arg1 : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// The buffer has a dynamic size, so the load is expected to survive.
// NOTE(review): despite the function name, memref<?xf32> is a ranked memref
// with one dynamic dimension, not an unranked memref (memref<*xf32>).
// CHECK-LABEL: func @unranked
func.func @unranked(%arg0: memref<?xf32>) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c4_i32] : memref<?xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// Same access as @known_oob_load but with boundsCheck = false; the load is
// expected to be left in place.
// CHECK-LABEL: func @no_oob_check
func.func @no_oob_check(%arg0: memref<4xf32>) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %c4_i32 = arith.constant 4 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = false} %arg0[%c4_i32] : memref<4xf32>, i32 -> f32
  func.return %0 : f32
}

// -----

// The trailing index (15) exceeds its dimension (4), yet the load is expected
// to survive; presumably the linearized offset 0 * 4 + 15 = 15 is still
// inside the 16-element buffer.
// CHECK-LABEL: func @in_bounds_overall
func.func @in_bounds_overall(%arg0: memref<4x4xf32>) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %c0_i32 = arith.constant 0 : i32
  %c15_i32 = arith.constant 15 : i32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true} %arg0[%c0_i32, %c15_i32] : memref<4x4xf32>, i32, i32 -> f32
  func.return %0 : f32
}

// -----

// A bounds-checked store at constant index 4 of a 4-element buffer is
// expected to be removed entirely.
// CHECK-LABEL: func @dead_store
func.func @dead_store(%arg0: memref<4xf32>, %arg1: f32) {
  // CHECK-NOT: amdgpu.raw_buffer_store
  %c4_i32 = arith.constant 4 : i32
  amdgpu.raw_buffer_store {boundsCheck = true} %arg1 -> %arg0[%c4_i32] : f32 -> memref<4xf32>, i32
  func.return
}

// -----

// A bounds-checked atomic floating-point add at constant index 4 of a
// 4-element buffer is expected to be removed entirely.
// CHECK-LABEL: func @dead_atomic_add
func.func @dead_atomic_add(%arg0: memref<4xf32>, %arg1: f32) {
  // CHECK-NOT: amdgpu.raw_buffer_atomic_fadd
  %c4_i32 = arith.constant 4 : i32
  amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %arg1 -> %arg0[%c4_i32] : f32 -> memref<4xf32>, i32
  func.return
}