// RUN: mlir-opt %s --pass-pipeline="builtin.module(func.func(convert-arm-sme-to-llvm,canonicalize))" \
// RUN:   -split-input-file -allow-unregistered-dialect | FileCheck %s

// This test verifies the tile mask operand of the zero intrinsic zeroes
// the correct tiles. Both integer and floating-point datatypes are checked.
//
// The "test.*" operations are unregistered placeholders: "test.some_use"
// takes each zeroed tile as an operand, and "test.prevent_zero_merge" is
// placed between zero ops so that they are not merged into one intrinsic.

// -----

// One tile with 8-bit elements; expected mask is 255 (0b11111111).

// CHECK-LABEL: zero_za_b
func.func @zero_za_b() {
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 255 : i32}> : () -> ()
  %zero_za0b = arm_sme.zero : vector<[16]x[16]xi8>
  "test.some_use"(%zero_za0b) : (vector<[16]x[16]xi8>) -> ()
  return
}

// -----

// Two tiles with 16-bit elements (one integer, one floating-point);
// expected masks are 85 (0b01010101) and 170 (0b10101010).

// CHECK-LABEL: zero_za_h
func.func @zero_za_h() {
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 85 : i32}> : () -> ()
  %zero_za0h = arm_sme.zero : vector<[8]x[8]xi16>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 170 : i32}> : () -> ()
  %zero_za1h = arm_sme.zero : vector<[8]x[8]xf16>
  "test.some_use"(%zero_za0h) : (vector<[8]x[8]xi16>) -> ()
  "test.some_use"(%zero_za1h) : (vector<[8]x[8]xf16>) -> ()
  return
}

// -----

// Four tiles with 32-bit elements; expected masks are 17 (0b00010001),
// 34 (0b00100010), 68 (0b01000100) and 136 (0b10001000).

// CHECK-LABEL: zero_za_s
func.func @zero_za_s() {
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 17 : i32}> : () -> ()
  %zero_za0s = arm_sme.zero : vector<[4]x[4]xi32>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 34 : i32}> : () -> ()
  %zero_za1s = arm_sme.zero : vector<[4]x[4]xi32>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 68 : i32}> : () -> ()
  %zero_za2s = arm_sme.zero : vector<[4]x[4]xi32>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 136 : i32}> : () -> ()
  %zero_za3s = arm_sme.zero : vector<[4]x[4]xf32>
  "test.some_use"(%zero_za0s) : (vector<[4]x[4]xi32>) -> ()
  "test.some_use"(%zero_za1s) : (vector<[4]x[4]xi32>) -> ()
  "test.some_use"(%zero_za2s) : (vector<[4]x[4]xi32>) -> ()
  "test.some_use"(%zero_za3s) : (vector<[4]x[4]xf32>) -> ()
  return
}

// -----

// Eight tiles with 64-bit elements; the n-th zero op is expected to get a
// mask with only bit n set (1, 2, 4, ..., 128).

// CHECK-LABEL: zero_za_d
func.func @zero_za_d() {
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 1 : i32}> : () -> ()
  %zero_za0d = arm_sme.zero : vector<[2]x[2]xi64>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 2 : i32}> : () -> ()
  %zero_za1d = arm_sme.zero : vector<[2]x[2]xi64>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 4 : i32}> : () -> ()
  %zero_za2d = arm_sme.zero : vector<[2]x[2]xi64>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 8 : i32}> : () -> ()
  %zero_za3d = arm_sme.zero : vector<[2]x[2]xi64>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 16 : i32}> : () -> ()
  %zero_za4d = arm_sme.zero : vector<[2]x[2]xi64>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 32 : i32}> : () -> ()
  %zero_za5d = arm_sme.zero : vector<[2]x[2]xi64>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 64 : i32}> : () -> ()
  %zero_za6d = arm_sme.zero : vector<[2]x[2]xi64>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 128 : i32}> : () -> ()
  %zero_za7d = arm_sme.zero : vector<[2]x[2]xf64>
  "test.some_use"(%zero_za0d) : (vector<[2]x[2]xi64>) -> ()
  "test.some_use"(%zero_za1d) : (vector<[2]x[2]xi64>) -> ()
  "test.some_use"(%zero_za2d) : (vector<[2]x[2]xi64>) -> ()
  "test.some_use"(%zero_za3d) : (vector<[2]x[2]xi64>) -> ()
  "test.some_use"(%zero_za4d) : (vector<[2]x[2]xi64>) -> ()
  "test.some_use"(%zero_za5d) : (vector<[2]x[2]xi64>) -> ()
  "test.some_use"(%zero_za6d) : (vector<[2]x[2]xi64>) -> ()
  "test.some_use"(%zero_za7d) : (vector<[2]x[2]xf64>) -> ()
  return
}

// -----

/// Four back-to-back zero ops on 32-bit element tiles are expected to produce
/// exactly one intrinsic whose mask is the union of the individual masks
/// (17 | 34 | 68 | 136 = 255).

// CHECK-LABEL: merge_consecutive_tile_zero_ops
func.func @merge_consecutive_tile_zero_ops() {
  // CHECK-NOT: arm_sme.intr.zero
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 255 : i32}> : () -> ()
  // CHECK-NOT: arm_sme.intr.zero
  %zero_za0s = arm_sme.zero : vector<[4]x[4]xi32>
  %zero_za1s = arm_sme.zero : vector<[4]x[4]xi32>
  %zero_za2s = arm_sme.zero : vector<[4]x[4]xi32>
  %zero_za3s = arm_sme.zero : vector<[4]x[4]xf32>
  "test.some_use"(%zero_za0s) : (vector<[4]x[4]xi32>) -> ()
  "test.some_use"(%zero_za1s) : (vector<[4]x[4]xi32>) -> ()
  "test.some_use"(%zero_za2s) : (vector<[4]x[4]xi32>) -> ()
  "test.some_use"(%zero_za3s) : (vector<[4]x[4]xf32>) -> ()
  return
}

// -----

/// arm_sme.intr.zero intrinsics are not merged when there is an op other than
/// arm_sme.intr.zero between them. Here the first pair is expected to merge
/// to 17 | 34 = 51 and the second pair to 68 | 136 = 204.

// CHECK-LABEL: merge_consecutive_tile_zero_ops_with_barrier
func.func @merge_consecutive_tile_zero_ops_with_barrier() {
  // CHECK-NOT: arm_sme.intr.zero
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 51 : i32}> : () -> ()
  // CHECK-NOT: arm_sme.intr.zero
  %zero_za0s = arm_sme.zero : vector<[4]x[4]xi32>
  %zero_za1s = arm_sme.zero : vector<[4]x[4]xi32>
  "test.prevent_zero_merge"() : () -> ()
  // CHECK: "arm_sme.intr.zero"() <{tile_mask = 204 : i32}> : () -> ()
  // CHECK-NOT: arm_sme.intr.zero
  %zero_za2s = arm_sme.zero : vector<[4]x[4]xi32>
  %zero_za3s = arm_sme.zero : vector<[4]x[4]xf32>
  "test.some_use"(%zero_za0s) : (vector<[4]x[4]xi32>) -> ()
  "test.some_use"(%zero_za1s) : (vector<[4]x[4]xi32>) -> ()
  "test.some_use"(%zero_za2s) : (vector<[4]x[4]xi32>) -> ()
  "test.some_use"(%zero_za3s) : (vector<[4]x[4]xf32>) -> ()
  return
}