// RUN: mlir-opt %s --sparse-reinterpret-map -sparsification -cse -split-input-file | \
// RUN:   FileCheck %s --check-prefix=CHECK-SCALAR
// RUN: mlir-opt %s --sparse-reinterpret-map -sparsification -cse -sparse-vectorization="vl=16" -cse -split-input-file | \
// RUN:   FileCheck %s --check-prefix=CHECK-VEC16
// RUN: mlir-opt %s --sparse-reinterpret-map -sparsification -cse -sparse-vectorization="vl=16 enable-simd-index32=true" -cse -split-input-file | \
// RUN:   FileCheck %s --check-prefix=CHECK-VEC16-IDX32
// RUN: mlir-opt %s --sparse-reinterpret-map -sparsification -cse -sparse-vectorization="vl=4 enable-vla-vectorization=true" -cse -split-input-file | \
// RUN:   FileCheck %s --check-prefix=CHECK-VEC4-SVE
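
// This file checks the output of the sparsifier followed by the sparse
// vectorizer in four configurations: scalar loops only, a fixed vector
// length of 16, vl=16 with 32-bit SIMD index arithmetic enabled, and vl=4
// with scalable (VLA) vectors as used for SVE-like targets.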

#DenseVector = #sparse_tensor.encoding<{ map = (d0) -> (d0 : dense) }>

#trait_scale_d = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a
    affine_map<(i) -> (i)>   // x (out)
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) * b"
}
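
// The checks below verify that the all-dense kernel is vectorized with plain
// vector.load/vector.store and a step of 16 at vl=16, and with masked,
// scalable vector<[4]xf32> operations (vector.vscale, affine.min, and
// vector.create_mask) in the VLA/SVE configuration.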

//
// CHECK-SCALAR-LABEL: func @scale_d
// CHECK-SCALAR-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-SCALAR-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-SCALAR-DAG: %[[c1024:.*]] = arith.constant 1024 : index
// CHECK-SCALAR: scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c1]] {
// CHECK-SCALAR: %[[l:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xf32>
// CHECK-SCALAR: %[[m:.*]] = arith.mulf %[[l]], %{{.*}} : f32
// CHECK-SCALAR: store %[[m]], %{{.*}}[%[[i]]] : memref<1024xf32>
// CHECK-SCALAR: }
// CHECK-SCALAR: return
//
// CHECK-VEC16-LABEL: func @scale_d
// CHECK-VEC16-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC16-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-VEC16-DAG: %[[c1024:.*]] = arith.constant 1024 : index
// CHECK-VEC16: scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c16]] {
// CHECK-VEC16: %[[r:.*]] = vector.load %{{.*}}[%[[i]]] : memref<?xf32>, vector<16xf32>
// CHECK-VEC16: %[[b:.*]] = vector.broadcast %{{.*}} : f32 to vector<16xf32>
// CHECK-VEC16: %[[m:.*]] = arith.mulf %[[r]], %[[b]] : vector<16xf32>
// CHECK-VEC16: vector.store %[[m]], %{{.*}}[%[[i]]] : memref<1024xf32>, vector<16xf32>
// CHECK-VEC16: }
// CHECK-VEC16: return
//
// CHECK-VEC16-IDX32-LABEL: func @scale_d
// CHECK-VEC16-IDX32-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC16-IDX32-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-VEC16-IDX32-DAG: %[[c1024:.*]] = arith.constant 1024 : index
// CHECK-VEC16-IDX32: scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c16]] {
// CHECK-VEC16-IDX32: %[[r:.*]] = vector.load %{{.*}}[%[[i]]] : memref<?xf32>, vector<16xf32>
// CHECK-VEC16-IDX32: %[[b:.*]] = vector.broadcast %{{.*}} : f32 to vector<16xf32>
// CHECK-VEC16-IDX32: %[[m:.*]] = arith.mulf %[[r]], %[[b]] : vector<16xf32>
// CHECK-VEC16-IDX32: vector.store %[[m]], %{{.*}}[%[[i]]] : memref<1024xf32>, vector<16xf32>
// CHECK-VEC16-IDX32: }
// CHECK-VEC16-IDX32: return
//
// CHECK-VEC4-SVE: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)
// CHECK-VEC4-SVE-LABEL: func @scale_d
// CHECK-VEC4-SVE-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC4-SVE-DAG: %[[c4:.*]] = arith.constant 4 : index
// CHECK-VEC4-SVE-DAG: %[[c1024:.*]] = arith.constant 1024 : index
// CHECK-VEC4-SVE-DAG: %[[v0:.*]] = arith.constant dense<0.000000e+00> : vector<[4]xf32>
// CHECK-VEC4-SVE-DAG: %[[vscale:.*]] = vector.vscale
// CHECK-VEC4-SVE: %[[step:.*]] = arith.muli %[[vscale]], %[[c4]] : index
// CHECK-VEC4-SVE: scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[step]] {
// CHECK-VEC4-SVE: %[[sub:.*]] = affine.min #[[$map]](%[[c1024]], %[[i]])[%[[step]]]
// CHECK-VEC4-SVE: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<[4]xi1>
// CHECK-VEC4-SVE: %[[val:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %[[v0]] : memref<?xf32>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
// CHECK-VEC4-SVE: %[[scalev:.*]] = vector.broadcast %{{.*}} : f32 to vector<[4]xf32>
// CHECK-VEC4-SVE: %[[scaled:.*]] = arith.mulf %[[val]], %[[scalev]] : vector<[4]xf32>
// CHECK-VEC4-SVE: vector.maskedstore %{{.*}}[%[[i]]], %[[mask]], %[[scaled]] : memref<1024xf32>, vector<[4]xi1>, vector<[4]xf32>
// CHECK-VEC4-SVE: }
// CHECK-VEC4-SVE: return
//
func.func @scale_d(%arga: tensor<1024xf32, #DenseVector>, %b: f32, %argx: tensor<1024xf32>) -> tensor<1024xf32> {
  %0 = linalg.generic #trait_scale_d
    ins(%arga: tensor<1024xf32, #DenseVector>)
    outs(%argx: tensor<1024xf32>) {
    ^bb(%a: f32, %x: f32):
      %0 = arith.mulf %a, %b : f32
      linalg.yield %0 : f32
  } -> tensor<1024xf32>
  return %0 : tensor<1024xf32>
}

// -----

#SparseVector = #sparse_tensor.encoding<{
  map = (d0) -> (d0 : compressed),
  posWidth = 32,
  crdWidth = 32
}>

#trait_mul_s = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a
    affine_map<(i) -> (i)>,  // b
    affine_map<(i) -> (i)>   // x (out)
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) * b(i)"
}
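
// For the compressed vector, the inner loop is vectorized with masked loads
// of the 32-bit coordinates and gather/scatter accesses into the dense
// operand and output. Without enable-simd-index32 the coordinates are first
// zero-extended to i64; with it, the i32 coordinates are used directly as
// gather/scatter indices.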

//
// CHECK-SCALAR-LABEL: func @mul_s
// CHECK-SCALAR-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-SCALAR-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-SCALAR: %[[p:.*]] = memref.load %{{.*}}[%[[c0]]] : memref<?xi32>
// CHECK-SCALAR: %[[a:.*]] = arith.extui %[[p]] : i32 to i64
// CHECK-SCALAR: %[[q:.*]] = arith.index_cast %[[a]] : i64 to index
// CHECK-SCALAR: %[[r:.*]] = memref.load %{{.*}}[%[[c1]]] : memref<?xi32>
// CHECK-SCALAR: %[[b:.*]] = arith.extui %[[r]] : i32 to i64
// CHECK-SCALAR: %[[s:.*]] = arith.index_cast %[[b]] : i64 to index
// CHECK-SCALAR: scf.for %[[i:.*]] = %[[q]] to %[[s]] step %[[c1]] {
// CHECK-SCALAR: %[[li:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xi32>
// CHECK-SCALAR: %[[zi:.*]] = arith.extui %[[li]] : i32 to i64
// CHECK-SCALAR: %[[ci:.*]] = arith.index_cast %[[zi]] : i64 to index
// CHECK-SCALAR: %[[la:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xf32>
// CHECK-SCALAR: %[[lb:.*]] = memref.load %{{.*}}[%[[ci]]] : memref<1024xf32>
// CHECK-SCALAR: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : f32
// CHECK-SCALAR: store %[[m]], %{{.*}}[%[[ci]]] : memref<1024xf32>
// CHECK-SCALAR: }
// CHECK-SCALAR: return
//
// CHECK-VEC16: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (16, d0 - d1)
// CHECK-VEC16-LABEL: func @mul_s
// CHECK-VEC16-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC16-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-VEC16-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-VEC16: %[[p:.*]] = memref.load %{{.*}}[%[[c0]]] : memref<?xi32>
// CHECK-VEC16: %[[a:.*]] = arith.extui %[[p]] : i32 to i64
// CHECK-VEC16: %[[q:.*]] = arith.index_cast %[[a]] : i64 to index
// CHECK-VEC16: %[[r:.*]] = memref.load %{{.*}}[%[[c1]]] : memref<?xi32>
// CHECK-VEC16: %[[b:.*]] = arith.extui %[[r]] : i32 to i64
// CHECK-VEC16: %[[s:.*]] = arith.index_cast %[[b]] : i64 to index
// CHECK-VEC16: scf.for %[[i:.*]] = %[[q]] to %[[s]] step %[[c16]] {
// CHECK-VEC16: %[[sub:.*]] = affine.min #[[$map]](%[[s]], %[[i]])[%[[c16]]]
// CHECK-VEC16: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<16xi1>
// CHECK-VEC16: %[[li:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %{{.*}} : memref<?xi32>, vector<16xi1>, vector<16xi32> into vector<16xi32>
// CHECK-VEC16: %[[zi:.*]] = arith.extui %[[li]] : vector<16xi32> to vector<16xi64>
// CHECK-VEC16: %[[la:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK-VEC16: %[[lb:.*]] = vector.gather %{{.*}}[%[[c0]]] [%[[zi]]], %[[mask]], %{{.*}} : memref<1024xf32>, vector<16xi64>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK-VEC16: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : vector<16xf32>
// CHECK-VEC16: vector.scatter %{{.*}}[%[[c0]]] [%[[zi]]], %[[mask]], %[[m]] : memref<1024xf32>, vector<16xi64>, vector<16xi1>, vector<16xf32>
// CHECK-VEC16: }
// CHECK-VEC16: return
//
// CHECK-VEC16-IDX32: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (16, d0 - d1)
// CHECK-VEC16-IDX32-LABEL: func @mul_s
// CHECK-VEC16-IDX32-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC16-IDX32-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-VEC16-IDX32-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-VEC16-IDX32: %[[p:.*]] = memref.load %{{.*}}[%[[c0]]] : memref<?xi32>
// CHECK-VEC16-IDX32: %[[a:.*]] = arith.extui %[[p]] : i32 to i64
// CHECK-VEC16-IDX32: %[[q:.*]] = arith.index_cast %[[a]] : i64 to index
// CHECK-VEC16-IDX32: %[[r:.*]] = memref.load %{{.*}}[%[[c1]]] : memref<?xi32>
// CHECK-VEC16-IDX32: %[[b:.*]] = arith.extui %[[r]] : i32 to i64
// CHECK-VEC16-IDX32: %[[s:.*]] = arith.index_cast %[[b]] : i64 to index
// CHECK-VEC16-IDX32: scf.for %[[i:.*]] = %[[q]] to %[[s]] step %[[c16]] {
// CHECK-VEC16-IDX32: %[[sub:.*]] = affine.min #[[$map]](%[[s]], %[[i]])[%[[c16]]]
// CHECK-VEC16-IDX32: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<16xi1>
// CHECK-VEC16-IDX32: %[[li:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %{{.*}} : memref<?xi32>, vector<16xi1>, vector<16xi32> into vector<16xi32>
// CHECK-VEC16-IDX32: %[[la:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK-VEC16-IDX32: %[[lb:.*]] = vector.gather %{{.*}}[%[[c0]]] [%[[li]]], %[[mask]], %{{.*}} : memref<1024xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK-VEC16-IDX32: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : vector<16xf32>
// CHECK-VEC16-IDX32: vector.scatter %{{.*}}[%[[c0]]] [%[[li]]], %[[mask]], %[[m]] : memref<1024xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
// CHECK-VEC16-IDX32: }
// CHECK-VEC16-IDX32: return
//
// CHECK-VEC4-SVE: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)
// CHECK-VEC4-SVE-LABEL: func @mul_s
// CHECK-VEC4-SVE-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC4-SVE-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-VEC4-SVE-DAG: %[[c4:.*]] = arith.constant 4 : index
// CHECK-VEC4-SVE-DAG: %[[v0i:.*]] = arith.constant dense<0> : vector<[4]xi32>
// CHECK-VEC4-SVE-DAG: %[[v0f:.*]] = arith.constant dense<0.000000e+00> : vector<[4]xf32>
// CHECK-VEC4-SVE: %[[p:.*]] = memref.load %{{.*}}[%[[c0]]] : memref<?xi32>
// CHECK-VEC4-SVE: %[[a:.*]] = arith.extui %[[p]] : i32 to i64
// CHECK-VEC4-SVE: %[[q:.*]] = arith.index_cast %[[a]] : i64 to index
// CHECK-VEC4-SVE: %[[r:.*]] = memref.load %{{.*}}[%[[c1]]] : memref<?xi32>
// CHECK-VEC4-SVE: %[[b:.*]] = arith.extui %[[r]] : i32 to i64
// CHECK-VEC4-SVE: %[[s:.*]] = arith.index_cast %[[b]] : i64 to index
// CHECK-VEC4-SVE: %[[vscale:.*]] = vector.vscale
// CHECK-VEC4-SVE: %[[step:.*]] = arith.muli %[[vscale]], %[[c4]] : index
// CHECK-VEC4-SVE: scf.for %[[i:.*]] = %[[q]] to %[[s]] step %[[step]] {
// CHECK-VEC4-SVE: %[[sub:.*]] = affine.min #[[$map]](%[[s]], %[[i]])[%[[step]]]
// CHECK-VEC4-SVE: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<[4]xi1>
// CHECK-VEC4-SVE: %[[li:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %[[v0i]] : memref<?xi32>, vector<[4]xi1>, vector<[4]xi32> into vector<[4]xi32>
// CHECK-VEC4-SVE: %[[lii64:.*]] = arith.extui %[[li]] : vector<[4]xi32> to vector<[4]xi64>
// CHECK-VEC4-SVE: %[[la:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %[[v0f]] : memref<?xf32>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
// CHECK-VEC4-SVE: %[[lb:.*]] = vector.gather %{{.*}}[%[[c0]]] [%[[lii64]]], %[[mask]], %[[v0f]] : memref<1024xf32>, vector<[4]xi64>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
// CHECK-VEC4-SVE: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : vector<[4]xf32>
// CHECK-VEC4-SVE: vector.scatter %{{.*}}[%[[c0]]] [%[[lii64]]], %[[mask]], %[[m]] : memref<1024xf32>, vector<[4]xi64>, vector<[4]xi1>, vector<[4]xf32>
// CHECK-VEC4-SVE: }
// CHECK-VEC4-SVE: return
//
func.func @mul_s(%arga: tensor<1024xf32, #SparseVector>,
                 %argb: tensor<1024xf32>,
                 %argx: tensor<1024xf32>) -> tensor<1024xf32> {
  %0 = linalg.generic #trait_mul_s
    ins(%arga, %argb: tensor<1024xf32, #SparseVector>, tensor<1024xf32>)
    outs(%argx: tensor<1024xf32>) {
    ^bb(%a: f32, %b: f32, %x: f32):
      %0 = arith.mulf %a, %b : f32
      linalg.yield %0 : f32
  } -> tensor<1024xf32>
  return %0 : tensor<1024xf32>
}

// -----

#DenseVector = #sparse_tensor.encoding<{ map = (d0) -> (d0 : dense) }>

#trait_reduction_d = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // a
    affine_map<(i) -> (i)>,  // b
    affine_map<(i) -> ()>    // x (out)
  ],
  iterator_types = ["reduction"],
  doc = "x += a(i) * b(i)"
}
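
// The reduction is carried as a vector iter_arg: the scalar initial value is
// inserted into lane 0 of a zero vector, the loop accumulates per-lane
// partial sums, and a final vector.reduction <add> folds the vector into a
// scalar. The VLA/SVE variant additionally selects the previous partial sums
// for masked-off lanes.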

//
// CHECK-SCALAR-LABEL: func @reduction_d
// CHECK-SCALAR-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-SCALAR-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-SCALAR-DAG: %[[c1024:.*]] = arith.constant 1024 : index
// CHECK-SCALAR: %[[red:.*]] = scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c1]] iter_args(%[[red_in:.*]] = %{{.*}}) -> (f32) {
// CHECK-SCALAR: %[[la:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xf32>
// CHECK-SCALAR: %[[lb:.*]] = memref.load %{{.*}}[%[[i]]] : memref<1024xf32>
// CHECK-SCALAR: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : f32
// CHECK-SCALAR: %[[a:.*]] = arith.addf %[[red_in]], %[[m]] : f32
// CHECK-SCALAR: scf.yield %[[a]] : f32
// CHECK-SCALAR: }
// CHECK-SCALAR: return
//
// CHECK-VEC16-LABEL: func @reduction_d
// CHECK-VEC16-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC16-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-VEC16-DAG: %[[c1024:.*]] = arith.constant 1024 : index
// CHECK-VEC16-DAG: %[[v0:.*]] = arith.constant dense<0.000000e+00> : vector<16xf32>
// CHECK-VEC16: %[[l:.*]] = memref.load %{{.*}}[] : memref<f32>
// CHECK-VEC16: %[[r:.*]] = vector.insertelement %[[l]], %[[v0]][%[[c0]] : index] : vector<16xf32>
// CHECK-VEC16: %[[red:.*]] = scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c16]] iter_args(%[[red_in:.*]] = %[[r]]) -> (vector<16xf32>) {
// CHECK-VEC16: %[[la:.*]] = vector.load %{{.*}}[%[[i]]] : memref<?xf32>, vector<16xf32>
// CHECK-VEC16: %[[lb:.*]] = vector.load %{{.*}}[%[[i]]] : memref<1024xf32>, vector<16xf32>
// CHECK-VEC16: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : vector<16xf32>
// CHECK-VEC16: %[[a:.*]] = arith.addf %[[red_in]], %[[m]] : vector<16xf32>
// CHECK-VEC16: scf.yield %[[a]] : vector<16xf32>
// CHECK-VEC16: }
// CHECK-VEC16: %{{.*}} = vector.reduction <add>, %[[red]] : vector<16xf32> into f32
// CHECK-VEC16: return
//
// CHECK-VEC16-IDX32-LABEL: func @reduction_d
// CHECK-VEC16-IDX32-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC16-IDX32-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-VEC16-IDX32-DAG: %[[c1024:.*]] = arith.constant 1024 : index
// CHECK-VEC16-IDX32-DAG: %[[v0:.*]] = arith.constant dense<0.000000e+00> : vector<16xf32>
// CHECK-VEC16-IDX32: %[[l:.*]] = memref.load %{{.*}}[] : memref<f32>
// CHECK-VEC16-IDX32: %[[r:.*]] = vector.insertelement %[[l]], %[[v0]][%[[c0]] : index] : vector<16xf32>
// CHECK-VEC16-IDX32: %[[red:.*]] = scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[c16]] iter_args(%[[red_in:.*]] = %[[r]]) -> (vector<16xf32>) {
// CHECK-VEC16-IDX32: %[[la:.*]] = vector.load %{{.*}}[%[[i]]] : memref<?xf32>, vector<16xf32>
// CHECK-VEC16-IDX32: %[[lb:.*]] = vector.load %{{.*}}[%[[i]]] : memref<1024xf32>, vector<16xf32>
// CHECK-VEC16-IDX32: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : vector<16xf32>
// CHECK-VEC16-IDX32: %[[a:.*]] = arith.addf %[[red_in]], %[[m]] : vector<16xf32>
// CHECK-VEC16-IDX32: scf.yield %[[a]] : vector<16xf32>
// CHECK-VEC16-IDX32: }
// CHECK-VEC16-IDX32: %{{.*}} = vector.reduction <add>, %[[red]] : vector<16xf32> into f32
// CHECK-VEC16-IDX32: return
//
// CHECK-VEC4-SVE: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)
// CHECK-VEC4-SVE-LABEL: func @reduction_d
// CHECK-VEC4-SVE-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC4-SVE-DAG: %[[c4:.*]] = arith.constant 4 : index
// CHECK-VEC4-SVE-DAG: %[[c1024:.*]] = arith.constant 1024 : index
// CHECK-VEC4-SVE-DAG: %[[v0:.*]] = arith.constant dense<0.000000e+00> : vector<[4]xf32>
// CHECK-VEC4-SVE: %[[l:.*]] = memref.load %{{.*}}[] : memref<f32>
// CHECK-VEC4-SVE: %[[vscale:.*]] = vector.vscale
// CHECK-VEC4-SVE: %[[step:.*]] = arith.muli %[[vscale]], %[[c4]] : index
// CHECK-VEC4-SVE: %[[r:.*]] = vector.insertelement %[[l]], %[[v0]][%[[c0]] : index] : vector<[4]xf32>
// CHECK-VEC4-SVE: %[[red:.*]] = scf.for %[[i:.*]] = %[[c0]] to %[[c1024]] step %[[step]] iter_args(%[[red_in:.*]] = %[[r]]) -> (vector<[4]xf32>) {
// CHECK-VEC4-SVE: %[[sub:.*]] = affine.min #[[$map]](%[[c1024]], %[[i]])[%[[step]]]
// CHECK-VEC4-SVE: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<[4]xi1>
// CHECK-VEC4-SVE: %[[la:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %[[v0]] : memref<?xf32>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
// CHECK-VEC4-SVE: %[[lb:.*]] = vector.maskedload %{{.*}}[%[[i]]], %[[mask]], %[[v0]] : memref<1024xf32>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
// CHECK-VEC4-SVE: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : vector<[4]xf32>
// CHECK-VEC4-SVE: %[[a:.*]] = arith.addf %[[red_in]], %[[m]] : vector<[4]xf32>
// CHECK-VEC4-SVE: %[[sa:.*]] = arith.select %[[mask]], %[[a]], %[[red_in]] : vector<[4]xi1>, vector<[4]xf32>
// CHECK-VEC4-SVE: scf.yield %[[sa]] : vector<[4]xf32>
// CHECK-VEC4-SVE: }
// CHECK-VEC4-SVE: %{{.*}} = vector.reduction <add>, %[[red]] : vector<[4]xf32> into f32
// CHECK-VEC4-SVE: return
//
func.func @reduction_d(%arga: tensor<1024xf32, #DenseVector>,
                       %argb: tensor<1024xf32>,
                       %argx: tensor<f32>) -> tensor<f32> {
  %0 = linalg.generic #trait_reduction_d
    ins(%arga, %argb: tensor<1024xf32, #DenseVector>, tensor<1024xf32>)
    outs(%argx: tensor<f32>) {
    ^bb(%a: f32, %b: f32, %x: f32):
      %0 = arith.mulf %a, %b : f32
      %1 = arith.addf %x, %0 : f32
      linalg.yield %1 : f32
  } -> tensor<f32>
  return %0 : tensor<f32>
}

// -----

#SparseMatrix = #sparse_tensor.encoding<{
  map = (d0, d1) -> (d0 : dense, d1 : compressed),
  posWidth = 32,
  crdWidth = 32
}>

#trait_mul_ds = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>,  // A
    affine_map<(i,j) -> (i,j)>,  // B
    affine_map<(i,j) -> (i,j)>   // X (out)
  ],
  iterator_types = ["parallel", "parallel"],
  doc = "X(i,j) = A(i,j) * B(i,j)"
}
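
// For the dense-times-compressed matrix kernel, only the innermost
// (compressed) loop is vectorized; the outer loop over the dense rows
// remains a scalar scf.for with unit step.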

//
// CHECK-SCALAR-LABEL: func @mul_ds
// CHECK-SCALAR-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-SCALAR-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-SCALAR-DAG: %[[c512:.*]] = arith.constant 512 : index
// CHECK-SCALAR: scf.for %[[i:.*]] = %[[c0]] to %[[c512]] step %[[c1]] {
// CHECK-SCALAR: %[[p:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xi32>
// CHECK-SCALAR: %[[a:.*]] = arith.extui %[[p]] : i32 to i64
// CHECK-SCALAR: %[[q:.*]] = arith.index_cast %[[a]] : i64 to index
// CHECK-SCALAR: %[[a:.*]] = arith.addi %[[i]], %[[c1]] : index
// CHECK-SCALAR: %[[r:.*]] = memref.load %{{.*}}[%[[a]]] : memref<?xi32>
// CHECK-SCALAR: %[[b:.*]] = arith.extui %[[r]] : i32 to i64
// CHECK-SCALAR: %[[s:.*]] = arith.index_cast %[[b]] : i64 to index
// CHECK-SCALAR: scf.for %[[j:.*]] = %[[q]] to %[[s]] step %[[c1]] {
// CHECK-SCALAR: %[[lj:.*]] = memref.load %{{.*}}[%[[j]]] : memref<?xi32>
// CHECK-SCALAR: %[[zj:.*]] = arith.extui %[[lj]] : i32 to i64
// CHECK-SCALAR: %[[cj:.*]] = arith.index_cast %[[zj]] : i64 to index
// CHECK-SCALAR: %[[la:.*]] = memref.load %{{.*}}[%[[j]]] : memref<?xf32>
// CHECK-SCALAR: %[[lb:.*]] = memref.load %{{.*}}[%[[i]], %[[cj]]] : memref<512x1024xf32>
// CHECK-SCALAR: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : f32
// CHECK-SCALAR: store %[[m]], %{{.*}}[%[[i]], %[[cj]]] : memref<512x1024xf32>
// CHECK-SCALAR: }
// CHECK-SCALAR: }
// CHECK-SCALAR: return
//
// CHECK-VEC16: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (16, d0 - d1)
// CHECK-VEC16-LABEL: func @mul_ds
// CHECK-VEC16-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC16-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-VEC16-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-VEC16-DAG: %[[c512:.*]] = arith.constant 512 : index
// CHECK-VEC16: scf.for %[[i:.*]] = %[[c0]] to %[[c512]] step %[[c1]] {
// CHECK-VEC16: %[[p:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xi32>
// CHECK-VEC16: %[[a:.*]] = arith.extui %[[p]] : i32 to i64
// CHECK-VEC16: %[[q:.*]] = arith.index_cast %[[a]] : i64 to index
// CHECK-VEC16: %[[a:.*]] = arith.addi %[[i]], %[[c1]] : index
// CHECK-VEC16: %[[r:.*]] = memref.load %{{.*}}[%[[a]]] : memref<?xi32>
// CHECK-VEC16: %[[b:.*]] = arith.extui %[[r]] : i32 to i64
// CHECK-VEC16: %[[s:.*]] = arith.index_cast %[[b]] : i64 to index
// CHECK-VEC16: scf.for %[[j:.*]] = %[[q]] to %[[s]] step %[[c16]] {
// CHECK-VEC16: %[[sub:.*]] = affine.min #[[$map]](%[[s]], %[[j]])[%[[c16]]]
// CHECK-VEC16: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<16xi1>
// CHECK-VEC16: %[[lj:.*]] = vector.maskedload %{{.*}}[%[[j]]], %[[mask]], %{{.*}} : memref<?xi32>, vector<16xi1>, vector<16xi32> into vector<16xi32>
// CHECK-VEC16: %[[zj:.*]] = arith.extui %[[lj]] : vector<16xi32> to vector<16xi64>
// CHECK-VEC16: %[[la:.*]] = vector.maskedload %{{.*}}[%[[j]]], %[[mask]], %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK-VEC16: %[[lb:.*]] = vector.gather %{{.*}}[%[[i]], %[[c0]]] [%[[zj]]], %[[mask]], %{{.*}} : memref<512x1024xf32>, vector<16xi64>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK-VEC16: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : vector<16xf32>
// CHECK-VEC16: vector.scatter %{{.*}}[%[[i]], %[[c0]]] [%[[zj]]], %[[mask]], %[[m]] : memref<512x1024xf32>, vector<16xi64>, vector<16xi1>, vector<16xf32>
// CHECK-VEC16: }
// CHECK-VEC16: }
// CHECK-VEC16: return
//
// CHECK-VEC16-IDX32: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (16, d0 - d1)
// CHECK-VEC16-IDX32-LABEL: func @mul_ds
// CHECK-VEC16-IDX32-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC16-IDX32-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-VEC16-IDX32-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-VEC16-IDX32-DAG: %[[c512:.*]] = arith.constant 512 : index
// CHECK-VEC16-IDX32: scf.for %[[i:.*]] = %[[c0]] to %[[c512]] step %[[c1]] {
// CHECK-VEC16-IDX32: %[[p:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xi32>
// CHECK-VEC16-IDX32: %[[a:.*]] = arith.extui %[[p]] : i32 to i64
// CHECK-VEC16-IDX32: %[[q:.*]] = arith.index_cast %[[a]] : i64 to index
// CHECK-VEC16-IDX32: %[[a:.*]] = arith.addi %[[i]], %[[c1]] : index
// CHECK-VEC16-IDX32: %[[r:.*]] = memref.load %{{.*}}[%[[a]]] : memref<?xi32>
// CHECK-VEC16-IDX32: %[[b:.*]] = arith.extui %[[r]] : i32 to i64
// CHECK-VEC16-IDX32: %[[s:.*]] = arith.index_cast %[[b]] : i64 to index
// CHECK-VEC16-IDX32: scf.for %[[j:.*]] = %[[q]] to %[[s]] step %[[c16]] {
// CHECK-VEC16-IDX32: %[[sub:.*]] = affine.min #[[$map]](%[[s]], %[[j]])[%[[c16]]]
// CHECK-VEC16-IDX32: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<16xi1>
// CHECK-VEC16-IDX32: %[[lj:.*]] = vector.maskedload %{{.*}}[%[[j]]], %[[mask]], %{{.*}} : memref<?xi32>, vector<16xi1>, vector<16xi32> into vector<16xi32>
// CHECK-VEC16-IDX32: %[[la:.*]] = vector.maskedload %{{.*}}[%[[j]]], %[[mask]], %{{.*}} : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK-VEC16-IDX32: %[[lb:.*]] = vector.gather %{{.*}}[%[[i]], %[[c0]]] [%[[lj]]], %[[mask]], %{{.*}} : memref<512x1024xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32> into vector<16xf32>
// CHECK-VEC16-IDX32: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : vector<16xf32>
// CHECK-VEC16-IDX32: vector.scatter %{{.*}}[%[[i]], %[[c0]]] [%[[lj]]], %[[mask]], %[[m]] : memref<512x1024xf32>, vector<16xi32>, vector<16xi1>, vector<16xf32>
// CHECK-VEC16-IDX32: }
// CHECK-VEC16-IDX32: }
// CHECK-VEC16-IDX32: return
//
// CHECK-VEC4-SVE: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)
// CHECK-VEC4-SVE-LABEL: func @mul_ds
// CHECK-VEC4-SVE-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC4-SVE-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-VEC4-SVE-DAG: %[[c4:.*]] = arith.constant 4 : index
// CHECK-VEC4-SVE-DAG: %[[c512:.*]] = arith.constant 512 : index
// CHECK-VEC4-SVE-DAG: %[[v0i:.*]] = arith.constant dense<0> : vector<[4]xi32>
// CHECK-VEC4-SVE-DAG: %[[v0f:.*]] = arith.constant dense<0.000000e+00> : vector<[4]xf32>
// CHECK-VEC4-SVE: scf.for %[[i:.*]] = %[[c0]] to %[[c512]] step %[[c1]] {
// CHECK-VEC4-SVE: %[[p:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xi32>
// CHECK-VEC4-SVE: %[[a:.*]] = arith.extui %[[p]] : i32 to i64
// CHECK-VEC4-SVE: %[[q:.*]] = arith.index_cast %[[a]] : i64 to index
// CHECK-VEC4-SVE: %[[a:.*]] = arith.addi %[[i]], %[[c1]] : index
// CHECK-VEC4-SVE: %[[r:.*]] = memref.load %{{.*}}[%[[a]]] : memref<?xi32>
// CHECK-VEC4-SVE: %[[b:.*]] = arith.extui %[[r]] : i32 to i64
// CHECK-VEC4-SVE: %[[s:.*]] = arith.index_cast %[[b]] : i64 to index
// CHECK-VEC4-SVE: %[[vscale:.*]] = vector.vscale
// CHECK-VEC4-SVE: %[[step:.*]] = arith.muli %[[vscale]], %[[c4]] : index
// CHECK-VEC4-SVE: scf.for %[[j:.*]] = %[[q]] to %[[s]] step %[[step]] {
// CHECK-VEC4-SVE: %[[sub:.*]] = affine.min #[[$map]](%[[s]], %[[j]])[%[[step]]]
// CHECK-VEC4-SVE: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<[4]xi1>
// CHECK-VEC4-SVE: %[[lji32:.*]] = vector.maskedload %{{.*}}[%[[j]]], %[[mask]], %[[v0i]] : memref<?xi32>, vector<[4]xi1>, vector<[4]xi32> into vector<[4]xi32>
// CHECK-VEC4-SVE: %[[lj:.*]] = arith.extui %[[lji32]] : vector<[4]xi32> to vector<[4]xi64>
// CHECK-VEC4-SVE: %[[la:.*]] = vector.maskedload %{{.*}}[%[[j]]], %[[mask]], %[[v0f]] : memref<?xf32>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
// CHECK-VEC4-SVE: %[[lb:.*]] = vector.gather %{{.*}}[%[[i]], %[[c0]]] [%[[lj]]], %[[mask]], %[[v0f]] : memref<512x1024xf32>, vector<[4]xi64>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
// CHECK-VEC4-SVE: %[[m:.*]] = arith.mulf %[[la]], %[[lb]] : vector<[4]xf32>
// CHECK-VEC4-SVE: vector.scatter %{{.*}}[%[[i]], %[[c0]]] [%[[lj]]], %[[mask]], %[[m]] : memref<512x1024xf32>, vector<[4]xi64>, vector<[4]xi1>, vector<[4]xf32>
// CHECK-VEC4-SVE: }
// CHECK-VEC4-SVE: }
// CHECK-VEC4-SVE: return
//
func.func @mul_ds(%arga: tensor<512x1024xf32, #SparseMatrix>,
                  %argb: tensor<512x1024xf32>,
                  %argx: tensor<512x1024xf32>) -> tensor<512x1024xf32> {
  %0 = linalg.generic #trait_mul_ds
    ins(%arga, %argb: tensor<512x1024xf32, #SparseMatrix>, tensor<512x1024xf32>)
    outs(%argx: tensor<512x1024xf32>) {
    ^bb(%a: f32, %b: f32, %x: f32):
      %0 = arith.mulf %a, %b : f32
      linalg.yield %0 : f32
  } -> tensor<512x1024xf32>
  return %0 : tensor<512x1024xf32>
}

// -----

#SparseMatrix = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : dense, d1 : compressed)}>

#trait_affine = {
  indexing_maps = [
    affine_map<(i,j) -> (i,j)>,
    affine_map<(i,j) -> (i+1,j)>
  ],
  iterator_types = ["parallel","parallel"],
  doc = "X(i+1,j) += A(i,j)"
}
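
// Here the output is accessed through the affine index i+1. Since this
// encoding specifies no posWidth/crdWidth, positions and coordinates use the
// native index width, so the masked loads and the gather/scatter into the
// 33x64 output operate directly on vectors of index values.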

//
// CHECK-SCALAR-LABEL: func @add_dense
// CHECK-SCALAR-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-SCALAR-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-SCALAR-DAG: %[[c32:.*]] = arith.constant 32 : index
// CHECK-SCALAR: scf.for %[[i:.*]] = %[[c0]] to %[[c32]] step %[[c1]] {
// CHECK-SCALAR: %[[lo:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xindex>
// CHECK-SCALAR: %[[i1:.*]] = arith.addi %[[i]], %[[c1]] : index
// CHECK-SCALAR: %[[hi:.*]] = memref.load %{{.*}}[%[[i1]]] : memref<?xindex>
// CHECK-SCALAR: scf.for %[[jj:.*]] = %[[lo]] to %[[hi]] step %[[c1]] {
// CHECK-SCALAR: %[[j:.*]] = memref.load %{{.*}}[%[[jj]]] : memref<?xindex>
// CHECK-SCALAR: %[[x:.*]] = memref.load %{{.*}}[%[[i1]], %[[j]]] : memref<33x64xf64>
// CHECK-SCALAR: %[[a:.*]] = memref.load %{{.*}}[%[[jj]]] : memref<?xf64>
// CHECK-SCALAR: %[[s:.*]] = arith.addf %[[x]], %[[a]] : f64
// CHECK-SCALAR: memref.store %[[s]], %{{.*}}[%[[i1]], %[[j]]] : memref<33x64xf64>
// CHECK-SCALAR: }
// CHECK-SCALAR: }
// CHECK-SCALAR: return
//
// CHECK-VEC16: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (16, d0 - d1)
// CHECK-VEC16-LABEL: func @add_dense
// CHECK-VEC16-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC16-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-VEC16-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-VEC16-DAG: %[[c32:.*]] = arith.constant 32 : index
// CHECK-VEC16: scf.for %[[i:.*]] = %[[c0]] to %[[c32]] step %[[c1]] {
// CHECK-VEC16: %[[lo:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xindex>
// CHECK-VEC16: %[[i1:.*]] = arith.addi %[[i]], %[[c1]] : index
// CHECK-VEC16: %[[hi:.*]] = memref.load %{{.*}}[%[[i1]]] : memref<?xindex>
// CHECK-VEC16: scf.for %[[jj:.*]] = %[[lo]] to %[[hi]] step %[[c16]] {
// CHECK-VEC16: %[[sub:.*]] = affine.min #[[$map]](%[[hi]], %[[jj]])[%[[c16]]]
// CHECK-VEC16: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<16xi1>
// CHECK-VEC16: %[[j:.*]] = vector.maskedload %{{.*}}[%[[jj]]], %[[mask]], %{{.*}} : memref<?xindex>
// CHECK-VEC16: %[[x:.*]] = vector.gather %{{.*}}[%[[i1]], %[[c0]]] [%[[j]]], %[[mask]], %{{.*}} : memref<33x64xf64>
// CHECK-VEC16: %[[a:.*]] = vector.maskedload %{{.*}}[%[[jj]]], %[[mask]], %{{.*}} : memref<?xf64>
// CHECK-VEC16: %[[s:.*]] = arith.addf %[[x]], %[[a]] : vector<16xf64>
// CHECK-VEC16: vector.scatter %{{.*}}[%[[i1]], %[[c0]]] [%[[j]]], %[[mask]], %[[s]] : memref<33x64xf64>
// CHECK-VEC16: }
// CHECK-VEC16: }
// CHECK-VEC16: return
//
// CHECK-VEC16-IDX32: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (16, d0 - d1)
// CHECK-VEC16-IDX32-LABEL: func @add_dense
// CHECK-VEC16-IDX32-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC16-IDX32-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-VEC16-IDX32-DAG: %[[c16:.*]] = arith.constant 16 : index
// CHECK-VEC16-IDX32-DAG: %[[c32:.*]] = arith.constant 32 : index
// CHECK-VEC16-IDX32: scf.for %[[i:.*]] = %[[c0]] to %[[c32]] step %[[c1]] {
// CHECK-VEC16-IDX32: %[[lo:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xindex>
// CHECK-VEC16-IDX32: %[[i1:.*]] = arith.addi %[[i]], %[[c1]] : index
// CHECK-VEC16-IDX32: %[[hi:.*]] = memref.load %{{.*}}[%[[i1]]] : memref<?xindex>
// CHECK-VEC16-IDX32: scf.for %[[jj:.*]] = %[[lo]] to %[[hi]] step %[[c16]] {
// CHECK-VEC16-IDX32: %[[sub:.*]] = affine.min #[[$map]](%[[hi]], %[[jj]])[%[[c16]]]
// CHECK-VEC16-IDX32: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<16xi1>
// CHECK-VEC16-IDX32: %[[j:.*]] = vector.maskedload %{{.*}}[%[[jj]]], %[[mask]], %{{.*}} : memref<?xindex>
// CHECK-VEC16-IDX32: %[[x:.*]] = vector.gather %{{.*}}[%[[i1]], %[[c0]]] [%[[j]]], %[[mask]], %{{.*}} : memref<33x64xf64>
// CHECK-VEC16-IDX32: %[[a:.*]] = vector.maskedload %{{.*}}[%[[jj]]], %[[mask]], %{{.*}} : memref<?xf64>
// CHECK-VEC16-IDX32: %[[s:.*]] = arith.addf %[[x]], %[[a]] : vector<16xf64>
// CHECK-VEC16-IDX32: vector.scatter %{{.*}}[%[[i1]], %[[c0]]] [%[[j]]], %[[mask]], %[[s]] : memref<33x64xf64>
// CHECK-VEC16-IDX32: }
// CHECK-VEC16-IDX32: }
// CHECK-VEC16-IDX32: return
//
// CHECK-VEC4-SVE: #[[$map:.*]] = affine_map<(d0, d1)[s0] -> (s0, d0 - d1)
// CHECK-VEC4-SVE-LABEL: func @add_dense
// CHECK-VEC4-SVE-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-VEC4-SVE-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-VEC4-SVE-DAG: %[[c4:.*]] = arith.constant 4 : index
// CHECK-VEC4-SVE-DAG: %[[c32:.*]] = arith.constant 32 : index
// CHECK-VEC4-SVE-DAG: %[[v0idx:.*]] = arith.constant dense<0> : vector<[4]xindex>
// CHECK-VEC4-SVE-DAG: %[[v0f64:.*]] = arith.constant dense<0.000000e+00> : vector<[4]xf64>
// CHECK-VEC4-SVE: scf.for %[[i:.*]] = %[[c0]] to %[[c32]] step %[[c1]] {
// CHECK-VEC4-SVE: %[[lo:.*]] = memref.load %{{.*}}[%[[i]]] : memref<?xindex>
// CHECK-VEC4-SVE: %[[i1:.*]] = arith.addi %[[i]], %[[c1]] : index
// CHECK-VEC4-SVE: %[[hi:.*]] = memref.load %{{.*}}[%[[i1]]] : memref<?xindex>
// CHECK-VEC4-SVE: %[[vscale:.*]] = vector.vscale
// CHECK-VEC4-SVE: %[[step:.*]] = arith.muli %[[vscale]], %[[c4]] : index
// CHECK-VEC4-SVE: scf.for %[[jj:.*]] = %[[lo]] to %[[hi]] step %[[step]] {
// CHECK-VEC4-SVE: %[[sub:.*]] = affine.min #[[$map]](%[[hi]], %[[jj]])[%[[step]]]
// CHECK-VEC4-SVE: %[[mask:.*]] = vector.create_mask %[[sub]] : vector<[4]xi1>
// CHECK-VEC4-SVE: %[[j:.*]] = vector.maskedload %{{.*}}[%[[jj]]], %[[mask]], %[[v0idx]] : memref<?xindex>
// CHECK-VEC4-SVE: %[[x:.*]] = vector.gather %{{.*}}[%[[i1]], %[[c0]]] [%[[j]]], %[[mask]], %[[v0f64]] : memref<33x64xf64>
// CHECK-VEC4-SVE: %[[a:.*]] = vector.maskedload %{{.*}}[%[[jj]]], %[[mask]], %[[v0f64]] : memref<?xf64>
// CHECK-VEC4-SVE: %[[s:.*]] = arith.addf %[[x]], %[[a]] : vector<[4]xf64>
// CHECK-VEC4-SVE: vector.scatter %{{.*}}[%[[i1]], %[[c0]]] [%[[j]]], %[[mask]], %[[s]] : memref<33x64xf64>
// CHECK-VEC4-SVE: }
// CHECK-VEC4-SVE: }
// CHECK-VEC4-SVE: return
//
func.func @add_dense(%arga: tensor<32x64xf64, #SparseMatrix>,
                     %argx: tensor<33x64xf64>) -> tensor<33x64xf64> {
  %0 = linalg.generic #trait_affine
    ins(%arga: tensor<32x64xf64, #SparseMatrix>)
    outs(%argx: tensor<33x64xf64>) {
    ^bb(%a: f64, %x: f64):
      %0 = arith.addf %x, %a : f64
      linalg.yield %0 : f64
  } -> tensor<33x64xf64>
  return %0 : tensor<33x64xf64>
}