1// RUN: mlir-opt %s -convert-vector-to-llvm -split-input-file | FileCheck %s 2 3//===----------------------------------------------------------------------===// 4// vector.bitcast 5//===----------------------------------------------------------------------===// 6 7func.func @bitcast_f32_to_i32_vector_0d(%arg0: vector<f32>) -> vector<i32> { 8 %0 = vector.bitcast %arg0 : vector<f32> to vector<i32> 9 return %0 : vector<i32> 10} 11 12// CHECK-LABEL: @bitcast_f32_to_i32_vector_0d 13// CHECK-SAME: %[[ARG_0:.*]]: vector<f32> 14// CHECK: %[[VEC_F32_1D:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<f32> to vector<1xf32> 15// CHECK: %[[VEC_I32_1D:.*]] = llvm.bitcast %[[VEC_F32_1D]] : vector<1xf32> to vector<1xi32> 16// CHECK: %[[VEC_I32_0D:.*]] = builtin.unrealized_conversion_cast %[[VEC_I32_1D]] : vector<1xi32> to vector<i32> 17// CHECK: return %[[VEC_I32_0D]] : vector<i32> 18 19// ----- 20 21func.func @bitcast_f32_to_i32_vector(%arg0: vector<16xf32>) -> vector<16xi32> { 22 %0 = vector.bitcast %arg0 : vector<16xf32> to vector<16xi32> 23 return %0 : vector<16xi32> 24} 25 26 27// CHECK-LABEL: @bitcast_f32_to_i32_vector 28// CHECK-SAME: %[[ARG_0:.*]]: vector<16xf32> 29// CHECK: llvm.bitcast %[[ARG_0]] : vector<16xf32> to vector<16xi32> 30 31// ----- 32 33func.func @bitcast_f32_to_i32_vector_scalable(%arg0: vector<[16]xf32>) -> vector<[16]xi32> { 34 %0 = vector.bitcast %arg0 : vector<[16]xf32> to vector<[16]xi32> 35 return %0 : vector<[16]xi32> 36} 37 38// CHECK-LABEL: @bitcast_f32_to_i32_vector_scalable 39// CHECK-SAME: %[[ARG_0:.*]]: vector<[16]xf32> 40// CHECK: llvm.bitcast %[[ARG_0]] : vector<[16]xf32> to vector<[16]xi32> 41 42// ----- 43 44func.func @bitcast_i8_to_f32_vector(%arg0: vector<64xi8>) -> vector<16xf32> { 45 %0 = vector.bitcast %arg0 : vector<64xi8> to vector<16xf32> 46 return %0 : vector<16xf32> 47} 48 49// CHECK-LABEL: @bitcast_i8_to_f32_vector 50// CHECK-SAME: %[[ARG_0:.*]]: vector<64xi8> 51// CHECK: llvm.bitcast %[[ARG_0]] : vector<64xi8> to 
vector<16xf32> 52 53// ----- 54 55func.func @bitcast_i8_to_f32_vector_scalable(%arg0: vector<[64]xi8>) -> vector<[16]xf32> { 56 %0 = vector.bitcast %arg0 : vector<[64]xi8> to vector<[16]xf32> 57 return %0 : vector<[16]xf32> 58} 59 60// CHECK-LABEL: @bitcast_i8_to_f32_vector_scalable 61// CHECK-SAME: %[[ARG_0:.*]]: vector<[64]xi8> 62// CHECK: llvm.bitcast %[[ARG_0]] : vector<[64]xi8> to vector<[16]xf32> 63 64// ----- 65 66func.func @bitcast_index_to_i8_vector(%arg0: vector<16xindex>) -> vector<128xi8> { 67 %0 = vector.bitcast %arg0 : vector<16xindex> to vector<128xi8> 68 return %0 : vector<128xi8> 69} 70 71// CHECK-LABEL: @bitcast_index_to_i8_vector 72// CHECK-SAME: %[[ARG_0:.*]]: vector<16xindex> 73// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<16xindex> to vector<16xi64> 74// CHECK: llvm.bitcast %[[T0]] : vector<16xi64> to vector<128xi8> 75 76// ----- 77 78func.func @bitcast_index_to_i8_vector_scalable(%arg0: vector<[16]xindex>) -> vector<[128]xi8> { 79 %0 = vector.bitcast %arg0 : vector<[16]xindex> to vector<[128]xi8> 80 return %0 : vector<[128]xi8> 81} 82 83// CHECK-LABEL: @bitcast_index_to_i8_vector_scalable 84// CHECK-SAME: %[[ARG_0:.*]]: vector<[16]xindex> 85// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<[16]xindex> to vector<[16]xi64> 86// CHECK: llvm.bitcast %[[T0]] : vector<[16]xi64> to vector<[128]xi8> 87 88// ----- 89 90// CHECK-LABEL: func.func @bitcast_2d( 91// CHECK-SAME: %[[ARG_0:.*]]: vector<2x4xi32>) -> vector<2x2xi64> { 92// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<2x4xi32> to !llvm.array<2 x vector<4xi32>> 93// CHECK: %[[VEC_1:.*]] = llvm.extractvalue %[[T0]][0] : !llvm.array<2 x vector<4xi32>> 94// CHECK: %[[BCAST_1:.*]] = llvm.bitcast %[[VEC_1]] : vector<4xi32> to vector<2xi64> 95// CHECK: %[[OUT_1:.*]] = llvm.insertvalue %[[BCAST_1]], {{.*}}[0] : !llvm.array<2 x vector<2xi64>> 96// CHECK: %[[VEC_2:.*]] = llvm.extractvalue %[[T0]][1] : !llvm.array<2 
x vector<4xi32>> 97// CHECK: %[[BCAST_2:.*]] = llvm.bitcast %[[VEC_2]] : vector<4xi32> to vector<2xi64> 98// CHECK: %[[OUT_2:.*]] = llvm.insertvalue %[[BCAST_2]], %[[OUT_1]][1] : !llvm.array<2 x vector<2xi64>> 99func.func @bitcast_2d(%arg0: vector<2x4xi32>) -> vector<2x2xi64> { 100 %0 = vector.bitcast %arg0 : vector<2x4xi32> to vector<2x2xi64> 101 return %0 : vector<2x2xi64> 102} 103 104// ----- 105 106// CHECK-LABEL: func.func @bitcast_2d_scalable( 107// CHECK-SAME: %[[ARG_0:.*]]: vector<2x[4]xi32>) -> vector<2x[2]xi64> { 108// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[ARG_0]] : vector<2x[4]xi32> to !llvm.array<2 x vector<[4]xi32>> 109// CHECK: %[[VEC_1:.*]] = llvm.extractvalue %[[T0]][0] : !llvm.array<2 x vector<[4]xi32>> 110// CHECK: %[[BCAST_1:.*]] = llvm.bitcast %[[VEC_1]] : vector<[4]xi32> to vector<[2]xi64> 111// CHECK: %[[OUT_1:.*]] = llvm.insertvalue %[[BCAST_1]], {{.*}}[0] : !llvm.array<2 x vector<[2]xi64>> 112// CHECK: %[[VEC_2:.*]] = llvm.extractvalue %[[T0]][1] : !llvm.array<2 x vector<[4]xi32>> 113// CHECK: %[[BCAST_2:.*]] = llvm.bitcast %[[VEC_2]] : vector<[4]xi32> to vector<[2]xi64> 114// CHECK: %[[OUT_2:.*]] = llvm.insertvalue %[[BCAST_2]], %[[OUT_1]][1] : !llvm.array<2 x vector<[2]xi64>> 115func.func @bitcast_2d_scalable(%arg0: vector<2x[4]xi32>) -> vector<2x[2]xi64> { 116 %0 = vector.bitcast %arg0 : vector<2x[4]xi32> to vector<2x[2]xi64> 117 return %0 : vector<2x[2]xi64> 118} 119 120// ----- 121 122//===----------------------------------------------------------------------===// 123// vector.broadcast 124//===----------------------------------------------------------------------===// 125 126func.func @broadcast_vec0d_from_f32(%arg0: f32) -> vector<f32> { 127 %0 = vector.broadcast %arg0 : f32 to vector<f32> 128 return %0 : vector<f32> 129} 130// CHECK-LABEL: @broadcast_vec0d_from_f32 131// CHECK-SAME: %[[A:.*]]: f32) 132// CHECK: %[[T0:.*]] = llvm.insertelement %[[A]] 133// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast 
%[[T0]] : vector<1xf32> to vector<f32> 134// CHECK: return %[[T1]] : vector<f32> 135 136// ----- 137 138func.func @broadcast_vec0d_from_vec0d(%arg0: vector<f32>) -> vector<f32> { 139 %0 = vector.broadcast %arg0 : vector<f32> to vector<f32> 140 return %0 : vector<f32> 141} 142// CHECK-LABEL: @broadcast_vec0d_from_vec0d( 143// CHECK-SAME: %[[A:.*]]: vector<f32>) 144// CHECK: return %[[A]] : vector<f32> 145 146// ----- 147 148func.func @broadcast_vec1d_from_f32(%arg0: f32) -> vector<2xf32> { 149 %0 = vector.broadcast %arg0 : f32 to vector<2xf32> 150 return %0 : vector<2xf32> 151} 152// CHECK-LABEL: @broadcast_vec1d_from_f32 153// CHECK-SAME: %[[A:.*]]: f32) 154// CHECK: %[[T0:.*]] = llvm.insertelement %[[A]] 155// CHECK: %[[T1:.*]] = llvm.shufflevector %[[T0]] 156// CHECK: return %[[T1]] : vector<2xf32> 157 158// ----- 159 160func.func @broadcast_vec1d_from_f32_scalable(%arg0: f32) -> vector<[2]xf32> { 161 %0 = vector.broadcast %arg0 : f32 to vector<[2]xf32> 162 return %0 : vector<[2]xf32> 163} 164// CHECK-LABEL: @broadcast_vec1d_from_f32_scalable 165// CHECK-SAME: %[[A:.*]]: f32) 166// CHECK: %[[T0:.*]] = llvm.insertelement %[[A]] 167// CHECK: %[[T1:.*]] = llvm.shufflevector %[[T0]] 168// CHECK: return %[[T1]] : vector<[2]xf32> 169 170// ----- 171 172func.func @broadcast_vec1d_from_index(%arg0: index) -> vector<2xindex> { 173 %0 = vector.broadcast %arg0 : index to vector<2xindex> 174 return %0 : vector<2xindex> 175} 176// CHECK-LABEL: @broadcast_vec1d_from_index 177// CHECK-SAME: %[[A:.*]]: index) 178// CHECK: %[[A1:.*]] = builtin.unrealized_conversion_cast %[[A]] : index to i64 179// CHECK: %[[T0:.*]] = llvm.insertelement %[[A1]] 180// CHECK: %[[T1:.*]] = llvm.shufflevector %[[T0]] 181// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<2xi64> to vector<2xindex> 182// CHECK: return %[[T2]] : vector<2xindex> 183 184// ----- 185 186func.func @broadcast_vec1d_from_index_scalable(%arg0: index) -> vector<[2]xindex> { 187 %0 = vector.broadcast %arg0 
: index to vector<[2]xindex> 188 return %0 : vector<[2]xindex> 189} 190// CHECK-LABEL: @broadcast_vec1d_from_index_scalable 191// CHECK-SAME: %[[A:.*]]: index) 192// CHECK: %[[A1:.*]] = builtin.unrealized_conversion_cast %[[A]] : index to i64 193// CHECK: %[[T0:.*]] = llvm.insertelement %[[A1]] 194// CHECK: %[[T1:.*]] = llvm.shufflevector %[[T0]] 195// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<[2]xi64> to vector<[2]xindex> 196// CHECK: return %[[T2]] : vector<[2]xindex> 197 198// ----- 199 200func.func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> { 201 %0 = vector.broadcast %arg0 : f32 to vector<2x3xf32> 202 return %0 : vector<2x3xf32> 203} 204// CHECK-LABEL: @broadcast_vec2d_from_scalar( 205// CHECK-SAME: %[[A:.*]]: f32) 206// CHECK: %[[T0:.*]] = llvm.insertelement %[[A]] 207// CHECK: %[[T1:.*]] = llvm.shufflevector %[[T0]] 208// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[0] : !llvm.array<2 x vector<3xf32>> 209// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[1] : !llvm.array<2 x vector<3xf32>> 210// CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32> 211// CHECK: return %[[T4]] : vector<2x3xf32> 212 213// ----- 214 215func.func @broadcast_vec2d_from_scalar_scalable(%arg0: f32) -> vector<2x[3]xf32> { 216 %0 = vector.broadcast %arg0 : f32 to vector<2x[3]xf32> 217 return %0 : vector<2x[3]xf32> 218} 219// CHECK-LABEL: @broadcast_vec2d_from_scalar_scalable( 220// CHECK-SAME: %[[A:.*]]: f32) 221// CHECK: %[[T0:.*]] = llvm.insertelement %[[A]] 222// CHECK: %[[T1:.*]] = llvm.shufflevector %[[T0]] 223// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[0] : !llvm.array<2 x vector<[3]xf32>> 224// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[1] : !llvm.array<2 x vector<[3]xf32>> 225// CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : !llvm.array<2 x vector<[3]xf32>> to vector<2x[3]xf32> 226// CHECK: return %[[T4]] : 
vector<2x[3]xf32> 227 228// ----- 229 230func.func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> { 231 %0 = vector.broadcast %arg0 : f32 to vector<2x3x4xf32> 232 return %0 : vector<2x3x4xf32> 233} 234// CHECK-LABEL: @broadcast_vec3d_from_scalar( 235// CHECK-SAME: %[[A:.*]]: f32) 236// CHECK: %[[T0:.*]] = llvm.insertelement %[[A]] 237// CHECK: %[[T1:.*]] = llvm.shufflevector %[[T0]] 238// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[0, 0] : !llvm.array<2 x array<3 x vector<4xf32>>> 239// ... 240// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[1, 2] : !llvm.array<2 x array<3 x vector<4xf32>>> 241// CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : !llvm.array<2 x array<3 x vector<4xf32>>> to vector<2x3x4xf32> 242// CHECK: return %[[T4]] : vector<2x3x4xf32> 243 244// ----- 245 246func.func @broadcast_vec3d_from_scalar_scalable(%arg0: f32) -> vector<2x3x[4]xf32> { 247 %0 = vector.broadcast %arg0 : f32 to vector<2x3x[4]xf32> 248 return %0 : vector<2x3x[4]xf32> 249} 250// CHECK-LABEL: @broadcast_vec3d_from_scalar_scalable( 251// CHECK-SAME: %[[A:.*]]: f32) 252// CHECK: %[[T0:.*]] = llvm.insertelement %[[A]] 253// CHECK: %[[T1:.*]] = llvm.shufflevector %[[T0]] 254// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[0, 0] : !llvm.array<2 x array<3 x vector<[4]xf32>>> 255// ... 
256// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %{{.*}}[1, 2] : !llvm.array<2 x array<3 x vector<[4]xf32>>> 257// CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : !llvm.array<2 x array<3 x vector<[4]xf32>>> to vector<2x3x[4]xf32> 258// CHECK: return %[[T4]] : vector<2x3x[4]xf32> 259 260// ----- 261 262func.func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> vector<2xf32> { 263 %0 = vector.broadcast %arg0 : vector<2xf32> to vector<2xf32> 264 return %0 : vector<2xf32> 265} 266// CHECK-LABEL: @broadcast_vec1d_from_vec1d( 267// CHECK-SAME: %[[A:.*]]: vector<2xf32>) 268// CHECK: return %[[A]] : vector<2xf32> 269 270// ----- 271 272func.func @broadcast_vec1d_from_vec1d_scalable(%arg0: vector<[2]xf32>) -> vector<[2]xf32> { 273 %0 = vector.broadcast %arg0 : vector<[2]xf32> to vector<[2]xf32> 274 return %0 : vector<[2]xf32> 275} 276// CHECK-LABEL: @broadcast_vec1d_from_vec1d_scalable( 277// CHECK-SAME: %[[A:.*]]: vector<[2]xf32>) 278// CHECK: return %[[A]] : vector<[2]xf32> 279 280// ----- 281 282func.func @broadcast_vec2d_from_vec0d(%arg0: vector<f32>) -> vector<3x2xf32> { 283 %0 = vector.broadcast %arg0 : vector<f32> to vector<3x2xf32> 284 return %0 : vector<3x2xf32> 285} 286// CHECK-LABEL: @broadcast_vec2d_from_vec0d( 287// CHECK-SAME: %[[A:.*]]: vector<f32>) 288// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<f32> to vector<1xf32> 289// CHECK: %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32> 290// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> 291// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : index) : i64 292// CHECK: %[[T5:.*]] = llvm.extractelement %[[T0]][%[[T4]] : i64] : vector<1xf32> 293// CHECK: %[[T6Insert:.*]] = llvm.insertelement %[[T5]] 294// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T6Insert]] 295// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T2]][0] : !llvm.array<3 x vector<2xf32>> 296// CHECK: %[[T8:.*]] = 
llvm.insertvalue %[[T6]], %[[T7]][1] : !llvm.array<3 x vector<2xf32>> 297// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T6]], %[[T8]][2] : !llvm.array<3 x vector<2xf32>> 298// CHECK: %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T9]] : !llvm.array<3 x vector<2xf32>> to vector<3x2xf32> 299// CHECK: return %[[T10]] : vector<3x2xf32> 300 301// ----- 302 303func.func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> { 304 %0 = vector.broadcast %arg0 : vector<2xf32> to vector<3x2xf32> 305 return %0 : vector<3x2xf32> 306} 307// CHECK-LABEL: @broadcast_vec2d_from_vec1d( 308// CHECK-SAME: %[[A:.*]]: vector<2xf32>) 309// CHECK: %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32> 310// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> 311// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][0] : !llvm.array<3 x vector<2xf32>> 312// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x vector<2xf32>> 313// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vector<2xf32>> 314// CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : !llvm.array<3 x vector<2xf32>> to vector<3x2xf32> 315// CHECK: return %[[T5]] : vector<3x2xf32> 316 317// ----- 318 319func.func @broadcast_vec2d_from_vec1d_scalable(%arg0: vector<[2]xf32>) -> vector<3x[2]xf32> { 320 %0 = vector.broadcast %arg0 : vector<[2]xf32> to vector<3x[2]xf32> 321 return %0 : vector<3x[2]xf32> 322} 323// CHECK-LABEL: @broadcast_vec2d_from_vec1d_scalable( 324// CHECK-SAME: %[[A:.*]]: vector<[2]xf32>) 325// CHECK: %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<3x[2]xf32> 326// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x[2]xf32> to !llvm.array<3 x vector<[2]xf32>> 327// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][0] : !llvm.array<3 x vector<[2]xf32>> 328// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm.array<3 x 
vector<[2]xf32>> 329// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm.array<3 x vector<[2]xf32>> 330// CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : !llvm.array<3 x vector<[2]xf32>> to vector<3x[2]xf32> 331// CHECK: return %[[T5]] : vector<3x[2]xf32> 332 333// ----- 334 335func.func @broadcast_vec2d_from_index_vec1d(%arg0: vector<2xindex>) -> vector<3x2xindex> { 336 %0 = vector.broadcast %arg0 : vector<2xindex> to vector<3x2xindex> 337 return %0 : vector<3x2xindex> 338} 339// CHECK-LABEL: @broadcast_vec2d_from_index_vec1d( 340// CHECK-SAME: %[[A:.*]]: vector<2xindex>) 341// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<2xindex> to vector<2xi64> 342// CHECK: %[[T0:.*]] = arith.constant dense<0> : vector<3x2xindex> 343// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x2xindex> to !llvm.array<3 x vector<2xi64>> 344// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][0] : !llvm.array<3 x vector<2xi64>> 345 346// CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %{{.*}} : !llvm.array<3 x vector<2xi64>> to vector<3x2xindex> 347// CHECK: return %[[T4]] : vector<3x2xindex> 348 349// ----- 350 351func.func @broadcast_vec2d_from_index_vec1d_scalable(%arg0: vector<[2]xindex>) -> vector<3x[2]xindex> { 352 %0 = vector.broadcast %arg0 : vector<[2]xindex> to vector<3x[2]xindex> 353 return %0 : vector<3x[2]xindex> 354} 355// CHECK-LABEL: @broadcast_vec2d_from_index_vec1d_scalable( 356// CHECK-SAME: %[[A:.*]]: vector<[2]xindex>) 357// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<[2]xindex> to vector<[2]xi64> 358// CHECK: %[[T0:.*]] = arith.constant dense<0> : vector<3x[2]xindex> 359// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x[2]xindex> to !llvm.array<3 x vector<[2]xi64>> 360// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][0] : !llvm.array<3 x vector<[2]xi64>> 361 362// CHECK: %[[T4:.*]] = 
builtin.unrealized_conversion_cast %{{.*}} : !llvm.array<3 x vector<[2]xi64>> to vector<3x[2]xindex> 363// CHECK: return %[[T4]] : vector<3x[2]xindex> 364 365// ----- 366 367func.func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> { 368 %0 = vector.broadcast %arg0 : vector<2xf32> to vector<4x3x2xf32> 369 return %0 : vector<4x3x2xf32> 370} 371// CHECK-LABEL: @broadcast_vec3d_from_vec1d( 372// CHECK-SAME: %[[A:.*]]: vector<2xf32>) 373// CHECK-DAG: %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32> 374// CHECK-DAG: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> 375// CHECK-DAG: %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x2xf32> 376// CHECK-DAG: %[[T6:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>> 377 378// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][0] : !llvm.array<3 x vector<2xf32>> 379// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][1] : !llvm.array<3 x vector<2xf32>> 380// CHECK: %[[T5:.*]] = llvm.insertvalue %[[A]], %[[T4]][2] : !llvm.array<3 x vector<2xf32>> 381 382// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T5]], %[[T6]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> 383// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T5]], %[[T7]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> 384// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T5]], %[[T8]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> 385// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T5]], %[[T9]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> 386 387// CHECK: %[[T11:.*]] = builtin.unrealized_conversion_cast %[[T10]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32> 388// CHECK: return %[[T11]] : vector<4x3x2xf32> 389 390// ----- 391 392func.func @broadcast_vec3d_from_vec1d_scalable(%arg0: vector<[2]xf32>) -> vector<4x3x[2]xf32> { 393 %0 = vector.broadcast %arg0 : vector<[2]xf32> to vector<4x3x[2]xf32> 394 
return %0 : vector<4x3x[2]xf32> 395} 396// CHECK-LABEL: @broadcast_vec3d_from_vec1d_scalable( 397// CHECK-SAME: %[[A:.*]]: vector<[2]xf32>) 398// CHECK-DAG: %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<3x[2]xf32> 399// CHECK-DAG: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<3x[2]xf32> to !llvm.array<3 x vector<[2]xf32>> 400// CHECK-DAG: %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x[2]xf32> 401// CHECK-DAG: %[[T6:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<4x3x[2]xf32> to !llvm.array<4 x array<3 x vector<[2]xf32>>> 402 403// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][0] : !llvm.array<3 x vector<[2]xf32>> 404// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][1] : !llvm.array<3 x vector<[2]xf32>> 405// CHECK: %[[T5:.*]] = llvm.insertvalue %[[A]], %[[T4]][2] : !llvm.array<3 x vector<[2]xf32>> 406 407// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T5]], %[[T6]][0] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 408// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T5]], %[[T7]][1] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 409// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T5]], %[[T8]][2] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 410// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T5]], %[[T9]][3] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 411 412// CHECK: %[[T11:.*]] = builtin.unrealized_conversion_cast %[[T10]] : !llvm.array<4 x array<3 x vector<[2]xf32>>> to vector<4x3x[2]xf32> 413// CHECK: return %[[T11]] : vector<4x3x[2]xf32> 414 415// ----- 416 417func.func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf32> { 418 %0 = vector.broadcast %arg0 : vector<3x2xf32> to vector<4x3x2xf32> 419 return %0 : vector<4x3x2xf32> 420} 421// CHECK-LABEL: @broadcast_vec3d_from_vec2d( 422// CHECK-SAME: %[[A:.*]]: vector<3x2xf32>) 423// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> 424// CHECK: %[[T0:.*]] = arith.constant 
dense<0.000000e+00> : vector<4x3x2xf32> 425// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>> 426// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> 427// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T1]], %[[T3]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> 428// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T1]], %[[T5]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> 429// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T1]], %[[T7]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> 430// CHECK: %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T9]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32> 431// CHECK: return %[[T10]] : vector<4x3x2xf32> 432 433// ----- 434 435func.func @broadcast_vec3d_from_vec2d_scalable(%arg0: vector<3x[2]xf32>) -> vector<4x3x[2]xf32> { 436 %0 = vector.broadcast %arg0 : vector<3x[2]xf32> to vector<4x3x[2]xf32> 437 return %0 : vector<4x3x[2]xf32> 438} 439// CHECK-LABEL: @broadcast_vec3d_from_vec2d_scalable( 440// CHECK-SAME: %[[A:.*]]: vector<3x[2]xf32>) 441// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<3x[2]xf32> to !llvm.array<3 x vector<[2]xf32>> 442// CHECK: %[[T0:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x[2]xf32> 443// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<4x3x[2]xf32> to !llvm.array<4 x array<3 x vector<[2]xf32>>> 444// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][0] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 445// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T1]], %[[T3]][1] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 446// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T1]], %[[T5]][2] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 447// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T1]], %[[T7]][3] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 448// CHECK: %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T9]] : !llvm.array<4 x 
array<3 x vector<[2]xf32>>> to vector<4x3x[2]xf32> 449// CHECK: return %[[T10]] : vector<4x3x[2]xf32> 450 451 452// ----- 453 454func.func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> { 455 %0 = vector.broadcast %arg0 : vector<1xf32> to vector<4xf32> 456 return %0 : vector<4xf32> 457} 458// CHECK-LABEL: @broadcast_stretch( 459// CHECK-SAME: %[[A:.*]]: vector<1xf32>) 460// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64 461// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T1]] : i64] : vector<1xf32> 462// CHECK: %[[T3:.*]] = llvm.insertelement %[[T2]] 463// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]] 464// CHECK: return %[[T4]] : vector<4xf32> 465 466// ----- 467 468func.func @broadcast_stretch_scalable(%arg0: vector<1xf32>) -> vector<[4]xf32> { 469 %0 = vector.broadcast %arg0 : vector<1xf32> to vector<[4]xf32> 470 return %0 : vector<[4]xf32> 471} 472// CHECK-LABEL: @broadcast_stretch_scalable( 473// CHECK-SAME: %[[A:.*]]: vector<1xf32>) 474// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64 475// CHECK: %[[T2:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T1]] : i64] : vector<1xf32> 476// CHECK: %[[T3:.*]] = llvm.insertelement %[[T2]] 477// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]] 478// CHECK: return %[[T4]] : vector<[4]xf32> 479 480// ----- 481 482func.func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> { 483 %0 = vector.broadcast %arg0 : vector<1x4xf32> to vector<3x4xf32> 484 return %0 : vector<3x4xf32> 485} 486// CHECK-LABEL: @broadcast_stretch_at_start( 487// CHECK-SAME: %[[A:.*]]: vector<1x4xf32>) 488// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<1x4xf32> to !llvm.array<1 x vector<4xf32>> 489// CHECK: %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<3x4xf32> 490// CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<3x4xf32> to !llvm.array<3 x vector<4xf32>> 491// CHECK: %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm.array<1 x vector<4xf32>> 
492// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T3]], %[[T4]][0] : !llvm.array<3 x vector<4xf32>> 493// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T3]], %[[T5]][1] : !llvm.array<3 x vector<4xf32>> 494// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T3]], %[[T6]][2] : !llvm.array<3 x vector<4xf32>> 495// CHECK: %[[T8:.*]] = builtin.unrealized_conversion_cast %[[T7]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32> 496// CHECK: return %[[T8]] : vector<3x4xf32> 497 498// ----- 499 500func.func @broadcast_stretch_at_start_scalable(%arg0: vector<1x[4]xf32>) -> vector<3x[4]xf32> { 501 %0 = vector.broadcast %arg0 : vector<1x[4]xf32> to vector<3x[4]xf32> 502 return %0 : vector<3x[4]xf32> 503} 504// CHECK-LABEL: @broadcast_stretch_at_start_scalable( 505// CHECK-SAME: %[[A:.*]]: vector<1x[4]xf32>) 506// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<1x[4]xf32> to !llvm.array<1 x vector<[4]xf32>> 507// CHECK: %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<3x[4]xf32> 508// CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<3x[4]xf32> to !llvm.array<3 x vector<[4]xf32>> 509// CHECK: %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm.array<1 x vector<[4]xf32>> 510// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T3]], %[[T4]][0] : !llvm.array<3 x vector<[4]xf32>> 511// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T3]], %[[T5]][1] : !llvm.array<3 x vector<[4]xf32>> 512// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T3]], %[[T6]][2] : !llvm.array<3 x vector<[4]xf32>> 513// CHECK: %[[T8:.*]] = builtin.unrealized_conversion_cast %[[T7]] : !llvm.array<3 x vector<[4]xf32>> to vector<3x[4]xf32> 514// CHECK: return %[[T8]] : vector<3x[4]xf32> 515 516// ----- 517 518func.func @broadcast_stretch_at_end(%arg0: vector<4x1xf32>) -> vector<4x3xf32> { 519 %0 = vector.broadcast %arg0 : vector<4x1xf32> to vector<4x3xf32> 520 return %0 : vector<4x3xf32> 521} 522// CHECK-LABEL: @broadcast_stretch_at_end( 523// CHECK-SAME: %[[A:.*]]: vector<4x1xf32>) 524// CHECK: 
%[[T2:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<4x1xf32> to !llvm.array<4 x vector<1xf32>> 525// CHECK: %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3xf32> 526// CHECK: %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<4x3xf32> to !llvm.array<4 x vector<3xf32>> 527// CHECK: %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm.array<4 x vector<1xf32>> 528// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i64) : i64 529// CHECK: %[[T5:.*]] = llvm.extractelement %[[T3]]{{\[}}%[[T4]] : i64] : vector<1xf32> 530// CHECK: %[[T6Insert:.*]] = llvm.insertelement %[[T5]] 531// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T6Insert]] 532// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][0] : !llvm.array<4 x vector<3xf32>> 533// CHECK: %[[T10:.*]] = llvm.extractvalue %[[T2]][1] : !llvm.array<4 x vector<1xf32>> 534// CHECK: %[[T11:.*]] = llvm.mlir.constant(0 : i64) : i64 535// CHECK: %[[T12:.*]] = llvm.extractelement %[[T10]]{{\[}}%[[T11]] : i64] : vector<1xf32> 536// CHECK: %[[T13Insert:.*]] = llvm.insertelement %[[T12]] 537// CHECK: %[[T13:.*]] = llvm.shufflevector %[[T13Insert]] 538// CHECK: %[[T14:.*]] = llvm.insertvalue %[[T13]], %[[T8]][1] : !llvm.array<4 x vector<3xf32>> 539// CHECK: %[[T16:.*]] = llvm.extractvalue %[[T2]][2] : !llvm.array<4 x vector<1xf32>> 540// CHECK: %[[T17:.*]] = llvm.mlir.constant(0 : i64) : i64 541// CHECK: %[[T18:.*]] = llvm.extractelement %[[T16]]{{\[}}%[[T17]] : i64] : vector<1xf32> 542// CHECK: %[[T19Insert:.*]] = llvm.insertelement %[[T18]] 543// CHECK: %[[T19:.*]] = llvm.shufflevector %[[T19Insert]] 544// CHECK: %[[T20:.*]] = llvm.insertvalue %[[T19]], %[[T14]][2] : !llvm.array<4 x vector<3xf32>> 545// CHECK: %[[T22:.*]] = llvm.extractvalue %[[T2]][3] : !llvm.array<4 x vector<1xf32>> 546// CHECK: %[[T23:.*]] = llvm.mlir.constant(0 : i64) : i64 547// CHECK: %[[T24:.*]] = llvm.extractelement %[[T22]]{{\[}}%[[T23]] : i64] : vector<1xf32> 548// CHECK: %[[T25Insert:.*]] = llvm.insertelement %[[T24]] 549// 
CHECK: %[[T25:.*]] = llvm.shufflevector %[[T25Insert]] 550// CHECK: %[[T26:.*]] = llvm.insertvalue %[[T25]], %[[T20]][3] : !llvm.array<4 x vector<3xf32>> 551// CHECK: %[[T27:.*]] = builtin.unrealized_conversion_cast %[[T26]] : !llvm.array<4 x vector<3xf32>> to vector<4x3xf32> 552// CHECK: return %[[T27]] : vector<4x3xf32> 553 554// TODO: Add support for scalable vectors. In the case below the scalable dim is the leading one (vector<[4]x1xf32>), and the checks expect the vector.broadcast op to still be present in the output of -convert-vector-to-llvm. 555 556func.func @broadcast_stretch_at_end_scalable(%arg0: vector<[4]x1xf32>) -> vector<[4]x3xf32> { 557 %0 = vector.broadcast %arg0 : vector<[4]x1xf32> to vector<[4]x3xf32> 558 return %0 : vector<[4]x3xf32> 559} 560// CHECK-LABEL: @broadcast_stretch_at_end_scalable 561// CHECK-SAME: %[[A:.*]]: vector<[4]x1xf32>) 562// CHECK: vector.broadcast %[[A]] : vector<[4]x1xf32> to vector<[4]x3xf32> 563 564// ----- 565 566func.func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2xf32> { 567 %0 = vector.broadcast %arg0 : vector<4x1x2xf32> to vector<4x3x2xf32> 568 return %0 : vector<4x3x2xf32> 569} 570// CHECK-LABEL: @broadcast_stretch_in_middle( 571// CHECK-SAME: %[[A:.*]]: vector<4x1x2xf32>) -> vector<4x3x2xf32> { 572// CHECK: %[[T3:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<4x1x2xf32> to !llvm.array<4 x array<1 x vector<2xf32>>> 573// CHECK: %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x2xf32> 574// CHECK: %[[T9:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<4x3x2xf32> to !llvm.array<4 x array<3 x vector<2xf32>>> 575// CHECK: %[[T2:.*]] = arith.constant dense<0.000000e+00> : vector<3x2xf32> 576// CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T2]] : vector<3x2xf32> to !llvm.array<3 x vector<2xf32>> 577// CHECK: %[[T4:.*]] = llvm.extractvalue %[[T3]][0, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> 578// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0] : !llvm.array<3 x vector<2xf32>> 579// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][1] : !llvm.array<3 x vector<2xf32>> 580// CHECK: %[[T8:.*]] = llvm.insertvalue 
%[[T4]], %[[T7]][2] : !llvm.array<3 x vector<2xf32>> 581// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T9]][0] : !llvm.array<4 x array<3 x vector<2xf32>>> 582// CHECK: %[[T12:.*]] = llvm.extractvalue %[[T3]][1, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> 583// CHECK: %[[T14:.*]] = llvm.insertvalue %[[T12]], %[[T5]][0] : !llvm.array<3 x vector<2xf32>> 584// CHECK: %[[T15:.*]] = llvm.insertvalue %[[T12]], %[[T14]][1] : !llvm.array<3 x vector<2xf32>> 585// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T12]], %[[T15]][2] : !llvm.array<3 x vector<2xf32>> 586// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T16]], %[[T10]][1] : !llvm.array<4 x array<3 x vector<2xf32>>> 587// CHECK: %[[T19:.*]] = llvm.extractvalue %[[T3]][2, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> 588// CHECK: %[[T21:.*]] = llvm.insertvalue %[[T19]], %[[T5]][0] : !llvm.array<3 x vector<2xf32>> 589// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T19]], %[[T21]][1] : !llvm.array<3 x vector<2xf32>> 590// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T19]], %[[T22]][2] : !llvm.array<3 x vector<2xf32>> 591// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T17]][2] : !llvm.array<4 x array<3 x vector<2xf32>>> 592// CHECK: %[[T26:.*]] = llvm.extractvalue %[[T3]][3, 0] : !llvm.array<4 x array<1 x vector<2xf32>>> 593// CHECK: %[[T28:.*]] = llvm.insertvalue %[[T26]], %[[T5]][0] : !llvm.array<3 x vector<2xf32>> 594// CHECK: %[[T29:.*]] = llvm.insertvalue %[[T26]], %[[T28]][1] : !llvm.array<3 x vector<2xf32>> 595// CHECK: %[[T30:.*]] = llvm.insertvalue %[[T26]], %[[T29]][2] : !llvm.array<3 x vector<2xf32>> 596// CHECK: %[[T31:.*]] = llvm.insertvalue %[[T30]], %[[T24]][3] : !llvm.array<4 x array<3 x vector<2xf32>>> 597// CHECK: %[[T32:.*]] = builtin.unrealized_conversion_cast %[[T31]] : !llvm.array<4 x array<3 x vector<2xf32>>> to vector<4x3x2xf32> 598// CHECK: return %[[T32]] : vector<4x3x2xf32> 599 600// ----- 601 602func.func @broadcast_stretch_in_middle_scalable_v1(%arg0: vector<4x1x[2]xf32>) -> vector<4x3x[2]xf32> { 
603 %0 = vector.broadcast %arg0 : vector<4x1x[2]xf32> to vector<4x3x[2]xf32> 604 return %0 : vector<4x3x[2]xf32> 605} 606// CHECK-LABEL: @broadcast_stretch_in_middle_scalable_v1( 607// CHECK-SAME: %[[A:.*]]: vector<4x1x[2]xf32>) -> vector<4x3x[2]xf32> { 608// CHECK: %[[T3:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<4x1x[2]xf32> to !llvm.array<4 x array<1 x vector<[2]xf32>>> 609// CHECK: %[[T1:.*]] = arith.constant dense<0.000000e+00> : vector<4x3x[2]xf32> 610// CHECK: %[[T9:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<4x3x[2]xf32> to !llvm.array<4 x array<3 x vector<[2]xf32>>> 611// CHECK: %[[T2:.*]] = arith.constant dense<0.000000e+00> : vector<3x[2]xf32> 612// CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T2]] : vector<3x[2]xf32> to !llvm.array<3 x vector<[2]xf32>> 613// CHECK: %[[T4:.*]] = llvm.extractvalue %[[T3]][0, 0] : !llvm.array<4 x array<1 x vector<[2]xf32>>> 614// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0] : !llvm.array<3 x vector<[2]xf32>> 615// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][1] : !llvm.array<3 x vector<[2]xf32>> 616// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][2] : !llvm.array<3 x vector<[2]xf32>> 617// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T8]], %[[T9]][0] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 618// CHECK: %[[T12:.*]] = llvm.extractvalue %[[T3]][1, 0] : !llvm.array<4 x array<1 x vector<[2]xf32>>> 619// CHECK: %[[T14:.*]] = llvm.insertvalue %[[T12]], %[[T5]][0] : !llvm.array<3 x vector<[2]xf32>> 620// CHECK: %[[T15:.*]] = llvm.insertvalue %[[T12]], %[[T14]][1] : !llvm.array<3 x vector<[2]xf32>> 621// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T12]], %[[T15]][2] : !llvm.array<3 x vector<[2]xf32>> 622// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T16]], %[[T10]][1] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 623// CHECK: %[[T19:.*]] = llvm.extractvalue %[[T3]][2, 0] : !llvm.array<4 x array<1 x vector<[2]xf32>>> 624// CHECK: %[[T21:.*]] = llvm.insertvalue 
%[[T19]], %[[T5]][0] : !llvm.array<3 x vector<[2]xf32>> 625// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T19]], %[[T21]][1] : !llvm.array<3 x vector<[2]xf32>> 626// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T19]], %[[T22]][2] : !llvm.array<3 x vector<[2]xf32>> 627// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T17]][2] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 628// CHECK: %[[T26:.*]] = llvm.extractvalue %[[T3]][3, 0] : !llvm.array<4 x array<1 x vector<[2]xf32>>> 629// CHECK: %[[T28:.*]] = llvm.insertvalue %[[T26]], %[[T5]][0] : !llvm.array<3 x vector<[2]xf32>> 630// CHECK: %[[T29:.*]] = llvm.insertvalue %[[T26]], %[[T28]][1] : !llvm.array<3 x vector<[2]xf32>> 631// CHECK: %[[T30:.*]] = llvm.insertvalue %[[T26]], %[[T29]][2] : !llvm.array<3 x vector<[2]xf32>> 632// CHECK: %[[T31:.*]] = llvm.insertvalue %[[T30]], %[[T24]][3] : !llvm.array<4 x array<3 x vector<[2]xf32>>> 633// CHECK: %[[T32:.*]] = builtin.unrealized_conversion_cast %[[T31]] : !llvm.array<4 x array<3 x vector<[2]xf32>>> to vector<4x3x[2]xf32> 634// CHECK: return %[[T32]] : vector<4x3x[2]xf32> 635 636// ----- 637 638// TODO: Add support for scalable leading (non-trailing) dimensions; until then this vector.broadcast is expected to stay unlowered, as the CHECK line below verifies. 639 640func.func @broadcast_stretch_in_middle_scalable_v2(%arg0: vector<[4]x1x2xf32>) -> vector<[4]x3x2xf32> { 641 %0 = vector.broadcast %arg0 : vector<[4]x1x2xf32> to vector<[4]x3x2xf32> 642 return %0 : vector<[4]x3x2xf32> 643} 644// CHECK-LABEL: @broadcast_stretch_in_middle_scalable_v2( 645// CHECK-SAME: %[[A:.*]]: vector<[4]x1x2xf32>) -> vector<[4]x3x2xf32> { 646// CHECK: vector.broadcast %[[A]] : vector<[4]x1x2xf32> to vector<[4]x3x2xf32> 647 648// ----- 649 650//===----------------------------------------------------------------------===// 651// vector.outerproduct 652//===----------------------------------------------------------------------===// 653 654func.func @outerproduct(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x3xf32> { 655 %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<3xf32> 656 return %2 : 
vector<2x3xf32> 657} 658// CHECK-LABEL: @outerproduct( 659// CHECK-SAME: %[[A:.*]]: vector<2xf32>, 660// CHECK-SAME: %[[B:.*]]: vector<3xf32>) 661// CHECK: %[[T2:.*]] = arith.constant dense<0.000000e+00> : vector<2x3xf32> 662// CHECK: %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T2]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>> 663// CHECK: %[[T3:.*]] = llvm.mlir.constant(0 : i64) : i64 664// CHECK: %[[T4:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T3]] : i64] : vector<2xf32> 665// CHECK: %[[T5Insert:.*]] = llvm.insertelement %[[T4]] 666// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T5Insert]] 667// CHECK: %[[T6:.*]] = arith.mulf %[[T5]], %[[B]] : vector<3xf32> 668// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][0] : !llvm.array<2 x vector<3xf32>> 669// CHECK: %[[T9:.*]] = llvm.mlir.constant(1 : i64) : i64 670// CHECK: %[[T10:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T9]] : i64] : vector<2xf32> 671// CHECK: %[[T11Insert:.*]] = llvm.insertelement %[[T10]] 672// CHECK: %[[T11:.*]] = llvm.shufflevector %[[T11Insert]] 673// CHECK: %[[T12:.*]] = arith.mulf %[[T11]], %[[B]] : vector<3xf32> 674// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T8]][1] : !llvm.array<2 x vector<3xf32>> 675// CHECK: %[[T14:.*]] = builtin.unrealized_conversion_cast %[[T13]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32> 676// CHECK: return %[[T14]] : vector<2x3xf32> 677 678// ----- 679 680func.func @outerproduct_scalable(%arg0: vector<2xf32>, %arg1: vector<[3]xf32>) -> vector<2x[3]xf32> { 681 %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<[3]xf32> 682 return %2 : vector<2x[3]xf32> 683} 684// CHECK-LABEL: @outerproduct_scalable 685// CHECK-SAME: %[[A:.*]]: vector<2xf32>, 686// CHECK-SAME: %[[B:.*]]: vector<[3]xf32>) 687// CHECK: %[[T2:.*]] = arith.constant dense<0.000000e+00> : vector<2x[3]xf32> 688// CHECK: %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T2]] : vector<2x[3]xf32> to !llvm.array<2 x vector<[3]xf32>> 689// CHECK: %[[T3:.*]] = 
llvm.mlir.constant(0 : i64) : i64 690// CHECK: %[[T4:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T3]] : i64] : vector<2xf32> 691// CHECK: %[[T5Insert:.*]] = llvm.insertelement %[[T4]] 692// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T5Insert]] 693// CHECK: %[[T6:.*]] = arith.mulf %[[T5]], %[[B]] : vector<[3]xf32> 694// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T6]], %[[T7]][0] : !llvm.array<2 x vector<[3]xf32>> 695// CHECK: %[[T9:.*]] = llvm.mlir.constant(1 : i64) : i64 696// CHECK: %[[T10:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T9]] : i64] : vector<2xf32> 697// CHECK: %[[T11Insert:.*]] = llvm.insertelement %[[T10]] 698// CHECK: %[[T11:.*]] = llvm.shufflevector %[[T11Insert]] 699// CHECK: %[[T12:.*]] = arith.mulf %[[T11]], %[[B]] : vector<[3]xf32> 700// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T8]][1] : !llvm.array<2 x vector<[3]xf32>> 701// CHECK: %[[T14:.*]] = builtin.unrealized_conversion_cast %[[T13]] : !llvm.array<2 x vector<[3]xf32>> to vector<2x[3]xf32> 702// CHECK: return %[[T14]] : vector<2x[3]xf32> 703 704// ----- 705 706func.func @outerproduct_index(%arg0: vector<2xindex>, %arg1: vector<3xindex>) -> vector<2x3xindex> { 707 %2 = vector.outerproduct %arg0, %arg1 : vector<2xindex>, vector<3xindex> 708 return %2 : vector<2x3xindex> 709} 710// CHECK-LABEL: @outerproduct_index( 711// CHECK-SAME: %[[A:.*]]: vector<2xindex>, 712// CHECK-SAME: %[[B:.*]]: vector<3xindex>) 713// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<2xindex> to vector<2xi64> 714// CHECK: %[[T0:.*]] = arith.constant dense<0> : vector<2x3xindex> 715// CHECK: %[[T8:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<2x3xindex> to !llvm.array<2 x vector<3xi64>> 716// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i64) : i64 717// CHECK: %[[T3:.*]] = llvm.extractelement %[[T1]]{{\[}}%[[T2]] : i64] : vector<2xi64> 718// CHECK: %[[T4:.*]] = llvm.insertelement %[[T3]] 719// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T4]] 720// CHECK: %[[T5Cast:.*]] = 
builtin.unrealized_conversion_cast %[[T5]] : vector<3xi64> to vector<3xindex> 721// CHECK: %[[T6:.*]] = arith.muli %[[T5Cast]], %[[B]] : vector<3xindex> 722// CHECK: %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T6]] : vector<3xindex> to vector<3xi64> 723// CHECK: %{{.*}} = llvm.insertvalue %[[T7]], %[[T8]][0] : !llvm.array<2 x vector<3xi64>> 724 725// ----- 726 727func.func @outerproduct_index_scalable(%arg0: vector<2xindex>, %arg1: vector<[3]xindex>) -> vector<2x[3]xindex> { 728 %2 = vector.outerproduct %arg0, %arg1 : vector<2xindex>, vector<[3]xindex> 729 return %2 : vector<2x[3]xindex> 730} 731// CHECK-LABEL: @outerproduct_index_scalable 732// CHECK-SAME: %[[A:.*]]: vector<2xindex>, 733// CHECK-SAME: %[[B:.*]]: vector<[3]xindex>) 734// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<2xindex> to vector<2xi64> 735// CHECK: %[[T0:.*]] = arith.constant dense<0> : vector<2x[3]xindex> 736// CHECK: %[[T8:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<2x[3]xindex> to !llvm.array<2 x vector<[3]xi64>> 737// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i64) : i64 738// CHECK: %[[T3:.*]] = llvm.extractelement %[[T1]]{{\[}}%[[T2]] : i64] : vector<2xi64> 739// CHECK: %[[T4:.*]] = llvm.insertelement %[[T3]] 740// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T4]] 741// CHECK: %[[T5Cast:.*]] = builtin.unrealized_conversion_cast %[[T5]] : vector<[3]xi64> to vector<[3]xindex> 742// CHECK: %[[T6:.*]] = arith.muli %[[T5Cast]], %[[B]] : vector<[3]xindex> 743// CHECK: %[[T7:.*]] = builtin.unrealized_conversion_cast %[[T6]] : vector<[3]xindex> to vector<[3]xi64> 744// CHECK: %{{.*}} = llvm.insertvalue %[[T7]], %[[T8]][0] : !llvm.array<2 x vector<[3]xi64>> 745 746// ----- 747 748func.func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: vector<2x3xf32>) -> vector<2x3xf32> { 749 %2 = vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, vector<3xf32> 750 return %2 : vector<2x3xf32> 751} 752// CHECK-LABEL: @outerproduct_add( 753// 
CHECK-SAME: %[[A:.*]]: vector<2xf32>, 754// CHECK-SAME: %[[B:.*]]: vector<3xf32>, 755// CHECK-SAME: %[[C:.*]]: vector<2x3xf32>) -> vector<2x3xf32> 756// CHECK: %[[T7:.*]] = builtin.unrealized_conversion_cast %[[C]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>> 757// CHECK: %[[T3:.*]] = arith.constant dense<0.000000e+00> : vector<2x3xf32> 758// CHECK: %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T3]] : vector<2x3xf32> to !llvm.array<2 x vector<3xf32>> 759// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i64) : i64 760// CHECK: %[[T5:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T4]] : i64] : vector<2xf32> 761// CHECK: %[[T6Insert:.*]] = llvm.insertelement %[[T5]] 762// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T6Insert]] 763// CHECK: %[[T8:.*]] = llvm.extractvalue %[[T7]][0] : !llvm.array<2 x vector<3xf32>> 764// CHECK: %[[T9:.*]] = llvm.intr.fmuladd(%[[T6]], %[[B]], %[[T8]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> 765// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T9]], %[[T10]][0] : !llvm.array<2 x vector<3xf32>> 766// CHECK: %[[T12:.*]] = llvm.mlir.constant(1 : i64) : i64 767// CHECK: %[[T13:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T12]] : i64] : vector<2xf32> 768// CHECK: %[[T14Insert:.*]] = llvm.insertelement %[[T13]] 769// CHECK: %[[T14:.*]] = llvm.shufflevector %[[T14Insert]] 770// CHECK: %[[T16:.*]] = llvm.extractvalue %[[T7]][1] : !llvm.array<2 x vector<3xf32>> 771// CHECK: %[[T17:.*]] = llvm.intr.fmuladd(%[[T14]], %[[B]], %[[T16]]) : (vector<3xf32>, vector<3xf32>, vector<3xf32>) -> vector<3xf32> 772// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T11]][1] : !llvm.array<2 x vector<3xf32>> 773// CHECK: %[[T19:.*]] = builtin.unrealized_conversion_cast %[[T18]] : !llvm.array<2 x vector<3xf32>> to vector<2x3xf32> 774// CHECK: return %[[T19]] : vector<2x3xf32> 775 776// ----- 777 778func.func @outerproduct_add_scalable(%arg0: vector<2xf32>, %arg1: vector<[3]xf32>, %arg2: vector<2x[3]xf32>) -> vector<2x[3]xf32> { 779 %2 = 
vector.outerproduct %arg0, %arg1, %arg2 : vector<2xf32>, vector<[3]xf32> 780 return %2 : vector<2x[3]xf32> 781} 782// CHECK-LABEL: @outerproduct_add_scalable 783// CHECK-SAME: %[[A:.*]]: vector<2xf32>, 784// CHECK-SAME: %[[B:.*]]: vector<[3]xf32>, 785// CHECK-SAME: %[[C:.*]]: vector<2x[3]xf32>) -> vector<2x[3]xf32> 786// CHECK: %[[T7:.*]] = builtin.unrealized_conversion_cast %[[C]] : vector<2x[3]xf32> to !llvm.array<2 x vector<[3]xf32>> 787// CHECK: %[[T3:.*]] = arith.constant dense<0.000000e+00> : vector<2x[3]xf32> 788// CHECK: %[[T10:.*]] = builtin.unrealized_conversion_cast %[[T3]] : vector<2x[3]xf32> to !llvm.array<2 x vector<[3]xf32>> 789// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i64) : i64 790// CHECK: %[[T5:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T4]] : i64] : vector<2xf32> 791// CHECK: %[[T6Insert:.*]] = llvm.insertelement %[[T5]] 792// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T6Insert]] 793// CHECK: %[[T8:.*]] = llvm.extractvalue %[[T7]][0] : !llvm.array<2 x vector<[3]xf32>> 794// CHECK: %[[T9:.*]] = llvm.intr.fmuladd(%[[T6]], %[[B]], %[[T8]]) : (vector<[3]xf32>, vector<[3]xf32>, vector<[3]xf32>) -> vector<[3]xf32> 795// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T9]], %[[T10]][0] : !llvm.array<2 x vector<[3]xf32>> 796// CHECK: %[[T12:.*]] = llvm.mlir.constant(1 : i64) : i64 797// CHECK: %[[T13:.*]] = llvm.extractelement %[[A]]{{\[}}%[[T12]] : i64] : vector<2xf32> 798// CHECK: %[[T14Insert:.*]] = llvm.insertelement %[[T13]] 799// CHECK: %[[T14:.*]] = llvm.shufflevector %[[T14Insert]] 800// CHECK: %[[T16:.*]] = llvm.extractvalue %[[T7]][1] : !llvm.array<2 x vector<[3]xf32>> 801// CHECK: %[[T17:.*]] = llvm.intr.fmuladd(%[[T14]], %[[B]], %[[T16]]) : (vector<[3]xf32>, vector<[3]xf32>, vector<[3]xf32>) -> vector<[3]xf32> 802// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T11]][1] : !llvm.array<2 x vector<[3]xf32>> 803// CHECK: %[[T19:.*]] = builtin.unrealized_conversion_cast %[[T18]] : !llvm.array<2 x vector<[3]xf32>> to vector<2x[3]xf32> 804// CHECK: 
return %[[T19]] : vector<2x[3]xf32> 805 806// ----- 807 808//===----------------------------------------------------------------------===// 809// vector.mask { vector.outerproduct } 810//===----------------------------------------------------------------------===// 811 812func.func @masked_float_add_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: vector<2xf32>, %m: vector<2xi1>) -> vector<2xf32> { 813 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<add>} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32> 814 return %0 : vector<2xf32> 815} 816 817// CHECK-LABEL: func.func @masked_float_add_outerprod( 818// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<2xf32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xf32> { 819// CHECK: %[[VAL_8:.*]] = llvm.intr.fmuladd(%[[VAL_0]], %{{.*}}, %[[VAL_2]]) : (vector<2xf32>, vector<2xf32>, vector<2xf32>) -> vector<2xf32> 820// CHECK: %[[VAL_9:.*]] = arith.select %[[VAL_3]], %[[VAL_8]], %[[VAL_2]] : vector<2xi1>, vector<2xf32> 821 822// ----- 823 824func.func @masked_float_add_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> { 825 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<add>} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> 826 return %0 : vector<[2]xf32> 827} 828 829// CHECK-LABEL: func.func @masked_float_add_outerprod_scalable( 830// CHECK-SAME: %[[VAL_0:.*]]: vector<[2]xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<[2]xf32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xf32> { 831// CHECK: %[[VAL_8:.*]] = llvm.intr.fmuladd(%[[VAL_0]], %{{.*}}, %[[VAL_2]]) : (vector<[2]xf32>, vector<[2]xf32>, vector<[2]xf32>) -> vector<[2]xf32> 832// CHECK: %[[VAL_9:.*]] = arith.select %[[VAL_3]], %[[VAL_8]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xf32> 833 834// ----- 835 836func.func @masked_float_mul_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: 
vector<2xf32>, %m: vector<2xi1>) -> vector<2xf32> { 837 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<mul>} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32> 838 return %0 : vector<2xf32> 839} 840 841// CHECK-LABEL: func.func @masked_float_mul_outerprod( 842// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<2xf32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xf32> { 843// CHECK: %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<2xf32> 844// CHECK: %[[VAL_9:.*]] = arith.mulf %[[VAL_8]], %[[VAL_2]] : vector<2xf32> 845// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32> 846 847// ----- 848 849func.func @masked_float_mul_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> { 850 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<mul>} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> 851 return %0 : vector<[2]xf32> 852} 853 854// CHECK-LABEL: func.func @masked_float_mul_outerprod_scalable( 855// CHECK-SAME: %[[VAL_0:.*]]: vector<[2]xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<[2]xf32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xf32> { 856// CHECK: %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<[2]xf32> 857// CHECK: %[[VAL_9:.*]] = arith.mulf %[[VAL_8]], %[[VAL_2]] : vector<[2]xf32> 858// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xf32> 859 860// ----- 861 862func.func @masked_float_max_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: vector<2xf32>, %m: vector<2xi1>) -> vector<2xf32> { 863 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<maxnumf>} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32> 864 return %0 : vector<2xf32> 865} 866 867// CHECK-LABEL: func.func @masked_float_max_outerprod( 868// CHECK-SAME: %[[VAL_0:.*]]: 
vector<2xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<2xf32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xf32> { 869// CHECK: %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<2xf32> 870// CHECK: %[[VAL_9:.*]] = arith.maxnumf %[[VAL_8]], %[[VAL_2]] : vector<2xf32> 871// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32> 872 873// ----- 874 875func.func @masked_float_max_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> { 876 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<maxnumf>} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> 877 return %0 : vector<[2]xf32> 878} 879 880// CHECK-LABEL: func.func @masked_float_max_outerprod_scalable( 881// CHECK-SAME: %[[VAL_0:.*]]: vector<[2]xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<[2]xf32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xf32> { 882// CHECK: %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<[2]xf32> 883// CHECK: %[[VAL_9:.*]] = arith.maxnumf %[[VAL_8]], %[[VAL_2]] : vector<[2]xf32> 884// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xf32> 885 886// ----- 887 888func.func @masked_float_min_outerprod(%arg0: vector<2xf32>, %arg1: f32, %arg2: vector<2xf32>, %m: vector<2xi1>) -> vector<2xf32> { 889 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<minnumf>} : vector<2xf32>, f32 } : vector<2xi1> -> vector<2xf32> 890 return %0 : vector<2xf32> 891} 892 893// CHECK-LABEL: func.func @masked_float_min_outerprod( 894// CHECK-SAME: %[[VAL_0:.*]]: vector<2xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<2xf32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xf32> { 895// CHECK: %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<2xf32> 896// CHECK: %[[VAL_9:.*]] = arith.minnumf %[[VAL_8]], %[[VAL_2]] : vector<2xf32> 897// CHECK: %[[VAL_10:.*]] = arith.select 
%[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xf32> 898 899// ----- 900 901func.func @masked_float_min_outerprod_scalable(%arg0: vector<[2]xf32>, %arg1: f32, %arg2: vector<[2]xf32>, %m: vector<[2]xi1>) -> vector<[2]xf32> { 902 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<minnumf>} : vector<[2]xf32>, f32 } : vector<[2]xi1> -> vector<[2]xf32> 903 return %0 : vector<[2]xf32> 904} 905 906// CHECK-LABEL: func.func @masked_float_min_outerprod_scalable( 907// CHECK-SAME: %[[VAL_0:.*]]: vector<[2]xf32>, %[[VAL_1:.*]]: f32, %[[VAL_2:.*]]: vector<[2]xf32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xf32> { 908// CHECK: %[[VAL_8:.*]] = arith.mulf %[[VAL_0]], %{{.*}} : vector<[2]xf32> 909// CHECK: %[[VAL_9:.*]] = arith.minnumf %[[VAL_8]], %[[VAL_2]] : vector<[2]xf32> 910// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xf32> 911 912// ----- 913 914func.func @masked_int_add_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> { 915 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<add>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32> 916 return %0 : vector<2xi32> 917} 918 919// CHECK-LABEL: func.func @masked_int_add_outerprod( 920// CHECK-SAME: %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> { 921// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32> 922// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] : vector<2xi32> 923// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32> 924 925// ----- 926 927func.func @masked_int_add_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> { 928 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<add>} : 
vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32> 929 return %0 : vector<[2]xi32> 930} 931 932// CHECK-LABEL: func.func @masked_int_add_outerprod_scalable( 933// CHECK-SAME: %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> { 934// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32> 935// CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32> 936// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32> 937 938// ----- 939 940func.func @masked_int_mul_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> { 941 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<mul>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32> 942 return %0 : vector<2xi32> 943} 944 945// CHECK-LABEL: func.func @masked_int_mul_outerprod( 946// CHECK-SAME: %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> { 947// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32> 948// CHECK: %[[VAL_9:.*]] = arith.muli %[[VAL_8]], %[[VAL_2]] : vector<2xi32> 949// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32> 950 951// ----- 952 953func.func @masked_int_mul_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> { 954 %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<mul>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32> 955 return %0 : vector<[2]xi32> 956} 957 958// CHECK-LABEL: func.func @masked_int_mul_outerprod_scalable( 959// CHECK-SAME: %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> { 960// CHECK: 
%[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
// CHECK: %[[VAL_9:.*]] = arith.muli %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>

// -----

// Masked integer outer products (vector x scalar, with accumulator). For every
// combining kind below the checks expect the same shape of output: an
// arith.muli of the vector operand, the kind-specific arith op applied to the
// product and the accumulator, and an arith.select on the mask that yields the
// accumulator for masked-off lanes. Each kind is tested on a fixed-size and on
// a scalable vector.

func.func @masked_int_max_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> {
  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<maxsi>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32>
  return %0 : vector<2xi32>
}

// CHECK-LABEL: func.func @masked_int_max_outerprod(
// CHECK-SAME: %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> {
// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32>
// CHECK: %[[VAL_9:.*]] = arith.maxsi %[[VAL_8]], %[[VAL_2]] : vector<2xi32>
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32>

// -----

func.func @masked_int_max_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> {
  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<maxsi>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32>
  return %0 : vector<[2]xi32>
}

// CHECK-LABEL: func.func @masked_int_max_outerprod_scalable(
// CHECK-SAME: %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> {
// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
// CHECK: %[[VAL_9:.*]] = arith.maxsi %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>

// -----

// Note: the "min" tests use the unsigned kind (minui), whereas the "max" tests
// above use the signed kind (maxsi).

func.func @masked_int_min_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> {
  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<minui>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32>
  return %0 : vector<2xi32>
}

// CHECK-LABEL: func.func @masked_int_min_outerprod(
// CHECK-SAME: %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> {
// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32>
// CHECK: %[[VAL_9:.*]] = arith.minui %[[VAL_8]], %[[VAL_2]] : vector<2xi32>
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32>

// -----

func.func @masked_int_min_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> {
  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<minui>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32>
  return %0 : vector<[2]xi32>
}

// CHECK-LABEL: func.func @masked_int_min_outerprod_scalable(
// CHECK-SAME: %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> {
// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
// CHECK: %[[VAL_9:.*]] = arith.minui %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>

// -----

func.func @masked_int_and_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> {
  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<and>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32>
  return %0 : vector<2xi32>
}

// CHECK-LABEL: func.func @masked_int_and_outerprod(
// CHECK-SAME: %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> {
// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32>
// CHECK: %[[VAL_9:.*]] = arith.andi %[[VAL_8]], %[[VAL_2]] : vector<2xi32>
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32>

// -----

func.func @masked_int_and_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> {
  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<and>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32>
  return %0 : vector<[2]xi32>
}

// CHECK-LABEL: func.func @masked_int_and_outerprod_scalable(
// CHECK-SAME: %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> {
// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
// CHECK: %[[VAL_9:.*]] = arith.andi %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>

// -----

func.func @masked_int_or_outerprod(%arg0: vector<2xi32>, %arg1: i32, %arg2: vector<2xi32>, %m: vector<2xi1>) -> vector<2xi32> {
  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<or>} : vector<2xi32>, i32 } : vector<2xi1> -> vector<2xi32>
  return %0 : vector<2xi32>
}

// CHECK-LABEL: func.func @masked_int_or_outerprod(
// CHECK-SAME: %[[VAL_0:.*]]: vector<2xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<2xi32>, %[[VAL_3:.*]]: vector<2xi1>) -> vector<2xi32> {
// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<2xi32>
// CHECK: %[[VAL_9:.*]] = arith.ori %[[VAL_8]], %[[VAL_2]] : vector<2xi32>
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<2xi1>, vector<2xi32>

// -----

func.func @masked_int_or_outerprod_scalable(%arg0: vector<[2]xi32>, %arg1: i32, %arg2: vector<[2]xi32>, %m: vector<[2]xi1>) -> vector<[2]xi32> {
  %0 = vector.mask %m { vector.outerproduct %arg0, %arg1, %arg2 {kind = #vector.kind<or>} : vector<[2]xi32>, i32 } : vector<[2]xi1> -> vector<[2]xi32>
  return %0 : vector<[2]xi32>
}

// CHECK-LABEL: func.func @masked_int_or_outerprod_scalable
// CHECK-SAME: %[[VAL_0:.*]]: vector<[2]xi32>, %[[VAL_1:.*]]: i32, %[[VAL_2:.*]]: vector<[2]xi32>, %[[VAL_3:.*]]: vector<[2]xi1>) -> vector<[2]xi32> {
// CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_0]], %{{.*}} : vector<[2]xi32>
// CHECK: %[[VAL_9:.*]] = arith.ori %[[VAL_8]], %[[VAL_2]] : vector<[2]xi32>
// CHECK: %[[VAL_10:.*]] = arith.select %[[VAL_3]], %[[VAL_9]], %[[VAL_2]] : vector<[2]xi1>, vector<[2]xi32>

// -----

//===----------------------------------------------------------------------===//
// vector.shuffle
//===----------------------------------------------------------------------===//

// Shuffling a 0-D vector with itself: the checks expect the operand to be cast
// to vector<1xf32> and a single llvm.shufflevector on that value.
func.func @shuffle_0D_direct(%arg0: vector<f32>) -> vector<3xf32> {
  %1 = vector.shuffle %arg0, %arg0 [0, 1, 0] : vector<f32>, vector<f32>
  return %1 : vector<3xf32>
}
// CHECK-LABEL: @shuffle_0D_direct(
// CHECK-SAME: %[[A:.*]]: vector<f32>
// CHECK: %[[c:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<f32> to vector<1xf32>
// CHECK: %[[s:.*]] = llvm.shufflevector %[[c]], %[[c]] [0, 1, 0] : vector<1xf32>
// CHECK: return %[[s]] : vector<3xf32>

// -----

// The mask [0, 1] selects exactly the first operand, so the function is
// expected to return its first argument unchanged. The return line must *use*
// the captured argument name; re-capturing it there would accept any value.
func.func @shuffle_1D_direct(%arg0: vector<2xf32>, %arg1: vector<2xf32>) -> vector<2xf32> {
  %1 = vector.shuffle %arg0, %arg1 [0, 1] : vector<2xf32>, vector<2xf32>
  return %1 : vector<2xf32>
}
// CHECK-LABEL: @shuffle_1D_direct(
// CHECK-SAME: %[[A:.*]]: vector<2xf32>,
// CHECK-SAME: %[[B:.*]]: vector<2xf32>)
// CHECK: return %[[A]] : vector<2xf32>

// -----

// Same identity-mask case as above, with index elements.
func.func @shuffle_1D_index_direct(%arg0: vector<2xindex>, %arg1: vector<2xindex>) -> vector<2xindex> {
  %1 = vector.shuffle %arg0, %arg1 [0, 1] : vector<2xindex>, vector<2xindex>
  return %1 : vector<2xindex>
}
// CHECK-LABEL: @shuffle_1D_index_direct(
// CHECK-SAME: %[[A:.*]]: vector<2xindex>,
// CHECK-SAME: %[[B:.*]]: vector<2xindex>)
// CHECK: return %[[A]] : vector<2xindex>

// -----

// -1 entries in the mask are expected to be forwarded as-is to
// llvm.shufflevector.
func.func @shuffle_poison_mask(%arg0: vector<2xf32>, %arg1: vector<2xf32>) -> vector<4xf32> {
  %1 = vector.shuffle %arg0, %arg1 [0, -1, 3, -1] : vector<2xf32>, vector<2xf32>
  return %1 : vector<4xf32>
}
// CHECK-LABEL: @shuffle_poison_mask(
// CHECK-SAME: %[[A:.*]]: vector<2xf32>, %[[B:.*]]: vector<2xf32>)
// CHECK: %[[s:.*]] = llvm.shufflevector %[[A]], %[[B]] [0, -1, 3, -1] : vector<2xf32>

// -----

// Operands of different sizes (2 and 3 elements): the checks expect one
// llvm.extractelement / llvm.insertelement pair per mask entry instead of a
// single llvm.shufflevector.
func.func @shuffle_1D(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<5xf32> {
  %1 = vector.shuffle %arg0, %arg1 [4, 3, 2, 1, 0] : vector<2xf32>, vector<3xf32>
  return %1 : vector<5xf32>
}
// CHECK-LABEL: @shuffle_1D(
// CHECK-SAME: %[[A:.*]]: vector<2xf32>,
// CHECK-SAME: %[[B:.*]]: vector<3xf32>)
// CHECK: %[[U0:.*]] = llvm.mlir.undef : vector<5xf32>
// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64
// CHECK: %[[E1:.*]] = llvm.extractelement %[[B]][%[[C2]] : i64] : vector<3xf32>
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[I1:.*]] = llvm.insertelement %[[E1]], %[[U0]][%[[C0]] : i64] : vector<5xf32>
// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
// CHECK: %[[E2:.*]] = llvm.extractelement %[[B]][%[[C1]] : i64] : vector<3xf32>
// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
// CHECK: %[[I2:.*]] = llvm.insertelement %[[E2]], %[[I1]][%[[C1]] : i64] : vector<5xf32>
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[E3:.*]] = llvm.extractelement %[[B]][%[[C0]] : i64] : vector<3xf32>
// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : index) : i64
// CHECK: %[[I3:.*]] = llvm.insertelement %[[E3]], %[[I2]][%[[C2]] : i64] : vector<5xf32>
// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : index) : i64
// CHECK: %[[E4:.*]] = llvm.extractelement %[[A]][%[[C1]] : i64] : vector<2xf32>
// CHECK: %[[C3:.*]] = llvm.mlir.constant(3 : index) : i64
// CHECK: %[[I4:.*]] = llvm.insertelement %[[E4]], %[[I3]][%[[C3]] : i64] : vector<5xf32>
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[E5:.*]] = llvm.extractelement %[[A]][%[[C0]] : i64] : vector<2xf32>
// CHECK: %[[C4:.*]] = llvm.mlir.constant(4 : index) : i64
// CHECK: %[[I5:.*]] = llvm.insertelement %[[E5]], %[[I4]][%[[C4]] : i64] : vector<5xf32>
// CHECK: return %[[I5]] : vector<5xf32>

// -----

// 2-D shuffle: the checks expect both operands to be cast to LLVM arrays of
// 1-D vectors and the result to be assembled row by row with
// llvm.extractvalue / llvm.insertvalue.
func.func @shuffle_2D(%a: vector<1x4xf32>, %b: vector<2x4xf32>) -> vector<3x4xf32> {
  %1 = vector.shuffle %a, %b[1, 0, 2] : vector<1x4xf32>, vector<2x4xf32>
  return %1 : vector<3x4xf32>
}
// CHECK-LABEL: @shuffle_2D(
// CHECK-SAME: %[[A:.*]]: vector<1x4xf32>,
// CHECK-SAME: %[[B:.*]]: vector<2x4xf32>)
// CHECK-DAG: %[[VAL_0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<1x4xf32> to !llvm.array<1 x vector<4xf32>>
// CHECK-DAG: %[[VAL_1:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
// CHECK: %[[U0:.*]] = llvm.mlir.undef : !llvm.array<3 x vector<4xf32>>
// CHECK: %[[E1:.*]] = llvm.extractvalue %[[VAL_1]][0] : !llvm.array<2 x vector<4xf32>>
// CHECK: %[[I1:.*]] = llvm.insertvalue %[[E1]], %[[U0]][0] : !llvm.array<3 x vector<4xf32>>
// CHECK: %[[E2:.*]] = llvm.extractvalue %[[VAL_0]][0] : !llvm.array<1 x vector<4xf32>>
// CHECK: %[[I2:.*]] = llvm.insertvalue %[[E2]], %[[I1]][1] : !llvm.array<3 x vector<4xf32>>
// CHECK: %[[E3:.*]] = llvm.extractvalue %[[VAL_1]][1] : !llvm.array<2 x vector<4xf32>>
// CHECK: %[[I3:.*]] = llvm.insertvalue %[[E3]], %[[I2]][2] : !llvm.array<3 x vector<4xf32>>
// CHECK: %[[VAL_3:.*]] = builtin.unrealized_conversion_cast %[[I3]] : !llvm.array<3 x vector<4xf32>> to vector<3x4xf32>
// CHECK: return %[[VAL_3]] : vector<3x4xf32>

// -----

//===----------------------------------------------------------------------===//
// vector.extractelement
//===----------------------------------------------------------------------===//

// 0-D source: the checks expect an extract at constant position 0 from a
// vector<1xf32>.
func.func @extractelement_from_vec_0d_f32(%arg0: vector<f32>) -> f32 {
  %1 = vector.extractelement %arg0[] : vector<f32>
  return %1 : f32
}
// CHECK-LABEL: @extractelement_from_vec_0d_f32
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: llvm.extractelement %{{.*}}[%[[C0]] : {{.*}}] : vector<1xf32>

// -----

// An i32 position is expected to be used directly by llvm.extractelement.
func.func @extractelement_from_vec_1d_f32_idx_as_i32(%arg0: vector<16xf32>) -> f32 {
  %0 = arith.constant 15 : i32
  %1 = vector.extractelement %arg0[%0 : i32]: vector<16xf32>
  return %1 : f32
}
// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[C:.*]] = arith.constant 15 : i32
// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[C]] : i32] : vector<16xf32>
// CHECK: return %[[X]] : f32

// -----

func.func @extractelement_from_vec_1d_f32_idx_as_i32_scalable(%arg0: vector<[16]xf32>) -> f32 {
  %0 = arith.constant 15 : i32
  %1 = vector.extractelement %arg0[%0 : i32]: vector<[16]xf32>
  return %1 : f32
}
// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>)
// CHECK: %[[C:.*]] = arith.constant 15 : i32
// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[C]] : i32] : vector<[16]xf32>
// CHECK: return %[[X]] : f32

// -----

// An index position is expected to be cast to i64 before it is used.
func.func @extractelement_from_vec_1d_f32_idx_as_index(%arg0: vector<16xf32>) -> f32 {
  %0 = arith.constant 15 : index
  %1 = vector.extractelement %arg0[%0 : index]: vector<16xf32>
  return %1 : f32
}
// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_index(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[C:.*]] = arith.constant 15 : index
// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64
// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[I]] : i64] : vector<16xf32>
// CHECK: return %[[X]] : f32

// -----

func.func @extractelement_from_vec_1d_f32_idx_as_index_scalable(%arg0: vector<[16]xf32>) -> f32 {
  %0 = arith.constant 15 : index
  %1 = vector.extractelement %arg0[%0 : index]: vector<[16]xf32>
  return %1 : f32
}
// CHECK-LABEL: @extractelement_from_vec_1d_f32_idx_as_index_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>)
// CHECK: %[[C:.*]] = arith.constant 15 : index
// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64
// CHECK: %[[X:.*]] = llvm.extractelement %[[A]][%[[I]] : i64] : vector<[16]xf32>
// CHECK: return %[[X]] : f32

// -----

//===----------------------------------------------------------------------===//
// vector.extract
//===----------------------------------------------------------------------===//

// Static position in a 1-D vector: the checks expect an i64 constant and an
// llvm.extractelement.
func.func @extract_scalar_from_vec_1d_f32(%arg0: vector<16xf32>) -> f32 {
  %0 = vector.extract %arg0[15]: f32 from vector<16xf32>
  return %0 : f32
}
// CHECK-LABEL: @extract_scalar_from_vec_1d_f32
// CHECK: llvm.mlir.constant(15 : i64) : i64
// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32>
// CHECK: return {{.*}} : f32

// -----

// A -1 position is expected to be materialized as the constant -1 and used as
// the llvm.extractelement position.
func.func @extract_poison_idx(%arg0: vector<16xf32>) -> f32 {
  %0 = vector.extract %arg0[-1]: f32 from vector<16xf32>
  return %0 : f32
}
// CHECK-LABEL: @extract_poison_idx
// CHECK: %[[IDX:.*]] = llvm.mlir.constant(-1 : i64) : i64
// CHECK: llvm.extractelement {{.*}}[%[[IDX]] : i64] : vector<16xf32>

// -----

func.func @extract_scalar_from_vec_1d_f32_scalable(%arg0: vector<[16]xf32>) -> f32 {
  %0 = vector.extract %arg0[15]: f32 from vector<[16]xf32>
  return %0 : f32
}
// CHECK-LABEL: @extract_scalar_from_vec_1d_f32_scalable
// CHECK: llvm.mlir.constant(15 : i64) : i64
// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<[16]xf32>
// CHECK: return {{.*}} : f32

// -----

// Extracting a vector<1xf32> from a 1-D vector: the checks expect a scalar
// llvm.extractelement whose f32 result is cast to vector<1xf32>.
func.func @extract_vec_1e_from_vec_1d_f32(%arg0: vector<16xf32>) -> vector<1xf32> {
  %0 = vector.extract %arg0[15]: vector<1xf32> from vector<16xf32>
  return %0 : vector<1xf32>
}
// CHECK-LABEL: @extract_vec_1e_from_vec_1d_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[T0:.*]] = llvm.mlir.constant(15 : i64) : i64
// CHECK: %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : i64] : vector<16xf32>
// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : f32 to vector<1xf32>
// CHECK: return %[[T2]] : vector<1xf32>

// -----

func.func @extract_vec_1e_from_vec_1d_f32_scalable(%arg0: vector<[16]xf32>) -> vector<1xf32> {
  %0 = vector.extract %arg0[15]: vector<1xf32> from vector<[16]xf32>
  return %0 : vector<1xf32>
}
// CHECK-LABEL: @extract_vec_1e_from_vec_1d_f32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>)
// CHECK: %[[T0:.*]] = llvm.mlir.constant(15 : i64) : i64
// CHECK: %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : i64] : vector<[16]xf32>
// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : f32 to vector<1xf32>
// CHECK: return %[[T2]] : vector<1xf32>

// -----

// Index elements: the checks expect the vector to be cast to i64 elements
// before the extract and the scalar result to be cast back to index.
func.func @extract_scalar_from_vec_1d_index(%arg0: vector<16xindex>) -> index {
  %0 = vector.extract %arg0[15]: index from vector<16xindex>
  return %0 : index
}
// CHECK-LABEL: @extract_scalar_from_vec_1d_index(
// CHECK-SAME: %[[A:.*]]: vector<16xindex>)
// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<16xindex> to vector<16xi64>
// CHECK: %[[T1:.*]] = llvm.mlir.constant(15 : i64) : i64
// CHECK: %[[T2:.*]] = llvm.extractelement %[[T0]][%[[T1]] : i64] : vector<16xi64>
// CHECK: %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : i64 to index
// CHECK: return %[[T3]] : index

// -----

func.func @extract_scalar_from_vec_1d_index_scalable(%arg0: vector<[16]xindex>) -> index {
  %0 = vector.extract %arg0[15]: index from vector<[16]xindex>
  return %0 : index
}
// CHECK-LABEL: @extract_scalar_from_vec_1d_index_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xindex>)
// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<[16]xindex> to vector<[16]xi64>
// CHECK: %[[T1:.*]] = llvm.mlir.constant(15 : i64) : i64
// CHECK: %[[T2:.*]] = llvm.extractelement %[[T0]][%[[T1]] : i64] : vector<[16]xi64>
// CHECK: %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : i64 to index
// CHECK: return %[[T3]] : index

// -----

// Extracting sub-vectors from a 3-D vector: the checks expect a single
// llvm.extractvalue on the nested LLVM array type.
func.func @extract_vec_2d_from_vec_3d_f32(%arg0: vector<4x3x16xf32>) -> vector<3x16xf32> {
  %0 = vector.extract %arg0[0]: vector<3x16xf32> from vector<4x3x16xf32>
  return %0 : vector<3x16xf32>
}
// CHECK-LABEL: @extract_vec_2d_from_vec_3d_f32
// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vector<16xf32>>>
// CHECK: return {{.*}} : vector<3x16xf32>

// -----

func.func @extract_vec_2d_from_vec_3d_f32_scalable(%arg0: vector<4x3x[16]xf32>) -> vector<3x[16]xf32> {
  %0 = vector.extract %arg0[0]: vector<3x[16]xf32> from vector<4x3x[16]xf32>
  return %0 : vector<3x[16]xf32>
}
// CHECK-LABEL: @extract_vec_2d_from_vec_3d_f32_scalable
// CHECK: llvm.extractvalue {{.*}}[0] : !llvm.array<4 x array<3 x vector<[16]xf32>>>
// CHECK: return {{.*}} : vector<3x[16]xf32>

// -----

func.func @extract_vec_1d_from_vec_3d_f32(%arg0: vector<4x3x16xf32>) -> vector<16xf32> {
  %0 = vector.extract %arg0[0, 0]: vector<16xf32> from vector<4x3x16xf32>
  return %0 : vector<16xf32>
}
// CHECK-LABEL: @extract_vec_1d_from_vec_3d_f32
// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>>
// CHECK: return {{.*}} : vector<16xf32>

// -----

func.func @extract_vec_1d_from_vec_3d_f32_scalable(%arg0: vector<4x3x[16]xf32>) -> vector<[16]xf32> {
  %0 = vector.extract %arg0[0, 0]: vector<[16]xf32> from vector<4x3x[16]xf32>
  return %0 : vector<[16]xf32>
}
// CHECK-LABEL: @extract_vec_1d_from_vec_3d_f32_scalable
// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<[16]xf32>>>
// CHECK: return {{.*}} : vector<[16]xf32>

// -----

// Extracting a scalar from a 3-D vector: the checks expect an
// llvm.extractvalue for the two leading positions followed by an
// llvm.extractelement for the trailing one.
func.func @extract_scalar_from_vec_3d_f32(%arg0: vector<4x3x16xf32>) -> f32 {
  %0 = vector.extract %arg0[0, 0, 0]: f32 from vector<4x3x16xf32>
  return %0 : f32
}
// CHECK-LABEL: @extract_scalar_from_vec_3d_f32
// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<16xf32>>>
// CHECK: llvm.mlir.constant(0 : i64) : i64
// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<16xf32>
// CHECK: return {{.*}} : f32

// -----

func.func @extract_scalar_from_vec_3d_f32_scalable(%arg0: vector<4x3x[16]xf32>) -> f32 {
  %0 = vector.extract %arg0[0, 0, 0]: f32 from vector<4x3x[16]xf32>
  return %0 : f32
}
// CHECK-LABEL: @extract_scalar_from_vec_3d_f32_scalable
// CHECK: llvm.extractvalue {{.*}}[0, 0] : !llvm.array<4 x array<3 x vector<[16]xf32>>>
// CHECK: llvm.mlir.constant(0 : i64) : i64
// CHECK: llvm.extractelement {{.*}}[{{.*}} : i64] : vector<[16]xf32>
// CHECK: return {{.*}} : f32

// -----

// Dynamic position in a 1-D vector: the index operand is expected to be cast
// to i64 and used as the llvm.extractelement position.
func.func @extract_scalar_from_vec_1d_f32_dynamic_idx(%arg0: vector<16xf32>, %arg1: index) -> f32 {
  %0 = vector.extract %arg0[%arg1]: f32 from vector<16xf32>
  return %0 : f32
}
// CHECK-LABEL: @extract_scalar_from_vec_1d_f32_dynamic_idx
// CHECK-SAME: %[[VEC:.+]]: vector<16xf32>, %[[INDEX:.+]]: index
// CHECK: %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64
// CHECK: llvm.extractelement %[[VEC]][%[[UC]] : i64] : vector<16xf32>

// -----

func.func @extract_scalar_from_vec_1d_f32_dynamic_idx_scalable(%arg0: vector<[16]xf32>, %arg1: index) -> f32 {
  %0 = vector.extract %arg0[%arg1]: f32 from vector<[16]xf32>
  return %0 : f32
}
// CHECK-LABEL: @extract_scalar_from_vec_1d_f32_dynamic_idx_scalable
// CHECK-SAME: %[[VEC:.+]]: vector<[16]xf32>, %[[INDEX:.+]]: index
// CHECK: %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64
// CHECK: llvm.extractelement %[[VEC]][%[[UC]] : i64] : vector<[16]xf32>

// -----

func.func @extract_scalar_from_vec_2d_f32_inner_dynamic_idx(%arg0: vector<1x16xf32>, %arg1: index) -> f32 {
  %0 = vector.extract %arg0[0, %arg1]: f32 from vector<1x16xf32>
  return %0 : f32
}

// Lowering supports extracting from multi-dim vectors with dynamic indices
// provided that only the trailing index is dynamic.

// CHECK-LABEL: @extract_scalar_from_vec_2d_f32_inner_dynamic_idx(
// CHECK: llvm.extractvalue
// CHECK: llvm.extractelement

// -----

func.func @extract_scalar_from_vec_2d_f32_inner_dynamic_idx_scalable(%arg0: vector<1x[16]xf32>, %arg1: index) -> f32 {
  %0 = vector.extract %arg0[0, %arg1]: f32 from vector<1x[16]xf32>
  return %0 : f32
}

// Lowering supports extracting from multi-dim vectors with dynamic indices
// provided that only the trailing index is dynamic.

// CHECK-LABEL: @extract_scalar_from_vec_2d_f32_inner_dynamic_idx_scalable(
// CHECK: llvm.extractvalue
// CHECK: llvm.extractelement

// -----

func.func @extract_scalar_from_vec_2d_f32_outer_dynamic_idx(%arg0: vector<1x16xf32>, %arg1: index) -> f32 {
  %0 = vector.extract %arg0[%arg1, 0]: f32 from vector<1x16xf32>
  return %0 : f32
}

// Lowering does not support extracting from multi-dim vectors with non trailing
// dynamic index, but it shouldn't crash. The op is expected to be left as is.

// CHECK-LABEL: @extract_scalar_from_vec_2d_f32_outer_dynamic_idx(
// CHECK: vector.extract

// -----

func.func @extract_scalar_from_vec_2d_f32_outer_dynamic_idx_scalable(%arg0: vector<1x[16]xf32>, %arg1: index) -> f32 {
  %0 = vector.extract %arg0[%arg1, 0]: f32 from vector<1x[16]xf32>
  return %0 : f32
}

// Lowering does not support extracting from multi-dim vectors with non trailing
// dynamic index, but it shouldn't crash.

// CHECK-LABEL: @extract_scalar_from_vec_2d_f32_outer_dynamic_idx_scalable(
// CHECK: vector.extract

// -----

// Extracting the single element of a 0-D index vector: the checks expect the
// source to be cast to vector<1xi64>, element 0 to be extracted, and the i64
// result to be cast back to index.
func.func @extract_scalar_from_vec_0d_index(%arg0: vector<index>) -> index {
  %0 = vector.extract %arg0[]: index from vector<index>
  return %0 : index
}
// CHECK-LABEL: @extract_scalar_from_vec_0d_index(
// CHECK-SAME: %[[A:.*]]: vector<index>)
// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<index> to vector<1xi64>
// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : i64
// CHECK: %[[T2:.*]] = llvm.extractelement %[[T0]][%[[T1]] : i64] : vector<1xi64>
// CHECK: %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : i64 to index
// CHECK: return %[[T3]] : index

// -----

//===----------------------------------------------------------------------===//
// vector.insertelement
//===----------------------------------------------------------------------===//

// 0-D destination: the checks expect the destination to be cast to
// vector<1xf32> and the scalar to be inserted at constant position 0.
func.func @insertelement_into_vec_0d_f32(%arg0: f32, %arg1: vector<f32>) -> vector<f32> {
  %1 = vector.insertelement %arg0, %arg1[] : vector<f32>
  return %1 : vector<f32>
}
// CHECK-LABEL: @insertelement_into_vec_0d_f32
// CHECK-SAME: %[[A:.*]]: f32,
// CHECK: %[[B:.*]] = builtin.unrealized_conversion_cast %{{.*}} :
// CHECK: vector<f32> to vector<1xf32>
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : index) : i64
// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C0]] : {{.*}}] : vector<1xf32>

// -----

// An i32 position is expected to be used directly by llvm.insertelement.
func.func @insertelement_into_vec_1d_f32_idx_as_i32(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> {
  %0 = arith.constant 3 : i32
  %1 = vector.insertelement %arg0, %arg1[%0 : i32] : vector<4xf32>
  return %1 : vector<4xf32>
}
// CHECK-LABEL: @insertelement_into_vec_1d_f32_idx_as_i32(
// CHECK-SAME: %[[A:.*]]: f32,
// CHECK-SAME: %[[B:.*]]: vector<4xf32>)
// CHECK: %[[C:.*]] = arith.constant 3 : i32
// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C]] : i32] : vector<4xf32>
// CHECK: return %[[X]] : vector<4xf32>

// -----

func.func @insertelement_into_vec_1d_f32_idx_as_i32_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> {
  %0 = arith.constant 3 : i32
  %1 = vector.insertelement %arg0, %arg1[%0 : i32] : vector<[4]xf32>
  return %1 : vector<[4]xf32>
}
// CHECK-LABEL: @insertelement_into_vec_1d_f32_idx_as_i32_scalable(
// CHECK-SAME: %[[A:.*]]: f32,
// CHECK-SAME: %[[B:.*]]: vector<[4]xf32>)
// CHECK: %[[C:.*]] = arith.constant 3 : i32
// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[C]] : i32] : vector<[4]xf32>
// CHECK: return %[[X]] : vector<[4]xf32>

// -----

// An index position is expected to be cast to i64 before it is used.
// NOTE(review): despite "scalable" in its name, this test uses the fixed-size
// vector<4xf32>; the scalable variant is the next test. Consider renaming both
// to mirror the vector.extractelement "idx_as_index" tests.
func.func @insertelement_into_vec_1d_f32_scalable_idx_as_index(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> {
  %0 = arith.constant 3 : index
  %1 = vector.insertelement %arg0, %arg1[%0 : index] : vector<4xf32>
  return %1 : vector<4xf32>
}
// CHECK-LABEL: @insertelement_into_vec_1d_f32_scalable_idx_as_index(
// CHECK-SAME: %[[A:.*]]: f32,
// CHECK-SAME: %[[B:.*]]: vector<4xf32>)
// CHECK: %[[C:.*]] = arith.constant 3 : index
// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64
// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[I]] : i64] : vector<4xf32>
// CHECK: return %[[X]] : vector<4xf32>

// -----

func.func @insertelement_into_vec_1d_f32_scalable_idx_as_index_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> {
  %0 = arith.constant 3 : index
  %1 = vector.insertelement %arg0, %arg1[%0 : index] : vector<[4]xf32>
  return %1 : vector<[4]xf32>
}
// CHECK-LABEL: @insertelement_into_vec_1d_f32_scalable_idx_as_index_scalable(
// CHECK-SAME: %[[A:.*]]: f32,
// CHECK-SAME: %[[B:.*]]: vector<[4]xf32>)
// CHECK: %[[C:.*]] = arith.constant 3 : index
// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %[[C]] : index to i64
// CHECK: %[[X:.*]] = llvm.insertelement %[[A]], %[[B]][%[[I]] : i64] : vector<[4]xf32>
// CHECK: return %[[X]] : vector<[4]xf32>

// -----

//===----------------------------------------------------------------------===//
// vector.insert
//===----------------------------------------------------------------------===//

// Static position in a 1-D vector: the checks expect an i64 constant and an
// llvm.insertelement.
func.func @insert_scalar_into_vec_1d_f32(%arg0: f32, %arg1: vector<4xf32>) -> vector<4xf32> {
  %0 = vector.insert %arg0, %arg1[3] : f32 into vector<4xf32>
  return %0 : vector<4xf32>
}
// CHECK-LABEL: @insert_scalar_into_vec_1d_f32
// CHECK: llvm.mlir.constant(3 : i64) : i64
// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<4xf32>
// CHECK: return {{.*}} : vector<4xf32>

// -----

func.func @insert_scalar_into_vec_1d_f32_scalable(%arg0: f32, %arg1: vector<[4]xf32>) -> vector<[4]xf32> {
  %0 = vector.insert %arg0, %arg1[3] : f32 into vector<[4]xf32>
  return %0 : vector<[4]xf32>
}
// CHECK-LABEL: @insert_scalar_into_vec_1d_f32_scalable
// CHECK: llvm.mlir.constant(3 : i64) : i64
// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<[4]xf32>
// CHECK: return {{.*}} : vector<[4]xf32>

// -----

// Index elements: the checks expect both the scalar and the destination to be
// cast to i64-based types, and the result to be cast back to an index vector.
func.func @insert_scalar_into_vec_1d_index(%arg0: index, %arg1: vector<4xindex>) -> vector<4xindex> {
  %0 = vector.insert %arg0, %arg1[3] : index into vector<4xindex>
  return %0 : vector<4xindex>
}
// CHECK-LABEL: @insert_scalar_into_vec_1d_index(
// CHECK-SAME: %[[A:.*]]: index,
// CHECK-SAME: %[[B:.*]]: vector<4xindex>)
// CHECK-DAG: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : index to i64
// CHECK-DAG: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<4xindex> to vector<4xi64>
// CHECK: %[[T3:.*]] = llvm.mlir.constant(3 : i64) : i64
// CHECK: %[[T4:.*]] = llvm.insertelement %[[T0]], %[[T1]][%[[T3]] : i64] : vector<4xi64>
// CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : vector<4xi64> to vector<4xindex>
// CHECK: return %[[T5]] : vector<4xindex>

// -----

func.func @insert_scalar_into_vec_1d_index_scalable(%arg0: index, %arg1: vector<[4]xindex>) -> vector<[4]xindex> {
  %0 = vector.insert %arg0, %arg1[3] : index into vector<[4]xindex>
  return %0 : vector<[4]xindex>
}
// CHECK-LABEL: @insert_scalar_into_vec_1d_index_scalable(
// CHECK-SAME: %[[A:.*]]: index,
// CHECK-SAME: %[[B:.*]]: vector<[4]xindex>)
// CHECK-DAG: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : index to i64
// CHECK-DAG: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[B]] : vector<[4]xindex> to vector<[4]xi64>
// CHECK: %[[T3:.*]] = llvm.mlir.constant(3 : i64) : i64
// CHECK: %[[T4:.*]] = llvm.insertelement %[[T0]], %[[T1]][%[[T3]] : i64] : vector<[4]xi64>
// CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : vector<[4]xi64> to vector<[4]xindex>
// CHECK: return %[[T5]] : vector<[4]xindex>

// -----

// Inserting sub-vectors into a 3-D vector: the checks expect a single
// llvm.insertvalue on the nested LLVM array type.
func.func @insert_vec_2d_into_vec_3d_f32(%arg0: vector<8x16xf32>, %arg1: vector<4x8x16xf32>) -> vector<4x8x16xf32> {
  %0 = vector.insert %arg0, %arg1[3] : vector<8x16xf32> into vector<4x8x16xf32>
  return %0 : vector<4x8x16xf32>
}
// CHECK-LABEL: @insert_vec_2d_into_vec_3d_f32
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x array<8 x vector<16xf32>>>
// CHECK: return {{.*}} : vector<4x8x16xf32>

// -----

func.func @insert_vec_2d_into_vec_3d_f32_scalable(%arg0: vector<8x[16]xf32>, %arg1: vector<4x8x[16]xf32>) -> vector<4x8x[16]xf32> {
  %0 = vector.insert %arg0, %arg1[3] : vector<8x[16]xf32> into vector<4x8x[16]xf32>
  return %0 : vector<4x8x[16]xf32>
}
// CHECK-LABEL: @insert_vec_2d_into_vec_3d_f32_scalable
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm.array<4 x array<8 x vector<[16]xf32>>>
// CHECK: return {{.*}} : vector<4x8x[16]xf32>

// -----

func.func @insert_vec_1d_into_vec_3d_f32(%arg0: vector<16xf32>, %arg1: vector<4x8x16xf32>) -> vector<4x8x16xf32> {
  %0 = vector.insert %arg0, %arg1[3, 7] : vector<16xf32> into vector<4x8x16xf32>
  return %0 : vector<4x8x16xf32>
}
// CHECK-LABEL: @insert_vec_1d_into_vec_3d_f32
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>>
// CHECK: return {{.*}} : vector<4x8x16xf32>

// -----

func.func @insert_vec_1d_into_vec_3d_f32_scalable(%arg0: vector<[16]xf32>, %arg1: vector<4x8x[16]xf32>) -> vector<4x8x[16]xf32> {
  %0 = vector.insert %arg0, %arg1[3, 7] : vector<[16]xf32> into vector<4x8x[16]xf32>
  return %0 : vector<4x8x[16]xf32>
}
// CHECK-LABEL: @insert_vec_1d_into_vec_3d_f32_scalable
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<[16]xf32>>>
// CHECK: return {{.*}} : vector<4x8x[16]xf32>

// -----

// Inserting a scalar into a 3-D vector: the checks expect the innermost 1-D
// vector to be extracted, updated with llvm.insertelement, and written back
// with llvm.insertvalue.
func.func @insert_scalar_into_vec_3d_f32(%arg0: f32, %arg1: vector<4x8x16xf32>) -> vector<4x8x16xf32> {
  %0 = vector.insert %arg0, %arg1[3, 7, 15] : f32 into vector<4x8x16xf32>
  return %0 : vector<4x8x16xf32>
}
// CHECK-LABEL: @insert_scalar_into_vec_3d_f32
// CHECK: llvm.extractvalue {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>>
// CHECK: llvm.mlir.constant(15 : i64) : i64
// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<16xf32>
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<16xf32>>>
// CHECK: return {{.*}} : vector<4x8x16xf32>

// -----

func.func @insert_scalar_into_vec_3d_f32_scalable(%arg0: f32, %arg1: vector<4x8x[16]xf32>) -> vector<4x8x[16]xf32> {
  %0 = vector.insert %arg0, %arg1[3, 7, 15] : f32 into vector<4x8x[16]xf32>
  return %0 : vector<4x8x[16]xf32>
}
// CHECK-LABEL: @insert_scalar_into_vec_3d_f32_scalable
// CHECK: llvm.extractvalue {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<[16]xf32>>>
// CHECK: llvm.mlir.constant(15 : i64) : i64
// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : i64] : vector<[16]xf32>
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3, 7] : !llvm.array<4 x array<8 x vector<[16]xf32>>>
// CHECK: return {{.*}} : vector<4x8x[16]xf32>

// -----

// Dynamic position in a 1-D vector: the index operand is expected to be cast
// to i64 and used as the llvm.insertelement position.
func.func @insert_scalar_into_vec_1d_f32_dynamic_idx(%arg0: vector<16xf32>, %arg1: f32, %arg2: index)
    -> vector<16xf32> {
  %0 = vector.insert %arg1, %arg0[%arg2]: f32 into vector<16xf32>
  return %0 : vector<16xf32>
}

// CHECK-LABEL: @insert_scalar_into_vec_1d_f32_dynamic_idx
// CHECK-SAME: %[[DST:.+]]: vector<16xf32>, %[[SRC:.+]]: f32, %[[INDEX:.+]]: index
// CHECK: %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64
// CHECK: llvm.insertelement %[[SRC]], %[[DST]][%[[UC]] : i64] : vector<16xf32>

// -----

func.func @insert_scalar_into_vec_1d_f32_dynamic_idx_scalable(%arg0: vector<[16]xf32>, %arg1: f32, %arg2: index)
    -> vector<[16]xf32> {
  %0 = vector.insert %arg1, %arg0[%arg2]: f32 into vector<[16]xf32>
  return %0 : vector<[16]xf32>
}

// CHECK-LABEL: @insert_scalar_into_vec_1d_f32_dynamic_idx_scalable
// CHECK-SAME: %[[DST:.+]]: vector<[16]xf32>, %[[SRC:.+]]: f32, %[[INDEX:.+]]: index
// CHECK: %[[UC:.+]] = builtin.unrealized_conversion_cast %[[INDEX]] : index to i64
// CHECK: llvm.insertelement %[[SRC]], %[[DST]][%[[UC]] : i64] : vector<[16]xf32>

// -----

func.func @insert_scalar_into_vec_2d_f32_dynamic_idx(%arg0: vector<1x16xf32>, %arg1: f32, %idx: index)
    -> vector<1x16xf32> {
  %0 = vector.insert %arg1, %arg0[0, %idx]: f32 into vector<1x16xf32>
  return %0 : vector<1x16xf32>
}

// Multi-dim vectors are not supported but this test shouldn't crash.
// The op is expected to be left as is.

// CHECK-LABEL: @insert_scalar_into_vec_2d_f32_dynamic_idx(
// CHECK: vector.insert

// -----

func.func @insert_scalar_into_vec_2d_f32_dynamic_idx_scalable(%arg0: vector<1x[16]xf32>, %arg1: f32, %idx: index)
    -> vector<1x[16]xf32> {
  %0 = vector.insert %arg1, %arg0[0, %idx]: f32 into vector<1x[16]xf32>
  return %0 : vector<1x[16]xf32>
}

// Multi-dim vectors are not supported but this test shouldn't crash.
// The op is expected to be left as is.

// CHECK-LABEL: @insert_scalar_into_vec_2d_f32_dynamic_idx_scalable(
// CHECK: vector.insert

// -----

//===----------------------------------------------------------------------===//
// vector.type_cast
//
// TODO: Add tests for vector.type_cast that would cover scalable vectors
//===----------------------------------------------------------------------===//

// The checks expect a new 3-field descriptor (ptr, ptr, i64) to be built from
// the allocated pointer (field 0) and aligned pointer (field 1) of the source
// memref descriptor, with a constant 0 stored in field 2.
func.func @type_cast_f32(%arg0: memref<8x8x8xf32>) -> memref<vector<8x8x8xf32>> {
  %0 = vector.type_cast %arg0: memref<8x8x8xf32> to memref<vector<8x8x8xf32>>
  return %0 : memref<vector<8x8x8xf32>>
}
// CHECK-LABEL: @type_cast_f32
// CHECK: llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64)>
// CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
// CHECK: llvm.insertvalue %[[allocated]], {{.*}}[0] : !llvm.struct<(ptr, ptr, i64)>
// CHECK: %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>
// CHECK: llvm.insertvalue %[[aligned]], {{.*}}[1] : !llvm.struct<(ptr, ptr, i64)>
// CHECK: llvm.mlir.constant(0 : index
// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr, ptr, i64)>

// NOTE: No test for scalable vectors - the input memref is fixed size.

// -----

// `index` element type: only the boundary casts are checked, i.e. the source
// memref is converted to a 5-field descriptor struct and the resulting 3-field
// struct is cast back to the rank-0 memref of vector type.
func.func @type_cast_index(%arg0: memref<8x8x8xindex>) -> memref<vector<8x8x8xindex>> {
  %0 = vector.type_cast %arg0: memref<8x8x8xindex> to memref<vector<8x8x8xindex>>
  return %0 : memref<vector<8x8x8xindex>>
}
// CHECK-LABEL: @type_cast_index(
// CHECK-SAME: %[[A:.*]]: memref<8x8x8xindex>)
// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[A]] : memref<8x8x8xindex> to !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>

// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %{{.*}} : !llvm.struct<(ptr, ptr, i64)> to memref<vector<8x8x8xindex>>

// NOTE: No test for scalable vectors - the input memref is fixed size.

// -----

// Same sequence as the f32 type_cast case, but with memory space 3: the
// expected pointer types in both descriptor structs are `ptr<3>`.
func.func @type_cast_non_zero_addrspace(%arg0: memref<8x8x8xf32, 3>) -> memref<vector<8x8x8xf32>, 3> {
  %0 = vector.type_cast %arg0: memref<8x8x8xf32, 3> to memref<vector<8x8x8xf32>, 3>
  return %0 : memref<vector<8x8x8xf32>, 3>
}
// CHECK-LABEL: @type_cast_non_zero_addrspace
// CHECK: llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64)>
// CHECK: %[[allocated:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<3 x i64>, array<3 x i64>)>
// CHECK: llvm.insertvalue %[[allocated]], {{.*}}[0] : !llvm.struct<(ptr<3>, ptr<3>, i64)>
// CHECK: %[[aligned:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<3 x i64>, array<3 x i64>)>
// CHECK: llvm.insertvalue %[[aligned]], {{.*}}[1] : !llvm.struct<(ptr<3>, ptr<3>, i64)>
// CHECK: llvm.mlir.constant(0 : index
// CHECK: llvm.insertvalue {{.*}}[2] : !llvm.struct<(ptr<3>, ptr<3>, i64)>

// NOTE: No test for scalable vectors - the input memref is fixed size.

// -----

//===----------------------------------------------------------------------===//
// vector.print
//
// Scalar cases: every test below expects the value to be widened to 64 bits
// where needed, passed to the matching print runtime function, and followed
// by a call to @printNewline.
//===----------------------------------------------------------------------===//

func.func @print_scalar_i1(%arg0: i1) {
  vector.print %arg0 : i1
  return
}
//
// Type "boolean" always uses zero extension.
//
// CHECK-LABEL: @print_scalar_i1(
// CHECK-SAME: %[[A:.*]]: i1)
// CHECK: %[[S:.*]] = arith.extui %[[A]] : i1 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

// Signless i4: sign-extended and printed with @printI64.
func.func @print_scalar_i4(%arg0: i4) {
  vector.print %arg0 : i4
  return
}
// CHECK-LABEL: @print_scalar_i4(
// CHECK-SAME: %[[A:.*]]: i4)
// CHECK: %[[S:.*]] = arith.extsi %[[A]] : i4 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

// Signed si4: cast to signless i4, sign-extended, printed with @printI64.
func.func @print_scalar_si4(%arg0: si4) {
  vector.print %arg0 : si4
  return
}
// CHECK-LABEL: @print_scalar_si4(
// CHECK-SAME: %[[A:.*]]: si4)
// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : si4 to i4
// CHECK: %[[S:.*]] = arith.extsi %[[C]] : i4 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

// Unsigned ui4: cast to signless i4, zero-extended, printed with @printU64.
func.func @print_scalar_ui4(%arg0: ui4) {
  vector.print %arg0 : ui4
  return
}
// CHECK-LABEL: @print_scalar_ui4(
// CHECK-SAME: %[[A:.*]]: ui4)
// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : ui4 to i4
// CHECK: %[[S:.*]] = arith.extui %[[C]] : i4 to i64
// CHECK: llvm.call @printU64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

func.func @print_scalar_i32(%arg0: i32) {
  vector.print %arg0 : i32
  return
}
// CHECK-LABEL: @print_scalar_i32(
// CHECK-SAME: %[[A:.*]]: i32)
// CHECK: %[[S:.*]] = arith.extsi %[[A]] : i32 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

// The trailing @printNewline expectation matches the other scalar print tests
// in this section (it was previously missing for this case only).
func.func @print_scalar_ui32(%arg0: ui32) {
  vector.print %arg0 : ui32
  return
}
// CHECK-LABEL: @print_scalar_ui32(
// CHECK-SAME: %[[A:.*]]: ui32)
// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : ui32 to i32
// CHECK: %[[S:.*]] = arith.extui %[[C]] : i32 to i64
// CHECK: llvm.call @printU64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

func.func @print_scalar_i40(%arg0: i40) {
  vector.print %arg0 : i40
  return
}
// CHECK-LABEL: @print_scalar_i40(
// CHECK-SAME: %[[A:.*]]: i40)
// CHECK: %[[S:.*]] = arith.extsi %[[A]] : i40 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

func.func @print_scalar_si40(%arg0: si40) {
  vector.print %arg0 : si40
  return
}
// CHECK-LABEL: @print_scalar_si40(
// CHECK-SAME: %[[A:.*]]: si40)
// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : si40 to i40
// CHECK: %[[S:.*]] = arith.extsi %[[C]] : i40 to i64
// CHECK: llvm.call @printI64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

func.func @print_scalar_ui40(%arg0: ui40) {
  vector.print %arg0 : ui40
  return
}
// CHECK-LABEL: @print_scalar_ui40(
// CHECK-SAME: %[[A:.*]]: ui40)
// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : ui40 to i40
// CHECK: %[[S:.*]] = arith.extui %[[C]] : i40 to i64
// CHECK: llvm.call @printU64(%[[S]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

// i64 needs no extension: the argument is passed to @printI64 directly.
func.func @print_scalar_i64(%arg0: i64) {
  vector.print %arg0 : i64
  return
}
// CHECK-LABEL: @print_scalar_i64(
// CHECK-SAME: %[[A:.*]]: i64)
// CHECK: llvm.call @printI64(%[[A]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

func.func @print_scalar_ui64(%arg0: ui64) {
  vector.print %arg0 : ui64
  return
}
// CHECK-LABEL: @print_scalar_ui64(
// CHECK-SAME: %[[A:.*]]: ui64)
// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : ui64 to i64
// CHECK: llvm.call @printU64(%[[C]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

// `index` is cast to i64 and printed with the unsigned printer.
func.func @print_scalar_index(%arg0: index) {
  vector.print %arg0 : index
  return
}
// CHECK-LABEL: @print_scalar_index(
// CHECK-SAME: %[[A:.*]]: index)
// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[A]] : index to i64
// CHECK: llvm.call @printU64(%[[C]]) : (i64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

func.func @print_scalar_f32(%arg0: f32) {
  vector.print %arg0 : f32
  return
}
// CHECK-LABEL: @print_scalar_f32(
// CHECK-SAME: %[[A:.*]]: f32)
// CHECK: llvm.call @printF32(%[[A]]) : (f32) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

func.func @print_scalar_f64(%arg0: f64) {
  vector.print %arg0 : f64
  return
}
// CHECK-LABEL: @print_scalar_f64(
// CHECK-SAME: %[[A:.*]]: f64)
// CHECK: llvm.call @printF64(%[[A]]) : (f64) -> ()
// CHECK: llvm.call @printNewline() : () -> ()

// -----

// String printing: the checks expect a module-level declaration of
// @printString and a private constant global, then (inside the function) the
// address of that global, a getelementptr on it, and the call.
// CHECK-LABEL: module {
// CHECK: llvm.func @printString(!llvm.ptr)
// CHECK: llvm.mlir.global private constant @[[GLOBAL_STR:.*]]({{.*}})
// CHECK: @print_string
// CHECK-NEXT: %[[GLOBAL_ADDR:.*]] = llvm.mlir.addressof @[[GLOBAL_STR]] : !llvm.ptr
// CHECK-NEXT: %[[STR_PTR:.*]] = llvm.getelementptr %[[GLOBAL_ADDR]][0] : (!llvm.ptr) -> !llvm.ptr
// CHECK-NEXT: llvm.call @printString(%[[STR_PTR]]) : (!llvm.ptr) -> ()
func.func @print_string() {
  vector.print str "Hello, World!"
  return
}

// -----

//===----------------------------------------------------------------------===//
// vector.extract_strided_slice
//===----------------------------------------------------------------------===//

// 1-D slice of a 1-D vector: expected to become a single llvm.shufflevector
// selecting elements 2 and 3.
func.func @extract_strided_slice_f32_1d_from_1d(%arg0: vector<4xf32>) -> vector<2xf32> {
  %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32>
  return %0 : vector<2xf32>
}
// CHECK-LABEL: @extract_strided_slice_f32_1d_from_1d
// CHECK-SAME: %[[A:.*]]: vector<4xf32>)
// CHECK: %[[T0:.*]] = llvm.shufflevector %[[A]], %[[A]] [2, 3] : vector<4xf32>
// CHECK: return %[[T0]] : vector<2xf32>

// NOTE: For scalable vectors we could only extract vector<[4]xf32> from vector<[4]xf32>, but that would be a NOP.

// -----

// Same slice with `index` elements: the shuffle operates on vector<4xi64> and
// the result is cast back to vector<2xindex>.
func.func @extract_strided_slice_index_1d_from_1d(%arg0: vector<4xindex>) -> vector<2xindex> {
  %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4xindex> to vector<2xindex>
  return %0 : vector<2xindex>
}
// CHECK-LABEL: @extract_strided_slice_index_1d_from_1d
// CHECK-SAME: %[[A:.*]]: vector<4xindex>)
// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<4xindex> to vector<4xi64>
// CHECK: %[[T2:.*]] = llvm.shufflevector %[[T0]], %[[T0]] [2, 3] : vector<4xi64>
// CHECK: %[[T3:.*]] = builtin.unrealized_conversion_cast %[[T2]] : vector<2xi64> to vector<2xindex>
// CHECK: return %[[T3]] : vector<2xindex>

// NOTE: For scalable vectors we could only extract vector<[4]xindex> from vector<[4]xindex>, but that would be a NOP.

// -----

// Slicing only the outer dimension of a 2-D vector: rows 2 and 3 of the source
// array are expected to be moved with extractvalue/insertvalue pairs.
func.func @extract_strided_slice_f32_1d_from_2d(%arg0: vector<4x8xf32>) -> vector<2x8xf32> {
  %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x8xf32> to vector<2x8xf32>
  return %0 : vector<2x8xf32>
}
// CHECK-LABEL: @extract_strided_slice_f32_1d_from_2d(
// CHECK-SAME: %[[ARG:.*]]: vector<4x8xf32>)
// CHECK: %[[A:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : vector<4x8xf32> to !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vector<8xf32>>
// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<2 x vector<8xf32>>
// CHECK: %[[T3:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T2]][1] : !llvm.array<2 x vector<8xf32>>
// CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : !llvm.array<2 x vector<8xf32>> to vector<2x8xf32>
// CHECK: return %[[T5]]

// -----

// Scalable-inner-dimension variant of the test above.
func.func @extract_strided_slice_f32_1d_from_2d_scalable(%arg0: vector<4x[8]xf32>) -> vector<2x[8]xf32> {
  %0 = vector.extract_strided_slice %arg0 {offsets = [2], sizes = [2], strides = [1]} : vector<4x[8]xf32> to vector<2x[8]xf32>
  return %0 : vector<2x[8]xf32>
}
// CHECK-LABEL: func.func @extract_strided_slice_f32_1d_from_2d_scalable(
// CHECK-SAME: %[[ARG:.*]]: vector<4x[8]xf32>)
// CHECK: %[[A:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : vector<4x[8]xf32> to !llvm.array<4 x vector<[8]xf32>>
// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm.array<2 x vector<[8]xf32>>
// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<[8]xf32>>
// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm.array<2 x vector<[8]xf32>>
// CHECK: %[[T3:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<[8]xf32>>
// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T2]][1] : !llvm.array<2 x vector<[8]xf32>>
// CHECK: %[[T5:.*]] = builtin.unrealized_conversion_cast %[[T4]] : !llvm.array<2 x vector<[8]xf32>> to vector<2x[8]xf32>
// CHECK: return %[[T5]]

// -----

// Slicing both dimensions: each selected row (2 and 3) is extracted, narrowed
// to elements [2, 3] with a shuffle, and inserted into a zero-initialized
// 2x2 result.
func.func @extract_strided_slice_f32_2d_from_2d(%arg0: vector<4x8xf32>) -> vector<2x2xf32> {
  %0 = vector.extract_strided_slice %arg0 {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x8xf32> to vector<2x2xf32>
  return %0 : vector<2x2xf32>
}
// CHECK-LABEL: @extract_strided_slice_f32_2d_from_2d(
// CHECK-SAME: %[[ARG:.*]]: vector<4x8xf32>)
// CHECK: %[[A:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : vector<4x8xf32> to !llvm.array<4 x vector<8xf32>>
// CHECK: %[[VAL_2:.*]] = arith.constant dense<0.000000e+00> : vector<2x2xf32>
// CHECK: %[[VAL_6:.*]] = builtin.unrealized_conversion_cast %[[VAL_2]] : vector<2x2xf32> to !llvm.array<2 x vector<2xf32>>
// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][2] : !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T2]] [2, 3] : vector<8xf32>
// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[VAL_6]][0] : !llvm.array<2 x vector<2xf32>>
// CHECK: %[[T5:.*]] = llvm.extractvalue %[[A]][3] : !llvm.array<4 x vector<8xf32>>
// CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T5]] [2, 3] : vector<8xf32>
// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T4]][1] : !llvm.array<2 x vector<2xf32>>
// CHECK: %[[VAL_12:.*]] = builtin.unrealized_conversion_cast %[[T7]] : !llvm.array<2 x vector<2xf32>> to vector<2x2xf32>
// CHECK: return %[[VAL_12]] : vector<2x2xf32>

// -----

// NOTE: For scalable vectors, we can only extract "full" scalable dimensions
// (e.g. [8] from [8], but not [4] from [8]).

// Because the full scalable dimension is taken, the expected output moves
// whole rows with extractvalue/insertvalue and contains no shuffle.
func.func @extract_strided_slice_f32_2d_from_2d_scalable(%arg0: vector<4x[8]xf32>) -> vector<2x[8]xf32> {
  %0 = vector.extract_strided_slice %arg0 {offsets = [2, 0], sizes = [2, 8], strides = [1, 1]} : vector<4x[8]xf32> to vector<2x[8]xf32>
  return %0 : vector<2x[8]xf32>
}
// CHECK-LABEL: @extract_strided_slice_f32_2d_from_2d_scalable(
// CHECK-SAME: %[[ARG:.*]]: vector<4x[8]xf32>)
// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : vector<4x[8]xf32> to !llvm.array<4 x vector<[8]xf32>>
// CHECK: %[[T3:.*]] = arith.constant dense<0.000000e+00> : vector<2x[8]xf32>
// CHECK: %[[T4:.*]] = builtin.unrealized_conversion_cast %[[T3]] : vector<2x[8]xf32> to !llvm.array<2 x vector<[8]xf32>>
// CHECK: %[[T5:.*]] = llvm.extractvalue %[[T1]][2] : !llvm.array<4 x vector<[8]xf32>>
// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T5]], %[[T4]][0] : !llvm.array<2 x vector<[8]xf32>>
// CHECK: %[[T7:.*]] = llvm.extractvalue %[[T1]][3] : !llvm.array<4 x vector<[8]xf32>>
// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T6]][1] : !llvm.array<2 x vector<[8]xf32>>
// CHECK: %[[T9:.*]] = builtin.unrealized_conversion_cast %[[T8]] : !llvm.array<2 x vector<[8]xf32>> to vector<2x[8]xf32>
// CHECK: return %[[T9]] : vector<2x[8]xf32>

// -----

//===----------------------------------------------------------------------===//
// vector.insert_strided_slice
//===----------------------------------------------------------------------===//

// Inserting a full 2-D slice at outer position 2: a single insertvalue at
// index [2] of the 3-D destination array is expected.
func.func @insert_strided_slice_f32_2d_into_3d(%b: vector<4x4xf32>, %c: vector<4x4x4xf32>) -> vector<4x4x4xf32> {
  %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xf32> into vector<4x4x4xf32>
  return %0 : vector<4x4x4xf32>
}
// CHECK-LABEL: @insert_strided_slice_f32_2d_into_3d
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xf32>>>

// -----

// Scalable variant: a full 2-D slice (scalable inner dimension) is inserted
// at outer position 2 with a single insertvalue.
func.func @insert_strided_slice_f32_2d_into_3d_scalable(%b: vector<4x[4]xf32>, %c: vector<4x4x[4]xf32>) -> vector<4x4x[4]xf32> {
  %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x[4]xf32> into vector<4x4x[4]xf32>
  return %0 : vector<4x4x[4]xf32>
}
// CHECK-LABEL: @insert_strided_slice_f32_2d_into_3d_scalable
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<[4]xf32>>>

// -----

// `index` elements: the destination array is expected to hold i64 vectors.
func.func @insert_strided_index_slice_index_2d_into_3d(%b: vector<4x4xindex>, %c: vector<4x4x4xindex>) -> vector<4x4x4xindex> {
  %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x4xindex> into vector<4x4x4xindex>
  return %0 : vector<4x4x4xindex>
}
// CHECK-LABEL: @insert_strided_index_slice_index_2d_into_3d
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<4xi64>>>

// -----

func.func @insert_strided_index_slice_index_2d_into_3d_scalable(%b: vector<4x[4]xindex>, %c: vector<4x4x[4]xindex>) -> vector<4x4x[4]xindex> {
  %0 = vector.insert_strided_slice %b, %c {offsets = [2, 0, 0], strides = [1, 1]} : vector<4x[4]xindex> into vector<4x4x[4]xindex>
  return %0 : vector<4x4x[4]xindex>
}
// CHECK-LABEL: @insert_strided_index_slice_index_2d_into_3d_scalable
// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm.array<4 x array<4 x vector<[4]xi64>>>

// -----

// Partial insertion in both dimensions: each source row is widened with one
// shuffle and blended into the destination row with a second shuffle.
func.func @insert_strided_slice_f32_2d_into_2d(%a: vector<2x2xf32>, %b: vector<4x4xf32>) -> vector<4x4xf32> {
  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
  return %0 : vector<4x4xf32>
}

// CHECK-LABEL: @insert_strided_slice_f32_2d_into_2d
//
// Subvector vector<2xf32> @0 into vector<4xf32> @2
// CHECK: %[[V2_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<2xf32>>
// CHECK: %[[V4_0:.*]] = llvm.extractvalue {{.*}}[2] : !llvm.array<4 x vector<4xf32>>
// Element @0 -> element @2
// CHECK: %[[R4_0:.*]] = llvm.shufflevector %[[V2_0]], %[[V2_0]] [0, 1, 0, 0] : vector<2xf32>
// CHECK: %[[R4_1:.*]] = llvm.shufflevector %[[R4_0]], %[[V4_0]] [4, 5, 0, 1] : vector<4xf32>
// CHECK: llvm.insertvalue %[[R4_1]], {{.*}}[2] : !llvm.array<4 x vector<4xf32>>
//
// Subvector vector<2xf32> @1 into vector<4xf32> @3
// CHECK: %[[V2_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<2xf32>>
// CHECK: %[[V4_3:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.array<4 x vector<4xf32>>
// Element @0 -> element @2
// CHECK: %[[R4_2:.*]] = llvm.shufflevector %[[V2_1]], %[[V2_1]] [0, 1, 0, 0] : vector<2xf32>
// CHECK: %[[R4_3:.*]] = llvm.shufflevector %[[R4_2]], %[[V4_3]] [4, 5, 0, 1] : vector<4xf32>
// CHECK: llvm.insertvalue %[[R4_3]], {{.*}}[3] : !llvm.array<4 x vector<4xf32>>

// -----

// NOTE: For scalable dimensions, the corresponding "base" size must match
// (i.e. we can only insert "full" scalable dimensions, e.g. [2] into [2], but
// not [2] into [4]).

func.func @insert_strided_slice_f32_2d_into_2d_scalable(%a: vector<2x[2]xf32>, %b: vector<4x[2]xf32>) -> vector<4x[2]xf32> {
  %0 = vector.insert_strided_slice %a, %b {offsets = [2, 0], strides = [1, 1]} : vector<2x[2]xf32> into vector<4x[2]xf32>
  return %0 : vector<4x[2]xf32>
}

// CHECK-LABEL: func.func @insert_strided_slice_f32_2d_into_2d_scalable
// Subvector vector<[2]xf32> @0 into vector<4x[2]xf32> @2
// CHECK: %[[A_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<[2]xf32>>
// The whole scalable row is inserted at position @2 (no per-element shuffle).
// CHECK: %[[B_UPDATED:.*]] = llvm.insertvalue %[[A_0]], {{.*}}[2] : !llvm.array<4 x vector<[2]xf32>>
// Subvector vector<[2]xf32> @1 into vector<4x[2]xf32> @3
// CHECK: %[[A_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<[2]xf32>>
// The whole scalable row is inserted at position @3 (no per-element shuffle).
// CHECK: llvm.insertvalue %[[A_1]], %[[B_UPDATED]][3] : !llvm.array<4 x vector<[2]xf32>>

// -----

// Partial insertion along the innermost dimension of a 3-D destination. The
// second shuffle of each pair must consume the result of the first one, so
// R8_0 / R8_2 are referenced as uses (not re-captured) in those checks.
func.func @insert_strided_slice_f32_2d_into_3d(%arg0: vector<2x4xf32>, %arg1: vector<16x4x8xf32>) -> vector<16x4x8xf32> {
  %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [0, 0, 2], strides = [1, 1]}:
        vector<2x4xf32> into vector<16x4x8xf32>
  return %0 : vector<16x4x8xf32>
}
// CHECK-LABEL: func @insert_strided_slice_f32_2d_into_3d
// CHECK: %[[V4_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<4xf32>>
// CHECK: %[[V4_0_0:.*]] = llvm.extractvalue {{.*}}[0, 0] : !llvm.array<16 x array<4 x vector<8xf32>>>
// CHECK: %[[R8_0:.*]] = llvm.shufflevector %[[V4_0]], %[[V4_0]] [0, 1, 2, 3, 0, 0, 0, 0] : vector<4xf32>
// CHECK: %[[R8_1:.*]] = llvm.shufflevector %[[R8_0]], %[[V4_0_0]] [8, 9, 0, 1, 2, 3, 14, 15] : vector<8xf32>
// CHECK: llvm.insertvalue %[[R8_1]], {{.*}}[0] : !llvm.array<4 x vector<8xf32>>

// CHECK: %[[V4_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<4xf32>>
// CHECK: %[[V4_0_1:.*]] = llvm.extractvalue {{.*}}[0, 1] : !llvm.array<16 x array<4 x vector<8xf32>>>
// CHECK: %[[R8_2:.*]] = llvm.shufflevector %[[V4_1]], %[[V4_1]] [0, 1, 2, 3, 0, 0, 0, 0] : vector<4xf32>
// CHECK: %[[R8_3:.*]] = llvm.shufflevector %[[R8_2]], %[[V4_0_1]] [8, 9, 0, 1, 2, 3, 14, 15] : vector<8xf32>
// CHECK: llvm.insertvalue %[[R8_3]], {{.*}}[1] : !llvm.array<4 x vector<8xf32>>

// -----

// NOTE: For scalable dimensions, the corresponding "base" size must match
// (i.e. we can only insert "full" scalable dimensions, e.g. [4] into [4], but
// not [4] into [8]).

func.func @insert_strided_slice_f32_2d_into_3d_scalable(%arg0: vector<2x[4]xf32>, %arg1: vector<16x4x[4]xf32>) -> vector<16x4x[4]xf32> {
  %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [3, 2, 0], strides = [1, 1]}:
        vector<2x[4]xf32> into vector<16x4x[4]xf32>
  return %0 : vector<16x4x[4]xf32>
}

// CHECK-LABEL: func.func @insert_strided_slice_f32_2d_into_3d_scalable(

// Subvector vector<4x[4]xf32> from vector<16x4x[4]xf32> @3
// CHECK: %[[ARG_1_0:.*]] = llvm.extractvalue {{.*}}[3] : !llvm.array<16 x array<4 x vector<[4]xf32>>>

// Subvector vector<[4]xf32> @0 into vector<4x[4]xf32> @2
// CHECK: %[[ARG_0_0:.*]] = llvm.extractvalue {{.*}}[0] : !llvm.array<2 x vector<[4]xf32>>
// CHECK: %[[B_UPDATED_0:.*]] = llvm.insertvalue %[[ARG_0_0]], %[[ARG_1_0]][2] : !llvm.array<4 x vector<[4]xf32>>

// Subvector vector<[4]xf32> @1 into vector<4x[4]xf32> @3
// CHECK: %[[ARG_0_1:.*]] = llvm.extractvalue {{.*}}[1] : !llvm.array<2 x vector<[4]xf32>>
// CHECK: %[[B_UPDATED_1:.*]] = llvm.insertvalue %[[ARG_0_1]], %[[B_UPDATED_0]][3] : !llvm.array<4 x vector<[4]xf32>>

// Subvector vector<4x[4]xf32> into vector<16x4x[4]xf32> @3
// CHECK: llvm.insertvalue %[[B_UPDATED_1]], {{.*}}[3] : !llvm.array<16 x array<4 x vector<[4]xf32>>>

// -----

//===----------------------------------------------------------------------===//
// vector.fma
//===----------------------------------------------------------------------===//

// Exercises vector.fma on 1-D, 2-D, 3-D and 0-D operands in one function. The
// FileCheck directives are interleaved with the ops they describe.
func.func @fma(%vec_1d: vector<8xf32>, %vec_2d: vector<2x4xf32>, %vec_3d: vector<1x1x1xf32>, %vec_0d: vector<f32>) -> (vector<8xf32>, vector<2x4xf32>, vector<1x1x1xf32>, vector<f32>) {
  // CHECK-LABEL: @fma
  // CHECK-SAME: %[[VEC_1D:.*]]: vector<8xf32>
  // CHECK-SAME: %[[VEC_2D:.*]]: vector<2x4xf32>
  // CHECK-SAME: %[[VEC_3D:.*]]: vector<1x1x1xf32>
  // CHECK: %[[VEC_2D_CAST:.*]] = builtin.unrealized_conversion_cast %[[VEC_2D]] : vector<2x4xf32> to !llvm.array<2 x vector<4xf32>>
  // 1-D: a single fmuladd on the whole vector.
  // CHECK: llvm.intr.fmuladd
  // CHECK-SAME: (vector<8xf32>, vector<8xf32>, vector<8xf32>) -> vector<8xf32>
  %0 = vector.fma %vec_1d, %vec_1d, %vec_1d : vector<8xf32>

  // 2-D: one fmuladd per row; each operand row is extracted separately (three
  // extractvalue ops per row since the same value is used for all operands).
  // CHECK: %[[VEC_2D_00:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<4xf32>>
  // CHECK: %[[VEC_2D_01:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<4xf32>>
  // CHECK: %[[VEC_2D_02:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<4xf32>>
  // CHECK: %[[VEC_2D_ADD_1:.*]] = llvm.intr.fmuladd(%[[VEC_2D_00]], %[[VEC_2D_01]], %[[VEC_2D_02]]) :
  // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
  // CHECK: llvm.insertvalue %[[VEC_2D_ADD_1]], {{.*}}[0] : !llvm.array<2 x vector<4xf32>>
  // CHECK: %[[VEC_2D_10:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<4xf32>>
  // CHECK: %[[VEC_2D_11:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<4xf32>>
  // CHECK: %[[VEC_2D_12:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<4xf32>>
  // CHECK: %[[VEC_2D_ADD_2:.*]] = llvm.intr.fmuladd(%[[VEC_2D_10]], %[[VEC_2D_11]], %[[VEC_2D_12]]) :
  // CHECK-SAME: (vector<4xf32>, vector<4xf32>, vector<4xf32>) -> vector<4xf32>
  // CHECK: llvm.insertvalue %[[VEC_2D_ADD_2]], {{.*}}[1] : !llvm.array<2 x vector<4xf32>>
  %1 = vector.fma %vec_2d, %vec_2d, %vec_2d : vector<2x4xf32>

  // 3-D with unit dims: the fmuladd is expected on vector<1xf32>.
  // CHECK: %[[C0:.*]] = llvm.intr.fmuladd
  // CHECK-SAME: (vector<1xf32>, vector<1xf32>, vector<1xf32>) -> vector<1xf32>
  %2 = vector.fma %vec_3d, %vec_3d, %vec_3d : vector<1x1x1xf32>

  // 0-D: the fmuladd is likewise expected on vector<1xf32>.
  // CHECK: %[[D0:.*]] = llvm.intr.fmuladd
  // CHECK-SAME: (vector<1xf32>, vector<1xf32>, vector<1xf32>) -> vector<1xf32>
  %3 = vector.fma %vec_0d, %vec_0d, %vec_0d : vector<f32>

  return %0, %1, %2, %3: vector<8xf32>, vector<2x4xf32>, vector<1x1x1xf32>, vector<f32>
}

// -----

// Scalable counterpart of the fma test above (1-D, 2-D and 3-D cases only).
// NOTE(review): %vec_0d is declared in the signature but not used in the body.
func.func @fma_scalable(%vec_1d: vector<[8]xf32>, %vec_2d: vector<2x[4]xf32>, %vec_3d: vector<1x1x[1]xf32>, %vec_0d: vector<f32>) -> (vector<[8]xf32>, vector<2x[4]xf32>, vector<1x1x[1]xf32>) {
  // CHECK-LABEL: @fma_scalable
  // CHECK-SAME: %[[VEC_1D:.*]]: vector<[8]xf32>
  // CHECK-SAME: %[[VEC_2D:.*]]: vector<2x[4]xf32>
  // CHECK-SAME: %[[VEC_3D:.*]]: vector<1x1x[1]xf32>
  // CHECK: %[[VEC_2D_CAST:.*]] = builtin.unrealized_conversion_cast %[[VEC_2D]] : vector<2x[4]xf32> to !llvm.array<2 x vector<[4]xf32>>
  // CHECK: llvm.intr.fmuladd
  // CHECK-SAME: (vector<[8]xf32>, vector<[8]xf32>, vector<[8]xf32>) -> vector<[8]xf32>
  %0 = vector.fma %vec_1d, %vec_1d, %vec_1d : vector<[8]xf32>

  // CHECK: %[[VEC_2D_00:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<[4]xf32>>
  // CHECK: %[[VEC_2D_01:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<[4]xf32>>
  // CHECK: %[[VEC_2D_02:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][0] : !llvm.array<2 x vector<[4]xf32>>
  // CHECK: %[[VEC_2D_ADD_1:.*]] = llvm.intr.fmuladd(%[[VEC_2D_00]], %[[VEC_2D_01]], %[[VEC_2D_02]]) :
  // CHECK-SAME: (vector<[4]xf32>, vector<[4]xf32>, vector<[4]xf32>) -> vector<[4]xf32>
  // CHECK: llvm.insertvalue %[[VEC_2D_ADD_1]], {{.*}}[0] : !llvm.array<2 x vector<[4]xf32>>
  // CHECK: %[[VEC_2D_10:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<[4]xf32>>
  // CHECK: %[[VEC_2D_11:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<[4]xf32>>
  // CHECK: %[[VEC_2D_12:.*]] = llvm.extractvalue %[[VEC_2D_CAST]][1] : !llvm.array<2 x vector<[4]xf32>>
  // CHECK: %[[VEC_2D_ADD_2:.*]] = llvm.intr.fmuladd(%[[VEC_2D_10]], %[[VEC_2D_11]], %[[VEC_2D_12]]) :
  // CHECK-SAME: (vector<[4]xf32>, vector<[4]xf32>, vector<[4]xf32>) -> vector<[4]xf32>
  // CHECK: llvm.insertvalue %[[VEC_2D_ADD_2]], {{.*}}[1] : !llvm.array<2 x vector<[4]xf32>>
  %1 = vector.fma %vec_2d, %vec_2d, %vec_2d : vector<2x[4]xf32>

  // CHECK: %[[C0:.*]] = llvm.intr.fmuladd
  // CHECK-SAME: (vector<[1]xf32>, vector<[1]xf32>, vector<[1]xf32>) -> vector<[1]xf32>
  %2 = vector.fma %vec_3d, %vec_3d, %vec_3d : vector<1x1x[1]xf32>

  return %0, %1, %2: vector<[8]xf32>, vector<2x[4]xf32>, vector<1x1x[1]xf32>
}

// -----

//===----------------------------------------------------------------------===//
// vector.reduction
//===----------------------------------------------------------------------===//

// 0-D input: the vector is cast to vector<1xf32> before the fadd reduction,
// which starts from a 0.0 constant.
func.func @reduce_0d_f32(%arg0: vector<f32>) -> f32 {
  %0 = vector.reduction <add>, %arg0 : vector<f32> into f32
  return %0 : f32
}
// CHECK-LABEL: @reduce_0d_f32(
// CHECK-SAME: %[[A:.*]]: vector<f32>)
// CHECK: %[[CA:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<f32> to vector<1xf32>
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[CA]])
// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f32, vector<1xf32>) -> f32
// CHECK: return %[[V]] : f32

// -----

// Floating-point <add> reductions below all expect a 0.0 start constant of
// the element type and the reduce.fadd intrinsic with no fast-math flags.
func.func @reduce_f16(%arg0: vector<16xf16>) -> f16 {
  %0 = vector.reduction <add>, %arg0 : vector<16xf16> into f16
  return %0 : f16
}
// CHECK-LABEL: @reduce_f16(
// CHECK-SAME: %[[A:.*]]: vector<16xf16>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f16) : f16
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f16, vector<16xf16>) -> f16
// CHECK: return %[[V]] : f16

// -----

func.func @reduce_f16_scalable(%arg0: vector<[16]xf16>) -> f16 {
  %0 = vector.reduction <add>, %arg0 : vector<[16]xf16> into f16
  return %0 : f16
}
// CHECK-LABEL: @reduce_f16_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xf16>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f16) : f16
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f16, vector<[16]xf16>) -> f16
// CHECK: return %[[V]] : f16

// -----

func.func @reduce_f32(%arg0: vector<16xf32>) -> f32 {
  %0 = vector.reduction <add>, %arg0 : vector<16xf32> into f32
  return %0 : f32
}
// CHECK-LABEL: @reduce_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f32, vector<16xf32>) -> f32
// CHECK: return %[[V]] : f32

// -----

func.func @reduce_f32_scalable(%arg0: vector<[16]xf32>) -> f32 {
  %0 = vector.reduction <add>, %arg0 : vector<[16]xf32> into f32
  return %0 : f32
}
// CHECK-LABEL: @reduce_f32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f32) : f32
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f32, vector<[16]xf32>) -> f32
// CHECK: return %[[V]] : f32

// -----

// Fixed-width f64 add reduction without an accumulator: the expected lowering
// seeds llvm.intr.vector.reduce.fadd with a 0.0 constant.
func.func @reduce_f64(%vec: vector<16xf64>) -> f64 {
  %res = vector.reduction <add>, %vec : vector<16xf64> into f64
  return %res : f64
}
// CHECK-LABEL: @reduce_f64(
// CHECK-SAME: %[[A:.*]]: vector<16xf64>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : f64
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f64, vector<16xf64>) -> f64
// CHECK: return %[[V]] : f64

// -----

// Scalable f64 add reduction without an accumulator.
func.func @reduce_f64_scalable(%vec: vector<[16]xf64>) -> f64 {
  %res = vector.reduction <add>, %vec : vector<[16]xf64> into f64
  return %res : f64
}
// CHECK-LABEL: @reduce_f64_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xf64>)
// CHECK: %[[C:.*]] = llvm.mlir.constant(0.000000e+00 : f64) : f64
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.fadd"(%[[C]], %[[A]])
// CHECK-SAME: <{fastmathFlags = #llvm.fastmath<none>}> : (f64, vector<[16]xf64>) -> f64
// CHECK: return %[[V]] : f64

// -----

// Fixed-width i8 add reduction; integer reductions take no start value.
func.func @reduce_i8(%vec: vector<16xi8>) -> i8 {
  %res = vector.reduction <add>, %vec : vector<16xi8> into i8
  return %res : i8
}
// CHECK-LABEL: @reduce_i8(
// CHECK-SAME: %[[A:.*]]: vector<16xi8>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
// CHECK: return %[[V]] : i8

// -----

// Scalable i8 add reduction.
func.func @reduce_i8_scalable(%vec: vector<[16]xi8>) -> i8 {
  %res = vector.reduction <add>, %vec : vector<[16]xi8> into i8
  return %res : i8
}
// CHECK-LABEL: @reduce_i8_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi8>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
// CHECK: return %[[V]] : i8

// -----

// Fixed-width i32 add reduction.
func.func @reduce_i32(%vec: vector<16xi32>) -> i32 {
  %res = vector.reduction <add>, %vec : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Scalable i32 add reduction.
func.func @reduce_i32_scalable(%vec: vector<[16]xi32>) -> i32 {
  %res = vector.reduction <add>, %vec : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Fixed-width i32 add reduction with an accumulator: the accumulator is
// expected to be folded in by a separate llvm.add after the intrinsic.
func.func @reduce_acc_i32(%vec: vector<16xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <add>, %vec, %acc : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_acc_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
// CHECK: %[[V:.*]] = llvm.add %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Scalable i32 add reduction with an accumulator.
func.func @reduce_acc_i32_scalable(%vec: vector<[16]xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <add>, %vec, %acc : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_acc_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
// CHECK: %[[V:.*]] = llvm.add %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Fixed-width i32 multiply reduction.
func.func @reduce_mul_i32(%vec: vector<16xi32>) -> i32 {
  %res = vector.reduction <mul>, %vec : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_mul_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.mul"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Scalable i32 multiply reduction.
func.func @reduce_mul_i32_scalable(%vec: vector<[16]xi32>) -> i32 {
  %res = vector.reduction <mul>, %vec : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_mul_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.mul"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Fixed-width i32 multiply reduction with an accumulator (combined via
// llvm.mul).
func.func @reduce_mul_acc_i32(%vec: vector<16xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <mul>, %vec, %acc : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_mul_acc_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.mul"(%[[A]])
// CHECK: %[[V:.*]] = llvm.mul %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Scalable i32 multiply reduction with an accumulator.
func.func @reduce_mul_acc_i32_scalable(%vec: vector<[16]xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <mul>, %vec, %acc : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_mul_acc_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.mul"(%[[A]])
// CHECK: %[[V:.*]] = llvm.mul %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Fixed-width f32 `maximumf` reduction with an accumulator: expected to map to
// reduce.fmaximum followed by llvm.intr.maximum against the accumulator.
func.func @reduce_fmaximum_f32(%vec: vector<16xf32>, %acc: f32) -> f32 {
  %res = vector.reduction <maximumf>, %vec, %acc : vector<16xf32> into f32
  return %res : f32
}
// CHECK-LABEL: @reduce_fmaximum_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>, %[[B:.*]]: f32)
// CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmaximum(%[[A]]) : (vector<16xf32>) -> f32
// CHECK: %[[R:.*]] = llvm.intr.maximum(%[[V]], %[[B]]) : (f32, f32) -> f32
// CHECK: return %[[R]] : f32

// -----

// Scalable f32 `maximumf` reduction with an accumulator.
func.func @reduce_fmaximum_f32_scalable(%vec: vector<[16]xf32>, %acc: f32) -> f32 {
  %res = vector.reduction <maximumf>, %vec, %acc : vector<[16]xf32> into f32
  return %res : f32
}
// CHECK-LABEL: @reduce_fmaximum_f32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>, %[[B:.*]]: f32)
// CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmaximum(%[[A]]) : (vector<[16]xf32>) -> f32
// CHECK: %[[R:.*]] = llvm.intr.maximum(%[[V]], %[[B]]) : (f32, f32) -> f32
// CHECK: return %[[R]] : f32

// -----

// Fixed-width f32 `minimumf` reduction with an accumulator: expected to map to
// reduce.fminimum followed by llvm.intr.minimum against the accumulator.
func.func @reduce_fminimum_f32(%vec: vector<16xf32>, %acc: f32) -> f32 {
  %res = vector.reduction <minimumf>, %vec, %acc : vector<16xf32> into f32
  return %res : f32
}
// CHECK-LABEL: @reduce_fminimum_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>, %[[B:.*]]: f32)
// CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fminimum(%[[A]]) : (vector<16xf32>) -> f32
// CHECK: %[[R:.*]] = llvm.intr.minimum(%[[V]], %[[B]]) : (f32, f32) -> f32
// CHECK: return %[[R]] : f32

// -----

// Scalable f32 `minimumf` reduction with an accumulator.
func.func @reduce_fminimum_f32_scalable(%vec: vector<[16]xf32>, %acc: f32) -> f32 {
  %res = vector.reduction <minimumf>, %vec, %acc : vector<[16]xf32> into f32
  return %res : f32
}
// CHECK-LABEL: @reduce_fminimum_f32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>, %[[B:.*]]: f32)
// CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fminimum(%[[A]]) : (vector<[16]xf32>) -> f32
// CHECK: %[[R:.*]] = llvm.intr.minimum(%[[V]], %[[B]]) : (f32, f32) -> f32
// CHECK: return %[[R]] : f32

// -----

// Fixed-width f32 `maxnumf` reduction with an accumulator: expected to map to
// reduce.fmax followed by llvm.intr.maxnum against the accumulator.
func.func @reduce_fmax_f32(%vec: vector<16xf32>, %acc: f32) -> f32 {
  %res = vector.reduction <maxnumf>, %vec, %acc : vector<16xf32> into f32
  return %res : f32
}
// CHECK-LABEL: @reduce_fmax_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>, %[[B:.*]]: f32)
// CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmax(%[[A]]) : (vector<16xf32>) -> f32
// CHECK: %[[R:.*]] = llvm.intr.maxnum(%[[V]], %[[B]]) : (f32, f32) -> f32
// CHECK: return %[[R]] : f32

// -----

// Scalable f32 `maxnumf` reduction with an accumulator.
func.func @reduce_fmax_f32_scalable(%vec: vector<[16]xf32>, %acc: f32) -> f32 {
  %res = vector.reduction <maxnumf>, %vec, %acc : vector<[16]xf32> into f32
  return %res : f32
}
// CHECK-LABEL: @reduce_fmax_f32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>, %[[B:.*]]: f32)
// CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmax(%[[A]]) : (vector<[16]xf32>) -> f32
// CHECK: %[[R:.*]] = llvm.intr.maxnum(%[[V]], %[[B]]) : (f32, f32) -> f32
// CHECK: return %[[R]] : f32

// -----

// Fixed-width f32 `minnumf` reduction with an accumulator: expected to map to
// reduce.fmin followed by llvm.intr.minnum against the accumulator.
func.func @reduce_fmin_f32(%vec: vector<16xf32>, %acc: f32) -> f32 {
  %res = vector.reduction <minnumf>, %vec, %acc : vector<16xf32> into f32
  return %res : f32
}
// CHECK-LABEL: @reduce_fmin_f32(
// CHECK-SAME: %[[A:.*]]: vector<16xf32>, %[[B:.*]]: f32)
// CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmin(%[[A]]) : (vector<16xf32>) -> f32
// CHECK: %[[R:.*]] = llvm.intr.minnum(%[[V]], %[[B]]) : (f32, f32) -> f32
// CHECK: return %[[R]] : f32

// -----

// Scalable f32 `minnumf` reduction with an accumulator.
func.func @reduce_fmin_f32_scalable(%vec: vector<[16]xf32>, %acc: f32) -> f32 {
  %res = vector.reduction <minnumf>, %vec, %acc : vector<[16]xf32> into f32
  return %res : f32
}
// CHECK-LABEL: @reduce_fmin_f32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xf32>, %[[B:.*]]: f32)
// CHECK: %[[V:.*]] = llvm.intr.vector.reduce.fmin(%[[A]]) : (vector<[16]xf32>) -> f32
// CHECK: %[[R:.*]] = llvm.intr.minnum(%[[V]], %[[B]]) : (f32, f32) -> f32
// CHECK: return %[[R]] : f32

// -----

// Fixed-width unsigned-min i32 reduction.
func.func @reduce_minui_i32(%vec: vector<16xi32>) -> i32 {
  %res = vector.reduction <minui>, %vec : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_minui_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.umin"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Scalable unsigned-min i32 reduction.
func.func @reduce_minui_i32_scalable(%vec: vector<[16]xi32>) -> i32 {
  %res = vector.reduction <minui>, %vec : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_minui_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.umin"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Fixed-width unsigned-min i32 reduction with an accumulator: the accumulator
// is expected to be merged with an `ule` compare plus select.
func.func @reduce_minui_acc_i32(%vec: vector<16xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <minui>, %vec, %acc : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_minui_acc_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.umin"(%[[A]])
// CHECK: %[[S:.*]] = llvm.icmp "ule" %[[ACC]], %[[R]]
// CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Scalable unsigned-min i32 reduction with an accumulator.
func.func @reduce_minui_acc_i32_scalable(%vec: vector<[16]xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <minui>, %vec, %acc : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_minui_acc_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.umin"(%[[A]])
// CHECK: %[[S:.*]] = llvm.icmp "ule" %[[ACC]], %[[R]]
// CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Fixed-width unsigned-max i32 reduction.
func.func @reduce_maxui_i32(%vec: vector<16xi32>) -> i32 {
  %res = vector.reduction <maxui>, %vec : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_maxui_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.umax"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Scalable unsigned-max i32 reduction.
func.func @reduce_maxui_i32_scalable(%vec: vector<[16]xi32>) -> i32 {
  %res = vector.reduction <maxui>, %vec : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_maxui_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.umax"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Fixed-width unsigned-max i32 reduction with an accumulator: merged with a
// `uge` compare plus select.
func.func @reduce_maxui_acc_i32(%vec: vector<16xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <maxui>, %vec, %acc : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_maxui_acc_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.umax"(%[[A]])
// CHECK: %[[S:.*]] = llvm.icmp "uge" %[[ACC]], %[[R]]
// CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Scalable unsigned-max i32 reduction with an accumulator.
func.func @reduce_maxui_acc_i32_scalable(%vec: vector<[16]xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <maxui>, %vec, %acc : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_maxui_acc_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.umax"(%[[A]])
// CHECK: %[[S:.*]] = llvm.icmp "uge" %[[ACC]], %[[R]]
// CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Fixed-width signed-min i32 reduction.
func.func @reduce_minsi_i32(%vec: vector<16xi32>) -> i32 {
  %res = vector.reduction <minsi>, %vec : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_minsi_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.smin"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Scalable signed-min i32 reduction.
func.func @reduce_minsi_i32_scalable(%vec: vector<[16]xi32>) -> i32 {
  %res = vector.reduction <minsi>, %vec : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_minsi_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.smin"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Fixed-width signed-min i32 reduction with an accumulator: merged with an
// `sle` compare plus select.
func.func @reduce_minsi_acc_i32(%vec: vector<16xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <minsi>, %vec, %acc : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_minsi_acc_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.smin"(%[[A]])
// CHECK: %[[S:.*]] = llvm.icmp "sle" %[[ACC]], %[[R]]
// CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Scalable signed-min i32 reduction with an accumulator.
func.func @reduce_minsi_acc_i32_scalable(%vec: vector<[16]xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <minsi>, %vec, %acc : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_minsi_acc_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.smin"(%[[A]])
// CHECK: %[[S:.*]] = llvm.icmp "sle" %[[ACC]], %[[R]]
// CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Fixed-width signed-max i32 reduction.
func.func @reduce_maxsi_i32(%vec: vector<16xi32>) -> i32 {
  %res = vector.reduction <maxsi>, %vec : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_maxsi_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.smax"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Scalable signed-max i32 reduction.
func.func @reduce_maxsi_i32_scalable(%vec: vector<[16]xi32>) -> i32 {
  %res = vector.reduction <maxsi>, %vec : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_maxsi_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.smax"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Fixed-width signed-max i32 reduction with an accumulator: merged with an
// `sge` compare plus select.
func.func @reduce_maxsi_acc_i32(%vec: vector<16xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <maxsi>, %vec, %acc : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_maxsi_acc_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.smax"(%[[A]])
// CHECK: %[[S:.*]] = llvm.icmp "sge" %[[ACC]], %[[R]]
// CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Scalable signed-max i32 reduction with an accumulator.
func.func @reduce_maxsi_acc_i32_scalable(%vec: vector<[16]xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <maxsi>, %vec, %acc : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_maxsi_acc_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.smax"(%[[A]])
// CHECK: %[[S:.*]] = llvm.icmp "sge" %[[ACC]], %[[R]]
// CHECK: %[[V:.*]] = llvm.select %[[S]], %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Fixed-width bitwise-and i32 reduction.
func.func @reduce_and_i32(%vec: vector<16xi32>) -> i32 {
  %res = vector.reduction <and>, %vec : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_and_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.and"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Scalable bitwise-and i32 reduction.
func.func @reduce_and_i32_scalable(%vec: vector<[16]xi32>) -> i32 {
  %res = vector.reduction <and>, %vec : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_and_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.and"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Fixed-width bitwise-and i32 reduction with an accumulator (combined via
// llvm.and).
func.func @reduce_and_acc_i32(%vec: vector<16xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <and>, %vec, %acc : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_and_acc_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.and"(%[[A]])
// CHECK: %[[V:.*]] = llvm.and %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Scalable bitwise-and i32 reduction with an accumulator.
func.func @reduce_and_acc_i32_scalable(%vec: vector<[16]xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <and>, %vec, %acc : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_and_acc_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.and"(%[[A]])
// CHECK: %[[V:.*]] = llvm.and %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Fixed-width bitwise-or i32 reduction.
func.func @reduce_or_i32(%vec: vector<16xi32>) -> i32 {
  %res = vector.reduction <or>, %vec : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_or_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.or"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Scalable bitwise-or i32 reduction.
func.func @reduce_or_i32_scalable(%vec: vector<[16]xi32>) -> i32 {
  %res = vector.reduction <or>, %vec : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_or_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.or"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Fixed-width bitwise-or i32 reduction with an accumulator (combined via
// llvm.or).
func.func @reduce_or_acc_i32(%vec: vector<16xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <or>, %vec, %acc : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_or_acc_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.or"(%[[A]])
// CHECK: %[[V:.*]] = llvm.or %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Scalable bitwise-or i32 reduction with an accumulator.
func.func @reduce_or_acc_i32_scalable(%vec: vector<[16]xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <or>, %vec, %acc : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_or_acc_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.or"(%[[A]])
// CHECK: %[[V:.*]] = llvm.or %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Fixed-width bitwise-xor i32 reduction.
func.func @reduce_xor_i32(%vec: vector<16xi32>) -> i32 {
  %res = vector.reduction <xor>, %vec : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_xor_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.xor"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Scalable bitwise-xor i32 reduction.
func.func @reduce_xor_i32_scalable(%vec: vector<[16]xi32>) -> i32 {
  %res = vector.reduction <xor>, %vec : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_xor_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.xor"(%[[A]])
// CHECK: return %[[V]] : i32

// -----

// Fixed-width bitwise-xor i32 reduction with an accumulator (combined via
// llvm.xor).
func.func @reduce_xor_acc_i32(%vec: vector<16xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <xor>, %vec, %acc : vector<16xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_xor_acc_i32(
// CHECK-SAME: %[[A:.*]]: vector<16xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.xor"(%[[A]])
// CHECK: %[[V:.*]] = llvm.xor %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Scalable bitwise-xor i32 reduction with an accumulator.
func.func @reduce_xor_acc_i32_scalable(%vec: vector<[16]xi32>, %acc: i32) -> i32 {
  %res = vector.reduction <xor>, %vec, %acc : vector<[16]xi32> into i32
  return %res : i32
}
// CHECK-LABEL: @reduce_xor_acc_i32_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi32>, %[[ACC:.*]]: i32)
// CHECK: %[[R:.*]] = "llvm.intr.vector.reduce.xor"(%[[A]])
// CHECK: %[[V:.*]] = llvm.xor %[[ACC]], %[[R]]
// CHECK: return %[[V]] : i32

// -----

// Fixed-width i64 add reduction.
func.func @reduce_i64(%vec: vector<16xi64>) -> i64 {
  %res = vector.reduction <add>, %vec : vector<16xi64> into i64
  return %res : i64
}
// CHECK-LABEL: @reduce_i64(
// CHECK-SAME: %[[A:.*]]: vector<16xi64>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
// CHECK: return %[[V]] : i64

// -----

// Scalable i64 add reduction.
func.func @reduce_i64_scalable(%vec: vector<[16]xi64>) -> i64 {
  %res = vector.reduction <add>, %vec : vector<[16]xi64> into i64
  return %res : i64
}
// CHECK-LABEL: @reduce_i64_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xi64>)
// CHECK: %[[V:.*]] = "llvm.intr.vector.reduce.add"(%[[A]])
// CHECK: return %[[V]] : i64

// -----

// Fixed-width `index` add reduction: the expected output casts the operand to
// an i64 vector, reduces, and casts the scalar back to `index`.
func.func @reduce_index(%vec: vector<16xindex>) -> index {
  %res = vector.reduction <add>, %vec : vector<16xindex> into index
  return %res : index
}
// CHECK-LABEL: @reduce_index(
// CHECK-SAME: %[[A:.*]]: vector<16xindex>)
// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<16xindex> to vector<16xi64>
// CHECK: %[[T1:.*]] = "llvm.intr.vector.reduce.add"(%[[T0]])
// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : i64 to index
// CHECK: return %[[T2]] : index

// -----

// Scalable `index` add reduction (same cast / reduce / cast shape).
func.func @reduce_index_scalable(%vec: vector<[16]xindex>) -> index {
  %res = vector.reduction <add>, %vec : vector<[16]xindex> into index
  return %res : index
}
// CHECK-LABEL: @reduce_index_scalable(
// CHECK-SAME: %[[A:.*]]: vector<[16]xindex>)
// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<[16]xindex> to vector<[16]xi64>
// CHECK: %[[T1:.*]] = "llvm.intr.vector.reduce.add"(%[[T0]])
// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : i64 to index
// CHECK: return %[[T2]] : index

// -----

//===----------------------------------------------------------------------===//
// vector.matrix_multiply
//===----------------------------------------------------------------------===//

// Flattened matrix product on f64. Shapes: LHS 4x16, RHS 16x3, result 4x3.
func.func @matrix_ops(%lhs: vector<64xf64>, %rhs: vector<48xf64>) -> vector<12xf64> {
  %prod = vector.matrix_multiply %lhs, %rhs
    {lhs_rows = 4 : i32, lhs_columns = 16 : i32, rhs_columns = 3 : i32}
    : (vector<64xf64>, vector<48xf64>) -> vector<12xf64>
  return %prod : vector<12xf64>
}
// CHECK-LABEL: @matrix_ops
// CHECK: llvm.intr.matrix.multiply %{{.*}}, %{{.*}} {
// CHECK-SAME: lhs_columns = 16 : i32, lhs_rows = 4 : i32, rhs_columns = 3 : i32
// CHECK-SAME: } : (vector<64xf64>, vector<48xf64>) -> vector<12xf64>

// -----

// Same matrix product with `index` elements; the expected intrinsic operates
// on i64 vectors.
func.func @matrix_ops_index(%lhs: vector<64xindex>, %rhs: vector<48xindex>) -> vector<12xindex> {
  %prod = vector.matrix_multiply %lhs, %rhs
    {lhs_rows = 4 : i32, lhs_columns = 16 : i32, rhs_columns = 3 : i32}
    : (vector<64xindex>, vector<48xindex>) -> vector<12xindex>
  return %prod : vector<12xindex>
}
// CHECK-LABEL: @matrix_ops_index
// CHECK: llvm.intr.matrix.multiply %{{.*}}, %{{.*}} {
// CHECK-SAME: lhs_columns = 16 : i32, lhs_rows = 4 : i32, rhs_columns = 3 : i32
// CHECK-SAME: } : (vector<64xi64>, vector<48xi64>) -> vector<12xi64>

// -----

//===----------------------------------------------------------------------===//
// vector.constant_mask
//===----------------------------------------------------------------------===//

// 0-D mask with bound 0: expected to become a dense<false> constant.
func.func @constant_mask_0d_f() -> vector<i1> {
  %mask = vector.constant_mask [0] : vector<i1>
  return %mask : vector<i1>
}
// CHECK-LABEL: func @constant_mask_0d_f
// CHECK: %[[VAL_0:.*]] = arith.constant dense<false> : vector<i1>
// CHECK: return %[[VAL_0]] : vector<i1>

// -----

// 0-D mask with bound 1: expected to become a dense<true> constant.
func.func @constant_mask_0d_t() -> vector<i1> {
  %mask = vector.constant_mask [1] : vector<i1>
  return %mask : vector<i1>
}
// CHECK-LABEL: func @constant_mask_0d_t
// CHECK: %[[VAL_0:.*]] = arith.constant dense<true> : vector<i1>
// CHECK: return %[[VAL_0]] : vector<i1>

// -----

// 1-D fixed-width mask: the first 4 of 8 lanes are set.
func.func @constant_mask_1d() -> vector<8xi1> {
  %mask = vector.constant_mask [4] : vector<8xi1>
  return %mask : vector<8xi1>
}
// CHECK-LABEL: func @constant_mask_1d
// CHECK: %[[VAL_0:.*]] = arith.constant dense<[true, true, true, true, false, false, false, false]> : vector<8xi1>
// CHECK: return %[[VAL_0]] : vector<8xi1>

// -----

// 1-D scalable mask with bound 0: expected to be an all-false splat.
func.func @constant_mask_1d_scalable_all_false() -> vector<[8]xi1> {
  %mask = vector.constant_mask [0] : vector<[8]xi1>
  return %mask : vector<[8]xi1>
}
// CHECK-LABEL: func @constant_mask_1d_scalable_all_false
// CHECK: %[[VAL_0:.*]] = arith.constant dense<false> : vector<[8]xi1>
// CHECK: return %[[VAL_0]] : vector<[8]xi1>

// -----

// 1-D scalable mask with bound 8: expected to be an all-true splat.
func.func @constant_mask_1d_scalable_all_true() -> vector<[8]xi1> {
  %mask = vector.constant_mask [8] : vector<[8]xi1>
  return %mask : vector<[8]xi1>
}
// CHECK-LABEL: func @constant_mask_1d_scalable_all_true
// CHECK: %[[VAL_0:.*]] = arith.constant dense<true> : vector<[8]xi1>
// CHECK: return %[[VAL_0]] : vector<[8]xi1>

// -----

// 2-D fixed-width mask [2, 2]: the expected output inserts one 1-D row mask
// into rows 0 and 1 of an all-false 4x4 constant.
func.func @constant_mask_2d() -> vector<4x4xi1> {
  %mask = vector.constant_mask [2, 2] : vector<4x4xi1>
  return %mask : vector<4x4xi1>
}

// CHECK-LABEL: func @constant_mask_2d
// CHECK: %[[VAL_0:.*]] = arith.constant dense<[true, true, false, false]> : vector<4xi1>
// CHECK: %[[VAL_1:.*]] = arith.constant dense<false> : vector<4x4xi1>
// CHECK: %[[VAL_2:.*]] = builtin.unrealized_conversion_cast %[[VAL_1]] : vector<4x4xi1> to !llvm.array<4 x vector<4xi1>>
// CHECK: %[[VAL_3:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_2]][0] : !llvm.array<4 x vector<4xi1>>
// CHECK: %[[VAL_4:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_3]][1] : !llvm.array<4 x vector<4xi1>>
// CHECK: %[[VAL_5:.*]] = builtin.unrealized_conversion_cast %[[VAL_4]] : !llvm.array<4 x vector<4xi1>> to vector<4x4xi1>
// CHECK: return %[[VAL_5]] : vector<4x4xi1>

// -----

// 2-D mask [2, 4] whose trailing dimension is scalable: the expected output
// inserts an all-true scalable row into rows 0 and 1.
func.func @constant_mask_2d_trailing_scalable() -> vector<4x[4]xi1> {
  %mask = vector.constant_mask [2, 4] : vector<4x[4]xi1>
  return %mask : vector<4x[4]xi1>
}
// CHECK-LABEL: func.func @constant_mask_2d_trailing_scalable
// CHECK: %[[VAL_0:.*]] = arith.constant dense<true> : vector<[4]xi1>
// CHECK: %[[VAL_1:.*]] = arith.constant dense<false> : vector<4x[4]xi1>
// CHECK: %[[VAL_2:.*]] = builtin.unrealized_conversion_cast %[[VAL_1]] : vector<4x[4]xi1> to !llvm.array<4 x vector<[4]xi1>>
// CHECK: %[[VAL_3:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_2]][0] : !llvm.array<4 x vector<[4]xi1>>
// CHECK: %[[VAL_4:.*]] = llvm.insertvalue %[[VAL_0]], %[[VAL_3]][1] : !llvm.array<4 x vector<[4]xi1>>
// CHECK: %[[VAL_5:.*]] = builtin.unrealized_conversion_cast %[[VAL_4]] : !llvm.array<4 x vector<[4]xi1>> to vector<4x[4]xi1>
// CHECK: return %[[VAL_5]] : vector<4x[4]xi1>

// -----

/// Currently, this is not supported as generating the mask would require
/// unrolling the leading scalable dimension at compile time.
func.func @negative_constant_mask_2d_leading_scalable() -> vector<[4]x4xi1> {
  %0 = vector.constant_mask [4, 2] : vector<[4]x4xi1>
  return %0 : vector<[4]x4xi1>
}
// CHECK-LABEL: func.func @negative_constant_mask_2d_leading_scalable
// CHECK: %[[VAL_0:.*]] = vector.constant_mask [4, 2] : vector<[4]x4xi1>
// CHECK: return %[[VAL_0]] : vector<[4]x4xi1>

// -----

//===----------------------------------------------------------------------===//
// vector.create_mask
//===----------------------------------------------------------------------===//

/// 0-D mask: the bound is cast to i32, inserted into a 1-element vector, cast
/// back to a 0-D vector and compared (sgt) against a zero index constant.
func.func @create_mask_0d(%num_elems : index) -> vector<i1> {
  %v = vector.create_mask %num_elems : vector<i1>
  return %v: vector<i1>
}

// CHECK-LABEL: func @create_mask_0d
// CHECK-SAME: %[[NUM_ELEMS:.*]]: index
// CHECK: %[[INDICES:.*]] = arith.constant dense<0> : vector<i32>
// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
// CHECK: %[[BOUNDS:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]]
// CHECK: %[[BOUNDS_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOUNDS]] : vector<1xi32> to vector<i32>
// CHECK: %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS_CAST]], %[[INDICES]] : vector<i32>
// CHECK: return %[[RESULT]] : vector<i1>

// -----

/// Fixed-width 1-D mask: the bound is splatted (insertelement + shufflevector)
/// and compared (sgt) against the constant index vector [0, 1, 2, 3].
func.func @create_mask_1d(%num_elems : index) -> vector<4xi1> {
  %v = vector.create_mask %num_elems : vector<4xi1>
  return %v: vector<4xi1>
}

// CHECK-LABEL: func @create_mask_1d
// CHECK-SAME: %[[NUM_ELEMS:.*]]: index
// CHECK: %[[INDICES:.*]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32>
// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
// CHECK: %[[BOUNDS_INSERT:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]]
// CHECK: %[[BOUNDS:.*]] = llvm.shufflevector %[[BOUNDS_INSERT]]
// CHECK: %[[RESULT:.*]] = arith.cmpi sgt, %[[BOUNDS]], %[[INDICES]] : vector<4xi32>
// CHECK: return %[[RESULT]] : vector<4xi1>

// -----

/// Scalable 1-D mask: the indices come from llvm.intr.stepvector and the
/// comparison is expressed as `indices slt bounds`.
func.func @create_mask_1d_scalable(%num_elems : index) -> vector<[4]xi1> {
  %v = vector.create_mask %num_elems : vector<[4]xi1>
  return %v: vector<[4]xi1>
}

// CHECK-LABEL: func @create_mask_1d_scalable
// CHECK-SAME: %[[NUM_ELEMS:.*]]: index
// CHECK: %[[INDICES:.*]] = llvm.intr.stepvector : vector<[4]xi32>
// CHECK: %[[NUM_ELEMS_i32:.*]] = arith.index_cast %[[NUM_ELEMS]] : index to i32
// CHECK: %[[BOUNDS_INSERT:.*]] = llvm.insertelement %[[NUM_ELEMS_i32]], {{.*}} : vector<[4]xi32>
// CHECK: %[[BOUNDS:.*]] = llvm.shufflevector %[[BOUNDS_INSERT]], {{.*}} : vector<[4]xi32>
// CHECK: %[[RESULT:.*]] = arith.cmpi slt, %[[INDICES]], %[[BOUNDS]] : vector<[4]xi32>
// CHECK: return %[[RESULT]] : vector<[4]xi1>

// -----

//===----------------------------------------------------------------------===//
// vector.transpose
//===----------------------------------------------------------------------===//

/// Transposing a 0-D vector is expected to disappear: the function argument is
/// returned directly.
func.func @transpose_0d(%arg0: vector<f32>) -> vector<f32> {
  %0 = vector.transpose %arg0, [] : vector<f32> to vector<f32>
  return %0 : vector<f32>
}

// CHECK-LABEL: func @transpose_0d
// CHECK-SAME: %[[A:.*]]: vector<f32>
// CHECK: return %[[A]] : vector<f32>

// -----

//===----------------------------------------------------------------------===//
// vector.flat_transpose
//===----------------------------------------------------------------------===//

/// vector.flat_transpose is expected to become llvm.intr.matrix.transpose
/// carrying the same rows/columns attributes.
func.func @flat_transpose(%arg0: vector<16xf32>) -> vector<16xf32> {
  %0 = vector.flat_transpose %arg0 { rows = 4: i32, columns = 4: i32 }
     : vector<16xf32> -> vector<16xf32>
  return %0 : vector<16xf32>
}

// CHECK-LABEL: func @flat_transpose
// CHECK-SAME: %[[A:.*]]: vector<16xf32>
// CHECK: %[[T:.*]] = llvm.intr.matrix.transpose %[[A]]
// CHECK-SAME: {columns = 4 : i32, rows = 4 : i32} :
// CHECK-SAME: vector<16xf32> into vector<16xf32>
// CHECK: return %[[T]] : vector<16xf32>

// -----

/// Index-element variant: the operand and result are bridged to/from i64
/// vectors with unrealized conversion casts around the intrinsic.
func.func @flat_transpose_index(%arg0: vector<16xindex>) -> vector<16xindex> {
  %0 = vector.flat_transpose %arg0 { rows = 4: i32, columns = 4: i32 }
     : vector<16xindex> -> vector<16xindex>
  return %0 : vector<16xindex>
}
// CHECK-LABEL: func @flat_transpose_index
// CHECK-SAME: %[[A:.*]]: vector<16xindex>
// CHECK: %[[T0:.*]] = builtin.unrealized_conversion_cast %[[A]] : vector<16xindex> to vector<16xi64>
// CHECK: %[[T1:.*]] = llvm.intr.matrix.transpose %[[T0]]
// CHECK-SAME: {columns = 4 : i32, rows = 4 : i32} :
// CHECK-SAME: vector<16xi64> into vector<16xi64>
// CHECK: %[[T2:.*]] = builtin.unrealized_conversion_cast %[[T1]] : vector<16xi64> to vector<16xindex>
// CHECK: return %[[T2]] : vector<16xindex>

// -----

//===----------------------------------------------------------------------===//
// vector.load
//===----------------------------------------------------------------------===//

/// Fixed-width load from a 2-D memref: the indices are linearized using the
/// constant 100 (mul + add) and the result is a plain llvm.load with
/// alignment = 4.
func.func @load(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<8xf32> {
  %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<8xf32>
  return %0 : vector<8xf32>
}

// CHECK-LABEL: func @load
// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64
// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64
// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64} : !llvm.ptr -> vector<8xf32>

// -----

/// Scalable counterpart of @load: same address computation, scalable result.
func.func @load_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<[8]xf32> {
  %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<[8]xf32>
  return %0 : vector<[8]xf32>
}

// CHECK-LABEL: func @load_scalable
// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64
// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64
// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64} : !llvm.ptr -> vector<[8]xf32>

// -----

/// The `nontemporal = true` attribute is expected to be forwarded to llvm.load.
func.func @load_nontemporal(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<8xf32> {
  %0 = vector.load %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<8xf32>
  return %0 : vector<8xf32>
}

// CHECK-LABEL: func @load_nontemporal
// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64
// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64
// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64, nontemporal} : !llvm.ptr -> vector<8xf32>

// -----

/// Scalable counterpart of @load_nontemporal.
func.func @load_nontemporal_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<[8]xf32> {
  %0 = vector.load %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<[8]xf32>
  return %0 : vector<[8]xf32>
}

// CHECK-LABEL: func @load_nontemporal_scalable
// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64
// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64
// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: llvm.load %[[GEP]] {alignment = 4 : i64, nontemporal} : !llvm.ptr -> vector<[8]xf32>

// -----

/// Index elements: the load produces an i64 vector (alignment = 8) that is
/// cast back to a vector of index.
func.func @load_index(%memref : memref<200x100xindex>, %i : index, %j : index) -> vector<8xindex> {
  %0 = vector.load %memref[%i, %j] : memref<200x100xindex>, vector<8xindex>
  return %0 : vector<8xindex>
}
// CHECK-LABEL: func @load_index
// CHECK: %[[T0:.*]] = llvm.load %{{.*}} {alignment = 8 : i64} : !llvm.ptr -> vector<8xi64>
// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<8xi64> to vector<8xindex>
// CHECK: return %[[T1]] : vector<8xindex>

// -----

/// Scalable counterpart of @load_index.
func.func @load_index_scalable(%memref : memref<200x100xindex>, %i : index, %j : index) -> vector<[8]xindex> {
  %0 = vector.load %memref[%i, %j] : memref<200x100xindex>, vector<[8]xindex>
  return %0 : vector<[8]xindex>
}
// CHECK-LABEL: func @load_index_scalable
// CHECK: %[[T0:.*]] = llvm.load %{{.*}} {alignment = 8 : i64} : !llvm.ptr -> vector<[8]xi64>
// CHECK: %[[T1:.*]] = builtin.unrealized_conversion_cast %[[T0]] : vector<[8]xi64> to vector<[8]xindex>
// CHECK: return %[[T1]] : vector<[8]xindex>

// -----

/// 0-D load: the value is loaded as vector<1xf32> and cast back to vector<f32>.
/// The address is computed as I * 100 + J from the aligned pointer (field 1 of
/// the memref descriptor).
func.func @load_0d(%memref : memref<200x100xf32>, %i : index, %j : index) -> vector<f32> {
  %0 = vector.load %memref[%i, %j] : memref<200x100xf32>, vector<f32>
  return %0 : vector<f32>
}

// CHECK-LABEL: func @load_0d
// CHECK: %[[J:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
// CHECK: %[[CAST_MEMREF:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<200x100xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[REF:.*]] = llvm.extractvalue %[[CAST_MEMREF]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
// CHECK: %[[MUL:.*]] = llvm.mul %[[I]], %[[C100]] : i64
// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %[[J]] : i64
// CHECK: %[[ADDR:.*]] = llvm.getelementptr %[[REF]][%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: %[[LOAD:.*]] = llvm.load %[[ADDR]] {alignment = 4 : i64} : !llvm.ptr -> vector<1xf32>
// CHECK: %[[RES:.*]] = builtin.unrealized_conversion_cast %[[LOAD]] : vector<1xf32> to vector<f32>
// CHECK: return %[[RES]] : vector<f32>

// -----

//===----------------------------------------------------------------------===//
// vector.store
//===----------------------------------------------------------------------===//

/// Fixed-width store to a 2-D memref: the indices are linearized using the
/// constant 100 (mul + add) and the result is a plain llvm.store with
/// alignment = 4.
func.func @store(%memref : memref<200x100xf32>, %i : index, %j : index) {
  %val = arith.constant dense<11.0> : vector<4xf32>
  vector.store %val, %memref[%i, %j] : memref<200x100xf32>, vector<4xf32>
  return
}

// CHECK-LABEL: func @store
// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64
// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64
// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64} : vector<4xf32>, !llvm.ptr

// -----

/// Scalable counterpart of @store.
func.func @store_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) {
  %val = arith.constant dense<11.0> : vector<[4]xf32>
  vector.store %val, %memref[%i, %j] : memref<200x100xf32>, vector<[4]xf32>
  return
}

// CHECK-LABEL: func @store_scalable
// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64
// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64
// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64} : vector<[4]xf32>, !llvm.ptr

// -----

/// The `nontemporal = true` attribute is expected to be forwarded to llvm.store.
func.func @store_nontemporal(%memref : memref<200x100xf32>, %i : index, %j : index) {
  %val = arith.constant dense<11.0> : vector<4xf32>
  vector.store %val, %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<4xf32>
  return
}

// CHECK-LABEL: func @store_nontemporal
// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64
// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64
// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64, nontemporal} : vector<4xf32>, !llvm.ptr

// -----

/// Scalable counterpart of @store_nontemporal.
func.func @store_nontemporal_scalable(%memref : memref<200x100xf32>, %i : index, %j : index) {
  %val = arith.constant dense<11.0> : vector<[4]xf32>
  vector.store %val, %memref[%i, %j] {nontemporal = true} : memref<200x100xf32>, vector<[4]xf32>
  return
}

// CHECK-LABEL: func @store_nontemporal_scalable
// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
// CHECK: %[[MUL:.*]] = llvm.mul %{{.*}}, %[[C100]] : i64
// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %{{.*}} : i64
// CHECK: %[[GEP:.*]] = llvm.getelementptr %{{.*}}[%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: llvm.store %{{.*}}, %[[GEP]] {alignment = 4 : i64, nontemporal} : vector<[4]xf32>, !llvm.ptr

// -----

/// Index elements: the stored value is an i64 vector and alignment = 8.
func.func @store_index(%memref : memref<200x100xindex>, %i : index, %j : index) {
  %val = arith.constant dense<11> : vector<4xindex>
  vector.store %val, %memref[%i, %j] : memref<200x100xindex>, vector<4xindex>
  return
}
// CHECK-LABEL: func @store_index
// CHECK: llvm.store %{{.*}}, %{{.*}} {alignment = 8 : i64} : vector<4xi64>, !llvm.ptr

// -----

/// Scalable counterpart of @store_index.
func.func @store_index_scalable(%memref : memref<200x100xindex>, %i : index, %j : index) {
  %val = arith.constant dense<11> : vector<[4]xindex>
  vector.store %val, %memref[%i, %j] : memref<200x100xindex>, vector<[4]xindex>
  return
}
// CHECK-LABEL: func @store_index_scalable
// CHECK: llvm.store %{{.*}}, %{{.*}} {alignment = 8 : i64} : vector<[4]xi64>, !llvm.ptr

// -----

/// 0-D store: the constant is cast to vector<1xf32> before being stored. The
/// address is computed as I * 100 + J from the aligned pointer (field 1 of the
/// memref descriptor).
func.func @store_0d(%memref : memref<200x100xf32>, %i : index, %j : index) {
  %val = arith.constant dense<11.0> : vector<f32>
  vector.store %val, %memref[%i, %j] : memref<200x100xf32>, vector<f32>
  return
}

// CHECK-LABEL: func @store_0d
// CHECK: %[[J:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
// CHECK: %[[I:.*]] = builtin.unrealized_conversion_cast %{{.*}} : index to i64
// CHECK: %[[CAST_MEMREF:.*]] = builtin.unrealized_conversion_cast %{{.*}} : memref<200x100xf32> to !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[CST:.*]] = arith.constant dense<1.100000e+01> : vector<f32>
// CHECK: %[[VAL:.*]] = builtin.unrealized_conversion_cast %[[CST]] : vector<f32> to vector<1xf32>
// CHECK: %[[REF:.*]] = llvm.extractvalue %[[CAST_MEMREF]][1] : !llvm.struct<(ptr, ptr, i64, array<2 x i64>, array<2 x i64>)>
// CHECK: %[[C100:.*]] = llvm.mlir.constant(100 : index) : i64
// CHECK: %[[MUL:.*]] = llvm.mul %[[I]], %[[C100]] : i64
// CHECK: %[[ADD:.*]] = llvm.add %[[MUL]], %[[J]] : i64
// CHECK: %[[ADDR:.*]] = llvm.getelementptr %[[REF]][%[[ADD]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: llvm.store %[[VAL]], %[[ADDR]] {alignment = 4 : i64} : vector<1xf32>, !llvm.ptr
// CHECK: return

// -----

3424//===----------------------------------------------------------------------===// 3425// vector.maskedload 3426//===----------------------------------------------------------------------===// 3427 3428func.func @masked_load(%arg0: memref<?xf32>, %arg1: vector<16xi1>, %arg2: vector<16xf32>) -> vector<16xf32> { 3429 %c0 = arith.constant 0: index 3430 %0 = vector.maskedload %arg0[%c0], %arg1, %arg2 : memref<?xf32>, vector<16xi1>, vector<16xf32> into vector<16xf32> 3431 return %0 : vector<16xf32> 3432} 3433 3434// CHECK-LABEL: func @masked_load 3435// CHECK: %[[CO:.*]] = arith.constant 0 : index 3436// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64 3437// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 3438// CHECK: %[[L:.*]] = llvm.intr.masked.load %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.ptr, vector<16xi1>, vector<16xf32>) -> vector<16xf32> 3439// CHECK: return %[[L]] : vector<16xf32> 3440 3441// ----- 3442 3443func.func @masked_load_scalable(%arg0: memref<?xf32>, %arg1: vector<[16]xi1>, %arg2: vector<[16]xf32>) -> vector<[16]xf32> { 3444 %c0 = arith.constant 0: index 3445 %0 = vector.maskedload %arg0[%c0], %arg1, %arg2 : memref<?xf32>, vector<[16]xi1>, vector<[16]xf32> into vector<[16]xf32> 3446 return %0 : vector<[16]xf32> 3447} 3448 3449// CHECK-LABEL: func @masked_load_scalable 3450// CHECK: %[[CO:.*]] = arith.constant 0 : index 3451// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64 3452// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 3453// CHECK: %[[L:.*]] = llvm.intr.masked.load %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.ptr, vector<[16]xi1>, vector<[16]xf32>) -> vector<[16]xf32> 3454// CHECK: return %[[L]] : vector<[16]xf32> 3455 3456// ----- 3457 3458func.func @masked_load_index(%arg0: memref<?xindex>, %arg1: vector<16xi1>, %arg2: vector<16xindex>) -> vector<16xindex> { 3459 %c0 = 
arith.constant 0: index 3460 %0 = vector.maskedload %arg0[%c0], %arg1, %arg2 : memref<?xindex>, vector<16xi1>, vector<16xindex> into vector<16xindex> 3461 return %0 : vector<16xindex> 3462} 3463// CHECK-LABEL: func @masked_load_index 3464// CHECK: %{{.*}} = llvm.intr.masked.load %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : (!llvm.ptr, vector<16xi1>, vector<16xi64>) -> vector<16xi64> 3465 3466// ----- 3467 3468func.func @masked_load_index_scalable(%arg0: memref<?xindex>, %arg1: vector<[16]xi1>, %arg2: vector<[16]xindex>) -> vector<[16]xindex> { 3469 %c0 = arith.constant 0: index 3470 %0 = vector.maskedload %arg0[%c0], %arg1, %arg2 : memref<?xindex>, vector<[16]xi1>, vector<[16]xindex> into vector<[16]xindex> 3471 return %0 : vector<[16]xindex> 3472} 3473// CHECK-LABEL: func @masked_load_index_scalable 3474// CHECK: %{{.*}} = llvm.intr.masked.load %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : (!llvm.ptr, vector<[16]xi1>, vector<[16]xi64>) -> vector<[16]xi64> 3475 3476// ----- 3477 3478//===----------------------------------------------------------------------===// 3479// vector.maskedstore 3480//===----------------------------------------------------------------------===// 3481 3482func.func @masked_store(%arg0: memref<?xf32>, %arg1: vector<16xi1>, %arg2: vector<16xf32>) { 3483 %c0 = arith.constant 0: index 3484 vector.maskedstore %arg0[%c0], %arg1, %arg2 : memref<?xf32>, vector<16xi1>, vector<16xf32> 3485 return 3486} 3487 3488// CHECK-LABEL: func @masked_store 3489// CHECK: %[[CO:.*]] = arith.constant 0 : index 3490// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64 3491// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 3492// CHECK: llvm.intr.masked.store %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<16xf32>, vector<16xi1> into !llvm.ptr 3493 3494// ----- 3495 3496func.func @masked_store_scalable(%arg0: memref<?xf32>, %arg1: vector<[16]xi1>, %arg2: vector<[16]xf32>) { 3497 
%c0 = arith.constant 0: index 3498 vector.maskedstore %arg0[%c0], %arg1, %arg2 : memref<?xf32>, vector<[16]xi1>, vector<[16]xf32> 3499 return 3500} 3501 3502// CHECK-LABEL: func @masked_store_scalable 3503// CHECK: %[[CO:.*]] = arith.constant 0 : index 3504// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64 3505// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32 3506// CHECK: llvm.intr.masked.store %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<[16]xf32>, vector<[16]xi1> into !llvm.ptr 3507 3508// ----- 3509 3510func.func @masked_store_index(%arg0: memref<?xindex>, %arg1: vector<16xi1>, %arg2: vector<16xindex>) { 3511 %c0 = arith.constant 0: index 3512 vector.maskedstore %arg0[%c0], %arg1, %arg2 : memref<?xindex>, vector<16xi1>, vector<16xindex> 3513 return 3514} 3515// CHECK-LABEL: func @masked_store_index 3516// CHECK: llvm.intr.masked.store %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : vector<16xi64>, vector<16xi1> into !llvm.ptr 3517 3518// ----- 3519 3520func.func @masked_store_index_scalable(%arg0: memref<?xindex>, %arg1: vector<[16]xi1>, %arg2: vector<[16]xindex>) { 3521 %c0 = arith.constant 0: index 3522 vector.maskedstore %arg0[%c0], %arg1, %arg2 : memref<?xindex>, vector<[16]xi1>, vector<[16]xindex> 3523 return 3524} 3525// CHECK-LABEL: func @masked_store_index_scalable 3526// CHECK: llvm.intr.masked.store %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : vector<[16]xi64>, vector<[16]xi1> into !llvm.ptr 3527 3528// ----- 3529 3530//===----------------------------------------------------------------------===// 3531// vector.gather 3532//===----------------------------------------------------------------------===// 3533 3534func.func @gather(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) -> vector<3xf32> { 3535 %0 = arith.constant 0: index 3536 %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<3xi32>, 
vector<3xi1>, vector<3xf32> into vector<3xf32> 3537 return %1 : vector<3xf32> 3538} 3539 3540// CHECK-LABEL: func @gather 3541// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr>, f32 3542// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32> 3543// CHECK: return %[[G]] : vector<3xf32> 3544 3545// ----- 3546 3547func.func @gather_scalable(%arg0: memref<?xf32>, %arg1: vector<[3]xi32>, %arg2: vector<[3]xi1>, %arg3: vector<[3]xf32>) -> vector<[3]xf32> { 3548 %0 = arith.constant 0: index 3549 %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<[3]xi32>, vector<[3]xi1>, vector<[3]xf32> into vector<[3]xf32> 3550 return %1 : vector<[3]xf32> 3551} 3552 3553// CHECK-LABEL: func @gather_scalable 3554// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<[3]xi32>) -> !llvm.vec<? x 3 x ptr>, f32 3555// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<? 
x 3 x ptr>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32> 3556// CHECK: return %[[G]] : vector<[3]xf32> 3557 3558// ----- 3559 3560func.func @gather_global_memory(%arg0: memref<?xf32, 1>, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) -> vector<3xf32> { 3561 %0 = arith.constant 0: index 3562 %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32, 1>, vector<3xi32>, vector<3xi1>, vector<3xf32> into vector<3xf32> 3563 return %1 : vector<3xf32> 3564} 3565 3566// CHECK-LABEL: func @gather_global_memory 3567// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr<1>, vector<3xi32>) -> !llvm.vec<3 x ptr<1>>, f32 3568// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr<1>>, vector<3xi1>, vector<3xf32>) -> vector<3xf32> 3569// CHECK: return %[[G]] : vector<3xf32> 3570 3571// ----- 3572 3573func.func @gather_global_memory_scalable(%arg0: memref<?xf32, 1>, %arg1: vector<[3]xi32>, %arg2: vector<[3]xi1>, %arg3: vector<[3]xf32>) -> vector<[3]xf32> { 3574 %0 = arith.constant 0: index 3575 %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32, 1>, vector<[3]xi32>, vector<[3]xi1>, vector<[3]xf32> into vector<[3]xf32> 3576 return %1 : vector<[3]xf32> 3577} 3578 3579// CHECK-LABEL: func @gather_global_memory_scalable 3580// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr<1>, vector<[3]xi32>) -> !llvm.vec<? x 3 x ptr<1>>, f32 3581// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<? 
x 3 x ptr<1>>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32> 3582// CHECK: return %[[G]] : vector<[3]xf32> 3583 3584// ----- 3585 3586 3587func.func @gather_index(%arg0: memref<?xindex>, %arg1: vector<3xindex>, %arg2: vector<3xi1>, %arg3: vector<3xindex>) -> vector<3xindex> { 3588 %0 = arith.constant 0: index 3589 %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xindex>, vector<3xindex>, vector<3xi1>, vector<3xindex> into vector<3xindex> 3590 return %1 : vector<3xindex> 3591} 3592 3593// CHECK-LABEL: func @gather_index 3594// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi64>) -> !llvm.vec<3 x ptr>, i64 3595// CHECK: %[[G:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xi64>) -> vector<3xi64> 3596// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[G]] : vector<3xi64> to vector<3xindex> 3597 3598// ----- 3599 3600func.func @gather_index_scalable(%arg0: memref<?xindex>, %arg1: vector<[3]xindex>, %arg2: vector<[3]xi1>, %arg3: vector<[3]xindex>) -> vector<[3]xindex> { 3601 %0 = arith.constant 0: index 3602 %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xindex>, vector<[3]xindex>, vector<[3]xi1>, vector<[3]xindex> into vector<[3]xindex> 3603 return %1 : vector<[3]xindex> 3604} 3605 3606// CHECK-LABEL: func @gather_index_scalable 3607// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<[3]xi64>) -> !llvm.vec<? x 3 x ptr>, i64 3608// CHECK: %[[G:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 8 : i32} : (!llvm.vec<? 
x 3 x ptr>, vector<[3]xi1>, vector<[3]xi64>) -> vector<[3]xi64> 3609// CHECK: %{{.*}} = builtin.unrealized_conversion_cast %[[G]] : vector<[3]xi64> to vector<[3]xindex> 3610 3611// ----- 3612 3613func.func @gather_2d_from_1d(%arg0: memref<?xf32>, %arg1: vector<2x3xi32>, %arg2: vector<2x3xi1>, %arg3: vector<2x3xf32>) -> vector<2x3xf32> { 3614 %0 = arith.constant 0: index 3615 %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<2x3xi32>, vector<2x3xi1>, vector<2x3xf32> into vector<2x3xf32> 3616 return %1 : vector<2x3xf32> 3617} 3618 3619// CHECK-LABEL: func @gather_2d_from_1d 3620// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32 3621// CHECK: %[[I0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<3xi32>> 3622// CHECK: %[[M0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<3xi1>> 3623// CHECK: %[[S0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<3xf32>> 3624// CHECK: %[[P0:.*]] = llvm.getelementptr %[[B]][%[[I0]]] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr>, f32 3625// CHECK: %[[G0:.*]] = llvm.intr.masked.gather %[[P0]], %[[M0]], %[[S0]] {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32> 3626// CHECK: %{{.*}} = llvm.insertvalue %[[G0]], %{{.*}}[0] : !llvm.array<2 x vector<3xf32>> 3627// CHECK: %[[I1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<3xi32>> 3628// CHECK: %[[M1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<3xi1>> 3629// CHECK: %[[S1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<3xf32>> 3630// CHECK: %[[P1:.*]] = llvm.getelementptr %[[B]][%[[I1]]] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr>, f32 3631// CHECK: %[[G1:.*]] = llvm.intr.masked.gather %[[P1]], %[[M1]], %[[S1]] {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32> 3632// CHECK: %{{.*}} = llvm.insertvalue %[[G1]], %{{.*}}[1] : !llvm.array<2 x vector<3xf32>> 3633 3634// 
// -----

// 2-D scalable gather from a 1-D memref: the checks below expect the operands
// to be unpacked row by row, with one masked gather per row.
func.func @gather_2d_from_1d_scalable(%arg0: memref<?xf32>, %arg1: vector<2x[3]xi32>, %arg2: vector<2x[3]xi1>, %arg3: vector<2x[3]xf32>) -> vector<2x[3]xf32> {
  %0 = arith.constant 0: index
  %1 = vector.gather %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<2x[3]xi32>, vector<2x[3]xi1>, vector<2x[3]xf32> into vector<2x[3]xf32>
  return %1 : vector<2x[3]xf32>
}

// CHECK-LABEL: func @gather_2d_from_1d_scalable
// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}} : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: %[[I0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<[3]xi32>>
// CHECK: %[[M0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<[3]xi1>>
// CHECK: %[[S0:.*]] = llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<[3]xf32>>
// CHECK: %[[P0:.*]] = llvm.getelementptr %[[B]][%[[I0]]] : (!llvm.ptr, vector<[3]xi32>) -> !llvm.vec<? x 3 x ptr>, f32
// CHECK: %[[G0:.*]] = llvm.intr.masked.gather %[[P0]], %[[M0]], %[[S0]] {alignment = 4 : i32} : (!llvm.vec<? x 3 x ptr>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32>
// CHECK: %{{.*}} = llvm.insertvalue %[[G0]], %{{.*}}[0] : !llvm.array<2 x vector<[3]xf32>>
// CHECK: %[[I1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<[3]xi32>>
// CHECK: %[[M1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<[3]xi1>>
// CHECK: %[[S1:.*]] = llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<[3]xf32>>
// CHECK: %[[P1:.*]] = llvm.getelementptr %[[B]][%[[I1]]] : (!llvm.ptr, vector<[3]xi32>) -> !llvm.vec<? x 3 x ptr>, f32
// CHECK: %[[G1:.*]] = llvm.intr.masked.gather %[[P1]], %[[M1]], %[[S1]] {alignment = 4 : i32} : (!llvm.vec<? x 3 x ptr>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32>
// CHECK: %{{.*}} = llvm.insertvalue %[[G1]], %{{.*}}[1] : !llvm.array<2 x vector<[3]xf32>>

// -----

// The mask is produced by vector.constant_mask; one masked gather per row is
// still expected.
func.func @gather_with_mask(%arg0: memref<?xf32>, %arg1: vector<2x3xi32>, %arg2: vector<2x3xf32>) -> vector<2x3xf32> {
  %0 = arith.constant 0: index
  %1 = vector.constant_mask [1, 2] : vector<2x3xi1>
  %2 = vector.gather %arg0[%0][%arg1], %1, %arg2 : memref<?xf32>, vector<2x3xi32>, vector<2x3xi1>, vector<2x3xf32> into vector<2x3xf32>
  return %2 : vector<2x3xf32>
}

// CHECK-LABEL: func @gather_with_mask
// CHECK: %[[G0:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32>
// CHECK: %[[G1:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<3 x ptr>, vector<3xi1>, vector<3xf32>) -> vector<3xf32>

// -----

func.func @gather_with_mask_scalable(%arg0: memref<?xf32>, %arg1: vector<2x[3]xi32>, %arg2: vector<2x[3]xf32>) -> vector<2x[3]xf32> {
  %0 = arith.constant 0: index
  // vector.constant_mask only supports 'none set' or 'all set' scalable
  // dimensions, hence [1, 3] rather than [1, 2] as in the example for fixed
  // width vectors above.
  %1 = vector.constant_mask [1, 3] : vector<2x[3]xi1>
  %2 = vector.gather %arg0[%0][%arg1], %1, %arg2 : memref<?xf32>, vector<2x[3]xi32>, vector<2x[3]xi1>, vector<2x[3]xf32> into vector<2x[3]xf32>
  return %2 : vector<2x[3]xf32>
}

// CHECK-LABEL: func @gather_with_mask_scalable
// CHECK: %[[G0:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<? x 3 x ptr>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32>
// CHECK: %[[G1:.*]] = llvm.intr.masked.gather %{{.*}}, %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<? x 3 x ptr>, vector<[3]xi1>, vector<[3]xf32>) -> vector<[3]xf32>

// -----

// With an all-false constant mask no masked gather is expected; the
// pass-through operand is returned unchanged.
func.func @gather_with_zero_mask(%arg0: memref<?xf32>, %arg1: vector<2x3xi32>, %arg2: vector<2x3xf32>) -> vector<2x3xf32> {
  %0 = arith.constant 0: index
  %1 = vector.constant_mask [0, 0] : vector<2x3xi1>
  %2 = vector.gather %arg0[%0][%arg1], %1, %arg2 : memref<?xf32>, vector<2x3xi32>, vector<2x3xi1>, vector<2x3xf32> into vector<2x3xf32>
  return %2 : vector<2x3xf32>
}

// CHECK-LABEL: func @gather_with_zero_mask
// CHECK-SAME: (%{{.*}}: memref<?xf32>, %{{.*}}: vector<2x3xi32>, %[[S:.*]]: vector<2x3xf32>)
// CHECK-NOT: %{{.*}} = llvm.intr.masked.gather
// CHECK: return %[[S]] : vector<2x3xf32>

// -----

// Scalable variant of the all-false mask case above.
func.func @gather_with_zero_mask_scalable(%arg0: memref<?xf32>, %arg1: vector<2x[3]xi32>, %arg2: vector<2x[3]xf32>) -> vector<2x[3]xf32> {
  %0 = arith.constant 0: index
  %1 = vector.constant_mask [0, 0] : vector<2x[3]xi1>
  %2 = vector.gather %arg0[%0][%arg1], %1, %arg2 : memref<?xf32>, vector<2x[3]xi32>, vector<2x[3]xi1>, vector<2x[3]xf32> into vector<2x[3]xf32>
  return %2 : vector<2x[3]xf32>
}

// CHECK-LABEL: func @gather_with_zero_mask_scalable
// CHECK-SAME: (%{{.*}}: memref<?xf32>, %{{.*}}: vector<2x[3]xi32>, %[[S:.*]]: vector<2x[3]xf32>)
// CHECK-NOT: %{{.*}} = llvm.intr.masked.gather
// CHECK: return %[[S]] : vector<2x[3]xf32>

// -----

// 1-D gather from a 2-D memref: a scalar base address is expected first, then
// the vector of pointers derived from the index vector.
func.func @gather_1d_from_2d(%arg0: memref<4x4xf32>, %arg1: vector<4xi32>, %arg2: vector<4xi1>, %arg3: vector<4xf32>) -> vector<4xf32> {
  %0 = arith.constant 3 : index
  %1 = vector.gather %arg0[%0, %0][%arg1], %arg2, %arg3 : memref<4x4xf32>, vector<4xi32>, vector<4xi1>, vector<4xf32> into vector<4xf32>
  return %1 : vector<4xf32>
}

// CHECK-LABEL: func @gather_1d_from_2d
// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: %[[P:.*]] = llvm.getelementptr %[[B]][%{{.*}}] : (!llvm.ptr, vector<4xi32>) -> !llvm.vec<4 x ptr>, f32
// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<4 x ptr>, vector<4xi1>, vector<4xf32>) -> vector<4xf32>
// CHECK: return %[[G]] : vector<4xf32>

// -----

func.func @gather_1d_from_2d_scalable(%arg0: memref<4x?xf32>, %arg1: vector<[4]xi32>, %arg2: vector<[4]xi1>, %arg3: vector<[4]xf32>) -> vector<[4]xf32> {
  %0 = arith.constant 3 : index
  %1 = vector.gather %arg0[%0, %0][%arg1], %arg2, %arg3 : memref<4x?xf32>, vector<[4]xi32>, vector<[4]xi1>, vector<[4]xf32> into vector<[4]xf32>
  return %1 : vector<[4]xf32>
}

// CHECK-LABEL: func @gather_1d_from_2d_scalable
// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: %[[P:.*]] = llvm.getelementptr %[[B]][%{{.*}}] : (!llvm.ptr, vector<[4]xi32>) -> !llvm.vec<? x 4 x ptr>, f32
// CHECK: %[[G:.*]] = llvm.intr.masked.gather %[[P]], %{{.*}}, %{{.*}} {alignment = 4 : i32} : (!llvm.vec<? x 4 x ptr>, vector<[4]xi1>, vector<[4]xf32>) -> vector<[4]xf32>
// CHECK: return %[[G]] : vector<[4]xf32>

// -----

//===----------------------------------------------------------------------===//
// vector.scatter
//===----------------------------------------------------------------------===//

func.func @scatter(%arg0: memref<?xf32>, %arg1: vector<3xi32>, %arg2: vector<3xi1>, %arg3: vector<3xf32>) {
  %0 = arith.constant 0: index
  vector.scatter %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<3xi32>, vector<3xi1>, vector<3xf32>
  return
}

// CHECK-LABEL: func @scatter
// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi32>) -> !llvm.vec<3 x ptr>, f32
// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<3xf32>, vector<3xi1> into !llvm.vec<3 x ptr>

// -----

func.func @scatter_scalable(%arg0: memref<?xf32>, %arg1: vector<[3]xi32>, %arg2: vector<[3]xi1>, %arg3: vector<[3]xf32>) {
  %0 = arith.constant 0: index
  vector.scatter %arg0[%0][%arg1], %arg2, %arg3 : memref<?xf32>, vector<[3]xi32>, vector<[3]xi1>, vector<[3]xf32>
  return
}

// CHECK-LABEL: func @scatter_scalable
// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<[3]xi32>) -> !llvm.vec<? x 3 x ptr>, f32
// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<[3]xf32>, vector<[3]xi1> into !llvm.vec<? x 3 x ptr>

// -----

// index elements: the checks expect i64 vectors and an alignment of 8.
func.func @scatter_index(%arg0: memref<?xindex>, %arg1: vector<3xindex>, %arg2: vector<3xi1>, %arg3: vector<3xindex>) {
  %0 = arith.constant 0: index
  vector.scatter %arg0[%0][%arg1], %arg2, %arg3 : memref<?xindex>, vector<3xindex>, vector<3xi1>, vector<3xindex>
  return
}

// CHECK-LABEL: func @scatter_index
// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<3xi64>) -> !llvm.vec<3 x ptr>, i64
// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 8 : i32} : vector<3xi64>, vector<3xi1> into !llvm.vec<3 x ptr>

// -----

func.func @scatter_index_scalable(%arg0: memref<?xindex>, %arg1: vector<[3]xindex>, %arg2: vector<[3]xi1>, %arg3: vector<[3]xindex>) {
  %0 = arith.constant 0: index
  vector.scatter %arg0[%0][%arg1], %arg2, %arg3 : memref<?xindex>, vector<[3]xindex>, vector<[3]xi1>, vector<[3]xindex>
  return
}

// CHECK-LABEL: func @scatter_index_scalable
// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, vector<[3]xi64>) -> !llvm.vec<? x 3 x ptr>, i64
// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 8 : i32} : vector<[3]xi64>, vector<[3]xi1> into !llvm.vec<? x 3 x ptr>

// -----

// 1-D scatter into a 2-D memref: a scalar base address is expected first, then
// the vector of pointers derived from the index vector.
func.func @scatter_1d_into_2d(%arg0: memref<4x4xf32>, %arg1: vector<4xi32>, %arg2: vector<4xi1>, %arg3: vector<4xf32>) {
  %0 = arith.constant 3 : index
  vector.scatter %arg0[%0, %0][%arg1], %arg2, %arg3 : memref<4x4xf32>, vector<4xi32>, vector<4xi1>, vector<4xf32>
  return
}

// CHECK-LABEL: func @scatter_1d_into_2d
// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: %[[P:.*]] = llvm.getelementptr %[[B]][%{{.*}}] : (!llvm.ptr, vector<4xi32>) -> !llvm.vec<4 x ptr>, f32
// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<4xf32>, vector<4xi1> into !llvm.vec<4 x ptr>

// -----

func.func @scatter_1d_into_2d_scalable(%arg0: memref<4x?xf32>, %arg1: vector<[4]xi32>, %arg2: vector<[4]xi1>, %arg3: vector<[4]xf32>) {
  %0 = arith.constant 3 : index
  vector.scatter %arg0[%0, %0][%arg1], %arg2, %arg3 : memref<4x?xf32>, vector<[4]xi32>, vector<[4]xi1>, vector<[4]xf32>
  return
}

// CHECK-LABEL: func @scatter_1d_into_2d_scalable
// CHECK: %[[B:.*]] = llvm.getelementptr %{{.*}}[%{{.*}}] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: %[[P:.*]] = llvm.getelementptr %[[B]][%{{.*}}] : (!llvm.ptr, vector<[4]xi32>) -> !llvm.vec<? x 4 x ptr>, f32
// CHECK: llvm.intr.masked.scatter %{{.*}}, %[[P]], %{{.*}} {alignment = 4 : i32} : vector<[4]xf32>, vector<[4]xi1> into !llvm.vec<? x 4 x ptr>

// -----

//===----------------------------------------------------------------------===//
// vector.expandload
//===----------------------------------------------------------------------===//

func.func @expand_load_op(%arg0: memref<?xf32>, %arg1: vector<11xi1>, %arg2: vector<11xf32>) -> vector<11xf32> {
  %c0 = arith.constant 0: index
  %0 = vector.expandload %arg0[%c0], %arg1, %arg2 : memref<?xf32>, vector<11xi1>, vector<11xf32> into vector<11xf32>
  return %0 : vector<11xf32>
}

// CHECK-LABEL: func @expand_load_op
// CHECK: %[[CO:.*]] = arith.constant 0 : index
// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64
// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: %[[E:.*]] = "llvm.intr.masked.expandload"(%[[P]], %{{.*}}, %{{.*}}) : (!llvm.ptr, vector<11xi1>, vector<11xf32>) -> vector<11xf32>
// CHECK: return %[[E]] : vector<11xf32>

// -----

// index elements: the intrinsic is expected to operate on i64 vectors.
func.func @expand_load_op_index(%arg0: memref<?xindex>, %arg1: vector<11xi1>, %arg2: vector<11xindex>) -> vector<11xindex> {
  %c0 = arith.constant 0: index
  %0 = vector.expandload %arg0[%c0], %arg1, %arg2 : memref<?xindex>, vector<11xi1>, vector<11xindex> into vector<11xindex>
  return %0 : vector<11xindex>
}
// CHECK-LABEL: func @expand_load_op_index
// CHECK: %{{.*}} = "llvm.intr.masked.expandload"(%{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, vector<11xi1>, vector<11xi64>) -> vector<11xi64>

// -----

//===----------------------------------------------------------------------===//
// vector.compressstore
//===----------------------------------------------------------------------===//

func.func @compress_store_op(%arg0: memref<?xf32>, %arg1: vector<11xi1>, %arg2: vector<11xf32>) {
  %c0 = arith.constant 0: index
  vector.compressstore %arg0[%c0], %arg1, %arg2 : memref<?xf32>, vector<11xi1>, vector<11xf32>
  return
}

// CHECK-LABEL: func @compress_store_op
// CHECK: %[[CO:.*]] = arith.constant 0 : index
// CHECK: %[[C:.*]] = builtin.unrealized_conversion_cast %[[CO]] : index to i64
// CHECK: %[[P:.*]] = llvm.getelementptr %{{.*}}[%[[C]]] : (!llvm.ptr, i64) -> !llvm.ptr, f32
// CHECK: "llvm.intr.masked.compressstore"(%{{.*}}, %[[P]], %{{.*}}) : (vector<11xf32>, !llvm.ptr, vector<11xi1>) -> ()

// -----

// index elements: the intrinsic is expected to operate on i64 vectors.
func.func @compress_store_op_index(%arg0: memref<?xindex>, %arg1: vector<11xi1>, %arg2: vector<11xindex>) {
  %c0 = arith.constant 0: index
  vector.compressstore %arg0[%c0], %arg1, %arg2 : memref<?xindex>, vector<11xi1>, vector<11xindex>
  return
}
// CHECK-LABEL: func @compress_store_op_index
// CHECK: "llvm.intr.masked.compressstore"(%{{.*}}, %{{.*}}, %{{.*}}) : (vector<11xi64>, !llvm.ptr, vector<11xi1>) -> ()

// -----

//===----------------------------------------------------------------------===//
// vector.splat
//===----------------------------------------------------------------------===//

// A 0-d splat is expected to be built as a 1-element vector and then cast back
// to the 0-d vector type.
// CHECK-LABEL: @splat_0d
// CHECK-SAME: %[[ELT:.*]]: f32
func.func @splat_0d(%elt: f32) -> vector<f32> {
  %v = vector.splat %elt : vector<f32>
  return %v : vector<f32>
}
// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<1xf32>
// CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32
// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<1xf32>
// CHECK-NEXT: %[[VCAST:[0-9]+]] = builtin.unrealized_conversion_cast %[[V]] : vector<1xf32> to vector<f32>
// CHECK-NEXT: return %[[VCAST]] : vector<f32>

// -----

// A 1-d splat is expected to become an insertelement at lane 0 followed by a
// shufflevector with an all-zero mask.
// CHECK-LABEL: @splat
// CHECK-SAME: %[[VEC:[0-9a-zA-Z]+]]: vector<4xf32>
// CHECK-SAME: %[[ELT:[0-9a-zA-Z]+]]: f32
func.func @splat(%vec: vector<4xf32>, %elt: f32) -> vector<4xf32> {
  %vb = vector.splat %elt : vector<4xf32>
  %r = arith.mulf %vec, %vb : vector<4xf32>
  return %r : vector<4xf32>
}
// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<4xf32>
// CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32
// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<4xf32>
// CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0, 0, 0, 0]
// CHECK-NEXT: %[[SCALE:[0-9]+]] = arith.mulf %[[VEC]], %[[SPLAT]] : vector<4xf32>
// CHECK-NEXT: return %[[SCALE]] : vector<4xf32>

// -----

// CHECK-LABEL: @splat_scalable
// CHECK-SAME: %[[VEC:[0-9a-zA-Z]+]]: vector<[4]xf32>
// CHECK-SAME: %[[ELT:[0-9a-zA-Z]+]]: f32
func.func @splat_scalable(%vec: vector<[4]xf32>, %elt: f32) -> vector<[4]xf32> {
  %vb = vector.splat %elt : vector<[4]xf32>
  %r = arith.mulf %vec, %vb : vector<[4]xf32>
  return %r : vector<[4]xf32>
}
// CHECK-NEXT: %[[UNDEF:[0-9]+]] = llvm.mlir.undef : vector<[4]xf32>
// CHECK-NEXT: %[[ZERO:[0-9]+]] = llvm.mlir.constant(0 : i32) : i32
// CHECK-NEXT: %[[V:[0-9]+]] = llvm.insertelement %[[ELT]], %[[UNDEF]][%[[ZERO]] : i32] : vector<[4]xf32>
// CHECK-NEXT: %[[SPLAT:[0-9]+]] = llvm.shufflevector %[[V]], %[[UNDEF]] [0, 0, 0, 0]
// CHECK-NEXT: %[[SCALE:[0-9]+]] = arith.mulf %[[VEC]], %[[SPLAT]] : vector<[4]xf32>
// CHECK-NEXT: return %[[SCALE]] : vector<[4]xf32>

// -----

//===----------------------------------------------------------------------===//
// vector.scalable.insert
//===----------------------------------------------------------------------===//

// Two chained inserts of the same fixed-width sub-vector, at offsets 0 and 4;
// the second insert consumes the result of the first.
// CHECK-LABEL: @scalable_insert
// CHECK-SAME: %[[SUB:.*]]: vector<4xf32>, %[[SV:.*]]: vector<[4]xf32>
func.func @scalable_insert(%sub: vector<4xf32>, %dsv: vector<[4]xf32>) -> vector<[4]xf32> {
  // CHECK-NEXT: %[[TMP:.*]] = llvm.intr.vector.insert %[[SUB]], %[[SV]][0] : vector<4xf32> into vector<[4]xf32>
  %0 = vector.scalable.insert %sub, %dsv[0] : vector<4xf32> into vector<[4]xf32>
  // CHECK-NEXT: llvm.intr.vector.insert %[[SUB]], %[[TMP]][4] : vector<4xf32> into vector<[4]xf32>
  %1 = vector.scalable.insert %sub, %0[4] : vector<4xf32> into vector<[4]xf32>
  return %1 : vector<[4]xf32>
}

// -----

//===----------------------------------------------------------------------===//
// vector.scalable.extract
//===----------------------------------------------------------------------===//

// CHECK-LABEL: @scalable_extract
// CHECK-SAME: %[[VEC:.*]]: vector<[4]xf32>
func.func @scalable_extract(%vec: vector<[4]xf32>) -> vector<8xf32> {
  // CHECK-NEXT: %{{.*}} = llvm.intr.vector.extract %[[VEC]][0] : vector<8xf32> from vector<[4]xf32>
  %0 = vector.scalable.extract %vec[0] : vector<8xf32> from vector<[4]xf32>
  return %0 : vector<8xf32>
}

// -----

//===----------------------------------------------------------------------===//
// vector.interleave
//===----------------------------------------------------------------------===//

// 0-d operands are expected to be cast to 1-element vectors before the shuffle.
// CHECK-LABEL: @interleave_0d
// CHECK-SAME: %[[LHS:.*]]: vector<i8>, %[[RHS:.*]]: vector<i8>)
func.func @interleave_0d(%a: vector<i8>, %b: vector<i8>) -> vector<2xi8> {
  // CHECK-DAG: %[[LHS_RANK1:.*]] = builtin.unrealized_conversion_cast %[[LHS]] : vector<i8> to vector<1xi8>
  // CHECK-DAG: %[[RHS_RANK1:.*]] = builtin.unrealized_conversion_cast %[[RHS]] : vector<i8> to vector<1xi8>
  // CHECK: %[[ZIP:.*]] = llvm.shufflevector %[[LHS_RANK1]], %[[RHS_RANK1]] [0, 1] : vector<1xi8>
  // CHECK: return %[[ZIP]]
  %0 = vector.interleave %a, %b : vector<i8> -> vector<2xi8>
  return %0 : vector<2xi8>
}

// -----

// CHECK-LABEL: @interleave_1d
// CHECK-SAME: %[[LHS:.*]]: vector<8xf32>, %[[RHS:.*]]: vector<8xf32>)
func.func @interleave_1d(%a: vector<8xf32>, %b: vector<8xf32>) -> vector<16xf32> {
  // CHECK: %[[ZIP:.*]] = llvm.shufflevector %[[LHS]], %[[RHS]] [0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15] : vector<8xf32>
  // CHECK: return %[[ZIP]]
  %0 = vector.interleave %a, %b : vector<8xf32> -> vector<16xf32>
  return %0 : vector<16xf32>
}

// -----

// Scalable operands are expected to use the interleave2 intrinsic rather than
// a shufflevector.
// CHECK-LABEL: @interleave_1d_scalable
// CHECK-SAME: %[[LHS:.*]]: vector<[4]xi32>, %[[RHS:.*]]: vector<[4]xi32>)
func.func @interleave_1d_scalable(%a: vector<[4]xi32>, %b: vector<[4]xi32>) -> vector<[8]xi32> {
  // CHECK: %[[ZIP:.*]] = "llvm.intr.vector.interleave2"(%[[LHS]], %[[RHS]]) : (vector<[4]xi32>, vector<[4]xi32>) -> vector<[8]xi32>
  // CHECK: return %[[ZIP]]
  %0 = vector.interleave %a, %b : vector<[4]xi32> -> vector<[8]xi32>
  return %0 : vector<[8]xi32>
}

// -----

// 2-d case: only verifies that a shufflevector appears and that no 2-d
// vector.interleave remains afterwards.
// CHECK-LABEL: @interleave_2d
// CHECK-SAME: %[[LHS:.*]]: vector<2x3xi8>, %[[RHS:.*]]: vector<2x3xi8>)
func.func @interleave_2d(%a: vector<2x3xi8>, %b: vector<2x3xi8>) -> vector<2x6xi8> {
  // CHECK: llvm.shufflevector
  // CHECK-NOT: vector.interleave {{.*}} : vector<2x3xi8>
  %0 = vector.interleave %a, %b : vector<2x3xi8> -> vector<2x6xi8>
  return %0 : vector<2x6xi8>
}

// -----

// CHECK-LABEL: @interleave_2d_scalable
// CHECK-SAME: %[[LHS:.*]]: vector<2x[8]xi16>, %[[RHS:.*]]: vector<2x[8]xi16>)
func.func @interleave_2d_scalable(%a: vector<2x[8]xi16>, %b: vector<2x[8]xi16>) -> vector<2x[16]xi16> {
  // CHECK: llvm.intr.vector.interleave2
  // CHECK-NOT: vector.interleave {{.*}} : vector<2x[8]xi16>
  %0 = vector.interleave %a, %b : vector<2x[8]xi16> -> vector<2x[16]xi16>
  return %0 : vector<2x[16]xi16>
}

// -----

//===----------------------------------------------------------------------===//
// vector.deinterleave
//===----------------------------------------------------------------------===//

// Fixed-width case: two shuffles against a poison vector are expected, one
// selecting the even lanes and one the odd lanes.
// CHECK-LABEL: @deinterleave_1d
// CHECK-SAME: (%[[ARG:.*]]: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>)
func.func @deinterleave_1d(%arg: vector<4xi32>) -> (vector<2xi32>, vector<2xi32>) {
  // CHECK: %[[POISON:.*]] = llvm.mlir.poison : vector<4xi32>
  // CHECK: llvm.shufflevector %[[ARG]], %[[POISON]] [0, 2] : vector<4xi32>
  // CHECK: llvm.shufflevector %[[ARG]], %[[POISON]] [1, 3] : vector<4xi32>
  %0, %1 = vector.deinterleave %arg : vector<4xi32> -> vector<2xi32>
  return %0, %1 : vector<2xi32>, vector<2xi32>
}

// -----

// Scalable case: the deinterleave2 intrinsic returns a struct whose two fields
// are extracted as the results.
// CHECK-LABEL: @deinterleave_1d_scalable
// CHECK-SAME: %[[ARG:.*]]: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>)
func.func @deinterleave_1d_scalable(%arg: vector<[4]xi32>) -> (vector<[2]xi32>, vector<[2]xi32>) {
  // CHECK: %[[RES:.*]] = "llvm.intr.vector.deinterleave2"(%[[ARG]]) : (vector<[4]xi32>) -> !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
  // CHECK: llvm.extractvalue %[[RES]][0] : !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
  // CHECK: llvm.extractvalue %[[RES]][1] : !llvm.struct<(vector<[2]xi32>, vector<[2]xi32>)>
  %0, %1 = vector.deinterleave %arg : vector<[4]xi32> -> vector<[2]xi32>
  return %0, %1 : vector<[2]xi32>, vector<[2]xi32>
}

// -----

// CHECK-LABEL: @deinterleave_2d
// CHECK-SAME: %[[ARG:.*]]: vector<2x8xf32>) -> (vector<2x4xf32>, vector<2x4xf32>)
func.func @deinterleave_2d(%arg: vector<2x8xf32>) -> (vector<2x4xf32>, vector<2x4xf32>) {
  // CHECK: llvm.shufflevector
  // CHECK-NOT: vector.deinterleave %{{.*}} : vector<2x8xf32>
  %0, %1 = vector.deinterleave %arg : vector<2x8xf32> -> vector<2x4xf32>
  return %0, %1 : vector<2x4xf32>, vector<2x4xf32>
}

// -----

// Anchored with a label and signature match like the fixed-width 2-d case, so
// a failure is reported against this function.
// CHECK-LABEL: @deinterleave_2d_scalable
// CHECK-SAME: %[[ARG:.*]]: vector<2x[8]xf32>) -> (vector<2x[4]xf32>, vector<2x[4]xf32>)
func.func @deinterleave_2d_scalable(%arg: vector<2x[8]xf32>) -> (vector<2x[4]xf32>, vector<2x[4]xf32>) {
  // CHECK: llvm.intr.vector.deinterleave2
  // CHECK-NOT: vector.deinterleave %{{.*}} : vector<2x[8]xf32>
  %0, %1 = vector.deinterleave %arg : vector<2x[8]xf32> -> vector<2x[4]xf32>
  return %0, %1 : vector<2x[4]xf32>, vector<2x[4]xf32>
}

// -----

//===----------------------------------------------------------------------===//
// vector.from_elements
//===----------------------------------------------------------------------===//

// The vector is expected to be built by successive insertelement ops at
// constant positions 0, 1 and 2, starting from an undef vector.
// CHECK-LABEL: func.func @from_elements_1d(
// CHECK-SAME: %[[ARG_0:.*]]: f32, %[[ARG_1:.*]]: f32)
// CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : vector<3xf32>
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
// CHECK: %[[INSERT0:.*]] = llvm.insertelement %[[ARG_0]], %[[UNDEF]][%[[C0]] : i64] : vector<3xf32>
// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[INSERT1:.*]] = llvm.insertelement %[[ARG_1]], %[[INSERT0]][%[[C1]] : i64] : vector<3xf32>
// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64
// CHECK: %[[INSERT2:.*]] = llvm.insertelement %[[ARG_0]], %[[INSERT1]][%[[C2]] : i64] : vector<3xf32>
// CHECK: return %[[INSERT2]]
func.func @from_elements_1d(%arg0: f32, %arg1: f32) -> vector<3xf32> {
  %0 = vector.from_elements %arg0, %arg1, %arg0 : vector<3xf32>
  return %0 : vector<3xf32>
}

// -----

// 0-d result: built as a 1-element vector and cast back to the 0-d type.
// CHECK-LABEL: func.func @from_elements_0d(
// CHECK-SAME: %[[ARG_0:.*]]: f32)
// CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : vector<1xf32>
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
// CHECK: %[[INSERT0:.*]] = llvm.insertelement %[[ARG_0]], %[[UNDEF]][%[[C0]] : i64] : vector<1xf32>
// CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %[[INSERT0]] : vector<1xf32> to vector<f32>
// CHECK: return %[[CAST]]
func.func @from_elements_0d(%arg0: f32) -> vector<f32> {
  %0 = vector.from_elements %arg0 : vector<f32>
  return %0 : vector<f32>
}

// -----

//===----------------------------------------------------------------------===//
// vector.step
//===----------------------------------------------------------------------===//

// Fixed-width step: the result is expected to be a dense index constant
// holding 0..3.
// CHECK-LABEL: @step
// CHECK: %[[STEP_CST:.+]] = arith.constant dense<[0, 1, 2, 3]> : vector<4xindex>
// CHECK: return %[[STEP_CST]] : vector<4xindex>
func.func @step() -> vector<4xindex> {
  %step = vector.step : vector<4xindex>
  return %step : vector<4xindex>
}

// -----

// Scalable step: the stepvector intrinsic is expected to produce i64 lanes,
// which are then cast back to the index element type.
// CHECK-LABEL: @step_scalable
// CHECK: %[[STEP_I64:.*]] = llvm.intr.stepvector : vector<[4]xi64>
// CHECK: %[[STEP_INDEX:.*]] = builtin.unrealized_conversion_cast %[[STEP_I64]] : vector<[4]xi64> to vector<[4]xindex>
// CHECK: return %[[STEP_INDEX]] : vector<[4]xindex>
func.func @step_scalable() -> vector<[4]xindex> {
  %step = vector.step : vector<[4]xindex>
  return %step : vector<[4]xindex>
}