// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s

// Chains smull calls of increasing width (8xi16, 4xi32, 2xi64), with a
// narrowing shufflevector between each step to halve the operand length.
// CHECK-LABEL: arm_neon_smull
llvm.func @arm_neon_smull(%arg0: vector<8xi8>, %arg1: vector<8xi8>) -> !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)> {
  // CHECK: %[[V0:.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %{{.*}}, <8 x i8> %{{.*}})
  // CHECK-NEXT: %[[V00:.*]] = shufflevector <8 x i16> %[[V0]], <8 x i16> %[[V0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
  %0 = arm_neon.intr.smull %arg0, %arg1 : vector<8xi8> to vector<8xi16>
  %1 = llvm.shufflevector %0, %0 [3, 4, 5, 6] : vector<8xi16>

  // CHECK-NEXT: %[[V1:.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %[[V00]], <4 x i16> %[[V00]])
  // CHECK-NEXT: %[[V11:.*]] = shufflevector <4 x i32> %[[V1]], <4 x i32> %[[V1]], <2 x i32> <i32 1, i32 2>
  %2 = arm_neon.intr.smull %1, %1 : vector<4xi16> to vector<4xi32>
  %3 = llvm.shufflevector %2, %2 [1, 2] : vector<4xi32>

  // CHECK-NEXT: %[[V2:.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %[[V11]], <2 x i32> %[[V11]])
  %4 = arm_neon.intr.smull %3, %3 : vector<2xi32> to vector<2xi64>

  %5 = llvm.mlir.undef : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)>
  %6 = llvm.insertvalue %0, %5[0] : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)>
  %7 = llvm.insertvalue %2, %6[1] : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)>
  %8 = llvm.insertvalue %4, %7[2] : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)>

  // CHECK: ret { <8 x i16>, <4 x i32>, <2 x i64> }
  llvm.return %8 : !llvm.struct<(vector<8xi16>, vector<4xi32>, vector<2xi64>)>
}

// -----

// CHECK-LABEL: arm_neon_sdot_8_i8i8
llvm.func @arm_neon_sdot_8_i8i8(%a: vector<2xi32>, %b: vector<8xi8>, %c: vector<8xi8>) -> vector<2xi32> {
  // CHECK: %[[V0:.*]] = call <2 x i32> @llvm.aarch64.neon.sdot.v2i32.v8i8(<2 x i32> %{{.*}}, <8 x i8> %{{.*}}, <8 x i8> %{{.*}})
  // CHECK-NEXT: ret <2 x i32>
  %0 = arm_neon.intr.sdot %a, %b, %c : vector<8xi8>, vector<8xi8> to vector<2xi32>
  llvm.return %0 : vector<2xi32>
}

// -----

// CHECK-LABEL: arm_neon_sdot_16_i8i8
llvm.func @arm_neon_sdot_16_i8i8(%a: vector<4xi32>, %b: vector<16xi8>, %c: vector<16xi8>) -> vector<4xi32> {
  // CHECK: %[[V0:.*]] = call <4 x i32> @llvm.aarch64.neon.sdot.v4i32.v16i8(<4 x i32> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
  // CHECK-NEXT: ret <4 x i32>
  %0 = arm_neon.intr.sdot %a, %b, %c : vector<16xi8>, vector<16xi8> to vector<4xi32>
  llvm.return %0 : vector<4xi32>
}

// -----

// CHECK-LABEL: arm_neon_smmla
llvm.func @arm_neon_smmla(%arg0: vector<16xi8>,
                          %arg1: vector<16xi8>,
                          %arg2: vector<4xi32>) -> vector<4xi32> {
  // CHECK: <4 x i32> @llvm.aarch64.neon.smmla.v4i32.v16i8(<4 x i32
  %0 = "arm_neon.intr.smmla"(%arg2, %arg0, %arg1) :
    (vector<4xi32>, vector<16xi8>, vector<16xi8>)
      -> vector<4xi32>
  llvm.return %0 : vector<4xi32>
}

// -----

// CHECK-LABEL: arm_neon_ummla
llvm.func @arm_neon_ummla(%arg0: vector<16xi8>,
                          %arg1: vector<16xi8>,
                          %arg2: vector<4xi32>) -> vector<4xi32> {
  // CHECK: <4 x i32> @llvm.aarch64.neon.ummla.v4i32.v16i8(<4 x i32
  %0 = "arm_neon.intr.ummla"(%arg2, %arg0, %arg1) :
    (vector<4xi32>, vector<16xi8>, vector<16xi8>)
      -> vector<4xi32>
  llvm.return %0 : vector<4xi32>
}

// -----

// CHECK-LABEL: arm_neon_usmmla
llvm.func @arm_neon_usmmla(%arg0: vector<16xi8>,
                           %arg1: vector<16xi8>,
                           %arg2: vector<4xi32>) -> vector<4xi32> {
  // CHECK: <4 x i32> @llvm.aarch64.neon.usmmla.v4i32.v16i8(<4 x i32
  %0 = "arm_neon.intr.usmmla"(%arg2, %arg0, %arg1) :
    (vector<4xi32>, vector<16xi8>, vector<16xi8>)
      -> vector<4xi32>
  llvm.return %0 : vector<4xi32>
}