; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
; RUN: llc < %s -global-isel=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s

define signext i8 @test_vminv_s8(<8 x i8> %a1) {
; CHECK: test_vminv_s8
; CHECK: sminv.8b b[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a1)
  %0 = trunc i32 %vminv.i to i8
  ret i8 %0
}

define signext i16 @test_vminv_s16(<4 x i16> %a1) {
; CHECK: test_vminv_s16
; CHECK: sminv.4h h[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a1)
  %0 = trunc i32 %vminv.i to i16
  ret i16 %0
}

define i32 @test_vminv_s32(<2 x i32> %a1) {
; CHECK: test_vminv_s32
; 2 x i32 is not supported by the ISA, thus, this is a special case
; CHECK: sminp.2s v[[REGNUM:[0-9]+]], v0, v0
; CHECK-NEXT: fmov w0, s[[REGNUM]]
; CHECK-NEXT: ret
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a1)
  ret i32 %vminv.i
}

define signext i8 @test_vminvq_s8(<16 x i8> %a1) {
; CHECK: test_vminvq_s8
; CHECK: sminv.16b b[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a1)
  %0 = trunc i32 %vminv.i to i8
  ret i8 %0
}

define signext i16 @test_vminvq_s16(<8 x i16> %a1) {
; CHECK: test_vminvq_s16
; CHECK: sminv.8h h[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a1)
  %0 = trunc i32 %vminv.i to i16
  ret i16 %0
}

define i32 @test_vminvq_s32(<4 x i32> %a1) {
; CHECK: test_vminvq_s32
; CHECK: sminv.4s [[REGNUM:s[0-9]+]], v0
; CHECK-NEXT: fmov w0, [[REGNUM]]
; CHECK-NEXT: ret
entry:
  %vminv.i = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a1)
  ret i32 %vminv.i
}

define <8 x i8> @test_vminv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
; CHECK-LABEL: test_vminv_s8_used_by_laneop:
; CHECK: sminv.8b b[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: mov.b v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8> %a2)
  %1 = trunc i32 %0 to i8
  %2 = insertelement <8 x i8> %a1, i8 %1, i32 3
  ret <8 x i8> %2
}

define <4 x i16> @test_vminv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
; CHECK-LABEL: test_vminv_s16_used_by_laneop:
; CHECK: sminv.4h h[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: mov.h v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16> %a2)
  %1 = trunc i32 %0 to i16
  %2 = insertelement <4 x i16> %a1, i16 %1, i32 3
  ret <4 x i16> %2
}

define <2 x i32> @test_vminv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
; CHECK-LABEL: test_vminv_s32_used_by_laneop:
; CHECK: sminp.2s v[[REGNUM:[0-9]+]], v1, v1
; CHECK-NEXT: mov.s v0[1], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a2)
  %1 = insertelement <2 x i32> %a1, i32 %0, i32 1
  ret <2 x i32> %1
}

define <16 x i8> @test_vminvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_vminvq_s8_used_by_laneop:
; CHECK: sminv.16b b[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: mov.b v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8> %a2)
  %1 = trunc i32 %0 to i8
  %2 = insertelement <16 x i8> %a1, i8 %1, i32 3
  ret <16 x i8> %2
}

define <8 x i16> @test_vminvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: test_vminvq_s16_used_by_laneop:
; CHECK: sminv.8h h[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: mov.h v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16> %a2)
  %1 = trunc i32 %0 to i16
  %2 = insertelement <8 x i16> %a1, i16 %1, i32 3
  ret <8 x i16> %2
}

define <4 x i32> @test_vminvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_vminvq_s32_used_by_laneop:
; CHECK: sminv.4s s[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: mov.s v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32> %a2)
  %1 = insertelement <4 x i32> %a1, i32 %0, i32 3
  ret <4 x i32> %1
}

declare i32 @llvm.aarch64.neon.sminv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.sminv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.sminv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32>)
declare i32 @llvm.aarch64.neon.sminv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.sminv.i32.v8i8(<8 x i8>)