; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s
; RUN: llc < %s -global-isel=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -asm-verbose=false | FileCheck %s

define signext i8 @test_vmaxv_s8(<8 x i8> %a1) {
; CHECK: test_vmaxv_s8
; CHECK: smaxv.8b b[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a1)
  %0 = trunc i32 %vmaxv.i to i8
  ret i8 %0
}

define signext i16 @test_vmaxv_s16(<4 x i16> %a1) {
; CHECK: test_vmaxv_s16
; CHECK: smaxv.4h h[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a1)
  %0 = trunc i32 %vmaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxv_s32(<2 x i32> %a1) {
; CHECK: test_vmaxv_s32
; The ISA has no SMAXV variant for 2 x i32, so this special case lowers to a
; pairwise smaxp instead.
; CHECK: smaxp.2s v[[REGNUM:[0-9]+]], v0, v0
; CHECK-NEXT: fmov w0, s[[REGNUM]]
; CHECK-NEXT: ret
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a1)
  ret i32 %vmaxv.i
}

define signext i8 @test_vmaxvq_s8(<16 x i8> %a1) {
; CHECK: test_vmaxvq_s8
; CHECK: smaxv.16b b[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.b w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a1)
  %0 = trunc i32 %vmaxv.i to i8
  ret i8 %0
}

define signext i16 @test_vmaxvq_s16(<8 x i16> %a1) {
; CHECK: test_vmaxvq_s16
; CHECK: smaxv.8h h[[REGNUM:[0-9]+]], v0
; CHECK-NEXT: smov.h w0, v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a1)
  %0 = trunc i32 %vmaxv.i to i16
  ret i16 %0
}

define i32 @test_vmaxvq_s32(<4 x i32> %a1) {
; CHECK: test_vmaxvq_s32
; CHECK: smaxv.4s [[REGNUM:s[0-9]+]], v0
; CHECK-NEXT: fmov w0, [[REGNUM]]
; CHECK-NEXT: ret
entry:
  %vmaxv.i = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a1)
  ret i32 %vmaxv.i
}
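; The *_used_by_laneop tests below check the case where the reduction result
; is only consumed by an insertelement: judging by the expected output, the
; value should stay in the vector register file and be moved lane-to-lane with
; a single mov, with no smov/fmov round trip through a GPR.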
define <8 x i8> @test_vmaxv_s8_used_by_laneop(<8 x i8> %a1, <8 x i8> %a2) {
; CHECK-LABEL: test_vmaxv_s8_used_by_laneop:
; CHECK: smaxv.8b b[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: mov.b v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8> %a2)
  %1 = trunc i32 %0 to i8
  %2 = insertelement <8 x i8> %a1, i8 %1, i32 3
  ret <8 x i8> %2
}

define <4 x i16> @test_vmaxv_s16_used_by_laneop(<4 x i16> %a1, <4 x i16> %a2) {
; CHECK-LABEL: test_vmaxv_s16_used_by_laneop:
; CHECK: smaxv.4h h[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: mov.h v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16> %a2)
  %1 = trunc i32 %0 to i16
  %2 = insertelement <4 x i16> %a1, i16 %1, i32 3
  ret <4 x i16> %2
}

define <2 x i32> @test_vmaxv_s32_used_by_laneop(<2 x i32> %a1, <2 x i32> %a2) {
; CHECK-LABEL: test_vmaxv_s32_used_by_laneop:
; CHECK: smaxp.2s v[[REGNUM:[0-9]+]], v1, v1
; CHECK-NEXT: mov.s v0[1], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a2)
  %1 = insertelement <2 x i32> %a1, i32 %0, i32 1
  ret <2 x i32> %1
}

define <16 x i8> @test_vmaxvq_s8_used_by_laneop(<16 x i8> %a1, <16 x i8> %a2) {
; CHECK-LABEL: test_vmaxvq_s8_used_by_laneop:
; CHECK: smaxv.16b b[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: mov.b v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8> %a2)
  %1 = trunc i32 %0 to i8
  %2 = insertelement <16 x i8> %a1, i8 %1, i32 3
  ret <16 x i8> %2
}

define <8 x i16> @test_vmaxvq_s16_used_by_laneop(<8 x i16> %a1, <8 x i16> %a2) {
; CHECK-LABEL: test_vmaxvq_s16_used_by_laneop:
; CHECK: smaxv.8h h[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: mov.h v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16> %a2)
  %1 = trunc i32 %0 to i16
  %2 = insertelement <8 x i16> %a1, i16 %1, i32 3
  ret <8 x i16> %2
}

define <4 x i32> @test_vmaxvq_s32_used_by_laneop(<4 x i32> %a1, <4 x i32> %a2) {
; CHECK-LABEL: test_vmaxvq_s32_used_by_laneop:
; CHECK: smaxv.4s s[[REGNUM:[0-9]+]], v1
; CHECK-NEXT: mov.s v0[3], v[[REGNUM]][0]
; CHECK-NEXT: ret
entry:
  %0 = tail call i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32> %a2)
  %1 = insertelement <4 x i32> %a1, i32 %0, i32 3
  ret <4 x i32> %1
}

declare i32 @llvm.aarch64.neon.smaxv.i32.v4i32(<4 x i32>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i16(<8 x i16>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v16i8(<16 x i8>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v4i16(<4 x i16>)
declare i32 @llvm.aarch64.neon.smaxv.i32.v8i8(<8 x i8>)
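; Note: as the declarations above show, every llvm.aarch64.neon.smaxv intrinsic
; returns i32 regardless of the element type, which is why the i8/i16 tests
; truncate the result back to the element width before returning or inserting it.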