1*207e5cccSFangrui Song // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 2*207e5cccSFangrui Song // REQUIRES: aarch64-registered-target 3*207e5cccSFangrui Song #include <arm_neon.h> 4*207e5cccSFangrui Song // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -O3 -emit-llvm -o - %s | FileCheck %s 5*207e5cccSFangrui Song // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s 6*207e5cccSFangrui Song 7*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_u8( 8*207e5cccSFangrui Song // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { 9*207e5cccSFangrui Song // CHECK-NEXT: entry: 10*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0) 11*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] 12*207e5cccSFangrui Song // 13*207e5cccSFangrui Song uint8x16_t test_vluti2_lane_u8(uint8x8_t vn, uint8x8_t vm) { 14*207e5cccSFangrui Song return vluti2_lane_u8(vn, vm, 0); 15*207e5cccSFangrui Song } 16*207e5cccSFangrui Song 17*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_u8( 18*207e5cccSFangrui Song // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 19*207e5cccSFangrui Song // CHECK-NEXT: entry: 20*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0) 21*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]] 22*207e5cccSFangrui Song // 23*207e5cccSFangrui Song uint8x16_t test_vluti2_laneq_u8(uint8x8_t vn, uint8x16_t vm) { 24*207e5cccSFangrui Song return vluti2_laneq_u8(vn, vm, 0); 25*207e5cccSFangrui Song } 26*207e5cccSFangrui Song 27*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_u8( 28*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 29*207e5cccSFangrui Song // CHECK-NEXT: entry: 30*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1) 31*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] 32*207e5cccSFangrui Song // 33*207e5cccSFangrui Song uint8x16_t test_vluti2q_lane_u8(uint8x16_t vn, uint8x8_t vm) { 34*207e5cccSFangrui Song return vluti2q_lane_u8(vn, vm, 1); 35*207e5cccSFangrui Song } 36*207e5cccSFangrui Song 37*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_u8( 38*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 39*207e5cccSFangrui Song // CHECK-NEXT: entry: 40*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3) 41*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]] 42*207e5cccSFangrui Song // 43*207e5cccSFangrui Song uint8x16_t test_vluti2q_laneq_u8(uint8x16_t vn, uint8x16_t vm) { 44*207e5cccSFangrui Song return vluti2q_laneq_u8(vn, vm, 3); 45*207e5cccSFangrui Song } 46*207e5cccSFangrui Song 47*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_s8( 48*207e5cccSFangrui Song // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 49*207e5cccSFangrui Song // CHECK-NEXT: entry: 50*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0) 51*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] 52*207e5cccSFangrui Song // 53*207e5cccSFangrui Song int8x16_t test_vluti2_lane_s8(int8x8_t vn, uint8x8_t vm) { 54*207e5cccSFangrui Song return vluti2_lane_s8(vn, vm, 0); 55*207e5cccSFangrui Song } 56*207e5cccSFangrui Song 57*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_s8( 58*207e5cccSFangrui Song // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 59*207e5cccSFangrui Song // CHECK-NEXT: entry: 60*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0) 61*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]] 62*207e5cccSFangrui Song // 63*207e5cccSFangrui Song int8x16_t test_vluti2_laneq_s8(int8x8_t vn, uint8x16_t vm) { 64*207e5cccSFangrui Song return vluti2_laneq_s8(vn, vm, 0); 65*207e5cccSFangrui Song } 66*207e5cccSFangrui Song 67*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_s8( 68*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 69*207e5cccSFangrui Song // CHECK-NEXT: entry: 70*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1) 71*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] 72*207e5cccSFangrui Song // 73*207e5cccSFangrui Song int8x16_t test_vluti2q_lane_s8(int8x16_t vn, uint8x8_t vm) { 74*207e5cccSFangrui Song return vluti2q_lane_s8(vn, vm, 1); 75*207e5cccSFangrui Song } 76*207e5cccSFangrui Song 77*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_s8( 78*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 79*207e5cccSFangrui Song // CHECK-NEXT: entry: 80*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3) 81*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]] 82*207e5cccSFangrui Song // 83*207e5cccSFangrui Song int8x16_t test_vluti2q_laneq_s8(int8x16_t vn, uint8x16_t vm) { 84*207e5cccSFangrui Song return vluti2q_laneq_s8(vn, vm, 3); 85*207e5cccSFangrui Song } 86*207e5cccSFangrui Song 87*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_p8( 88*207e5cccSFangrui Song // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 89*207e5cccSFangrui Song // CHECK-NEXT: entry: 90*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0) 91*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] 92*207e5cccSFangrui Song // 93*207e5cccSFangrui Song poly8x16_t test_vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm) { 94*207e5cccSFangrui Song return vluti2_lane_p8(vn, vm, 0); 95*207e5cccSFangrui Song } 96*207e5cccSFangrui Song 97*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_p8( 98*207e5cccSFangrui Song // CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 99*207e5cccSFangrui Song // CHECK-NEXT: entry: 100*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0) 101*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]] 102*207e5cccSFangrui Song // 103*207e5cccSFangrui Song poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) { 104*207e5cccSFangrui Song return vluti2_laneq_p8(vn, vm, 0); 105*207e5cccSFangrui Song } 106*207e5cccSFangrui Song 107*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_p8( 108*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 109*207e5cccSFangrui Song // CHECK-NEXT: entry: 110*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1) 111*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]] 112*207e5cccSFangrui Song // 113*207e5cccSFangrui Song poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) { 114*207e5cccSFangrui Song return vluti2q_lane_p8(vn, vm, 1); 115*207e5cccSFangrui Song } 116*207e5cccSFangrui Song 117*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_p8( 118*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 119*207e5cccSFangrui Song // CHECK-NEXT: entry: 120*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3) 121*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]] 122*207e5cccSFangrui Song // 123*207e5cccSFangrui Song poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) { 124*207e5cccSFangrui Song return vluti2q_laneq_p8(vn, vm, 3); 125*207e5cccSFangrui Song } 126*207e5cccSFangrui Song 127*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_u16( 128*207e5cccSFangrui Song // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 129*207e5cccSFangrui Song // CHECK-NEXT: entry: 130*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0) 131*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]] 132*207e5cccSFangrui Song // 133*207e5cccSFangrui Song uint16x8_t test_vluti2_lane_u16(uint16x4_t vn, uint8x8_t vm) { 134*207e5cccSFangrui Song return vluti2_lane_u16(vn, vm, 0); 135*207e5cccSFangrui Song } 136*207e5cccSFangrui Song 137*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_u16( 138*207e5cccSFangrui Song // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 139*207e5cccSFangrui Song // CHECK-NEXT: entry: 140*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0) 141*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]] 142*207e5cccSFangrui Song // 143*207e5cccSFangrui Song uint16x8_t test_vluti2_laneq_u16(uint16x4_t vn, uint8x16_t vm) { 144*207e5cccSFangrui Song return vluti2_laneq_u16(vn, vm, 0); 145*207e5cccSFangrui Song } 146*207e5cccSFangrui Song 147*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_u16( 148*207e5cccSFangrui Song // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 149*207e5cccSFangrui Song // CHECK-NEXT: entry: 150*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3) 151*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]] 152*207e5cccSFangrui Song // 153*207e5cccSFangrui Song uint16x8_t test_vluti2q_lane_u16(uint16x8_t vn, uint8x8_t vm) { 154*207e5cccSFangrui Song return vluti2q_lane_u16(vn, vm, 3); 155*207e5cccSFangrui Song } 156*207e5cccSFangrui Song 157*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_u16( 158*207e5cccSFangrui Song // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 159*207e5cccSFangrui Song // CHECK-NEXT: entry: 160*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7) 161*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]] 162*207e5cccSFangrui Song // 163*207e5cccSFangrui Song uint16x8_t test_vluti2q_laneq_u16(uint16x8_t vn, uint8x16_t vm) { 164*207e5cccSFangrui Song return vluti2q_laneq_u16(vn, vm, 7); 165*207e5cccSFangrui Song } 166*207e5cccSFangrui Song 167*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_s16( 168*207e5cccSFangrui Song // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 169*207e5cccSFangrui Song // CHECK-NEXT: entry: 170*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0) 171*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]] 172*207e5cccSFangrui Song // 173*207e5cccSFangrui Song int16x8_t test_vluti2_lane_s16(int16x4_t vn, uint8x8_t vm) { 174*207e5cccSFangrui Song return vluti2_lane_s16(vn, vm, 0); 175*207e5cccSFangrui Song } 176*207e5cccSFangrui Song 177*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_s16( 178*207e5cccSFangrui Song // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 179*207e5cccSFangrui Song // CHECK-NEXT: entry: 180*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0) 181*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]] 182*207e5cccSFangrui Song // 183*207e5cccSFangrui Song int16x8_t test_vluti2_laneq_s16(int16x4_t vn, uint8x16_t vm) { 184*207e5cccSFangrui Song return vluti2_laneq_s16(vn, vm, 0); 185*207e5cccSFangrui Song } 186*207e5cccSFangrui Song 187*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_s16( 188*207e5cccSFangrui Song // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 189*207e5cccSFangrui Song // CHECK-NEXT: entry: 190*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3) 191*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]] 192*207e5cccSFangrui Song // 193*207e5cccSFangrui Song int16x8_t test_vluti2q_lane_s16(int16x8_t vn, uint8x8_t vm) { 194*207e5cccSFangrui Song return vluti2q_lane_s16(vn, vm, 3); 195*207e5cccSFangrui Song } 196*207e5cccSFangrui Song 197*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_s16( 198*207e5cccSFangrui Song // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 199*207e5cccSFangrui Song // CHECK-NEXT: entry: 200*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7) 201*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]] 202*207e5cccSFangrui Song // 203*207e5cccSFangrui Song int16x8_t test_vluti2q_laneq_s16(int16x8_t vn, uint8x16_t vm) { 204*207e5cccSFangrui Song return vluti2q_laneq_s16(vn, vm, 7); 205*207e5cccSFangrui Song } 206*207e5cccSFangrui Song 207*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x half> @test_vluti2_lane_f16( 208*207e5cccSFangrui Song // CHECK-SAME: <4 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 209*207e5cccSFangrui Song // CHECK-NEXT: entry: 210*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16(<4 x half> [[VN]], <8 x i8> [[VM]], i32 0) 211*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x half> [[VLUTI2_LANE1]] 212*207e5cccSFangrui Song // 213*207e5cccSFangrui Song float16x8_t test_vluti2_lane_f16(float16x4_t vn, uint8x8_t vm) { 214*207e5cccSFangrui Song return vluti2_lane_f16(vn, vm, 0); 215*207e5cccSFangrui Song } 216*207e5cccSFangrui Song 217*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x half> @test_vluti2_laneq_f16( 218*207e5cccSFangrui Song // CHECK-SAME: <4 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 219*207e5cccSFangrui Song // CHECK-NEXT: entry: 220*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v4f16(<4 x half> [[VN]], <16 x i8> [[VM]], i32 0) 221*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x half> [[VLUTI2_LANEQ1]] 222*207e5cccSFangrui Song // 223*207e5cccSFangrui Song float16x8_t test_vluti2_laneq_f16(float16x4_t vn, uint8x16_t vm) { 224*207e5cccSFangrui Song return vluti2_laneq_f16(vn, vm, 0); 225*207e5cccSFangrui Song } 226*207e5cccSFangrui Song 227*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_lane_f16( 228*207e5cccSFangrui Song // CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 229*207e5cccSFangrui Song // CHECK-NEXT: entry: 230*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16(<8 x half> [[VN]], <8 x i8> [[VM]], i32 3) 231*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x half> [[VLUTI2_LANE1]] 232*207e5cccSFangrui Song // 233*207e5cccSFangrui Song float16x8_t test_vluti2q_lane_f16(float16x8_t vn, uint8x8_t vm) { 234*207e5cccSFangrui Song return vluti2q_lane_f16(vn, vm, 3); 235*207e5cccSFangrui Song } 236*207e5cccSFangrui Song 237*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_laneq_f16( 238*207e5cccSFangrui Song // CHECK-SAME: <8 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 239*207e5cccSFangrui Song // CHECK-NEXT: entry: 240*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v8f16(<8 x half> [[VN]], <16 x i8> [[VM]], i32 7) 241*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x half> [[VLUTI2_LANEQ1]] 242*207e5cccSFangrui Song // 243*207e5cccSFangrui Song float16x8_t test_vluti2q_laneq_f16(float16x8_t vn, uint8x16_t vm) { 244*207e5cccSFangrui Song return vluti2q_laneq_f16(vn, vm, 7); 245*207e5cccSFangrui Song } 246*207e5cccSFangrui Song 247*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_lane_bf16( 248*207e5cccSFangrui Song // CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 249*207e5cccSFangrui Song // CHECK-NEXT: entry: 250*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16(<4 x bfloat> [[VN]], <8 x i8> [[VM]], i32 0) 251*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANE1]] 252*207e5cccSFangrui Song // 253*207e5cccSFangrui Song bfloat16x8_t test_vluti2_lane_bf16(bfloat16x4_t vn, uint8x8_t vm) { 254*207e5cccSFangrui Song return vluti2_lane_bf16(vn, vm, 0); 255*207e5cccSFangrui Song } 256*207e5cccSFangrui Song 257*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_laneq_bf16( 258*207e5cccSFangrui Song // CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 259*207e5cccSFangrui Song // CHECK-NEXT: entry: 260*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16(<4 x bfloat> [[VN]], <16 x i8> [[VM]], i32 0) 261*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANEQ1]] 262*207e5cccSFangrui Song // 263*207e5cccSFangrui Song bfloat16x8_t test_vluti2_laneq_bf16(bfloat16x4_t vn, uint8x16_t vm) { 264*207e5cccSFangrui Song return vluti2_laneq_bf16(vn, vm, 0); 265*207e5cccSFangrui Song } 266*207e5cccSFangrui Song 267*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_lane_bf16( 268*207e5cccSFangrui Song // CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 269*207e5cccSFangrui Song // CHECK-NEXT: entry: 270*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16(<8 x bfloat> [[VN]], <8 x i8> [[VM]], i32 3) 271*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANE1]] 272*207e5cccSFangrui Song // 273*207e5cccSFangrui Song bfloat16x8_t test_vluti2q_lane_bf16(bfloat16x8_t vn, uint8x8_t vm) { 274*207e5cccSFangrui Song return vluti2q_lane_bf16(vn, vm, 3); 275*207e5cccSFangrui Song } 276*207e5cccSFangrui Song 277*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_laneq_bf16( 278*207e5cccSFangrui Song // CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 279*207e5cccSFangrui Song // CHECK-NEXT: entry: 280*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v8bf16(<8 x bfloat> [[VN]], <16 x i8> [[VM]], i32 7) 281*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANEQ1]] 282*207e5cccSFangrui Song // 283*207e5cccSFangrui Song bfloat16x8_t test_vluti2q_laneq_bf16(bfloat16x8_t vn, uint8x16_t vm) { 284*207e5cccSFangrui Song return vluti2q_laneq_bf16(vn, vm, 7); 285*207e5cccSFangrui Song } 286*207e5cccSFangrui Song 287*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_p16( 288*207e5cccSFangrui Song // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 289*207e5cccSFangrui Song // CHECK-NEXT: entry: 290*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0) 291*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]] 292*207e5cccSFangrui Song // 293*207e5cccSFangrui Song poly16x8_t test_vluti2_lane_p16(poly16x4_t vn, uint8x8_t vm) { 294*207e5cccSFangrui Song return vluti2_lane_p16(vn, vm, 0); 295*207e5cccSFangrui Song } 296*207e5cccSFangrui Song 297*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_p16( 298*207e5cccSFangrui Song // CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 299*207e5cccSFangrui Song // CHECK-NEXT: entry: 300*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0) 301*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]] 302*207e5cccSFangrui Song // 303*207e5cccSFangrui Song poly16x8_t test_vluti2_laneq_p16(poly16x4_t vn, uint8x16_t vm) { 304*207e5cccSFangrui Song return vluti2_laneq_p16(vn, vm, 0); 305*207e5cccSFangrui Song } 306*207e5cccSFangrui Song 307*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_p16( 308*207e5cccSFangrui Song // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 309*207e5cccSFangrui Song // CHECK-NEXT: entry: 310*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3) 311*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]] 312*207e5cccSFangrui Song // 313*207e5cccSFangrui Song poly16x8_t test_vluti2q_lane_p16(poly16x8_t vn, uint8x8_t vm) { 314*207e5cccSFangrui Song return vluti2q_lane_p16(vn, vm, 3); 315*207e5cccSFangrui Song } 316*207e5cccSFangrui Song 317*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_p16( 318*207e5cccSFangrui Song // CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 319*207e5cccSFangrui Song // CHECK-NEXT: entry: 320*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7) 321*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]] 322*207e5cccSFangrui Song // 323*207e5cccSFangrui Song poly16x8_t test_vluti2q_laneq_p16(poly16x8_t vn, uint8x16_t vm) { 324*207e5cccSFangrui Song return vluti2q_laneq_p16(vn, vm, 7); 325*207e5cccSFangrui Song } 326*207e5cccSFangrui Song 327*207e5cccSFangrui Song // 328*207e5cccSFangrui Song 329*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_u8( 330*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 331*207e5cccSFangrui Song // CHECK-NEXT: entry: 332*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0) 333*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANE]] 334*207e5cccSFangrui Song // 335*207e5cccSFangrui Song uint8x16_t test_vluti4q_lane_u8(uint8x16_t vn, uint8x8_t vm) { 336*207e5cccSFangrui Song return vluti4q_lane_u8(vn, vm, 0); 337*207e5cccSFangrui Song } 338*207e5cccSFangrui Song 339*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_u8( 340*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 341*207e5cccSFangrui Song // CHECK-NEXT: entry: 342*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 0) 343*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANEQ]] 344*207e5cccSFangrui Song // 345*207e5cccSFangrui Song uint8x16_t test_vluti4q_laneq_u8(uint8x16_t vn, uint8x16_t vm) { 346*207e5cccSFangrui Song return vluti4q_laneq_u8(vn, vm, 0); 347*207e5cccSFangrui Song } 348*207e5cccSFangrui Song 349*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_s8( 350*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 351*207e5cccSFangrui Song // CHECK-NEXT: entry: 352*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0) 353*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANE]] 354*207e5cccSFangrui Song // 355*207e5cccSFangrui Song int8x16_t test_vluti4q_lane_s8(int8x16_t vn, uint8x8_t vm) { 356*207e5cccSFangrui Song return vluti4q_lane_s8(vn, vm, 0); 357*207e5cccSFangrui Song } 358*207e5cccSFangrui Song 359*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_s8( 360*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 361*207e5cccSFangrui Song // CHECK-NEXT: entry: 362*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1) 363*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANEQ]] 364*207e5cccSFangrui Song // 365*207e5cccSFangrui Song int8x16_t test_vluti4q_laneq_s8(int8x16_t vn, uint8x16_t vm) { 366*207e5cccSFangrui Song return vluti4q_laneq_s8(vn, vm, 1); 367*207e5cccSFangrui Song } 368*207e5cccSFangrui Song 369*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_p8( 370*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 371*207e5cccSFangrui Song // CHECK-NEXT: entry: 372*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0) 373*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANE]] 374*207e5cccSFangrui Song // 375*207e5cccSFangrui Song poly8x16_t test_vluti4q_lane_p8(poly8x16_t vn, uint8x8_t vm) { 376*207e5cccSFangrui Song return vluti4q_lane_p8(vn, vm, 0); 377*207e5cccSFangrui Song } 378*207e5cccSFangrui Song 379*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_p8( 380*207e5cccSFangrui Song // CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 381*207e5cccSFangrui Song // CHECK-NEXT: entry: 382*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1) 383*207e5cccSFangrui Song // CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANEQ]] 384*207e5cccSFangrui Song // 385*207e5cccSFangrui Song poly8x16_t test_vluti4q_laneq_p8(poly8x16_t vn, uint8x16_t vm) { 386*207e5cccSFangrui Song return vluti4q_laneq_p8(vn, vm, 1); 387*207e5cccSFangrui Song } 388*207e5cccSFangrui Song 389*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_u16_x2( 390*207e5cccSFangrui Song // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 391*207e5cccSFangrui Song // CHECK-NEXT: entry: 392*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0 393*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1 394*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 0) 395*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANE_X24]] 396*207e5cccSFangrui Song // 397*207e5cccSFangrui Song uint16x8_t test_vluti4q_lane_u16_x2(uint16x8x2_t vn, uint8x8_t vm) { 398*207e5cccSFangrui Song return vluti4q_lane_u16_x2(vn, vm, 0); 399*207e5cccSFangrui Song } 400*207e5cccSFangrui Song 401*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_u16_x2( 402*207e5cccSFangrui Song // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 403*207e5cccSFangrui Song // CHECK-NEXT: entry: 404*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0 405*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1 406*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 0) 407*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANEQ_X24]] 408*207e5cccSFangrui Song // 409*207e5cccSFangrui Song uint16x8_t test_vluti4q_laneq_u16_x2(uint16x8x2_t vn, uint8x16_t vm) { 410*207e5cccSFangrui Song return vluti4q_laneq_u16_x2(vn, vm, 0); 411*207e5cccSFangrui Song } 412*207e5cccSFangrui Song 413*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_s16_x2( 414*207e5cccSFangrui Song // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 415*207e5cccSFangrui Song // CHECK-NEXT: entry: 416*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0 417*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1 418*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1) 419*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANE_X24]] 420*207e5cccSFangrui Song // 421*207e5cccSFangrui Song int16x8_t test_vluti4q_lane_s16_x2(int16x8x2_t vn, uint8x8_t vm) { 422*207e5cccSFangrui Song return vluti4q_lane_s16_x2(vn, vm, 1); 423*207e5cccSFangrui Song } 424*207e5cccSFangrui Song 425*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_s16_x2( 426*207e5cccSFangrui Song // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 427*207e5cccSFangrui Song // CHECK-NEXT: entry: 428*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0 429*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1 430*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 3) 431*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANEQ_X24]] 432*207e5cccSFangrui Song // 433*207e5cccSFangrui Song int16x8_t test_vluti4q_laneq_s16_x2(int16x8x2_t vn, uint8x16_t vm) { 434*207e5cccSFangrui Song return vluti4q_laneq_s16_x2(vn, vm, 3); 435*207e5cccSFangrui Song } 436*207e5cccSFangrui Song 437*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x half> @test_vluti4q_lane_f16_x2( 438*207e5cccSFangrui Song // CHECK-SAME: [2 x <8 x half>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 439*207e5cccSFangrui Song // CHECK-NEXT: entry: 440*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 0 441*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 1 442*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti4q.lane.x2.v8f16(<8 x half> [[VN_COERCE_FCA_0_EXTRACT]], <8 x half> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1) 443*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x half> [[VLUTI4Q_LANE_X24]] 444*207e5cccSFangrui Song // 445*207e5cccSFangrui Song float16x8_t test_vluti4q_lane_f16_x2(float16x8x2_t vn, uint8x8_t vm) { 446*207e5cccSFangrui Song return vluti4q_lane_f16_x2(vn, vm, 1); 447*207e5cccSFangrui Song } 448*207e5cccSFangrui Song 449*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x half> @test_vluti4q_laneq_f16_x2( 450*207e5cccSFangrui Song // CHECK-SAME: [2 x <8 x half>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 451*207e5cccSFangrui Song // CHECK-NEXT: entry: 452*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 0 453*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 1 454*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16(<8 x half> [[VN_COERCE_FCA_0_EXTRACT]], <8 x half> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 1) 455*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x half> [[VLUTI4Q_LANEQ_X24]] 456*207e5cccSFangrui Song // 457*207e5cccSFangrui Song float16x8_t test_vluti4q_laneq_f16_x2(float16x8x2_t vn, uint8x16_t vm) { 458*207e5cccSFangrui Song return vluti4q_laneq_f16_x2(vn, vm, 1); 459*207e5cccSFangrui Song } 460*207e5cccSFangrui Song 461*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_lane_bf16_x2( 462*207e5cccSFangrui Song // CHECK-SAME: [2 x <8 x bfloat>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 463*207e5cccSFangrui Song // CHECK-NEXT: entry: 464*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0 465*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1 466*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1) 467*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x bfloat> [[VLUTI4Q_LANE_X24]] 468*207e5cccSFangrui Song // 469*207e5cccSFangrui Song bfloat16x8_t test_vluti4q_lane_bf16_x2(bfloat16x8x2_t vn, uint8x8_t vm) { 470*207e5cccSFangrui Song return vluti4q_lane_bf16_x2(vn, vm, 1); 471*207e5cccSFangrui Song } 472*207e5cccSFangrui Song 473*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_laneq_bf16_x2( 474*207e5cccSFangrui Song // CHECK-SAME: [2 x <8 x bfloat>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 475*207e5cccSFangrui Song // CHECK-NEXT: entry: 476*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0 477*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1 478*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 2) 479*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x bfloat> [[VLUTI4Q_LANEQ_X24]] 480*207e5cccSFangrui Song // 481*207e5cccSFangrui Song bfloat16x8_t test_vluti4q_laneq_bf16_x2(bfloat16x8x2_t vn, uint8x16_t vm) { 482*207e5cccSFangrui Song return vluti4q_laneq_bf16_x2(vn, vm, 2); 483*207e5cccSFangrui Song } 484*207e5cccSFangrui Song 485*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_p16_x2( 486*207e5cccSFangrui Song // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 487*207e5cccSFangrui Song // CHECK-NEXT: entry: 488*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0 489*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1 490*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 0) 491*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANE_X24]] 492*207e5cccSFangrui Song // 493*207e5cccSFangrui Song poly16x8_t test_vluti4q_lane_p16_x2(poly16x8x2_t vn, uint8x8_t vm) { 494*207e5cccSFangrui Song return vluti4q_lane_p16_x2(vn, vm, 0); 495*207e5cccSFangrui Song } 496*207e5cccSFangrui Song 497*207e5cccSFangrui Song // CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_p16_x2( 498*207e5cccSFangrui Song // CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { 499*207e5cccSFangrui Song // CHECK-NEXT: entry: 500*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0 501*207e5cccSFangrui Song // CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1 502*207e5cccSFangrui Song // CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 0) 503*207e5cccSFangrui Song // CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANEQ_X24]] 504*207e5cccSFangrui Song // 505*207e5cccSFangrui Song poly16x8_t test_vluti4q_laneq_p16_x2(poly16x8x2_t vn, uint8x16_t vm) { 506*207e5cccSFangrui Song return vluti4q_laneq_p16_x2(vn, vm, 0); 507*207e5cccSFangrui Song } 508