// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// REQUIRES: aarch64-registered-target
#include <arm_neon.h>
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -O3 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +lut -target-feature +bf16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

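// The tests below exercise the NEON lookup-table intrinsics guarded by the
// +lut target feature: the vluti2* intrinsics read 2-bit indices from vm and
// use them to select elements of the table vn, while the vluti4q* intrinsics
// use 4-bit indices. The constant final argument picks which segment of the
// index vector vm supplies the indices. The first group covers 8-bit table
// elements (u8/s8/p8).
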
// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_u8(
// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
//
uint8x16_t test_vluti2_lane_u8(uint8x8_t vn, uint8x8_t vm) {
  return vluti2_lane_u8(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_u8(
// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
//
uint8x16_t test_vluti2_laneq_u8(uint8x8_t vn, uint8x16_t vm) {
  return vluti2_laneq_u8(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_u8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
//
uint8x16_t test_vluti2q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
  return vluti2q_lane_u8(vn, vm, 1);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_u8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
//
uint8x16_t test_vluti2q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
  return vluti2q_laneq_u8(vn, vm, 3);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_s8(
// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
//
int8x16_t test_vluti2_lane_s8(int8x8_t vn, uint8x8_t vm) {
  return vluti2_lane_s8(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_s8(
// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
//
int8x16_t test_vluti2_laneq_s8(int8x8_t vn, uint8x16_t vm) {
  return vluti2_laneq_s8(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_s8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
//
int8x16_t test_vluti2q_lane_s8(int8x16_t vn, uint8x8_t vm) {
  return vluti2q_lane_s8(vn, vm, 1);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_s8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
//
int8x16_t test_vluti2q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
  return vluti2q_laneq_s8(vn, vm, 3);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_lane_p8(
// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v8i8(<8 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
//
poly8x16_t test_vluti2_lane_p8(poly8x8_t vn, uint8x8_t vm) {
  return vluti2_lane_p8(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2_laneq_p8(
// CHECK-SAME: <8 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v8i8(<8 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
//
poly8x16_t test_vluti2_laneq_p8(poly8x8_t vn, uint8x16_t vm) {
  return vluti2_laneq_p8(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_lane_p8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.lane.v16i8.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 1)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANE]]
//
poly8x16_t test_vluti2q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
  return vluti2q_lane_p8(vn, vm, 1);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti2q_laneq_p8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti2.laneq.v16i8.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <16 x i8> [[VLUTI2_LANEQ]]
//
poly8x16_t test_vluti2q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
  return vluti2q_laneq_p8(vn, vm, 3);
}

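// Same lookups with 16-bit table elements: the non-q forms take a 64-bit
// table (e.g. uint16x4_t), the q forms a 128-bit table, and every variant
// returns a full 128-bit vector of looked-up elements.
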
// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_u16(
// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
//
uint16x8_t test_vluti2_lane_u16(uint16x4_t vn, uint8x8_t vm) {
  return vluti2_lane_u16(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_u16(
// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
//
uint16x8_t test_vluti2_laneq_u16(uint16x4_t vn, uint8x16_t vm) {
  return vluti2_laneq_u16(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_u16(
// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
//
uint16x8_t test_vluti2q_lane_u16(uint16x8_t vn, uint8x8_t vm) {
  return vluti2q_lane_u16(vn, vm, 3);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_u16(
// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
//
uint16x8_t test_vluti2q_laneq_u16(uint16x8_t vn, uint8x16_t vm) {
  return vluti2q_laneq_u16(vn, vm, 7);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_s16(
// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
//
int16x8_t test_vluti2_lane_s16(int16x4_t vn, uint8x8_t vm) {
  return vluti2_lane_s16(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_s16(
// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
//
int16x8_t test_vluti2_laneq_s16(int16x4_t vn, uint8x16_t vm) {
  return vluti2_laneq_s16(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_s16(
// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
//
int16x8_t test_vluti2q_lane_s16(int16x8_t vn, uint8x8_t vm) {
  return vluti2q_lane_s16(vn, vm, 3);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_s16(
// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
//
int16x8_t test_vluti2q_laneq_s16(int16x8_t vn, uint8x16_t vm) {
  return vluti2q_laneq_s16(vn, vm, 7);
}

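// Floating-point 16-bit element types follow; the bf16 variants additionally
// depend on the +bf16 target feature enabled on the RUN lines above.
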
// CHECK-LABEL: define dso_local <8 x half> @test_vluti2_lane_f16(
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v4f16(<4 x half> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x half> [[VLUTI2_LANE1]]
//
float16x8_t test_vluti2_lane_f16(float16x4_t vn, uint8x8_t vm) {
  return vluti2_lane_f16(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x half> @test_vluti2_laneq_f16(
// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v4f16(<4 x half> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x half> [[VLUTI2_LANEQ1]]
//
float16x8_t test_vluti2_laneq_f16(float16x4_t vn, uint8x16_t vm) {
  return vluti2_laneq_f16(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_lane_f16(
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.lane.v8f16.v8f16(<8 x half> [[VN]], <8 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <8 x half> [[VLUTI2_LANE1]]
//
float16x8_t test_vluti2q_lane_f16(float16x8_t vn, uint8x8_t vm) {
  return vluti2q_lane_f16(vn, vm, 3);
}

// CHECK-LABEL: define dso_local <8 x half> @test_vluti2q_laneq_f16(
// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti2.laneq.v8f16.v8f16(<8 x half> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-NEXT: ret <8 x half> [[VLUTI2_LANEQ1]]
//
float16x8_t test_vluti2q_laneq_f16(float16x8_t vn, uint8x16_t vm) {
  return vluti2q_laneq_f16(vn, vm, 7);
}

// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_lane_bf16(
// CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v4bf16(<4 x bfloat> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANE1]]
//
bfloat16x8_t test_vluti2_lane_bf16(bfloat16x4_t vn, uint8x8_t vm) {
  return vluti2_lane_bf16(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2_laneq_bf16(
// CHECK-SAME: <4 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v4bf16(<4 x bfloat> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANEQ1]]
//
bfloat16x8_t test_vluti2_laneq_bf16(bfloat16x4_t vn, uint8x16_t vm) {
  return vluti2_laneq_bf16(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_lane_bf16(
// CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.lane.v8bf16.v8bf16(<8 x bfloat> [[VN]], <8 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANE1]]
//
bfloat16x8_t test_vluti2q_lane_bf16(bfloat16x8_t vn, uint8x8_t vm) {
  return vluti2q_lane_bf16(vn, vm, 3);
}

// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti2q_laneq_bf16(
// CHECK-SAME: <8 x bfloat> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti2.laneq.v8bf16.v8bf16(<8 x bfloat> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-NEXT: ret <8 x bfloat> [[VLUTI2_LANEQ1]]
//
bfloat16x8_t test_vluti2q_laneq_bf16(bfloat16x8_t vn, uint8x16_t vm) {
  return vluti2q_laneq_bf16(vn, vm, 7);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_lane_p16(
// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v4i16(<4 x i16> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
//
poly16x8_t test_vluti2_lane_p16(poly16x4_t vn, uint8x8_t vm) {
  return vluti2_lane_p16(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2_laneq_p16(
// CHECK-SAME: <4 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v4i16(<4 x i16> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
//
poly16x8_t test_vluti2_laneq_p16(poly16x4_t vn, uint8x16_t vm) {
  return vluti2_laneq_p16(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_lane_p16(
// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANE1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.lane.v8i16.v8i16(<8 x i16> [[VN]], <8 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANE1]]
//
poly16x8_t test_vluti2q_lane_p16(poly16x8_t vn, uint8x8_t vm) {
  return vluti2q_lane_p16(vn, vm, 3);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti2q_laneq_p16(
// CHECK-SAME: <8 x i16> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI2_LANEQ1:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti2.laneq.v8i16.v8i16(<8 x i16> [[VN]], <16 x i8> [[VM]], i32 7)
// CHECK-NEXT: ret <8 x i16> [[VLUTI2_LANEQ1]]
//
poly16x8_t test_vluti2q_laneq_p16(poly16x8_t vn, uint8x16_t vm) {
  return vluti2q_laneq_p16(vn, vm, 7);
}

//

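// vluti4q intrinsics: 4-bit indices taken from vm select elements from a
// 128-bit table. The 8-bit element forms below pass the table as a single
// q register in vn.
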
// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_u8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANE]]
//
uint8x16_t test_vluti4q_lane_u8(uint8x16_t vn, uint8x8_t vm) {
  return vluti4q_lane_u8(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_u8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANEQ]]
//
uint8x16_t test_vluti4q_laneq_u8(uint8x16_t vn, uint8x16_t vm) {
  return vluti4q_laneq_u8(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_s8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANE]]
//
int8x16_t test_vluti4q_lane_s8(int8x16_t vn, uint8x8_t vm) {
  return vluti4q_lane_s8(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_s8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1)
// CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANEQ]]
//
int8x16_t test_vluti4q_laneq_s8(int8x16_t vn, uint8x16_t vm) {
  return vluti4q_laneq_s8(vn, vm, 1);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_lane_p8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI4Q_LANE:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.lane.v16i8(<16 x i8> [[VN]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANE]]
//
poly8x16_t test_vluti4q_lane_p8(poly8x16_t vn, uint8x8_t vm) {
  return vluti4q_lane_p8(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <16 x i8> @test_vluti4q_laneq_p8(
// CHECK-SAME: <16 x i8> noundef [[VN:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VLUTI4Q_LANEQ:%.*]] = tail call <16 x i8> @llvm.aarch64.neon.vluti4q.laneq.v16i8(<16 x i8> [[VN]], <16 x i8> [[VM]], i32 1)
// CHECK-NEXT: ret <16 x i8> [[VLUTI4Q_LANEQ]]
//
poly8x16_t test_vluti4q_laneq_p8(poly8x16_t vn, uint8x16_t vm) {
  return vluti4q_laneq_p8(vn, vm, 1);
}

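// The 16-bit vluti4q lookups take their table as a pair of vectors (the _x2
// tuple types), so vn is lowered to a [2 x <8 x i16>] (or half/bfloat)
// aggregate and both halves are extracted before the intrinsic call.
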
// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_u16_x2(
// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
// CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
// CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANE_X24]]
//
uint16x8_t test_vluti4q_lane_u16_x2(uint16x8x2_t vn, uint8x8_t vm) {
  return vluti4q_lane_u16_x2(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_u16_x2(
// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
// CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
// CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
//
uint16x8_t test_vluti4q_laneq_u16_x2(uint16x8x2_t vn, uint8x16_t vm) {
  return vluti4q_laneq_u16_x2(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_s16_x2(
// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
// CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
// CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
// CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANE_X24]]
//
int16x8_t test_vluti4q_lane_s16_x2(int16x8x2_t vn, uint8x8_t vm) {
  return vluti4q_lane_s16_x2(vn, vm, 1);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_s16_x2(
// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
// CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
// CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 3)
// CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
//
int16x8_t test_vluti4q_laneq_s16_x2(int16x8x2_t vn, uint8x16_t vm) {
  return vluti4q_laneq_s16_x2(vn, vm, 3);
}

// CHECK-LABEL: define dso_local <8 x half> @test_vluti4q_lane_f16_x2(
// CHECK-SAME: [2 x <8 x half>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 0
// CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 1
// CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti4q.lane.x2.v8f16(<8 x half> [[VN_COERCE_FCA_0_EXTRACT]], <8 x half> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
// CHECK-NEXT: ret <8 x half> [[VLUTI4Q_LANE_X24]]
//
float16x8_t test_vluti4q_lane_f16_x2(float16x8x2_t vn, uint8x8_t vm) {
  return vluti4q_lane_f16_x2(vn, vm, 1);
}

// CHECK-LABEL: define dso_local <8 x half> @test_vluti4q_laneq_f16_x2(
// CHECK-SAME: [2 x <8 x half>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 0
// CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x half>] [[VN_COERCE]], 1
// CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x half> @llvm.aarch64.neon.vluti4q.laneq.x2.v8f16(<8 x half> [[VN_COERCE_FCA_0_EXTRACT]], <8 x half> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 1)
// CHECK-NEXT: ret <8 x half> [[VLUTI4Q_LANEQ_X24]]
//
float16x8_t test_vluti4q_laneq_f16_x2(float16x8x2_t vn, uint8x16_t vm) {
  return vluti4q_laneq_f16_x2(vn, vm, 1);
}

// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_lane_bf16_x2(
// CHECK-SAME: [2 x <8 x bfloat>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0
// CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1
// CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.lane.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 1)
// CHECK-NEXT: ret <8 x bfloat> [[VLUTI4Q_LANE_X24]]
//
bfloat16x8_t test_vluti4q_lane_bf16_x2(bfloat16x8x2_t vn, uint8x8_t vm) {
  return vluti4q_lane_bf16_x2(vn, vm, 1);
}

// CHECK-LABEL: define dso_local <8 x bfloat> @test_vluti4q_laneq_bf16_x2(
// CHECK-SAME: [2 x <8 x bfloat>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 0
// CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x bfloat>] [[VN_COERCE]], 1
// CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x bfloat> @llvm.aarch64.neon.vluti4q.laneq.x2.v8bf16(<8 x bfloat> [[VN_COERCE_FCA_0_EXTRACT]], <8 x bfloat> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 2)
// CHECK-NEXT: ret <8 x bfloat> [[VLUTI4Q_LANEQ_X24]]
//
bfloat16x8_t test_vluti4q_laneq_bf16_x2(bfloat16x8x2_t vn, uint8x16_t vm) {
  return vluti4q_laneq_bf16_x2(vn, vm, 2);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_lane_p16_x2(
// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <8 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
// CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
// CHECK-NEXT: [[VLUTI4Q_LANE_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.lane.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <8 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANE_X24]]
//
poly16x8_t test_vluti4q_lane_p16_x2(poly16x8x2_t vn, uint8x8_t vm) {
  return vluti4q_lane_p16_x2(vn, vm, 0);
}

// CHECK-LABEL: define dso_local <8 x i16> @test_vluti4q_laneq_p16_x2(
// CHECK-SAME: [2 x <8 x i16>] alignstack(16) [[VN_COERCE:%.*]], <16 x i8> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VN_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 0
// CHECK-NEXT: [[VN_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[VN_COERCE]], 1
// CHECK-NEXT: [[VLUTI4Q_LANEQ_X24:%.*]] = tail call <8 x i16> @llvm.aarch64.neon.vluti4q.laneq.x2.v8i16(<8 x i16> [[VN_COERCE_FCA_0_EXTRACT]], <8 x i16> [[VN_COERCE_FCA_1_EXTRACT]], <16 x i8> [[VM]], i32 0)
// CHECK-NEXT: ret <8 x i16> [[VLUTI4Q_LANEQ_X24]]
//
poly16x8_t test_vluti4q_laneq_p16_x2(poly16x8x2_t vn, uint8x16_t vm) {
  return vluti4q_laneq_p16_x2(vn, vm, 0);
}