1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ 2 // RUN: -disable-O0-optnone \ 3 // RUN: -flax-vector-conversions=none -emit-llvm -o - %s \ 4 // RUN: | opt -S -passes=mem2reg \ 5 // RUN: | FileCheck %s 6 7 // REQUIRES: aarch64-registered-target || arm-registered-target 8 9 #include <arm_neon.h> 10 11 // CHECK-LABEL: @test_vadd_s8( 12 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2 13 // CHECK: ret <8 x i8> [[ADD_I]] 14 int8x8_t test_vadd_s8(int8x8_t v1, int8x8_t v2) { 15 return vadd_s8(v1, v2); 16 } 17 18 // CHECK-LABEL: @test_vadd_s16( 19 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2 20 // CHECK: ret <4 x i16> [[ADD_I]] 21 int16x4_t test_vadd_s16(int16x4_t v1, int16x4_t v2) { 22 return vadd_s16(v1, v2); 23 } 24 25 // CHECK-LABEL: @test_vadd_s32( 26 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2 27 // CHECK: ret <2 x i32> [[ADD_I]] 28 int32x2_t test_vadd_s32(int32x2_t v1, int32x2_t v2) { 29 return vadd_s32(v1, v2); 30 } 31 32 // CHECK-LABEL: @test_vadd_s64( 33 // CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2 34 // CHECK: ret <1 x i64> [[ADD_I]] 35 int64x1_t test_vadd_s64(int64x1_t v1, int64x1_t v2) { 36 return vadd_s64(v1, v2); 37 } 38 39 // CHECK-LABEL: @test_vadd_f32( 40 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, %v2 41 // CHECK: ret <2 x float> [[ADD_I]] 42 float32x2_t test_vadd_f32(float32x2_t v1, float32x2_t v2) { 43 return vadd_f32(v1, v2); 44 } 45 46 // CHECK-LABEL: @test_vadd_u8( 47 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, %v2 48 // CHECK: ret <8 x i8> [[ADD_I]] 49 uint8x8_t test_vadd_u8(uint8x8_t v1, uint8x8_t v2) { 50 return vadd_u8(v1, v2); 51 } 52 53 // CHECK-LABEL: @test_vadd_u16( 54 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, %v2 55 // CHECK: ret <4 x i16> [[ADD_I]] 56 uint16x4_t test_vadd_u16(uint16x4_t v1, uint16x4_t v2) { 57 return vadd_u16(v1, v2); 58 } 59 60 // CHECK-LABEL: @test_vadd_u32( 61 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, %v2 62 // CHECK: ret <2 x i32> [[ADD_I]] 63 uint32x2_t test_vadd_u32(uint32x2_t v1, uint32x2_t v2) { 64 return vadd_u32(v1, v2); 65 } 66 67 // CHECK-LABEL: @test_vadd_u64( 68 // CHECK: [[ADD_I:%.*]] = add <1 x i64> %v1, %v2 69 // CHECK: ret <1 x i64> [[ADD_I]] 70 uint64x1_t test_vadd_u64(uint64x1_t v1, uint64x1_t v2) { 71 return vadd_u64(v1, v2); 72 } 73 74 // CHECK-LABEL: @test_vaddq_s8( 75 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2 76 // CHECK: ret <16 x i8> [[ADD_I]] 77 int8x16_t test_vaddq_s8(int8x16_t v1, int8x16_t v2) { 78 return vaddq_s8(v1, v2); 79 } 80 81 // CHECK-LABEL: @test_vaddq_s16( 82 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2 83 // CHECK: ret <8 x i16> [[ADD_I]] 84 int16x8_t test_vaddq_s16(int16x8_t v1, int16x8_t v2) { 85 return vaddq_s16(v1, v2); 86 } 87 88 // CHECK-LABEL: @test_vaddq_s32( 89 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2 90 // CHECK: ret <4 x i32> [[ADD_I]] 91 int32x4_t test_vaddq_s32(int32x4_t v1, int32x4_t v2) { 92 return vaddq_s32(v1, v2); 93 } 94 95 // CHECK-LABEL: @test_vaddq_s64( 96 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2 97 // CHECK: ret <2 x i64> [[ADD_I]] 98 int64x2_t test_vaddq_s64(int64x2_t v1, int64x2_t v2) { 99 return vaddq_s64(v1, v2); 100 } 101 102 // CHECK-LABEL: @test_vaddq_f32( 103 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, %v2 104 // CHECK: ret <4 x float> [[ADD_I]] 105 float32x4_t test_vaddq_f32(float32x4_t v1, float32x4_t v2) { 106 return vaddq_f32(v1, v2); 107 } 108 109 // CHECK-LABEL: @test_vaddq_f64( 110 // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, %v2 111 // CHECK: ret <2 x double> [[ADD_I]] 112 float64x2_t 
test_vaddq_f64(float64x2_t v1, float64x2_t v2) { 113 return vaddq_f64(v1, v2); 114 } 115 116 // CHECK-LABEL: @test_vaddq_u8( 117 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, %v2 118 // CHECK: ret <16 x i8> [[ADD_I]] 119 uint8x16_t test_vaddq_u8(uint8x16_t v1, uint8x16_t v2) { 120 return vaddq_u8(v1, v2); 121 } 122 123 // CHECK-LABEL: @test_vaddq_u16( 124 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, %v2 125 // CHECK: ret <8 x i16> [[ADD_I]] 126 uint16x8_t test_vaddq_u16(uint16x8_t v1, uint16x8_t v2) { 127 return vaddq_u16(v1, v2); 128 } 129 130 // CHECK-LABEL: @test_vaddq_u32( 131 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, %v2 132 // CHECK: ret <4 x i32> [[ADD_I]] 133 uint32x4_t test_vaddq_u32(uint32x4_t v1, uint32x4_t v2) { 134 return vaddq_u32(v1, v2); 135 } 136 137 // CHECK-LABEL: @test_vaddq_u64( 138 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %v1, %v2 139 // CHECK: ret <2 x i64> [[ADD_I]] 140 uint64x2_t test_vaddq_u64(uint64x2_t v1, uint64x2_t v2) { 141 return vaddq_u64(v1, v2); 142 } 143 144 // CHECK-LABEL: @test_vsub_s8( 145 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2 146 // CHECK: ret <8 x i8> [[SUB_I]] 147 int8x8_t test_vsub_s8(int8x8_t v1, int8x8_t v2) { 148 return vsub_s8(v1, v2); 149 } 150 151 // CHECK-LABEL: @test_vsub_s16( 152 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2 153 // CHECK: ret <4 x i16> [[SUB_I]] 154 int16x4_t test_vsub_s16(int16x4_t v1, int16x4_t v2) { 155 return vsub_s16(v1, v2); 156 } 157 158 // CHECK-LABEL: @test_vsub_s32( 159 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2 160 // CHECK: ret <2 x i32> [[SUB_I]] 161 int32x2_t test_vsub_s32(int32x2_t v1, int32x2_t v2) { 162 return vsub_s32(v1, v2); 163 } 164 165 // CHECK-LABEL: @test_vsub_s64( 166 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2 167 // CHECK: ret <1 x i64> [[SUB_I]] 168 int64x1_t test_vsub_s64(int64x1_t v1, int64x1_t v2) { 169 return vsub_s64(v1, v2); 170 } 171 172 // CHECK-LABEL: @test_vsub_f32( 173 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, %v2 174 // CHECK: ret <2 x float> [[SUB_I]] 175 float32x2_t test_vsub_f32(float32x2_t v1, float32x2_t v2) { 176 return vsub_f32(v1, v2); 177 } 178 179 // CHECK-LABEL: @test_vsub_u8( 180 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, %v2 181 // CHECK: ret <8 x i8> [[SUB_I]] 182 uint8x8_t test_vsub_u8(uint8x8_t v1, uint8x8_t v2) { 183 return vsub_u8(v1, v2); 184 } 185 186 // CHECK-LABEL: @test_vsub_u16( 187 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, %v2 188 // CHECK: ret <4 x i16> [[SUB_I]] 189 uint16x4_t test_vsub_u16(uint16x4_t v1, uint16x4_t v2) { 190 return vsub_u16(v1, v2); 191 } 192 193 // CHECK-LABEL: @test_vsub_u32( 194 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, %v2 195 // CHECK: ret <2 x i32> [[SUB_I]] 196 uint32x2_t test_vsub_u32(uint32x2_t v1, uint32x2_t v2) { 197 return vsub_u32(v1, v2); 198 } 199 200 // CHECK-LABEL: @test_vsub_u64( 201 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> %v1, %v2 202 // CHECK: ret <1 x i64> [[SUB_I]] 203 uint64x1_t test_vsub_u64(uint64x1_t v1, uint64x1_t v2) { 204 return vsub_u64(v1, v2); 205 } 206 207 // CHECK-LABEL: @test_vsubq_s8( 208 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2 209 // CHECK: ret <16 x i8> [[SUB_I]] 210 int8x16_t test_vsubq_s8(int8x16_t v1, int8x16_t v2) { 211 return vsubq_s8(v1, v2); 212 } 213 214 // CHECK-LABEL: @test_vsubq_s16( 215 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2 216 // CHECK: ret <8 x i16> [[SUB_I]] 217 int16x8_t test_vsubq_s16(int16x8_t v1, int16x8_t v2) { 218 return vsubq_s16(v1, v2); 219 } 220 221 // CHECK-LABEL: @test_vsubq_s32( 222 // CHECK: [[SUB_I:%.*]] = sub <4 
x i32> %v1, %v2 223 // CHECK: ret <4 x i32> [[SUB_I]] 224 int32x4_t test_vsubq_s32(int32x4_t v1, int32x4_t v2) { 225 return vsubq_s32(v1, v2); 226 } 227 228 // CHECK-LABEL: @test_vsubq_s64( 229 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2 230 // CHECK: ret <2 x i64> [[SUB_I]] 231 int64x2_t test_vsubq_s64(int64x2_t v1, int64x2_t v2) { 232 return vsubq_s64(v1, v2); 233 } 234 235 // CHECK-LABEL: @test_vsubq_f32( 236 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, %v2 237 // CHECK: ret <4 x float> [[SUB_I]] 238 float32x4_t test_vsubq_f32(float32x4_t v1, float32x4_t v2) { 239 return vsubq_f32(v1, v2); 240 } 241 242 // CHECK-LABEL: @test_vsubq_f64( 243 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, %v2 244 // CHECK: ret <2 x double> [[SUB_I]] 245 float64x2_t test_vsubq_f64(float64x2_t v1, float64x2_t v2) { 246 return vsubq_f64(v1, v2); 247 } 248 249 // CHECK-LABEL: @test_vsubq_u8( 250 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, %v2 251 // CHECK: ret <16 x i8> [[SUB_I]] 252 uint8x16_t test_vsubq_u8(uint8x16_t v1, uint8x16_t v2) { 253 return vsubq_u8(v1, v2); 254 } 255 256 // CHECK-LABEL: @test_vsubq_u16( 257 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, %v2 258 // CHECK: ret <8 x i16> [[SUB_I]] 259 uint16x8_t test_vsubq_u16(uint16x8_t v1, uint16x8_t v2) { 260 return vsubq_u16(v1, v2); 261 } 262 263 // CHECK-LABEL: @test_vsubq_u32( 264 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, %v2 265 // CHECK: ret <4 x i32> [[SUB_I]] 266 uint32x4_t test_vsubq_u32(uint32x4_t v1, uint32x4_t v2) { 267 return vsubq_u32(v1, v2); 268 } 269 270 // CHECK-LABEL: @test_vsubq_u64( 271 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %v1, %v2 272 // CHECK: ret <2 x i64> [[SUB_I]] 273 uint64x2_t test_vsubq_u64(uint64x2_t v1, uint64x2_t v2) { 274 return vsubq_u64(v1, v2); 275 } 276 277 // CHECK-LABEL: @test_vmul_s8( 278 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2 279 // CHECK: ret <8 x i8> [[MUL_I]] 280 int8x8_t test_vmul_s8(int8x8_t v1, int8x8_t v2) { 281 return vmul_s8(v1, v2); 282 } 283 284 // CHECK-LABEL: @test_vmul_s16( 285 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2 286 // CHECK: ret <4 x i16> [[MUL_I]] 287 int16x4_t test_vmul_s16(int16x4_t v1, int16x4_t v2) { 288 return vmul_s16(v1, v2); 289 } 290 291 // CHECK-LABEL: @test_vmul_s32( 292 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2 293 // CHECK: ret <2 x i32> [[MUL_I]] 294 int32x2_t test_vmul_s32(int32x2_t v1, int32x2_t v2) { 295 return vmul_s32(v1, v2); 296 } 297 298 // CHECK-LABEL: @test_vmul_f32( 299 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v1, %v2 300 // CHECK: ret <2 x float> [[MUL_I]] 301 float32x2_t test_vmul_f32(float32x2_t v1, float32x2_t v2) { 302 return vmul_f32(v1, v2); 303 } 304 305 // CHECK-LABEL: @test_vmul_u8( 306 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v1, %v2 307 // CHECK: ret <8 x i8> [[MUL_I]] 308 uint8x8_t test_vmul_u8(uint8x8_t v1, uint8x8_t v2) { 309 return vmul_u8(v1, v2); 310 } 311 312 // CHECK-LABEL: @test_vmul_u16( 313 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v1, %v2 314 // CHECK: ret <4 x i16> [[MUL_I]] 315 uint16x4_t test_vmul_u16(uint16x4_t v1, uint16x4_t v2) { 316 return vmul_u16(v1, v2); 317 } 318 319 // CHECK-LABEL: @test_vmul_u32( 320 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v1, %v2 321 // CHECK: ret <2 x i32> [[MUL_I]] 322 uint32x2_t test_vmul_u32(uint32x2_t v1, uint32x2_t v2) { 323 return vmul_u32(v1, v2); 324 } 325 326 // CHECK-LABEL: @test_vmulq_s8( 327 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2 328 // CHECK: ret <16 x i8> [[MUL_I]] 329 int8x16_t test_vmulq_s8(int8x16_t v1, int8x16_t v2) { 330 return 
vmulq_s8(v1, v2); 331 } 332 333 // CHECK-LABEL: @test_vmulq_s16( 334 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2 335 // CHECK: ret <8 x i16> [[MUL_I]] 336 int16x8_t test_vmulq_s16(int16x8_t v1, int16x8_t v2) { 337 return vmulq_s16(v1, v2); 338 } 339 340 // CHECK-LABEL: @test_vmulq_s32( 341 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2 342 // CHECK: ret <4 x i32> [[MUL_I]] 343 int32x4_t test_vmulq_s32(int32x4_t v1, int32x4_t v2) { 344 return vmulq_s32(v1, v2); 345 } 346 347 // CHECK-LABEL: @test_vmulq_u8( 348 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v1, %v2 349 // CHECK: ret <16 x i8> [[MUL_I]] 350 uint8x16_t test_vmulq_u8(uint8x16_t v1, uint8x16_t v2) { 351 return vmulq_u8(v1, v2); 352 } 353 354 // CHECK-LABEL: @test_vmulq_u16( 355 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v1, %v2 356 // CHECK: ret <8 x i16> [[MUL_I]] 357 uint16x8_t test_vmulq_u16(uint16x8_t v1, uint16x8_t v2) { 358 return vmulq_u16(v1, v2); 359 } 360 361 // CHECK-LABEL: @test_vmulq_u32( 362 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v1, %v2 363 // CHECK: ret <4 x i32> [[MUL_I]] 364 uint32x4_t test_vmulq_u32(uint32x4_t v1, uint32x4_t v2) { 365 return vmulq_u32(v1, v2); 366 } 367 368 // CHECK-LABEL: @test_vmulq_f32( 369 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v1, %v2 370 // CHECK: ret <4 x float> [[MUL_I]] 371 float32x4_t test_vmulq_f32(float32x4_t v1, float32x4_t v2) { 372 return vmulq_f32(v1, v2); 373 } 374 375 // CHECK-LABEL: @test_vmulq_f64( 376 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v1, %v2 377 // CHECK: ret <2 x double> [[MUL_I]] 378 float64x2_t test_vmulq_f64(float64x2_t v1, float64x2_t v2) { 379 return vmulq_f64(v1, v2); 380 } 381 382 // CHECK-LABEL: @test_vmul_p8( 383 // CHECK: [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %v1, <8 x i8> %v2) 384 // CHECK: ret <8 x i8> [[VMUL_V_I]] 385 poly8x8_t test_vmul_p8(poly8x8_t v1, poly8x8_t v2) { 386 return vmul_p8(v1, v2); 387 } 388 389 // CHECK-LABEL: @test_vmulq_p8( 390 // CHECK: [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %v1, <16 x i8> %v2) 391 // CHECK: ret <16 x i8> [[VMULQ_V_I]] 392 poly8x16_t test_vmulq_p8(poly8x16_t v1, poly8x16_t v2) { 393 return vmulq_p8(v1, v2); 394 } 395 396 // CHECK-LABEL: @test_vmla_s8( 397 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 398 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]] 399 // CHECK: ret <8 x i8> [[ADD_I]] 400 int8x8_t test_vmla_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { 401 return vmla_s8(v1, v2, v3); 402 } 403 404 // CHECK-LABEL: @test_vmla_s16( 405 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 406 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]] 407 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[ADD_I]] to <8 x i8> 408 // CHECK: ret <8 x i8> [[TMP0]] 409 int8x8_t test_vmla_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { 410 return (int8x8_t)vmla_s16(v1, v2, v3); 411 } 412 413 // CHECK-LABEL: @test_vmla_s32( 414 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 415 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]] 416 // CHECK: ret <2 x i32> [[ADD_I]] 417 int32x2_t test_vmla_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { 418 return vmla_s32(v1, v2, v3); 419 } 420 421 // CHECK-LABEL: @test_vmla_f32( 422 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3 423 // CHECK: [[ADD_I:%.*]] = fadd <2 x float> %v1, [[MUL_I]] 424 // CHECK: ret <2 x float> [[ADD_I]] 425 float32x2_t test_vmla_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { 426 return vmla_f32(v1, v2, v3); 427 } 428 429 // CHECK-LABEL: @test_vmla_u8( 430 // CHECK: 
[[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 431 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[MUL_I]] 432 // CHECK: ret <8 x i8> [[ADD_I]] 433 uint8x8_t test_vmla_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { 434 return vmla_u8(v1, v2, v3); 435 } 436 437 // CHECK-LABEL: @test_vmla_u16( 438 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 439 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[MUL_I]] 440 // CHECK: ret <4 x i16> [[ADD_I]] 441 uint16x4_t test_vmla_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { 442 return vmla_u16(v1, v2, v3); 443 } 444 445 // CHECK-LABEL: @test_vmla_u32( 446 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 447 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[MUL_I]] 448 // CHECK: ret <2 x i32> [[ADD_I]] 449 uint32x2_t test_vmla_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { 450 return vmla_u32(v1, v2, v3); 451 } 452 453 // CHECK-LABEL: @test_vmlaq_s8( 454 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 455 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]] 456 // CHECK: ret <16 x i8> [[ADD_I]] 457 int8x16_t test_vmlaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { 458 return vmlaq_s8(v1, v2, v3); 459 } 460 461 // CHECK-LABEL: @test_vmlaq_s16( 462 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 463 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]] 464 // CHECK: ret <8 x i16> [[ADD_I]] 465 int16x8_t test_vmlaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { 466 return vmlaq_s16(v1, v2, v3); 467 } 468 469 // CHECK-LABEL: @test_vmlaq_s32( 470 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 471 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]] 472 // CHECK: ret <4 x i32> [[ADD_I]] 473 int32x4_t test_vmlaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { 474 return vmlaq_s32(v1, v2, v3); 475 } 476 477 // CHECK-LABEL: @test_vmlaq_f32( 478 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3 479 // CHECK: [[ADD_I:%.*]] = fadd <4 x float> %v1, [[MUL_I]] 480 // CHECK: ret <4 x float> [[ADD_I]] 481 float32x4_t test_vmlaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { 482 return vmlaq_f32(v1, v2, v3); 483 } 484 485 // CHECK-LABEL: @test_vmlaq_u8( 486 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 487 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[MUL_I]] 488 // CHECK: ret <16 x i8> [[ADD_I]] 489 uint8x16_t test_vmlaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { 490 return vmlaq_u8(v1, v2, v3); 491 } 492 493 // CHECK-LABEL: @test_vmlaq_u16( 494 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 495 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[MUL_I]] 496 // CHECK: ret <8 x i16> [[ADD_I]] 497 uint16x8_t test_vmlaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { 498 return vmlaq_u16(v1, v2, v3); 499 } 500 501 // CHECK-LABEL: @test_vmlaq_u32( 502 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 503 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[MUL_I]] 504 // CHECK: ret <4 x i32> [[ADD_I]] 505 uint32x4_t test_vmlaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { 506 return vmlaq_u32(v1, v2, v3); 507 } 508 509 // CHECK-LABEL: @test_vmlaq_f64( 510 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3 511 // CHECK: [[ADD_I:%.*]] = fadd <2 x double> %v1, [[MUL_I]] 512 // CHECK: ret <2 x double> [[ADD_I]] 513 float64x2_t test_vmlaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { 514 return vmlaq_f64(v1, v2, v3); 515 } 516 517 // CHECK-LABEL: @test_vmls_s8( 518 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 519 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]] 520 // CHECK: ret <8 x i8> [[SUB_I]] 521 int8x8_t test_vmls_s8(int8x8_t v1, 
int8x8_t v2, int8x8_t v3) { 522 return vmls_s8(v1, v2, v3); 523 } 524 525 // CHECK-LABEL: @test_vmls_s16( 526 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 527 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]] 528 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SUB_I]] to <8 x i8> 529 // CHECK: ret <8 x i8> [[TMP0]] 530 int8x8_t test_vmls_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { 531 return (int8x8_t)vmls_s16(v1, v2, v3); 532 } 533 534 // CHECK-LABEL: @test_vmls_s32( 535 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 536 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]] 537 // CHECK: ret <2 x i32> [[SUB_I]] 538 int32x2_t test_vmls_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { 539 return vmls_s32(v1, v2, v3); 540 } 541 542 // CHECK-LABEL: @test_vmls_f32( 543 // CHECK: [[MUL_I:%.*]] = fmul <2 x float> %v2, %v3 544 // CHECK: [[SUB_I:%.*]] = fsub <2 x float> %v1, [[MUL_I]] 545 // CHECK: ret <2 x float> [[SUB_I]] 546 float32x2_t test_vmls_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { 547 return vmls_f32(v1, v2, v3); 548 } 549 550 // CHECK-LABEL: @test_vmls_u8( 551 // CHECK: [[MUL_I:%.*]] = mul <8 x i8> %v2, %v3 552 // CHECK: [[SUB_I:%.*]] = sub <8 x i8> %v1, [[MUL_I]] 553 // CHECK: ret <8 x i8> [[SUB_I]] 554 uint8x8_t test_vmls_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { 555 return vmls_u8(v1, v2, v3); 556 } 557 558 // CHECK-LABEL: @test_vmls_u16( 559 // CHECK: [[MUL_I:%.*]] = mul <4 x i16> %v2, %v3 560 // CHECK: [[SUB_I:%.*]] = sub <4 x i16> %v1, [[MUL_I]] 561 // CHECK: ret <4 x i16> [[SUB_I]] 562 uint16x4_t test_vmls_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { 563 return vmls_u16(v1, v2, v3); 564 } 565 566 // CHECK-LABEL: @test_vmls_u32( 567 // CHECK: [[MUL_I:%.*]] = mul <2 x i32> %v2, %v3 568 // CHECK: [[SUB_I:%.*]] = sub <2 x i32> %v1, [[MUL_I]] 569 // CHECK: ret <2 x i32> [[SUB_I]] 570 uint32x2_t test_vmls_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { 571 return vmls_u32(v1, v2, v3); 572 } 573 574 // CHECK-LABEL: @test_vmlsq_s8( 575 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 576 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]] 577 // CHECK: ret <16 x i8> [[SUB_I]] 578 int8x16_t test_vmlsq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { 579 return vmlsq_s8(v1, v2, v3); 580 } 581 582 // CHECK-LABEL: @test_vmlsq_s16( 583 // CHECK: [[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 584 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]] 585 // CHECK: ret <8 x i16> [[SUB_I]] 586 int16x8_t test_vmlsq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { 587 return vmlsq_s16(v1, v2, v3); 588 } 589 590 // CHECK-LABEL: @test_vmlsq_s32( 591 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 592 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]] 593 // CHECK: ret <4 x i32> [[SUB_I]] 594 int32x4_t test_vmlsq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { 595 return vmlsq_s32(v1, v2, v3); 596 } 597 598 // CHECK-LABEL: @test_vmlsq_f32( 599 // CHECK: [[MUL_I:%.*]] = fmul <4 x float> %v2, %v3 600 // CHECK: [[SUB_I:%.*]] = fsub <4 x float> %v1, [[MUL_I]] 601 // CHECK: ret <4 x float> [[SUB_I]] 602 float32x4_t test_vmlsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { 603 return vmlsq_f32(v1, v2, v3); 604 } 605 606 // CHECK-LABEL: @test_vmlsq_u8( 607 // CHECK: [[MUL_I:%.*]] = mul <16 x i8> %v2, %v3 608 // CHECK: [[SUB_I:%.*]] = sub <16 x i8> %v1, [[MUL_I]] 609 // CHECK: ret <16 x i8> [[SUB_I]] 610 uint8x16_t test_vmlsq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { 611 return vmlsq_u8(v1, v2, v3); 612 } 613 614 // CHECK-LABEL: @test_vmlsq_u16( 615 // CHECK: 
[[MUL_I:%.*]] = mul <8 x i16> %v2, %v3 616 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %v1, [[MUL_I]] 617 // CHECK: ret <8 x i16> [[SUB_I]] 618 uint16x8_t test_vmlsq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { 619 return vmlsq_u16(v1, v2, v3); 620 } 621 622 // CHECK-LABEL: @test_vmlsq_u32( 623 // CHECK: [[MUL_I:%.*]] = mul <4 x i32> %v2, %v3 624 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %v1, [[MUL_I]] 625 // CHECK: ret <4 x i32> [[SUB_I]] 626 uint32x4_t test_vmlsq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { 627 return vmlsq_u32(v1, v2, v3); 628 } 629 630 // CHECK-LABEL: @test_vmlsq_f64( 631 // CHECK: [[MUL_I:%.*]] = fmul <2 x double> %v2, %v3 632 // CHECK: [[SUB_I:%.*]] = fsub <2 x double> %v1, [[MUL_I]] 633 // CHECK: ret <2 x double> [[SUB_I]] 634 float64x2_t test_vmlsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { 635 return vmlsq_f64(v1, v2, v3); 636 } 637 638 // CHECK-LABEL: @test_vfma_f32( 639 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 640 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 641 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> 642 // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %v2, <2 x float> %v3, <2 x float> %v1) 643 // CHECK: ret <2 x float> [[TMP3]] 644 float32x2_t test_vfma_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { 645 return vfma_f32(v1, v2, v3); 646 } 647 648 // CHECK-LABEL: @test_vfmaq_f32( 649 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 650 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 651 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> 652 // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %v2, <4 x float> %v3, <4 x float> %v1) 653 // CHECK: ret <4 x float> [[TMP3]] 654 float32x4_t test_vfmaq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { 655 return vfmaq_f32(v1, v2, v3); 656 } 657 658 // CHECK-LABEL: @test_vfmaq_f64( 659 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 660 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 661 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> 662 // CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %v2, <2 x double> %v3, <2 x double> %v1) 663 // CHECK: ret <2 x double> [[TMP3]] 664 float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { 665 return vfmaq_f64(v1, v2, v3); 666 } 667 668 // CHECK-LABEL: @test_vfms_f32( 669 // CHECK: [[SUB_I:%.*]] = fneg <2 x float> %v2 670 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 671 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> 672 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> 673 // CHECK: [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %v3, <2 x float> %v1) 674 // CHECK: ret <2 x float> [[TMP3]] 675 float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { 676 return vfms_f32(v1, v2, v3); 677 } 678 679 // CHECK-LABEL: @test_vfmsq_f32( 680 // CHECK: [[SUB_I:%.*]] = fneg <4 x float> %v2 681 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 682 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> 683 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> 684 // CHECK: [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %v3, <4 x float> %v1) 685 // CHECK: ret <4 x float> [[TMP3]] 686 float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { 687 return vfmsq_f32(v1, v2, 
v3); 688 } 689 690 // CHECK-LABEL: @test_vfmsq_f64( 691 // CHECK: [[SUB_I:%.*]] = fneg <2 x double> %v2 692 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 693 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8> 694 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> 695 // CHECK: [[TMP3:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[SUB_I]], <2 x double> %v3, <2 x double> %v1) 696 // CHECK: ret <2 x double> [[TMP3]] 697 float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { 698 return vfmsq_f64(v1, v2, v3); 699 } 700 701 // CHECK-LABEL: @test_vdivq_f64( 702 // CHECK: [[DIV_I:%.*]] = fdiv <2 x double> %v1, %v2 703 // CHECK: ret <2 x double> [[DIV_I]] 704 float64x2_t test_vdivq_f64(float64x2_t v1, float64x2_t v2) { 705 return vdivq_f64(v1, v2); 706 } 707 708 // CHECK-LABEL: @test_vdivq_f32( 709 // CHECK: [[DIV_I:%.*]] = fdiv <4 x float> %v1, %v2 710 // CHECK: ret <4 x float> [[DIV_I]] 711 float32x4_t test_vdivq_f32(float32x4_t v1, float32x4_t v2) { 712 return vdivq_f32(v1, v2); 713 } 714 715 // CHECK-LABEL: @test_vdiv_f32( 716 // CHECK: [[DIV_I:%.*]] = fdiv <2 x float> %v1, %v2 717 // CHECK: ret <2 x float> [[DIV_I]] 718 float32x2_t test_vdiv_f32(float32x2_t v1, float32x2_t v2) { 719 return vdiv_f32(v1, v2); 720 } 721 722 // CHECK-LABEL: @test_vaba_s8( 723 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) 724 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]] 725 // CHECK: ret <8 x i8> [[ADD_I]] 726 int8x8_t test_vaba_s8(int8x8_t v1, int8x8_t v2, int8x8_t v3) { 727 return vaba_s8(v1, v2, v3); 728 } 729 730 // CHECK-LABEL: @test_vaba_s16( 731 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 732 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> 733 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) 734 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]] 735 // CHECK: ret <4 x i16> [[ADD_I]] 736 int16x4_t test_vaba_s16(int16x4_t v1, int16x4_t v2, int16x4_t v3) { 737 return vaba_s16(v1, v2, v3); 738 } 739 740 // CHECK-LABEL: @test_vaba_s32( 741 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 742 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> 743 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) 744 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]] 745 // CHECK: ret <2 x i32> [[ADD_I]] 746 int32x2_t test_vaba_s32(int32x2_t v1, int32x2_t v2, int32x2_t v3) { 747 return vaba_s32(v1, v2, v3); 748 } 749 750 // CHECK-LABEL: @test_vaba_u8( 751 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v2, <8 x i8> %v3) 752 // CHECK: [[ADD_I:%.*]] = add <8 x i8> %v1, [[VABD_I_I]] 753 // CHECK: ret <8 x i8> [[ADD_I]] 754 uint8x8_t test_vaba_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { 755 return vaba_u8(v1, v2, v3); 756 } 757 758 // CHECK-LABEL: @test_vaba_u16( 759 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 760 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> 761 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v2, <4 x i16> %v3) 762 // CHECK: [[ADD_I:%.*]] = add <4 x i16> %v1, [[VABD2_I_I]] 763 // CHECK: ret <4 x i16> [[ADD_I]] 764 uint16x4_t test_vaba_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { 765 return vaba_u16(v1, v2, v3); 766 } 767 768 // CHECK-LABEL: @test_vaba_u32( 769 // CHECK: [[TMP0:%.*]] = bitcast <2 x 
i32> %v2 to <8 x i8> 770 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> 771 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v2, <2 x i32> %v3) 772 // CHECK: [[ADD_I:%.*]] = add <2 x i32> %v1, [[VABD2_I_I]] 773 // CHECK: ret <2 x i32> [[ADD_I]] 774 uint32x2_t test_vaba_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { 775 return vaba_u32(v1, v2, v3); 776 } 777 778 // CHECK-LABEL: @test_vabaq_s8( 779 // CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) 780 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]] 781 // CHECK: ret <16 x i8> [[ADD_I]] 782 int8x16_t test_vabaq_s8(int8x16_t v1, int8x16_t v2, int8x16_t v3) { 783 return vabaq_s8(v1, v2, v3); 784 } 785 786 // CHECK-LABEL: @test_vabaq_s16( 787 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 788 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> 789 // CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) 790 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]] 791 // CHECK: ret <8 x i16> [[ADD_I]] 792 int16x8_t test_vabaq_s16(int16x8_t v1, int16x8_t v2, int16x8_t v3) { 793 return vabaq_s16(v1, v2, v3); 794 } 795 796 // CHECK-LABEL: @test_vabaq_s32( 797 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 798 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> 799 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) 800 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]] 801 // CHECK: ret <4 x i32> [[ADD_I]] 802 int32x4_t test_vabaq_s32(int32x4_t v1, int32x4_t v2, int32x4_t v3) { 803 return vabaq_s32(v1, v2, v3); 804 } 805 806 // CHECK-LABEL: @test_vabaq_u8( 807 // CHECK: [[VABD_I_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v2, <16 x i8> %v3) 808 // CHECK: [[ADD_I:%.*]] = add <16 x i8> %v1, [[VABD_I_I]] 809 // CHECK: ret <16 x i8> [[ADD_I]] 810 uint8x16_t test_vabaq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { 811 return vabaq_u8(v1, v2, v3); 812 } 813 814 // CHECK-LABEL: @test_vabaq_u16( 815 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 816 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> 817 // CHECK: [[VABD2_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v2, <8 x i16> %v3) 818 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %v1, [[VABD2_I_I]] 819 // CHECK: ret <8 x i16> [[ADD_I]] 820 uint16x8_t test_vabaq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { 821 return vabaq_u16(v1, v2, v3); 822 } 823 824 // CHECK-LABEL: @test_vabaq_u32( 825 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 826 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> 827 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v2, <4 x i32> %v3) 828 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %v1, [[VABD2_I_I]] 829 // CHECK: ret <4 x i32> [[ADD_I]] 830 uint32x4_t test_vabaq_u32(uint32x4_t v1, uint32x4_t v2, uint32x4_t v3) { 831 return vabaq_u32(v1, v2, v3); 832 } 833 834 // CHECK-LABEL: @test_vabd_s8( 835 // CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) 836 // CHECK: ret <8 x i8> [[VABD_I]] 837 int8x8_t test_vabd_s8(int8x8_t v1, int8x8_t v2) { 838 return vabd_s8(v1, v2); 839 } 840 841 // CHECK-LABEL: @test_vabd_s16( 842 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 843 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 844 // CHECK: 
[[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) 845 // CHECK: ret <4 x i16> [[VABD2_I]] 846 int16x4_t test_vabd_s16(int16x4_t v1, int16x4_t v2) { 847 return vabd_s16(v1, v2); 848 } 849 850 // CHECK-LABEL: @test_vabd_s32( 851 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 852 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 853 // CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) 854 // CHECK: ret <2 x i32> [[VABD2_I]] 855 int32x2_t test_vabd_s32(int32x2_t v1, int32x2_t v2) { 856 return vabd_s32(v1, v2); 857 } 858 859 // CHECK-LABEL: @test_vabd_u8( 860 // CHECK: [[VABD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %v1, <8 x i8> %v2) 861 // CHECK: ret <8 x i8> [[VABD_I]] 862 uint8x8_t test_vabd_u8(uint8x8_t v1, uint8x8_t v2) { 863 return vabd_u8(v1, v2); 864 } 865 866 // CHECK-LABEL: @test_vabd_u16( 867 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 868 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 869 // CHECK: [[VABD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %v1, <4 x i16> %v2) 870 // CHECK: ret <4 x i16> [[VABD2_I]] 871 uint16x4_t test_vabd_u16(uint16x4_t v1, uint16x4_t v2) { 872 return vabd_u16(v1, v2); 873 } 874 875 // CHECK-LABEL: @test_vabd_u32( 876 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 877 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 878 // CHECK: [[VABD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %v1, <2 x i32> %v2) 879 // CHECK: ret <2 x i32> [[VABD2_I]] 880 uint32x2_t test_vabd_u32(uint32x2_t v1, uint32x2_t v2) { 881 return vabd_u32(v1, v2); 882 } 883 884 // CHECK-LABEL: @test_vabd_f32( 885 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 886 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 887 // CHECK: [[VABD2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fabd.v2f32(<2 x float> %v1, <2 x float> %v2) 888 // CHECK: ret <2 x float> [[VABD2_I]] 889 float32x2_t test_vabd_f32(float32x2_t v1, float32x2_t v2) { 890 return vabd_f32(v1, v2); 891 } 892 893 // CHECK-LABEL: @test_vabdq_s8( 894 // CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) 895 // CHECK: ret <16 x i8> [[VABD_I]] 896 int8x16_t test_vabdq_s8(int8x16_t v1, int8x16_t v2) { 897 return vabdq_s8(v1, v2); 898 } 899 900 // CHECK-LABEL: @test_vabdq_s16( 901 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 902 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 903 // CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) 904 // CHECK: ret <8 x i16> [[VABD2_I]] 905 int16x8_t test_vabdq_s16(int16x8_t v1, int16x8_t v2) { 906 return vabdq_s16(v1, v2); 907 } 908 909 // CHECK-LABEL: @test_vabdq_s32( 910 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 911 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 912 // CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) 913 // CHECK: ret <4 x i32> [[VABD2_I]] 914 int32x4_t test_vabdq_s32(int32x4_t v1, int32x4_t v2) { 915 return vabdq_s32(v1, v2); 916 } 917 918 // CHECK-LABEL: @test_vabdq_u8( 919 // CHECK: [[VABD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %v1, <16 x i8> %v2) 920 // CHECK: ret <16 x i8> [[VABD_I]] 921 uint8x16_t test_vabdq_u8(uint8x16_t v1, uint8x16_t v2) { 922 return vabdq_u8(v1, v2); 923 } 924 925 // CHECK-LABEL: 
@test_vabdq_u16( 926 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 927 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 928 // CHECK: [[VABD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %v1, <8 x i16> %v2) 929 // CHECK: ret <8 x i16> [[VABD2_I]] 930 uint16x8_t test_vabdq_u16(uint16x8_t v1, uint16x8_t v2) { 931 return vabdq_u16(v1, v2); 932 } 933 934 // CHECK-LABEL: @test_vabdq_u32( 935 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 936 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 937 // CHECK: [[VABD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %v1, <4 x i32> %v2) 938 // CHECK: ret <4 x i32> [[VABD2_I]] 939 uint32x4_t test_vabdq_u32(uint32x4_t v1, uint32x4_t v2) { 940 return vabdq_u32(v1, v2); 941 } 942 943 // CHECK-LABEL: @test_vabdq_f32( 944 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 945 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 946 // CHECK: [[VABD2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fabd.v4f32(<4 x float> %v1, <4 x float> %v2) 947 // CHECK: ret <4 x float> [[VABD2_I]] 948 float32x4_t test_vabdq_f32(float32x4_t v1, float32x4_t v2) { 949 return vabdq_f32(v1, v2); 950 } 951 952 // CHECK-LABEL: @test_vabdq_f64( 953 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 954 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 955 // CHECK: [[VABD2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fabd.v2f64(<2 x double> %v1, <2 x double> %v2) 956 // CHECK: ret <2 x double> [[VABD2_I]] 957 float64x2_t test_vabdq_f64(float64x2_t v1, float64x2_t v2) { 958 return vabdq_f64(v1, v2); 959 } 960 961 // CHECK-LABEL: @test_vbsl_s8( 962 // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 963 // CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1) 964 // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 965 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] 966 // CHECK: ret <8 x i8> [[VBSL2_I]] 967 int8x8_t test_vbsl_s8(uint8x8_t v1, int8x8_t v2, int8x8_t v3) { 968 return vbsl_s8(v1, v2, v3); 969 } 970 971 // CHECK-LABEL: @test_vbsl_s16( 972 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 973 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 974 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> 975 // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 976 // CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1) 977 // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 978 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] 979 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <8 x i8> 980 // CHECK: ret <8 x i8> [[TMP4]] 981 int8x8_t test_vbsl_s16(uint16x4_t v1, int16x4_t v2, int16x4_t v3) { 982 return (int8x8_t)vbsl_s16(v1, v2, v3); 983 } 984 985 // CHECK-LABEL: @test_vbsl_s32( 986 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 987 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 988 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> 989 // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 990 // CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, splat (i32 -1) 991 // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 992 // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] 993 // CHECK: ret <2 x i32> [[VBSL5_I]] 994 int32x2_t test_vbsl_s32(uint32x2_t v1, int32x2_t v2, int32x2_t v3) { 995 return vbsl_s32(v1, v2, v3); 996 } 997 998 // CHECK-LABEL: @test_vbsl_s64( 999 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> 1000 // CHECK: 
[[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> 1001 // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> 1002 // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 1003 // CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1) 1004 // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 1005 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] 1006 // CHECK: ret <1 x i64> [[VBSL5_I]] 1007 int64x1_t test_vbsl_s64(uint64x1_t v1, int64x1_t v2, int64x1_t v3) { 1008 return vbsl_s64(v1, v2, v3); 1009 } 1010 1011 // CHECK-LABEL: @test_vbsl_u8( 1012 // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 1013 // CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1) 1014 // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 1015 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] 1016 // CHECK: ret <8 x i8> [[VBSL2_I]] 1017 uint8x8_t test_vbsl_u8(uint8x8_t v1, uint8x8_t v2, uint8x8_t v3) { 1018 return vbsl_u8(v1, v2, v3); 1019 } 1020 1021 // CHECK-LABEL: @test_vbsl_u16( 1022 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 1023 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 1024 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> 1025 // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 1026 // CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1) 1027 // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 1028 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] 1029 // CHECK: ret <4 x i16> [[VBSL5_I]] 1030 uint16x4_t test_vbsl_u16(uint16x4_t v1, uint16x4_t v2, uint16x4_t v3) { 1031 return vbsl_u16(v1, v2, v3); 1032 } 1033 1034 // CHECK-LABEL: @test_vbsl_u32( 1035 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 1036 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 1037 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %v3 to <8 x i8> 1038 // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, %v2 1039 // CHECK: [[TMP3:%.*]] = xor <2 x i32> %v1, splat (i32 -1) 1040 // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP3]], %v3 1041 // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] 1042 // CHECK: ret <2 x i32> [[VBSL5_I]] 1043 uint32x2_t test_vbsl_u32(uint32x2_t v1, uint32x2_t v2, uint32x2_t v3) { 1044 return vbsl_u32(v1, v2, v3); 1045 } 1046 1047 // CHECK-LABEL: @test_vbsl_u64( 1048 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> 1049 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %v2 to <8 x i8> 1050 // CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %v3 to <8 x i8> 1051 // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, %v2 1052 // CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1) 1053 // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %v3 1054 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] 1055 // CHECK: ret <1 x i64> [[VBSL5_I]] 1056 uint64x1_t test_vbsl_u64(uint64x1_t v1, uint64x1_t v2, uint64x1_t v3) { 1057 return vbsl_u64(v1, v2, v3); 1058 } 1059 1060 // CHECK-LABEL: @test_vbsl_f32( 1061 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 1062 // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1063 // CHECK: [[TMP3:%.*]] = bitcast <2 x float> %v3 to <8 x i8> 1064 // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> 1065 // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> 1066 // CHECK: [[VBSL3_I:%.*]] = and <2 x i32> %v1, [[VBSL1_I]] 1067 // CHECK: [[TMP4:%.*]] = xor <2 x i32> %v1, splat (i32 -1) 1068 // CHECK: [[VBSL4_I:%.*]] = and <2 x i32> [[TMP4]], [[VBSL2_I]] 1069 // CHECK: [[VBSL5_I:%.*]] = or <2 x i32> [[VBSL3_I]], [[VBSL4_I]] 1070 // 
CHECK: [[TMP5:%.*]] = bitcast <2 x i32> [[VBSL5_I]] to <2 x float> 1071 // CHECK: ret <2 x float> [[TMP5]] 1072 float32x2_t test_vbsl_f32(uint32x2_t v1, float32x2_t v2, float32x2_t v3) { 1073 return vbsl_f32(v1, v2, v3); 1074 } 1075 1076 // CHECK-LABEL: @test_vbsl_f64( 1077 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v1 to <8 x i8> 1078 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %v2 to <8 x i8> 1079 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v3 to <8 x i8> 1080 // CHECK: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 1081 // CHECK: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> 1082 // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %v1, [[VBSL1_I]] 1083 // CHECK: [[TMP3:%.*]] = xor <1 x i64> %v1, splat (i64 -1) 1084 // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], [[VBSL2_I]] 1085 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]] 1086 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[VBSL5_I]] to <1 x double> 1087 // CHECK: ret <1 x double> [[TMP4]] 1088 float64x1_t test_vbsl_f64(uint64x1_t v1, float64x1_t v2, float64x1_t v3) { 1089 return vbsl_f64(v1, v2, v3); 1090 } 1091 1092 // CHECK-LABEL: @test_vbsl_p8( 1093 // CHECK: [[VBSL_I:%.*]] = and <8 x i8> %v1, %v2 1094 // CHECK: [[TMP0:%.*]] = xor <8 x i8> %v1, splat (i8 -1) 1095 // CHECK: [[VBSL1_I:%.*]] = and <8 x i8> [[TMP0]], %v3 1096 // CHECK: [[VBSL2_I:%.*]] = or <8 x i8> [[VBSL_I]], [[VBSL1_I]] 1097 // CHECK: ret <8 x i8> [[VBSL2_I]] 1098 poly8x8_t test_vbsl_p8(uint8x8_t v1, poly8x8_t v2, poly8x8_t v3) { 1099 return vbsl_p8(v1, v2, v3); 1100 } 1101 1102 // CHECK-LABEL: @test_vbsl_p16( 1103 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 1104 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 1105 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %v3 to <8 x i8> 1106 // CHECK: [[VBSL3_I:%.*]] = and <4 x i16> %v1, %v2 1107 // CHECK: [[TMP3:%.*]] = xor <4 x i16> %v1, splat (i16 -1) 1108 // CHECK: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], %v3 1109 // CHECK: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] 1110 // CHECK: ret <4 x i16> [[VBSL5_I]] 1111 poly16x4_t test_vbsl_p16(uint16x4_t v1, poly16x4_t v2, poly16x4_t v3) { 1112 return vbsl_p16(v1, v2, v3); 1113 } 1114 1115 // CHECK-LABEL: @test_vbslq_s8( 1116 // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 1117 // CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1) 1118 // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 1119 // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] 1120 // CHECK: ret <16 x i8> [[VBSL2_I]] 1121 int8x16_t test_vbslq_s8(uint8x16_t v1, int8x16_t v2, int8x16_t v3) { 1122 return vbslq_s8(v1, v2, v3); 1123 } 1124 1125 // CHECK-LABEL: @test_vbslq_s16( 1126 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1127 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1128 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> 1129 // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 1130 // CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1) 1131 // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 1132 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] 1133 // CHECK: ret <8 x i16> [[VBSL5_I]] 1134 int16x8_t test_vbslq_s16(uint16x8_t v1, int16x8_t v2, int16x8_t v3) { 1135 return vbslq_s16(v1, v2, v3); 1136 } 1137 1138 // CHECK-LABEL: @test_vbslq_s32( 1139 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 1140 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 1141 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> 1142 // CHECK: 
[[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 1143 // CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1) 1144 // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 1145 // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] 1146 // CHECK: ret <4 x i32> [[VBSL5_I]] 1147 int32x4_t test_vbslq_s32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { 1148 return vbslq_s32(v1, v2, v3); 1149 } 1150 1151 // CHECK-LABEL: @test_vbslq_s64( 1152 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> 1153 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> 1154 // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> 1155 // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 1156 // CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1) 1157 // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 1158 // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] 1159 // CHECK: ret <2 x i64> [[VBSL5_I]] 1160 int64x2_t test_vbslq_s64(uint64x2_t v1, int64x2_t v2, int64x2_t v3) { 1161 return vbslq_s64(v1, v2, v3); 1162 } 1163 1164 // CHECK-LABEL: @test_vbslq_u8( 1165 // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 1166 // CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1) 1167 // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 1168 // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] 1169 // CHECK: ret <16 x i8> [[VBSL2_I]] 1170 uint8x16_t test_vbslq_u8(uint8x16_t v1, uint8x16_t v2, uint8x16_t v3) { 1171 return vbslq_u8(v1, v2, v3); 1172 } 1173 1174 // CHECK-LABEL: @test_vbslq_u16( 1175 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1176 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1177 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> 1178 // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 1179 // CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1) 1180 // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 1181 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] 1182 // CHECK: ret <8 x i16> [[VBSL5_I]] 1183 uint16x8_t test_vbslq_u16(uint16x8_t v1, uint16x8_t v2, uint16x8_t v3) { 1184 return vbslq_u16(v1, v2, v3); 1185 } 1186 1187 // CHECK-LABEL: @test_vbslq_u32( 1188 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 1189 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 1190 // CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %v3 to <16 x i8> 1191 // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, %v2 1192 // CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1) 1193 // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], %v3 1194 // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] 1195 // CHECK: ret <4 x i32> [[VBSL5_I]] 1196 int32x4_t test_vbslq_u32(uint32x4_t v1, int32x4_t v2, int32x4_t v3) { 1197 return vbslq_s32(v1, v2, v3); 1198 } 1199 1200 // CHECK-LABEL: @test_vbslq_u64( 1201 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> 1202 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> 1203 // CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %v3 to <16 x i8> 1204 // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, %v2 1205 // CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1) 1206 // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %v3 1207 // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] 1208 // CHECK: ret <2 x i64> [[VBSL5_I]] 1209 uint64x2_t test_vbslq_u64(uint64x2_t v1, uint64x2_t v2, uint64x2_t v3) { 1210 return vbslq_u64(v1, v2, v3); 1211 } 1212 1213 // CHECK-LABEL: @test_vbslq_f32( 1214 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 
1215 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1216 // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> 1217 // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 1218 // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> 1219 // CHECK: [[VBSL3_I:%.*]] = and <4 x i32> %v1, [[VBSL1_I]] 1220 // CHECK: [[TMP3:%.*]] = xor <4 x i32> %v1, splat (i32 -1) 1221 // CHECK: [[VBSL4_I:%.*]] = and <4 x i32> [[TMP3]], [[VBSL2_I]] 1222 // CHECK: [[VBSL5_I:%.*]] = or <4 x i32> [[VBSL3_I]], [[VBSL4_I]] 1223 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[VBSL5_I]] to <4 x float> 1224 // CHECK: ret <4 x float> [[TMP4]] 1225 float32x4_t test_vbslq_f32(uint32x4_t v1, float32x4_t v2, float32x4_t v3) { 1226 return vbslq_f32(v1, v2, v3); 1227 } 1228 1229 // CHECK-LABEL: @test_vbslq_p8( 1230 // CHECK: [[VBSL_I:%.*]] = and <16 x i8> %v1, %v2 1231 // CHECK: [[TMP0:%.*]] = xor <16 x i8> %v1, splat (i8 -1) 1232 // CHECK: [[VBSL1_I:%.*]] = and <16 x i8> [[TMP0]], %v3 1233 // CHECK: [[VBSL2_I:%.*]] = or <16 x i8> [[VBSL_I]], [[VBSL1_I]] 1234 // CHECK: ret <16 x i8> [[VBSL2_I]] 1235 poly8x16_t test_vbslq_p8(uint8x16_t v1, poly8x16_t v2, poly8x16_t v3) { 1236 return vbslq_p8(v1, v2, v3); 1237 } 1238 1239 // CHECK-LABEL: @test_vbslq_p16( 1240 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1241 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1242 // CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %v3 to <16 x i8> 1243 // CHECK: [[VBSL3_I:%.*]] = and <8 x i16> %v1, %v2 1244 // CHECK: [[TMP3:%.*]] = xor <8 x i16> %v1, splat (i16 -1) 1245 // CHECK: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], %v3 1246 // CHECK: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] 1247 // CHECK: ret <8 x i16> [[VBSL5_I]] 1248 poly16x8_t test_vbslq_p16(uint16x8_t v1, poly16x8_t v2, poly16x8_t v3) { 1249 return vbslq_p16(v1, v2, v3); 1250 } 1251 1252 // CHECK-LABEL: @test_vbslq_f64( 1253 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> 1254 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1255 // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> 1256 // CHECK: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 1257 // CHECK: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> 1258 // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %v1, [[VBSL1_I]] 1259 // CHECK: [[TMP3:%.*]] = xor <2 x i64> %v1, splat (i64 -1) 1260 // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], [[VBSL2_I]] 1261 // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]] 1262 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[VBSL5_I]] to <2 x double> 1263 // CHECK: ret <2 x double> [[TMP4]] 1264 float64x2_t test_vbslq_f64(uint64x2_t v1, float64x2_t v2, float64x2_t v3) { 1265 return vbslq_f64(v1, v2, v3); 1266 } 1267 1268 // CHECK-LABEL: @test_vrecps_f32( 1269 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1270 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1271 // CHECK: [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frecps.v2f32(<2 x float> %v1, <2 x float> %v2) 1272 // CHECK: ret <2 x float> [[VRECPS_V2_I]] 1273 float32x2_t test_vrecps_f32(float32x2_t v1, float32x2_t v2) { 1274 return vrecps_f32(v1, v2); 1275 } 1276 1277 // CHECK-LABEL: @test_vrecpsq_f32( 1278 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1279 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1280 // CHECK: [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frecps.v4f32(<4 x float> %v1, <4 x float> %v2) 1281 // CHECK: 
[[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8> 1282 // CHECK: ret <4 x float> [[VRECPSQ_V2_I]] 1283 float32x4_t test_vrecpsq_f32(float32x4_t v1, float32x4_t v2) { 1284 return vrecpsq_f32(v1, v2); 1285 } 1286 1287 // CHECK-LABEL: @test_vrecpsq_f64( 1288 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1289 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1290 // CHECK: [[VRECPSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frecps.v2f64(<2 x double> %v1, <2 x double> %v2) 1291 // CHECK: [[VRECPSQ_V3_I:%.*]] = bitcast <2 x double> [[VRECPSQ_V2_I]] to <16 x i8> 1292 // CHECK: ret <2 x double> [[VRECPSQ_V2_I]] 1293 float64x2_t test_vrecpsq_f64(float64x2_t v1, float64x2_t v2) { 1294 return vrecpsq_f64(v1, v2); 1295 } 1296 1297 // CHECK-LABEL: @test_vrsqrts_f32( 1298 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1299 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1300 // CHECK: [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.frsqrts.v2f32(<2 x float> %v1, <2 x float> %v2) 1301 // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8> 1302 // CHECK: ret <2 x float> [[VRSQRTS_V2_I]] 1303 float32x2_t test_vrsqrts_f32(float32x2_t v1, float32x2_t v2) { 1304 return vrsqrts_f32(v1, v2); 1305 } 1306 1307 // CHECK-LABEL: @test_vrsqrtsq_f32( 1308 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1309 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1310 // CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.frsqrts.v4f32(<4 x float> %v1, <4 x float> %v2) 1311 // CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8> 1312 // CHECK: ret <4 x float> [[VRSQRTSQ_V2_I]] 1313 float32x4_t test_vrsqrtsq_f32(float32x4_t v1, float32x4_t v2) { 1314 return vrsqrtsq_f32(v1, v2); 1315 } 1316 1317 // CHECK-LABEL: @test_vrsqrtsq_f64( 1318 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1319 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1320 // CHECK: [[VRSQRTSQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.frsqrts.v2f64(<2 x double> %v1, <2 x double> %v2) 1321 // CHECK: [[VRSQRTSQ_V3_I:%.*]] = bitcast <2 x double> [[VRSQRTSQ_V2_I]] to <16 x i8> 1322 // CHECK: ret <2 x double> [[VRSQRTSQ_V2_I]] 1323 float64x2_t test_vrsqrtsq_f64(float64x2_t v1, float64x2_t v2) { 1324 return vrsqrtsq_f64(v1, v2); 1325 } 1326 1327 // CHECK-LABEL: @test_vcage_f32( 1328 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1329 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1330 // CHECK: [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) 1331 // CHECK: ret <2 x i32> [[VCAGE_V2_I]] 1332 uint32x2_t test_vcage_f32(float32x2_t v1, float32x2_t v2) { 1333 return vcage_f32(v1, v2); 1334 } 1335 1336 // CHECK-LABEL: @test_vcage_f64( 1337 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 1338 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 1339 // CHECK: [[VCAGE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %a, <1 x double> %b) 1340 // CHECK: ret <1 x i64> [[VCAGE_V2_I]] 1341 uint64x1_t test_vcage_f64(float64x1_t a, float64x1_t b) { 1342 return vcage_f64(a, b); 1343 } 1344 1345 // CHECK-LABEL: @test_vcageq_f32( 1346 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1347 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1348 // CHECK: [[VCAGEQ_V2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) 1349 // CHECK: ret <4 x i32> [[VCAGEQ_V2_I]] 1350 uint32x4_t test_vcageq_f32(float32x4_t v1, float32x4_t v2) { 1351 return vcageq_f32(v1, v2); 1352 } 1353 1354 // CHECK-LABEL: @test_vcageq_f64( 1355 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1356 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1357 // CHECK: [[VCAGEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) 1358 // CHECK: ret <2 x i64> [[VCAGEQ_V2_I]] 1359 uint64x2_t test_vcageq_f64(float64x2_t v1, float64x2_t v2) { 1360 return vcageq_f64(v1, v2); 1361 } 1362 1363 // CHECK-LABEL: @test_vcagt_f32( 1364 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1365 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1366 // CHECK: [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v1, <2 x float> %v2) 1367 // CHECK: ret <2 x i32> [[VCAGT_V2_I]] 1368 uint32x2_t test_vcagt_f32(float32x2_t v1, float32x2_t v2) { 1369 return vcagt_f32(v1, v2); 1370 } 1371 1372 // CHECK-LABEL: @test_vcagt_f64( 1373 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 1374 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 1375 // CHECK: [[VCAGT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %a, <1 x double> %b) 1376 // CHECK: ret <1 x i64> [[VCAGT_V2_I]] 1377 uint64x1_t test_vcagt_f64(float64x1_t a, float64x1_t b) { 1378 return vcagt_f64(a, b); 1379 } 1380 1381 // CHECK-LABEL: @test_vcagtq_f32( 1382 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1383 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1384 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v1, <4 x float> %v2) 1385 // CHECK: ret <4 x i32> [[VCAGTQ_V2_I]] 1386 uint32x4_t test_vcagtq_f32(float32x4_t v1, float32x4_t v2) { 1387 return vcagtq_f32(v1, v2); 1388 } 1389 1390 // CHECK-LABEL: @test_vcagtq_f64( 1391 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1392 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1393 // CHECK: [[VCAGTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v1, <2 x double> %v2) 1394 // CHECK: ret <2 x i64> [[VCAGTQ_V2_I]] 1395 uint64x2_t test_vcagtq_f64(float64x2_t v1, float64x2_t v2) { 1396 return vcagtq_f64(v1, v2); 1397 } 1398 1399 // CHECK-LABEL: @test_vcale_f32( 1400 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1401 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1402 // CHECK: [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facge.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) 1403 // CHECK: ret <2 x i32> [[VCALE_V2_I]] 1404 uint32x2_t test_vcale_f32(float32x2_t v1, float32x2_t v2) { 1405 return vcale_f32(v1, v2); 1406 // Using registers other than v0, v1 are possible, but would be odd. 
1407 } 1408 1409 // CHECK-LABEL: @test_vcale_f64( 1410 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 1411 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 1412 // CHECK: [[VCALE_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facge.v1i64.v1f64(<1 x double> %b, <1 x double> %a) 1413 // CHECK: ret <1 x i64> [[VCALE_V2_I]] 1414 uint64x1_t test_vcale_f64(float64x1_t a, float64x1_t b) { 1415 return vcale_f64(a, b); 1416 } 1417 1418 // CHECK-LABEL: @test_vcaleq_f32( 1419 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1420 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1421 // CHECK: [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facge.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) 1422 // CHECK: ret <4 x i32> [[VCALEQ_V2_I]] 1423 uint32x4_t test_vcaleq_f32(float32x4_t v1, float32x4_t v2) { 1424 return vcaleq_f32(v1, v2); 1425 // Using registers other than v0, v1 is possible, but would be odd. 1426 } 1427 1428 // CHECK-LABEL: @test_vcaleq_f64( 1429 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1430 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1431 // CHECK: [[VCALEQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facge.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) 1432 // CHECK: ret <2 x i64> [[VCALEQ_V2_I]] 1433 uint64x2_t test_vcaleq_f64(float64x2_t v1, float64x2_t v2) { 1434 return vcaleq_f64(v1, v2); 1435 // Using registers other than v0, v1 is possible, but would be odd. 1436 } 1437 1438 // CHECK-LABEL: @test_vcalt_f32( 1439 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> 1440 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> 1441 // CHECK: [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.facgt.v2i32.v2f32(<2 x float> %v2, <2 x float> %v1) 1442 // CHECK: ret <2 x i32> [[VCALT_V2_I]] 1443 uint32x2_t test_vcalt_f32(float32x2_t v1, float32x2_t v2) { 1444 return vcalt_f32(v1, v2); 1445 // Using registers other than v0, v1 is possible, but would be odd. 1446 } 1447 1448 // CHECK-LABEL: @test_vcalt_f64( 1449 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 1450 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 1451 // CHECK: [[VCALT_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.facgt.v1i64.v1f64(<1 x double> %b, <1 x double> %a) 1452 // CHECK: ret <1 x i64> [[VCALT_V2_I]] 1453 uint64x1_t test_vcalt_f64(float64x1_t a, float64x1_t b) { 1454 return vcalt_f64(a, b); 1455 } 1456 1457 // CHECK-LABEL: @test_vcaltq_f32( 1458 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> 1459 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> 1460 // CHECK: [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.facgt.v4i32.v4f32(<4 x float> %v2, <4 x float> %v1) 1461 // CHECK: ret <4 x i32> [[VCALTQ_V2_I]] 1462 uint32x4_t test_vcaltq_f32(float32x4_t v1, float32x4_t v2) { 1463 return vcaltq_f32(v1, v2); 1464 // Using registers other than v0, v1 is possible, but would be odd. 1465 } 1466 1467 // CHECK-LABEL: @test_vcaltq_f64( 1468 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> 1469 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> 1470 // CHECK: [[VCALTQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.facgt.v2i64.v2f64(<2 x double> %v2, <2 x double> %v1) 1471 // CHECK: ret <2 x i64> [[VCALTQ_V2_I]] 1472 uint64x2_t test_vcaltq_f64(float64x2_t v1, float64x2_t v2) { 1473 return vcaltq_f64(v1, v2); 1474 // Using registers other than v0, v1 is possible, but would be odd.
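// The vcale*/vcalt* intrinsics have no facle/faclt counterpart; as the
// intrinsic calls checked above show, they are emitted as facge/facgt with
// the operand order swapped, so vcale_f32(a, b) is equivalent to
// vcage_f32(b, a) and vcalt_f32(a, b) to vcagt_f32(b, a).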
1475 } 1476 1477 // CHECK-LABEL: @test_vtst_s8( 1478 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 1479 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer 1480 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> 1481 // CHECK: ret <8 x i8> [[VTST_I]] 1482 uint8x8_t test_vtst_s8(int8x8_t v1, int8x8_t v2) { 1483 return vtst_s8(v1, v2); 1484 } 1485 1486 // CHECK-LABEL: @test_vtst_s16( 1487 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 1488 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 1489 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 1490 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer 1491 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> 1492 // CHECK: ret <4 x i16> [[VTST_I]] 1493 uint16x4_t test_vtst_s16(int16x4_t v1, int16x4_t v2) { 1494 return vtst_s16(v1, v2); 1495 } 1496 1497 // CHECK-LABEL: @test_vtst_s32( 1498 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 1499 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 1500 // CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2 1501 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer 1502 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> 1503 // CHECK: ret <2 x i32> [[VTST_I]] 1504 uint32x2_t test_vtst_s32(int32x2_t v1, int32x2_t v2) { 1505 return vtst_s32(v1, v2); 1506 } 1507 1508 // CHECK-LABEL: @test_vtst_u8( 1509 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 1510 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer 1511 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> 1512 // CHECK: ret <8 x i8> [[VTST_I]] 1513 uint8x8_t test_vtst_u8(uint8x8_t v1, uint8x8_t v2) { 1514 return vtst_u8(v1, v2); 1515 } 1516 1517 // CHECK-LABEL: @test_vtst_u16( 1518 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 1519 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 1520 // CHECK: [[TMP2:%.*]] = and <4 x i16> %v1, %v2 1521 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer 1522 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> 1523 // CHECK: ret <4 x i16> [[VTST_I]] 1524 uint16x4_t test_vtst_u16(uint16x4_t v1, uint16x4_t v2) { 1525 return vtst_u16(v1, v2); 1526 } 1527 1528 // CHECK-LABEL: @test_vtst_u32( 1529 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 1530 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 1531 // CHECK: [[TMP2:%.*]] = and <2 x i32> %v1, %v2 1532 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer 1533 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32> 1534 // CHECK: ret <2 x i32> [[VTST_I]] 1535 uint32x2_t test_vtst_u32(uint32x2_t v1, uint32x2_t v2) { 1536 return vtst_u32(v1, v2); 1537 } 1538 1539 // CHECK-LABEL: @test_vtstq_s8( 1540 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 1541 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer 1542 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> 1543 // CHECK: ret <16 x i8> [[VTST_I]] 1544 uint8x16_t test_vtstq_s8(int8x16_t v1, int8x16_t v2) { 1545 return vtstq_s8(v1, v2); 1546 } 1547 1548 // CHECK-LABEL: @test_vtstq_s16( 1549 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1550 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1551 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 1552 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer 1553 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> 1554 // CHECK: ret <8 x i16> [[VTST_I]] 1555 uint16x8_t 
test_vtstq_s16(int16x8_t v1, int16x8_t v2) { 1556 return vtstq_s16(v1, v2); 1557 } 1558 1559 // CHECK-LABEL: @test_vtstq_s32( 1560 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 1561 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 1562 // CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2 1563 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer 1564 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> 1565 // CHECK: ret <4 x i32> [[VTST_I]] 1566 uint32x4_t test_vtstq_s32(int32x4_t v1, int32x4_t v2) { 1567 return vtstq_s32(v1, v2); 1568 } 1569 1570 // CHECK-LABEL: @test_vtstq_u8( 1571 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 1572 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer 1573 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> 1574 // CHECK: ret <16 x i8> [[VTST_I]] 1575 uint8x16_t test_vtstq_u8(uint8x16_t v1, uint8x16_t v2) { 1576 return vtstq_u8(v1, v2); 1577 } 1578 1579 // CHECK-LABEL: @test_vtstq_u16( 1580 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1581 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1582 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 1583 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer 1584 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> 1585 // CHECK: ret <8 x i16> [[VTST_I]] 1586 uint16x8_t test_vtstq_u16(uint16x8_t v1, uint16x8_t v2) { 1587 return vtstq_u16(v1, v2); 1588 } 1589 1590 // CHECK-LABEL: @test_vtstq_u32( 1591 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 1592 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 1593 // CHECK: [[TMP2:%.*]] = and <4 x i32> %v1, %v2 1594 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer 1595 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32> 1596 // CHECK: ret <4 x i32> [[VTST_I]] 1597 uint32x4_t test_vtstq_u32(uint32x4_t v1, uint32x4_t v2) { 1598 return vtstq_u32(v1, v2); 1599 } 1600 1601 // CHECK-LABEL: @test_vtstq_s64( 1602 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> 1603 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> 1604 // CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2 1605 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer 1606 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> 1607 // CHECK: ret <2 x i64> [[VTST_I]] 1608 uint64x2_t test_vtstq_s64(int64x2_t v1, int64x2_t v2) { 1609 return vtstq_s64(v1, v2); 1610 } 1611 1612 // CHECK-LABEL: @test_vtstq_u64( 1613 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v1 to <16 x i8> 1614 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %v2 to <16 x i8> 1615 // CHECK: [[TMP2:%.*]] = and <2 x i64> %v1, %v2 1616 // CHECK: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP2]], zeroinitializer 1617 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i64> 1618 // CHECK: ret <2 x i64> [[VTST_I]] 1619 uint64x2_t test_vtstq_u64(uint64x2_t v1, uint64x2_t v2) { 1620 return vtstq_u64(v1, v2); 1621 } 1622 1623 // CHECK-LABEL: @test_vtst_p8( 1624 // CHECK: [[TMP0:%.*]] = and <8 x i8> %v1, %v2 1625 // CHECK: [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer 1626 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8> 1627 // CHECK: ret <8 x i8> [[VTST_I]] 1628 uint8x8_t test_vtst_p8(poly8x8_t v1, poly8x8_t v2) { 1629 return vtst_p8(v1, v2); 1630 } 1631 1632 // CHECK-LABEL: @test_vtst_p16( 1633 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 1634 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 1635 // CHECK: 
[[TMP2:%.*]] = and <4 x i16> %v1, %v2 1636 // CHECK: [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer 1637 // CHECK: [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16> 1638 // CHECK: ret <4 x i16> [[VTST_I]] 1639 uint16x4_t test_vtst_p16(poly16x4_t v1, poly16x4_t v2) { 1640 return vtst_p16(v1, v2); 1641 } 1642 1643 // CHECK-LABEL: @test_vtstq_p8( 1644 // CHECK: [[TMP0:%.*]] = and <16 x i8> %v1, %v2 1645 // CHECK: [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer 1646 // CHECK: [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8> 1647 // CHECK: ret <16 x i8> [[VTST_I]] 1648 uint8x16_t test_vtstq_p8(poly8x16_t v1, poly8x16_t v2) { 1649 return vtstq_p8(v1, v2); 1650 } 1651 1652 // CHECK-LABEL: @test_vtstq_p16( 1653 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 1654 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 1655 // CHECK: [[TMP2:%.*]] = and <8 x i16> %v1, %v2 1656 // CHECK: [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer 1657 // CHECK: [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16> 1658 // CHECK: ret <8 x i16> [[VTST_I]] 1659 uint16x8_t test_vtstq_p16(poly16x8_t v1, poly16x8_t v2) { 1660 return vtstq_p16(v1, v2); 1661 } 1662 1663 // CHECK-LABEL: @test_vtst_s64( 1664 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 1665 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 1666 // CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b 1667 // CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer 1668 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> 1669 // CHECK: ret <1 x i64> [[VTST_I]] 1670 uint64x1_t test_vtst_s64(int64x1_t a, int64x1_t b) { 1671 return vtst_s64(a, b); 1672 } 1673 1674 // CHECK-LABEL: @test_vtst_u64( 1675 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 1676 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 1677 // CHECK: [[TMP2:%.*]] = and <1 x i64> %a, %b 1678 // CHECK: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer 1679 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64> 1680 // CHECK: ret <1 x i64> [[VTST_I]] 1681 uint64x1_t test_vtst_u64(uint64x1_t a, uint64x1_t b) { 1682 return vtst_u64(a, b); 1683 } 1684 1685 // CHECK-LABEL: @test_vceq_s8( 1686 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 1687 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1688 // CHECK: ret <8 x i8> [[SEXT_I]] 1689 uint8x8_t test_vceq_s8(int8x8_t v1, int8x8_t v2) { 1690 return vceq_s8(v1, v2); 1691 } 1692 1693 // CHECK-LABEL: @test_vceq_s16( 1694 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2 1695 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1696 // CHECK: ret <4 x i16> [[SEXT_I]] 1697 uint16x4_t test_vceq_s16(int16x4_t v1, int16x4_t v2) { 1698 return vceq_s16(v1, v2); 1699 } 1700 1701 // CHECK-LABEL: @test_vceq_s32( 1702 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2 1703 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1704 // CHECK: ret <2 x i32> [[SEXT_I]] 1705 uint32x2_t test_vceq_s32(int32x2_t v1, int32x2_t v2) { 1706 return vceq_s32(v1, v2); 1707 } 1708 1709 // CHECK-LABEL: @test_vceq_s64( 1710 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b 1711 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 1712 // CHECK: ret <1 x i64> [[SEXT_I]] 1713 uint64x1_t test_vceq_s64(int64x1_t a, int64x1_t b) { 1714 return vceq_s64(a, b); 1715 } 1716 1717 // CHECK-LABEL: @test_vceq_u64( 1718 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b 1719 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> 
[[CMP_I]] to <1 x i64> 1720 // CHECK: ret <1 x i64> [[SEXT_I]] 1721 uint64x1_t test_vceq_u64(uint64x1_t a, uint64x1_t b) { 1722 return vceq_u64(a, b); 1723 } 1724 1725 // CHECK-LABEL: @test_vceq_f32( 1726 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x float> %v1, %v2 1727 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1728 // CHECK: ret <2 x i32> [[SEXT_I]] 1729 uint32x2_t test_vceq_f32(float32x2_t v1, float32x2_t v2) { 1730 return vceq_f32(v1, v2); 1731 } 1732 1733 // CHECK-LABEL: @test_vceq_f64( 1734 // CHECK: [[CMP_I:%.*]] = fcmp oeq <1 x double> %a, %b 1735 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 1736 // CHECK: ret <1 x i64> [[SEXT_I]] 1737 uint64x1_t test_vceq_f64(float64x1_t a, float64x1_t b) { 1738 return vceq_f64(a, b); 1739 } 1740 1741 // CHECK-LABEL: @test_vceq_u8( 1742 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 1743 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1744 // CHECK: ret <8 x i8> [[SEXT_I]] 1745 uint8x8_t test_vceq_u8(uint8x8_t v1, uint8x8_t v2) { 1746 return vceq_u8(v1, v2); 1747 } 1748 1749 // CHECK-LABEL: @test_vceq_u16( 1750 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i16> %v1, %v2 1751 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1752 // CHECK: ret <4 x i16> [[SEXT_I]] 1753 uint16x4_t test_vceq_u16(uint16x4_t v1, uint16x4_t v2) { 1754 return vceq_u16(v1, v2); 1755 } 1756 1757 // CHECK-LABEL: @test_vceq_u32( 1758 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i32> %v1, %v2 1759 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1760 // CHECK: ret <2 x i32> [[SEXT_I]] 1761 uint32x2_t test_vceq_u32(uint32x2_t v1, uint32x2_t v2) { 1762 return vceq_u32(v1, v2); 1763 } 1764 1765 // CHECK-LABEL: @test_vceq_p8( 1766 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i8> %v1, %v2 1767 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1768 // CHECK: ret <8 x i8> [[SEXT_I]] 1769 uint8x8_t test_vceq_p8(poly8x8_t v1, poly8x8_t v2) { 1770 return vceq_p8(v1, v2); 1771 } 1772 1773 // CHECK-LABEL: @test_vceqq_s8( 1774 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 1775 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1776 // CHECK: ret <16 x i8> [[SEXT_I]] 1777 uint8x16_t test_vceqq_s8(int8x16_t v1, int8x16_t v2) { 1778 return vceqq_s8(v1, v2); 1779 } 1780 1781 // CHECK-LABEL: @test_vceqq_s16( 1782 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2 1783 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1784 // CHECK: ret <8 x i16> [[SEXT_I]] 1785 uint16x8_t test_vceqq_s16(int16x8_t v1, int16x8_t v2) { 1786 return vceqq_s16(v1, v2); 1787 } 1788 1789 // CHECK-LABEL: @test_vceqq_s32( 1790 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2 1791 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1792 // CHECK: ret <4 x i32> [[SEXT_I]] 1793 uint32x4_t test_vceqq_s32(int32x4_t v1, int32x4_t v2) { 1794 return vceqq_s32(v1, v2); 1795 } 1796 1797 // CHECK-LABEL: @test_vceqq_f32( 1798 // CHECK: [[CMP_I:%.*]] = fcmp oeq <4 x float> %v1, %v2 1799 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1800 // CHECK: ret <4 x i32> [[SEXT_I]] 1801 uint32x4_t test_vceqq_f32(float32x4_t v1, float32x4_t v2) { 1802 return vceqq_f32(v1, v2); 1803 } 1804 1805 // CHECK-LABEL: @test_vceqq_u8( 1806 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 1807 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1808 // CHECK: ret <16 x i8> [[SEXT_I]] 1809 uint8x16_t test_vceqq_u8(uint8x16_t v1, uint8x16_t v2) { 1810 return vceqq_u8(v1, v2); 1811 } 1812 1813 
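// The vceq*/vcge*/vcgt*/vcle*/vclt* tests in this file all check the same IR
// shape: an element-wise compare producing <N x i1> that is sign-extended so
// "true" lanes become all-ones masks of the element width (vtst adds an 'and'
// before the compare). A minimal scalar sketch of that semantic follows; it
// is illustrative only, is not exercised by any CHECK line, and the helper
// name is made up.
static void ref_lane_eq_s8(const int8_t *a, const int8_t *b, uint8_t *out,
                           int lanes) {
  for (int i = 0; i < lanes; ++i)
    out[i] = (a[i] == b[i]) ? 0xFF : 0x00; // sext of i1: all-ones or all-zeros
}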
// CHECK-LABEL: @test_vceqq_u16( 1814 // CHECK: [[CMP_I:%.*]] = icmp eq <8 x i16> %v1, %v2 1815 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1816 // CHECK: ret <8 x i16> [[SEXT_I]] 1817 uint16x8_t test_vceqq_u16(uint16x8_t v1, uint16x8_t v2) { 1818 return vceqq_u16(v1, v2); 1819 } 1820 1821 // CHECK-LABEL: @test_vceqq_u32( 1822 // CHECK: [[CMP_I:%.*]] = icmp eq <4 x i32> %v1, %v2 1823 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1824 // CHECK: ret <4 x i32> [[SEXT_I]] 1825 uint32x4_t test_vceqq_u32(uint32x4_t v1, uint32x4_t v2) { 1826 return vceqq_u32(v1, v2); 1827 } 1828 1829 // CHECK-LABEL: @test_vceqq_p8( 1830 // CHECK: [[CMP_I:%.*]] = icmp eq <16 x i8> %v1, %v2 1831 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1832 // CHECK: ret <16 x i8> [[SEXT_I]] 1833 uint8x16_t test_vceqq_p8(poly8x16_t v1, poly8x16_t v2) { 1834 return vceqq_p8(v1, v2); 1835 } 1836 1837 // CHECK-LABEL: @test_vceqq_s64( 1838 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2 1839 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 1840 // CHECK: ret <2 x i64> [[SEXT_I]] 1841 uint64x2_t test_vceqq_s64(int64x2_t v1, int64x2_t v2) { 1842 return vceqq_s64(v1, v2); 1843 } 1844 1845 // CHECK-LABEL: @test_vceqq_u64( 1846 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %v1, %v2 1847 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 1848 // CHECK: ret <2 x i64> [[SEXT_I]] 1849 uint64x2_t test_vceqq_u64(uint64x2_t v1, uint64x2_t v2) { 1850 return vceqq_u64(v1, v2); 1851 } 1852 1853 // CHECK-LABEL: @test_vceqq_f64( 1854 // CHECK: [[CMP_I:%.*]] = fcmp oeq <2 x double> %v1, %v2 1855 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 1856 // CHECK: ret <2 x i64> [[SEXT_I]] 1857 uint64x2_t test_vceqq_f64(float64x2_t v1, float64x2_t v2) { 1858 return vceqq_f64(v1, v2); 1859 } 1860 1861 // CHECK-LABEL: @test_vcge_s8( 1862 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i8> %v1, %v2 1863 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1864 // CHECK: ret <8 x i8> [[SEXT_I]] 1865 uint8x8_t test_vcge_s8(int8x8_t v1, int8x8_t v2) { 1866 return vcge_s8(v1, v2); 1867 } 1868 1869 // CHECK-LABEL: @test_vcge_s16( 1870 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i16> %v1, %v2 1871 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1872 // CHECK: ret <4 x i16> [[SEXT_I]] 1873 uint16x4_t test_vcge_s16(int16x4_t v1, int16x4_t v2) { 1874 return vcge_s16(v1, v2); 1875 } 1876 1877 // CHECK-LABEL: @test_vcge_s32( 1878 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i32> %v1, %v2 1879 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1880 // CHECK: ret <2 x i32> [[SEXT_I]] 1881 uint32x2_t test_vcge_s32(int32x2_t v1, int32x2_t v2) { 1882 return vcge_s32(v1, v2); 1883 } 1884 1885 // CHECK-LABEL: @test_vcge_s64( 1886 // CHECK: [[CMP_I:%.*]] = icmp sge <1 x i64> %a, %b 1887 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 1888 // CHECK: ret <1 x i64> [[SEXT_I]] 1889 uint64x1_t test_vcge_s64(int64x1_t a, int64x1_t b) { 1890 return vcge_s64(a, b); 1891 } 1892 1893 // CHECK-LABEL: @test_vcge_u64( 1894 // CHECK: [[CMP_I:%.*]] = icmp uge <1 x i64> %a, %b 1895 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 1896 // CHECK: ret <1 x i64> [[SEXT_I]] 1897 uint64x1_t test_vcge_u64(uint64x1_t a, uint64x1_t b) { 1898 return vcge_u64(a, b); 1899 } 1900 1901 // CHECK-LABEL: @test_vcge_f32( 1902 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x float> %v1, %v2 1903 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1904 // 
CHECK: ret <2 x i32> [[SEXT_I]] 1905 uint32x2_t test_vcge_f32(float32x2_t v1, float32x2_t v2) { 1906 return vcge_f32(v1, v2); 1907 } 1908 1909 // CHECK-LABEL: @test_vcge_f64( 1910 // CHECK: [[CMP_I:%.*]] = fcmp oge <1 x double> %a, %b 1911 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 1912 // CHECK: ret <1 x i64> [[SEXT_I]] 1913 uint64x1_t test_vcge_f64(float64x1_t a, float64x1_t b) { 1914 return vcge_f64(a, b); 1915 } 1916 1917 // CHECK-LABEL: @test_vcge_u8( 1918 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i8> %v1, %v2 1919 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 1920 // CHECK: ret <8 x i8> [[SEXT_I]] 1921 uint8x8_t test_vcge_u8(uint8x8_t v1, uint8x8_t v2) { 1922 return vcge_u8(v1, v2); 1923 } 1924 1925 // CHECK-LABEL: @test_vcge_u16( 1926 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i16> %v1, %v2 1927 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 1928 // CHECK: ret <4 x i16> [[SEXT_I]] 1929 uint16x4_t test_vcge_u16(uint16x4_t v1, uint16x4_t v2) { 1930 return vcge_u16(v1, v2); 1931 } 1932 1933 // CHECK-LABEL: @test_vcge_u32( 1934 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i32> %v1, %v2 1935 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 1936 // CHECK: ret <2 x i32> [[SEXT_I]] 1937 uint32x2_t test_vcge_u32(uint32x2_t v1, uint32x2_t v2) { 1938 return vcge_u32(v1, v2); 1939 } 1940 1941 // CHECK-LABEL: @test_vcgeq_s8( 1942 // CHECK: [[CMP_I:%.*]] = icmp sge <16 x i8> %v1, %v2 1943 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1944 // CHECK: ret <16 x i8> [[SEXT_I]] 1945 uint8x16_t test_vcgeq_s8(int8x16_t v1, int8x16_t v2) { 1946 return vcgeq_s8(v1, v2); 1947 } 1948 1949 // CHECK-LABEL: @test_vcgeq_s16( 1950 // CHECK: [[CMP_I:%.*]] = icmp sge <8 x i16> %v1, %v2 1951 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1952 // CHECK: ret <8 x i16> [[SEXT_I]] 1953 uint16x8_t test_vcgeq_s16(int16x8_t v1, int16x8_t v2) { 1954 return vcgeq_s16(v1, v2); 1955 } 1956 1957 // CHECK-LABEL: @test_vcgeq_s32( 1958 // CHECK: [[CMP_I:%.*]] = icmp sge <4 x i32> %v1, %v2 1959 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1960 // CHECK: ret <4 x i32> [[SEXT_I]] 1961 uint32x4_t test_vcgeq_s32(int32x4_t v1, int32x4_t v2) { 1962 return vcgeq_s32(v1, v2); 1963 } 1964 1965 // CHECK-LABEL: @test_vcgeq_f32( 1966 // CHECK: [[CMP_I:%.*]] = fcmp oge <4 x float> %v1, %v2 1967 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1968 // CHECK: ret <4 x i32> [[SEXT_I]] 1969 uint32x4_t test_vcgeq_f32(float32x4_t v1, float32x4_t v2) { 1970 return vcgeq_f32(v1, v2); 1971 } 1972 1973 // CHECK-LABEL: @test_vcgeq_u8( 1974 // CHECK: [[CMP_I:%.*]] = icmp uge <16 x i8> %v1, %v2 1975 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 1976 // CHECK: ret <16 x i8> [[SEXT_I]] 1977 uint8x16_t test_vcgeq_u8(uint8x16_t v1, uint8x16_t v2) { 1978 return vcgeq_u8(v1, v2); 1979 } 1980 1981 // CHECK-LABEL: @test_vcgeq_u16( 1982 // CHECK: [[CMP_I:%.*]] = icmp uge <8 x i16> %v1, %v2 1983 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 1984 // CHECK: ret <8 x i16> [[SEXT_I]] 1985 uint16x8_t test_vcgeq_u16(uint16x8_t v1, uint16x8_t v2) { 1986 return vcgeq_u16(v1, v2); 1987 } 1988 1989 // CHECK-LABEL: @test_vcgeq_u32( 1990 // CHECK: [[CMP_I:%.*]] = icmp uge <4 x i32> %v1, %v2 1991 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 1992 // CHECK: ret <4 x i32> [[SEXT_I]] 1993 uint32x4_t test_vcgeq_u32(uint32x4_t v1, uint32x4_t v2) { 1994 return vcgeq_u32(v1, v2); 1995 } 1996 1997 // 
CHECK-LABEL: @test_vcgeq_s64( 1998 // CHECK: [[CMP_I:%.*]] = icmp sge <2 x i64> %v1, %v2 1999 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2000 // CHECK: ret <2 x i64> [[SEXT_I]] 2001 uint64x2_t test_vcgeq_s64(int64x2_t v1, int64x2_t v2) { 2002 return vcgeq_s64(v1, v2); 2003 } 2004 2005 // CHECK-LABEL: @test_vcgeq_u64( 2006 // CHECK: [[CMP_I:%.*]] = icmp uge <2 x i64> %v1, %v2 2007 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2008 // CHECK: ret <2 x i64> [[SEXT_I]] 2009 uint64x2_t test_vcgeq_u64(uint64x2_t v1, uint64x2_t v2) { 2010 return vcgeq_u64(v1, v2); 2011 } 2012 2013 // CHECK-LABEL: @test_vcgeq_f64( 2014 // CHECK: [[CMP_I:%.*]] = fcmp oge <2 x double> %v1, %v2 2015 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2016 // CHECK: ret <2 x i64> [[SEXT_I]] 2017 uint64x2_t test_vcgeq_f64(float64x2_t v1, float64x2_t v2) { 2018 return vcgeq_f64(v1, v2); 2019 } 2020 2021 // CHECK-LABEL: @test_vcle_s8( 2022 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i8> %v1, %v2 2023 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2024 // CHECK: ret <8 x i8> [[SEXT_I]] 2025 // Notes about vcle: 2026 // LE condition predicate is implemented as GE with the operands swapped in the generated code; the IR checked above keeps the source operand order ('icmp sle'). 2027 // Using registers other than v0, v1 is possible, but would be odd. 2028 uint8x8_t test_vcle_s8(int8x8_t v1, int8x8_t v2) { 2029 return vcle_s8(v1, v2); 2030 } 2031 2032 // CHECK-LABEL: @test_vcle_s16( 2033 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i16> %v1, %v2 2034 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2035 // CHECK: ret <4 x i16> [[SEXT_I]] 2036 uint16x4_t test_vcle_s16(int16x4_t v1, int16x4_t v2) { 2037 return vcle_s16(v1, v2); 2038 } 2039 2040 // CHECK-LABEL: @test_vcle_s32( 2041 // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i32> %v1, %v2 2042 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2043 // CHECK: ret <2 x i32> [[SEXT_I]] 2044 uint32x2_t test_vcle_s32(int32x2_t v1, int32x2_t v2) { 2045 return vcle_s32(v1, v2); 2046 } 2047 2048 // CHECK-LABEL: @test_vcle_s64( 2049 // CHECK: [[CMP_I:%.*]] = icmp sle <1 x i64> %a, %b 2050 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2051 // CHECK: ret <1 x i64> [[SEXT_I]] 2052 uint64x1_t test_vcle_s64(int64x1_t a, int64x1_t b) { 2053 return vcle_s64(a, b); 2054 } 2055 2056 // CHECK-LABEL: @test_vcle_u64( 2057 // CHECK: [[CMP_I:%.*]] = icmp ule <1 x i64> %a, %b 2058 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2059 // CHECK: ret <1 x i64> [[SEXT_I]] 2060 uint64x1_t test_vcle_u64(uint64x1_t a, uint64x1_t b) { 2061 return vcle_u64(a, b); 2062 } 2063 2064 // CHECK-LABEL: @test_vcle_f32( 2065 // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x float> %v1, %v2 2066 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2067 // CHECK: ret <2 x i32> [[SEXT_I]] 2068 uint32x2_t test_vcle_f32(float32x2_t v1, float32x2_t v2) { 2069 return vcle_f32(v1, v2); 2070 } 2071 2072 // CHECK-LABEL: @test_vcle_f64( 2073 // CHECK: [[CMP_I:%.*]] = fcmp ole <1 x double> %a, %b 2074 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2075 // CHECK: ret <1 x i64> [[SEXT_I]] 2076 uint64x1_t test_vcle_f64(float64x1_t a, float64x1_t b) { 2077 return vcle_f64(a, b); 2078 } 2079 2080 // CHECK-LABEL: @test_vcle_u8( 2081 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i8> %v1, %v2 2082 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2083 // CHECK: ret <8 x i8> [[SEXT_I]] 2084 uint8x8_t test_vcle_u8(uint8x8_t v1, uint8x8_t v2) { 2085 return vcle_u8(v1, v2); 2086 } 2087 2088 //
CHECK-LABEL: @test_vcle_u16( 2089 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i16> %v1, %v2 2090 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2091 // CHECK: ret <4 x i16> [[SEXT_I]] 2092 uint16x4_t test_vcle_u16(uint16x4_t v1, uint16x4_t v2) { 2093 return vcle_u16(v1, v2); 2094 } 2095 2096 // CHECK-LABEL: @test_vcle_u32( 2097 // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i32> %v1, %v2 2098 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2099 // CHECK: ret <2 x i32> [[SEXT_I]] 2100 uint32x2_t test_vcle_u32(uint32x2_t v1, uint32x2_t v2) { 2101 return vcle_u32(v1, v2); 2102 } 2103 2104 // CHECK-LABEL: @test_vcleq_s8( 2105 // CHECK: [[CMP_I:%.*]] = icmp sle <16 x i8> %v1, %v2 2106 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2107 // CHECK: ret <16 x i8> [[SEXT_I]] 2108 uint8x16_t test_vcleq_s8(int8x16_t v1, int8x16_t v2) { 2109 return vcleq_s8(v1, v2); 2110 } 2111 2112 // CHECK-LABEL: @test_vcleq_s16( 2113 // CHECK: [[CMP_I:%.*]] = icmp sle <8 x i16> %v1, %v2 2114 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2115 // CHECK: ret <8 x i16> [[SEXT_I]] 2116 uint16x8_t test_vcleq_s16(int16x8_t v1, int16x8_t v2) { 2117 return vcleq_s16(v1, v2); 2118 } 2119 2120 // CHECK-LABEL: @test_vcleq_s32( 2121 // CHECK: [[CMP_I:%.*]] = icmp sle <4 x i32> %v1, %v2 2122 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2123 // CHECK: ret <4 x i32> [[SEXT_I]] 2124 uint32x4_t test_vcleq_s32(int32x4_t v1, int32x4_t v2) { 2125 return vcleq_s32(v1, v2); 2126 } 2127 2128 // CHECK-LABEL: @test_vcleq_f32( 2129 // CHECK: [[CMP_I:%.*]] = fcmp ole <4 x float> %v1, %v2 2130 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2131 // CHECK: ret <4 x i32> [[SEXT_I]] 2132 uint32x4_t test_vcleq_f32(float32x4_t v1, float32x4_t v2) { 2133 return vcleq_f32(v1, v2); 2134 } 2135 2136 // CHECK-LABEL: @test_vcleq_u8( 2137 // CHECK: [[CMP_I:%.*]] = icmp ule <16 x i8> %v1, %v2 2138 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2139 // CHECK: ret <16 x i8> [[SEXT_I]] 2140 uint8x16_t test_vcleq_u8(uint8x16_t v1, uint8x16_t v2) { 2141 return vcleq_u8(v1, v2); 2142 } 2143 2144 // CHECK-LABEL: @test_vcleq_u16( 2145 // CHECK: [[CMP_I:%.*]] = icmp ule <8 x i16> %v1, %v2 2146 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2147 // CHECK: ret <8 x i16> [[SEXT_I]] 2148 uint16x8_t test_vcleq_u16(uint16x8_t v1, uint16x8_t v2) { 2149 return vcleq_u16(v1, v2); 2150 } 2151 2152 // CHECK-LABEL: @test_vcleq_u32( 2153 // CHECK: [[CMP_I:%.*]] = icmp ule <4 x i32> %v1, %v2 2154 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2155 // CHECK: ret <4 x i32> [[SEXT_I]] 2156 uint32x4_t test_vcleq_u32(uint32x4_t v1, uint32x4_t v2) { 2157 return vcleq_u32(v1, v2); 2158 } 2159 2160 // CHECK-LABEL: @test_vcleq_s64( 2161 // CHECK: [[CMP_I:%.*]] = icmp sle <2 x i64> %v1, %v2 2162 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2163 // CHECK: ret <2 x i64> [[SEXT_I]] 2164 uint64x2_t test_vcleq_s64(int64x2_t v1, int64x2_t v2) { 2165 return vcleq_s64(v1, v2); 2166 } 2167 2168 // CHECK-LABEL: @test_vcleq_u64( 2169 // CHECK: [[CMP_I:%.*]] = icmp ule <2 x i64> %v1, %v2 2170 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2171 // CHECK: ret <2 x i64> [[SEXT_I]] 2172 uint64x2_t test_vcleq_u64(uint64x2_t v1, uint64x2_t v2) { 2173 return vcleq_u64(v1, v2); 2174 } 2175 2176 // CHECK-LABEL: @test_vcleq_f64( 2177 // CHECK: [[CMP_I:%.*]] = fcmp ole <2 x double> %v1, %v2 2178 // CHECK: [[SEXT_I:%.*]] = sext <2 x 
i1> [[CMP_I]] to <2 x i64> 2179 // CHECK: ret <2 x i64> [[SEXT_I]] 2180 uint64x2_t test_vcleq_f64(float64x2_t v1, float64x2_t v2) { 2181 return vcleq_f64(v1, v2); 2182 } 2183 2184 // CHECK-LABEL: @test_vcgt_s8( 2185 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i8> %v1, %v2 2186 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2187 // CHECK: ret <8 x i8> [[SEXT_I]] 2188 uint8x8_t test_vcgt_s8(int8x8_t v1, int8x8_t v2) { 2189 return vcgt_s8(v1, v2); 2190 } 2191 2192 // CHECK-LABEL: @test_vcgt_s16( 2193 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i16> %v1, %v2 2194 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2195 // CHECK: ret <4 x i16> [[SEXT_I]] 2196 uint16x4_t test_vcgt_s16(int16x4_t v1, int16x4_t v2) { 2197 return vcgt_s16(v1, v2); 2198 } 2199 2200 // CHECK-LABEL: @test_vcgt_s32( 2201 // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i32> %v1, %v2 2202 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2203 // CHECK: ret <2 x i32> [[SEXT_I]] 2204 uint32x2_t test_vcgt_s32(int32x2_t v1, int32x2_t v2) { 2205 return vcgt_s32(v1, v2); 2206 } 2207 2208 // CHECK-LABEL: @test_vcgt_s64( 2209 // CHECK: [[CMP_I:%.*]] = icmp sgt <1 x i64> %a, %b 2210 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2211 // CHECK: ret <1 x i64> [[SEXT_I]] 2212 uint64x1_t test_vcgt_s64(int64x1_t a, int64x1_t b) { 2213 return vcgt_s64(a, b); 2214 } 2215 2216 // CHECK-LABEL: @test_vcgt_u64( 2217 // CHECK: [[CMP_I:%.*]] = icmp ugt <1 x i64> %a, %b 2218 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2219 // CHECK: ret <1 x i64> [[SEXT_I]] 2220 uint64x1_t test_vcgt_u64(uint64x1_t a, uint64x1_t b) { 2221 return vcgt_u64(a, b); 2222 } 2223 2224 // CHECK-LABEL: @test_vcgt_f32( 2225 // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x float> %v1, %v2 2226 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2227 // CHECK: ret <2 x i32> [[SEXT_I]] 2228 uint32x2_t test_vcgt_f32(float32x2_t v1, float32x2_t v2) { 2229 return vcgt_f32(v1, v2); 2230 } 2231 2232 // CHECK-LABEL: @test_vcgt_f64( 2233 // CHECK: [[CMP_I:%.*]] = fcmp ogt <1 x double> %a, %b 2234 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2235 // CHECK: ret <1 x i64> [[SEXT_I]] 2236 uint64x1_t test_vcgt_f64(float64x1_t a, float64x1_t b) { 2237 return vcgt_f64(a, b); 2238 } 2239 2240 // CHECK-LABEL: @test_vcgt_u8( 2241 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i8> %v1, %v2 2242 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2243 // CHECK: ret <8 x i8> [[SEXT_I]] 2244 uint8x8_t test_vcgt_u8(uint8x8_t v1, uint8x8_t v2) { 2245 return vcgt_u8(v1, v2); 2246 } 2247 2248 // CHECK-LABEL: @test_vcgt_u16( 2249 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i16> %v1, %v2 2250 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2251 // CHECK: ret <4 x i16> [[SEXT_I]] 2252 uint16x4_t test_vcgt_u16(uint16x4_t v1, uint16x4_t v2) { 2253 return vcgt_u16(v1, v2); 2254 } 2255 2256 // CHECK-LABEL: @test_vcgt_u32( 2257 // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i32> %v1, %v2 2258 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2259 // CHECK: ret <2 x i32> [[SEXT_I]] 2260 uint32x2_t test_vcgt_u32(uint32x2_t v1, uint32x2_t v2) { 2261 return vcgt_u32(v1, v2); 2262 } 2263 2264 // CHECK-LABEL: @test_vcgtq_s8( 2265 // CHECK: [[CMP_I:%.*]] = icmp sgt <16 x i8> %v1, %v2 2266 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2267 // CHECK: ret <16 x i8> [[SEXT_I]] 2268 uint8x16_t test_vcgtq_s8(int8x16_t v1, int8x16_t v2) { 2269 return vcgtq_s8(v1, v2); 2270 } 2271 2272 // 
CHECK-LABEL: @test_vcgtq_s16( 2273 // CHECK: [[CMP_I:%.*]] = icmp sgt <8 x i16> %v1, %v2 2274 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2275 // CHECK: ret <8 x i16> [[SEXT_I]] 2276 uint16x8_t test_vcgtq_s16(int16x8_t v1, int16x8_t v2) { 2277 return vcgtq_s16(v1, v2); 2278 } 2279 2280 // CHECK-LABEL: @test_vcgtq_s32( 2281 // CHECK: [[CMP_I:%.*]] = icmp sgt <4 x i32> %v1, %v2 2282 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2283 // CHECK: ret <4 x i32> [[SEXT_I]] 2284 uint32x4_t test_vcgtq_s32(int32x4_t v1, int32x4_t v2) { 2285 return vcgtq_s32(v1, v2); 2286 } 2287 2288 // CHECK-LABEL: @test_vcgtq_f32( 2289 // CHECK: [[CMP_I:%.*]] = fcmp ogt <4 x float> %v1, %v2 2290 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2291 // CHECK: ret <4 x i32> [[SEXT_I]] 2292 uint32x4_t test_vcgtq_f32(float32x4_t v1, float32x4_t v2) { 2293 return vcgtq_f32(v1, v2); 2294 } 2295 2296 // CHECK-LABEL: @test_vcgtq_u8( 2297 // CHECK: [[CMP_I:%.*]] = icmp ugt <16 x i8> %v1, %v2 2298 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2299 // CHECK: ret <16 x i8> [[SEXT_I]] 2300 uint8x16_t test_vcgtq_u8(uint8x16_t v1, uint8x16_t v2) { 2301 return vcgtq_u8(v1, v2); 2302 } 2303 2304 // CHECK-LABEL: @test_vcgtq_u16( 2305 // CHECK: [[CMP_I:%.*]] = icmp ugt <8 x i16> %v1, %v2 2306 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2307 // CHECK: ret <8 x i16> [[SEXT_I]] 2308 uint16x8_t test_vcgtq_u16(uint16x8_t v1, uint16x8_t v2) { 2309 return vcgtq_u16(v1, v2); 2310 } 2311 2312 // CHECK-LABEL: @test_vcgtq_u32( 2313 // CHECK: [[CMP_I:%.*]] = icmp ugt <4 x i32> %v1, %v2 2314 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2315 // CHECK: ret <4 x i32> [[SEXT_I]] 2316 uint32x4_t test_vcgtq_u32(uint32x4_t v1, uint32x4_t v2) { 2317 return vcgtq_u32(v1, v2); 2318 } 2319 2320 // CHECK-LABEL: @test_vcgtq_s64( 2321 // CHECK: [[CMP_I:%.*]] = icmp sgt <2 x i64> %v1, %v2 2322 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2323 // CHECK: ret <2 x i64> [[SEXT_I]] 2324 uint64x2_t test_vcgtq_s64(int64x2_t v1, int64x2_t v2) { 2325 return vcgtq_s64(v1, v2); 2326 } 2327 2328 // CHECK-LABEL: @test_vcgtq_u64( 2329 // CHECK: [[CMP_I:%.*]] = icmp ugt <2 x i64> %v1, %v2 2330 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2331 // CHECK: ret <2 x i64> [[SEXT_I]] 2332 uint64x2_t test_vcgtq_u64(uint64x2_t v1, uint64x2_t v2) { 2333 return vcgtq_u64(v1, v2); 2334 } 2335 2336 // CHECK-LABEL: @test_vcgtq_f64( 2337 // CHECK: [[CMP_I:%.*]] = fcmp ogt <2 x double> %v1, %v2 2338 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2339 // CHECK: ret <2 x i64> [[SEXT_I]] 2340 uint64x2_t test_vcgtq_f64(float64x2_t v1, float64x2_t v2) { 2341 return vcgtq_f64(v1, v2); 2342 } 2343 2344 // CHECK-LABEL: @test_vclt_s8( 2345 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i8> %v1, %v2 2346 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2347 // CHECK: ret <8 x i8> [[SEXT_I]] 2348 // Notes about vclt: 2349 // LT condition predicate is implemented as GT with the operands swapped in the generated code; the IR checked above keeps the source operand order ('icmp slt'). 2350 // Using registers other than v0, v1 is possible, but would be odd.
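// For example, at the assembly level vclt_s8(v1, v2) would typically be
// selected as a CMGT with the operands swapped, e.g. "cmgt v0.8b, v1.8b, v0.8b".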
2351 uint8x8_t test_vclt_s8(int8x8_t v1, int8x8_t v2) { 2352 return vclt_s8(v1, v2); 2353 } 2354 2355 // CHECK-LABEL: @test_vclt_s16( 2356 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x i16> %v1, %v2 2357 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2358 // CHECK: ret <4 x i16> [[SEXT_I]] 2359 uint16x4_t test_vclt_s16(int16x4_t v1, int16x4_t v2) { 2360 return vclt_s16(v1, v2); 2361 } 2362 2363 // CHECK-LABEL: @test_vclt_s32( 2364 // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i32> %v1, %v2 2365 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2366 // CHECK: ret <2 x i32> [[SEXT_I]] 2367 uint32x2_t test_vclt_s32(int32x2_t v1, int32x2_t v2) { 2368 return vclt_s32(v1, v2); 2369 } 2370 2371 // CHECK-LABEL: @test_vclt_s64( 2372 // CHECK: [[CMP_I:%.*]] = icmp slt <1 x i64> %a, %b 2373 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2374 // CHECK: ret <1 x i64> [[SEXT_I]] 2375 uint64x1_t test_vclt_s64(int64x1_t a, int64x1_t b) { 2376 return vclt_s64(a, b); 2377 } 2378 2379 // CHECK-LABEL: @test_vclt_u64( 2380 // CHECK: [[CMP_I:%.*]] = icmp ult <1 x i64> %a, %b 2381 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2382 // CHECK: ret <1 x i64> [[SEXT_I]] 2383 uint64x1_t test_vclt_u64(uint64x1_t a, uint64x1_t b) { 2384 return vclt_u64(a, b); 2385 } 2386 2387 // CHECK-LABEL: @test_vclt_f32( 2388 // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x float> %v1, %v2 2389 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2390 // CHECK: ret <2 x i32> [[SEXT_I]] 2391 uint32x2_t test_vclt_f32(float32x2_t v1, float32x2_t v2) { 2392 return vclt_f32(v1, v2); 2393 } 2394 2395 // CHECK-LABEL: @test_vclt_f64( 2396 // CHECK: [[CMP_I:%.*]] = fcmp olt <1 x double> %a, %b 2397 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64> 2398 // CHECK: ret <1 x i64> [[SEXT_I]] 2399 uint64x1_t test_vclt_f64(float64x1_t a, float64x1_t b) { 2400 return vclt_f64(a, b); 2401 } 2402 2403 // CHECK-LABEL: @test_vclt_u8( 2404 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i8> %v1, %v2 2405 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8> 2406 // CHECK: ret <8 x i8> [[SEXT_I]] 2407 uint8x8_t test_vclt_u8(uint8x8_t v1, uint8x8_t v2) { 2408 return vclt_u8(v1, v2); 2409 } 2410 2411 // CHECK-LABEL: @test_vclt_u16( 2412 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i16> %v1, %v2 2413 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16> 2414 // CHECK: ret <4 x i16> [[SEXT_I]] 2415 uint16x4_t test_vclt_u16(uint16x4_t v1, uint16x4_t v2) { 2416 return vclt_u16(v1, v2); 2417 } 2418 2419 // CHECK-LABEL: @test_vclt_u32( 2420 // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i32> %v1, %v2 2421 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32> 2422 // CHECK: ret <2 x i32> [[SEXT_I]] 2423 uint32x2_t test_vclt_u32(uint32x2_t v1, uint32x2_t v2) { 2424 return vclt_u32(v1, v2); 2425 } 2426 2427 // CHECK-LABEL: @test_vcltq_s8( 2428 // CHECK: [[CMP_I:%.*]] = icmp slt <16 x i8> %v1, %v2 2429 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2430 // CHECK: ret <16 x i8> [[SEXT_I]] 2431 uint8x16_t test_vcltq_s8(int8x16_t v1, int8x16_t v2) { 2432 return vcltq_s8(v1, v2); 2433 } 2434 2435 // CHECK-LABEL: @test_vcltq_s16( 2436 // CHECK: [[CMP_I:%.*]] = icmp slt <8 x i16> %v1, %v2 2437 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2438 // CHECK: ret <8 x i16> [[SEXT_I]] 2439 uint16x8_t test_vcltq_s16(int16x8_t v1, int16x8_t v2) { 2440 return vcltq_s16(v1, v2); 2441 } 2442 2443 // CHECK-LABEL: @test_vcltq_s32( 2444 // CHECK: [[CMP_I:%.*]] = icmp slt <4 x 
i32> %v1, %v2 2445 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2446 // CHECK: ret <4 x i32> [[SEXT_I]] 2447 uint32x4_t test_vcltq_s32(int32x4_t v1, int32x4_t v2) { 2448 return vcltq_s32(v1, v2); 2449 } 2450 2451 // CHECK-LABEL: @test_vcltq_f32( 2452 // CHECK: [[CMP_I:%.*]] = fcmp olt <4 x float> %v1, %v2 2453 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2454 // CHECK: ret <4 x i32> [[SEXT_I]] 2455 uint32x4_t test_vcltq_f32(float32x4_t v1, float32x4_t v2) { 2456 return vcltq_f32(v1, v2); 2457 } 2458 2459 // CHECK-LABEL: @test_vcltq_u8( 2460 // CHECK: [[CMP_I:%.*]] = icmp ult <16 x i8> %v1, %v2 2461 // CHECK: [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8> 2462 // CHECK: ret <16 x i8> [[SEXT_I]] 2463 uint8x16_t test_vcltq_u8(uint8x16_t v1, uint8x16_t v2) { 2464 return vcltq_u8(v1, v2); 2465 } 2466 2467 // CHECK-LABEL: @test_vcltq_u16( 2468 // CHECK: [[CMP_I:%.*]] = icmp ult <8 x i16> %v1, %v2 2469 // CHECK: [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16> 2470 // CHECK: ret <8 x i16> [[SEXT_I]] 2471 uint16x8_t test_vcltq_u16(uint16x8_t v1, uint16x8_t v2) { 2472 return vcltq_u16(v1, v2); 2473 } 2474 2475 // CHECK-LABEL: @test_vcltq_u32( 2476 // CHECK: [[CMP_I:%.*]] = icmp ult <4 x i32> %v1, %v2 2477 // CHECK: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> 2478 // CHECK: ret <4 x i32> [[SEXT_I]] 2479 uint32x4_t test_vcltq_u32(uint32x4_t v1, uint32x4_t v2) { 2480 return vcltq_u32(v1, v2); 2481 } 2482 2483 // CHECK-LABEL: @test_vcltq_s64( 2484 // CHECK: [[CMP_I:%.*]] = icmp slt <2 x i64> %v1, %v2 2485 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2486 // CHECK: ret <2 x i64> [[SEXT_I]] 2487 uint64x2_t test_vcltq_s64(int64x2_t v1, int64x2_t v2) { 2488 return vcltq_s64(v1, v2); 2489 } 2490 2491 // CHECK-LABEL: @test_vcltq_u64( 2492 // CHECK: [[CMP_I:%.*]] = icmp ult <2 x i64> %v1, %v2 2493 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2494 // CHECK: ret <2 x i64> [[SEXT_I]] 2495 uint64x2_t test_vcltq_u64(uint64x2_t v1, uint64x2_t v2) { 2496 return vcltq_u64(v1, v2); 2497 } 2498 2499 // CHECK-LABEL: @test_vcltq_f64( 2500 // CHECK: [[CMP_I:%.*]] = fcmp olt <2 x double> %v1, %v2 2501 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64> 2502 // CHECK: ret <2 x i64> [[SEXT_I]] 2503 uint64x2_t test_vcltq_f64(float64x2_t v1, float64x2_t v2) { 2504 return vcltq_f64(v1, v2); 2505 } 2506 2507 // CHECK-LABEL: @test_vhadd_s8( 2508 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) 2509 // CHECK: ret <8 x i8> [[VHADD_V_I]] 2510 int8x8_t test_vhadd_s8(int8x8_t v1, int8x8_t v2) { 2511 return vhadd_s8(v1, v2); 2512 } 2513 2514 // CHECK-LABEL: @test_vhadd_s16( 2515 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 2516 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 2517 // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) 2518 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> 2519 // CHECK: ret <4 x i16> [[VHADD_V2_I]] 2520 int16x4_t test_vhadd_s16(int16x4_t v1, int16x4_t v2) { 2521 return vhadd_s16(v1, v2); 2522 } 2523 2524 // CHECK-LABEL: @test_vhadd_s32( 2525 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2526 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2527 // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) 2528 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] 
to <8 x i8> 2529 // CHECK: ret <2 x i32> [[VHADD_V2_I]] 2530 int32x2_t test_vhadd_s32(int32x2_t v1, int32x2_t v2) { 2531 return vhadd_s32(v1, v2); 2532 } 2533 2534 // CHECK-LABEL: @test_vhadd_u8( 2535 // CHECK: [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) 2536 // CHECK: ret <8 x i8> [[VHADD_V_I]] 2537 uint8x8_t test_vhadd_u8(uint8x8_t v1, uint8x8_t v2) { 2538 return vhadd_u8(v1, v2); 2539 } 2540 2541 // CHECK-LABEL: @test_vhadd_u16( 2542 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 2543 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 2544 // CHECK: [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) 2545 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8> 2546 // CHECK: ret <4 x i16> [[VHADD_V2_I]] 2547 uint16x4_t test_vhadd_u16(uint16x4_t v1, uint16x4_t v2) { 2548 return vhadd_u16(v1, v2); 2549 } 2550 2551 // CHECK-LABEL: @test_vhadd_u32( 2552 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2553 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2554 // CHECK: [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) 2555 // CHECK: [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8> 2556 // CHECK: ret <2 x i32> [[VHADD_V2_I]] 2557 uint32x2_t test_vhadd_u32(uint32x2_t v1, uint32x2_t v2) { 2558 return vhadd_u32(v1, v2); 2559 } 2560 2561 // CHECK-LABEL: @test_vhaddq_s8( 2562 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) 2563 // CHECK: ret <16 x i8> [[VHADDQ_V_I]] 2564 int8x16_t test_vhaddq_s8(int8x16_t v1, int8x16_t v2) { 2565 return vhaddq_s8(v1, v2); 2566 } 2567 2568 // CHECK-LABEL: @test_vhaddq_s16( 2569 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 2570 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 2571 // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) 2572 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> 2573 // CHECK: ret <8 x i16> [[VHADDQ_V2_I]] 2574 int16x8_t test_vhaddq_s16(int16x8_t v1, int16x8_t v2) { 2575 return vhaddq_s16(v1, v2); 2576 } 2577 2578 // CHECK-LABEL: @test_vhaddq_s32( 2579 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 2580 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 2581 // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) 2582 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> 2583 // CHECK: ret <4 x i32> [[VHADDQ_V2_I]] 2584 int32x4_t test_vhaddq_s32(int32x4_t v1, int32x4_t v2) { 2585 return vhaddq_s32(v1, v2); 2586 } 2587 2588 // CHECK-LABEL: @test_vhaddq_u8( 2589 // CHECK: [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) 2590 // CHECK: ret <16 x i8> [[VHADDQ_V_I]] 2591 uint8x16_t test_vhaddq_u8(uint8x16_t v1, uint8x16_t v2) { 2592 return vhaddq_u8(v1, v2); 2593 } 2594 2595 // CHECK-LABEL: @test_vhaddq_u16( 2596 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 2597 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 2598 // CHECK: [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) 2599 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8> 2600 // CHECK: ret <8 x i16> [[VHADDQ_V2_I]] 2601 uint16x8_t test_vhaddq_u16(uint16x8_t v1, 
uint16x8_t v2) { 2602 return vhaddq_u16(v1, v2); 2603 } 2604 2605 // CHECK-LABEL: @test_vhaddq_u32( 2606 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 2607 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 2608 // CHECK: [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) 2609 // CHECK: [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8> 2610 // CHECK: ret <4 x i32> [[VHADDQ_V2_I]] 2611 uint32x4_t test_vhaddq_u32(uint32x4_t v1, uint32x4_t v2) { 2612 return vhaddq_u32(v1, v2); 2613 } 2614 2615 // CHECK-LABEL: @test_vhsub_s8( 2616 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.shsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) 2617 // CHECK: ret <8 x i8> [[VHSUB_V_I]] 2618 int8x8_t test_vhsub_s8(int8x8_t v1, int8x8_t v2) { 2619 return vhsub_s8(v1, v2); 2620 } 2621 2622 // CHECK-LABEL: @test_vhsub_s16( 2623 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 2624 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 2625 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.shsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) 2626 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> 2627 // CHECK: ret <4 x i16> [[VHSUB_V2_I]] 2628 int16x4_t test_vhsub_s16(int16x4_t v1, int16x4_t v2) { 2629 return vhsub_s16(v1, v2); 2630 } 2631 2632 // CHECK-LABEL: @test_vhsub_s32( 2633 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2634 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2635 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.shsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) 2636 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> 2637 // CHECK: ret <2 x i32> [[VHSUB_V2_I]] 2638 int32x2_t test_vhsub_s32(int32x2_t v1, int32x2_t v2) { 2639 return vhsub_s32(v1, v2); 2640 } 2641 2642 // CHECK-LABEL: @test_vhsub_u8( 2643 // CHECK: [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uhsub.v8i8(<8 x i8> %v1, <8 x i8> %v2) 2644 // CHECK: ret <8 x i8> [[VHSUB_V_I]] 2645 uint8x8_t test_vhsub_u8(uint8x8_t v1, uint8x8_t v2) { 2646 return vhsub_u8(v1, v2); 2647 } 2648 2649 // CHECK-LABEL: @test_vhsub_u16( 2650 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 2651 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 2652 // CHECK: [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uhsub.v4i16(<4 x i16> %v1, <4 x i16> %v2) 2653 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8> 2654 // CHECK: ret <4 x i16> [[VHSUB_V2_I]] 2655 uint16x4_t test_vhsub_u16(uint16x4_t v1, uint16x4_t v2) { 2656 return vhsub_u16(v1, v2); 2657 } 2658 2659 // CHECK-LABEL: @test_vhsub_u32( 2660 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2661 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2662 // CHECK: [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uhsub.v2i32(<2 x i32> %v1, <2 x i32> %v2) 2663 // CHECK: [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8> 2664 // CHECK: ret <2 x i32> [[VHSUB_V2_I]] 2665 uint32x2_t test_vhsub_u32(uint32x2_t v1, uint32x2_t v2) { 2666 return vhsub_u32(v1, v2); 2667 } 2668 2669 // CHECK-LABEL: @test_vhsubq_s8( 2670 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.shsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) 2671 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]] 2672 int8x16_t test_vhsubq_s8(int8x16_t v1, int8x16_t v2) { 2673 return vhsubq_s8(v1, v2); 2674 } 2675 2676 // CHECK-LABEL: @test_vhsubq_s16( 2677 // CHECK: [[TMP0:%.*]] = bitcast 
<8 x i16> %v1 to <16 x i8> 2678 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 2679 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.shsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) 2680 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> 2681 // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]] 2682 int16x8_t test_vhsubq_s16(int16x8_t v1, int16x8_t v2) { 2683 return vhsubq_s16(v1, v2); 2684 } 2685 2686 // CHECK-LABEL: @test_vhsubq_s32( 2687 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 2688 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 2689 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.shsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) 2690 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> 2691 // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]] 2692 int32x4_t test_vhsubq_s32(int32x4_t v1, int32x4_t v2) { 2693 return vhsubq_s32(v1, v2); 2694 } 2695 2696 // CHECK-LABEL: @test_vhsubq_u8( 2697 // CHECK: [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uhsub.v16i8(<16 x i8> %v1, <16 x i8> %v2) 2698 // CHECK: ret <16 x i8> [[VHSUBQ_V_I]] 2699 uint8x16_t test_vhsubq_u8(uint8x16_t v1, uint8x16_t v2) { 2700 return vhsubq_u8(v1, v2); 2701 } 2702 2703 // CHECK-LABEL: @test_vhsubq_u16( 2704 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 2705 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 2706 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uhsub.v8i16(<8 x i16> %v1, <8 x i16> %v2) 2707 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8> 2708 // CHECK: ret <8 x i16> [[VHSUBQ_V2_I]] 2709 uint16x8_t test_vhsubq_u16(uint16x8_t v1, uint16x8_t v2) { 2710 return vhsubq_u16(v1, v2); 2711 } 2712 2713 // CHECK-LABEL: @test_vhsubq_u32( 2714 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 2715 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 2716 // CHECK: [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uhsub.v4i32(<4 x i32> %v1, <4 x i32> %v2) 2717 // CHECK: [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8> 2718 // CHECK: ret <4 x i32> [[VHSUBQ_V2_I]] 2719 uint32x4_t test_vhsubq_u32(uint32x4_t v1, uint32x4_t v2) { 2720 return vhsubq_u32(v1, v2); 2721 } 2722 2723 // CHECK-LABEL: @test_vrhadd_s8( 2724 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) 2725 // CHECK: ret <8 x i8> [[VRHADD_V_I]] 2726 int8x8_t test_vrhadd_s8(int8x8_t v1, int8x8_t v2) { 2727 return vrhadd_s8(v1, v2); 2728 } 2729 2730 // CHECK-LABEL: @test_vrhadd_s16( 2731 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 2732 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 2733 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) 2734 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> 2735 // CHECK: ret <4 x i16> [[VRHADD_V2_I]] 2736 int16x4_t test_vrhadd_s16(int16x4_t v1, int16x4_t v2) { 2737 return vrhadd_s16(v1, v2); 2738 } 2739 2740 // CHECK-LABEL: @test_vrhadd_s32( 2741 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2742 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2743 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) 2744 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> 2745 // CHECK: ret <2 x i32> [[VRHADD_V2_I]] 2746 int32x2_t test_vrhadd_s32(int32x2_t v1, 
int32x2_t v2) { 2747 return vrhadd_s32(v1, v2); 2748 } 2749 2750 // CHECK-LABEL: @test_vrhadd_u8( 2751 // CHECK: [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urhadd.v8i8(<8 x i8> %v1, <8 x i8> %v2) 2752 // CHECK: ret <8 x i8> [[VRHADD_V_I]] 2753 uint8x8_t test_vrhadd_u8(uint8x8_t v1, uint8x8_t v2) { 2754 return vrhadd_u8(v1, v2); 2755 } 2756 2757 // CHECK-LABEL: @test_vrhadd_u16( 2758 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %v1 to <8 x i8> 2759 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %v2 to <8 x i8> 2760 // CHECK: [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urhadd.v4i16(<4 x i16> %v1, <4 x i16> %v2) 2761 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8> 2762 // CHECK: ret <4 x i16> [[VRHADD_V2_I]] 2763 uint16x4_t test_vrhadd_u16(uint16x4_t v1, uint16x4_t v2) { 2764 return vrhadd_u16(v1, v2); 2765 } 2766 2767 // CHECK-LABEL: @test_vrhadd_u32( 2768 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %v1 to <8 x i8> 2769 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %v2 to <8 x i8> 2770 // CHECK: [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.urhadd.v2i32(<2 x i32> %v1, <2 x i32> %v2) 2771 // CHECK: [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8> 2772 // CHECK: ret <2 x i32> [[VRHADD_V2_I]] 2773 uint32x2_t test_vrhadd_u32(uint32x2_t v1, uint32x2_t v2) { 2774 return vrhadd_u32(v1, v2); 2775 } 2776 2777 // CHECK-LABEL: @test_vrhaddq_s8( 2778 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) 2779 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]] 2780 int8x16_t test_vrhaddq_s8(int8x16_t v1, int8x16_t v2) { 2781 return vrhaddq_s8(v1, v2); 2782 } 2783 2784 // CHECK-LABEL: @test_vrhaddq_s16( 2785 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 2786 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 2787 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) 2788 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> 2789 // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]] 2790 int16x8_t test_vrhaddq_s16(int16x8_t v1, int16x8_t v2) { 2791 return vrhaddq_s16(v1, v2); 2792 } 2793 2794 // CHECK-LABEL: @test_vrhaddq_s32( 2795 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 2796 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 2797 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) 2798 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> 2799 // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]] 2800 int32x4_t test_vrhaddq_s32(int32x4_t v1, int32x4_t v2) { 2801 return vrhaddq_s32(v1, v2); 2802 } 2803 2804 // CHECK-LABEL: @test_vrhaddq_u8( 2805 // CHECK: [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8> %v1, <16 x i8> %v2) 2806 // CHECK: ret <16 x i8> [[VRHADDQ_V_I]] 2807 uint8x16_t test_vrhaddq_u8(uint8x16_t v1, uint8x16_t v2) { 2808 return vrhaddq_u8(v1, v2); 2809 } 2810 2811 // CHECK-LABEL: @test_vrhaddq_u16( 2812 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %v1 to <16 x i8> 2813 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %v2 to <16 x i8> 2814 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %v1, <8 x i16> %v2) 2815 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8> 2816 // CHECK: ret <8 x i16> [[VRHADDQ_V2_I]] 2817 uint16x8_t test_vrhaddq_u16(uint16x8_t v1, uint16x8_t v2) { 2818 return vrhaddq_u16(v1, 
v2); 2819 } 2820 2821 // CHECK-LABEL: @test_vrhaddq_u32( 2822 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %v1 to <16 x i8> 2823 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %v2 to <16 x i8> 2824 // CHECK: [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32> %v1, <4 x i32> %v2) 2825 // CHECK: [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8> 2826 // CHECK: ret <4 x i32> [[VRHADDQ_V2_I]] 2827 uint32x4_t test_vrhaddq_u32(uint32x4_t v1, uint32x4_t v2) { 2828 return vrhaddq_u32(v1, v2); 2829 } 2830 2831 // CHECK-LABEL: @test_vqadd_s8( 2832 // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> %a, <8 x i8> %b) 2833 // CHECK: ret <8 x i8> [[VQADD_V_I]] 2834 int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) { 2835 return vqadd_s8(a, b); 2836 } 2837 2838 // CHECK-LABEL: @test_vqadd_s16( 2839 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2840 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2841 // CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> %a, <4 x i16> %b) 2842 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> 2843 // CHECK: ret <4 x i16> [[VQADD_V2_I]] 2844 int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) { 2845 return vqadd_s16(a, b); 2846 } 2847 2848 // CHECK-LABEL: @test_vqadd_s32( 2849 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2850 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2851 // CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqadd.v2i32(<2 x i32> %a, <2 x i32> %b) 2852 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> 2853 // CHECK: ret <2 x i32> [[VQADD_V2_I]] 2854 int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) { 2855 return vqadd_s32(a, b); 2856 } 2857 2858 // CHECK-LABEL: @test_vqadd_s64( 2859 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 2860 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 2861 // CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqadd.v1i64(<1 x i64> %a, <1 x i64> %b) 2862 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> 2863 // CHECK: ret <1 x i64> [[VQADD_V2_I]] 2864 int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) { 2865 return vqadd_s64(a, b); 2866 } 2867 2868 // CHECK-LABEL: @test_vqadd_u8( 2869 // CHECK: [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> %a, <8 x i8> %b) 2870 // CHECK: ret <8 x i8> [[VQADD_V_I]] 2871 uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) { 2872 return vqadd_u8(a, b); 2873 } 2874 2875 // CHECK-LABEL: @test_vqadd_u16( 2876 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2877 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2878 // CHECK: [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> %a, <4 x i16> %b) 2879 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8> 2880 // CHECK: ret <4 x i16> [[VQADD_V2_I]] 2881 uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) { 2882 return vqadd_u16(a, b); 2883 } 2884 2885 // CHECK-LABEL: @test_vqadd_u32( 2886 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2887 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2888 // CHECK: [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqadd.v2i32(<2 x i32> %a, <2 x i32> %b) 2889 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8> 2890 // CHECK: ret <2 x i32> [[VQADD_V2_I]] 2891 uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t 
b) { 2892 return vqadd_u32(a, b); 2893 } 2894 2895 // CHECK-LABEL: @test_vqadd_u64( 2896 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 2897 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 2898 // CHECK: [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqadd.v1i64(<1 x i64> %a, <1 x i64> %b) 2899 // CHECK: [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8> 2900 // CHECK: ret <1 x i64> [[VQADD_V2_I]] 2901 uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) { 2902 return vqadd_u64(a, b); 2903 } 2904 2905 // CHECK-LABEL: @test_vqaddq_s8( 2906 // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqadd.v16i8(<16 x i8> %a, <16 x i8> %b) 2907 // CHECK: ret <16 x i8> [[VQADDQ_V_I]] 2908 int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) { 2909 return vqaddq_s8(a, b); 2910 } 2911 2912 // CHECK-LABEL: @test_vqaddq_s16( 2913 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 2914 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 2915 // CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqadd.v8i16(<8 x i16> %a, <8 x i16> %b) 2916 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> 2917 // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] 2918 int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) { 2919 return vqaddq_s16(a, b); 2920 } 2921 2922 // CHECK-LABEL: @test_vqaddq_s32( 2923 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2924 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 2925 // CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> %b) 2926 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> 2927 // CHECK: ret <4 x i32> [[VQADDQ_V2_I]] 2928 int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) { 2929 return vqaddq_s32(a, b); 2930 } 2931 2932 // CHECK-LABEL: @test_vqaddq_s64( 2933 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 2934 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 2935 // CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> %b) 2936 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> 2937 // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] 2938 int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) { 2939 return vqaddq_s64(a, b); 2940 } 2941 2942 // CHECK-LABEL: @test_vqaddq_u8( 2943 // CHECK: [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqadd.v16i8(<16 x i8> %a, <16 x i8> %b) 2944 // CHECK: ret <16 x i8> [[VQADDQ_V_I]] 2945 uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) { 2946 return vqaddq_u8(a, b); 2947 } 2948 2949 // CHECK-LABEL: @test_vqaddq_u16( 2950 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 2951 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 2952 // CHECK: [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqadd.v8i16(<8 x i16> %a, <8 x i16> %b) 2953 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8> 2954 // CHECK: ret <8 x i16> [[VQADDQ_V2_I]] 2955 uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) { 2956 return vqaddq_u16(a, b); 2957 } 2958 2959 // CHECK-LABEL: @test_vqaddq_u32( 2960 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 2961 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 2962 // CHECK: [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqadd.v4i32(<4 x i32> %a, <4 x i32> %b) 2963 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8> 2964 // CHECK: ret <4 x 
i32> [[VQADDQ_V2_I]] 2965 uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) { 2966 return vqaddq_u32(a, b); 2967 } 2968 2969 // CHECK-LABEL: @test_vqaddq_u64( 2970 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 2971 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 2972 // CHECK: [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqadd.v2i64(<2 x i64> %a, <2 x i64> %b) 2973 // CHECK: [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8> 2974 // CHECK: ret <2 x i64> [[VQADDQ_V2_I]] 2975 uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) { 2976 return vqaddq_u64(a, b); 2977 } 2978 2979 // CHECK-LABEL: @test_vqsub_s8( 2980 // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> %a, <8 x i8> %b) 2981 // CHECK: ret <8 x i8> [[VQSUB_V_I]] 2982 int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) { 2983 return vqsub_s8(a, b); 2984 } 2985 2986 // CHECK-LABEL: @test_vqsub_s16( 2987 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 2988 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 2989 // CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> %a, <4 x i16> %b) 2990 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> 2991 // CHECK: ret <4 x i16> [[VQSUB_V2_I]] 2992 int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) { 2993 return vqsub_s16(a, b); 2994 } 2995 2996 // CHECK-LABEL: @test_vqsub_s32( 2997 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 2998 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 2999 // CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqsub.v2i32(<2 x i32> %a, <2 x i32> %b) 3000 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> 3001 // CHECK: ret <2 x i32> [[VQSUB_V2_I]] 3002 int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) { 3003 return vqsub_s32(a, b); 3004 } 3005 3006 // CHECK-LABEL: @test_vqsub_s64( 3007 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3008 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3009 // CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqsub.v1i64(<1 x i64> %a, <1 x i64> %b) 3010 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> 3011 // CHECK: ret <1 x i64> [[VQSUB_V2_I]] 3012 int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) { 3013 return vqsub_s64(a, b); 3014 } 3015 3016 // CHECK-LABEL: @test_vqsub_u8( 3017 // CHECK: [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> %a, <8 x i8> %b) 3018 // CHECK: ret <8 x i8> [[VQSUB_V_I]] 3019 uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) { 3020 return vqsub_u8(a, b); 3021 } 3022 3023 // CHECK-LABEL: @test_vqsub_u16( 3024 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3025 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3026 // CHECK: [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> %a, <4 x i16> %b) 3027 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8> 3028 // CHECK: ret <4 x i16> [[VQSUB_V2_I]] 3029 uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) { 3030 return vqsub_u16(a, b); 3031 } 3032 3033 // CHECK-LABEL: @test_vqsub_u32( 3034 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3035 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3036 // CHECK: [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqsub.v2i32(<2 x i32> %a, <2 x i32> %b) 3037 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8> 3038 // 
CHECK: ret <2 x i32> [[VQSUB_V2_I]] 3039 uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) { 3040 return vqsub_u32(a, b); 3041 } 3042 3043 // CHECK-LABEL: @test_vqsub_u64( 3044 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3045 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3046 // CHECK: [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqsub.v1i64(<1 x i64> %a, <1 x i64> %b) 3047 // CHECK: [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8> 3048 // CHECK: ret <1 x i64> [[VQSUB_V2_I]] 3049 uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) { 3050 return vqsub_u64(a, b); 3051 } 3052 3053 // CHECK-LABEL: @test_vqsubq_s8( 3054 // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqsub.v16i8(<16 x i8> %a, <16 x i8> %b) 3055 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] 3056 int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) { 3057 return vqsubq_s8(a, b); 3058 } 3059 3060 // CHECK-LABEL: @test_vqsubq_s16( 3061 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3062 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3063 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqsub.v8i16(<8 x i16> %a, <8 x i16> %b) 3064 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> 3065 // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] 3066 int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) { 3067 return vqsubq_s16(a, b); 3068 } 3069 3070 // CHECK-LABEL: @test_vqsubq_s32( 3071 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3072 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3073 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> %b) 3074 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> 3075 // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] 3076 int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) { 3077 return vqsubq_s32(a, b); 3078 } 3079 3080 // CHECK-LABEL: @test_vqsubq_s64( 3081 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3082 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3083 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> %b) 3084 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> 3085 // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] 3086 int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) { 3087 return vqsubq_s64(a, b); 3088 } 3089 3090 // CHECK-LABEL: @test_vqsubq_u8( 3091 // CHECK: [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqsub.v16i8(<16 x i8> %a, <16 x i8> %b) 3092 // CHECK: ret <16 x i8> [[VQSUBQ_V_I]] 3093 uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) { 3094 return vqsubq_u8(a, b); 3095 } 3096 3097 // CHECK-LABEL: @test_vqsubq_u16( 3098 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3099 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3100 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqsub.v8i16(<8 x i16> %a, <8 x i16> %b) 3101 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8> 3102 // CHECK: ret <8 x i16> [[VQSUBQ_V2_I]] 3103 uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) { 3104 return vqsubq_u16(a, b); 3105 } 3106 3107 // CHECK-LABEL: @test_vqsubq_u32( 3108 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3109 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3110 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqsub.v4i32(<4 x i32> %a, <4 x i32> %b) 3111 // CHECK: 
[[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8> 3112 // CHECK: ret <4 x i32> [[VQSUBQ_V2_I]] 3113 uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) { 3114 return vqsubq_u32(a, b); 3115 } 3116 3117 // CHECK-LABEL: @test_vqsubq_u64( 3118 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3119 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3120 // CHECK: [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqsub.v2i64(<2 x i64> %a, <2 x i64> %b) 3121 // CHECK: [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8> 3122 // CHECK: ret <2 x i64> [[VQSUBQ_V2_I]] 3123 uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) { 3124 return vqsubq_u64(a, b); 3125 } 3126 3127 // CHECK-LABEL: @test_vshl_s8( 3128 // CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sshl.v8i8(<8 x i8> %a, <8 x i8> %b) 3129 // CHECK: ret <8 x i8> [[VSHL_V_I]] 3130 int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) { 3131 return vshl_s8(a, b); 3132 } 3133 3134 // CHECK-LABEL: @test_vshl_s16( 3135 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3136 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3137 // CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sshl.v4i16(<4 x i16> %a, <4 x i16> %b) 3138 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> 3139 // CHECK: ret <4 x i16> [[VSHL_V2_I]] 3140 int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) { 3141 return vshl_s16(a, b); 3142 } 3143 3144 // CHECK-LABEL: @test_vshl_s32( 3145 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3146 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3147 // CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sshl.v2i32(<2 x i32> %a, <2 x i32> %b) 3148 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8> 3149 // CHECK: ret <2 x i32> [[VSHL_V2_I]] 3150 int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) { 3151 return vshl_s32(a, b); 3152 } 3153 3154 // CHECK-LABEL: @test_vshl_s64( 3155 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3156 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3157 // CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %a, <1 x i64> %b) 3158 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> 3159 // CHECK: ret <1 x i64> [[VSHL_V2_I]] 3160 int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) { 3161 return vshl_s64(a, b); 3162 } 3163 3164 // CHECK-LABEL: @test_vshl_u8( 3165 // CHECK: [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.ushl.v8i8(<8 x i8> %a, <8 x i8> %b) 3166 // CHECK: ret <8 x i8> [[VSHL_V_I]] 3167 uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) { 3168 return vshl_u8(a, b); 3169 } 3170 3171 // CHECK-LABEL: @test_vshl_u16( 3172 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3173 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3174 // CHECK: [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.ushl.v4i16(<4 x i16> %a, <4 x i16> %b) 3175 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8> 3176 // CHECK: ret <4 x i16> [[VSHL_V2_I]] 3177 uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) { 3178 return vshl_u16(a, b); 3179 } 3180 3181 // CHECK-LABEL: @test_vshl_u32( 3182 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3183 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3184 // CHECK: [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ushl.v2i32(<2 x i32> %a, <2 x i32> %b) 3185 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <2 
x i32> [[VSHL_V2_I]] to <8 x i8> 3186 // CHECK: ret <2 x i32> [[VSHL_V2_I]] 3187 uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) { 3188 return vshl_u32(a, b); 3189 } 3190 3191 // CHECK-LABEL: @test_vshl_u64( 3192 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3193 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3194 // CHECK: [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %a, <1 x i64> %b) 3195 // CHECK: [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8> 3196 // CHECK: ret <1 x i64> [[VSHL_V2_I]] 3197 uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) { 3198 return vshl_u64(a, b); 3199 } 3200 3201 // CHECK-LABEL: @test_vshlq_s8( 3202 // CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %a, <16 x i8> %b) 3203 // CHECK: ret <16 x i8> [[VSHLQ_V_I]] 3204 int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) { 3205 return vshlq_s8(a, b); 3206 } 3207 3208 // CHECK-LABEL: @test_vshlq_s16( 3209 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3210 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3211 // CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %a, <8 x i16> %b) 3212 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> 3213 // CHECK: ret <8 x i16> [[VSHLQ_V2_I]] 3214 int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) { 3215 return vshlq_s16(a, b); 3216 } 3217 3218 // CHECK-LABEL: @test_vshlq_s32( 3219 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3220 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3221 // CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %a, <4 x i32> %b) 3222 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> 3223 // CHECK: ret <4 x i32> [[VSHLQ_V2_I]] 3224 int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) { 3225 return vshlq_s32(a, b); 3226 } 3227 3228 // CHECK-LABEL: @test_vshlq_s64( 3229 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3230 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3231 // CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %a, <2 x i64> %b) 3232 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> 3233 // CHECK: ret <2 x i64> [[VSHLQ_V2_I]] 3234 int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) { 3235 return vshlq_s64(a, b); 3236 } 3237 3238 // CHECK-LABEL: @test_vshlq_u8( 3239 // CHECK: [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8> %a, <16 x i8> %b) 3240 // CHECK: ret <16 x i8> [[VSHLQ_V_I]] 3241 uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) { 3242 return vshlq_u8(a, b); 3243 } 3244 3245 // CHECK-LABEL: @test_vshlq_u16( 3246 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3247 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3248 // CHECK: [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %a, <8 x i16> %b) 3249 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8> 3250 // CHECK: ret <8 x i16> [[VSHLQ_V2_I]] 3251 uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) { 3252 return vshlq_u16(a, b); 3253 } 3254 3255 // CHECK-LABEL: @test_vshlq_u32( 3256 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3257 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3258 // CHECK: [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %a, <4 x i32> %b) 3259 // CHECK: [[VSHLQ_V3_I:%.*]] = 
bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8> 3260 // CHECK: ret <4 x i32> [[VSHLQ_V2_I]] 3261 uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) { 3262 return vshlq_u32(a, b); 3263 } 3264 3265 // CHECK-LABEL: @test_vshlq_u64( 3266 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3267 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3268 // CHECK: [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %a, <2 x i64> %b) 3269 // CHECK: [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8> 3270 // CHECK: ret <2 x i64> [[VSHLQ_V2_I]] 3271 uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) { 3272 return vshlq_u64(a, b); 3273 } 3274 3275 // CHECK-LABEL: @test_vqshl_s8( 3276 // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> %b) 3277 // CHECK: ret <8 x i8> [[VQSHL_V_I]] 3278 int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) { 3279 return vqshl_s8(a, b); 3280 } 3281 3282 // CHECK-LABEL: @test_vqshl_s16( 3283 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3284 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3285 // CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %a, <4 x i16> %b) 3286 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> 3287 // CHECK: ret <4 x i16> [[VQSHL_V2_I]] 3288 int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) { 3289 return vqshl_s16(a, b); 3290 } 3291 3292 // CHECK-LABEL: @test_vqshl_s32( 3293 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3294 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3295 // CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %a, <2 x i32> %b) 3296 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> 3297 // CHECK: ret <2 x i32> [[VQSHL_V2_I]] 3298 int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) { 3299 return vqshl_s32(a, b); 3300 } 3301 3302 // CHECK-LABEL: @test_vqshl_s64( 3303 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3304 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3305 // CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %a, <1 x i64> %b) 3306 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> 3307 // CHECK: ret <1 x i64> [[VQSHL_V2_I]] 3308 int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) { 3309 return vqshl_s64(a, b); 3310 } 3311 3312 // CHECK-LABEL: @test_vqshl_u8( 3313 // CHECK: [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> %b) 3314 // CHECK: ret <8 x i8> [[VQSHL_V_I]] 3315 uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) { 3316 return vqshl_u8(a, b); 3317 } 3318 3319 // CHECK-LABEL: @test_vqshl_u16( 3320 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3321 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3322 // CHECK: [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %a, <4 x i16> %b) 3323 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8> 3324 // CHECK: ret <4 x i16> [[VQSHL_V2_I]] 3325 uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) { 3326 return vqshl_u16(a, b); 3327 } 3328 3329 // CHECK-LABEL: @test_vqshl_u32( 3330 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3331 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3332 // CHECK: [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %a, <2 x i32> %b) 3333 // CHECK: [[VQSHL_V3_I:%.*]] = 
bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8> 3334 // CHECK: ret <2 x i32> [[VQSHL_V2_I]] 3335 uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) { 3336 return vqshl_u32(a, b); 3337 } 3338 3339 // CHECK-LABEL: @test_vqshl_u64( 3340 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3341 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3342 // CHECK: [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %a, <1 x i64> %b) 3343 // CHECK: [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8> 3344 // CHECK: ret <1 x i64> [[VQSHL_V2_I]] 3345 uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) { 3346 return vqshl_u64(a, b); 3347 } 3348 3349 // CHECK-LABEL: @test_vqshlq_s8( 3350 // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> %b) 3351 // CHECK: ret <16 x i8> [[VQSHLQ_V_I]] 3352 int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) { 3353 return vqshlq_s8(a, b); 3354 } 3355 3356 // CHECK-LABEL: @test_vqshlq_s16( 3357 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3358 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3359 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %a, <8 x i16> %b) 3360 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> 3361 // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]] 3362 int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) { 3363 return vqshlq_s16(a, b); 3364 } 3365 3366 // CHECK-LABEL: @test_vqshlq_s32( 3367 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3368 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3369 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %a, <4 x i32> %b) 3370 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> 3371 // CHECK: ret <4 x i32> [[VQSHLQ_V2_I]] 3372 int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) { 3373 return vqshlq_s32(a, b); 3374 } 3375 3376 // CHECK-LABEL: @test_vqshlq_s64( 3377 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3378 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3379 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %a, <2 x i64> %b) 3380 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> 3381 // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]] 3382 int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) { 3383 return vqshlq_s64(a, b); 3384 } 3385 3386 // CHECK-LABEL: @test_vqshlq_u8( 3387 // CHECK: [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> %b) 3388 // CHECK: ret <16 x i8> [[VQSHLQ_V_I]] 3389 uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) { 3390 return vqshlq_u8(a, b); 3391 } 3392 3393 // CHECK-LABEL: @test_vqshlq_u16( 3394 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3395 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3396 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %a, <8 x i16> %b) 3397 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8> 3398 // CHECK: ret <8 x i16> [[VQSHLQ_V2_I]] 3399 uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) { 3400 return vqshlq_u16(a, b); 3401 } 3402 3403 // CHECK-LABEL: @test_vqshlq_u32( 3404 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3405 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3406 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %a, <4 x i32> %b) 3407 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8> 3408 // CHECK: ret <4 x i32> [[VQSHLQ_V2_I]] 3409 uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) { 3410 return vqshlq_u32(a, b); 3411 } 3412 3413 // CHECK-LABEL: @test_vqshlq_u64( 3414 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3415 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3416 // CHECK: [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %a, <2 x i64> %b) 3417 // CHECK: [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8> 3418 // CHECK: ret <2 x i64> [[VQSHLQ_V2_I]] 3419 uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) { 3420 return vqshlq_u64(a, b); 3421 } 3422 3423 // CHECK-LABEL: @test_vrshl_s8( 3424 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> %b) 3425 // CHECK: ret <8 x i8> [[VRSHL_V_I]] 3426 int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) { 3427 return vrshl_s8(a, b); 3428 } 3429 3430 // CHECK-LABEL: @test_vrshl_s16( 3431 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3432 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3433 // CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %a, <4 x i16> %b) 3434 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> 3435 // CHECK: ret <4 x i16> [[VRSHL_V2_I]] 3436 int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) { 3437 return vrshl_s16(a, b); 3438 } 3439 3440 // CHECK-LABEL: @test_vrshl_s32( 3441 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3442 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3443 // CHECK: [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %a, <2 x i32> %b) 3444 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> 3445 // CHECK: ret <2 x i32> [[VRSHL_V2_I]] 3446 int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) { 3447 return vrshl_s32(a, b); 3448 } 3449 3450 // CHECK-LABEL: @test_vrshl_s64( 3451 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3452 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3453 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %a, <1 x i64> %b) 3454 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> 3455 // CHECK: ret <1 x i64> [[VRSHL_V2_I]] 3456 int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) { 3457 return vrshl_s64(a, b); 3458 } 3459 3460 // CHECK-LABEL: @test_vrshl_u8( 3461 // CHECK: [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> %b) 3462 // CHECK: ret <8 x i8> [[VRSHL_V_I]] 3463 uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) { 3464 return vrshl_u8(a, b); 3465 } 3466 3467 // CHECK-LABEL: @test_vrshl_u16( 3468 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3469 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3470 // CHECK: [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %a, <4 x i16> %b) 3471 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8> 3472 // CHECK: ret <4 x i16> [[VRSHL_V2_I]] 3473 uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) { 3474 return vrshl_u16(a, b); 3475 } 3476 3477 // CHECK-LABEL: @test_vrshl_u32( 3478 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3479 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3480 // CHECK: [[VRSHL_V2_I:%.*]] = 
call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %a, <2 x i32> %b) 3481 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8> 3482 // CHECK: ret <2 x i32> [[VRSHL_V2_I]] 3483 uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) { 3484 return vrshl_u32(a, b); 3485 } 3486 3487 // CHECK-LABEL: @test_vrshl_u64( 3488 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3489 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3490 // CHECK: [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %a, <1 x i64> %b) 3491 // CHECK: [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8> 3492 // CHECK: ret <1 x i64> [[VRSHL_V2_I]] 3493 uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) { 3494 return vrshl_u64(a, b); 3495 } 3496 3497 // CHECK-LABEL: @test_vrshlq_s8( 3498 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> %b) 3499 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]] 3500 int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) { 3501 return vrshlq_s8(a, b); 3502 } 3503 3504 // CHECK-LABEL: @test_vrshlq_s16( 3505 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3506 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3507 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %a, <8 x i16> %b) 3508 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> 3509 // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]] 3510 int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) { 3511 return vrshlq_s16(a, b); 3512 } 3513 3514 // CHECK-LABEL: @test_vrshlq_s32( 3515 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3516 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3517 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %a, <4 x i32> %b) 3518 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> 3519 // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]] 3520 int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) { 3521 return vrshlq_s32(a, b); 3522 } 3523 3524 // CHECK-LABEL: @test_vrshlq_s64( 3525 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3526 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3527 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %a, <2 x i64> %b) 3528 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> 3529 // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]] 3530 int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) { 3531 return vrshlq_s64(a, b); 3532 } 3533 3534 // CHECK-LABEL: @test_vrshlq_u8( 3535 // CHECK: [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> %b) 3536 // CHECK: ret <16 x i8> [[VRSHLQ_V_I]] 3537 uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) { 3538 return vrshlq_u8(a, b); 3539 } 3540 3541 // CHECK-LABEL: @test_vrshlq_u16( 3542 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3543 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3544 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %a, <8 x i16> %b) 3545 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8> 3546 // CHECK: ret <8 x i16> [[VRSHLQ_V2_I]] 3547 uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) { 3548 return vrshlq_u16(a, b); 3549 } 3550 3551 // CHECK-LABEL: @test_vrshlq_u32( 3552 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3553 // CHECK: [[TMP1:%.*]] = 
bitcast <4 x i32> %b to <16 x i8> 3554 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %a, <4 x i32> %b) 3555 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8> 3556 // CHECK: ret <4 x i32> [[VRSHLQ_V2_I]] 3557 uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) { 3558 return vrshlq_u32(a, b); 3559 } 3560 3561 // CHECK-LABEL: @test_vrshlq_u64( 3562 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3563 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3564 // CHECK: [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %a, <2 x i64> %b) 3565 // CHECK: [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8> 3566 // CHECK: ret <2 x i64> [[VRSHLQ_V2_I]] 3567 uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) { 3568 return vrshlq_u64(a, b); 3569 } 3570 3571 // CHECK-LABEL: @test_vqrshl_s8( 3572 // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) 3573 // CHECK: ret <8 x i8> [[VQRSHL_V_I]] 3574 int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) { 3575 return vqrshl_s8(a, b); 3576 } 3577 3578 // CHECK-LABEL: @test_vqrshl_s16( 3579 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3580 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3581 // CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %a, <4 x i16> %b) 3582 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> 3583 // CHECK: ret <4 x i16> [[VQRSHL_V2_I]] 3584 int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) { 3585 return vqrshl_s16(a, b); 3586 } 3587 3588 // CHECK-LABEL: @test_vqrshl_s32( 3589 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3590 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3591 // CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %a, <2 x i32> %b) 3592 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> 3593 // CHECK: ret <2 x i32> [[VQRSHL_V2_I]] 3594 int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) { 3595 return vqrshl_s32(a, b); 3596 } 3597 3598 // CHECK-LABEL: @test_vqrshl_s64( 3599 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3600 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3601 // CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %a, <1 x i64> %b) 3602 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> 3603 // CHECK: ret <1 x i64> [[VQRSHL_V2_I]] 3604 int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) { 3605 return vqrshl_s64(a, b); 3606 } 3607 3608 // CHECK-LABEL: @test_vqrshl_u8( 3609 // CHECK: [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %a, <8 x i8> %b) 3610 // CHECK: ret <8 x i8> [[VQRSHL_V_I]] 3611 uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) { 3612 return vqrshl_u8(a, b); 3613 } 3614 3615 // CHECK-LABEL: @test_vqrshl_u16( 3616 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3617 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3618 // CHECK: [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %a, <4 x i16> %b) 3619 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8> 3620 // CHECK: ret <4 x i16> [[VQRSHL_V2_I]] 3621 uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) { 3622 return vqrshl_u16(a, b); 3623 } 3624 3625 // CHECK-LABEL: @test_vqrshl_u32( 3626 // CHECK: [[TMP0:%.*]] = 
bitcast <2 x i32> %a to <8 x i8> 3627 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3628 // CHECK: [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %a, <2 x i32> %b) 3629 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8> 3630 // CHECK: ret <2 x i32> [[VQRSHL_V2_I]] 3631 uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) { 3632 return vqrshl_u32(a, b); 3633 } 3634 3635 // CHECK-LABEL: @test_vqrshl_u64( 3636 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3637 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3638 // CHECK: [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %a, <1 x i64> %b) 3639 // CHECK: [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8> 3640 // CHECK: ret <1 x i64> [[VQRSHL_V2_I]] 3641 uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) { 3642 return vqrshl_u64(a, b); 3643 } 3644 3645 // CHECK-LABEL: @test_vqrshlq_s8( 3646 // CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) 3647 // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] 3648 int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) { 3649 return vqrshlq_s8(a, b); 3650 } 3651 3652 // CHECK-LABEL: @test_vqrshlq_s16( 3653 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3654 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3655 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %a, <8 x i16> %b) 3656 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> 3657 // CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]] 3658 int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) { 3659 return vqrshlq_s16(a, b); 3660 } 3661 3662 // CHECK-LABEL: @test_vqrshlq_s32( 3663 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3664 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3665 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %a, <4 x i32> %b) 3666 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> 3667 // CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]] 3668 int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) { 3669 return vqrshlq_s32(a, b); 3670 } 3671 3672 // CHECK-LABEL: @test_vqrshlq_s64( 3673 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3674 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3675 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %a, <2 x i64> %b) 3676 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> 3677 // CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]] 3678 int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) { 3679 return vqrshlq_s64(a, b); 3680 } 3681 3682 // CHECK-LABEL: @test_vqrshlq_u8( 3683 // CHECK: [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %a, <16 x i8> %b) 3684 // CHECK: ret <16 x i8> [[VQRSHLQ_V_I]] 3685 uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) { 3686 return vqrshlq_u8(a, b); 3687 } 3688 3689 // CHECK-LABEL: @test_vqrshlq_u16( 3690 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3691 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3692 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %a, <8 x i16> %b) 3693 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8> 3694 // CHECK: ret <8 x i16> [[VQRSHLQ_V2_I]] 3695 uint16x8_t test_vqrshlq_u16(uint16x8_t a, 
int16x8_t b) { 3696 return vqrshlq_u16(a, b); 3697 } 3698 3699 // CHECK-LABEL: @test_vqrshlq_u32( 3700 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3701 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3702 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %a, <4 x i32> %b) 3703 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8> 3704 // CHECK: ret <4 x i32> [[VQRSHLQ_V2_I]] 3705 uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) { 3706 return vqrshlq_u32(a, b); 3707 } 3708 3709 // CHECK-LABEL: @test_vqrshlq_u64( 3710 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3711 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3712 // CHECK: [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %a, <2 x i64> %b) 3713 // CHECK: [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8> 3714 // CHECK: ret <2 x i64> [[VQRSHLQ_V2_I]] 3715 uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) { 3716 return vqrshlq_u64(a, b); 3717 } 3718 3719 // CHECK-LABEL: @test_vsli_n_p64( 3720 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 3721 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 3722 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 3723 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 3724 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 0) 3725 // CHECK: ret <1 x i64> [[VSLI_N2]] 3726 poly64x1_t test_vsli_n_p64(poly64x1_t a, poly64x1_t b) { 3727 return vsli_n_p64(a, b, 0); 3728 } 3729 3730 // CHECK-LABEL: @test_vsliq_n_p64( 3731 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 3732 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 3733 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 3734 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 3735 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 0) 3736 // CHECK: ret <2 x i64> [[VSLI_N2]] 3737 poly64x2_t test_vsliq_n_p64(poly64x2_t a, poly64x2_t b) { 3738 return vsliq_n_p64(a, b, 0); 3739 } 3740 3741 // CHECK-LABEL: @test_vmax_s8( 3742 // CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smax.v8i8(<8 x i8> %a, <8 x i8> %b) 3743 // CHECK: ret <8 x i8> [[VMAX_I]] 3744 int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) { 3745 return vmax_s8(a, b); 3746 } 3747 3748 // CHECK-LABEL: @test_vmax_s16( 3749 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3750 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3751 // CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smax.v4i16(<4 x i16> %a, <4 x i16> %b) 3752 // CHECK: ret <4 x i16> [[VMAX2_I]] 3753 int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) { 3754 return vmax_s16(a, b); 3755 } 3756 3757 // CHECK-LABEL: @test_vmax_s32( 3758 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3759 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3760 // CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smax.v2i32(<2 x i32> %a, <2 x i32> %b) 3761 // CHECK: ret <2 x i32> [[VMAX2_I]] 3762 int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) { 3763 return vmax_s32(a, b); 3764 } 3765 3766 // CHECK-LABEL: @test_vmax_u8( 3767 // CHECK: [[VMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umax.v8i8(<8 x i8> %a, <8 x i8> %b) 3768 // CHECK: ret <8 x i8> [[VMAX_I]] 3769 uint8x8_t 
test_vmax_u8(uint8x8_t a, uint8x8_t b) { 3770 return vmax_u8(a, b); 3771 } 3772 3773 // CHECK-LABEL: @test_vmax_u16( 3774 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3775 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3776 // CHECK: [[VMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umax.v4i16(<4 x i16> %a, <4 x i16> %b) 3777 // CHECK: ret <4 x i16> [[VMAX2_I]] 3778 uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) { 3779 return vmax_u16(a, b); 3780 } 3781 3782 // CHECK-LABEL: @test_vmax_u32( 3783 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3784 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3785 // CHECK: [[VMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umax.v2i32(<2 x i32> %a, <2 x i32> %b) 3786 // CHECK: ret <2 x i32> [[VMAX2_I]] 3787 uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) { 3788 return vmax_u32(a, b); 3789 } 3790 3791 // CHECK-LABEL: @test_vmax_f32( 3792 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3793 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 3794 // CHECK: [[VMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmax.v2f32(<2 x float> %a, <2 x float> %b) 3795 // CHECK: ret <2 x float> [[VMAX2_I]] 3796 float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) { 3797 return vmax_f32(a, b); 3798 } 3799 3800 // CHECK-LABEL: @test_vmaxq_s8( 3801 // CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smax.v16i8(<16 x i8> %a, <16 x i8> %b) 3802 // CHECK: ret <16 x i8> [[VMAX_I]] 3803 int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) { 3804 return vmaxq_s8(a, b); 3805 } 3806 3807 // CHECK-LABEL: @test_vmaxq_s16( 3808 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3809 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3810 // CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smax.v8i16(<8 x i16> %a, <8 x i16> %b) 3811 // CHECK: ret <8 x i16> [[VMAX2_I]] 3812 int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) { 3813 return vmaxq_s16(a, b); 3814 } 3815 3816 // CHECK-LABEL: @test_vmaxq_s32( 3817 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3818 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3819 // CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b) 3820 // CHECK: ret <4 x i32> [[VMAX2_I]] 3821 int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) { 3822 return vmaxq_s32(a, b); 3823 } 3824 3825 // CHECK-LABEL: @test_vmaxq_u8( 3826 // CHECK: [[VMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umax.v16i8(<16 x i8> %a, <16 x i8> %b) 3827 // CHECK: ret <16 x i8> [[VMAX_I]] 3828 uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) { 3829 return vmaxq_u8(a, b); 3830 } 3831 3832 // CHECK-LABEL: @test_vmaxq_u16( 3833 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3834 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3835 // CHECK: [[VMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umax.v8i16(<8 x i16> %a, <8 x i16> %b) 3836 // CHECK: ret <8 x i16> [[VMAX2_I]] 3837 uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) { 3838 return vmaxq_u16(a, b); 3839 } 3840 3841 // CHECK-LABEL: @test_vmaxq_u32( 3842 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3843 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3844 // CHECK: [[VMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umax.v4i32(<4 x i32> %a, <4 x i32> %b) 3845 // CHECK: ret <4 x i32> [[VMAX2_I]] 3846 uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) { 3847 return vmaxq_u32(a, b); 3848 } 3849 3850 // 
CHECK-LABEL: @test_vmaxq_f32( 3851 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 3852 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 3853 // CHECK: [[VMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmax.v4f32(<4 x float> %a, <4 x float> %b) 3854 // CHECK: ret <4 x float> [[VMAX2_I]] 3855 float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) { 3856 return vmaxq_f32(a, b); 3857 } 3858 3859 // CHECK-LABEL: @test_vmaxq_f64( 3860 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 3861 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 3862 // CHECK: [[VMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmax.v2f64(<2 x double> %a, <2 x double> %b) 3863 // CHECK: ret <2 x double> [[VMAX2_I]] 3864 float64x2_t test_vmaxq_f64(float64x2_t a, float64x2_t b) { 3865 return vmaxq_f64(a, b); 3866 } 3867 3868 // CHECK-LABEL: @test_vmin_s8( 3869 // CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smin.v8i8(<8 x i8> %a, <8 x i8> %b) 3870 // CHECK: ret <8 x i8> [[VMIN_I]] 3871 int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) { 3872 return vmin_s8(a, b); 3873 } 3874 3875 // CHECK-LABEL: @test_vmin_s16( 3876 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3877 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3878 // CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smin.v4i16(<4 x i16> %a, <4 x i16> %b) 3879 // CHECK: ret <4 x i16> [[VMIN2_I]] 3880 int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) { 3881 return vmin_s16(a, b); 3882 } 3883 3884 // CHECK-LABEL: @test_vmin_s32( 3885 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3886 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3887 // CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smin.v2i32(<2 x i32> %a, <2 x i32> %b) 3888 // CHECK: ret <2 x i32> [[VMIN2_I]] 3889 int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) { 3890 return vmin_s32(a, b); 3891 } 3892 3893 // CHECK-LABEL: @test_vmin_u8( 3894 // CHECK: [[VMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umin.v8i8(<8 x i8> %a, <8 x i8> %b) 3895 // CHECK: ret <8 x i8> [[VMIN_I]] 3896 uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) { 3897 return vmin_u8(a, b); 3898 } 3899 3900 // CHECK-LABEL: @test_vmin_u16( 3901 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 3902 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 3903 // CHECK: [[VMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umin.v4i16(<4 x i16> %a, <4 x i16> %b) 3904 // CHECK: ret <4 x i16> [[VMIN2_I]] 3905 uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) { 3906 return vmin_u16(a, b); 3907 } 3908 3909 // CHECK-LABEL: @test_vmin_u32( 3910 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 3911 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 3912 // CHECK: [[VMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umin.v2i32(<2 x i32> %a, <2 x i32> %b) 3913 // CHECK: ret <2 x i32> [[VMIN2_I]] 3914 uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) { 3915 return vmin_u32(a, b); 3916 } 3917 3918 // CHECK-LABEL: @test_vmin_f32( 3919 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3920 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 3921 // CHECK: [[VMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmin.v2f32(<2 x float> %a, <2 x float> %b) 3922 // CHECK: ret <2 x float> [[VMIN2_I]] 3923 float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) { 3924 return vmin_f32(a, b); 3925 } 3926 3927 // CHECK-LABEL: @test_vminq_s8( 3928 // CHECK: [[VMIN_I:%.*]] = call <16 x i8> 
@llvm.aarch64.neon.smin.v16i8(<16 x i8> %a, <16 x i8> %b) 3929 // CHECK: ret <16 x i8> [[VMIN_I]] 3930 int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) { 3931 return vminq_s8(a, b); 3932 } 3933 3934 // CHECK-LABEL: @test_vminq_s16( 3935 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3936 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3937 // CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smin.v8i16(<8 x i16> %a, <8 x i16> %b) 3938 // CHECK: ret <8 x i16> [[VMIN2_I]] 3939 int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) { 3940 return vminq_s16(a, b); 3941 } 3942 3943 // CHECK-LABEL: @test_vminq_s32( 3944 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3945 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3946 // CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smin.v4i32(<4 x i32> %a, <4 x i32> %b) 3947 // CHECK: ret <4 x i32> [[VMIN2_I]] 3948 int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) { 3949 return vminq_s32(a, b); 3950 } 3951 3952 // CHECK-LABEL: @test_vminq_u8( 3953 // CHECK: [[VMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umin.v16i8(<16 x i8> %a, <16 x i8> %b) 3954 // CHECK: ret <16 x i8> [[VMIN_I]] 3955 uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) { 3956 return vminq_u8(a, b); 3957 } 3958 3959 // CHECK-LABEL: @test_vminq_u16( 3960 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 3961 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 3962 // CHECK: [[VMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umin.v8i16(<8 x i16> %a, <8 x i16> %b) 3963 // CHECK: ret <8 x i16> [[VMIN2_I]] 3964 uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) { 3965 return vminq_u16(a, b); 3966 } 3967 3968 // CHECK-LABEL: @test_vminq_u32( 3969 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 3970 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 3971 // CHECK: [[VMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umin.v4i32(<4 x i32> %a, <4 x i32> %b) 3972 // CHECK: ret <4 x i32> [[VMIN2_I]] 3973 uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) { 3974 return vminq_u32(a, b); 3975 } 3976 3977 // CHECK-LABEL: @test_vminq_f32( 3978 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 3979 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 3980 // CHECK: [[VMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmin.v4f32(<4 x float> %a, <4 x float> %b) 3981 // CHECK: ret <4 x float> [[VMIN2_I]] 3982 float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) { 3983 return vminq_f32(a, b); 3984 } 3985 3986 // CHECK-LABEL: @test_vminq_f64( 3987 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 3988 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 3989 // CHECK: [[VMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmin.v2f64(<2 x double> %a, <2 x double> %b) 3990 // CHECK: ret <2 x double> [[VMIN2_I]] 3991 float64x2_t test_vminq_f64(float64x2_t a, float64x2_t b) { 3992 return vminq_f64(a, b); 3993 } 3994 3995 // CHECK-LABEL: @test_vmaxnm_f32( 3996 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 3997 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 3998 // CHECK: [[VMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnm.v2f32(<2 x float> %a, <2 x float> %b) 3999 // CHECK: ret <2 x float> [[VMAXNM2_I]] 4000 float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) { 4001 return vmaxnm_f32(a, b); 4002 } 4003 4004 // CHECK-LABEL: @test_vmaxnmq_f32( 4005 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4006 // CHECK: 
[[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4007 // CHECK: [[VMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnm.v4f32(<4 x float> %a, <4 x float> %b) 4008 // CHECK: ret <4 x float> [[VMAXNM2_I]] 4009 float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) { 4010 return vmaxnmq_f32(a, b); 4011 } 4012 4013 // CHECK-LABEL: @test_vmaxnmq_f64( 4014 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4015 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4016 // CHECK: [[VMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnm.v2f64(<2 x double> %a, <2 x double> %b) 4017 // CHECK: ret <2 x double> [[VMAXNM2_I]] 4018 float64x2_t test_vmaxnmq_f64(float64x2_t a, float64x2_t b) { 4019 return vmaxnmq_f64(a, b); 4020 } 4021 4022 // CHECK-LABEL: @test_vminnm_f32( 4023 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4024 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4025 // CHECK: [[VMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnm.v2f32(<2 x float> %a, <2 x float> %b) 4026 // CHECK: ret <2 x float> [[VMINNM2_I]] 4027 float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) { 4028 return vminnm_f32(a, b); 4029 } 4030 4031 // CHECK-LABEL: @test_vminnmq_f32( 4032 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4033 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4034 // CHECK: [[VMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnm.v4f32(<4 x float> %a, <4 x float> %b) 4035 // CHECK: ret <4 x float> [[VMINNM2_I]] 4036 float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) { 4037 return vminnmq_f32(a, b); 4038 } 4039 4040 // CHECK-LABEL: @test_vminnmq_f64( 4041 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4042 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4043 // CHECK: [[VMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnm.v2f64(<2 x double> %a, <2 x double> %b) 4044 // CHECK: ret <2 x double> [[VMINNM2_I]] 4045 float64x2_t test_vminnmq_f64(float64x2_t a, float64x2_t b) { 4046 return vminnmq_f64(a, b); 4047 } 4048 4049 // CHECK-LABEL: @test_vpmax_s8( 4050 // CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.smaxp.v8i8(<8 x i8> %a, <8 x i8> %b) 4051 // CHECK: ret <8 x i8> [[VPMAX_I]] 4052 int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) { 4053 return vpmax_s8(a, b); 4054 } 4055 4056 // CHECK-LABEL: @test_vpmax_s16( 4057 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4058 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4059 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.smaxp.v4i16(<4 x i16> %a, <4 x i16> %b) 4060 // CHECK: ret <4 x i16> [[VPMAX2_I]] 4061 int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) { 4062 return vpmax_s16(a, b); 4063 } 4064 4065 // CHECK-LABEL: @test_vpmax_s32( 4066 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4067 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4068 // CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.smaxp.v2i32(<2 x i32> %a, <2 x i32> %b) 4069 // CHECK: ret <2 x i32> [[VPMAX2_I]] 4070 int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) { 4071 return vpmax_s32(a, b); 4072 } 4073 4074 // CHECK-LABEL: @test_vpmax_u8( 4075 // CHECK: [[VPMAX_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.umaxp.v8i8(<8 x i8> %a, <8 x i8> %b) 4076 // CHECK: ret <8 x i8> [[VPMAX_I]] 4077 uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) { 4078 return vpmax_u8(a, b); 4079 } 4080 4081 // CHECK-LABEL: @test_vpmax_u16( 4082 // CHECK: [[TMP0:%.*]] = bitcast <4 x 
i16> %a to <8 x i8> 4083 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4084 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.umaxp.v4i16(<4 x i16> %a, <4 x i16> %b) 4085 // CHECK: ret <4 x i16> [[VPMAX2_I]] 4086 uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) { 4087 return vpmax_u16(a, b); 4088 } 4089 4090 // CHECK-LABEL: @test_vpmax_u32( 4091 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4092 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4093 // CHECK: [[VPMAX2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.umaxp.v2i32(<2 x i32> %a, <2 x i32> %b) 4094 // CHECK: ret <2 x i32> [[VPMAX2_I]] 4095 uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) { 4096 return vpmax_u32(a, b); 4097 } 4098 4099 // CHECK-LABEL: @test_vpmax_f32( 4100 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4101 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4102 // CHECK: [[VPMAX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxp.v2f32(<2 x float> %a, <2 x float> %b) 4103 // CHECK: ret <2 x float> [[VPMAX2_I]] 4104 float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) { 4105 return vpmax_f32(a, b); 4106 } 4107 4108 // CHECK-LABEL: @test_vpmaxq_s8( 4109 // CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.smaxp.v16i8(<16 x i8> %a, <16 x i8> %b) 4110 // CHECK: ret <16 x i8> [[VPMAX_I]] 4111 int8x16_t test_vpmaxq_s8(int8x16_t a, int8x16_t b) { 4112 return vpmaxq_s8(a, b); 4113 } 4114 4115 // CHECK-LABEL: @test_vpmaxq_s16( 4116 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4117 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4118 // CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smaxp.v8i16(<8 x i16> %a, <8 x i16> %b) 4119 // CHECK: ret <8 x i16> [[VPMAX2_I]] 4120 int16x8_t test_vpmaxq_s16(int16x8_t a, int16x8_t b) { 4121 return vpmaxq_s16(a, b); 4122 } 4123 4124 // CHECK-LABEL: @test_vpmaxq_s32( 4125 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4126 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4127 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smaxp.v4i32(<4 x i32> %a, <4 x i32> %b) 4128 // CHECK: ret <4 x i32> [[VPMAX2_I]] 4129 int32x4_t test_vpmaxq_s32(int32x4_t a, int32x4_t b) { 4130 return vpmaxq_s32(a, b); 4131 } 4132 4133 // CHECK-LABEL: @test_vpmaxq_u8( 4134 // CHECK: [[VPMAX_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.umaxp.v16i8(<16 x i8> %a, <16 x i8> %b) 4135 // CHECK: ret <16 x i8> [[VPMAX_I]] 4136 uint8x16_t test_vpmaxq_u8(uint8x16_t a, uint8x16_t b) { 4137 return vpmaxq_u8(a, b); 4138 } 4139 4140 // CHECK-LABEL: @test_vpmaxq_u16( 4141 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4142 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4143 // CHECK: [[VPMAX2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umaxp.v8i16(<8 x i16> %a, <8 x i16> %b) 4144 // CHECK: ret <8 x i16> [[VPMAX2_I]] 4145 uint16x8_t test_vpmaxq_u16(uint16x8_t a, uint16x8_t b) { 4146 return vpmaxq_u16(a, b); 4147 } 4148 4149 // CHECK-LABEL: @test_vpmaxq_u32( 4150 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4151 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4152 // CHECK: [[VPMAX2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umaxp.v4i32(<4 x i32> %a, <4 x i32> %b) 4153 // CHECK: ret <4 x i32> [[VPMAX2_I]] 4154 uint32x4_t test_vpmaxq_u32(uint32x4_t a, uint32x4_t b) { 4155 return vpmaxq_u32(a, b); 4156 } 4157 4158 // CHECK-LABEL: @test_vpmaxq_f32( 4159 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4160 // CHECK: 
[[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4161 // CHECK: [[VPMAX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxp.v4f32(<4 x float> %a, <4 x float> %b) 4162 // CHECK: ret <4 x float> [[VPMAX2_I]] 4163 float32x4_t test_vpmaxq_f32(float32x4_t a, float32x4_t b) { 4164 return vpmaxq_f32(a, b); 4165 } 4166 4167 // CHECK-LABEL: @test_vpmaxq_f64( 4168 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4169 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4170 // CHECK: [[VPMAX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxp.v2f64(<2 x double> %a, <2 x double> %b) 4171 // CHECK: ret <2 x double> [[VPMAX2_I]] 4172 float64x2_t test_vpmaxq_f64(float64x2_t a, float64x2_t b) { 4173 return vpmaxq_f64(a, b); 4174 } 4175 4176 // CHECK-LABEL: @test_vpmin_s8( 4177 // CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sminp.v8i8(<8 x i8> %a, <8 x i8> %b) 4178 // CHECK: ret <8 x i8> [[VPMIN_I]] 4179 int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) { 4180 return vpmin_s8(a, b); 4181 } 4182 4183 // CHECK-LABEL: @test_vpmin_s16( 4184 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4185 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4186 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sminp.v4i16(<4 x i16> %a, <4 x i16> %b) 4187 // CHECK: ret <4 x i16> [[VPMIN2_I]] 4188 int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) { 4189 return vpmin_s16(a, b); 4190 } 4191 4192 // CHECK-LABEL: @test_vpmin_s32( 4193 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4194 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4195 // CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sminp.v2i32(<2 x i32> %a, <2 x i32> %b) 4196 // CHECK: ret <2 x i32> [[VPMIN2_I]] 4197 int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) { 4198 return vpmin_s32(a, b); 4199 } 4200 4201 // CHECK-LABEL: @test_vpmin_u8( 4202 // CHECK: [[VPMIN_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uminp.v8i8(<8 x i8> %a, <8 x i8> %b) 4203 // CHECK: ret <8 x i8> [[VPMIN_I]] 4204 uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) { 4205 return vpmin_u8(a, b); 4206 } 4207 4208 // CHECK-LABEL: @test_vpmin_u16( 4209 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4210 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4211 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uminp.v4i16(<4 x i16> %a, <4 x i16> %b) 4212 // CHECK: ret <4 x i16> [[VPMIN2_I]] 4213 uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) { 4214 return vpmin_u16(a, b); 4215 } 4216 4217 // CHECK-LABEL: @test_vpmin_u32( 4218 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4219 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4220 // CHECK: [[VPMIN2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uminp.v2i32(<2 x i32> %a, <2 x i32> %b) 4221 // CHECK: ret <2 x i32> [[VPMIN2_I]] 4222 uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) { 4223 return vpmin_u32(a, b); 4224 } 4225 4226 // CHECK-LABEL: @test_vpmin_f32( 4227 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4228 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4229 // CHECK: [[VPMIN2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminp.v2f32(<2 x float> %a, <2 x float> %b) 4230 // CHECK: ret <2 x float> [[VPMIN2_I]] 4231 float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) { 4232 return vpmin_f32(a, b); 4233 } 4234 4235 // CHECK-LABEL: @test_vpminq_s8( 4236 // CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.sminp.v16i8(<16 x i8> %a, <16 x i8> %b) 4237 // 
CHECK: ret <16 x i8> [[VPMIN_I]] 4238 int8x16_t test_vpminq_s8(int8x16_t a, int8x16_t b) { 4239 return vpminq_s8(a, b); 4240 } 4241 4242 // CHECK-LABEL: @test_vpminq_s16( 4243 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4244 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4245 // CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sminp.v8i16(<8 x i16> %a, <8 x i16> %b) 4246 // CHECK: ret <8 x i16> [[VPMIN2_I]] 4247 int16x8_t test_vpminq_s16(int16x8_t a, int16x8_t b) { 4248 return vpminq_s16(a, b); 4249 } 4250 4251 // CHECK-LABEL: @test_vpminq_s32( 4252 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4253 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4254 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sminp.v4i32(<4 x i32> %a, <4 x i32> %b) 4255 // CHECK: ret <4 x i32> [[VPMIN2_I]] 4256 int32x4_t test_vpminq_s32(int32x4_t a, int32x4_t b) { 4257 return vpminq_s32(a, b); 4258 } 4259 4260 // CHECK-LABEL: @test_vpminq_u8( 4261 // CHECK: [[VPMIN_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.uminp.v16i8(<16 x i8> %a, <16 x i8> %b) 4262 // CHECK: ret <16 x i8> [[VPMIN_I]] 4263 uint8x16_t test_vpminq_u8(uint8x16_t a, uint8x16_t b) { 4264 return vpminq_u8(a, b); 4265 } 4266 4267 // CHECK-LABEL: @test_vpminq_u16( 4268 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4269 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4270 // CHECK: [[VPMIN2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.uminp.v8i16(<8 x i16> %a, <8 x i16> %b) 4271 // CHECK: ret <8 x i16> [[VPMIN2_I]] 4272 uint16x8_t test_vpminq_u16(uint16x8_t a, uint16x8_t b) { 4273 return vpminq_u16(a, b); 4274 } 4275 4276 // CHECK-LABEL: @test_vpminq_u32( 4277 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4278 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4279 // CHECK: [[VPMIN2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.uminp.v4i32(<4 x i32> %a, <4 x i32> %b) 4280 // CHECK: ret <4 x i32> [[VPMIN2_I]] 4281 uint32x4_t test_vpminq_u32(uint32x4_t a, uint32x4_t b) { 4282 return vpminq_u32(a, b); 4283 } 4284 4285 // CHECK-LABEL: @test_vpminq_f32( 4286 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4287 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4288 // CHECK: [[VPMIN2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminp.v4f32(<4 x float> %a, <4 x float> %b) 4289 // CHECK: ret <4 x float> [[VPMIN2_I]] 4290 float32x4_t test_vpminq_f32(float32x4_t a, float32x4_t b) { 4291 return vpminq_f32(a, b); 4292 } 4293 4294 // CHECK-LABEL: @test_vpminq_f64( 4295 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4296 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4297 // CHECK: [[VPMIN2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminp.v2f64(<2 x double> %a, <2 x double> %b) 4298 // CHECK: ret <2 x double> [[VPMIN2_I]] 4299 float64x2_t test_vpminq_f64(float64x2_t a, float64x2_t b) { 4300 return vpminq_f64(a, b); 4301 } 4302 4303 // CHECK-LABEL: @test_vpmaxnm_f32( 4304 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4305 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4306 // CHECK: [[VPMAXNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmaxnmp.v2f32(<2 x float> %a, <2 x float> %b) 4307 // CHECK: ret <2 x float> [[VPMAXNM2_I]] 4308 float32x2_t test_vpmaxnm_f32(float32x2_t a, float32x2_t b) { 4309 return vpmaxnm_f32(a, b); 4310 } 4311 4312 // CHECK-LABEL: @test_vpmaxnmq_f32( 4313 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4314 // CHECK: [[TMP1:%.*]] 
= bitcast <4 x float> %b to <16 x i8> 4315 // CHECK: [[VPMAXNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmaxnmp.v4f32(<4 x float> %a, <4 x float> %b) 4316 // CHECK: ret <4 x float> [[VPMAXNM2_I]] 4317 float32x4_t test_vpmaxnmq_f32(float32x4_t a, float32x4_t b) { 4318 return vpmaxnmq_f32(a, b); 4319 } 4320 4321 // CHECK-LABEL: @test_vpmaxnmq_f64( 4322 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4323 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4324 // CHECK: [[VPMAXNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmaxnmp.v2f64(<2 x double> %a, <2 x double> %b) 4325 // CHECK: ret <2 x double> [[VPMAXNM2_I]] 4326 float64x2_t test_vpmaxnmq_f64(float64x2_t a, float64x2_t b) { 4327 return vpmaxnmq_f64(a, b); 4328 } 4329 4330 // CHECK-LABEL: @test_vpminnm_f32( 4331 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4332 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4333 // CHECK: [[VPMINNM2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fminnmp.v2f32(<2 x float> %a, <2 x float> %b) 4334 // CHECK: ret <2 x float> [[VPMINNM2_I]] 4335 float32x2_t test_vpminnm_f32(float32x2_t a, float32x2_t b) { 4336 return vpminnm_f32(a, b); 4337 } 4338 4339 // CHECK-LABEL: @test_vpminnmq_f32( 4340 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4341 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4342 // CHECK: [[VPMINNM2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fminnmp.v4f32(<4 x float> %a, <4 x float> %b) 4343 // CHECK: ret <4 x float> [[VPMINNM2_I]] 4344 float32x4_t test_vpminnmq_f32(float32x4_t a, float32x4_t b) { 4345 return vpminnmq_f32(a, b); 4346 } 4347 4348 // CHECK-LABEL: @test_vpminnmq_f64( 4349 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4350 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4351 // CHECK: [[VPMINNM2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fminnmp.v2f64(<2 x double> %a, <2 x double> %b) 4352 // CHECK: ret <2 x double> [[VPMINNM2_I]] 4353 float64x2_t test_vpminnmq_f64(float64x2_t a, float64x2_t b) { 4354 return vpminnmq_f64(a, b); 4355 } 4356 4357 // CHECK-LABEL: @test_vpadd_s8( 4358 // CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) 4359 // CHECK: ret <8 x i8> [[VPADD_V_I]] 4360 int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) { 4361 return vpadd_s8(a, b); 4362 } 4363 4364 // CHECK-LABEL: @test_vpadd_s16( 4365 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4366 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4367 // CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b) 4368 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> 4369 // CHECK: ret <4 x i16> [[VPADD_V2_I]] 4370 int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) { 4371 return vpadd_s16(a, b); 4372 } 4373 4374 // CHECK-LABEL: @test_vpadd_s32( 4375 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4376 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4377 // CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b) 4378 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> 4379 // CHECK: ret <2 x i32> [[VPADD_V2_I]] 4380 int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) { 4381 return vpadd_s32(a, b); 4382 } 4383 4384 // CHECK-LABEL: @test_vpadd_u8( 4385 // CHECK: [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.addp.v8i8(<8 x i8> %a, <8 x i8> %b) 4386 // CHECK: ret <8 x i8> 
[[VPADD_V_I]] 4387 uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) { 4388 return vpadd_u8(a, b); 4389 } 4390 4391 // CHECK-LABEL: @test_vpadd_u16( 4392 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4393 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4394 // CHECK: [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b) 4395 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8> 4396 // CHECK: ret <4 x i16> [[VPADD_V2_I]] 4397 uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) { 4398 return vpadd_u16(a, b); 4399 } 4400 4401 // CHECK-LABEL: @test_vpadd_u32( 4402 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4403 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4404 // CHECK: [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.addp.v2i32(<2 x i32> %a, <2 x i32> %b) 4405 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8> 4406 // CHECK: ret <2 x i32> [[VPADD_V2_I]] 4407 uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) { 4408 return vpadd_u32(a, b); 4409 } 4410 4411 // CHECK-LABEL: @test_vpadd_f32( 4412 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4413 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4414 // CHECK: [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %a, <2 x float> %b) 4415 // CHECK: [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8> 4416 // CHECK: ret <2 x float> [[VPADD_V2_I]] 4417 float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) { 4418 return vpadd_f32(a, b); 4419 } 4420 4421 // CHECK-LABEL: @test_vpaddq_s8( 4422 // CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) 4423 // CHECK: ret <16 x i8> [[VPADDQ_V_I]] 4424 int8x16_t test_vpaddq_s8(int8x16_t a, int8x16_t b) { 4425 return vpaddq_s8(a, b); 4426 } 4427 4428 // CHECK-LABEL: @test_vpaddq_s16( 4429 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4430 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4431 // CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b) 4432 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VPADDQ_V2_I]] to <16 x i8> 4433 // CHECK: ret <8 x i16> [[VPADDQ_V2_I]] 4434 int16x8_t test_vpaddq_s16(int16x8_t a, int16x8_t b) { 4435 return vpaddq_s16(a, b); 4436 } 4437 4438 // CHECK-LABEL: @test_vpaddq_s32( 4439 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4440 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4441 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b) 4442 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8> 4443 // CHECK: ret <4 x i32> [[VPADDQ_V2_I]] 4444 int32x4_t test_vpaddq_s32(int32x4_t a, int32x4_t b) { 4445 return vpaddq_s32(a, b); 4446 } 4447 4448 // CHECK-LABEL: @test_vpaddq_u8( 4449 // CHECK: [[VPADDQ_V_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.addp.v16i8(<16 x i8> %a, <16 x i8> %b) 4450 // CHECK: ret <16 x i8> [[VPADDQ_V_I]] 4451 uint8x16_t test_vpaddq_u8(uint8x16_t a, uint8x16_t b) { 4452 return vpaddq_u8(a, b); 4453 } 4454 4455 // CHECK-LABEL: @test_vpaddq_u16( 4456 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4457 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4458 // CHECK: [[VPADDQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.addp.v8i16(<8 x i16> %a, <8 x i16> %b) 4459 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <8 x 
i16> [[VPADDQ_V2_I]] to <16 x i8> 4460 // CHECK: ret <8 x i16> [[VPADDQ_V2_I]] 4461 uint16x8_t test_vpaddq_u16(uint16x8_t a, uint16x8_t b) { 4462 return vpaddq_u16(a, b); 4463 } 4464 4465 // CHECK-LABEL: @test_vpaddq_u32( 4466 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4467 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4468 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.addp.v4i32(<4 x i32> %a, <4 x i32> %b) 4469 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VPADDQ_V2_I]] to <16 x i8> 4470 // CHECK: ret <4 x i32> [[VPADDQ_V2_I]] 4471 uint32x4_t test_vpaddq_u32(uint32x4_t a, uint32x4_t b) { 4472 return vpaddq_u32(a, b); 4473 } 4474 4475 // CHECK-LABEL: @test_vpaddq_f32( 4476 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4477 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4478 // CHECK: [[VPADDQ_V2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.faddp.v4f32(<4 x float> %a, <4 x float> %b) 4479 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <4 x float> [[VPADDQ_V2_I]] to <16 x i8> 4480 // CHECK: ret <4 x float> [[VPADDQ_V2_I]] 4481 float32x4_t test_vpaddq_f32(float32x4_t a, float32x4_t b) { 4482 return vpaddq_f32(a, b); 4483 } 4484 4485 // CHECK-LABEL: @test_vpaddq_f64( 4486 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4487 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4488 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.faddp.v2f64(<2 x double> %a, <2 x double> %b) 4489 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x double> [[VPADDQ_V2_I]] to <16 x i8> 4490 // CHECK: ret <2 x double> [[VPADDQ_V2_I]] 4491 float64x2_t test_vpaddq_f64(float64x2_t a, float64x2_t b) { 4492 return vpaddq_f64(a, b); 4493 } 4494 4495 // CHECK-LABEL: @test_vqdmulh_s16( 4496 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4497 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4498 // CHECK: [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) 4499 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8> 4500 // CHECK: ret <4 x i16> [[VQDMULH_V2_I]] 4501 int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) { 4502 return vqdmulh_s16(a, b); 4503 } 4504 4505 // CHECK-LABEL: @test_vqdmulh_s32( 4506 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4507 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4508 // CHECK: [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) 4509 // CHECK: [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8> 4510 // CHECK: ret <2 x i32> [[VQDMULH_V2_I]] 4511 int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) { 4512 return vqdmulh_s32(a, b); 4513 } 4514 4515 // CHECK-LABEL: @test_vqdmulhq_s16( 4516 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4517 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4518 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) 4519 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8> 4520 // CHECK: ret <8 x i16> [[VQDMULHQ_V2_I]] 4521 int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) { 4522 return vqdmulhq_s16(a, b); 4523 } 4524 4525 // CHECK-LABEL: @test_vqdmulhq_s32( 4526 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4527 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4528 // CHECK: [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> 
@llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) 4529 // CHECK: [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8> 4530 // CHECK: ret <4 x i32> [[VQDMULHQ_V2_I]] 4531 int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) { 4532 return vqdmulhq_s32(a, b); 4533 } 4534 4535 // CHECK-LABEL: @test_vqrdmulh_s16( 4536 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4537 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4538 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b) 4539 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8> 4540 // CHECK: ret <4 x i16> [[VQRDMULH_V2_I]] 4541 int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) { 4542 return vqrdmulh_s16(a, b); 4543 } 4544 4545 // CHECK-LABEL: @test_vqrdmulh_s32( 4546 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4547 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4548 // CHECK: [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b) 4549 // CHECK: [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8> 4550 // CHECK: ret <2 x i32> [[VQRDMULH_V2_I]] 4551 int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) { 4552 return vqrdmulh_s32(a, b); 4553 } 4554 4555 // CHECK-LABEL: @test_vqrdmulhq_s16( 4556 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4557 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4558 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b) 4559 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8> 4560 // CHECK: ret <8 x i16> [[VQRDMULHQ_V2_I]] 4561 int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) { 4562 return vqrdmulhq_s16(a, b); 4563 } 4564 4565 // CHECK-LABEL: @test_vqrdmulhq_s32( 4566 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4567 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4568 // CHECK: [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b) 4569 // CHECK: [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8> 4570 // CHECK: ret <4 x i32> [[VQRDMULHQ_V2_I]] 4571 int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) { 4572 return vqrdmulhq_s32(a, b); 4573 } 4574 4575 // CHECK-LABEL: @test_vmulx_f32( 4576 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 4577 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 4578 // CHECK: [[VMULX2_I:%.*]] = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %b) 4579 // CHECK: ret <2 x float> [[VMULX2_I]] 4580 float32x2_t test_vmulx_f32(float32x2_t a, float32x2_t b) { 4581 return vmulx_f32(a, b); 4582 } 4583 4584 // CHECK-LABEL: @test_vmulxq_f32( 4585 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 4586 // CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> 4587 // CHECK: [[VMULX2_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %b) 4588 // CHECK: ret <4 x float> [[VMULX2_I]] 4589 float32x4_t test_vmulxq_f32(float32x4_t a, float32x4_t b) { 4590 return vmulxq_f32(a, b); 4591 } 4592 4593 // CHECK-LABEL: @test_vmulxq_f64( 4594 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> 4595 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 4596 // CHECK: [[VMULX2_I:%.*]] = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x 
double> %b) 4597 // CHECK: ret <2 x double> [[VMULX2_I]] 4598 float64x2_t test_vmulxq_f64(float64x2_t a, float64x2_t b) { 4599 return vmulxq_f64(a, b); 4600 } 4601 4602 // CHECK-LABEL: @test_vshl_n_s8( 4603 // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 3) 4604 // CHECK: ret <8 x i8> [[VSHL_N]] 4605 int8x8_t test_vshl_n_s8(int8x8_t a) { 4606 return vshl_n_s8(a, 3); 4607 } 4608 4609 // CHECK-LABEL: @test_vshl_n_s16( 4610 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4611 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4612 // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3) 4613 // CHECK: ret <4 x i16> [[VSHL_N]] 4614 int16x4_t test_vshl_n_s16(int16x4_t a) { 4615 return vshl_n_s16(a, 3); 4616 } 4617 4618 // CHECK-LABEL: @test_vshl_n_s32( 4619 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4620 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4621 // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3) 4622 // CHECK: ret <2 x i32> [[VSHL_N]] 4623 int32x2_t test_vshl_n_s32(int32x2_t a) { 4624 return vshl_n_s32(a, 3); 4625 } 4626 4627 // CHECK-LABEL: @test_vshlq_n_s8( 4628 // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 3) 4629 // CHECK: ret <16 x i8> [[VSHL_N]] 4630 int8x16_t test_vshlq_n_s8(int8x16_t a) { 4631 return vshlq_n_s8(a, 3); 4632 } 4633 4634 // CHECK-LABEL: @test_vshlq_n_s16( 4635 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4636 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4637 // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3) 4638 // CHECK: ret <8 x i16> [[VSHL_N]] 4639 int16x8_t test_vshlq_n_s16(int16x8_t a) { 4640 return vshlq_n_s16(a, 3); 4641 } 4642 4643 // CHECK-LABEL: @test_vshlq_n_s32( 4644 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4645 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4646 // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3) 4647 // CHECK: ret <4 x i32> [[VSHL_N]] 4648 int32x4_t test_vshlq_n_s32(int32x4_t a) { 4649 return vshlq_n_s32(a, 3); 4650 } 4651 4652 // CHECK-LABEL: @test_vshlq_n_s64( 4653 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4654 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4655 // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3) 4656 // CHECK: ret <2 x i64> [[VSHL_N]] 4657 int64x2_t test_vshlq_n_s64(int64x2_t a) { 4658 return vshlq_n_s64(a, 3); 4659 } 4660 4661 // CHECK-LABEL: @test_vshl_n_u8( 4662 // CHECK: [[VSHL_N:%.*]] = shl <8 x i8> %a, splat (i8 3) 4663 // CHECK: ret <8 x i8> [[VSHL_N]] 4664 uint8x8_t test_vshl_n_u8(uint8x8_t a) { 4665 return vshl_n_u8(a, 3); 4666 } 4667 4668 // CHECK-LABEL: @test_vshl_n_u16( 4669 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4670 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4671 // CHECK: [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], splat (i16 3) 4672 // CHECK: ret <4 x i16> [[VSHL_N]] 4673 uint16x4_t test_vshl_n_u16(uint16x4_t a) { 4674 return vshl_n_u16(a, 3); 4675 } 4676 4677 // CHECK-LABEL: @test_vshl_n_u32( 4678 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4679 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4680 // CHECK: [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 3) 4681 // CHECK: ret <2 x i32> [[VSHL_N]] 4682 uint32x2_t test_vshl_n_u32(uint32x2_t a) { 4683 return vshl_n_u32(a, 3); 4684 } 4685 4686 // CHECK-LABEL: @test_vshlq_n_u8( 4687 // CHECK: [[VSHL_N:%.*]] = shl <16 x i8> %a, splat (i8 3) 4688 // 
CHECK: ret <16 x i8> [[VSHL_N]] 4689 uint8x16_t test_vshlq_n_u8(uint8x16_t a) { 4690 return vshlq_n_u8(a, 3); 4691 } 4692 4693 // CHECK-LABEL: @test_vshlq_n_u16( 4694 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4695 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4696 // CHECK: [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], splat (i16 3) 4697 // CHECK: ret <8 x i16> [[VSHL_N]] 4698 uint16x8_t test_vshlq_n_u16(uint16x8_t a) { 4699 return vshlq_n_u16(a, 3); 4700 } 4701 4702 // CHECK-LABEL: @test_vshlq_n_u32( 4703 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4704 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4705 // CHECK: [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], splat (i32 3) 4706 // CHECK: ret <4 x i32> [[VSHL_N]] 4707 uint32x4_t test_vshlq_n_u32(uint32x4_t a) { 4708 return vshlq_n_u32(a, 3); 4709 } 4710 4711 // CHECK-LABEL: @test_vshlq_n_u64( 4712 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4713 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4714 // CHECK: [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], splat (i64 3) 4715 // CHECK: ret <2 x i64> [[VSHL_N]] 4716 uint64x2_t test_vshlq_n_u64(uint64x2_t a) { 4717 return vshlq_n_u64(a, 3); 4718 } 4719 4720 // CHECK-LABEL: @test_vshr_n_s8( 4721 // CHECK: [[VSHR_N:%.*]] = ashr <8 x i8> %a, splat (i8 3) 4722 // CHECK: ret <8 x i8> [[VSHR_N]] 4723 int8x8_t test_vshr_n_s8(int8x8_t a) { 4724 return vshr_n_s8(a, 3); 4725 } 4726 4727 // CHECK-LABEL: @test_vshr_n_s16( 4728 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4729 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4730 // CHECK: [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], splat (i16 3) 4731 // CHECK: ret <4 x i16> [[VSHR_N]] 4732 int16x4_t test_vshr_n_s16(int16x4_t a) { 4733 return vshr_n_s16(a, 3); 4734 } 4735 4736 // CHECK-LABEL: @test_vshr_n_s32( 4737 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4738 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4739 // CHECK: [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], splat (i32 3) 4740 // CHECK: ret <2 x i32> [[VSHR_N]] 4741 int32x2_t test_vshr_n_s32(int32x2_t a) { 4742 return vshr_n_s32(a, 3); 4743 } 4744 4745 // CHECK-LABEL: @test_vshrq_n_s8( 4746 // CHECK: [[VSHR_N:%.*]] = ashr <16 x i8> %a, splat (i8 3) 4747 // CHECK: ret <16 x i8> [[VSHR_N]] 4748 int8x16_t test_vshrq_n_s8(int8x16_t a) { 4749 return vshrq_n_s8(a, 3); 4750 } 4751 4752 // CHECK-LABEL: @test_vshrq_n_s16( 4753 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4754 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4755 // CHECK: [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3) 4756 // CHECK: ret <8 x i16> [[VSHR_N]] 4757 int16x8_t test_vshrq_n_s16(int16x8_t a) { 4758 return vshrq_n_s16(a, 3); 4759 } 4760 4761 // CHECK-LABEL: @test_vshrq_n_s32( 4762 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4763 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4764 // CHECK: [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 3) 4765 // CHECK: ret <4 x i32> [[VSHR_N]] 4766 int32x4_t test_vshrq_n_s32(int32x4_t a) { 4767 return vshrq_n_s32(a, 3); 4768 } 4769 4770 // CHECK-LABEL: @test_vshrq_n_s64( 4771 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4772 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4773 // CHECK: [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 3) 4774 // CHECK: ret <2 x i64> [[VSHR_N]] 4775 int64x2_t test_vshrq_n_s64(int64x2_t a) { 4776 return vshrq_n_s64(a, 
3); 4777 } 4778 4779 // CHECK-LABEL: @test_vshr_n_u8( 4780 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i8> %a, splat (i8 3) 4781 // CHECK: ret <8 x i8> [[VSHR_N]] 4782 uint8x8_t test_vshr_n_u8(uint8x8_t a) { 4783 return vshr_n_u8(a, 3); 4784 } 4785 4786 // CHECK-LABEL: @test_vshr_n_u16( 4787 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4788 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4789 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], splat (i16 3) 4790 // CHECK: ret <4 x i16> [[VSHR_N]] 4791 uint16x4_t test_vshr_n_u16(uint16x4_t a) { 4792 return vshr_n_u16(a, 3); 4793 } 4794 4795 // CHECK-LABEL: @test_vshr_n_u32( 4796 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4797 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4798 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], splat (i32 3) 4799 // CHECK: ret <2 x i32> [[VSHR_N]] 4800 uint32x2_t test_vshr_n_u32(uint32x2_t a) { 4801 return vshr_n_u32(a, 3); 4802 } 4803 4804 // CHECK-LABEL: @test_vshrq_n_u8( 4805 // CHECK: [[VSHR_N:%.*]] = lshr <16 x i8> %a, splat (i8 3) 4806 // CHECK: ret <16 x i8> [[VSHR_N]] 4807 uint8x16_t test_vshrq_n_u8(uint8x16_t a) { 4808 return vshrq_n_u8(a, 3); 4809 } 4810 4811 // CHECK-LABEL: @test_vshrq_n_u16( 4812 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4813 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4814 // CHECK: [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3) 4815 // CHECK: ret <8 x i16> [[VSHR_N]] 4816 uint16x8_t test_vshrq_n_u16(uint16x8_t a) { 4817 return vshrq_n_u16(a, 3); 4818 } 4819 4820 // CHECK-LABEL: @test_vshrq_n_u32( 4821 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4822 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4823 // CHECK: [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 3) 4824 // CHECK: ret <4 x i32> [[VSHR_N]] 4825 uint32x4_t test_vshrq_n_u32(uint32x4_t a) { 4826 return vshrq_n_u32(a, 3); 4827 } 4828 4829 // CHECK-LABEL: @test_vshrq_n_u64( 4830 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4831 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4832 // CHECK: [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 3) 4833 // CHECK: ret <2 x i64> [[VSHR_N]] 4834 uint64x2_t test_vshrq_n_u64(uint64x2_t a) { 4835 return vshrq_n_u64(a, 3); 4836 } 4837 4838 // CHECK-LABEL: @test_vsra_n_s8( 4839 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i8> %b, splat (i8 3) 4840 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] 4841 // CHECK: ret <8 x i8> [[TMP0]] 4842 int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) { 4843 return vsra_n_s8(a, b, 3); 4844 } 4845 4846 // CHECK-LABEL: @test_vsra_n_s16( 4847 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4848 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4849 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4850 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4851 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], splat (i16 3) 4852 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] 4853 // CHECK: ret <4 x i16> [[TMP4]] 4854 int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) { 4855 return vsra_n_s16(a, b, 3); 4856 } 4857 4858 // CHECK-LABEL: @test_vsra_n_s32( 4859 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4860 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4861 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4862 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4863 // CHECK: 
[[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], splat (i32 3) 4864 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] 4865 // CHECK: ret <2 x i32> [[TMP4]] 4866 int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) { 4867 return vsra_n_s32(a, b, 3); 4868 } 4869 4870 // CHECK-LABEL: @test_vsraq_n_s8( 4871 // CHECK: [[VSRA_N:%.*]] = ashr <16 x i8> %b, splat (i8 3) 4872 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] 4873 // CHECK: ret <16 x i8> [[TMP0]] 4874 int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) { 4875 return vsraq_n_s8(a, b, 3); 4876 } 4877 4878 // CHECK-LABEL: @test_vsraq_n_s16( 4879 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4880 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4881 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4882 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4883 // CHECK: [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], splat (i16 3) 4884 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] 4885 // CHECK: ret <8 x i16> [[TMP4]] 4886 int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) { 4887 return vsraq_n_s16(a, b, 3); 4888 } 4889 4890 // CHECK-LABEL: @test_vsraq_n_s32( 4891 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4892 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4893 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4894 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4895 // CHECK: [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], splat (i32 3) 4896 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] 4897 // CHECK: ret <4 x i32> [[TMP4]] 4898 int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) { 4899 return vsraq_n_s32(a, b, 3); 4900 } 4901 4902 // CHECK-LABEL: @test_vsraq_n_s64( 4903 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4904 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 4905 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4906 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 4907 // CHECK: [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], splat (i64 3) 4908 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] 4909 // CHECK: ret <2 x i64> [[TMP4]] 4910 int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) { 4911 return vsraq_n_s64(a, b, 3); 4912 } 4913 4914 // CHECK-LABEL: @test_vsra_n_u8( 4915 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i8> %b, splat (i8 3) 4916 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]] 4917 // CHECK: ret <8 x i8> [[TMP0]] 4918 uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) { 4919 return vsra_n_u8(a, b, 3); 4920 } 4921 4922 // CHECK-LABEL: @test_vsra_n_u16( 4923 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4924 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 4925 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 4926 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 4927 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], splat (i16 3) 4928 // CHECK: [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]] 4929 // CHECK: ret <4 x i16> [[TMP4]] 4930 uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) { 4931 return vsra_n_u16(a, b, 3); 4932 } 4933 4934 // CHECK-LABEL: @test_vsra_n_u32( 4935 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 4936 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 4937 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 4938 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 4939 // CHECK: [[VSRA_N:%.*]] = lshr 
<2 x i32> [[TMP3]], splat (i32 3) 4940 // CHECK: [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]] 4941 // CHECK: ret <2 x i32> [[TMP4]] 4942 uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) { 4943 return vsra_n_u32(a, b, 3); 4944 } 4945 4946 // CHECK-LABEL: @test_vsraq_n_u8( 4947 // CHECK: [[VSRA_N:%.*]] = lshr <16 x i8> %b, splat (i8 3) 4948 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]] 4949 // CHECK: ret <16 x i8> [[TMP0]] 4950 uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) { 4951 return vsraq_n_u8(a, b, 3); 4952 } 4953 4954 // CHECK-LABEL: @test_vsraq_n_u16( 4955 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 4956 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 4957 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 4958 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 4959 // CHECK: [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], splat (i16 3) 4960 // CHECK: [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]] 4961 // CHECK: ret <8 x i16> [[TMP4]] 4962 uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) { 4963 return vsraq_n_u16(a, b, 3); 4964 } 4965 4966 // CHECK-LABEL: @test_vsraq_n_u32( 4967 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 4968 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 4969 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 4970 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 4971 // CHECK: [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], splat (i32 3) 4972 // CHECK: [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]] 4973 // CHECK: ret <4 x i32> [[TMP4]] 4974 uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) { 4975 return vsraq_n_u32(a, b, 3); 4976 } 4977 4978 // CHECK-LABEL: @test_vsraq_n_u64( 4979 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 4980 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 4981 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 4982 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 4983 // CHECK: [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], splat (i64 3) 4984 // CHECK: [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]] 4985 // CHECK: ret <2 x i64> [[TMP4]] 4986 uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) { 4987 return vsraq_n_u64(a, b, 3); 4988 } 4989 4990 // CHECK-LABEL: @test_vrshr_n_s8( 4991 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -3)) 4992 // CHECK: ret <8 x i8> [[VRSHR_N]] 4993 int8x8_t test_vrshr_n_s8(int8x8_t a) { 4994 return vrshr_n_s8(a, 3); 4995 } 4996 4997 // CHECK-LABEL: @test_vrshr_n_s16( 4998 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 4999 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5000 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) 5001 // CHECK: ret <4 x i16> [[VRSHR_N1]] 5002 int16x4_t test_vrshr_n_s16(int16x4_t a) { 5003 return vrshr_n_s16(a, 3); 5004 } 5005 5006 // CHECK-LABEL: @test_vrshr_n_s32( 5007 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5008 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5009 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) 5010 // CHECK: ret <2 x i32> [[VRSHR_N1]] 5011 int32x2_t test_vrshr_n_s32(int32x2_t a) { 5012 return vrshr_n_s32(a, 3); 5013 } 5014 5015 // CHECK-LABEL: @test_vrshrq_n_s8( 5016 // CHECK: [[VRSHR_N:%.*]] = call 
<16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -3)) 5017 // CHECK: ret <16 x i8> [[VRSHR_N]] 5018 int8x16_t test_vrshrq_n_s8(int8x16_t a) { 5019 return vrshrq_n_s8(a, 3); 5020 } 5021 5022 // CHECK-LABEL: @test_vrshrq_n_s16( 5023 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5024 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5025 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) 5026 // CHECK: ret <8 x i16> [[VRSHR_N1]] 5027 int16x8_t test_vrshrq_n_s16(int16x8_t a) { 5028 return vrshrq_n_s16(a, 3); 5029 } 5030 5031 // CHECK-LABEL: @test_vrshrq_n_s32( 5032 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5033 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5034 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) 5035 // CHECK: ret <4 x i32> [[VRSHR_N1]] 5036 int32x4_t test_vrshrq_n_s32(int32x4_t a) { 5037 return vrshrq_n_s32(a, 3); 5038 } 5039 5040 // CHECK-LABEL: @test_vrshrq_n_s64( 5041 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5042 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5043 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) 5044 // CHECK: ret <2 x i64> [[VRSHR_N1]] 5045 int64x2_t test_vrshrq_n_s64(int64x2_t a) { 5046 return vrshrq_n_s64(a, 3); 5047 } 5048 5049 // CHECK-LABEL: @test_vrshr_n_u8( 5050 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %a, <8 x i8> splat (i8 -3)) 5051 // CHECK: ret <8 x i8> [[VRSHR_N]] 5052 uint8x8_t test_vrshr_n_u8(uint8x8_t a) { 5053 return vrshr_n_u8(a, 3); 5054 } 5055 5056 // CHECK-LABEL: @test_vrshr_n_u16( 5057 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5058 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5059 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) 5060 // CHECK: ret <4 x i16> [[VRSHR_N1]] 5061 uint16x4_t test_vrshr_n_u16(uint16x4_t a) { 5062 return vrshr_n_u16(a, 3); 5063 } 5064 5065 // CHECK-LABEL: @test_vrshr_n_u32( 5066 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5067 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5068 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) 5069 // CHECK: ret <2 x i32> [[VRSHR_N1]] 5070 uint32x2_t test_vrshr_n_u32(uint32x2_t a) { 5071 return vrshr_n_u32(a, 3); 5072 } 5073 5074 // CHECK-LABEL: @test_vrshrq_n_u8( 5075 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %a, <16 x i8> splat (i8 -3)) 5076 // CHECK: ret <16 x i8> [[VRSHR_N]] 5077 uint8x16_t test_vrshrq_n_u8(uint8x16_t a) { 5078 return vrshrq_n_u8(a, 3); 5079 } 5080 5081 // CHECK-LABEL: @test_vrshrq_n_u16( 5082 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5083 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5084 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) 5085 // CHECK: ret <8 x i16> [[VRSHR_N1]] 5086 uint16x8_t test_vrshrq_n_u16(uint16x8_t a) { 5087 return vrshrq_n_u16(a, 3); 5088 } 5089 5090 // CHECK-LABEL: @test_vrshrq_n_u32( 5091 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5092 // CHECK: 
[[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5093 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) 5094 // CHECK: ret <4 x i32> [[VRSHR_N1]] 5095 uint32x4_t test_vrshrq_n_u32(uint32x4_t a) { 5096 return vrshrq_n_u32(a, 3); 5097 } 5098 5099 // CHECK-LABEL: @test_vrshrq_n_u64( 5100 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5101 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5102 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) 5103 // CHECK: ret <2 x i64> [[VRSHR_N1]] 5104 uint64x2_t test_vrshrq_n_u64(uint64x2_t a) { 5105 return vrshrq_n_u64(a, 3); 5106 } 5107 5108 // CHECK-LABEL: @test_vrsra_n_s8( 5109 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -3)) 5110 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]] 5111 // CHECK: ret <8 x i8> [[TMP0]] 5112 int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) { 5113 return vrsra_n_s8(a, b, 3); 5114 } 5115 5116 // CHECK-LABEL: @test_vrsra_n_s16( 5117 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5118 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5119 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5120 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) 5121 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5122 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] 5123 // CHECK: ret <4 x i16> [[TMP3]] 5124 int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) { 5125 return vrsra_n_s16(a, b, 3); 5126 } 5127 5128 // CHECK-LABEL: @test_vrsra_n_s32( 5129 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5130 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5131 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5132 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) 5133 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5134 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] 5135 // CHECK: ret <2 x i32> [[TMP3]] 5136 int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) { 5137 return vrsra_n_s32(a, b, 3); 5138 } 5139 5140 // CHECK-LABEL: @test_vrsraq_n_s8( 5141 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -3)) 5142 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] 5143 // CHECK: ret <16 x i8> [[TMP0]] 5144 int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) { 5145 return vrsraq_n_s8(a, b, 3); 5146 } 5147 5148 // CHECK-LABEL: @test_vrsraq_n_s16( 5149 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5150 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5151 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5152 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> splat (i16 -3)) 5153 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5154 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] 5155 // CHECK: ret <8 x i16> [[TMP3]] 5156 int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) { 5157 return vrsraq_n_s16(a, b, 3); 5158 } 5159 5160 // CHECK-LABEL: @test_vrsraq_n_s32( 5161 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5162 // CHECK: 
[[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5163 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5164 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) 5165 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5166 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] 5167 // CHECK: ret <4 x i32> [[TMP3]] 5168 int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) { 5169 return vrsraq_n_s32(a, b, 3); 5170 } 5171 5172 // CHECK-LABEL: @test_vrsraq_n_s64( 5173 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5174 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5175 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5176 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) 5177 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5178 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] 5179 // CHECK: ret <2 x i64> [[TMP3]] 5180 int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) { 5181 return vrsraq_n_s64(a, b, 3); 5182 } 5183 5184 // CHECK-LABEL: @test_vrsra_n_u8( 5185 // CHECK: [[VRSHR_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %b, <8 x i8> splat (i8 -3)) 5186 // CHECK: [[TMP0:%.*]] = add <8 x i8> %a, [[VRSHR_N]] 5187 // CHECK: ret <8 x i8> [[TMP0]] 5188 uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) { 5189 return vrsra_n_u8(a, b, 3); 5190 } 5191 5192 // CHECK-LABEL: @test_vrsra_n_u16( 5193 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5194 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5195 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5196 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> splat (i16 -3)) 5197 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5198 // CHECK: [[TMP3:%.*]] = add <4 x i16> [[TMP2]], [[VRSHR_N1]] 5199 // CHECK: ret <4 x i16> [[TMP3]] 5200 uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) { 5201 return vrsra_n_u16(a, b, 3); 5202 } 5203 5204 // CHECK-LABEL: @test_vrsra_n_u32( 5205 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5206 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5207 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5208 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> splat (i32 -3)) 5209 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5210 // CHECK: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], [[VRSHR_N1]] 5211 // CHECK: ret <2 x i32> [[TMP3]] 5212 uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) { 5213 return vrsra_n_u32(a, b, 3); 5214 } 5215 5216 // CHECK-LABEL: @test_vrsraq_n_u8( 5217 // CHECK: [[VRSHR_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %b, <16 x i8> splat (i8 -3)) 5218 // CHECK: [[TMP0:%.*]] = add <16 x i8> %a, [[VRSHR_N]] 5219 // CHECK: ret <16 x i8> [[TMP0]] 5220 uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) { 5221 return vrsraq_n_u8(a, b, 3); 5222 } 5223 5224 // CHECK-LABEL: @test_vrsraq_n_u16( 5225 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5226 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5227 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5228 // CHECK: [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[VRSHR_N]], <8 
x i16> splat (i16 -3)) 5229 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5230 // CHECK: [[TMP3:%.*]] = add <8 x i16> [[TMP2]], [[VRSHR_N1]] 5231 // CHECK: ret <8 x i16> [[TMP3]] 5232 uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) { 5233 return vrsraq_n_u16(a, b, 3); 5234 } 5235 5236 // CHECK-LABEL: @test_vrsraq_n_u32( 5237 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5238 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5239 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5240 // CHECK: [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> splat (i32 -3)) 5241 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5242 // CHECK: [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[VRSHR_N1]] 5243 // CHECK: ret <4 x i32> [[TMP3]] 5244 uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) { 5245 return vrsraq_n_u32(a, b, 3); 5246 } 5247 5248 // CHECK-LABEL: @test_vrsraq_n_u64( 5249 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5250 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5251 // CHECK: [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5252 // CHECK: [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> splat (i64 -3)) 5253 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5254 // CHECK: [[TMP3:%.*]] = add <2 x i64> [[TMP2]], [[VRSHR_N1]] 5255 // CHECK: ret <2 x i64> [[TMP3]] 5256 uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) { 5257 return vrsraq_n_u64(a, b, 3); 5258 } 5259 5260 // CHECK-LABEL: @test_vsri_n_s8( 5261 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 5262 // CHECK: ret <8 x i8> [[VSRI_N]] 5263 int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) { 5264 return vsri_n_s8(a, b, 3); 5265 } 5266 5267 // CHECK-LABEL: @test_vsri_n_s16( 5268 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5269 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5270 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5271 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5272 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3) 5273 // CHECK: ret <4 x i16> [[VSRI_N2]] 5274 int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) { 5275 return vsri_n_s16(a, b, 3); 5276 } 5277 5278 // CHECK-LABEL: @test_vsri_n_s32( 5279 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5280 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5281 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5282 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5283 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3) 5284 // CHECK: ret <2 x i32> [[VSRI_N2]] 5285 int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) { 5286 return vsri_n_s32(a, b, 3); 5287 } 5288 5289 // CHECK-LABEL: @test_vsriq_n_s8( 5290 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 5291 // CHECK: ret <16 x i8> [[VSRI_N]] 5292 int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) { 5293 return vsriq_n_s8(a, b, 3); 5294 } 5295 5296 // CHECK-LABEL: @test_vsriq_n_s16( 5297 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5298 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5299 // CHECK: 
[[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5300 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5301 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3) 5302 // CHECK: ret <8 x i16> [[VSRI_N2]] 5303 int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) { 5304 return vsriq_n_s16(a, b, 3); 5305 } 5306 5307 // CHECK-LABEL: @test_vsriq_n_s32( 5308 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5309 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5310 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5311 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5312 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3) 5313 // CHECK: ret <4 x i32> [[VSRI_N2]] 5314 int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) { 5315 return vsriq_n_s32(a, b, 3); 5316 } 5317 5318 // CHECK-LABEL: @test_vsriq_n_s64( 5319 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5320 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5321 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5322 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5323 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3) 5324 // CHECK: ret <2 x i64> [[VSRI_N2]] 5325 int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) { 5326 return vsriq_n_s64(a, b, 3); 5327 } 5328 5329 // CHECK-LABEL: @test_vsri_n_u8( 5330 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 5331 // CHECK: ret <8 x i8> [[VSRI_N]] 5332 uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) { 5333 return vsri_n_u8(a, b, 3); 5334 } 5335 5336 // CHECK-LABEL: @test_vsri_n_u16( 5337 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5338 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5339 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5340 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5341 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 3) 5342 // CHECK: ret <4 x i16> [[VSRI_N2]] 5343 uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) { 5344 return vsri_n_u16(a, b, 3); 5345 } 5346 5347 // CHECK-LABEL: @test_vsri_n_u32( 5348 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5349 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5350 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5351 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5352 // CHECK: [[VSRI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsri.v2i32(<2 x i32> [[VSRI_N]], <2 x i32> [[VSRI_N1]], i32 3) 5353 // CHECK: ret <2 x i32> [[VSRI_N2]] 5354 uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) { 5355 return vsri_n_u32(a, b, 3); 5356 } 5357 5358 // CHECK-LABEL: @test_vsriq_n_u8( 5359 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 5360 // CHECK: ret <16 x i8> [[VSRI_N]] 5361 uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) { 5362 return vsriq_n_u8(a, b, 3); 5363 } 5364 5365 // CHECK-LABEL: @test_vsriq_n_u16( 5366 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5367 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5368 // CHECK: [[VSRI_N:%.*]] = bitcast 
<16 x i8> [[TMP0]] to <8 x i16> 5369 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5370 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 3) 5371 // CHECK: ret <8 x i16> [[VSRI_N2]] 5372 uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) { 5373 return vsriq_n_u16(a, b, 3); 5374 } 5375 5376 // CHECK-LABEL: @test_vsriq_n_u32( 5377 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5378 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5379 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5380 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5381 // CHECK: [[VSRI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsri.v4i32(<4 x i32> [[VSRI_N]], <4 x i32> [[VSRI_N1]], i32 3) 5382 // CHECK: ret <4 x i32> [[VSRI_N2]] 5383 uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) { 5384 return vsriq_n_u32(a, b, 3); 5385 } 5386 5387 // CHECK-LABEL: @test_vsriq_n_u64( 5388 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5389 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5390 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5391 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5392 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 3) 5393 // CHECK: ret <2 x i64> [[VSRI_N2]] 5394 uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) { 5395 return vsriq_n_u64(a, b, 3); 5396 } 5397 5398 // CHECK-LABEL: @test_vsri_n_p8( 5399 // CHECK: [[VSRI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsri.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 5400 // CHECK: ret <8 x i8> [[VSRI_N]] 5401 poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) { 5402 return vsri_n_p8(a, b, 3); 5403 } 5404 5405 // CHECK-LABEL: @test_vsri_n_p16( 5406 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5407 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5408 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5409 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5410 // CHECK: [[VSRI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsri.v4i16(<4 x i16> [[VSRI_N]], <4 x i16> [[VSRI_N1]], i32 15) 5411 // CHECK: ret <4 x i16> [[VSRI_N2]] 5412 poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) { 5413 return vsri_n_p16(a, b, 15); 5414 } 5415 5416 // CHECK-LABEL: @test_vsriq_n_p8( 5417 // CHECK: [[VSRI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsri.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 5418 // CHECK: ret <16 x i8> [[VSRI_N]] 5419 poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) { 5420 return vsriq_n_p8(a, b, 3); 5421 } 5422 5423 // CHECK-LABEL: @test_vsriq_n_p16( 5424 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5425 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5426 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5427 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5428 // CHECK: [[VSRI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsri.v8i16(<8 x i16> [[VSRI_N]], <8 x i16> [[VSRI_N1]], i32 15) 5429 // CHECK: ret <8 x i16> [[VSRI_N2]] 5430 poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) { 5431 return vsriq_n_p16(a, b, 15); 5432 } 5433 5434 // CHECK-LABEL: @test_vsli_n_s8( 5435 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 5436 // CHECK: ret <8 x i8> [[VSLI_N]] 5437 int8x8_t 
test_vsli_n_s8(int8x8_t a, int8x8_t b) { 5438 return vsli_n_s8(a, b, 3); 5439 } 5440 5441 // CHECK-LABEL: @test_vsli_n_s16( 5442 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5443 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5444 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5445 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5446 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3) 5447 // CHECK: ret <4 x i16> [[VSLI_N2]] 5448 int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) { 5449 return vsli_n_s16(a, b, 3); 5450 } 5451 5452 // CHECK-LABEL: @test_vsli_n_s32( 5453 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5454 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5455 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5456 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5457 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3) 5458 // CHECK: ret <2 x i32> [[VSLI_N2]] 5459 int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) { 5460 return vsli_n_s32(a, b, 3); 5461 } 5462 5463 // CHECK-LABEL: @test_vsliq_n_s8( 5464 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 5465 // CHECK: ret <16 x i8> [[VSLI_N]] 5466 int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) { 5467 return vsliq_n_s8(a, b, 3); 5468 } 5469 5470 // CHECK-LABEL: @test_vsliq_n_s16( 5471 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5472 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5473 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5474 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5475 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3) 5476 // CHECK: ret <8 x i16> [[VSLI_N2]] 5477 int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) { 5478 return vsliq_n_s16(a, b, 3); 5479 } 5480 5481 // CHECK-LABEL: @test_vsliq_n_s32( 5482 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5483 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5484 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5485 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5486 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3) 5487 // CHECK: ret <4 x i32> [[VSLI_N2]] 5488 int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) { 5489 return vsliq_n_s32(a, b, 3); 5490 } 5491 5492 // CHECK-LABEL: @test_vsliq_n_s64( 5493 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5494 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5495 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5496 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5497 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3) 5498 // CHECK: ret <2 x i64> [[VSLI_N2]] 5499 int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) { 5500 return vsliq_n_s64(a, b, 3); 5501 } 5502 5503 // CHECK-LABEL: @test_vsli_n_u8( 5504 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 5505 // CHECK: ret <8 x i8> [[VSLI_N]] 5506 uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t 
b) { 5507 return vsli_n_u8(a, b, 3); 5508 } 5509 5510 // CHECK-LABEL: @test_vsli_n_u16( 5511 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5512 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5513 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5514 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5515 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 3) 5516 // CHECK: ret <4 x i16> [[VSLI_N2]] 5517 uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) { 5518 return vsli_n_u16(a, b, 3); 5519 } 5520 5521 // CHECK-LABEL: @test_vsli_n_u32( 5522 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5523 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 5524 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5525 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 5526 // CHECK: [[VSLI_N2:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], i32 3) 5527 // CHECK: ret <2 x i32> [[VSLI_N2]] 5528 uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) { 5529 return vsli_n_u32(a, b, 3); 5530 } 5531 5532 // CHECK-LABEL: @test_vsliq_n_u8( 5533 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 5534 // CHECK: ret <16 x i8> [[VSLI_N]] 5535 uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) { 5536 return vsliq_n_u8(a, b, 3); 5537 } 5538 5539 // CHECK-LABEL: @test_vsliq_n_u16( 5540 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5541 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5542 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5543 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5544 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 3) 5545 // CHECK: ret <8 x i16> [[VSLI_N2]] 5546 uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) { 5547 return vsliq_n_u16(a, b, 3); 5548 } 5549 5550 // CHECK-LABEL: @test_vsliq_n_u32( 5551 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5552 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5553 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5554 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> 5555 // CHECK: [[VSLI_N2:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], i32 3) 5556 // CHECK: ret <4 x i32> [[VSLI_N2]] 5557 uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) { 5558 return vsliq_n_u32(a, b, 3); 5559 } 5560 5561 // CHECK-LABEL: @test_vsliq_n_u64( 5562 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5563 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5564 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5565 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64> 5566 // CHECK: [[VSLI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], i32 3) 5567 // CHECK: ret <2 x i64> [[VSLI_N2]] 5568 uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) { 5569 return vsliq_n_u64(a, b, 3); 5570 } 5571 5572 // CHECK-LABEL: @test_vsli_n_p8( 5573 // CHECK: [[VSLI_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %a, <8 x i8> %b, i32 3) 5574 // CHECK: ret <8 x i8> [[VSLI_N]] 5575 poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) { 5576 return 
vsli_n_p8(a, b, 3); 5577 } 5578 5579 // CHECK-LABEL: @test_vsli_n_p16( 5580 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5581 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 5582 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5583 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 5584 // CHECK: [[VSLI_N2:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], i32 15) 5585 // CHECK: ret <4 x i16> [[VSLI_N2]] 5586 poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) { 5587 return vsli_n_p16(a, b, 15); 5588 } 5589 5590 // CHECK-LABEL: @test_vsliq_n_p8( 5591 // CHECK: [[VSLI_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %a, <16 x i8> %b, i32 3) 5592 // CHECK: ret <16 x i8> [[VSLI_N]] 5593 poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) { 5594 return vsliq_n_p8(a, b, 3); 5595 } 5596 5597 // CHECK-LABEL: @test_vsliq_n_p16( 5598 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5599 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5600 // CHECK: [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5601 // CHECK: [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 5602 // CHECK: [[VSLI_N2:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], i32 15) 5603 // CHECK: ret <8 x i16> [[VSLI_N2]] 5604 poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) { 5605 return vsliq_n_p16(a, b, 15); 5606 } 5607 5608 // CHECK-LABEL: @test_vqshlu_n_s8( 5609 // CHECK: [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %a, <8 x i8> splat (i8 3)) 5610 // CHECK: ret <8 x i8> [[VQSHLU_N]] 5611 uint8x8_t test_vqshlu_n_s8(int8x8_t a) { 5612 return vqshlu_n_s8(a, 3); 5613 } 5614 5615 // CHECK-LABEL: @test_vqshlu_n_s16( 5616 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 5617 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 5618 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> splat (i16 3)) 5619 // CHECK: ret <4 x i16> [[VQSHLU_N1]] 5620 uint16x4_t test_vqshlu_n_s16(int16x4_t a) { 5621 return vqshlu_n_s16(a, 3); 5622 } 5623 5624 // CHECK-LABEL: @test_vqshlu_n_s32( 5625 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 5626 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 5627 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> splat (i32 3)) 5628 // CHECK: ret <2 x i32> [[VQSHLU_N1]] 5629 uint32x2_t test_vqshlu_n_s32(int32x2_t a) { 5630 return vqshlu_n_s32(a, 3); 5631 } 5632 5633 // CHECK-LABEL: @test_vqshluq_n_s8( 5634 // CHECK: [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %a, <16 x i8> splat (i8 3)) 5635 // CHECK: ret <16 x i8> [[VQSHLU_N]] 5636 uint8x16_t test_vqshluq_n_s8(int8x16_t a) { 5637 return vqshluq_n_s8(a, 3); 5638 } 5639 5640 // CHECK-LABEL: @test_vqshluq_n_s16( 5641 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5642 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5643 // CHECK: [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> splat (i16 3)) 5644 // CHECK: ret <8 x i16> [[VQSHLU_N1]] 5645 uint16x8_t test_vqshluq_n_s16(int16x8_t a) { 5646 return vqshluq_n_s16(a, 3); 5647 } 5648 5649 // CHECK-LABEL: @test_vqshluq_n_s32( 5650 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5651 // CHECK: 
[[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5652 // CHECK: [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> splat (i32 3)) 5653 // CHECK: ret <4 x i32> [[VQSHLU_N1]] 5654 uint32x4_t test_vqshluq_n_s32(int32x4_t a) { 5655 return vqshluq_n_s32(a, 3); 5656 } 5657 5658 // CHECK-LABEL: @test_vqshluq_n_s64( 5659 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5660 // CHECK: [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5661 // CHECK: [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> splat (i64 3)) 5662 // CHECK: ret <2 x i64> [[VQSHLU_N1]] 5663 uint64x2_t test_vqshluq_n_s64(int64x2_t a) { 5664 return vqshluq_n_s64(a, 3); 5665 } 5666 5667 // CHECK-LABEL: @test_vshrn_n_s16( 5668 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5669 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5670 // CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3) 5671 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> 5672 // CHECK: ret <8 x i8> [[VSHRN_N]] 5673 int8x8_t test_vshrn_n_s16(int16x8_t a) { 5674 return vshrn_n_s16(a, 3); 5675 } 5676 5677 // CHECK-LABEL: @test_vshrn_n_s32( 5678 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5679 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5680 // CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9) 5681 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> 5682 // CHECK: ret <4 x i16> [[VSHRN_N]] 5683 int16x4_t test_vshrn_n_s32(int32x4_t a) { 5684 return vshrn_n_s32(a, 9); 5685 } 5686 5687 // CHECK-LABEL: @test_vshrn_n_s64( 5688 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5689 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5690 // CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19) 5691 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> 5692 // CHECK: ret <2 x i32> [[VSHRN_N]] 5693 int32x2_t test_vshrn_n_s64(int64x2_t a) { 5694 return vshrn_n_s64(a, 19); 5695 } 5696 5697 // CHECK-LABEL: @test_vshrn_n_u16( 5698 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5699 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5700 // CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3) 5701 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> 5702 // CHECK: ret <8 x i8> [[VSHRN_N]] 5703 uint8x8_t test_vshrn_n_u16(uint16x8_t a) { 5704 return vshrn_n_u16(a, 3); 5705 } 5706 5707 // CHECK-LABEL: @test_vshrn_n_u32( 5708 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5709 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5710 // CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9) 5711 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> 5712 // CHECK: ret <4 x i16> [[VSHRN_N]] 5713 uint16x4_t test_vshrn_n_u32(uint32x4_t a) { 5714 return vshrn_n_u32(a, 9); 5715 } 5716 5717 // CHECK-LABEL: @test_vshrn_n_u64( 5718 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5719 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5720 // CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19) 5721 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> 5722 // CHECK: ret <2 x i32> [[VSHRN_N]] 5723 uint32x2_t test_vshrn_n_u64(uint64x2_t a) { 5724 return vshrn_n_u64(a, 19); 5725 } 5726 5727 // CHECK-LABEL: @test_vshrn_high_n_s16( 5728 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 
x i8> 5729 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5730 // CHECK: [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], splat (i16 3) 5731 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> 5732 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5733 // CHECK: ret <16 x i8> [[SHUFFLE_I]] 5734 int8x16_t test_vshrn_high_n_s16(int8x8_t a, int16x8_t b) { 5735 return vshrn_high_n_s16(a, b, 3); 5736 } 5737 5738 // CHECK-LABEL: @test_vshrn_high_n_s32( 5739 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5740 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5741 // CHECK: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], splat (i32 9) 5742 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> 5743 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5744 // CHECK: ret <8 x i16> [[SHUFFLE_I]] 5745 int16x8_t test_vshrn_high_n_s32(int16x4_t a, int32x4_t b) { 5746 return vshrn_high_n_s32(a, b, 9); 5747 } 5748 5749 // CHECK-LABEL: @test_vshrn_high_n_s64( 5750 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5751 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5752 // CHECK: [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], splat (i64 19) 5753 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> 5754 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 5755 // CHECK: ret <4 x i32> [[SHUFFLE_I]] 5756 int32x4_t test_vshrn_high_n_s64(int32x2_t a, int64x2_t b) { 5757 return vshrn_high_n_s64(a, b, 19); 5758 } 5759 5760 // CHECK-LABEL: @test_vshrn_high_n_u16( 5761 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5762 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5763 // CHECK: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], splat (i16 3) 5764 // CHECK: [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8> 5765 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VSHRN_N]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5766 // CHECK: ret <16 x i8> [[SHUFFLE_I]] 5767 uint8x16_t test_vshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { 5768 return vshrn_high_n_u16(a, b, 3); 5769 } 5770 5771 // CHECK-LABEL: @test_vshrn_high_n_u32( 5772 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5773 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5774 // CHECK: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], splat (i32 9) 5775 // CHECK: [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16> 5776 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VSHRN_N]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5777 // CHECK: ret <8 x i16> [[SHUFFLE_I]] 5778 uint16x8_t test_vshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { 5779 return vshrn_high_n_u32(a, b, 9); 5780 } 5781 5782 // CHECK-LABEL: @test_vshrn_high_n_u64( 5783 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5784 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5785 // CHECK: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], splat (i64 19) 5786 // CHECK: [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32> 5787 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> 
[[VSHRN_N]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 5788 // CHECK: ret <4 x i32> [[SHUFFLE_I]] 5789 uint32x4_t test_vshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { 5790 return vshrn_high_n_u64(a, b, 19); 5791 } 5792 5793 // CHECK-LABEL: @test_vqshrun_n_s16( 5794 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5795 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5796 // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3) 5797 // CHECK: ret <8 x i8> [[VQSHRUN_N1]] 5798 uint8x8_t test_vqshrun_n_s16(int16x8_t a) { 5799 return vqshrun_n_s16(a, 3); 5800 } 5801 5802 // CHECK-LABEL: @test_vqshrun_n_s32( 5803 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5804 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5805 // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9) 5806 // CHECK: ret <4 x i16> [[VQSHRUN_N1]] 5807 uint16x4_t test_vqshrun_n_s32(int32x4_t a) { 5808 return vqshrun_n_s32(a, 9); 5809 } 5810 5811 // CHECK-LABEL: @test_vqshrun_n_s64( 5812 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5813 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5814 // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19) 5815 // CHECK: ret <2 x i32> [[VQSHRUN_N1]] 5816 uint32x2_t test_vqshrun_n_s64(int64x2_t a) { 5817 return vqshrun_n_s64(a, 19); 5818 } 5819 5820 // CHECK-LABEL: @test_vqshrun_high_n_s16( 5821 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5822 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5823 // CHECK: [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[VQSHRUN_N]], i32 3) 5824 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5825 // CHECK: ret <16 x i8> [[SHUFFLE_I]] 5826 int8x16_t test_vqshrun_high_n_s16(int8x8_t a, int16x8_t b) { 5827 return vqshrun_high_n_s16(a, b, 3); 5828 } 5829 5830 // CHECK-LABEL: @test_vqshrun_high_n_s32( 5831 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5832 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5833 // CHECK: [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[VQSHRUN_N]], i32 9) 5834 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5835 // CHECK: ret <8 x i16> [[SHUFFLE_I]] 5836 int16x8_t test_vqshrun_high_n_s32(int16x4_t a, int32x4_t b) { 5837 return vqshrun_high_n_s32(a, b, 9); 5838 } 5839 5840 // CHECK-LABEL: @test_vqshrun_high_n_s64( 5841 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5842 // CHECK: [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5843 // CHECK: [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[VQSHRUN_N]], i32 19) 5844 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 5845 // CHECK: ret <4 x i32> [[SHUFFLE_I]] 5846 int32x4_t test_vqshrun_high_n_s64(int32x2_t a, int64x2_t b) { 5847 return vqshrun_high_n_s64(a, b, 19); 5848 } 5849 5850 // CHECK-LABEL: @test_vrshrn_n_s16( 5851 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5852 // CHECK: [[VRSHRN_N:%.*]] = 
bitcast <16 x i8> [[TMP0]] to <8 x i16> 5853 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) 5854 // CHECK: ret <8 x i8> [[VRSHRN_N1]] 5855 int8x8_t test_vrshrn_n_s16(int16x8_t a) { 5856 return vrshrn_n_s16(a, 3); 5857 } 5858 5859 // CHECK-LABEL: @test_vrshrn_n_s32( 5860 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5861 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5862 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) 5863 // CHECK: ret <4 x i16> [[VRSHRN_N1]] 5864 int16x4_t test_vrshrn_n_s32(int32x4_t a) { 5865 return vrshrn_n_s32(a, 9); 5866 } 5867 5868 // CHECK-LABEL: @test_vrshrn_n_s64( 5869 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5870 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5871 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) 5872 // CHECK: ret <2 x i32> [[VRSHRN_N1]] 5873 int32x2_t test_vrshrn_n_s64(int64x2_t a) { 5874 return vrshrn_n_s64(a, 19); 5875 } 5876 5877 // CHECK-LABEL: @test_vrshrn_n_u16( 5878 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5879 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5880 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) 5881 // CHECK: ret <8 x i8> [[VRSHRN_N1]] 5882 uint8x8_t test_vrshrn_n_u16(uint16x8_t a) { 5883 return vrshrn_n_u16(a, 3); 5884 } 5885 5886 // CHECK-LABEL: @test_vrshrn_n_u32( 5887 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5888 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5889 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) 5890 // CHECK: ret <4 x i16> [[VRSHRN_N1]] 5891 uint16x4_t test_vrshrn_n_u32(uint32x4_t a) { 5892 return vrshrn_n_u32(a, 9); 5893 } 5894 5895 // CHECK-LABEL: @test_vrshrn_n_u64( 5896 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5897 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5898 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) 5899 // CHECK: ret <2 x i32> [[VRSHRN_N1]] 5900 uint32x2_t test_vrshrn_n_u64(uint64x2_t a) { 5901 return vrshrn_n_u64(a, 19); 5902 } 5903 5904 // CHECK-LABEL: @test_vrshrn_high_n_s16( 5905 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5906 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5907 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) 5908 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5909 // CHECK: ret <16 x i8> [[SHUFFLE_I]] 5910 int8x16_t test_vrshrn_high_n_s16(int8x8_t a, int16x8_t b) { 5911 return vrshrn_high_n_s16(a, b, 3); 5912 } 5913 5914 // CHECK-LABEL: @test_vrshrn_high_n_s32( 5915 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5916 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5917 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) 5918 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5919 // CHECK: 
ret <8 x i16> [[SHUFFLE_I]] 5920 int16x8_t test_vrshrn_high_n_s32(int16x4_t a, int32x4_t b) { 5921 return vrshrn_high_n_s32(a, b, 9); 5922 } 5923 5924 // CHECK-LABEL: @test_vrshrn_high_n_s64( 5925 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5926 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5927 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) 5928 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 5929 // CHECK: ret <4 x i32> [[SHUFFLE_I]] 5930 int32x4_t test_vrshrn_high_n_s64(int32x2_t a, int64x2_t b) { 5931 return vrshrn_high_n_s64(a, b, 19); 5932 } 5933 5934 // CHECK-LABEL: @test_vrshrn_high_n_u16( 5935 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5936 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5937 // CHECK: [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[VRSHRN_N]], i32 3) 5938 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5939 // CHECK: ret <16 x i8> [[SHUFFLE_I]] 5940 uint8x16_t test_vrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { 5941 return vrshrn_high_n_u16(a, b, 3); 5942 } 5943 5944 // CHECK-LABEL: @test_vrshrn_high_n_u32( 5945 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 5946 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5947 // CHECK: [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[VRSHRN_N]], i32 9) 5948 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5949 // CHECK: ret <8 x i16> [[SHUFFLE_I]] 5950 uint16x8_t test_vrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { 5951 return vrshrn_high_n_u32(a, b, 9); 5952 } 5953 5954 // CHECK-LABEL: @test_vrshrn_high_n_u64( 5955 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 5956 // CHECK: [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5957 // CHECK: [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[VRSHRN_N]], i32 19) 5958 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 5959 // CHECK: ret <4 x i32> [[SHUFFLE_I]] 5960 uint32x4_t test_vrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { 5961 return vrshrn_high_n_u64(a, b, 19); 5962 } 5963 5964 // CHECK-LABEL: @test_vqrshrun_n_s16( 5965 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 5966 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5967 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3) 5968 // CHECK: ret <8 x i8> [[VQRSHRUN_N1]] 5969 uint8x8_t test_vqrshrun_n_s16(int16x8_t a) { 5970 return vqrshrun_n_s16(a, 3); 5971 } 5972 5973 // CHECK-LABEL: @test_vqrshrun_n_s32( 5974 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 5975 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 5976 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9) 5977 // CHECK: ret <4 x i16> [[VQRSHRUN_N1]] 5978 uint16x4_t test_vqrshrun_n_s32(int32x4_t a) { 5979 return vqrshrun_n_s32(a, 9); 5980 } 5981 5982 // CHECK-LABEL: @test_vqrshrun_n_s64( 5983 // CHECK: 
[[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 5984 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 5985 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19) 5986 // CHECK: ret <2 x i32> [[VQRSHRUN_N1]] 5987 uint32x2_t test_vqrshrun_n_s64(int64x2_t a) { 5988 return vqrshrun_n_s64(a, 19); 5989 } 5990 5991 // CHECK-LABEL: @test_vqrshrun_high_n_s16( 5992 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 5993 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 5994 // CHECK: [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[VQRSHRUN_N]], i32 3) 5995 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRUN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 5996 // CHECK: ret <16 x i8> [[SHUFFLE_I]] 5997 int8x16_t test_vqrshrun_high_n_s16(int8x8_t a, int16x8_t b) { 5998 return vqrshrun_high_n_s16(a, b, 3); 5999 } 6000 6001 // CHECK-LABEL: @test_vqrshrun_high_n_s32( 6002 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6003 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6004 // CHECK: [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[VQRSHRUN_N]], i32 9) 6005 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRUN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6006 // CHECK: ret <8 x i16> [[SHUFFLE_I]] 6007 int16x8_t test_vqrshrun_high_n_s32(int16x4_t a, int32x4_t b) { 6008 return vqrshrun_high_n_s32(a, b, 9); 6009 } 6010 6011 // CHECK-LABEL: @test_vqrshrun_high_n_s64( 6012 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6013 // CHECK: [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6014 // CHECK: [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[VQRSHRUN_N]], i32 19) 6015 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRUN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6016 // CHECK: ret <4 x i32> [[SHUFFLE_I]] 6017 int32x4_t test_vqrshrun_high_n_s64(int32x2_t a, int64x2_t b) { 6018 return vqrshrun_high_n_s64(a, b, 19); 6019 } 6020 6021 // CHECK-LABEL: @test_vqshrn_n_s16( 6022 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6023 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6024 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) 6025 // CHECK: ret <8 x i8> [[VQSHRN_N1]] 6026 int8x8_t test_vqshrn_n_s16(int16x8_t a) { 6027 return vqshrn_n_s16(a, 3); 6028 } 6029 6030 // CHECK-LABEL: @test_vqshrn_n_s32( 6031 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6032 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6033 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) 6034 // CHECK: ret <4 x i16> [[VQSHRN_N1]] 6035 int16x4_t test_vqshrn_n_s32(int32x4_t a) { 6036 return vqshrn_n_s32(a, 9); 6037 } 6038 6039 // CHECK-LABEL: @test_vqshrn_n_s64( 6040 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6041 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6042 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) 6043 // CHECK: ret <2 x i32> [[VQSHRN_N1]] 6044 int32x2_t test_vqshrn_n_s64(int64x2_t a) { 6045 return 
vqshrn_n_s64(a, 19); 6046 } 6047 6048 // CHECK-LABEL: @test_vqshrn_n_u16( 6049 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6050 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6051 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) 6052 // CHECK: ret <8 x i8> [[VQSHRN_N1]] 6053 uint8x8_t test_vqshrn_n_u16(uint16x8_t a) { 6054 return vqshrn_n_u16(a, 3); 6055 } 6056 6057 // CHECK-LABEL: @test_vqshrn_n_u32( 6058 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6059 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6060 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) 6061 // CHECK: ret <4 x i16> [[VQSHRN_N1]] 6062 uint16x4_t test_vqshrn_n_u32(uint32x4_t a) { 6063 return vqshrn_n_u32(a, 9); 6064 } 6065 6066 // CHECK-LABEL: @test_vqshrn_n_u64( 6067 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6068 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6069 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) 6070 // CHECK: ret <2 x i32> [[VQSHRN_N1]] 6071 uint32x2_t test_vqshrn_n_u64(uint64x2_t a) { 6072 return vqshrn_n_u64(a, 19); 6073 } 6074 6075 // CHECK-LABEL: @test_vqshrn_high_n_s16( 6076 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6077 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6078 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) 6079 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6080 // CHECK: ret <16 x i8> [[SHUFFLE_I]] 6081 int8x16_t test_vqshrn_high_n_s16(int8x8_t a, int16x8_t b) { 6082 return vqshrn_high_n_s16(a, b, 3); 6083 } 6084 6085 // CHECK-LABEL: @test_vqshrn_high_n_s32( 6086 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6087 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6088 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) 6089 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6090 // CHECK: ret <8 x i16> [[SHUFFLE_I]] 6091 int16x8_t test_vqshrn_high_n_s32(int16x4_t a, int32x4_t b) { 6092 return vqshrn_high_n_s32(a, b, 9); 6093 } 6094 6095 // CHECK-LABEL: @test_vqshrn_high_n_s64( 6096 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6097 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6098 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) 6099 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6100 // CHECK: ret <4 x i32> [[SHUFFLE_I]] 6101 int32x4_t test_vqshrn_high_n_s64(int32x2_t a, int64x2_t b) { 6102 return vqshrn_high_n_s64(a, b, 19); 6103 } 6104 6105 // CHECK-LABEL: @test_vqshrn_high_n_u16( 6106 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6107 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6108 // CHECK: [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[VQSHRN_N]], i32 3) 6109 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> 
[[VQSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6110 // CHECK: ret <16 x i8> [[SHUFFLE_I]] 6111 uint8x16_t test_vqshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { 6112 return vqshrn_high_n_u16(a, b, 3); 6113 } 6114 6115 // CHECK-LABEL: @test_vqshrn_high_n_u32( 6116 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6117 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6118 // CHECK: [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[VQSHRN_N]], i32 9) 6119 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6120 // CHECK: ret <8 x i16> [[SHUFFLE_I]] 6121 uint16x8_t test_vqshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { 6122 return vqshrn_high_n_u32(a, b, 9); 6123 } 6124 6125 // CHECK-LABEL: @test_vqshrn_high_n_u64( 6126 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6127 // CHECK: [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6128 // CHECK: [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[VQSHRN_N]], i32 19) 6129 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6130 // CHECK: ret <4 x i32> [[SHUFFLE_I]] 6131 uint32x4_t test_vqshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { 6132 return vqshrn_high_n_u64(a, b, 19); 6133 } 6134 6135 // CHECK-LABEL: @test_vqrshrn_n_s16( 6136 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6137 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6138 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) 6139 // CHECK: ret <8 x i8> [[VQRSHRN_N1]] 6140 int8x8_t test_vqrshrn_n_s16(int16x8_t a) { 6141 return vqrshrn_n_s16(a, 3); 6142 } 6143 6144 // CHECK-LABEL: @test_vqrshrn_n_s32( 6145 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6146 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6147 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) 6148 // CHECK: ret <4 x i16> [[VQRSHRN_N1]] 6149 int16x4_t test_vqrshrn_n_s32(int32x4_t a) { 6150 return vqrshrn_n_s32(a, 9); 6151 } 6152 6153 // CHECK-LABEL: @test_vqrshrn_n_s64( 6154 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6155 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6156 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) 6157 // CHECK: ret <2 x i32> [[VQRSHRN_N1]] 6158 int32x2_t test_vqrshrn_n_s64(int64x2_t a) { 6159 return vqrshrn_n_s64(a, 19); 6160 } 6161 6162 // CHECK-LABEL: @test_vqrshrn_n_u16( 6163 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 6164 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6165 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) 6166 // CHECK: ret <8 x i8> [[VQRSHRN_N1]] 6167 uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) { 6168 return vqrshrn_n_u16(a, 3); 6169 } 6170 6171 // CHECK-LABEL: @test_vqrshrn_n_u32( 6172 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6173 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6174 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) 6175 // 
CHECK: ret <4 x i16> [[VQRSHRN_N1]] 6176 uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) { 6177 return vqrshrn_n_u32(a, 9); 6178 } 6179 6180 // CHECK-LABEL: @test_vqrshrn_n_u64( 6181 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6182 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6183 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) 6184 // CHECK: ret <2 x i32> [[VQRSHRN_N1]] 6185 uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) { 6186 return vqrshrn_n_u64(a, 19); 6187 } 6188 6189 // CHECK-LABEL: @test_vqrshrn_high_n_s16( 6190 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6191 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6192 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) 6193 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6194 // CHECK: ret <16 x i8> [[SHUFFLE_I]] 6195 int8x16_t test_vqrshrn_high_n_s16(int8x8_t a, int16x8_t b) { 6196 return vqrshrn_high_n_s16(a, b, 3); 6197 } 6198 6199 // CHECK-LABEL: @test_vqrshrn_high_n_s32( 6200 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6201 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6202 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) 6203 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6204 // CHECK: ret <8 x i16> [[SHUFFLE_I]] 6205 int16x8_t test_vqrshrn_high_n_s32(int16x4_t a, int32x4_t b) { 6206 return vqrshrn_high_n_s32(a, b, 9); 6207 } 6208 6209 // CHECK-LABEL: @test_vqrshrn_high_n_s64( 6210 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6211 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6212 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) 6213 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6214 // CHECK: ret <4 x i32> [[SHUFFLE_I]] 6215 int32x4_t test_vqrshrn_high_n_s64(int32x2_t a, int64x2_t b) { 6216 return vqrshrn_high_n_s64(a, b, 19); 6217 } 6218 6219 // CHECK-LABEL: @test_vqrshrn_high_n_u16( 6220 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> 6221 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 6222 // CHECK: [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[VQRSHRN_N]], i32 3) 6223 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> [[VQRSHRN_N1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6224 // CHECK: ret <16 x i8> [[SHUFFLE_I]] 6225 uint8x16_t test_vqrshrn_high_n_u16(uint8x8_t a, uint16x8_t b) { 6226 return vqrshrn_high_n_u16(a, b, 3); 6227 } 6228 6229 // CHECK-LABEL: @test_vqrshrn_high_n_u32( 6230 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> 6231 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6232 // CHECK: [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[VQRSHRN_N]], i32 9) 6233 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> [[VQRSHRN_N1]], <8 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 6234 // CHECK: ret <8 x i16> [[SHUFFLE_I]] 6235 uint16x8_t test_vqrshrn_high_n_u32(uint16x4_t a, uint32x4_t b) { 6236 return vqrshrn_high_n_u32(a, b, 9); 6237 } 6238 6239 // CHECK-LABEL: @test_vqrshrn_high_n_u64( 6240 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> 6241 // CHECK: [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6242 // CHECK: [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[VQRSHRN_N]], i32 19) 6243 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> [[VQRSHRN_N1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 6244 // CHECK: ret <4 x i32> [[SHUFFLE_I]] 6245 uint32x4_t test_vqrshrn_high_n_u64(uint32x2_t a, uint64x2_t b) { 6246 return vqrshrn_high_n_u64(a, b, 19); 6247 } 6248 6249 // CHECK-LABEL: @test_vshll_n_s8( 6250 // CHECK: [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16> 6251 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) 6252 // CHECK: ret <8 x i16> [[VSHLL_N]] 6253 int16x8_t test_vshll_n_s8(int8x8_t a) { 6254 return vshll_n_s8(a, 3); 6255 } 6256 6257 // CHECK-LABEL: @test_vshll_n_s16( 6258 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6259 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6260 // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 6261 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) 6262 // CHECK: ret <4 x i32> [[VSHLL_N]] 6263 int32x4_t test_vshll_n_s16(int16x4_t a) { 6264 return vshll_n_s16(a, 9); 6265 } 6266 6267 // CHECK-LABEL: @test_vshll_n_s32( 6268 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6269 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6270 // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 6271 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) 6272 // CHECK: ret <2 x i64> [[VSHLL_N]] 6273 int64x2_t test_vshll_n_s32(int32x2_t a) { 6274 return vshll_n_s32(a, 19); 6275 } 6276 6277 // CHECK-LABEL: @test_vshll_n_u8( 6278 // CHECK: [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16> 6279 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) 6280 // CHECK: ret <8 x i16> [[VSHLL_N]] 6281 uint16x8_t test_vshll_n_u8(uint8x8_t a) { 6282 return vshll_n_u8(a, 3); 6283 } 6284 6285 // CHECK-LABEL: @test_vshll_n_u16( 6286 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6287 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6288 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 6289 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) 6290 // CHECK: ret <4 x i32> [[VSHLL_N]] 6291 uint32x4_t test_vshll_n_u16(uint16x4_t a) { 6292 return vshll_n_u16(a, 9); 6293 } 6294 6295 // CHECK-LABEL: @test_vshll_n_u32( 6296 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6297 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6298 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 6299 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) 6300 // CHECK: ret <2 x i64> [[VSHLL_N]] 6301 uint64x2_t test_vshll_n_u32(uint32x2_t a) { 6302 return vshll_n_u32(a, 19); 6303 } 6304 6305 // CHECK-LABEL: @test_vshll_high_n_s8( 6306 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6307 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> 6308 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) 6309 // CHECK: ret <8 x i16> 
[[VSHLL_N]] 6310 int16x8_t test_vshll_high_n_s8(int8x16_t a) { 6311 return vshll_high_n_s8(a, 3); 6312 } 6313 6314 // CHECK-LABEL: @test_vshll_high_n_s16( 6315 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6316 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 6317 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6318 // CHECK: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> 6319 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) 6320 // CHECK: ret <4 x i32> [[VSHLL_N]] 6321 int32x4_t test_vshll_high_n_s16(int16x8_t a) { 6322 return vshll_high_n_s16(a, 9); 6323 } 6324 6325 // CHECK-LABEL: @test_vshll_high_n_s32( 6326 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 6327 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 6328 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6329 // CHECK: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> 6330 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) 6331 // CHECK: ret <2 x i64> [[VSHLL_N]] 6332 int64x2_t test_vshll_high_n_s32(int32x4_t a) { 6333 return vshll_high_n_s32(a, 19); 6334 } 6335 6336 // CHECK-LABEL: @test_vshll_high_n_u8( 6337 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6338 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> 6339 // CHECK: [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 3) 6340 // CHECK: ret <8 x i16> [[VSHLL_N]] 6341 uint16x8_t test_vshll_high_n_u8(uint8x16_t a) { 6342 return vshll_high_n_u8(a, 3); 6343 } 6344 6345 // CHECK-LABEL: @test_vshll_high_n_u16( 6346 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6347 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> 6348 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 6349 // CHECK: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> 6350 // CHECK: [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 9) 6351 // CHECK: ret <4 x i32> [[VSHLL_N]] 6352 uint32x4_t test_vshll_high_n_u16(uint16x8_t a) { 6353 return vshll_high_n_u16(a, 9); 6354 } 6355 6356 // CHECK-LABEL: @test_vshll_high_n_u32( 6357 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 6358 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> 6359 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6360 // CHECK: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> 6361 // CHECK: [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 19) 6362 // CHECK: ret <2 x i64> [[VSHLL_N]] 6363 uint64x2_t test_vshll_high_n_u32(uint32x4_t a) { 6364 return vshll_high_n_u32(a, 19); 6365 } 6366 6367 // CHECK-LABEL: @test_vmovl_s8( 6368 // CHECK: [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16> 6369 // CHECK: ret <8 x i16> [[VMOVL_I]] 6370 int16x8_t test_vmovl_s8(int8x8_t a) { 6371 return vmovl_s8(a); 6372 } 6373 6374 // CHECK-LABEL: @test_vmovl_s16( 6375 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6376 // CHECK: [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32> 6377 // CHECK: ret <4 x i32> [[VMOVL_I]] 6378 int32x4_t test_vmovl_s16(int16x4_t a) { 6379 return vmovl_s16(a); 6380 } 6381 6382 // CHECK-LABEL: @test_vmovl_s32( 6383 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6384 // CHECK: [[VMOVL_I:%.*]] = sext <2 x 
i32> %a to <2 x i64> 6385 // CHECK: ret <2 x i64> [[VMOVL_I]] 6386 int64x2_t test_vmovl_s32(int32x2_t a) { 6387 return vmovl_s32(a); 6388 } 6389 6390 // CHECK-LABEL: @test_vmovl_u8( 6391 // CHECK: [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16> 6392 // CHECK: ret <8 x i16> [[VMOVL_I]] 6393 uint16x8_t test_vmovl_u8(uint8x8_t a) { 6394 return vmovl_u8(a); 6395 } 6396 6397 // CHECK-LABEL: @test_vmovl_u16( 6398 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6399 // CHECK: [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32> 6400 // CHECK: ret <4 x i32> [[VMOVL_I]] 6401 uint32x4_t test_vmovl_u16(uint16x4_t a) { 6402 return vmovl_u16(a); 6403 } 6404 6405 // CHECK-LABEL: @test_vmovl_u32( 6406 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6407 // CHECK: [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64> 6408 // CHECK: ret <2 x i64> [[VMOVL_I]] 6409 uint64x2_t test_vmovl_u32(uint32x2_t a) { 6410 return vmovl_u32(a); 6411 } 6412 6413 // CHECK-LABEL: @test_vmovl_high_s8( 6414 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6415 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16> 6416 // CHECK: ret <8 x i16> [[TMP0]] 6417 int16x8_t test_vmovl_high_s8(int8x16_t a) { 6418 return vmovl_high_s8(a); 6419 } 6420 6421 // CHECK-LABEL: @test_vmovl_high_s16( 6422 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6423 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 6424 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32> 6425 // CHECK: ret <4 x i32> [[TMP1]] 6426 int32x4_t test_vmovl_high_s16(int16x8_t a) { 6427 return vmovl_high_s16(a); 6428 } 6429 6430 // CHECK-LABEL: @test_vmovl_high_s32( 6431 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 6432 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 6433 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64> 6434 // CHECK: ret <2 x i64> [[TMP1]] 6435 int64x2_t test_vmovl_high_s32(int32x4_t a) { 6436 return vmovl_high_s32(a); 6437 } 6438 6439 // CHECK-LABEL: @test_vmovl_high_u8( 6440 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6441 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I]] to <8 x i16> 6442 // CHECK: ret <8 x i16> [[TMP0]] 6443 uint16x8_t test_vmovl_high_u8(uint8x16_t a) { 6444 return vmovl_high_u8(a); 6445 } 6446 6447 // CHECK-LABEL: @test_vmovl_high_u16( 6448 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6449 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 6450 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I]] to <4 x i32> 6451 // CHECK: ret <4 x i32> [[TMP1]] 6452 uint32x4_t test_vmovl_high_u16(uint16x8_t a) { 6453 return vmovl_high_u16(a); 6454 } 6455 6456 // CHECK-LABEL: @test_vmovl_high_u32( 6457 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 6458 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 6459 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I]] to <2 x i64> 6460 // CHECK: ret <2 x i64> [[TMP1]] 6461 uint64x2_t test_vmovl_high_u32(uint32x4_t a) { 6462 return vmovl_high_u32(a); 6463 } 6464 6465 // CHECK-LABEL: @test_vcvt_n_f32_s32( 6466 // CHECK: 
[[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6467 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6468 // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31) 6469 // CHECK: ret <2 x float> [[VCVT_N1]] 6470 float32x2_t test_vcvt_n_f32_s32(int32x2_t a) { 6471 return vcvt_n_f32_s32(a, 31); 6472 } 6473 6474 // CHECK-LABEL: @test_vcvtq_n_f32_s32( 6475 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6476 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6477 // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31) 6478 // CHECK: ret <4 x float> [[VCVT_N1]] 6479 float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) { 6480 return vcvtq_n_f32_s32(a, 31); 6481 } 6482 6483 // CHECK-LABEL: @test_vcvtq_n_f64_s64( 6484 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6485 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6486 // CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxs2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50) 6487 // CHECK: ret <2 x double> [[VCVT_N1]] 6488 float64x2_t test_vcvtq_n_f64_s64(int64x2_t a) { 6489 return vcvtq_n_f64_s64(a, 50); 6490 } 6491 6492 // CHECK-LABEL: @test_vcvt_n_f32_u32( 6493 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6494 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 6495 // CHECK: [[VCVT_N1:%.*]] = call <2 x float> @llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 31) 6496 // CHECK: ret <2 x float> [[VCVT_N1]] 6497 float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) { 6498 return vcvt_n_f32_u32(a, 31); 6499 } 6500 6501 // CHECK-LABEL: @test_vcvtq_n_f32_u32( 6502 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 6503 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 6504 // CHECK: [[VCVT_N1:%.*]] = call <4 x float> @llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 31) 6505 // CHECK: ret <4 x float> [[VCVT_N1]] 6506 float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) { 6507 return vcvtq_n_f32_u32(a, 31); 6508 } 6509 6510 // CHECK-LABEL: @test_vcvtq_n_f64_u64( 6511 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 6512 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 6513 // CHECK: [[VCVT_N1:%.*]] = call <2 x double> @llvm.aarch64.neon.vcvtfxu2fp.v2f64.v2i64(<2 x i64> [[VCVT_N]], i32 50) 6514 // CHECK: ret <2 x double> [[VCVT_N1]] 6515 float64x2_t test_vcvtq_n_f64_u64(uint64x2_t a) { 6516 return vcvtq_n_f64_u64(a, 50); 6517 } 6518 6519 // CHECK-LABEL: @test_vcvt_n_s32_f32( 6520 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 6521 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> 6522 // CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31) 6523 // CHECK: ret <2 x i32> [[VCVT_N1]] 6524 int32x2_t test_vcvt_n_s32_f32(float32x2_t a) { 6525 return vcvt_n_s32_f32(a, 31); 6526 } 6527 6528 // CHECK-LABEL: @test_vcvtq_n_s32_f32( 6529 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> 6530 // CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> 6531 // CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31) 6532 // CHECK: ret <4 x i32> [[VCVT_N1]] 6533 int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) { 6534 return vcvtq_n_s32_f32(a, 31); 6535 } 6536 6537 // CHECK-LABEL: @test_vcvtq_n_s64_f64( 6538 
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK: ret <2 x i64> [[VCVT_N1]]
int64x2_t test_vcvtq_n_s64_f64(float64x2_t a) {
  return vcvtq_n_s64_f64(a, 50);
}

// CHECK-LABEL: @test_vcvt_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 31)
// CHECK: ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK: [[VCVT_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 31)
// CHECK: ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 31);
}

// CHECK-LABEL: @test_vcvtq_n_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK: [[VCVT_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v2i64.v2f64(<2 x double> [[VCVT_N]], i32 50)
// CHECK: ret <2 x i64> [[VCVT_N1]]
uint64x2_t test_vcvtq_n_u64_f64(float64x2_t a) {
  return vcvtq_n_u64_f64(a, 50);
}

// CHECK-LABEL: @test_vaddl_s8(
// CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_u8(
// CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_high_s8(int8x16_t a, int8x16_t b) {
  return vaddl_high_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s16(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_high_s16(int16x8_t a, int16x8_t b) {
  return vaddl_high_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_high_s32(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64>
// CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8>
// CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_high_s32(int32x4_t a, int32x4_t b) {
  return vaddl_high_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_high_u8(
// CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[TMP0:%.*]] = zext <8 x i8>
[[SHUFFLE_I_I_I]] to <8 x i16> 6675 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6676 // CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16> 6677 // CHECK: [[ADD_I:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]] 6678 // CHECK: ret <8 x i16> [[ADD_I]] 6679 uint16x8_t test_vaddl_high_u8(uint8x16_t a, uint8x16_t b) { 6680 return vaddl_high_u8(a, b); 6681 } 6682 6683 // CHECK-LABEL: @test_vaddl_high_u16( 6684 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6685 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> 6686 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> 6687 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6688 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> 6689 // CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32> 6690 // CHECK: [[ADD_I:%.*]] = add <4 x i32> [[TMP1]], [[TMP3]] 6691 // CHECK: ret <4 x i32> [[ADD_I]] 6692 uint32x4_t test_vaddl_high_u16(uint16x8_t a, uint16x8_t b) { 6693 return vaddl_high_u16(a, b); 6694 } 6695 6696 // CHECK-LABEL: @test_vaddl_high_u32( 6697 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 6698 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> 6699 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> 6700 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 6701 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> 6702 // CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64> 6703 // CHECK: [[ADD_I:%.*]] = add <2 x i64> [[TMP1]], [[TMP3]] 6704 // CHECK: ret <2 x i64> [[ADD_I]] 6705 uint64x2_t test_vaddl_high_u32(uint32x4_t a, uint32x4_t b) { 6706 return vaddl_high_u32(a, b); 6707 } 6708 6709 // CHECK-LABEL: @test_vaddw_s8( 6710 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> 6711 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] 6712 // CHECK: ret <8 x i16> [[ADD_I]] 6713 int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) { 6714 return vaddw_s8(a, b); 6715 } 6716 6717 // CHECK-LABEL: @test_vaddw_s16( 6718 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 6719 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32> 6720 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] 6721 // CHECK: ret <4 x i32> [[ADD_I]] 6722 int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) { 6723 return vaddw_s16(a, b); 6724 } 6725 6726 // CHECK-LABEL: @test_vaddw_s32( 6727 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 6728 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64> 6729 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] 6730 // CHECK: ret <2 x i64> [[ADD_I]] 6731 int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) { 6732 return vaddw_s32(a, b); 6733 } 6734 6735 // CHECK-LABEL: @test_vaddw_u8( 6736 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> 6737 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]] 6738 // CHECK: ret <8 x i16> [[ADD_I]] 6739 uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) { 6740 return vaddw_u8(a, b); 6741 } 6742 6743 // CHECK-LABEL: @test_vaddw_u16( 6744 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 6745 // CHECK: [[VMOVL_I_I:%.*]] = zext 
<4 x i16> %b to <4 x i32> 6746 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]] 6747 // CHECK: ret <4 x i32> [[ADD_I]] 6748 uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) { 6749 return vaddw_u16(a, b); 6750 } 6751 6752 // CHECK-LABEL: @test_vaddw_u32( 6753 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 6754 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64> 6755 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]] 6756 // CHECK: ret <2 x i64> [[ADD_I]] 6757 uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) { 6758 return vaddw_u32(a, b); 6759 } 6760 6761 // CHECK-LABEL: @test_vaddw_high_s8( 6762 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6763 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> 6764 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]] 6765 // CHECK: ret <8 x i16> [[ADD_I]] 6766 int16x8_t test_vaddw_high_s8(int16x8_t a, int8x16_t b) { 6767 return vaddw_high_s8(a, b); 6768 } 6769 6770 // CHECK-LABEL: @test_vaddw_high_s16( 6771 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6772 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> 6773 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> 6774 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]] 6775 // CHECK: ret <4 x i32> [[ADD_I]] 6776 int32x4_t test_vaddw_high_s16(int32x4_t a, int16x8_t b) { 6777 return vaddw_high_s16(a, b); 6778 } 6779 6780 // CHECK-LABEL: @test_vaddw_high_s32( 6781 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 6782 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> 6783 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> 6784 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]] 6785 // CHECK: ret <2 x i64> [[ADD_I]] 6786 int64x2_t test_vaddw_high_s32(int64x2_t a, int32x4_t b) { 6787 return vaddw_high_s32(a, b); 6788 } 6789 6790 // CHECK-LABEL: @test_vaddw_high_u8( 6791 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6792 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> 6793 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[TMP0]] 6794 // CHECK: ret <8 x i16> [[ADD_I]] 6795 uint16x8_t test_vaddw_high_u8(uint16x8_t a, uint8x16_t b) { 6796 return vaddw_high_u8(a, b); 6797 } 6798 6799 // CHECK-LABEL: @test_vaddw_high_u16( 6800 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6801 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> 6802 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> 6803 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[TMP1]] 6804 // CHECK: ret <4 x i32> [[ADD_I]] 6805 uint32x4_t test_vaddw_high_u16(uint32x4_t a, uint16x8_t b) { 6806 return vaddw_high_u16(a, b); 6807 } 6808 6809 // CHECK-LABEL: @test_vaddw_high_u32( 6810 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 6811 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> 6812 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> 6813 // CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[TMP1]] 6814 // CHECK: ret <2 x i64> [[ADD_I]] 6815 uint64x2_t 
test_vaddw_high_u32(uint64x2_t a, uint32x4_t b) { 6816 return vaddw_high_u32(a, b); 6817 } 6818 6819 // CHECK-LABEL: @test_vsubl_s8( 6820 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16> 6821 // CHECK: [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16> 6822 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] 6823 // CHECK: ret <8 x i16> [[SUB_I]] 6824 int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) { 6825 return vsubl_s8(a, b); 6826 } 6827 6828 // CHECK-LABEL: @test_vsubl_s16( 6829 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6830 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32> 6831 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 6832 // CHECK: [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32> 6833 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] 6834 // CHECK: ret <4 x i32> [[SUB_I]] 6835 int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) { 6836 return vsubl_s16(a, b); 6837 } 6838 6839 // CHECK-LABEL: @test_vsubl_s32( 6840 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6841 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64> 6842 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 6843 // CHECK: [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64> 6844 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] 6845 // CHECK: ret <2 x i64> [[SUB_I]] 6846 int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) { 6847 return vsubl_s32(a, b); 6848 } 6849 6850 // CHECK-LABEL: @test_vsubl_u8( 6851 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16> 6852 // CHECK: [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16> 6853 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]] 6854 // CHECK: ret <8 x i16> [[SUB_I]] 6855 uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) { 6856 return vsubl_u8(a, b); 6857 } 6858 6859 // CHECK-LABEL: @test_vsubl_u16( 6860 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 6861 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32> 6862 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 6863 // CHECK: [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32> 6864 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]] 6865 // CHECK: ret <4 x i32> [[SUB_I]] 6866 uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) { 6867 return vsubl_u16(a, b); 6868 } 6869 6870 // CHECK-LABEL: @test_vsubl_u32( 6871 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 6872 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64> 6873 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 6874 // CHECK: [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64> 6875 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]] 6876 // CHECK: ret <2 x i64> [[SUB_I]] 6877 uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) { 6878 return vsubl_u32(a, b); 6879 } 6880 6881 // CHECK-LABEL: @test_vsubl_high_s8( 6882 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6883 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> 6884 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6885 // CHECK: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16> 6886 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] 6887 // CHECK: ret <8 x i16> [[SUB_I]] 6888 int16x8_t test_vsubl_high_s8(int8x16_t a, 
int8x16_t b) { 6889 return vsubl_high_s8(a, b); 6890 } 6891 6892 // CHECK-LABEL: @test_vsubl_high_s16( 6893 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6894 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> 6895 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> 6896 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6897 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> 6898 // CHECK: [[TMP3:%.*]] = sext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32> 6899 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]] 6900 // CHECK: ret <4 x i32> [[SUB_I]] 6901 int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) { 6902 return vsubl_high_s16(a, b); 6903 } 6904 6905 // CHECK-LABEL: @test_vsubl_high_s32( 6906 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 6907 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> 6908 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> 6909 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 6910 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> 6911 // CHECK: [[TMP3:%.*]] = sext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64> 6912 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]] 6913 // CHECK: ret <2 x i64> [[SUB_I]] 6914 int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) { 6915 return vsubl_high_s32(a, b); 6916 } 6917 6918 // CHECK-LABEL: @test_vsubl_high_u8( 6919 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6920 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> 6921 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 6922 // CHECK: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I10_I]] to <8 x i16> 6923 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] 6924 // CHECK: ret <8 x i16> [[SUB_I]] 6925 uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) { 6926 return vsubl_high_u8(a, b); 6927 } 6928 6929 // CHECK-LABEL: @test_vsubl_high_u16( 6930 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6931 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> 6932 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> 6933 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 6934 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I10_I]] to <8 x i8> 6935 // CHECK: [[TMP3:%.*]] = zext <4 x i16> [[SHUFFLE_I_I10_I]] to <4 x i32> 6936 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> [[TMP1]], [[TMP3]] 6937 // CHECK: ret <4 x i32> [[SUB_I]] 6938 uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) { 6939 return vsubl_high_u16(a, b); 6940 } 6941 6942 // CHECK-LABEL: @test_vsubl_high_u32( 6943 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 6944 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> 6945 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> 6946 // CHECK: [[SHUFFLE_I_I10_I:%.*]] = shufflevector <4 x i32> %b, <4 
x i32> %b, <2 x i32> <i32 2, i32 3> 6947 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I10_I]] to <8 x i8> 6948 // CHECK: [[TMP3:%.*]] = zext <2 x i32> [[SHUFFLE_I_I10_I]] to <2 x i64> 6949 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> [[TMP1]], [[TMP3]] 6950 // CHECK: ret <2 x i64> [[SUB_I]] 6951 uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) { 6952 return vsubl_high_u32(a, b); 6953 } 6954 6955 // CHECK-LABEL: @test_vsubw_s8( 6956 // CHECK: [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16> 6957 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]] 6958 // CHECK: ret <8 x i16> [[SUB_I]] 6959 int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) { 6960 return vsubw_s8(a, b); 6961 } 6962 6963 // CHECK-LABEL: @test_vsubw_s16( 6964 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 6965 // CHECK: [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32> 6966 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]] 6967 // CHECK: ret <4 x i32> [[SUB_I]] 6968 int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) { 6969 return vsubw_s16(a, b); 6970 } 6971 6972 // CHECK-LABEL: @test_vsubw_s32( 6973 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 6974 // CHECK: [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64> 6975 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]] 6976 // CHECK: ret <2 x i64> [[SUB_I]] 6977 int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) { 6978 return vsubw_s32(a, b); 6979 } 6980 6981 // CHECK-LABEL: @test_vsubw_u8( 6982 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16> 6983 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]] 6984 // CHECK: ret <8 x i16> [[SUB_I]] 6985 uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) { 6986 return vsubw_u8(a, b); 6987 } 6988 6989 // CHECK-LABEL: @test_vsubw_u16( 6990 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 6991 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32> 6992 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]] 6993 // CHECK: ret <4 x i32> [[SUB_I]] 6994 uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) { 6995 return vsubw_u16(a, b); 6996 } 6997 6998 // CHECK-LABEL: @test_vsubw_u32( 6999 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 7000 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64> 7001 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]] 7002 // CHECK: ret <2 x i64> [[SUB_I]] 7003 uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { 7004 return vsubw_u32(a, b); 7005 } 7006 7007 // CHECK-LABEL: @test_vsubw_high_s8( 7008 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7009 // CHECK: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> 7010 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]] 7011 // CHECK: ret <8 x i16> [[SUB_I]] 7012 int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) { 7013 return vsubw_high_s8(a, b); 7014 } 7015 7016 // CHECK-LABEL: @test_vsubw_high_s16( 7017 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 7018 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> 7019 // CHECK: [[TMP1:%.*]] = sext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> 7020 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]] 7021 // CHECK: ret <4 x i32> [[SUB_I]] 7022 int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) { 7023 return vsubw_high_s16(a, b); 7024 } 7025 7026 // CHECK-LABEL: @test_vsubw_high_s32( 7027 // CHECK: 
[[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 7028 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> 7029 // CHECK: [[TMP1:%.*]] = sext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> 7030 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]] 7031 // CHECK: ret <2 x i64> [[SUB_I]] 7032 int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) { 7033 return vsubw_high_s32(a, b); 7034 } 7035 7036 // CHECK-LABEL: @test_vsubw_high_u8( 7037 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7038 // CHECK: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> 7039 // CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[TMP0]] 7040 // CHECK: ret <8 x i16> [[SUB_I]] 7041 uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) { 7042 return vsubw_high_u8(a, b); 7043 } 7044 7045 // CHECK-LABEL: @test_vsubw_high_u16( 7046 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 7047 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> 7048 // CHECK: [[TMP1:%.*]] = zext <4 x i16> [[SHUFFLE_I_I_I]] to <4 x i32> 7049 // CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[TMP1]] 7050 // CHECK: ret <4 x i32> [[SUB_I]] 7051 uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) { 7052 return vsubw_high_u16(a, b); 7053 } 7054 7055 // CHECK-LABEL: @test_vsubw_high_u32( 7056 // CHECK: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 7057 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> 7058 // CHECK: [[TMP1:%.*]] = zext <2 x i32> [[SHUFFLE_I_I_I]] to <2 x i64> 7059 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[TMP1]] 7060 // CHECK: ret <2 x i64> [[SUB_I]] 7061 uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) { 7062 return vsubw_high_u32(a, b); 7063 } 7064 7065 // CHECK-LABEL: @test_vaddhn_s16( 7066 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7067 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7068 // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b 7069 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8) 7070 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> 7071 // CHECK: ret <8 x i8> [[VADDHN2_I]] 7072 int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) { 7073 return vaddhn_s16(a, b); 7074 } 7075 7076 // CHECK-LABEL: @test_vaddhn_s32( 7077 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7078 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7079 // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b 7080 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16) 7081 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> 7082 // CHECK: ret <4 x i16> [[VADDHN2_I]] 7083 int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) { 7084 return vaddhn_s32(a, b); 7085 } 7086 7087 // CHECK-LABEL: @test_vaddhn_s64( 7088 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7089 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7090 // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b 7091 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32) 7092 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> 7093 // CHECK: ret <2 x i32> [[VADDHN2_I]] 7094 int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) { 7095 return vaddhn_s64(a, b); 7096 } 7097 7098 // CHECK-LABEL: 
@test_vaddhn_u16( 7099 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7100 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7101 // CHECK: [[VADDHN_I:%.*]] = add <8 x i16> %a, %b 7102 // CHECK: [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], splat (i16 8) 7103 // CHECK: [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8> 7104 // CHECK: ret <8 x i8> [[VADDHN2_I]] 7105 uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) { 7106 return vaddhn_u16(a, b); 7107 } 7108 7109 // CHECK-LABEL: @test_vaddhn_u32( 7110 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7111 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7112 // CHECK: [[VADDHN_I:%.*]] = add <4 x i32> %a, %b 7113 // CHECK: [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], splat (i32 16) 7114 // CHECK: [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16> 7115 // CHECK: ret <4 x i16> [[VADDHN2_I]] 7116 uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) { 7117 return vaddhn_u32(a, b); 7118 } 7119 7120 // CHECK-LABEL: @test_vaddhn_u64( 7121 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7122 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7123 // CHECK: [[VADDHN_I:%.*]] = add <2 x i64> %a, %b 7124 // CHECK: [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], splat (i64 32) 7125 // CHECK: [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32> 7126 // CHECK: ret <2 x i32> [[VADDHN2_I]] 7127 uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) { 7128 return vaddhn_u64(a, b); 7129 } 7130 7131 // CHECK-LABEL: @test_vaddhn_high_s16( 7132 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7133 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7134 // CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b 7135 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8) 7136 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8> 7137 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7138 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 7139 int8x16_t test_vaddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { 7140 return vaddhn_high_s16(r, a, b); 7141 } 7142 7143 // CHECK-LABEL: @test_vaddhn_high_s32( 7144 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7145 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7146 // CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b 7147 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16) 7148 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16> 7149 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 7150 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 7151 int16x8_t test_vaddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { 7152 return vaddhn_high_s32(r, a, b); 7153 } 7154 7155 // CHECK-LABEL: @test_vaddhn_high_s64( 7156 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7157 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7158 // CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b 7159 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32) 7160 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32> 7161 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x 
i32> <i32 0, i32 1, i32 2, i32 3> 7162 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 7163 int32x4_t test_vaddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { 7164 return vaddhn_high_s64(r, a, b); 7165 } 7166 7167 // CHECK-LABEL: @test_vaddhn_high_u16( 7168 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7169 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7170 // CHECK: [[VADDHN_I_I:%.*]] = add <8 x i16> %a, %b 7171 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <8 x i16> [[VADDHN_I_I]], splat (i16 8) 7172 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <8 x i16> [[VADDHN1_I_I]] to <8 x i8> 7173 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VADDHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7174 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 7175 uint8x16_t test_vaddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { 7176 return vaddhn_high_u16(r, a, b); 7177 } 7178 7179 // CHECK-LABEL: @test_vaddhn_high_u32( 7180 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7181 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7182 // CHECK: [[VADDHN_I_I:%.*]] = add <4 x i32> %a, %b 7183 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <4 x i32> [[VADDHN_I_I]], splat (i32 16) 7184 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <4 x i32> [[VADDHN1_I_I]] to <4 x i16> 7185 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VADDHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 7186 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 7187 uint16x8_t test_vaddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { 7188 return vaddhn_high_u32(r, a, b); 7189 } 7190 7191 // CHECK-LABEL: @test_vaddhn_high_u64( 7192 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7193 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7194 // CHECK: [[VADDHN_I_I:%.*]] = add <2 x i64> %a, %b 7195 // CHECK: [[VADDHN1_I_I:%.*]] = lshr <2 x i64> [[VADDHN_I_I]], splat (i64 32) 7196 // CHECK: [[VADDHN2_I_I:%.*]] = trunc <2 x i64> [[VADDHN1_I_I]] to <2 x i32> 7197 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VADDHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7198 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 7199 uint32x4_t test_vaddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { 7200 return vaddhn_high_u64(r, a, b); 7201 } 7202 7203 // CHECK-LABEL: @test_vraddhn_s16( 7204 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7205 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7206 // CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 7207 // CHECK: ret <8 x i8> [[VRADDHN_V2_I]] 7208 int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) { 7209 return vraddhn_s16(a, b); 7210 } 7211 7212 // CHECK-LABEL: @test_vraddhn_s32( 7213 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7214 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7215 // CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 7216 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> 7217 // CHECK: ret <4 x i16> [[VRADDHN_V2_I]] 7218 int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) { 7219 return vraddhn_s32(a, b); 7220 } 7221 7222 // CHECK-LABEL: @test_vraddhn_s64( 7223 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7224 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7225 // CHECK: 
[[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 7226 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> 7227 // CHECK: ret <2 x i32> [[VRADDHN_V2_I]] 7228 int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) { 7229 return vraddhn_s64(a, b); 7230 } 7231 7232 // CHECK-LABEL: @test_vraddhn_u16( 7233 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7234 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7235 // CHECK: [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 7236 // CHECK: ret <8 x i8> [[VRADDHN_V2_I]] 7237 uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) { 7238 return vraddhn_u16(a, b); 7239 } 7240 7241 // CHECK-LABEL: @test_vraddhn_u32( 7242 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7243 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7244 // CHECK: [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 7245 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8> 7246 // CHECK: ret <4 x i16> [[VRADDHN_V2_I]] 7247 uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) { 7248 return vraddhn_u32(a, b); 7249 } 7250 7251 // CHECK-LABEL: @test_vraddhn_u64( 7252 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7253 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7254 // CHECK: [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 7255 // CHECK: [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8> 7256 // CHECK: ret <2 x i32> [[VRADDHN_V2_I]] 7257 uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) { 7258 return vraddhn_u64(a, b); 7259 } 7260 7261 // CHECK-LABEL: @test_vraddhn_high_s16( 7262 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7263 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7264 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 7265 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7266 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 7267 int8x16_t test_vraddhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { 7268 return vraddhn_high_s16(r, a, b); 7269 } 7270 7271 // CHECK-LABEL: @test_vraddhn_high_s32( 7272 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7273 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7274 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 7275 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8> 7276 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 7277 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 7278 int16x8_t test_vraddhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { 7279 return vraddhn_high_s32(r, a, b); 7280 } 7281 7282 // CHECK-LABEL: @test_vraddhn_high_s64( 7283 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7284 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7285 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 7286 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> 
[[VRADDHN_V2_I_I]] to <8 x i8> 7287 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7288 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 7289 int32x4_t test_vraddhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { 7290 return vraddhn_high_s64(r, a, b); 7291 } 7292 7293 // CHECK-LABEL: @test_vraddhn_high_u16( 7294 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7295 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7296 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b) 7297 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRADDHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7298 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 7299 uint8x16_t test_vraddhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { 7300 return vraddhn_high_u16(r, a, b); 7301 } 7302 7303 // CHECK-LABEL: @test_vraddhn_high_u32( 7304 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7305 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7306 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b) 7307 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I_I]] to <8 x i8> 7308 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRADDHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 7309 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 7310 uint16x8_t test_vraddhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { 7311 return vraddhn_high_u32(r, a, b); 7312 } 7313 7314 // CHECK-LABEL: @test_vraddhn_high_u64( 7315 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7316 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7317 // CHECK: [[VRADDHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b) 7318 // CHECK: [[VRADDHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I_I]] to <8 x i8> 7319 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRADDHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7320 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 7321 uint32x4_t test_vraddhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { 7322 return vraddhn_high_u64(r, a, b); 7323 } 7324 7325 // CHECK-LABEL: @test_vsubhn_s16( 7326 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7327 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7328 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b 7329 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8) 7330 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> 7331 // CHECK: ret <8 x i8> [[VSUBHN2_I]] 7332 int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) { 7333 return vsubhn_s16(a, b); 7334 } 7335 7336 // CHECK-LABEL: @test_vsubhn_s32( 7337 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7338 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7339 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b 7340 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16) 7341 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> 7342 // CHECK: ret <4 x i16> [[VSUBHN2_I]] 7343 int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) { 7344 return vsubhn_s32(a, b); 7345 } 7346 7347 // CHECK-LABEL: @test_vsubhn_s64( 7348 // CHECK: [[TMP0:%.*]] = bitcast <2 x 
i64> %a to <16 x i8> 7349 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7350 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b 7351 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32) 7352 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> 7353 // CHECK: ret <2 x i32> [[VSUBHN2_I]] 7354 int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) { 7355 return vsubhn_s64(a, b); 7356 } 7357 7358 // CHECK-LABEL: @test_vsubhn_u16( 7359 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7360 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7361 // CHECK: [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b 7362 // CHECK: [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], splat (i16 8) 7363 // CHECK: [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8> 7364 // CHECK: ret <8 x i8> [[VSUBHN2_I]] 7365 uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) { 7366 return vsubhn_u16(a, b); 7367 } 7368 7369 // CHECK-LABEL: @test_vsubhn_u32( 7370 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7371 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7372 // CHECK: [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b 7373 // CHECK: [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], splat (i32 16) 7374 // CHECK: [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16> 7375 // CHECK: ret <4 x i16> [[VSUBHN2_I]] 7376 uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) { 7377 return vsubhn_u32(a, b); 7378 } 7379 7380 // CHECK-LABEL: @test_vsubhn_u64( 7381 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7382 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7383 // CHECK: [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b 7384 // CHECK: [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], splat (i64 32) 7385 // CHECK: [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32> 7386 // CHECK: ret <2 x i32> [[VSUBHN2_I]] 7387 uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) { 7388 return vsubhn_u64(a, b); 7389 } 7390 7391 // CHECK-LABEL: @test_vsubhn_high_s16( 7392 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7393 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7394 // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b 7395 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8) 7396 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8> 7397 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7398 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 7399 int8x16_t test_vsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { 7400 return vsubhn_high_s16(r, a, b); 7401 } 7402 7403 // CHECK-LABEL: @test_vsubhn_high_s32( 7404 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7405 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7406 // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b 7407 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16) 7408 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16> 7409 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 7410 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 7411 int16x8_t test_vsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { 7412 return vsubhn_high_s32(r, a, b); 7413 } 7414 7415 // CHECK-LABEL: 
@test_vsubhn_high_s64( 7416 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7417 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7418 // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b 7419 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32) 7420 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32> 7421 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7422 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 7423 int32x4_t test_vsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { 7424 return vsubhn_high_s64(r, a, b); 7425 } 7426 7427 // CHECK-LABEL: @test_vsubhn_high_u16( 7428 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7429 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7430 // CHECK: [[VSUBHN_I_I:%.*]] = sub <8 x i16> %a, %b 7431 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <8 x i16> [[VSUBHN_I_I]], splat (i16 8) 7432 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I_I]] to <8 x i8> 7433 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VSUBHN2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7434 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 7435 uint8x16_t test_vsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { 7436 return vsubhn_high_u16(r, a, b); 7437 } 7438 7439 // CHECK-LABEL: @test_vsubhn_high_u32( 7440 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7441 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7442 // CHECK: [[VSUBHN_I_I:%.*]] = sub <4 x i32> %a, %b 7443 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <4 x i32> [[VSUBHN_I_I]], splat (i32 16) 7444 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I_I]] to <4 x i16> 7445 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VSUBHN2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 7446 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 7447 uint16x8_t test_vsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { 7448 return vsubhn_high_u32(r, a, b); 7449 } 7450 7451 // CHECK-LABEL: @test_vsubhn_high_u64( 7452 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7453 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7454 // CHECK: [[VSUBHN_I_I:%.*]] = sub <2 x i64> %a, %b 7455 // CHECK: [[VSUBHN1_I_I:%.*]] = lshr <2 x i64> [[VSUBHN_I_I]], splat (i64 32) 7456 // CHECK: [[VSUBHN2_I_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I_I]] to <2 x i32> 7457 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VSUBHN2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7458 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 7459 uint32x4_t test_vsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { 7460 return vsubhn_high_u64(r, a, b); 7461 } 7462 7463 // CHECK-LABEL: @test_vrsubhn_s16( 7464 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7465 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7466 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 7467 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] 7468 int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) { 7469 return vrsubhn_s16(a, b); 7470 } 7471 7472 // CHECK-LABEL: @test_vrsubhn_s32( 7473 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7474 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7475 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 7476 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> 7477 // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]] 7478 int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) { 7479 return vrsubhn_s32(a, b); 7480 } 7481 7482 // CHECK-LABEL: @test_vrsubhn_s64( 7483 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7484 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7485 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 7486 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> 7487 // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]] 7488 int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) { 7489 return vrsubhn_s64(a, b); 7490 } 7491 7492 // CHECK-LABEL: @test_vrsubhn_u16( 7493 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7494 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7495 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 7496 // CHECK: ret <8 x i8> [[VRSUBHN_V2_I]] 7497 uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) { 7498 return vrsubhn_u16(a, b); 7499 } 7500 7501 // CHECK-LABEL: @test_vrsubhn_u32( 7502 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7503 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7504 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 7505 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8> 7506 // CHECK: ret <4 x i16> [[VRSUBHN_V2_I]] 7507 uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) { 7508 return vrsubhn_u32(a, b); 7509 } 7510 7511 // CHECK-LABEL: @test_vrsubhn_u64( 7512 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7513 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7514 // CHECK: [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 7515 // CHECK: [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8> 7516 // CHECK: ret <2 x i32> [[VRSUBHN_V2_I]] 7517 uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) { 7518 return vrsubhn_u64(a, b); 7519 } 7520 7521 // CHECK-LABEL: @test_vrsubhn_high_s16( 7522 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7523 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7524 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 7525 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7526 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 7527 int8x16_t test_vrsubhn_high_s16(int8x8_t r, int16x8_t a, int16x8_t b) { 7528 return vrsubhn_high_s16(r, a, b); 7529 } 7530 7531 // CHECK-LABEL: @test_vrsubhn_high_s32( 7532 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7533 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7534 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 7535 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8> 7536 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 7537 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 7538 int16x8_t 
test_vrsubhn_high_s32(int16x4_t r, int32x4_t a, int32x4_t b) { 7539 return vrsubhn_high_s32(r, a, b); 7540 } 7541 7542 // CHECK-LABEL: @test_vrsubhn_high_s64( 7543 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7544 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7545 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 7546 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8> 7547 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7548 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 7549 int32x4_t test_vrsubhn_high_s64(int32x2_t r, int64x2_t a, int64x2_t b) { 7550 return vrsubhn_high_s64(r, a, b); 7551 } 7552 7553 // CHECK-LABEL: @test_vrsubhn_high_u16( 7554 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 7555 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 7556 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b) 7557 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i8> %r, <8 x i8> [[VRSUBHN_V2_I_I]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 7558 // CHECK: ret <16 x i8> [[SHUFFLE_I_I]] 7559 uint8x16_t test_vrsubhn_high_u16(uint8x8_t r, uint16x8_t a, uint16x8_t b) { 7560 return vrsubhn_high_u16(r, a, b); 7561 } 7562 7563 // CHECK-LABEL: @test_vrsubhn_high_u32( 7564 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 7565 // CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8> 7566 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b) 7567 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I_I]] to <8 x i8> 7568 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i16> %r, <4 x i16> [[VRSUBHN_V2_I_I]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 7569 // CHECK: ret <8 x i16> [[SHUFFLE_I_I]] 7570 uint16x8_t test_vrsubhn_high_u32(uint16x4_t r, uint32x4_t a, uint32x4_t b) { 7571 return vrsubhn_high_u32(r, a, b); 7572 } 7573 7574 // CHECK-LABEL: @test_vrsubhn_high_u64( 7575 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 7576 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8> 7577 // CHECK: [[VRSUBHN_V2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b) 7578 // CHECK: [[VRSUBHN_V3_I_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I_I]] to <8 x i8> 7579 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i32> %r, <2 x i32> [[VRSUBHN_V2_I_I]], <4 x i32> <i32 0, i32 1, i32 2, i32 3> 7580 // CHECK: ret <4 x i32> [[SHUFFLE_I_I]] 7581 uint32x4_t test_vrsubhn_high_u64(uint32x2_t r, uint64x2_t a, uint64x2_t b) { 7582 return vrsubhn_high_u64(r, a, b); 7583 } 7584 7585 // CHECK-LABEL: @test_vabdl_s8( 7586 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %a, <8 x i8> %b) 7587 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> 7588 // CHECK: ret <8 x i16> [[VMOVL_I_I]] 7589 int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) { 7590 return vabdl_s8(a, b); 7591 } 7592 7593 // CHECK-LABEL: @test_vabdl_s16( 7594 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 7595 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 7596 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %a, <4 x i16> %b) 7597 // CHECK: [[TMP2:%.*]] = bitcast 
<4 x i16> [[VABD2_I_I]] to <8 x i8> 7598 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32> 7599 // CHECK: ret <4 x i32> [[VMOVL_I_I]] 7600 int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) { 7601 return vabdl_s16(a, b); 7602 } 7603 7604 // CHECK-LABEL: @test_vabdl_s32( 7605 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 7606 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 7607 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %a, <2 x i32> %b) 7608 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> 7609 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64> 7610 // CHECK: ret <2 x i64> [[VMOVL_I_I]] 7611 int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) { 7612 return vabdl_s32(a, b); 7613 } 7614 7615 // CHECK-LABEL: @test_vabdl_u8( 7616 // CHECK: [[VABD_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %a, <8 x i8> %b) 7617 // CHECK: [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_I_I]] to <8 x i16> 7618 // CHECK: ret <8 x i16> [[VMOVL_I_I]] 7619 uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) { 7620 return vabdl_u8(a, b); 7621 } 7622 7623 // CHECK-LABEL: @test_vabdl_u16( 7624 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 7625 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 7626 // CHECK: [[VABD2_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %a, <4 x i16> %b) 7627 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I]] to <8 x i8> 7628 // CHECK: [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I]] to <4 x i32> 7629 // CHECK: ret <4 x i32> [[VMOVL_I_I]] 7630 uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) { 7631 return vabdl_u16(a, b); 7632 } 7633 7634 // CHECK-LABEL: @test_vabdl_u32( 7635 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 7636 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 7637 // CHECK: [[VABD2_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %a, <2 x i32> %b) 7638 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I]] to <8 x i8> 7639 // CHECK: [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I]] to <2 x i64> 7640 // CHECK: ret <2 x i64> [[VMOVL_I_I]] 7641 uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) { 7642 return vabdl_u32(a, b); 7643 } 7644 7645 // CHECK-LABEL: @test_vabal_s8( 7646 // CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %b, <8 x i8> %c) 7647 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16> 7648 // CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]] 7649 // CHECK: ret <8 x i16> [[ADD_I]] 7650 int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) { 7651 return vabal_s8(a, b, c); 7652 } 7653 7654 // CHECK-LABEL: @test_vabal_s16( 7655 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> 7656 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8> 7657 // CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %b, <4 x i16> %c) 7658 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8> 7659 // CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32> 7660 // CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]] 7661 // CHECK: ret <4 x i32> [[ADD_I]] 7662 int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 7663 return vabal_s16(a, b, c); 7664 } 7665 7666 // CHECK-LABEL: @test_vabal_s32( 7667 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 7668 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to 
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_u8(
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}

// CHECK-LABEL: @test_vabdl_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
int16x8_t test_vabdl_high_s8(int8x16_t a, int8x16_t b) {
  return vabdl_high_s8(a, b);
}

// CHECK-LABEL: @test_vabdl_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
int32x4_t test_vabdl_high_s16(int16x8_t a, int16x8_t b) {
  return vabdl_high_s16(a, b);
}

// CHECK-LABEL: @test_vabdl_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
int64x2_t test_vabdl_high_s32(int32x4_t a, int32x4_t b) {
  return vabdl_high_s32(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I]] to <8 x i16>
// CHECK: ret <8 x i16> [[VMOVL_I_I_I]]
uint16x8_t test_vabdl_high_u8(uint8x16_t a, uint8x16_t b) {
  return vabdl_high_u8(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I]] to <4 x i32>
// CHECK: ret <4 x i32> [[VMOVL_I_I_I]]
uint32x4_t test_vabdl_high_u16(uint16x8_t a, uint16x8_t b) {
  return vabdl_high_u16(a, b);
}

// CHECK-LABEL: @test_vabdl_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I]] to <2 x i64>
// CHECK: ret <2 x i64> [[VMOVL_I_I_I]]
uint64x2_t test_vabdl_high_u32(uint32x4_t a, uint32x4_t b) {
  return vabdl_high_u32(a, b);
}
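
// The vabal_high_* tests below check that the '_high' accumulating forms first
// extract the upper half of each 128-bit source (a shufflevector of lanes
// n/2..n-1) and then reuse the same [su]abd + zext + add lowering seen in the
// 64-bit vabal_* tests above.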

// CHECK-LABEL: @test_vabal_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vabal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vabal_high_s8(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vabal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vabal_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vabal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vabal_high_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VABD_I_I_I_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <8 x i8> [[VABD_I_I_I_I]] to <8 x i16>
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vabal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vabal_high_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <4 x i16> [[VABD2_I_I_I_I]] to <4 x i32>
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vabal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vabal_high_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VABD2_I_I_I_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[VABD2_I_I_I_I]] to <8 x i8>
// CHECK: [[VMOVL_I_I_I_I:%.*]] = zext <2 x i32> [[VABD2_I_I_I_I]] to <2 x i64>
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vabal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vabal_high_u32(a, b, c);
}

// CHECK-LABEL: @test_vmull_s8(
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}

// CHECK-LABEL: @test_vmull_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}

// CHECK-LABEL: @test_vmull_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}

// CHECK-LABEL: @test_vmull_u8(
// CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}

// CHECK-LABEL: @test_vmull_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}

// CHECK-LABEL: @test_vmull_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VMULL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}

// CHECK-LABEL: @test_vmull_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: ret <8 x i16> [[VMULL_I_I]]
int16x8_t test_vmull_high_s8(int8x16_t a, int8x16_t b) {
  return vmull_high_s8(a, b);
}

// CHECK-LABEL: @test_vmull_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: ret <4 x i32> [[VMULL2_I_I]]
int32x4_t test_vmull_high_s16(int16x8_t a, int16x8_t b) {
  return vmull_high_s16(a, b);
}

// CHECK-LABEL: @test_vmull_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: ret <2 x i64> [[VMULL2_I_I]]
int64x2_t test_vmull_high_s32(int32x4_t a, int32x4_t b) {
  return vmull_high_s32(a, b);
}

// CHECK-LABEL: @test_vmull_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: ret <8 x i16> [[VMULL_I_I]]
uint16x8_t test_vmull_high_u8(uint8x16_t a, uint8x16_t b) {
  return vmull_high_u8(a, b);
}

// CHECK-LABEL: @test_vmull_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: ret <4 x i32> [[VMULL2_I_I]]
uint32x4_t test_vmull_high_u16(uint16x8_t a, uint16x8_t b) {
  return vmull_high_u16(a, b);
}

// CHECK-LABEL: @test_vmull_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: ret <2 x i64> [[VMULL2_I_I]]
uint64x2_t test_vmull_high_u32(uint32x4_t a, uint32x4_t b) {
  return vmull_high_u32(a, b);
}
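
// Widening multiply-accumulate: the vmlal_*/vmlsl_* tests that follow expect
// the [su]mull intrinsic followed by a plain IR add/sub on the accumulator,
// i.e. vmlal_s16(a, b, c) computes the same result as vaddq_s32(a, vmull_s16(b, c)).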

// CHECK-LABEL: @test_vmlal_s8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_s8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
int16x8_t test_vmlal_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) {
  return vmlal_high_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_s16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
int32x4_t test_vmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) {
  return vmlal_high_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_s32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
int64x2_t test_vmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) {
  return vmlal_high_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_u8(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I_I]]
// CHECK: ret <8 x i16> [[ADD_I_I]]
uint16x8_t test_vmlal_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) {
  return vmlal_high_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_u16(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I_I]]
// CHECK: ret <4 x i32> [[ADD_I_I]]
uint32x4_t test_vmlal_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) {
  return vmlal_high_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_high_u32(
// CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3>
// CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3>
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8>
// CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]])
// CHECK: [[ADD_I_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I_I]]
// CHECK: ret <2 x i64> [[ADD_I_I]]
uint64x2_t test_vmlal_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) {
  return vmlal_high_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK: ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u8(
// CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK: [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK: ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK: [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK: [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK: ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u32(
@test_vmlsl_u32( 8144 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8145 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8> 8146 // CHECK: [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %b, <2 x i32> %c) 8147 // CHECK: [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]] 8148 // CHECK: ret <2 x i64> [[SUB_I]] 8149 uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) { 8150 return vmlsl_u32(a, b, c); 8151 } 8152 8153 // CHECK-LABEL: @test_vmlsl_high_s8( 8154 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8155 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8156 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.smull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) 8157 // CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]] 8158 // CHECK: ret <8 x i16> [[SUB_I_I]] 8159 int16x8_t test_vmlsl_high_s8(int16x8_t a, int8x16_t b, int8x16_t c) { 8160 return vmlsl_high_s8(a, b, c); 8161 } 8162 8163 // CHECK-LABEL: @test_vmlsl_high_s16( 8164 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8165 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8166 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8167 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8168 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) 8169 // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] 8170 // CHECK: ret <4 x i32> [[SUB_I_I]] 8171 int32x4_t test_vmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { 8172 return vmlsl_high_s16(a, b, c); 8173 } 8174 8175 // CHECK-LABEL: @test_vmlsl_high_s32( 8176 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8177 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3> 8178 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8179 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8180 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.smull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) 8181 // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] 8182 // CHECK: ret <2 x i64> [[SUB_I_I]] 8183 int64x2_t test_vmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { 8184 return vmlsl_high_s32(a, b, c); 8185 } 8186 8187 // CHECK-LABEL: @test_vmlsl_high_u8( 8188 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8189 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %c, <16 x i8> %c, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8190 // CHECK: [[VMULL_I_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.umull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) 8191 // CHECK: [[SUB_I_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I_I]] 8192 // CHECK: ret <8 x i16> [[SUB_I_I]] 8193 uint16x8_t test_vmlsl_high_u8(uint16x8_t a, uint8x16_t b, uint8x16_t c) { 8194 return vmlsl_high_u8(a, b, c); 8195 } 8196 8197 // CHECK-LABEL: @test_vmlsl_high_u16( 8198 // CHECK: 
[[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8199 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8200 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8201 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8202 // CHECK: [[VMULL2_I_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) 8203 // CHECK: [[SUB_I_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I_I]] 8204 // CHECK: ret <4 x i32> [[SUB_I_I]] 8205 uint32x4_t test_vmlsl_high_u16(uint32x4_t a, uint16x8_t b, uint16x8_t c) { 8206 return vmlsl_high_u16(a, b, c); 8207 } 8208 8209 // CHECK-LABEL: @test_vmlsl_high_u32( 8210 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8211 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3> 8212 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8213 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8214 // CHECK: [[VMULL2_I_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) 8215 // CHECK: [[SUB_I_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I_I]] 8216 // CHECK: ret <2 x i64> [[SUB_I_I]] 8217 uint64x2_t test_vmlsl_high_u32(uint64x2_t a, uint32x4_t b, uint32x4_t c) { 8218 return vmlsl_high_u32(a, b, c); 8219 } 8220 8221 // CHECK-LABEL: @test_vqdmull_s16( 8222 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 8223 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8224 // CHECK: [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %a, <4 x i16> %b) 8225 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8> 8226 // CHECK: ret <4 x i32> [[VQDMULL_V2_I]] 8227 int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) { 8228 return vqdmull_s16(a, b); 8229 } 8230 8231 // CHECK-LABEL: @test_vqdmull_s32( 8232 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8233 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8234 // CHECK: [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %a, <2 x i32> %b) 8235 // CHECK: [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8> 8236 // CHECK: ret <2 x i64> [[VQDMULL_V2_I]] 8237 int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) { 8238 return vqdmull_s32(a, b); 8239 } 8240 8241 // CHECK-LABEL: @test_vqdmlal_s16( 8242 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8243 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8244 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> 8245 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) 8246 // CHECK: [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) 8247 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I]] 8248 int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 8249 return vqdmlal_s16(a, b, c); 8250 } 8251 8252 // CHECK-LABEL: @test_vqdmlal_s32( 8253 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8254 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8255 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> 8256 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) 8257 // CHECK: 
[[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) 8258 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I]] 8259 int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 8260 return vqdmlal_s32(a, b, c); 8261 } 8262 8263 // CHECK-LABEL: @test_vqdmlsl_s16( 8264 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8265 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 8266 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8> 8267 // CHECK: [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %b, <4 x i16> %c) 8268 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]]) 8269 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I]] 8270 int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) { 8271 return vqdmlsl_s16(a, b, c); 8272 } 8273 8274 // CHECK-LABEL: @test_vqdmlsl_s32( 8275 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8276 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 8277 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8> 8278 // CHECK: [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> %b, <2 x i32> %c) 8279 // CHECK: [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]]) 8280 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I]] 8281 int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) { 8282 return vqdmlsl_s32(a, b, c); 8283 } 8284 8285 // CHECK-LABEL: @test_vqdmull_high_s16( 8286 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8287 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8288 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8289 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8290 // CHECK: [[VQDMULL_V2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) 8291 // CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I_I]] to <16 x i8> 8292 // CHECK: ret <4 x i32> [[VQDMULL_V2_I_I]] 8293 int32x4_t test_vqdmull_high_s16(int16x8_t a, int16x8_t b) { 8294 return vqdmull_high_s16(a, b); 8295 } 8296 8297 // CHECK-LABEL: @test_vqdmull_high_s32( 8298 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 8299 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8300 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8301 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8302 // CHECK: [[VQDMULL_V2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) 8303 // CHECK: [[VQDMULL_V3_I_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I_I]] to <16 x i8> 8304 // CHECK: ret <2 x i64> [[VQDMULL_V2_I_I]] 8305 int64x2_t test_vqdmull_high_s32(int32x4_t a, int32x4_t b) { 8306 return vqdmull_high_s32(a, b); 8307 } 8308 8309 // CHECK-LABEL: @test_vqdmlal_high_s16( 8310 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8311 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8312 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8313 // CHECK: [[TMP1:%.*]] = bitcast <4 x 
i16> [[SHUFFLE_I_I]] to <8 x i8> 8314 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8315 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) 8316 // CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqadd.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]]) 8317 // CHECK: ret <4 x i32> [[VQDMLAL_V3_I_I]] 8318 int32x4_t test_vqdmlal_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { 8319 return vqdmlal_high_s16(a, b, c); 8320 } 8321 8322 // CHECK-LABEL: @test_vqdmlal_high_s32( 8323 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8324 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3> 8325 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8326 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8327 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8328 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) 8329 // CHECK: [[VQDMLAL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqadd.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]]) 8330 // CHECK: ret <2 x i64> [[VQDMLAL_V3_I_I]] 8331 int64x2_t test_vqdmlal_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { 8332 return vqdmlal_high_s32(a, b, c); 8333 } 8334 8335 // CHECK-LABEL: @test_vqdmlsl_high_s16( 8336 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <8 x i16> %b, <8 x i16> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8337 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <8 x i16> %c, <8 x i16> %c, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 8338 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8339 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I]] to <8 x i8> 8340 // CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE_I7_I]] to <8 x i8> 8341 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[SHUFFLE_I_I]], <4 x i16> [[SHUFFLE_I7_I]]) 8342 // CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqsub.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I_I]]) 8343 // CHECK: ret <4 x i32> [[VQDMLSL_V3_I_I]] 8344 int32x4_t test_vqdmlsl_high_s16(int32x4_t a, int16x8_t b, int16x8_t c) { 8345 return vqdmlsl_high_s16(a, b, c); 8346 } 8347 8348 // CHECK-LABEL: @test_vqdmlsl_high_s32( 8349 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <4 x i32> %b, <4 x i32> %b, <2 x i32> <i32 2, i32 3> 8350 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <4 x i32> %c, <4 x i32> %c, <2 x i32> <i32 2, i32 3> 8351 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 8352 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I]] to <8 x i8> 8353 // CHECK: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE_I7_I]] to <8 x i8> 8354 // CHECK: [[VQDMLAL2_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqdmull.v2i64(<2 x i32> [[SHUFFLE_I_I]], <2 x i32> [[SHUFFLE_I7_I]]) 8355 // CHECK: [[VQDMLSL_V3_I_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqsub.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I_I]]) 8356 // CHECK: ret <2 x i64> [[VQDMLSL_V3_I_I]] 8357 int64x2_t test_vqdmlsl_high_s32(int64x2_t a, int32x4_t b, int32x4_t c) { 8358 return vqdmlsl_high_s32(a, b, c); 8359 } 8360 8361 // CHECK-LABEL: @test_vmull_p8( 8362 // CHECK: [[VMULL_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> %a, <8 x i8> %b) 8363 // CHECK: ret <8 x i16> [[VMULL_I]] 8364 poly16x8_t 
test_vmull_p8(poly8x8_t a, poly8x8_t b) { 8365 return vmull_p8(a, b); 8366 } 8367 8368 // CHECK-LABEL: @test_vmull_high_p8( 8369 // CHECK: [[SHUFFLE_I_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8370 // CHECK: [[SHUFFLE_I7_I:%.*]] = shufflevector <16 x i8> %b, <16 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 8371 // CHECK: [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8> [[SHUFFLE_I_I]], <8 x i8> [[SHUFFLE_I7_I]]) 8372 // CHECK: ret <8 x i16> [[VMULL_I_I]] 8373 poly16x8_t test_vmull_high_p8(poly8x16_t a, poly8x16_t b) { 8374 return vmull_high_p8(a, b); 8375 } 8376 8377 // CHECK-LABEL: @test_vaddd_s64( 8378 // CHECK: [[VADDD_I:%.*]] = add i64 %a, %b 8379 // CHECK: ret i64 [[VADDD_I]] 8380 int64_t test_vaddd_s64(int64_t a, int64_t b) { 8381 return vaddd_s64(a, b); 8382 } 8383 8384 // CHECK-LABEL: @test_vaddd_u64( 8385 // CHECK: [[VADDD_I:%.*]] = add i64 %a, %b 8386 // CHECK: ret i64 [[VADDD_I]] 8387 uint64_t test_vaddd_u64(uint64_t a, uint64_t b) { 8388 return vaddd_u64(a, b); 8389 } 8390 8391 // CHECK-LABEL: @test_vsubd_s64( 8392 // CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b 8393 // CHECK: ret i64 [[VSUBD_I]] 8394 int64_t test_vsubd_s64(int64_t a, int64_t b) { 8395 return vsubd_s64(a, b); 8396 } 8397 8398 // CHECK-LABEL: @test_vsubd_u64( 8399 // CHECK: [[VSUBD_I:%.*]] = sub i64 %a, %b 8400 // CHECK: ret i64 [[VSUBD_I]] 8401 uint64_t test_vsubd_u64(uint64_t a, uint64_t b) { 8402 return vsubd_u64(a, b); 8403 } 8404 8405 // CHECK-LABEL: @test_vqaddb_s8( 8406 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 8407 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 8408 // CHECK: [[VQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) 8409 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_S8_I]], i64 0 8410 // CHECK: ret i8 [[TMP2]] 8411 int8_t test_vqaddb_s8(int8_t a, int8_t b) { 8412 return vqaddb_s8(a, b); 8413 } 8414 8415 // CHECK-LABEL: @test_vqaddh_s16( 8416 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 8417 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 8418 // CHECK: [[VQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 8419 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_S16_I]], i64 0 8420 // CHECK: ret i16 [[TMP2]] 8421 int16_t test_vqaddh_s16(int16_t a, int16_t b) { 8422 return vqaddh_s16(a, b); 8423 } 8424 8425 // CHECK-LABEL: @test_vqadds_s32( 8426 // CHECK: [[VQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 %b) 8427 // CHECK: ret i32 [[VQADDS_S32_I]] 8428 int32_t test_vqadds_s32(int32_t a, int32_t b) { 8429 return vqadds_s32(a, b); 8430 } 8431 8432 // CHECK-LABEL: @test_vqaddd_s64( 8433 // CHECK: [[VQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 %b) 8434 // CHECK: ret i64 [[VQADDD_S64_I]] 8435 int64_t test_vqaddd_s64(int64_t a, int64_t b) { 8436 return vqaddd_s64(a, b); 8437 } 8438 8439 // CHECK-LABEL: @test_vqaddb_u8( 8440 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 8441 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 8442 // CHECK: [[VQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) 8443 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQADDB_U8_I]], i64 0 8444 // CHECK: ret i8 [[TMP2]] 8445 
uint8_t test_vqaddb_u8(uint8_t a, uint8_t b) { 8446 return vqaddb_u8(a, b); 8447 } 8448 8449 // CHECK-LABEL: @test_vqaddh_u16( 8450 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 8451 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 8452 // CHECK: [[VQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 8453 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQADDH_U16_I]], i64 0 8454 // CHECK: ret i16 [[TMP2]] 8455 uint16_t test_vqaddh_u16(uint16_t a, uint16_t b) { 8456 return vqaddh_u16(a, b); 8457 } 8458 8459 // CHECK-LABEL: @test_vqadds_u32( 8460 // CHECK: [[VQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqadd.i32(i32 %a, i32 %b) 8461 // CHECK: ret i32 [[VQADDS_U32_I]] 8462 uint32_t test_vqadds_u32(uint32_t a, uint32_t b) { 8463 return vqadds_u32(a, b); 8464 } 8465 8466 // CHECK-LABEL: @test_vqaddd_u64( 8467 // CHECK: [[VQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqadd.i64(i64 %a, i64 %b) 8468 // CHECK: ret i64 [[VQADDD_U64_I]] 8469 uint64_t test_vqaddd_u64(uint64_t a, uint64_t b) { 8470 return vqaddd_u64(a, b); 8471 } 8472 8473 // CHECK-LABEL: @test_vqsubb_s8( 8474 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 8475 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 8476 // CHECK: [[VQSUBB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) 8477 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_S8_I]], i64 0 8478 // CHECK: ret i8 [[TMP2]] 8479 int8_t test_vqsubb_s8(int8_t a, int8_t b) { 8480 return vqsubb_s8(a, b); 8481 } 8482 8483 // CHECK-LABEL: @test_vqsubh_s16( 8484 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 8485 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 8486 // CHECK: [[VQSUBH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 8487 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSUBH_S16_I]], i64 0 8488 // CHECK: ret i16 [[TMP2]] 8489 int16_t test_vqsubh_s16(int16_t a, int16_t b) { 8490 return vqsubh_s16(a, b); 8491 } 8492 8493 // CHECK-LABEL: @test_vqsubs_s32( 8494 // CHECK: [[VQSUBS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 %b) 8495 // CHECK: ret i32 [[VQSUBS_S32_I]] 8496 int32_t test_vqsubs_s32(int32_t a, int32_t b) { 8497 return vqsubs_s32(a, b); 8498 } 8499 8500 // CHECK-LABEL: @test_vqsubd_s64( 8501 // CHECK: [[VQSUBD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 %b) 8502 // CHECK: ret i64 [[VQSUBD_S64_I]] 8503 int64_t test_vqsubd_s64(int64_t a, int64_t b) { 8504 return vqsubd_s64(a, b); 8505 } 8506 8507 // CHECK-LABEL: @test_vqsubb_u8( 8508 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 8509 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 8510 // CHECK: [[VQSUBB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqsub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) 8511 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSUBB_U8_I]], i64 0 8512 // CHECK: ret i8 [[TMP2]] 8513 uint8_t test_vqsubb_u8(uint8_t a, uint8_t b) { 8514 return vqsubb_u8(a, b); 8515 } 8516 8517 // CHECK-LABEL: @test_vqsubh_u16( 8518 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 8519 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 8520 // CHECK: [[VQSUBH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqsub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 8521 // CHECK: [[TMP2:%.*]] 
= extractelement <4 x i16> [[VQSUBH_U16_I]], i64 0 8522 // CHECK: ret i16 [[TMP2]] 8523 uint16_t test_vqsubh_u16(uint16_t a, uint16_t b) { 8524 return vqsubh_u16(a, b); 8525 } 8526 8527 // CHECK-LABEL: @test_vqsubs_u32( 8528 // CHECK: [[VQSUBS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqsub.i32(i32 %a, i32 %b) 8529 // CHECK: ret i32 [[VQSUBS_U32_I]] 8530 uint32_t test_vqsubs_u32(uint32_t a, uint32_t b) { 8531 return vqsubs_u32(a, b); 8532 } 8533 8534 // CHECK-LABEL: @test_vqsubd_u64( 8535 // CHECK: [[VQSUBD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqsub.i64(i64 %a, i64 %b) 8536 // CHECK: ret i64 [[VQSUBD_U64_I]] 8537 uint64_t test_vqsubd_u64(uint64_t a, uint64_t b) { 8538 return vqsubd_u64(a, b); 8539 } 8540 8541 // CHECK-LABEL: @test_vshld_s64( 8542 // CHECK: [[VSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 %a, i64 %b) 8543 // CHECK: ret i64 [[VSHLD_S64_I]] 8544 int64_t test_vshld_s64(int64_t a, int64_t b) { 8545 return vshld_s64(a, b); 8546 } 8547 8548 // CHECK-LABEL: @test_vshld_u64( 8549 // CHECK: [[VSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 %a, i64 %b) 8550 // CHECK: ret i64 [[VSHLD_U64_I]] 8551 uint64_t test_vshld_u64(uint64_t a, int64_t b) { 8552 return vshld_u64(a, b); 8553 } 8554 8555 // CHECK-LABEL: @test_vqshlb_s8( 8556 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 8557 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 8558 // CHECK: [[VQSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) 8559 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_S8_I]], i64 0 8560 // CHECK: ret i8 [[TMP2]] 8561 int8_t test_vqshlb_s8(int8_t a, int8_t b) { 8562 return vqshlb_s8(a, b); 8563 } 8564 8565 // CHECK-LABEL: @test_vqshlh_s16( 8566 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 8567 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 8568 // CHECK: [[VQSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 8569 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_S16_I]], i64 0 8570 // CHECK: ret i16 [[TMP2]] 8571 int16_t test_vqshlh_s16(int16_t a, int16_t b) { 8572 return vqshlh_s16(a, b); 8573 } 8574 8575 // CHECK-LABEL: @test_vqshls_s32( 8576 // CHECK: [[VQSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 %b) 8577 // CHECK: ret i32 [[VQSHLS_S32_I]] 8578 int32_t test_vqshls_s32(int32_t a, int32_t b) { 8579 return vqshls_s32(a, b); 8580 } 8581 8582 // CHECK-LABEL: @test_vqshld_s64( 8583 // CHECK: [[VQSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 %b) 8584 // CHECK: ret i64 [[VQSHLD_S64_I]] 8585 int64_t test_vqshld_s64(int64_t a, int64_t b) { 8586 return vqshld_s64(a, b); 8587 } 8588 8589 // CHECK-LABEL: @test_vqshlb_u8( 8590 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 8591 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 8592 // CHECK: [[VQSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) 8593 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQSHLB_U8_I]], i64 0 8594 // CHECK: ret i8 [[TMP2]] 8595 uint8_t test_vqshlb_u8(uint8_t a, int8_t b) { 8596 return vqshlb_u8(a, b); 8597 } 8598 8599 // CHECK-LABEL: @test_vqshlh_u16( 8600 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 8601 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 8602 // CHECK: [[VQSHLH_U16_I:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 8603 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQSHLH_U16_I]], i64 0 8604 // CHECK: ret i16 [[TMP2]] 8605 uint16_t test_vqshlh_u16(uint16_t a, int16_t b) { 8606 return vqshlh_u16(a, b); 8607 } 8608 8609 // CHECK-LABEL: @test_vqshls_u32( 8610 // CHECK: [[VQSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 %b) 8611 // CHECK: ret i32 [[VQSHLS_U32_I]] 8612 uint32_t test_vqshls_u32(uint32_t a, int32_t b) { 8613 return vqshls_u32(a, b); 8614 } 8615 8616 // CHECK-LABEL: @test_vqshld_u64( 8617 // CHECK: [[VQSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 %b) 8618 // CHECK: ret i64 [[VQSHLD_U64_I]] 8619 uint64_t test_vqshld_u64(uint64_t a, int64_t b) { 8620 return vqshld_u64(a, b); 8621 } 8622 8623 // CHECK-LABEL: @test_vrshld_s64( 8624 // CHECK: [[VRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 %b) 8625 // CHECK: ret i64 [[VRSHLD_S64_I]] 8626 int64_t test_vrshld_s64(int64_t a, int64_t b) { 8627 return vrshld_s64(a, b); 8628 } 8629 8630 // CHECK-LABEL: @test_vrshld_u64( 8631 // CHECK: [[VRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 %b) 8632 // CHECK: ret i64 [[VRSHLD_U64_I]] 8633 uint64_t test_vrshld_u64(uint64_t a, int64_t b) { 8634 return vrshld_u64(a, b); 8635 } 8636 8637 // CHECK-LABEL: @test_vqrshlb_s8( 8638 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 8639 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 8640 // CHECK: [[VQRSHLB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) 8641 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_S8_I]], i64 0 8642 // CHECK: ret i8 [[TMP2]] 8643 int8_t test_vqrshlb_s8(int8_t a, int8_t b) { 8644 return vqrshlb_s8(a, b); 8645 } 8646 8647 // CHECK-LABEL: @test_vqrshlh_s16( 8648 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 8649 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 8650 // CHECK: [[VQRSHLH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 8651 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_S16_I]], i64 0 8652 // CHECK: ret i16 [[TMP2]] 8653 int16_t test_vqrshlh_s16(int16_t a, int16_t b) { 8654 return vqrshlh_s16(a, b); 8655 } 8656 8657 // CHECK-LABEL: @test_vqrshls_s32( 8658 // CHECK: [[VQRSHLS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrshl.i32(i32 %a, i32 %b) 8659 // CHECK: ret i32 [[VQRSHLS_S32_I]] 8660 int32_t test_vqrshls_s32(int32_t a, int32_t b) { 8661 return vqrshls_s32(a, b); 8662 } 8663 8664 // CHECK-LABEL: @test_vqrshld_s64( 8665 // CHECK: [[VQRSHLD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %a, i64 %b) 8666 // CHECK: ret i64 [[VQRSHLD_S64_I]] 8667 int64_t test_vqrshld_s64(int64_t a, int64_t b) { 8668 return vqrshld_s64(a, b); 8669 } 8670 8671 // CHECK-LABEL: @test_vqrshlb_u8( 8672 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 8673 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 8674 // CHECK: [[VQRSHLB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) 8675 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VQRSHLB_U8_I]], i64 0 8676 // CHECK: ret i8 [[TMP2]] 8677 uint8_t test_vqrshlb_u8(uint8_t a, int8_t b) { 8678 return vqrshlb_u8(a, b); 8679 } 8680 8681 // CHECK-LABEL: @test_vqrshlh_u16( 8682 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 
0 8683 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 8684 // CHECK: [[VQRSHLH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 8685 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRSHLH_U16_I]], i64 0 8686 // CHECK: ret i16 [[TMP2]] 8687 uint16_t test_vqrshlh_u16(uint16_t a, int16_t b) { 8688 return vqrshlh_u16(a, b); 8689 } 8690 8691 // CHECK-LABEL: @test_vqrshls_u32( 8692 // CHECK: [[VQRSHLS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uqrshl.i32(i32 %a, i32 %b) 8693 // CHECK: ret i32 [[VQRSHLS_U32_I]] 8694 uint32_t test_vqrshls_u32(uint32_t a, int32_t b) { 8695 return vqrshls_u32(a, b); 8696 } 8697 8698 // CHECK-LABEL: @test_vqrshld_u64( 8699 // CHECK: [[VQRSHLD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %a, i64 %b) 8700 // CHECK: ret i64 [[VQRSHLD_U64_I]] 8701 uint64_t test_vqrshld_u64(uint64_t a, int64_t b) { 8702 return vqrshld_u64(a, b); 8703 } 8704 8705 // CHECK-LABEL: @test_vpaddd_s64( 8706 // CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) 8707 // CHECK: ret i64 [[VPADDD_S64_I]] 8708 int64_t test_vpaddd_s64(int64x2_t a) { 8709 return vpaddd_s64(a); 8710 } 8711 8712 // CHECK-LABEL: @test_vpadds_f32( 8713 // CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0 8714 // CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1 8715 // CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]] 8716 // CHECK: ret float [[VPADDD_I]] 8717 float32_t test_vpadds_f32(float32x2_t a) { 8718 return vpadds_f32(a); 8719 } 8720 8721 // CHECK-LABEL: @test_vpaddd_f64( 8722 // CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0 8723 // CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1 8724 // CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]] 8725 // CHECK: ret double [[VPADDD_I]] 8726 float64_t test_vpaddd_f64(float64x2_t a) { 8727 return vpaddd_f64(a); 8728 } 8729 8730 // CHECK-LABEL: @test_vpmaxnms_f32( 8731 // CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) 8732 // CHECK: ret float [[VPMAXNMS_F32_I]] 8733 float32_t test_vpmaxnms_f32(float32x2_t a) { 8734 return vpmaxnms_f32(a); 8735 } 8736 8737 // CHECK-LABEL: @test_vpmaxnmqd_f64( 8738 // CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) 8739 // CHECK: ret double [[VPMAXNMQD_F64_I]] 8740 float64_t test_vpmaxnmqd_f64(float64x2_t a) { 8741 return vpmaxnmqd_f64(a); 8742 } 8743 8744 // CHECK-LABEL: @test_vpmaxs_f32( 8745 // CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) 8746 // CHECK: ret float [[VPMAXS_F32_I]] 8747 float32_t test_vpmaxs_f32(float32x2_t a) { 8748 return vpmaxs_f32(a); 8749 } 8750 8751 // CHECK-LABEL: @test_vpmaxqd_f64( 8752 // CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) 8753 // CHECK: ret double [[VPMAXQD_F64_I]] 8754 float64_t test_vpmaxqd_f64(float64x2_t a) { 8755 return vpmaxqd_f64(a); 8756 } 8757 8758 // CHECK-LABEL: @test_vpminnms_f32( 8759 // CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) 8760 // CHECK: ret float [[VPMINNMS_F32_I]] 8761 float32_t test_vpminnms_f32(float32x2_t a) { 8762 return vpminnms_f32(a); 8763 } 8764 8765 // CHECK-LABEL: @test_vpminnmqd_f64( 8766 // CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) 8767 // CHECK: ret double 
[[VPMINNMQD_F64_I]] 8768 float64_t test_vpminnmqd_f64(float64x2_t a) { 8769 return vpminnmqd_f64(a); 8770 } 8771 8772 // CHECK-LABEL: @test_vpmins_f32( 8773 // CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) 8774 // CHECK: ret float [[VPMINS_F32_I]] 8775 float32_t test_vpmins_f32(float32x2_t a) { 8776 return vpmins_f32(a); 8777 } 8778 8779 // CHECK-LABEL: @test_vpminqd_f64( 8780 // CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) 8781 // CHECK: ret double [[VPMINQD_F64_I]] 8782 float64_t test_vpminqd_f64(float64x2_t a) { 8783 return vpminqd_f64(a); 8784 } 8785 8786 // CHECK-LABEL: @test_vqdmulhh_s16( 8787 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 8788 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 8789 // CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 8790 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQDMULHH_S16_I]], i64 0 8791 // CHECK: ret i16 [[TMP2]] 8792 int16_t test_vqdmulhh_s16(int16_t a, int16_t b) { 8793 return vqdmulhh_s16(a, b); 8794 } 8795 8796 // CHECK-LABEL: @test_vqdmulhs_s32( 8797 // CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 %b) 8798 // CHECK: ret i32 [[VQDMULHS_S32_I]] 8799 int32_t test_vqdmulhs_s32(int32_t a, int32_t b) { 8800 return vqdmulhs_s32(a, b); 8801 } 8802 8803 // CHECK-LABEL: @test_vqrdmulhh_s16( 8804 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 8805 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 8806 // CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 8807 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VQRDMULHH_S16_I]], i64 0 8808 // CHECK: ret i16 [[TMP2]] 8809 int16_t test_vqrdmulhh_s16(int16_t a, int16_t b) { 8810 return vqrdmulhh_s16(a, b); 8811 } 8812 8813 // CHECK-LABEL: @test_vqrdmulhs_s32( 8814 // CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 %b) 8815 // CHECK: ret i32 [[VQRDMULHS_S32_I]] 8816 int32_t test_vqrdmulhs_s32(int32_t a, int32_t b) { 8817 return vqrdmulhs_s32(a, b); 8818 } 8819 8820 // CHECK-LABEL: @test_vmulxs_f32( 8821 // CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float %b) 8822 // CHECK: ret float [[VMULXS_F32_I]] 8823 float32_t test_vmulxs_f32(float32_t a, float32_t b) { 8824 return vmulxs_f32(a, b); 8825 } 8826 8827 // CHECK-LABEL: @test_vmulxd_f64( 8828 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double %b) 8829 // CHECK: ret double [[VMULXD_F64_I]] 8830 float64_t test_vmulxd_f64(float64_t a, float64_t b) { 8831 return vmulxd_f64(a, b); 8832 } 8833 8834 // CHECK-LABEL: @test_vmulx_f64( 8835 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 8836 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 8837 // CHECK: [[VMULX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmulx.v1f64(<1 x double> %a, <1 x double> %b) 8838 // CHECK: ret <1 x double> [[VMULX2_I]] 8839 float64x1_t test_vmulx_f64(float64x1_t a, float64x1_t b) { 8840 return vmulx_f64(a, b); 8841 } 8842 8843 // CHECK-LABEL: @test_vrecpss_f32( 8844 // CHECK: [[VRECPS_I:%.*]] = call float @llvm.aarch64.neon.frecps.f32(float %a, float %b) 8845 // CHECK: ret float [[VRECPS_I]] 8846 float32_t test_vrecpss_f32(float32_t a, float32_t b) { 8847 return vrecpss_f32(a, 
b); 8848 } 8849 8850 // CHECK-LABEL: @test_vrecpsd_f64( 8851 // CHECK: [[VRECPS_I:%.*]] = call double @llvm.aarch64.neon.frecps.f64(double %a, double %b) 8852 // CHECK: ret double [[VRECPS_I]] 8853 float64_t test_vrecpsd_f64(float64_t a, float64_t b) { 8854 return vrecpsd_f64(a, b); 8855 } 8856 8857 // CHECK-LABEL: @test_vrsqrtss_f32( 8858 // CHECK: [[VRSQRTSS_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrts.f32(float %a, float %b) 8859 // CHECK: ret float [[VRSQRTSS_F32_I]] 8860 float32_t test_vrsqrtss_f32(float32_t a, float32_t b) { 8861 return vrsqrtss_f32(a, b); 8862 } 8863 8864 // CHECK-LABEL: @test_vrsqrtsd_f64( 8865 // CHECK: [[VRSQRTSD_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrts.f64(double %a, double %b) 8866 // CHECK: ret double [[VRSQRTSD_F64_I]] 8867 float64_t test_vrsqrtsd_f64(float64_t a, float64_t b) { 8868 return vrsqrtsd_f64(a, b); 8869 } 8870 8871 // CHECK-LABEL: @test_vcvts_f32_s32( 8872 // CHECK: [[TMP0:%.*]] = sitofp i32 %a to float 8873 // CHECK: ret float [[TMP0]] 8874 float32_t test_vcvts_f32_s32(int32_t a) { 8875 return vcvts_f32_s32(a); 8876 } 8877 8878 // CHECK-LABEL: @test_vcvtd_f64_s64( 8879 // CHECK: [[TMP0:%.*]] = sitofp i64 %a to double 8880 // CHECK: ret double [[TMP0]] 8881 float64_t test_vcvtd_f64_s64(int64_t a) { 8882 return vcvtd_f64_s64(a); 8883 } 8884 8885 // CHECK-LABEL: @test_vcvts_f32_u32( 8886 // CHECK: [[TMP0:%.*]] = uitofp i32 %a to float 8887 // CHECK: ret float [[TMP0]] 8888 float32_t test_vcvts_f32_u32(uint32_t a) { 8889 return vcvts_f32_u32(a); 8890 } 8891 8892 // CHECK-LABEL: @test_vcvtd_f64_u64( 8893 // CHECK: [[TMP0:%.*]] = uitofp i64 %a to double 8894 // CHECK: ret double [[TMP0]] 8895 float64_t test_vcvtd_f64_u64(uint64_t a) { 8896 return vcvtd_f64_u64(a); 8897 } 8898 8899 // CHECK-LABEL: @test_vrecpes_f32( 8900 // CHECK: [[VRECPES_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpe.f32(float %a) 8901 // CHECK: ret float [[VRECPES_F32_I]] 8902 float32_t test_vrecpes_f32(float32_t a) { 8903 return vrecpes_f32(a); 8904 } 8905 8906 // CHECK-LABEL: @test_vrecped_f64( 8907 // CHECK: [[VRECPED_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpe.f64(double %a) 8908 // CHECK: ret double [[VRECPED_F64_I]] 8909 float64_t test_vrecped_f64(float64_t a) { 8910 return vrecped_f64(a); 8911 } 8912 8913 // CHECK-LABEL: @test_vrecpxs_f32( 8914 // CHECK: [[VRECPXS_F32_I:%.*]] = call float @llvm.aarch64.neon.frecpx.f32(float %a) 8915 // CHECK: ret float [[VRECPXS_F32_I]] 8916 float32_t test_vrecpxs_f32(float32_t a) { 8917 return vrecpxs_f32(a); 8918 } 8919 8920 // CHECK-LABEL: @test_vrecpxd_f64( 8921 // CHECK: [[VRECPXD_F64_I:%.*]] = call double @llvm.aarch64.neon.frecpx.f64(double %a) 8922 // CHECK: ret double [[VRECPXD_F64_I]] 8923 float64_t test_vrecpxd_f64(float64_t a) { 8924 return vrecpxd_f64(a); 8925 } 8926 8927 // CHECK-LABEL: @test_vrsqrte_u32( 8928 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 8929 // CHECK: [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.ursqrte.v2i32(<2 x i32> %a) 8930 // CHECK: ret <2 x i32> [[VRSQRTE_V1_I]] 8931 uint32x2_t test_vrsqrte_u32(uint32x2_t a) { 8932 return vrsqrte_u32(a); 8933 } 8934 8935 // CHECK-LABEL: @test_vrsqrteq_u32( 8936 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 8937 // CHECK: [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.ursqrte.v4i32(<4 x i32> %a) 8938 // CHECK: ret <4 x i32> [[VRSQRTEQ_V1_I]] 8939 uint32x4_t test_vrsqrteq_u32(uint32x4_t a) { 8940 return vrsqrteq_u32(a); 8941 } 8942 8943 // CHECK-LABEL: @test_vrsqrtes_f32( 8944 // CHECK: 
[[VRSQRTES_F32_I:%.*]] = call float @llvm.aarch64.neon.frsqrte.f32(float %a) 8945 // CHECK: ret float [[VRSQRTES_F32_I]] 8946 float32_t test_vrsqrtes_f32(float32_t a) { 8947 return vrsqrtes_f32(a); 8948 } 8949 8950 // CHECK-LABEL: @test_vrsqrted_f64( 8951 // CHECK: [[VRSQRTED_F64_I:%.*]] = call double @llvm.aarch64.neon.frsqrte.f64(double %a) 8952 // CHECK: ret double [[VRSQRTED_F64_I]] 8953 float64_t test_vrsqrted_f64(float64_t a) { 8954 return vrsqrted_f64(a); 8955 } 8956 8957 // CHECK-LABEL: @test_vld1q_u8( 8958 // CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1 8959 // CHECK: ret <16 x i8> [[TMP1]] 8960 uint8x16_t test_vld1q_u8(uint8_t const *a) { 8961 return vld1q_u8(a); 8962 } 8963 8964 // CHECK-LABEL: @test_vld1q_u16( 8965 // CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2 8966 // CHECK: ret <8 x i16> [[TMP2]] 8967 uint16x8_t test_vld1q_u16(uint16_t const *a) { 8968 return vld1q_u16(a); 8969 } 8970 8971 // CHECK-LABEL: @test_vld1q_u32( 8972 // CHECK: [[TMP2:%.*]] = load <4 x i32>, ptr %a, align 4 8973 // CHECK: ret <4 x i32> [[TMP2]] 8974 uint32x4_t test_vld1q_u32(uint32_t const *a) { 8975 return vld1q_u32(a); 8976 } 8977 8978 // CHECK-LABEL: @test_vld1q_u64( 8979 // CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr %a, align 8 8980 // CHECK: ret <2 x i64> [[TMP2]] 8981 uint64x2_t test_vld1q_u64(uint64_t const *a) { 8982 return vld1q_u64(a); 8983 } 8984 8985 // CHECK-LABEL: @test_vld1q_s8( 8986 // CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1 8987 // CHECK: ret <16 x i8> [[TMP1]] 8988 int8x16_t test_vld1q_s8(int8_t const *a) { 8989 return vld1q_s8(a); 8990 } 8991 8992 // CHECK-LABEL: @test_vld1q_s16( 8993 // CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2 8994 // CHECK: ret <8 x i16> [[TMP2]] 8995 int16x8_t test_vld1q_s16(int16_t const *a) { 8996 return vld1q_s16(a); 8997 } 8998 8999 // CHECK-LABEL: @test_vld1q_s32( 9000 // CHECK: [[TMP2:%.*]] = load <4 x i32>, ptr %a, align 4 9001 // CHECK: ret <4 x i32> [[TMP2]] 9002 int32x4_t test_vld1q_s32(int32_t const *a) { 9003 return vld1q_s32(a); 9004 } 9005 9006 // CHECK-LABEL: @test_vld1q_s64( 9007 // CHECK: [[TMP2:%.*]] = load <2 x i64>, ptr %a, align 8 9008 // CHECK: ret <2 x i64> [[TMP2]] 9009 int64x2_t test_vld1q_s64(int64_t const *a) { 9010 return vld1q_s64(a); 9011 } 9012 9013 // CHECK-LABEL: @test_vld1q_f16( 9014 // CHECK: [[TMP2:%.*]] = load <8 x half>, ptr %a, align 2 9015 // CHECK: ret <8 x half> [[TMP2]] 9016 float16x8_t test_vld1q_f16(float16_t const *a) { 9017 return vld1q_f16(a); 9018 } 9019 9020 // CHECK-LABEL: @test_vld1q_f32( 9021 // CHECK: [[TMP2:%.*]] = load <4 x float>, ptr %a, align 4 9022 // CHECK: ret <4 x float> [[TMP2]] 9023 float32x4_t test_vld1q_f32(float32_t const *a) { 9024 return vld1q_f32(a); 9025 } 9026 9027 // CHECK-LABEL: @test_vld1q_f64( 9028 // CHECK: [[TMP2:%.*]] = load <2 x double>, ptr %a, align 8 9029 // CHECK: ret <2 x double> [[TMP2]] 9030 float64x2_t test_vld1q_f64(float64_t const *a) { 9031 return vld1q_f64(a); 9032 } 9033 9034 // CHECK-LABEL: @test_vld1q_p8( 9035 // CHECK: [[TMP1:%.*]] = load <16 x i8>, ptr %a, align 1 9036 // CHECK: ret <16 x i8> [[TMP1]] 9037 poly8x16_t test_vld1q_p8(poly8_t const *a) { 9038 return vld1q_p8(a); 9039 } 9040 9041 // CHECK-LABEL: @test_vld1q_p16( 9042 // CHECK: [[TMP2:%.*]] = load <8 x i16>, ptr %a, align 2 9043 // CHECK: ret <8 x i16> [[TMP2]] 9044 poly16x8_t test_vld1q_p16(poly16_t const *a) { 9045 return vld1q_p16(a); 9046 } 9047 9048 // CHECK-LABEL: @test_vld1_u8( 9049 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 9050 // CHECK: 
ret <8 x i8> [[TMP1]] 9051 uint8x8_t test_vld1_u8(uint8_t const *a) { 9052 return vld1_u8(a); 9053 } 9054 9055 // CHECK-LABEL: @test_vld1_u16( 9056 // CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2 9057 // CHECK: ret <4 x i16> [[TMP2]] 9058 uint16x4_t test_vld1_u16(uint16_t const *a) { 9059 return vld1_u16(a); 9060 } 9061 9062 // CHECK-LABEL: @test_vld1_u32( 9063 // CHECK: [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4 9064 // CHECK: ret <2 x i32> [[TMP2]] 9065 uint32x2_t test_vld1_u32(uint32_t const *a) { 9066 return vld1_u32(a); 9067 } 9068 9069 // CHECK-LABEL: @test_vld1_u64( 9070 // CHECK: [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8 9071 // CHECK: ret <1 x i64> [[TMP2]] 9072 uint64x1_t test_vld1_u64(uint64_t const *a) { 9073 return vld1_u64(a); 9074 } 9075 9076 // CHECK-LABEL: @test_vld1_s8( 9077 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 9078 // CHECK: ret <8 x i8> [[TMP1]] 9079 int8x8_t test_vld1_s8(int8_t const *a) { 9080 return vld1_s8(a); 9081 } 9082 9083 // CHECK-LABEL: @test_vld1_s16( 9084 // CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2 9085 // CHECK: ret <4 x i16> [[TMP2]] 9086 int16x4_t test_vld1_s16(int16_t const *a) { 9087 return vld1_s16(a); 9088 } 9089 9090 // CHECK-LABEL: @test_vld1_s32( 9091 // CHECK: [[TMP2:%.*]] = load <2 x i32>, ptr %a, align 4 9092 // CHECK: ret <2 x i32> [[TMP2]] 9093 int32x2_t test_vld1_s32(int32_t const *a) { 9094 return vld1_s32(a); 9095 } 9096 9097 // CHECK-LABEL: @test_vld1_s64( 9098 // CHECK: [[TMP2:%.*]] = load <1 x i64>, ptr %a, align 8 9099 // CHECK: ret <1 x i64> [[TMP2]] 9100 int64x1_t test_vld1_s64(int64_t const *a) { 9101 return vld1_s64(a); 9102 } 9103 9104 // CHECK-LABEL: @test_vld1_f16( 9105 // CHECK: [[TMP2:%.*]] = load <4 x half>, ptr %a, align 2 9106 // CHECK: ret <4 x half> [[TMP2]] 9107 float16x4_t test_vld1_f16(float16_t const *a) { 9108 return vld1_f16(a); 9109 } 9110 9111 // CHECK-LABEL: @test_vld1_f32( 9112 // CHECK: [[TMP2:%.*]] = load <2 x float>, ptr %a, align 4 9113 // CHECK: ret <2 x float> [[TMP2]] 9114 float32x2_t test_vld1_f32(float32_t const *a) { 9115 return vld1_f32(a); 9116 } 9117 9118 // CHECK-LABEL: @test_vld1_f64( 9119 // CHECK: [[TMP2:%.*]] = load <1 x double>, ptr %a, align 8 9120 // CHECK: ret <1 x double> [[TMP2]] 9121 float64x1_t test_vld1_f64(float64_t const *a) { 9122 return vld1_f64(a); 9123 } 9124 9125 // CHECK-LABEL: @test_vld1_p8( 9126 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 9127 // CHECK: ret <8 x i8> [[TMP1]] 9128 poly8x8_t test_vld1_p8(poly8_t const *a) { 9129 return vld1_p8(a); 9130 } 9131 9132 // CHECK-LABEL: @test_vld1_p16( 9133 // CHECK: [[TMP2:%.*]] = load <4 x i16>, ptr %a, align 2 9134 // CHECK: ret <4 x i16> [[TMP2]] 9135 poly16x4_t test_vld1_p16(poly16_t const *a) { 9136 return vld1_p16(a); 9137 } 9138 9139 // CHECK-LABEL: @test_vld1_u8_void( 9140 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 9141 // CHECK: ret <8 x i8> [[TMP1]] 9142 uint8x8_t test_vld1_u8_void(void *a) { 9143 return vld1_u8(a); 9144 } 9145 9146 // CHECK-LABEL: @test_vld1_u16_void( 9147 // CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1 9148 // CHECK: ret <4 x i16> [[TMP1]] 9149 uint16x4_t test_vld1_u16_void(void *a) { 9150 return vld1_u16(a); 9151 } 9152 9153 // CHECK-LABEL: @test_vld1_u32_void( 9154 // CHECK: [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1 9155 // CHECK: ret <2 x i32> [[TMP1]] 9156 uint32x2_t test_vld1_u32_void(void *a) { 9157 return vld1_u32(a); 9158 } 9159 9160 // CHECK-LABEL: @test_vld1_u64_void( 9161 // CHECK: [[TMP1:%.*]] = load <1 x 
i64>, ptr %a, align 1 9162 // CHECK: ret <1 x i64> [[TMP1]] 9163 uint64x1_t test_vld1_u64_void(void *a) { 9164 return vld1_u64(a); 9165 } 9166 9167 // CHECK-LABEL: @test_vld1_s8_void( 9168 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 9169 // CHECK: ret <8 x i8> [[TMP1]] 9170 int8x8_t test_vld1_s8_void(void *a) { 9171 return vld1_s8(a); 9172 } 9173 9174 // CHECK-LABEL: @test_vld1_s16_void( 9175 // CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1 9176 // CHECK: ret <4 x i16> [[TMP1]] 9177 int16x4_t test_vld1_s16_void(void *a) { 9178 return vld1_s16(a); 9179 } 9180 9181 // CHECK-LABEL: @test_vld1_s32_void( 9182 // CHECK: [[TMP1:%.*]] = load <2 x i32>, ptr %a, align 1 9183 // CHECK: ret <2 x i32> [[TMP1]] 9184 int32x2_t test_vld1_s32_void(void *a) { 9185 return vld1_s32(a); 9186 } 9187 9188 // CHECK-LABEL: @test_vld1_s64_void( 9189 // CHECK: [[TMP1:%.*]] = load <1 x i64>, ptr %a, align 1 9190 // CHECK: ret <1 x i64> [[TMP1]] 9191 int64x1_t test_vld1_s64_void(void *a) { 9192 return vld1_s64(a); 9193 } 9194 9195 // CHECK-LABEL: @test_vld1_f16_void( 9196 // CHECK: [[TMP1:%.*]] = load <4 x half>, ptr %a, align 1 9197 // CHECK: ret <4 x half> [[TMP1]] 9198 float16x4_t test_vld1_f16_void(void *a) { 9199 return vld1_f16(a); 9200 } 9201 9202 // CHECK-LABEL: @test_vld1_f32_void( 9203 // CHECK: [[TMP1:%.*]] = load <2 x float>, ptr %a, align 1 9204 // CHECK: ret <2 x float> [[TMP1]] 9205 float32x2_t test_vld1_f32_void(void *a) { 9206 return vld1_f32(a); 9207 } 9208 9209 // CHECK-LABEL: @test_vld1_f64_void( 9210 // CHECK: [[TMP1:%.*]] = load <1 x double>, ptr %a, align 1 9211 // CHECK: ret <1 x double> [[TMP1]] 9212 float64x1_t test_vld1_f64_void(void *a) { 9213 return vld1_f64(a); 9214 } 9215 9216 // CHECK-LABEL: @test_vld1_p8_void( 9217 // CHECK: [[TMP1:%.*]] = load <8 x i8>, ptr %a, align 1 9218 // CHECK: ret <8 x i8> [[TMP1]] 9219 poly8x8_t test_vld1_p8_void(void *a) { 9220 return vld1_p8(a); 9221 } 9222 9223 // CHECK-LABEL: @test_vld1_p16_void( 9224 // CHECK: [[TMP1:%.*]] = load <4 x i16>, ptr %a, align 1 9225 // CHECK: ret <4 x i16> [[TMP1]] 9226 poly16x4_t test_vld1_p16_void(void *a) { 9227 return vld1_p16(a); 9228 } 9229 9230 // CHECK-LABEL: @test_vld2q_u8( 9231 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16 9232 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16 9233 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a) 9234 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]] 9235 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9236 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x2_t, ptr [[RETVAL]], align 16 9237 // CHECK: ret %struct.uint8x16x2_t [[TMP5]] 9238 uint8x16x2_t test_vld2q_u8(uint8_t const *a) { 9239 return vld2q_u8(a); 9240 } 9241 9242 // CHECK-LABEL: @test_vld2q_u16( 9243 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16 9244 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16 9245 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a) 9246 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]] 9247 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9248 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x2_t, ptr [[RETVAL]], align 16 9249 // CHECK: ret %struct.uint16x8x2_t [[TMP6]] 9250 uint16x8x2_t test_vld2q_u16(uint16_t const *a) { 9251 return vld2q_u16(a); 9252 } 9253 9254 // CHECK-LABEL: 
@test_vld2q_u32( 9255 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16 9256 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16 9257 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a) 9258 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]] 9259 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9260 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x2_t, ptr [[RETVAL]], align 16 9261 // CHECK: ret %struct.uint32x4x2_t [[TMP6]] 9262 uint32x4x2_t test_vld2q_u32(uint32_t const *a) { 9263 return vld2q_u32(a); 9264 } 9265 9266 // CHECK-LABEL: @test_vld2q_u64( 9267 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16 9268 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16 9269 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a) 9270 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]] 9271 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9272 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x2_t, ptr [[RETVAL]], align 16 9273 // CHECK: ret %struct.uint64x2x2_t [[TMP6]] 9274 uint64x2x2_t test_vld2q_u64(uint64_t const *a) { 9275 return vld2q_u64(a); 9276 } 9277 9278 // CHECK-LABEL: @test_vld2q_s8( 9279 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16 9280 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16 9281 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a) 9282 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]] 9283 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9284 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x2_t, ptr [[RETVAL]], align 16 9285 // CHECK: ret %struct.int8x16x2_t [[TMP5]] 9286 int8x16x2_t test_vld2q_s8(int8_t const *a) { 9287 return vld2q_s8(a); 9288 } 9289 9290 // CHECK-LABEL: @test_vld2q_s16( 9291 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16 9292 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16 9293 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr %a) 9294 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]] 9295 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9296 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x2_t, ptr [[RETVAL]], align 16 9297 // CHECK: ret %struct.int16x8x2_t [[TMP6]] 9298 int16x8x2_t test_vld2q_s16(int16_t const *a) { 9299 return vld2q_s16(a); 9300 } 9301 9302 // CHECK-LABEL: @test_vld2q_s32( 9303 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16 9304 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16 9305 // CHECK: [[VLD2:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr %a) 9306 // CHECK: store { <4 x i32>, <4 x i32> } [[VLD2]], ptr [[__RET]] 9307 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9308 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x2_t, ptr [[RETVAL]], align 16 9309 // CHECK: ret %struct.int32x4x2_t [[TMP6]] 9310 int32x4x2_t test_vld2q_s32(int32_t const *a) { 9311 return vld2q_s32(a); 9312 } 9313 9314 // CHECK-LABEL: @test_vld2q_s64( 9315 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16 9316 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 
16 9317 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr %a) 9318 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], ptr [[__RET]] 9319 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9320 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x2_t, ptr [[RETVAL]], align 16 9321 // CHECK: ret %struct.int64x2x2_t [[TMP6]] 9322 int64x2x2_t test_vld2q_s64(int64_t const *a) { 9323 return vld2q_s64(a); 9324 } 9325 9326 // CHECK-LABEL: @test_vld2q_f16( 9327 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16 9328 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16 9329 // CHECK: [[VLD2:%.*]] = call { <8 x half>, <8 x half> } @llvm.aarch64.neon.ld2.v8f16.p0(ptr %a) 9330 // CHECK: store { <8 x half>, <8 x half> } [[VLD2]], ptr [[__RET]] 9331 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9332 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x2_t, ptr [[RETVAL]], align 16 9333 // CHECK: ret %struct.float16x8x2_t [[TMP6]] 9334 float16x8x2_t test_vld2q_f16(float16_t const *a) { 9335 return vld2q_f16(a); 9336 } 9337 9338 // CHECK-LABEL: @test_vld2q_f32( 9339 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16 9340 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16 9341 // CHECK: [[VLD2:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %a) 9342 // CHECK: store { <4 x float>, <4 x float> } [[VLD2]], ptr [[__RET]] 9343 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9344 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x2_t, ptr [[RETVAL]], align 16 9345 // CHECK: ret %struct.float32x4x2_t [[TMP6]] 9346 float32x4x2_t test_vld2q_f32(float32_t const *a) { 9347 return vld2q_f32(a); 9348 } 9349 9350 // CHECK-LABEL: @test_vld2q_f64( 9351 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16 9352 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16 9353 // CHECK: [[VLD2:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr %a) 9354 // CHECK: store { <2 x double>, <2 x double> } [[VLD2]], ptr [[__RET]] 9355 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9356 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16 9357 // CHECK: ret %struct.float64x2x2_t [[TMP6]] 9358 float64x2x2_t test_vld2q_f64(float64_t const *a) { 9359 return vld2q_f64(a); 9360 } 9361 9362 // CHECK-LABEL: @test_vld2q_p8( 9363 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16 9364 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16 9365 // CHECK: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr %a) 9366 // CHECK: store { <16 x i8>, <16 x i8> } [[VLD2]], ptr [[__RET]] 9367 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9368 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x2_t, ptr [[RETVAL]], align 16 9369 // CHECK: ret %struct.poly8x16x2_t [[TMP5]] 9370 poly8x16x2_t test_vld2q_p8(poly8_t const *a) { 9371 return vld2q_p8(a); 9372 } 9373 9374 // CHECK-LABEL: @test_vld2q_p16( 9375 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16 9376 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16 9377 // CHECK: [[VLD2:%.*]] = call { <8 x i16>, <8 x i16> } 
@llvm.aarch64.neon.ld2.v8i16.p0(ptr %a) 9378 // CHECK: store { <8 x i16>, <8 x i16> } [[VLD2]], ptr [[__RET]] 9379 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false) 9380 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x2_t, ptr [[RETVAL]], align 16 9381 // CHECK: ret %struct.poly16x8x2_t [[TMP6]] 9382 poly16x8x2_t test_vld2q_p16(poly16_t const *a) { 9383 return vld2q_p16(a); 9384 } 9385 9386 // CHECK-LABEL: @test_vld2_u8( 9387 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8 9388 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8 9389 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a) 9390 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]] 9391 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9392 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x2_t, ptr [[RETVAL]], align 8 9393 // CHECK: ret %struct.uint8x8x2_t [[TMP5]] 9394 uint8x8x2_t test_vld2_u8(uint8_t const *a) { 9395 return vld2_u8(a); 9396 } 9397 9398 // CHECK-LABEL: @test_vld2_u16( 9399 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8 9400 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8 9401 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a) 9402 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]] 9403 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9404 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x2_t, ptr [[RETVAL]], align 8 9405 // CHECK: ret %struct.uint16x4x2_t [[TMP6]] 9406 uint16x4x2_t test_vld2_u16(uint16_t const *a) { 9407 return vld2_u16(a); 9408 } 9409 9410 // CHECK-LABEL: @test_vld2_u32( 9411 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8 9412 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8 9413 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a) 9414 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]] 9415 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9416 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x2_t, ptr [[RETVAL]], align 8 9417 // CHECK: ret %struct.uint32x2x2_t [[TMP6]] 9418 uint32x2x2_t test_vld2_u32(uint32_t const *a) { 9419 return vld2_u32(a); 9420 } 9421 9422 // CHECK-LABEL: @test_vld2_u64( 9423 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8 9424 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8 9425 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a) 9426 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]] 9427 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9428 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x2_t, ptr [[RETVAL]], align 8 9429 // CHECK: ret %struct.uint64x1x2_t [[TMP6]] 9430 uint64x1x2_t test_vld2_u64(uint64_t const *a) { 9431 return vld2_u64(a); 9432 } 9433 9434 // CHECK-LABEL: @test_vld2_s8( 9435 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8 9436 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8 9437 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a) 9438 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]] 9439 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr 
align 8 [[__RET]], i64 16, i1 false) 9440 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x2_t, ptr [[RETVAL]], align 8 9441 // CHECK: ret %struct.int8x8x2_t [[TMP5]] 9442 int8x8x2_t test_vld2_s8(int8_t const *a) { 9443 return vld2_s8(a); 9444 } 9445 9446 // CHECK-LABEL: @test_vld2_s16( 9447 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8 9448 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8 9449 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a) 9450 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]] 9451 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9452 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x2_t, ptr [[RETVAL]], align 8 9453 // CHECK: ret %struct.int16x4x2_t [[TMP6]] 9454 int16x4x2_t test_vld2_s16(int16_t const *a) { 9455 return vld2_s16(a); 9456 } 9457 9458 // CHECK-LABEL: @test_vld2_s32( 9459 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8 9460 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8 9461 // CHECK: [[VLD2:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr %a) 9462 // CHECK: store { <2 x i32>, <2 x i32> } [[VLD2]], ptr [[__RET]] 9463 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9464 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x2_t, ptr [[RETVAL]], align 8 9465 // CHECK: ret %struct.int32x2x2_t [[TMP6]] 9466 int32x2x2_t test_vld2_s32(int32_t const *a) { 9467 return vld2_s32(a); 9468 } 9469 9470 // CHECK-LABEL: @test_vld2_s64( 9471 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8 9472 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8 9473 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr %a) 9474 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], ptr [[__RET]] 9475 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9476 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x2_t, ptr [[RETVAL]], align 8 9477 // CHECK: ret %struct.int64x1x2_t [[TMP6]] 9478 int64x1x2_t test_vld2_s64(int64_t const *a) { 9479 return vld2_s64(a); 9480 } 9481 9482 // CHECK-LABEL: @test_vld2_f16( 9483 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8 9484 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8 9485 // CHECK: [[VLD2:%.*]] = call { <4 x half>, <4 x half> } @llvm.aarch64.neon.ld2.v4f16.p0(ptr %a) 9486 // CHECK: store { <4 x half>, <4 x half> } [[VLD2]], ptr [[__RET]] 9487 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9488 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x2_t, ptr [[RETVAL]], align 8 9489 // CHECK: ret %struct.float16x4x2_t [[TMP6]] 9490 float16x4x2_t test_vld2_f16(float16_t const *a) { 9491 return vld2_f16(a); 9492 } 9493 9494 // CHECK-LABEL: @test_vld2_f32( 9495 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8 9496 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8 9497 // CHECK: [[VLD2:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0(ptr %a) 9498 // CHECK: store { <2 x float>, <2 x float> } [[VLD2]], ptr [[__RET]] 9499 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9500 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x2_t, ptr [[RETVAL]], align 8 9501 // CHECK: ret %struct.float32x2x2_t [[TMP6]] 9502 
float32x2x2_t test_vld2_f32(float32_t const *a) { 9503 return vld2_f32(a); 9504 } 9505 9506 // CHECK-LABEL: @test_vld2_f64( 9507 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8 9508 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8 9509 // CHECK: [[VLD2:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr %a) 9510 // CHECK: store { <1 x double>, <1 x double> } [[VLD2]], ptr [[__RET]] 9511 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9512 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8 9513 // CHECK: ret %struct.float64x1x2_t [[TMP6]] 9514 float64x1x2_t test_vld2_f64(float64_t const *a) { 9515 return vld2_f64(a); 9516 } 9517 9518 // CHECK-LABEL: @test_vld2_p8( 9519 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8 9520 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8 9521 // CHECK: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %a) 9522 // CHECK: store { <8 x i8>, <8 x i8> } [[VLD2]], ptr [[__RET]] 9523 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9524 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x2_t, ptr [[RETVAL]], align 8 9525 // CHECK: ret %struct.poly8x8x2_t [[TMP5]] 9526 poly8x8x2_t test_vld2_p8(poly8_t const *a) { 9527 return vld2_p8(a); 9528 } 9529 9530 // CHECK-LABEL: @test_vld2_p16( 9531 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8 9532 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8 9533 // CHECK: [[VLD2:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr %a) 9534 // CHECK: store { <4 x i16>, <4 x i16> } [[VLD2]], ptr [[__RET]] 9535 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false) 9536 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x2_t, ptr [[RETVAL]], align 8 9537 // CHECK: ret %struct.poly16x4x2_t [[TMP6]] 9538 poly16x4x2_t test_vld2_p16(poly16_t const *a) { 9539 return vld2_p16(a); 9540 } 9541 9542 // CHECK-LABEL: @test_vld3q_u8( 9543 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16 9544 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16 9545 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a) 9546 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]] 9547 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9548 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x3_t, ptr [[RETVAL]], align 16 9549 // CHECK: ret %struct.uint8x16x3_t [[TMP5]] 9550 uint8x16x3_t test_vld3q_u8(uint8_t const *a) { 9551 return vld3q_u8(a); 9552 } 9553 9554 // CHECK-LABEL: @test_vld3q_u16( 9555 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16 9556 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16 9557 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a) 9558 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]] 9559 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9560 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x3_t, ptr [[RETVAL]], align 16 9561 // CHECK: ret %struct.uint16x8x3_t [[TMP6]] 9562 uint16x8x3_t test_vld3q_u16(uint16_t const *a) { 9563 return vld3q_u16(a); 9564 } 9565 9566 // CHECK-LABEL: 
@test_vld3q_u32( 9567 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16 9568 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16 9569 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a) 9570 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]] 9571 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9572 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x3_t, ptr [[RETVAL]], align 16 9573 // CHECK: ret %struct.uint32x4x3_t [[TMP6]] 9574 uint32x4x3_t test_vld3q_u32(uint32_t const *a) { 9575 return vld3q_u32(a); 9576 } 9577 9578 // CHECK-LABEL: @test_vld3q_u64( 9579 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16 9580 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16 9581 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a) 9582 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]] 9583 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9584 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x3_t, ptr [[RETVAL]], align 16 9585 // CHECK: ret %struct.uint64x2x3_t [[TMP6]] 9586 uint64x2x3_t test_vld3q_u64(uint64_t const *a) { 9587 return vld3q_u64(a); 9588 } 9589 9590 // CHECK-LABEL: @test_vld3q_s8( 9591 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16 9592 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16 9593 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a) 9594 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]] 9595 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9596 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x3_t, ptr [[RETVAL]], align 16 9597 // CHECK: ret %struct.int8x16x3_t [[TMP5]] 9598 int8x16x3_t test_vld3q_s8(int8_t const *a) { 9599 return vld3q_s8(a); 9600 } 9601 9602 // CHECK-LABEL: @test_vld3q_s16( 9603 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16 9604 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16 9605 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a) 9606 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]] 9607 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9608 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x3_t, ptr [[RETVAL]], align 16 9609 // CHECK: ret %struct.int16x8x3_t [[TMP6]] 9610 int16x8x3_t test_vld3q_s16(int16_t const *a) { 9611 return vld3q_s16(a); 9612 } 9613 9614 // CHECK-LABEL: @test_vld3q_s32( 9615 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16 9616 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16 9617 // CHECK: [[VLD3:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr %a) 9618 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32> } [[VLD3]], ptr [[__RET]] 9619 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9620 // CHECK: [[TMP6:%.*]] = load %struct.int32x4x3_t, ptr [[RETVAL]], align 16 9621 // CHECK: ret %struct.int32x4x3_t [[TMP6]] 9622 int32x4x3_t test_vld3q_s32(int32_t const *a) { 9623 return vld3q_s32(a); 9624 } 9625 9626 // CHECK-LABEL: @test_vld3q_s64( 9627 // CHECK: 
[[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16 9628 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16 9629 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr %a) 9630 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], ptr [[__RET]] 9631 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9632 // CHECK: [[TMP6:%.*]] = load %struct.int64x2x3_t, ptr [[RETVAL]], align 16 9633 // CHECK: ret %struct.int64x2x3_t [[TMP6]] 9634 int64x2x3_t test_vld3q_s64(int64_t const *a) { 9635 return vld3q_s64(a); 9636 } 9637 9638 // CHECK-LABEL: @test_vld3q_f16( 9639 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16 9640 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16 9641 // CHECK: [[VLD3:%.*]] = call { <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld3.v8f16.p0(ptr %a) 9642 // CHECK: store { <8 x half>, <8 x half>, <8 x half> } [[VLD3]], ptr [[__RET]] 9643 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9644 // CHECK: [[TMP6:%.*]] = load %struct.float16x8x3_t, ptr [[RETVAL]], align 16 9645 // CHECK: ret %struct.float16x8x3_t [[TMP6]] 9646 float16x8x3_t test_vld3q_f16(float16_t const *a) { 9647 return vld3q_f16(a); 9648 } 9649 9650 // CHECK-LABEL: @test_vld3q_f32( 9651 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16 9652 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16 9653 // CHECK: [[VLD3:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0(ptr %a) 9654 // CHECK: store { <4 x float>, <4 x float>, <4 x float> } [[VLD3]], ptr [[__RET]] 9655 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9656 // CHECK: [[TMP6:%.*]] = load %struct.float32x4x3_t, ptr [[RETVAL]], align 16 9657 // CHECK: ret %struct.float32x4x3_t [[TMP6]] 9658 float32x4x3_t test_vld3q_f32(float32_t const *a) { 9659 return vld3q_f32(a); 9660 } 9661 9662 // CHECK-LABEL: @test_vld3q_f64( 9663 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16 9664 // CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16 9665 // CHECK: [[VLD3:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0(ptr %a) 9666 // CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD3]], ptr [[__RET]] 9667 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9668 // CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16 9669 // CHECK: ret %struct.float64x2x3_t [[TMP6]] 9670 float64x2x3_t test_vld3q_f64(float64_t const *a) { 9671 return vld3q_f64(a); 9672 } 9673 9674 // CHECK-LABEL: @test_vld3q_p8( 9675 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16 9676 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16 9677 // CHECK: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %a) 9678 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], ptr [[__RET]] 9679 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9680 // CHECK: [[TMP5:%.*]] = load %struct.poly8x16x3_t, ptr [[RETVAL]], align 16 9681 // CHECK: ret %struct.poly8x16x3_t [[TMP5]] 9682 poly8x16x3_t test_vld3q_p8(poly8_t const *a) { 9683 return vld3q_p8(a); 9684 } 9685 9686 // CHECK-LABEL: 
@test_vld3q_p16( 9687 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16 9688 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16 9689 // CHECK: [[VLD3:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr %a) 9690 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16> } [[VLD3]], ptr [[__RET]] 9691 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false) 9692 // CHECK: [[TMP6:%.*]] = load %struct.poly16x8x3_t, ptr [[RETVAL]], align 16 9693 // CHECK: ret %struct.poly16x8x3_t [[TMP6]] 9694 poly16x8x3_t test_vld3q_p16(poly16_t const *a) { 9695 return vld3q_p16(a); 9696 } 9697 9698 // CHECK-LABEL: @test_vld3_u8( 9699 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8 9700 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8 9701 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a) 9702 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]] 9703 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9704 // CHECK: [[TMP5:%.*]] = load %struct.uint8x8x3_t, ptr [[RETVAL]], align 8 9705 // CHECK: ret %struct.uint8x8x3_t [[TMP5]] 9706 uint8x8x3_t test_vld3_u8(uint8_t const *a) { 9707 return vld3_u8(a); 9708 } 9709 9710 // CHECK-LABEL: @test_vld3_u16( 9711 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8 9712 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8 9713 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a) 9714 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]] 9715 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9716 // CHECK: [[TMP6:%.*]] = load %struct.uint16x4x3_t, ptr [[RETVAL]], align 8 9717 // CHECK: ret %struct.uint16x4x3_t [[TMP6]] 9718 uint16x4x3_t test_vld3_u16(uint16_t const *a) { 9719 return vld3_u16(a); 9720 } 9721 9722 // CHECK-LABEL: @test_vld3_u32( 9723 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8 9724 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8 9725 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a) 9726 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]] 9727 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9728 // CHECK: [[TMP6:%.*]] = load %struct.uint32x2x3_t, ptr [[RETVAL]], align 8 9729 // CHECK: ret %struct.uint32x2x3_t [[TMP6]] 9730 uint32x2x3_t test_vld3_u32(uint32_t const *a) { 9731 return vld3_u32(a); 9732 } 9733 9734 // CHECK-LABEL: @test_vld3_u64( 9735 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8 9736 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8 9737 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a) 9738 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]] 9739 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9740 // CHECK: [[TMP6:%.*]] = load %struct.uint64x1x3_t, ptr [[RETVAL]], align 8 9741 // CHECK: ret %struct.uint64x1x3_t [[TMP6]] 9742 uint64x1x3_t test_vld3_u64(uint64_t const *a) { 9743 return vld3_u64(a); 9744 } 9745 9746 // CHECK-LABEL: @test_vld3_s8( 9747 // CHECK: [[RETVAL:%.*]] = alloca 
%struct.int8x8x3_t, align 8 9748 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8 9749 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a) 9750 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]] 9751 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9752 // CHECK: [[TMP5:%.*]] = load %struct.int8x8x3_t, ptr [[RETVAL]], align 8 9753 // CHECK: ret %struct.int8x8x3_t [[TMP5]] 9754 int8x8x3_t test_vld3_s8(int8_t const *a) { 9755 return vld3_s8(a); 9756 } 9757 9758 // CHECK-LABEL: @test_vld3_s16( 9759 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8 9760 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8 9761 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a) 9762 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]] 9763 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9764 // CHECK: [[TMP6:%.*]] = load %struct.int16x4x3_t, ptr [[RETVAL]], align 8 9765 // CHECK: ret %struct.int16x4x3_t [[TMP6]] 9766 int16x4x3_t test_vld3_s16(int16_t const *a) { 9767 return vld3_s16(a); 9768 } 9769 9770 // CHECK-LABEL: @test_vld3_s32( 9771 // CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8 9772 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8 9773 // CHECK: [[VLD3:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr %a) 9774 // CHECK: store { <2 x i32>, <2 x i32>, <2 x i32> } [[VLD3]], ptr [[__RET]] 9775 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9776 // CHECK: [[TMP6:%.*]] = load %struct.int32x2x3_t, ptr [[RETVAL]], align 8 9777 // CHECK: ret %struct.int32x2x3_t [[TMP6]] 9778 int32x2x3_t test_vld3_s32(int32_t const *a) { 9779 return vld3_s32(a); 9780 } 9781 9782 // CHECK-LABEL: @test_vld3_s64( 9783 // CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8 9784 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8 9785 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr %a) 9786 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], ptr [[__RET]] 9787 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9788 // CHECK: [[TMP6:%.*]] = load %struct.int64x1x3_t, ptr [[RETVAL]], align 8 9789 // CHECK: ret %struct.int64x1x3_t [[TMP6]] 9790 int64x1x3_t test_vld3_s64(int64_t const *a) { 9791 return vld3_s64(a); 9792 } 9793 9794 // CHECK-LABEL: @test_vld3_f16( 9795 // CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8 9796 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8 9797 // CHECK: [[VLD3:%.*]] = call { <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld3.v4f16.p0(ptr %a) 9798 // CHECK: store { <4 x half>, <4 x half>, <4 x half> } [[VLD3]], ptr [[__RET]] 9799 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9800 // CHECK: [[TMP6:%.*]] = load %struct.float16x4x3_t, ptr [[RETVAL]], align 8 9801 // CHECK: ret %struct.float16x4x3_t [[TMP6]] 9802 float16x4x3_t test_vld3_f16(float16_t const *a) { 9803 return vld3_f16(a); 9804 } 9805 9806 // CHECK-LABEL: @test_vld3_f32( 9807 // CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8 9808 // CHECK: [[__RET:%.*]] = alloca 
%struct.float32x2x3_t, align 8 9809 // CHECK: [[VLD3:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0(ptr %a) 9810 // CHECK: store { <2 x float>, <2 x float>, <2 x float> } [[VLD3]], ptr [[__RET]] 9811 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9812 // CHECK: [[TMP6:%.*]] = load %struct.float32x2x3_t, ptr [[RETVAL]], align 8 9813 // CHECK: ret %struct.float32x2x3_t [[TMP6]] 9814 float32x2x3_t test_vld3_f32(float32_t const *a) { 9815 return vld3_f32(a); 9816 } 9817 9818 // CHECK-LABEL: @test_vld3_f64( 9819 // CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8 9820 // CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8 9821 // CHECK: [[VLD3:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr %a) 9822 // CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD3]], ptr [[__RET]] 9823 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9824 // CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8 9825 // CHECK: ret %struct.float64x1x3_t [[TMP6]] 9826 float64x1x3_t test_vld3_f64(float64_t const *a) { 9827 return vld3_f64(a); 9828 } 9829 9830 // CHECK-LABEL: @test_vld3_p8( 9831 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8 9832 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8 9833 // CHECK: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %a) 9834 // CHECK: store { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], ptr [[__RET]] 9835 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9836 // CHECK: [[TMP5:%.*]] = load %struct.poly8x8x3_t, ptr [[RETVAL]], align 8 9837 // CHECK: ret %struct.poly8x8x3_t [[TMP5]] 9838 poly8x8x3_t test_vld3_p8(poly8_t const *a) { 9839 return vld3_p8(a); 9840 } 9841 9842 // CHECK-LABEL: @test_vld3_p16( 9843 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8 9844 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8 9845 // CHECK: [[VLD3:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr %a) 9846 // CHECK: store { <4 x i16>, <4 x i16>, <4 x i16> } [[VLD3]], ptr [[__RET]] 9847 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false) 9848 // CHECK: [[TMP6:%.*]] = load %struct.poly16x4x3_t, ptr [[RETVAL]], align 8 9849 // CHECK: ret %struct.poly16x4x3_t [[TMP6]] 9850 poly16x4x3_t test_vld3_p16(poly16_t const *a) { 9851 return vld3_p16(a); 9852 } 9853 9854 // CHECK-LABEL: @test_vld4q_u8( 9855 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16 9856 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16 9857 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a) 9858 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]] 9859 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 9860 // CHECK: [[TMP5:%.*]] = load %struct.uint8x16x4_t, ptr [[RETVAL]], align 16 9861 // CHECK: ret %struct.uint8x16x4_t [[TMP5]] 9862 uint8x16x4_t test_vld4q_u8(uint8_t const *a) { 9863 return vld4q_u8(a); 9864 } 9865 9866 // CHECK-LABEL: @test_vld4q_u16( 9867 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16 9868 // CHECK: [[__RET:%.*]] = alloca 
%struct.uint16x8x4_t, align 16 9869 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a) 9870 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]] 9871 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 9872 // CHECK: [[TMP6:%.*]] = load %struct.uint16x8x4_t, ptr [[RETVAL]], align 16 9873 // CHECK: ret %struct.uint16x8x4_t [[TMP6]] 9874 uint16x8x4_t test_vld4q_u16(uint16_t const *a) { 9875 return vld4q_u16(a); 9876 } 9877 9878 // CHECK-LABEL: @test_vld4q_u32( 9879 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16 9880 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16 9881 // CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a) 9882 // CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]] 9883 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 9884 // CHECK: [[TMP6:%.*]] = load %struct.uint32x4x4_t, ptr [[RETVAL]], align 16 9885 // CHECK: ret %struct.uint32x4x4_t [[TMP6]] 9886 uint32x4x4_t test_vld4q_u32(uint32_t const *a) { 9887 return vld4q_u32(a); 9888 } 9889 9890 // CHECK-LABEL: @test_vld4q_u64( 9891 // CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16 9892 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16 9893 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a) 9894 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]] 9895 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 9896 // CHECK: [[TMP6:%.*]] = load %struct.uint64x2x4_t, ptr [[RETVAL]], align 16 9897 // CHECK: ret %struct.uint64x2x4_t [[TMP6]] 9898 uint64x2x4_t test_vld4q_u64(uint64_t const *a) { 9899 return vld4q_u64(a); 9900 } 9901 9902 // CHECK-LABEL: @test_vld4q_s8( 9903 // CHECK: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16 9904 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16 9905 // CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a) 9906 // CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]] 9907 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 9908 // CHECK: [[TMP5:%.*]] = load %struct.int8x16x4_t, ptr [[RETVAL]], align 16 9909 // CHECK: ret %struct.int8x16x4_t [[TMP5]] 9910 int8x16x4_t test_vld4q_s8(int8_t const *a) { 9911 return vld4q_s8(a); 9912 } 9913 9914 // CHECK-LABEL: @test_vld4q_s16( 9915 // CHECK: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16 9916 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16 9917 // CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a) 9918 // CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]] 9919 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false) 9920 // CHECK: [[TMP6:%.*]] = load %struct.int16x8x4_t, ptr [[RETVAL]], align 16 9921 // CHECK: ret %struct.int16x8x4_t [[TMP6]] 9922 int16x8x4_t test_vld4q_s16(int16_t const *a) { 9923 return vld4q_s16(a); 9924 } 9925 9926 // CHECK-LABEL: @test_vld4q_s32( 9927 // CHECK: [[RETVAL:%.*]] = 
alloca %struct.int32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK: [[VLD4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr %a)
// CHECK: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x4x4_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.int32x4x4_t [[TMP6]]
int32x4x4_t test_vld4q_s32(int32_t const *a) {
  return vld4q_s32(a);
}

// CHECK-LABEL: @test_vld4q_s64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr %a)
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x2x4_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.int64x2x4_t [[TMP6]]
int64x2x4_t test_vld4q_s64(int64_t const *a) {
  return vld4q_s64(a);
}

// CHECK-LABEL: @test_vld4q_f16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[VLD4:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.aarch64.neon.ld4.v8f16.p0(ptr %a)
// CHECK: store { <8 x half>, <8 x half>, <8 x half>, <8 x half> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x8x4_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.float16x8x4_t [[TMP6]]
float16x8x4_t test_vld4q_f16(float16_t const *a) {
  return vld4q_f16(a);
}

// CHECK-LABEL: @test_vld4q_f32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[VLD4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0(ptr %a)
// CHECK: store { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x4x4_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.float32x4x4_t [[TMP6]]
float32x4x4_t test_vld4q_f32(float32_t const *a) {
  return vld4q_f32(a);
}

// CHECK-LABEL: @test_vld4q_f64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[VLD4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0(ptr %a)
// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x4_t [[TMP6]]
float64x2x4_t test_vld4q_f64(float64_t const *a) {
  return vld4q_f64(a);
}

// CHECK-LABEL: @test_vld4q_p8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %a)
// CHECK: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x16x4_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.poly8x16x4_t [[TMP5]]
poly8x16x4_t test_vld4q_p8(poly8_t const *a) {
  return vld4q_p8(a);
}

// CHECK-LABEL: @test_vld4q_p16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[VLD4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr %a)
// CHECK: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x8x4_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.poly16x8x4_t [[TMP6]]
poly16x8x4_t test_vld4q_p16(poly16_t const *a) {
  return vld4q_p16(a);
}

// CHECK-LABEL: @test_vld4_u8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.uint8x8x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.uint8x8x4_t [[TMP5]]
uint8x8x4_t test_vld4_u8(uint8_t const *a) {
  return vld4_u8(a);
}

// CHECK-LABEL: @test_vld4_u16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint16x4x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.uint16x4x4_t [[TMP6]]
uint16x4x4_t test_vld4_u16(uint16_t const *a) {
  return vld4_u16(a);
}

// CHECK-LABEL: @test_vld4_u32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a)
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint32x2x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.uint32x2x4_t [[TMP6]]
uint32x2x4_t test_vld4_u32(uint32_t const *a) {
  return vld4_u32(a);
}

// CHECK-LABEL: @test_vld4_u64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.uint64x1x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.uint64x1x4_t [[TMP6]]
uint64x1x4_t test_vld4_u64(uint64_t const *a) {
  return vld4_u64(a);
}

// CHECK-LABEL: @test_vld4_s8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.int8x8x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.int8x8x4_t [[TMP5]]
int8x8x4_t test_vld4_s8(int8_t const *a) {
  return vld4_s8(a);
}

// CHECK-LABEL: @test_vld4_s16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int16x4x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.int16x4x4_t [[TMP6]]
int16x4x4_t test_vld4_s16(int16_t const *a) {
  return vld4_s16(a);
}

// CHECK-LABEL: @test_vld4_s32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr %a)
// CHECK: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int32x2x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.int32x2x4_t [[TMP6]]
int32x2x4_t test_vld4_s32(int32_t const *a) {
  return vld4_s32(a);
}

// CHECK-LABEL: @test_vld4_s64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.int64x1x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.int64x1x4_t [[TMP6]]
int64x1x4_t test_vld4_s64(int64_t const *a) {
  return vld4_s64(a);
}

// CHECK-LABEL: @test_vld4_f16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.aarch64.neon.ld4.v4f16.p0(ptr %a)
// CHECK: store { <4 x half>, <4 x half>, <4 x half>, <4 x half> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float16x4x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.float16x4x4_t [[TMP6]]
float16x4x4_t test_vld4_f16(float16_t const *a) {
  return vld4_f16(a);
}

// CHECK-LABEL: @test_vld4_f32(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0(ptr %a)
// CHECK: store { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float32x2x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.float32x2x4_t [[TMP6]]
float32x2x4_t test_vld4_f32(float32_t const *a) {
  return vld4_f32(a);
}

// CHECK-LABEL: @test_vld4_f64(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr %a)
// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x4_t [[TMP6]]
float64x1x4_t test_vld4_f64(float64_t const *a) {
  return vld4_f64(a);
}

// CHECK-LABEL: @test_vld4_p8(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr %a)
// CHECK: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP5:%.*]] = load %struct.poly8x8x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.poly8x8x4_t [[TMP5]]
poly8x8x4_t test_vld4_p8(poly8_t const *a) {
  return vld4_p8(a);
}

// CHECK-LABEL: @test_vld4_p16(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[VLD4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr %a)
// CHECK: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[VLD4]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly16x4x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.poly16x4x4_t [[TMP6]]
poly16x4x4_t test_vld4_p16(poly16_t const *a) {
  return vld4_p16(a);
}

// CHECK-LABEL: @test_vst1q_u8(
// CHECK: store <16 x i8> %b, ptr %a
// CHECK: ret void
void test_vst1q_u8(uint8_t *a, uint8x16_t b) {
  vst1q_u8(a, b);
}

// CHECK-LABEL: @test_vst1q_u16(
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: store <8 x i16> [[TMP3]], ptr %a
// CHECK: ret void
void test_vst1q_u16(uint16_t *a, uint16x8_t b) {
  vst1q_u16(a, b);
}

// CHECK-LABEL: @test_vst1q_u32(
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: store <4 x i32> [[TMP3]], ptr %a
// CHECK: ret void
void test_vst1q_u32(uint32_t *a, uint32x4_t b) {
  vst1q_u32(a, b);
}

// CHECK-LABEL: @test_vst1q_u64(
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: store <2 x i64> [[TMP3]], ptr %a
// CHECK: ret void
void test_vst1q_u64(uint64_t *a, uint64x2_t b) {
  vst1q_u64(a, b);
}

// CHECK-LABEL: @test_vst1q_s8(
// CHECK: store <16 x i8> %b, ptr %a
// CHECK: ret void
void test_vst1q_s8(int8_t *a, int8x16_t b) {
  vst1q_s8(a, b);
}

// CHECK-LABEL: @test_vst1q_s16(
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK: store <8 x i16> [[TMP3]], ptr %a
// CHECK: ret void
void test_vst1q_s16(int16_t *a, int16x8_t b) {
  vst1q_s16(a, b);
}

// CHECK-LABEL: @test_vst1q_s32(
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK: store <4 x i32> [[TMP3]], ptr %a
// CHECK: ret void
void test_vst1q_s32(int32_t *a, int32x4_t b) {
  vst1q_s32(a, b);
}

// CHECK-LABEL: @test_vst1q_s64(
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK: store <2 x i64> [[TMP3]], ptr %a
// CHECK: ret void
void test_vst1q_s64(int64_t *a, int64x2_t b) {
  vst1q_s64(a, b);
}

// CHECK-LABEL: @test_vst1q_f16(
// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK: store <8 x half> [[TMP3]], ptr %a
// CHECK: ret void
void test_vst1q_f16(float16_t *a, float16x8_t b) {
  vst1q_f16(a, b);
}

// CHECK-LABEL: @test_vst1q_f32(
// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK: store <4 x float> [[TMP3]], ptr %a
// CHECK: ret void
void test_vst1q_f32(float32_t *a, float32x4_t b) {
  vst1q_f32(a, b);
}

// CHECK-LABEL:
@test_vst1q_f64( 10253 // CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> 10254 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> 10255 // CHECK: store <2 x double> [[TMP3]], ptr %a 10256 // CHECK: ret void 10257 void test_vst1q_f64(float64_t *a, float64x2_t b) { 10258 vst1q_f64(a, b); 10259 } 10260 10261 // CHECK-LABEL: @test_vst1q_p8( 10262 // CHECK: store <16 x i8> %b, ptr %a 10263 // CHECK: ret void 10264 void test_vst1q_p8(poly8_t *a, poly8x16_t b) { 10265 vst1q_p8(a, b); 10266 } 10267 10268 // CHECK-LABEL: @test_vst1q_p16( 10269 // CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8> 10270 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> 10271 // CHECK: store <8 x i16> [[TMP3]], ptr %a 10272 // CHECK: ret void 10273 void test_vst1q_p16(poly16_t *a, poly16x8_t b) { 10274 vst1q_p16(a, b); 10275 } 10276 10277 // CHECK-LABEL: @test_vst1_u8( 10278 // CHECK: store <8 x i8> %b, ptr %a 10279 // CHECK: ret void 10280 void test_vst1_u8(uint8_t *a, uint8x8_t b) { 10281 vst1_u8(a, b); 10282 } 10283 10284 // CHECK-LABEL: @test_vst1_u16( 10285 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 10286 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 10287 // CHECK: store <4 x i16> [[TMP3]], ptr %a 10288 // CHECK: ret void 10289 void test_vst1_u16(uint16_t *a, uint16x4_t b) { 10290 vst1_u16(a, b); 10291 } 10292 10293 // CHECK-LABEL: @test_vst1_u32( 10294 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 10295 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 10296 // CHECK: store <2 x i32> [[TMP3]], ptr %a 10297 // CHECK: ret void 10298 void test_vst1_u32(uint32_t *a, uint32x2_t b) { 10299 vst1_u32(a, b); 10300 } 10301 10302 // CHECK-LABEL: @test_vst1_u64( 10303 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 10304 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 10305 // CHECK: store <1 x i64> [[TMP3]], ptr %a 10306 // CHECK: ret void 10307 void test_vst1_u64(uint64_t *a, uint64x1_t b) { 10308 vst1_u64(a, b); 10309 } 10310 10311 // CHECK-LABEL: @test_vst1_s8( 10312 // CHECK: store <8 x i8> %b, ptr %a 10313 // CHECK: ret void 10314 void test_vst1_s8(int8_t *a, int8x8_t b) { 10315 vst1_s8(a, b); 10316 } 10317 10318 // CHECK-LABEL: @test_vst1_s16( 10319 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 10320 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 10321 // CHECK: store <4 x i16> [[TMP3]], ptr %a 10322 // CHECK: ret void 10323 void test_vst1_s16(int16_t *a, int16x4_t b) { 10324 vst1_s16(a, b); 10325 } 10326 10327 // CHECK-LABEL: @test_vst1_s32( 10328 // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8> 10329 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> 10330 // CHECK: store <2 x i32> [[TMP3]], ptr %a 10331 // CHECK: ret void 10332 void test_vst1_s32(int32_t *a, int32x2_t b) { 10333 vst1_s32(a, b); 10334 } 10335 10336 // CHECK-LABEL: @test_vst1_s64( 10337 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 10338 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 10339 // CHECK: store <1 x i64> [[TMP3]], ptr %a 10340 // CHECK: ret void 10341 void test_vst1_s64(int64_t *a, int64x1_t b) { 10342 vst1_s64(a, b); 10343 } 10344 10345 // CHECK-LABEL: @test_vst1_f16( 10346 // CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> 10347 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> 10348 // CHECK: store <4 x half> [[TMP3]], ptr %a 10349 // CHECK: ret void 10350 void test_vst1_f16(float16_t *a, float16x4_t 
b) { 10351 vst1_f16(a, b); 10352 } 10353 10354 // CHECK-LABEL: @test_vst1_f32( 10355 // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> 10356 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> 10357 // CHECK: store <2 x float> [[TMP3]], ptr %a 10358 // CHECK: ret void 10359 void test_vst1_f32(float32_t *a, float32x2_t b) { 10360 vst1_f32(a, b); 10361 } 10362 10363 // CHECK-LABEL: @test_vst1_f64( 10364 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> 10365 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> 10366 // CHECK: store <1 x double> [[TMP3]], ptr %a 10367 // CHECK: ret void 10368 void test_vst1_f64(float64_t *a, float64x1_t b) { 10369 vst1_f64(a, b); 10370 } 10371 10372 // CHECK-LABEL: @test_vst1_p8( 10373 // CHECK: store <8 x i8> %b, ptr %a 10374 // CHECK: ret void 10375 void test_vst1_p8(poly8_t *a, poly8x8_t b) { 10376 vst1_p8(a, b); 10377 } 10378 10379 // CHECK-LABEL: @test_vst1_p16( 10380 // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8> 10381 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> 10382 // CHECK: store <4 x i16> [[TMP3]], ptr %a 10383 // CHECK: ret void 10384 void test_vst1_p16(poly16_t *a, poly16x4_t b) { 10385 vst1_p16(a, b); 10386 } 10387 10388 // CHECK-LABEL: @test_vst2q_u8( 10389 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 10390 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16 10391 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0 10392 // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10393 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10394 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0 10395 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 10396 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 10397 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x2_t, ptr [[__S1]], i32 0, i32 0 10398 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 10399 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 10400 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a) 10401 // CHECK: ret void 10402 void test_vst2q_u8(uint8_t *a, uint8x16x2_t b) { 10403 vst2q_u8(a, b); 10404 } 10405 10406 // CHECK-LABEL: @test_vst2q_u16( 10407 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16 10408 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16 10409 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0 10410 // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10411 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10412 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0 10413 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 10414 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 10415 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 10416 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x2_t, ptr [[__S1]], i32 0, i32 0 10417 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, 
i64 1 10418 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 10419 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 10420 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 10421 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 10422 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a) 10423 // CHECK: ret void 10424 void test_vst2q_u16(uint16_t *a, uint16x8x2_t b) { 10425 vst2q_u16(a, b); 10426 } 10427 10428 // CHECK-LABEL: @test_vst2q_u32( 10429 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16 10430 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16 10431 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0 10432 // CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10433 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10434 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0 10435 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 10436 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 10437 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 10438 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x2_t, ptr [[__S1]], i32 0, i32 0 10439 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 10440 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 10441 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 10442 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 10443 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 10444 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a) 10445 // CHECK: ret void 10446 void test_vst2q_u32(uint32_t *a, uint32x4x2_t b) { 10447 vst2q_u32(a, b); 10448 } 10449 10450 // CHECK-LABEL: @test_vst2q_u64( 10451 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16 10452 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16 10453 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0 10454 // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10455 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10456 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0 10457 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 10458 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 10459 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 10460 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x2_t, ptr [[__S1]], i32 0, i32 0 10461 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 10462 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 10463 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 10464 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 10465 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 10466 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a) 10467 // CHECK: ret void 10468 void 
test_vst2q_u64(uint64_t *a, uint64x2x2_t b) { 10469 vst2q_u64(a, b); 10470 } 10471 10472 // CHECK-LABEL: @test_vst2q_s8( 10473 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 10474 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16 10475 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0 10476 // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10477 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10478 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0 10479 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 10480 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 10481 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x2_t, ptr [[__S1]], i32 0, i32 0 10482 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 10483 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 10484 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a) 10485 // CHECK: ret void 10486 void test_vst2q_s8(int8_t *a, int8x16x2_t b) { 10487 vst2q_s8(a, b); 10488 } 10489 10490 // CHECK-LABEL: @test_vst2q_s16( 10491 // CHECK: [[B:%.*]] = alloca %struct.int16x8x2_t, align 16 10492 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16 10493 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0 10494 // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10495 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10496 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0 10497 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 10498 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 10499 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 10500 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x2_t, ptr [[__S1]], i32 0, i32 0 10501 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 10502 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 10503 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 10504 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 10505 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 10506 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a) 10507 // CHECK: ret void 10508 void test_vst2q_s16(int16_t *a, int16x8x2_t b) { 10509 vst2q_s16(a, b); 10510 } 10511 10512 // CHECK-LABEL: @test_vst2q_s32( 10513 // CHECK: [[B:%.*]] = alloca %struct.int32x4x2_t, align 16 10514 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16 10515 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0 10516 // CHECK: store [2 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10517 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10518 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0 10519 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL]], 
i64 0, i64 0 10520 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 10521 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 10522 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x2_t, ptr [[__S1]], i32 0, i32 0 10523 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 10524 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 10525 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 10526 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 10527 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 10528 // CHECK: call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> [[TMP7]], <4 x i32> [[TMP8]], ptr %a) 10529 // CHECK: ret void 10530 void test_vst2q_s32(int32_t *a, int32x4x2_t b) { 10531 vst2q_s32(a, b); 10532 } 10533 10534 // CHECK-LABEL: @test_vst2q_s64( 10535 // CHECK: [[B:%.*]] = alloca %struct.int64x2x2_t, align 16 10536 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16 10537 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0 10538 // CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10539 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10540 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0 10541 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 10542 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 10543 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 10544 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x2_t, ptr [[__S1]], i32 0, i32 0 10545 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 10546 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 10547 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 10548 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 10549 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 10550 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a) 10551 // CHECK: ret void 10552 void test_vst2q_s64(int64_t *a, int64x2x2_t b) { 10553 vst2q_s64(a, b); 10554 } 10555 10556 // CHECK-LABEL: @test_vst2q_f16( 10557 // CHECK: [[B:%.*]] = alloca %struct.float16x8x2_t, align 16 10558 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16 10559 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0 10560 // CHECK: store [2 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10561 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10562 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0 10563 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL]], i64 0, i64 0 10564 // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 10565 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> 10566 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x2_t, ptr [[__S1]], i32 0, i32 0 10567 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 10568 // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16 10569 
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 10570 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half> 10571 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half> 10572 // CHECK: call void @llvm.aarch64.neon.st2.v8f16.p0(<8 x half> [[TMP7]], <8 x half> [[TMP8]], ptr %a) 10573 // CHECK: ret void 10574 void test_vst2q_f16(float16_t *a, float16x8x2_t b) { 10575 vst2q_f16(a, b); 10576 } 10577 10578 // CHECK-LABEL: @test_vst2q_f32( 10579 // CHECK: [[B:%.*]] = alloca %struct.float32x4x2_t, align 16 10580 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16 10581 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0 10582 // CHECK: store [2 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10583 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10584 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0 10585 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL]], i64 0, i64 0 10586 // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16 10587 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> 10588 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x2_t, ptr [[__S1]], i32 0, i32 0 10589 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 10590 // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16 10591 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 10592 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 10593 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 10594 // CHECK: call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> [[TMP7]], <4 x float> [[TMP8]], ptr %a) 10595 // CHECK: ret void 10596 void test_vst2q_f32(float32_t *a, float32x4x2_t b) { 10597 vst2q_f32(a, b); 10598 } 10599 10600 // CHECK-LABEL: @test_vst2q_f64( 10601 // CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16 10602 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16 10603 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0 10604 // CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10605 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10606 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0 10607 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0 10608 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 10609 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 10610 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0 10611 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 10612 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 10613 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 10614 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 10615 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 10616 // CHECK: call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a) 10617 // CHECK: ret void 10618 void test_vst2q_f64(float64_t 
*a, float64x2x2_t b) { 10619 vst2q_f64(a, b); 10620 } 10621 10622 // CHECK-LABEL: @test_vst2q_p8( 10623 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 10624 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16 10625 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0 10626 // CHECK: store [2 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10627 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10628 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0 10629 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 10630 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 10631 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x2_t, ptr [[__S1]], i32 0, i32 0 10632 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 10633 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 10634 // CHECK: call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], ptr %a) 10635 // CHECK: ret void 10636 void test_vst2q_p8(poly8_t *a, poly8x16x2_t b) { 10637 vst2q_p8(a, b); 10638 } 10639 10640 // CHECK-LABEL: @test_vst2q_p16( 10641 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16 10642 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16 10643 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0 10644 // CHECK: store [2 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10645 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false) 10646 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0 10647 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 10648 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 10649 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 10650 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x2_t, ptr [[__S1]], i32 0, i32 0 10651 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 10652 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 10653 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 10654 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 10655 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 10656 // CHECK: call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> [[TMP7]], <8 x i16> [[TMP8]], ptr %a) 10657 // CHECK: ret void 10658 void test_vst2q_p16(poly16_t *a, poly16x8x2_t b) { 10659 vst2q_p16(a, b); 10660 } 10661 10662 // CHECK-LABEL: @test_vst2_u8( 10663 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8 10664 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8 10665 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0 10666 // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10667 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10668 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0 10669 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 10670 
// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 10671 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x2_t, ptr [[__S1]], i32 0, i32 0 10672 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 10673 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 10674 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a) 10675 // CHECK: ret void 10676 void test_vst2_u8(uint8_t *a, uint8x8x2_t b) { 10677 vst2_u8(a, b); 10678 } 10679 10680 // CHECK-LABEL: @test_vst2_u16( 10681 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8 10682 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8 10683 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0 10684 // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10685 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10686 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0 10687 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 10688 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 10689 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 10690 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x2_t, ptr [[__S1]], i32 0, i32 0 10691 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 10692 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 10693 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 10694 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 10695 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 10696 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a) 10697 // CHECK: ret void 10698 void test_vst2_u16(uint16_t *a, uint16x4x2_t b) { 10699 vst2_u16(a, b); 10700 } 10701 10702 // CHECK-LABEL: @test_vst2_u32( 10703 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8 10704 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8 10705 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0 10706 // CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10707 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10708 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0 10709 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 10710 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 10711 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 10712 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x2_t, ptr [[__S1]], i32 0, i32 0 10713 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 10714 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 10715 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 10716 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 10717 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 10718 // CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a) 10719 // CHECK: ret void 10720 
void test_vst2_u32(uint32_t *a, uint32x2x2_t b) { 10721 vst2_u32(a, b); 10722 } 10723 10724 // CHECK-LABEL: @test_vst2_u64( 10725 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8 10726 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8 10727 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0 10728 // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10729 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10730 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0 10731 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 10732 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 10733 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 10734 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x2_t, ptr [[__S1]], i32 0, i32 0 10735 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 10736 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 10737 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 10738 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 10739 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 10740 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a) 10741 // CHECK: ret void 10742 void test_vst2_u64(uint64_t *a, uint64x1x2_t b) { 10743 vst2_u64(a, b); 10744 } 10745 10746 // CHECK-LABEL: @test_vst2_s8( 10747 // CHECK: [[B:%.*]] = alloca %struct.int8x8x2_t, align 8 10748 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8 10749 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0 10750 // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10751 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10752 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0 10753 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 10754 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 10755 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x2_t, ptr [[__S1]], i32 0, i32 0 10756 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 10757 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 10758 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a) 10759 // CHECK: ret void 10760 void test_vst2_s8(int8_t *a, int8x8x2_t b) { 10761 vst2_s8(a, b); 10762 } 10763 10764 // CHECK-LABEL: @test_vst2_s16( 10765 // CHECK: [[B:%.*]] = alloca %struct.int16x4x2_t, align 8 10766 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8 10767 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0 10768 // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10769 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10770 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0 10771 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 10772 // CHECK: 
[[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 10773 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 10774 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x2_t, ptr [[__S1]], i32 0, i32 0 10775 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 10776 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 10777 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 10778 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 10779 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 10780 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a) 10781 // CHECK: ret void 10782 void test_vst2_s16(int16_t *a, int16x4x2_t b) { 10783 vst2_s16(a, b); 10784 } 10785 10786 // CHECK-LABEL: @test_vst2_s32( 10787 // CHECK: [[B:%.*]] = alloca %struct.int32x2x2_t, align 8 10788 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8 10789 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0 10790 // CHECK: store [2 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10791 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10792 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0 10793 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 10794 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 10795 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 10796 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x2_t, ptr [[__S1]], i32 0, i32 0 10797 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 10798 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 10799 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 10800 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 10801 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 10802 // CHECK: call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> [[TMP7]], <2 x i32> [[TMP8]], ptr %a) 10803 // CHECK: ret void 10804 void test_vst2_s32(int32_t *a, int32x2x2_t b) { 10805 vst2_s32(a, b); 10806 } 10807 10808 // CHECK-LABEL: @test_vst2_s64( 10809 // CHECK: [[B:%.*]] = alloca %struct.int64x1x2_t, align 8 10810 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8 10811 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0 10812 // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10813 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10814 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0 10815 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 10816 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 10817 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 10818 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x2_t, ptr [[__S1]], i32 0, i32 0 10819 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 10820 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 10821 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 10822 // 
CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 10823 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 10824 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a) 10825 // CHECK: ret void 10826 void test_vst2_s64(int64_t *a, int64x1x2_t b) { 10827 vst2_s64(a, b); 10828 } 10829 10830 // CHECK-LABEL: @test_vst2_f16( 10831 // CHECK: [[B:%.*]] = alloca %struct.float16x4x2_t, align 8 10832 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8 10833 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0 10834 // CHECK: store [2 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10835 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10836 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0 10837 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL]], i64 0, i64 0 10838 // CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8 10839 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 10840 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x2_t, ptr [[__S1]], i32 0, i32 0 10841 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], ptr [[VAL1]], i64 0, i64 1 10842 // CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8 10843 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 10844 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half> 10845 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half> 10846 // CHECK: call void @llvm.aarch64.neon.st2.v4f16.p0(<4 x half> [[TMP7]], <4 x half> [[TMP8]], ptr %a) 10847 // CHECK: ret void 10848 void test_vst2_f16(float16_t *a, float16x4x2_t b) { 10849 vst2_f16(a, b); 10850 } 10851 10852 // CHECK-LABEL: @test_vst2_f32( 10853 // CHECK: [[B:%.*]] = alloca %struct.float32x2x2_t, align 8 10854 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8 10855 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0 10856 // CHECK: store [2 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10857 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10858 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0 10859 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL]], i64 0, i64 0 10860 // CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8 10861 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 10862 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x2_t, ptr [[__S1]], i32 0, i32 0 10863 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], ptr [[VAL1]], i64 0, i64 1 10864 // CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8 10865 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 10866 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 10867 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 10868 // CHECK: call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> [[TMP7]], <2 x float> [[TMP8]], ptr %a) 10869 // CHECK: ret void 10870 void test_vst2_f32(float32_t *a, float32x2x2_t b) { 10871 vst2_f32(a, b); 10872 } 10873 10874 // CHECK-LABEL: @test_vst2_f64( 10875 // CHECK: [[B:%.*]] = alloca 
%struct.float64x1x2_t, align 8 10876 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8 10877 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0 10878 // CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10879 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10880 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0 10881 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0 10882 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 10883 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 10884 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0 10885 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 10886 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 10887 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 10888 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 10889 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 10890 // CHECK: call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a) 10891 // CHECK: ret void 10892 void test_vst2_f64(float64_t *a, float64x1x2_t b) { 10893 vst2_f64(a, b); 10894 } 10895 10896 // CHECK-LABEL: @test_vst2_p8( 10897 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8 10898 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8 10899 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0 10900 // CHECK: store [2 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10901 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10902 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0 10903 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 10904 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 10905 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x2_t, ptr [[__S1]], i32 0, i32 0 10906 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 10907 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 10908 // CHECK: call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], ptr %a) 10909 // CHECK: ret void 10910 void test_vst2_p8(poly8_t *a, poly8x8x2_t b) { 10911 vst2_p8(a, b); 10912 } 10913 10914 // CHECK-LABEL: @test_vst2_p16( 10915 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8 10916 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8 10917 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0 10918 // CHECK: store [2 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 10919 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 10920 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0 10921 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 10922 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 10923 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] 
to <8 x i8> 10924 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x2_t, ptr [[__S1]], i32 0, i32 0 10925 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 10926 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 10927 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 10928 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 10929 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 10930 // CHECK: call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> [[TMP7]], <4 x i16> [[TMP8]], ptr %a) 10931 // CHECK: ret void 10932 void test_vst2_p16(poly16_t *a, poly16x4x2_t b) { 10933 vst2_p16(a, b); 10934 } 10935 10936 // CHECK-LABEL: @test_vst3q_u8( 10937 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 10938 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16 10939 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0 10940 // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10941 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 10942 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 10943 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 10944 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 10945 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 10946 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 10947 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 10948 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x3_t, ptr [[__S1]], i32 0, i32 0 10949 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 10950 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 10951 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a) 10952 // CHECK: ret void 10953 void test_vst3q_u8(uint8_t *a, uint8x16x3_t b) { 10954 vst3q_u8(a, b); 10955 } 10956 10957 // CHECK-LABEL: @test_vst3q_u16( 10958 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16 10959 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16 10960 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0 10961 // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10962 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 10963 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 10964 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 10965 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 10966 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 10967 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 10968 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 10969 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 10970 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 10971 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw 
%struct.uint16x8x3_t, ptr [[__S1]], i32 0, i32 0 10972 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 10973 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 10974 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 10975 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 10976 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 10977 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 10978 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a) 10979 // CHECK: ret void 10980 void test_vst3q_u16(uint16_t *a, uint16x8x3_t b) { 10981 vst3q_u16(a, b); 10982 } 10983 10984 // CHECK-LABEL: @test_vst3q_u32( 10985 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16 10986 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16 10987 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0 10988 // CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 10989 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 10990 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 10991 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 10992 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 10993 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 10994 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 10995 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 10996 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 10997 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 10998 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x3_t, ptr [[__S1]], i32 0, i32 0 10999 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 11000 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 11001 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 11002 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 11003 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 11004 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 11005 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a) 11006 // CHECK: ret void 11007 void test_vst3q_u32(uint32_t *a, uint32x4x3_t b) { 11008 vst3q_u32(a, b); 11009 } 11010 11011 // CHECK-LABEL: @test_vst3q_u64( 11012 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16 11013 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16 11014 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0 11015 // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11016 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 11017 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 11018 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 11019 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 11020 // CHECK: [[TMP4:%.*]] = 
bitcast <2 x i64> [[TMP3]] to <16 x i8> 11021 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 11022 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 11023 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 11024 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 11025 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x3_t, ptr [[__S1]], i32 0, i32 0 11026 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 11027 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 11028 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 11029 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 11030 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 11031 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 11032 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a) 11033 // CHECK: ret void 11034 void test_vst3q_u64(uint64_t *a, uint64x2x3_t b) { 11035 vst3q_u64(a, b); 11036 } 11037 11038 // CHECK-LABEL: @test_vst3q_s8( 11039 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16 11040 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16 11041 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0 11042 // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11043 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 11044 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 11045 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 11046 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 11047 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 11048 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 11049 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 11050 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x3_t, ptr [[__S1]], i32 0, i32 0 11051 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 11052 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 11053 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a) 11054 // CHECK: ret void 11055 void test_vst3q_s8(int8_t *a, int8x16x3_t b) { 11056 vst3q_s8(a, b); 11057 } 11058 11059 // CHECK-LABEL: @test_vst3q_s16( 11060 // CHECK: [[B:%.*]] = alloca %struct.int16x8x3_t, align 16 11061 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16 11062 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0 11063 // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11064 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 11065 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 11066 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 11067 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 11068 // CHECK: 
[[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 11069 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 11070 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 11071 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 11072 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 11073 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x3_t, ptr [[__S1]], i32 0, i32 0 11074 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 11075 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 11076 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 11077 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 11078 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 11079 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 11080 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a) 11081 // CHECK: ret void 11082 void test_vst3q_s16(int16_t *a, int16x8x3_t b) { 11083 vst3q_s16(a, b); 11084 } 11085 11086 // CHECK-LABEL: @test_vst3q_s32( 11087 // CHECK: [[B:%.*]] = alloca %struct.int32x4x3_t, align 16 11088 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16 11089 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0 11090 // CHECK: store [3 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11091 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 11092 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 11093 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 11094 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 11095 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 11096 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 11097 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 11098 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 11099 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 11100 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x3_t, ptr [[__S1]], i32 0, i32 0 11101 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 11102 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 11103 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 11104 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 11105 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 11106 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 11107 // CHECK: call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> [[TMP9]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], ptr %a) 11108 // CHECK: ret void 11109 void test_vst3q_s32(int32_t *a, int32x4x3_t b) { 11110 vst3q_s32(a, b); 11111 } 11112 11113 // CHECK-LABEL: @test_vst3q_s64( 11114 // CHECK: [[B:%.*]] = alloca %struct.int64x2x3_t, align 16 11115 // CHECK: [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16 11116 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[B]], i32 0, i32 0 11117 // CHECK: store [3 x <2 x i64>] [[B]].coerce, 
ptr [[COERCE_DIVE]], align 16 11118 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 11119 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 11120 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 11121 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 11122 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 11123 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 11124 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 11125 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 11126 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 11127 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x3_t, ptr [[__S1]], i32 0, i32 0 11128 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 11129 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 11130 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 11131 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 11132 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 11133 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 11134 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a) 11135 // CHECK: ret void 11136 void test_vst3q_s64(int64_t *a, int64x2x3_t b) { 11137 vst3q_s64(a, b); 11138 } 11139 11140 // CHECK-LABEL: @test_vst3q_f16( 11141 // CHECK: [[B:%.*]] = alloca %struct.float16x8x3_t, align 16 11142 // CHECK: [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16 11143 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0 11144 // CHECK: store [3 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11145 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 11146 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 11147 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL]], i64 0, i64 0 11148 // CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16 11149 // CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8> 11150 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 11151 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL1]], i64 0, i64 1 11152 // CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16 11153 // CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> 11154 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x3_t, ptr [[__S1]], i32 0, i32 0 11155 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], ptr [[VAL3]], i64 0, i64 2 11156 // CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16 11157 // CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8> 11158 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half> 11159 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half> 11160 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half> 11161 // CHECK: call void @llvm.aarch64.neon.st3.v8f16.p0(<8 x half> [[TMP9]], <8 x half> [[TMP10]], <8 x half> 
[[TMP11]], ptr %a) 11162 // CHECK: ret void 11163 void test_vst3q_f16(float16_t *a, float16x8x3_t b) { 11164 vst3q_f16(a, b); 11165 } 11166 11167 // CHECK-LABEL: @test_vst3q_f32( 11168 // CHECK: [[B:%.*]] = alloca %struct.float32x4x3_t, align 16 11169 // CHECK: [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16 11170 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0 11171 // CHECK: store [3 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11172 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 11173 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 11174 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL]], i64 0, i64 0 11175 // CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16 11176 // CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8> 11177 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 11178 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL1]], i64 0, i64 1 11179 // CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16 11180 // CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8> 11181 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x3_t, ptr [[__S1]], i32 0, i32 0 11182 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], ptr [[VAL3]], i64 0, i64 2 11183 // CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16 11184 // CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8> 11185 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float> 11186 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float> 11187 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float> 11188 // CHECK: call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], ptr %a) 11189 // CHECK: ret void 11190 void test_vst3q_f32(float32_t *a, float32x4x3_t b) { 11191 vst3q_f32(a, b); 11192 } 11193 11194 // CHECK-LABEL: @test_vst3q_f64( 11195 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 11196 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 11197 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0 11198 // CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11199 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 11200 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 11201 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0 11202 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 11203 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 11204 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 11205 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 11206 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 11207 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 11208 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 11209 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr 
inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 11210 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 11211 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> 11212 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 11213 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 11214 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> 11215 // CHECK: call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a) 11216 // CHECK: ret void 11217 void test_vst3q_f64(float64_t *a, float64x2x3_t b) { 11218 vst3q_f64(a, b); 11219 } 11220 11221 // CHECK-LABEL: @test_vst3q_p8( 11222 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 11223 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16 11224 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0 11225 // CHECK: store [3 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11226 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 11227 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 11228 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 11229 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 11230 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 11231 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 11232 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 11233 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x3_t, ptr [[__S1]], i32 0, i32 0 11234 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 11235 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 11236 // CHECK: call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], ptr %a) 11237 // CHECK: ret void 11238 void test_vst3q_p8(poly8_t *a, poly8x16x3_t b) { 11239 vst3q_p8(a, b); 11240 } 11241 11242 // CHECK-LABEL: @test_vst3q_p16( 11243 // CHECK: [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16 11244 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16 11245 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0 11246 // CHECK: store [3 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11247 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 11248 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 11249 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 11250 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 11251 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 11252 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 11253 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 11254 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 11255 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 11256 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x3_t, ptr [[__S1]], i32 0, i32 0 
11257 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 11258 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 11259 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 11260 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 11261 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 11262 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 11263 // CHECK: call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> [[TMP9]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], ptr %a) 11264 // CHECK: ret void 11265 void test_vst3q_p16(poly16_t *a, poly16x8x3_t b) { 11266 vst3q_p16(a, b); 11267 } 11268 11269 // CHECK-LABEL: @test_vst3_u8( 11270 // CHECK: [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8 11271 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8 11272 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0 11273 // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11274 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11275 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 11276 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 11277 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 11278 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 11279 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 11280 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 11281 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x3_t, ptr [[__S1]], i32 0, i32 0 11282 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 11283 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 11284 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a) 11285 // CHECK: ret void 11286 void test_vst3_u8(uint8_t *a, uint8x8x3_t b) { 11287 vst3_u8(a, b); 11288 } 11289 11290 // CHECK-LABEL: @test_vst3_u16( 11291 // CHECK: [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8 11292 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8 11293 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0 11294 // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11295 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11296 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 11297 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 11298 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 11299 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 11300 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 11301 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 11302 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 11303 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 11304 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x3_t, ptr [[__S1]], i32 0, i32 0 11305 // CHECK: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 11306 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 11307 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 11308 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 11309 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 11310 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 11311 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a) 11312 // CHECK: ret void 11313 void test_vst3_u16(uint16_t *a, uint16x4x3_t b) { 11314 vst3_u16(a, b); 11315 } 11316 11317 // CHECK-LABEL: @test_vst3_u32( 11318 // CHECK: [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8 11319 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8 11320 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0 11321 // CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11322 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11323 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 11324 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 11325 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 11326 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 11327 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 11328 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 11329 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 11330 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 11331 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x3_t, ptr [[__S1]], i32 0, i32 0 11332 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 11333 // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 11334 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 11335 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 11336 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 11337 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 11338 // CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a) 11339 // CHECK: ret void 11340 void test_vst3_u32(uint32_t *a, uint32x2x3_t b) { 11341 vst3_u32(a, b); 11342 } 11343 11344 // CHECK-LABEL: @test_vst3_u64( 11345 // CHECK: [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8 11346 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8 11347 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0 11348 // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11349 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11350 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 11351 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 11352 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 11353 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 11354 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw 
%struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 11355 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 11356 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 11357 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 11358 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x3_t, ptr [[__S1]], i32 0, i32 0 11359 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 11360 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 11361 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 11362 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 11363 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 11364 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 11365 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a) 11366 // CHECK: ret void 11367 void test_vst3_u64(uint64_t *a, uint64x1x3_t b) { 11368 vst3_u64(a, b); 11369 } 11370 11371 // CHECK-LABEL: @test_vst3_s8( 11372 // CHECK: [[B:%.*]] = alloca %struct.int8x8x3_t, align 8 11373 // CHECK: [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8 11374 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0 11375 // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11376 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11377 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 11378 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 11379 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 11380 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 11381 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 11382 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 11383 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x3_t, ptr [[__S1]], i32 0, i32 0 11384 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 11385 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 11386 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a) 11387 // CHECK: ret void 11388 void test_vst3_s8(int8_t *a, int8x8x3_t b) { 11389 vst3_s8(a, b); 11390 } 11391 11392 // CHECK-LABEL: @test_vst3_s16( 11393 // CHECK: [[B:%.*]] = alloca %struct.int16x4x3_t, align 8 11394 // CHECK: [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8 11395 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0 11396 // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11397 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11398 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 11399 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 11400 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 11401 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 11402 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 
0, i32 0 11403 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 11404 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 11405 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 11406 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x3_t, ptr [[__S1]], i32 0, i32 0 11407 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 11408 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 11409 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 11410 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16> 11411 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 11412 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 11413 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a) 11414 // CHECK: ret void 11415 void test_vst3_s16(int16_t *a, int16x4x3_t b) { 11416 vst3_s16(a, b); 11417 } 11418 11419 // CHECK-LABEL: @test_vst3_s32( 11420 // CHECK: [[B:%.*]] = alloca %struct.int32x2x3_t, align 8 11421 // CHECK: [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8 11422 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0 11423 // CHECK: store [3 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11424 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11425 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 11426 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL]], i64 0, i64 0 11427 // CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8 11428 // CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8> 11429 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 11430 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1 11431 // CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8 11432 // CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8> 11433 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x3_t, ptr [[__S1]], i32 0, i32 0 11434 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2 11435 // CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8 11436 // CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8> 11437 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32> 11438 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32> 11439 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32> 11440 // CHECK: call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], ptr %a) 11441 // CHECK: ret void 11442 void test_vst3_s32(int32_t *a, int32x2x3_t b) { 11443 vst3_s32(a, b); 11444 } 11445 11446 // CHECK-LABEL: @test_vst3_s64( 11447 // CHECK: [[B:%.*]] = alloca %struct.int64x1x3_t, align 8 11448 // CHECK: [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8 11449 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0 11450 // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11451 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11452 // CHECK: [[VAL:%.*]] = getelementptr 
inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 11453 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 11454 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 11455 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 11456 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 11457 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 11458 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 11459 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 11460 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x3_t, ptr [[__S1]], i32 0, i32 0 11461 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 11462 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 11463 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 11464 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 11465 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 11466 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 11467 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a) 11468 // CHECK: ret void 11469 void test_vst3_s64(int64_t *a, int64x1x3_t b) { 11470 vst3_s64(a, b); 11471 } 11472 11473 // CHECK-LABEL: @test_vst3_f16( 11474 // CHECK: [[B:%.*]] = alloca %struct.float16x4x3_t, align 8 11475 // CHECK: [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8 11476 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0 11477 // CHECK: store [3 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11478 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11479 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 11480 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL]], i64 0, i64 0 11481 // CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8 11482 // CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8> 11483 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 11484 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL1]], i64 0, i64 1 11485 // CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8 11486 // CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> 11487 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x3_t, ptr [[__S1]], i32 0, i32 0 11488 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], ptr [[VAL3]], i64 0, i64 2 11489 // CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8 11490 // CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8> 11491 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half> 11492 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half> 11493 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half> 11494 // CHECK: call void @llvm.aarch64.neon.st3.v4f16.p0(<4 x half> [[TMP9]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], ptr %a) 11495 // CHECK: ret void 11496 void test_vst3_f16(float16_t *a, float16x4x3_t b) { 11497 vst3_f16(a, b); 11498 } 11499 11500 // CHECK-LABEL: @test_vst3_f32( 11501 // CHECK: [[B:%.*]] = alloca 
%struct.float32x2x3_t, align 8 11502 // CHECK: [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8 11503 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0 11504 // CHECK: store [3 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11505 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11506 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 11507 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL]], i64 0, i64 0 11508 // CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8 11509 // CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8> 11510 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 11511 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL1]], i64 0, i64 1 11512 // CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8 11513 // CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8> 11514 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x3_t, ptr [[__S1]], i32 0, i32 0 11515 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], ptr [[VAL3]], i64 0, i64 2 11516 // CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8 11517 // CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8> 11518 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float> 11519 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float> 11520 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float> 11521 // CHECK: call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], ptr %a) 11522 // CHECK: ret void 11523 void test_vst3_f32(float32_t *a, float32x2x3_t b) { 11524 vst3_f32(a, b); 11525 } 11526 11527 // CHECK-LABEL: @test_vst3_f64( 11528 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 11529 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 11530 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0 11531 // CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11532 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11533 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 11534 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0 11535 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 11536 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 11537 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 11538 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 11539 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 11540 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 11541 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 11542 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2 11543 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8 11544 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 11545 // CHECK: [[TMP9:%.*]] = 
bitcast <8 x i8> [[TMP4]] to <1 x double> 11546 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 11547 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 11548 // CHECK: call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a) 11549 // CHECK: ret void 11550 void test_vst3_f64(float64_t *a, float64x1x3_t b) { 11551 vst3_f64(a, b); 11552 } 11553 11554 // CHECK-LABEL: @test_vst3_p8( 11555 // CHECK: [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8 11556 // CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8 11557 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0 11558 // CHECK: store [3 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11559 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11560 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 11561 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL]], i64 0, i64 0 11562 // CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8 11563 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 11564 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1 11565 // CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8 11566 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x3_t, ptr [[__S1]], i32 0, i32 0 11567 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2 11568 // CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8 11569 // CHECK: call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], ptr %a) 11570 // CHECK: ret void 11571 void test_vst3_p8(poly8_t *a, poly8x8x3_t b) { 11572 vst3_p8(a, b); 11573 } 11574 11575 // CHECK-LABEL: @test_vst3_p16( 11576 // CHECK: [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8 11577 // CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8 11578 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0 11579 // CHECK: store [3 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 11580 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 11581 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 11582 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL]], i64 0, i64 0 11583 // CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8 11584 // CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8> 11585 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 11586 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1 11587 // CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8 11588 // CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8> 11589 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x3_t, ptr [[__S1]], i32 0, i32 0 11590 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2 11591 // CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8 11592 // CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8> 11593 // CHECK: [[TMP9:%.*]] = bitcast 
<8 x i8> [[TMP4]] to <4 x i16> 11594 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> 11595 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16> 11596 // CHECK: call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> [[TMP9]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], ptr %a) 11597 // CHECK: ret void 11598 void test_vst3_p16(poly16_t *a, poly16x4x3_t b) { 11599 vst3_p16(a, b); 11600 } 11601 11602 // CHECK-LABEL: @test_vst4q_u8( 11603 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 11604 // CHECK: [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16 11605 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0 11606 // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11607 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 11608 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 11609 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 11610 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 11611 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 11612 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 11613 // CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 11614 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 11615 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 11616 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 11617 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x16x4_t, ptr [[__S1]], i32 0, i32 0 11618 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3 11619 // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16 11620 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a) 11621 // CHECK: ret void 11622 void test_vst4q_u8(uint8_t *a, uint8x16x4_t b) { 11623 vst4q_u8(a, b); 11624 } 11625 11626 // CHECK-LABEL: @test_vst4q_u16( 11627 // CHECK: [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16 11628 // CHECK: [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16 11629 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0 11630 // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11631 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 11632 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 11633 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 11634 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 11635 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 11636 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 11637 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 11638 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 11639 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 11640 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw 
%struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 11641 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 11642 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 11643 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 11644 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x8x4_t, ptr [[__S1]], i32 0, i32 0 11645 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 11646 // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 11647 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 11648 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 11649 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 11650 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 11651 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 11652 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a) 11653 // CHECK: ret void 11654 void test_vst4q_u16(uint16_t *a, uint16x8x4_t b) { 11655 vst4q_u16(a, b); 11656 } 11657 11658 // CHECK-LABEL: @test_vst4q_u32( 11659 // CHECK: [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16 11660 // CHECK: [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16 11661 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0 11662 // CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11663 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 11664 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 11665 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0 11666 // CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16 11667 // CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8> 11668 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 11669 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1 11670 // CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16 11671 // CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8> 11672 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 11673 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2 11674 // CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16 11675 // CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8> 11676 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x4x4_t, ptr [[__S1]], i32 0, i32 0 11677 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3 11678 // CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16 11679 // CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8> 11680 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32> 11681 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32> 11682 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32> 11683 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32> 11684 // CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a) 
11685 // CHECK: ret void 11686 void test_vst4q_u32(uint32_t *a, uint32x4x4_t b) { 11687 vst4q_u32(a, b); 11688 } 11689 11690 // CHECK-LABEL: @test_vst4q_u64( 11691 // CHECK: [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16 11692 // CHECK: [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16 11693 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0 11694 // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11695 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 11696 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 11697 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 11698 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 11699 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 11700 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 11701 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 11702 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 11703 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 11704 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 11705 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 11706 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 11707 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 11708 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x2x4_t, ptr [[__S1]], i32 0, i32 0 11709 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 11710 // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 11711 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 11712 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 11713 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 11714 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 11715 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 11716 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a) 11717 // CHECK: ret void 11718 void test_vst4q_u64(uint64_t *a, uint64x2x4_t b) { 11719 vst4q_u64(a, b); 11720 } 11721 11722 // CHECK-LABEL: @test_vst4q_s8( 11723 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 11724 // CHECK: [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16 11725 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0 11726 // CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11727 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 11728 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 11729 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0 11730 // CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16 11731 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 11732 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1 11733 // CHECK: 
[[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16 11734 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 11735 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2 11736 // CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16 11737 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x16x4_t, ptr [[__S1]], i32 0, i32 0 11738 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3 11739 // CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16 11740 // CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a) 11741 // CHECK: ret void 11742 void test_vst4q_s8(int8_t *a, int8x16x4_t b) { 11743 vst4q_s8(a, b); 11744 } 11745 11746 // CHECK-LABEL: @test_vst4q_s16( 11747 // CHECK: [[B:%.*]] = alloca %struct.int16x8x4_t, align 16 11748 // CHECK: [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16 11749 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0 11750 // CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 11751 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 11752 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 11753 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0 11754 // CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16 11755 // CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8> 11756 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 11757 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1 11758 // CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16 11759 // CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8> 11760 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 11761 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2 11762 // CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16 11763 // CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8> 11764 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x8x4_t, ptr [[__S1]], i32 0, i32 0 11765 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3 11766 // CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16 11767 // CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8> 11768 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16> 11769 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> 11770 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16> 11771 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16> 11772 // CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a) 11773 // CHECK: ret void 11774 void test_vst4q_s16(int16_t *a, int16x8x4_t b) { 11775 vst4q_s16(a, b); 11776 } 11777 11778 // CHECK-LABEL: @test_vst4q_s32( 11779 // CHECK: [[B:%.*]] = alloca %struct.int32x4x4_t, align 16 11780 // CHECK: [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16 11781 // CHECK: [[COERCE_DIVE:%.*]] = 
// CHECK: store [4 x <4 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i32>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i32>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i32>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK: call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4q_s32(int32_t *a, int32x4x4_t b) {
  vst4q_s32(a, b);
}

// CHECK-LABEL: @test_vst4q_s64(
// CHECK: [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4q_s64(int64_t *a, int64x2x4_t b) {
  vst4q_s64(a, b);
}

// CHECK-LABEL: @test_vst4q_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x half>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x half>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x half>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x half>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x half>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
// CHECK: call void @llvm.aarch64.neon.st4.v8f16.p0(<8 x half> [[TMP11]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4q_f16(float16_t *a, float16x8x4_t b) {
  vst4q_f16(a, b);
}

// CHECK-LABEL: @test_vst4q_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <4 x float> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x float>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x float>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x float>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <4 x float>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
// CHECK: call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> [[TMP11]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4q_f32(float32_t *a, float32x4x4_t b) {
  vst4q_f32(a, b);
}

// CHECK-LABEL: @test_vst4q_f64(
// CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double>
// CHECK: call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4q_f64(float64_t *a, float64x2x4_t b) {
  vst4q_f64(a, b);
}

// CHECK-LABEL: @test_vst4q_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <16 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <16 x i8>, ptr [[ARRAYIDX]], align 16
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <16 x i8>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <16 x i8>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x16x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <16 x i8>, ptr [[ARRAYIDX6]], align 16
// CHECK: call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], ptr %a)
// CHECK: ret void
void test_vst4q_p8(poly8_t *a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}

// CHECK-LABEL: @test_vst4q_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <8 x i16>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <8 x i16>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <8 x i16>, ptr [[ARRAYIDX4]], align 16
// CHECK: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <8 x i16>, ptr [[ARRAYIDX6]], align 16
// CHECK: [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <8 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> [[TMP11]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4q_p16(poly16_t *a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}

// CHECK-LABEL: @test_vst4_u8(
// CHECK: [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
// CHECK: ret void
void test_vst4_u8(uint8_t *a, uint8x8x4_t b) {
  vst4_u8(a, b);
}

// CHECK-LABEL: @test_vst4_u16(
// CHECK: [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4_u16(uint16_t *a, uint16x4x4_t b) {
  vst4_u16(a, b);
}

// CHECK-LABEL: @test_vst4_u32(
// CHECK: [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4_u32(uint32_t *a, uint32x2x4_t b) {
  vst4_u32(a, b);
}

// CHECK-LABEL: @test_vst4_u64(
// CHECK: [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.uint64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4_u64(uint64_t *a, uint64x1x4_t b) {
  vst4_u64(a, b);
}

// CHECK-LABEL: @test_vst4_s8(
// CHECK: [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
// CHECK: ret void
void test_vst4_s8(int8_t *a, int8x8x4_t b) {
  vst4_s8(a, b);
}

// CHECK-LABEL: @test_vst4_s16(
// CHECK: [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4_s16(int16_t *a, int16x4x4_t b) {
  vst4_s16(a, b);
}

// CHECK-LABEL: @test_vst4_s32(
// CHECK: [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x i32>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i32>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i32>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x i32>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x i32>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x i32>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK: call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> [[TMP11]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4_s32(int32_t *a, int32x2x4_t b) {
  vst4_s32(a, b);
}

// CHECK-LABEL: @test_vst4_s64(
// CHECK: [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.int64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
// CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4_s64(int64_t *a, int64x1x4_t b) {
  vst4_s64(a, b);
}

// CHECK-LABEL: @test_vst4_f16(
// CHECK: [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x half>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x half>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x half>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x half>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x half>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x half>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
// CHECK: call void @llvm.aarch64.neon.st4.v4f16.p0(<4 x half> [[TMP11]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4_f16(float16_t *a, float16x4x4_t b) {
  vst4_f16(a, b);
}

// CHECK-LABEL: @test_vst4_f32(
// CHECK: [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <2 x float>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x float>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <2 x float> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x float>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <2 x float>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float32x2x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <2 x float>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <2 x float>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK: call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> [[TMP11]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4_f32(float32_t *a, float32x2x4_t b) {
  vst4_f32(a, b);
}

// CHECK-LABEL: @test_vst4_f64(
// CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double>
// CHECK: call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4_f64(float64_t *a, float64x1x4_t b) {
  vst4_f64(a, b);
}

// CHECK-LABEL: @test_vst4_p8(
// CHECK: [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <8 x i8>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP2:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 8
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP3:%.*]] = load <8 x i8>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP4:%.*]] = load <8 x i8>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly8x8x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP5:%.*]] = load <8 x i8>, ptr [[ARRAYIDX6]], align 8
// CHECK: call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> [[TMP2]], <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], ptr %a)
// CHECK: ret void
void test_vst4_p8(poly8_t *a, poly8x8x4_t b) {
  vst4_p8(a, b);
}

// CHECK-LABEL: @test_vst4_p16(
// CHECK: [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
// CHECK: store [4 x <4 x i16>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <4 x i16>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <4 x i16> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <4 x i16>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL3]], i64 0, i64 2
// CHECK: [[TMP7:%.*]] = load <4 x i16>, ptr [[ARRAYIDX4]], align 8
// CHECK: [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly16x4x4_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], ptr [[VAL5]], i64 0, i64 3
// CHECK: [[TMP9:%.*]] = load <4 x i16>, ptr [[ARRAYIDX6]], align 8
// CHECK: [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <4 x i16>
// CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK: call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> [[TMP11]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], ptr %a)
// CHECK: ret void
void test_vst4_p16(poly16_t *a, poly16x4x4_t b) {
  vst4_p16(a, b);
}

// CHECK-LABEL: @test_vld1q_f64_x2(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %a)
// CHECK: store { <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x2_t [[TMP6]]
float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
  return vld1q_f64_x2(a);
}

// CHECK-LABEL: @test_vld1q_p64_x2(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %a)
// CHECK: store { <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x2_t [[TMP6]]
poly64x2x2_t test_vld1q_p64_x2(poly64_t const *a) {
  return vld1q_p64_x2(a);
}

// CHECK-LABEL: @test_vld1_f64_x2(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %a)
// CHECK: store { <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x2_t [[TMP6]]
float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
  return vld1_f64_x2(a);
}

// CHECK-LABEL: @test_vld1_p64_x2(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 16, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x2_t [[TMP6]]
poly64x1x2_t test_vld1_p64_x2(poly64_t const *a) {
  return vld1_p64_x2(a);
}

// CHECK-LABEL: @test_vld1q_f64_x3(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %a)
// CHECK: store { <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x3_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x3_t [[TMP6]]
float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
  return vld1q_f64_x3(a);
}

// CHECK-LABEL: @test_vld1q_p64_x3(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %a)
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 48, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x3_t [[TMP6]]
poly64x2x3_t test_vld1q_p64_x3(poly64_t const *a) {
  return vld1q_p64_x3(a);
}

// CHECK-LABEL: @test_vld1_f64_x3(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %a)
// CHECK: store { <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x3_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x3_t [[TMP6]]
float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
  return vld1_f64_x3(a);
}

// CHECK-LABEL: @test_vld1_p64_x3(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 24, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x3_t [[TMP6]]
poly64x1x3_t test_vld1_p64_x3(poly64_t const *a) {
  return vld1_p64_x3(a);
}

// CHECK-LABEL: @test_vld1q_f64_x4(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
// CHECK: [[VLD1XN:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %a)
// CHECK: store { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x2x4_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.float64x2x4_t [[TMP6]]
float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
  return vld1q_f64_x4(a);
}

// CHECK-LABEL: @test_vld1q_p64_x4(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
// CHECK: [[VLD1XN:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %a)
// CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[__RET]], i64 64, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, ptr [[RETVAL]], align 16
// CHECK: ret %struct.poly64x2x4_t [[TMP6]]
poly64x2x4_t test_vld1q_p64_x4(poly64_t const *a) {
  return vld1q_p64_x4(a);
}

// CHECK-LABEL: @test_vld1_f64_x4(
// CHECK: [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %a)
// CHECK: store { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.float64x1x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.float64x1x4_t [[TMP6]]
float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
  return vld1_f64_x4(a);
}

// CHECK-LABEL: @test_vld1_p64_x4(
// CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
// CHECK: [[VLD1XN:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %a)
// CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD1XN]], ptr [[__RET]]
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[RETVAL]], ptr align 8 [[__RET]], i64 32, i1 false)
// CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, ptr [[RETVAL]], align 8
// CHECK: ret %struct.poly64x1x4_t [[TMP6]]
poly64x1x4_t test_vld1_p64_x4(poly64_t const *a) {
  return vld1_p64_x4(a);
}

// CHECK-LABEL: @test_vst1q_f64_x2(
// CHECK: [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x double>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double>
// CHECK: call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> [[TMP7]], <2 x double> [[TMP8]], ptr %a)
// CHECK: ret void
void test_vst1q_f64_x2(float64_t *a, float64x2x2_t b) {
  vst1q_f64_x2(a, b);
}

// CHECK-LABEL: @test_vst1q_p64_x2(
// CHECK: [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 32, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16
// CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16
// CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
// CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
// CHECK: call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], ptr %a)
// CHECK: ret void
void test_vst1q_p64_x2(poly64_t *a, poly64x2x2_t b) {
  vst1q_p64_x2(a, b);
}

// CHECK-LABEL: @test_vst1_f64_x2(
// CHECK: [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
// CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0
// CHECK: store [2 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false)
// CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL]], i64 0, i64 0
// CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8
// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8>
// CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x2_t, ptr [[__S1]], i32 0, i32 0
// CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x double>], ptr [[VAL1]], i64 0, i64 1
// CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8
// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8>
// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double>
// CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double>
// CHECK: call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> [[TMP7]], <1 x double> [[TMP8]], ptr %a)
// CHECK: ret void
void test_vst1_f64_x2(float64_t *a, float64x1x2_t b) {
  vst1_f64_x2(a, b);
}

// CHECK-LABEL: @test_vst1_p64_x2(
// CHECK: [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
// CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
[[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8 12599 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[B]], i32 0, i32 0 12600 // CHECK: store [2 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 12601 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 16, i1 false) 12602 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0 12603 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 12604 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 12605 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 12606 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x2_t, ptr [[__S1]], i32 0, i32 0 12607 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 12608 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 12609 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 12610 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 12611 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 12612 // CHECK: call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], ptr %a) 12613 // CHECK: ret void 12614 void test_vst1_p64_x2(poly64_t *a, poly64x1x2_t b) { 12615 vst1_p64_x2(a, b); 12616 } 12617 12618 // CHECK-LABEL: @test_vst1q_f64_x3( 12619 // CHECK: [[B:%.*]] = alloca %struct.float64x2x3_t, align 16 12620 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16 12621 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0 12622 // CHECK: store [3 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 12623 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 12624 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 12625 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL]], i64 0, i64 0 12626 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 12627 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 12628 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 12629 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 12630 // CHECK: [[TMP5:%.*]] = load <2 x double>, ptr [[ARRAYIDX2]], align 16 12631 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 12632 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x3_t, ptr [[__S1]], i32 0, i32 0 12633 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 12634 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 12635 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> 12636 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 12637 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 12638 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> 12639 // CHECK: call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> [[TMP9]], <2 x double> [[TMP10]], <2 x double> [[TMP11]], ptr %a) 12640 // CHECK: ret void 12641 void test_vst1q_f64_x3(float64_t *a, float64x2x3_t b) { 12642 vst1q_f64_x3(a, b); 12643 } 12644 12645 // CHECK-LABEL: @test_vst1q_p64_x3( 12646 // CHECK: 
[[B:%.*]] = alloca %struct.poly64x2x3_t, align 16 12647 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16 12648 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[B]], i32 0, i32 0 12649 // CHECK: store [3 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 12650 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 48, i1 false) 12651 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 12652 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 12653 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 12654 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 12655 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 12656 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 12657 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 12658 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 12659 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x3_t, ptr [[__S1]], i32 0, i32 0 12660 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 12661 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 12662 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 12663 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 12664 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 12665 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 12666 // CHECK: call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], ptr %a) 12667 // CHECK: ret void 12668 void test_vst1q_p64_x3(poly64_t *a, poly64x2x3_t b) { 12669 vst1q_p64_x3(a, b); 12670 } 12671 12672 // CHECK-LABEL: @test_vst1_f64_x3( 12673 // CHECK: [[B:%.*]] = alloca %struct.float64x1x3_t, align 8 12674 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8 12675 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0 12676 // CHECK: store [3 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 12677 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 12678 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 12679 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL]], i64 0, i64 0 12680 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 12681 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 12682 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 12683 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 12684 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 12685 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 12686 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x3_t, ptr [[__S1]], i32 0, i32 0 12687 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x double>], ptr [[VAL3]], i64 0, i64 2 12688 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8 12689 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 12690 // CHECK: [[TMP9:%.*]] = 
bitcast <8 x i8> [[TMP4]] to <1 x double> 12691 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 12692 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 12693 // CHECK: call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> [[TMP9]], <1 x double> [[TMP10]], <1 x double> [[TMP11]], ptr %a) 12694 // CHECK: ret void 12695 void test_vst1_f64_x3(float64_t *a, float64x1x3_t b) { 12696 vst1_f64_x3(a, b); 12697 } 12698 12699 // CHECK-LABEL: @test_vst1_p64_x3( 12700 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8 12701 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8 12702 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[B]], i32 0, i32 0 12703 // CHECK: store [3 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 12704 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 24, i1 false) 12705 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 12706 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 12707 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 12708 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 12709 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 12710 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 12711 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 12712 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 12713 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x3_t, ptr [[__S1]], i32 0, i32 0 12714 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 12715 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 12716 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 12717 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 12718 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 12719 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 12720 // CHECK: call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], ptr %a) 12721 // CHECK: ret void 12722 void test_vst1_p64_x3(poly64_t *a, poly64x1x3_t b) { 12723 vst1_p64_x3(a, b); 12724 } 12725 12726 // CHECK-LABEL: @test_vst1q_f64_x4( 12727 // CHECK: [[B:%.*]] = alloca %struct.float64x2x4_t, align 16 12728 // CHECK: [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16 12729 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0 12730 // CHECK: store [4 x <2 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 12731 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 12732 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 12733 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL]], i64 0, i64 0 12734 // CHECK: [[TMP3:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 16 12735 // CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[TMP3]] to <16 x i8> 12736 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 12737 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL1]], i64 0, i64 1 12738 // CHECK: [[TMP5:%.*]] = load <2 x 
double>, ptr [[ARRAYIDX2]], align 16 12739 // CHECK: [[TMP6:%.*]] = bitcast <2 x double> [[TMP5]] to <16 x i8> 12740 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 12741 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL3]], i64 0, i64 2 12742 // CHECK: [[TMP7:%.*]] = load <2 x double>, ptr [[ARRAYIDX4]], align 16 12743 // CHECK: [[TMP8:%.*]] = bitcast <2 x double> [[TMP7]] to <16 x i8> 12744 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x2x4_t, ptr [[__S1]], i32 0, i32 0 12745 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x double>], ptr [[VAL5]], i64 0, i64 3 12746 // CHECK: [[TMP9:%.*]] = load <2 x double>, ptr [[ARRAYIDX6]], align 16 12747 // CHECK: [[TMP10:%.*]] = bitcast <2 x double> [[TMP9]] to <16 x i8> 12748 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> 12749 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x double> 12750 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x double> 12751 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x double> 12752 // CHECK: call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> [[TMP11]], <2 x double> [[TMP12]], <2 x double> [[TMP13]], <2 x double> [[TMP14]], ptr %a) 12753 // CHECK: ret void 12754 void test_vst1q_f64_x4(float64_t *a, float64x2x4_t b) { 12755 vst1q_f64_x4(a, b); 12756 } 12757 12758 // CHECK-LABEL: @test_vst1q_p64_x4( 12759 // CHECK: [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16 12760 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16 12761 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[B]], i32 0, i32 0 12762 // CHECK: store [4 x <2 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 16 12763 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[__S1]], ptr align 16 [[B]], i64 64, i1 false) 12764 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 12765 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL]], i64 0, i64 0 12766 // CHECK: [[TMP3:%.*]] = load <2 x i64>, ptr [[ARRAYIDX]], align 16 12767 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8> 12768 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 12769 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL1]], i64 0, i64 1 12770 // CHECK: [[TMP5:%.*]] = load <2 x i64>, ptr [[ARRAYIDX2]], align 16 12771 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8> 12772 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 12773 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL3]], i64 0, i64 2 12774 // CHECK: [[TMP7:%.*]] = load <2 x i64>, ptr [[ARRAYIDX4]], align 16 12775 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8> 12776 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x2x4_t, ptr [[__S1]], i32 0, i32 0 12777 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i64>], ptr [[VAL5]], i64 0, i64 3 12778 // CHECK: [[TMP9:%.*]] = load <2 x i64>, ptr [[ARRAYIDX6]], align 16 12779 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8> 12780 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64> 12781 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64> 12782 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64> 
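// As in the f64 variant just above, all four <2 x i64> values are round-tripped through <16 x i8> before being handed to the llvm.aarch64.neon.st1x4.v2i64 call checked below.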
12783 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64> 12784 // CHECK: call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], ptr %a) 12785 // CHECK: ret void 12786 void test_vst1q_p64_x4(poly64_t *a, poly64x2x4_t b) { 12787 vst1q_p64_x4(a, b); 12788 } 12789 12790 // CHECK-LABEL: @test_vst1_f64_x4( 12791 // CHECK: [[B:%.*]] = alloca %struct.float64x1x4_t, align 8 12792 // CHECK: [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8 12793 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0 12794 // CHECK: store [4 x <1 x double>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 12795 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 12796 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 12797 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL]], i64 0, i64 0 12798 // CHECK: [[TMP3:%.*]] = load <1 x double>, ptr [[ARRAYIDX]], align 8 12799 // CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP3]] to <8 x i8> 12800 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 12801 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL1]], i64 0, i64 1 12802 // CHECK: [[TMP5:%.*]] = load <1 x double>, ptr [[ARRAYIDX2]], align 8 12803 // CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP5]] to <8 x i8> 12804 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 12805 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL3]], i64 0, i64 2 12806 // CHECK: [[TMP7:%.*]] = load <1 x double>, ptr [[ARRAYIDX4]], align 8 12807 // CHECK: [[TMP8:%.*]] = bitcast <1 x double> [[TMP7]] to <8 x i8> 12808 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.float64x1x4_t, ptr [[__S1]], i32 0, i32 0 12809 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x double>], ptr [[VAL5]], i64 0, i64 3 12810 // CHECK: [[TMP9:%.*]] = load <1 x double>, ptr [[ARRAYIDX6]], align 8 12811 // CHECK: [[TMP10:%.*]] = bitcast <1 x double> [[TMP9]] to <8 x i8> 12812 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> 12813 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> 12814 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x double> 12815 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x double> 12816 // CHECK: call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> [[TMP11]], <1 x double> [[TMP12]], <1 x double> [[TMP13]], <1 x double> [[TMP14]], ptr %a) 12817 // CHECK: ret void 12818 void test_vst1_f64_x4(float64_t *a, float64x1x4_t b) { 12819 vst1_f64_x4(a, b); 12820 } 12821 12822 // CHECK-LABEL: @test_vst1_p64_x4( 12823 // CHECK: [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8 12824 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8 12825 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[B]], i32 0, i32 0 12826 // CHECK: store [4 x <1 x i64>] [[B]].coerce, ptr [[COERCE_DIVE]], align 8 12827 // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[__S1]], ptr align 8 [[B]], i64 32, i1 false) 12828 // CHECK: [[VAL:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 12829 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL]], i64 0, i64 0 
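// Each of the four <1 x i64> elements is read back out of the local copy of the aggregate and bitcast via <8 x i8> on its way into the llvm.aarch64.neon.st1x4.v1i64 call checked below.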
12830 // CHECK: [[TMP3:%.*]] = load <1 x i64>, ptr [[ARRAYIDX]], align 8 12831 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8> 12832 // CHECK: [[VAL1:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 12833 // CHECK: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL1]], i64 0, i64 1 12834 // CHECK: [[TMP5:%.*]] = load <1 x i64>, ptr [[ARRAYIDX2]], align 8 12835 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8> 12836 // CHECK: [[VAL3:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 12837 // CHECK: [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL3]], i64 0, i64 2 12838 // CHECK: [[TMP7:%.*]] = load <1 x i64>, ptr [[ARRAYIDX4]], align 8 12839 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8> 12840 // CHECK: [[VAL5:%.*]] = getelementptr inbounds nuw %struct.poly64x1x4_t, ptr [[__S1]], i32 0, i32 0 12841 // CHECK: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], ptr [[VAL5]], i64 0, i64 3 12842 // CHECK: [[TMP9:%.*]] = load <1 x i64>, ptr [[ARRAYIDX6]], align 8 12843 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8> 12844 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64> 12845 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64> 12846 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64> 12847 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64> 12848 // CHECK: call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], ptr %a) 12849 // CHECK: ret void 12850 void test_vst1_p64_x4(poly64_t *a, poly64x1x4_t b) { 12851 vst1_p64_x4(a, b); 12852 } 12853 12854 // CHECK-LABEL: @test_vceqd_s64( 12855 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b 12856 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 12857 // CHECK: ret i64 [[VCEQD_I]] 12858 uint64_t test_vceqd_s64(int64_t a, int64_t b) { 12859 return (uint64_t)vceqd_s64(a, b); 12860 } 12861 12862 // CHECK-LABEL: @test_vceqd_u64( 12863 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, %b 12864 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 12865 // CHECK: ret i64 [[VCEQD_I]] 12866 uint64_t test_vceqd_u64(uint64_t a, uint64_t b) { 12867 return (int64_t)vceqd_u64(a, b); 12868 } 12869 12870 // CHECK-LABEL: @test_vceqzd_s64( 12871 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0 12872 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64 12873 // CHECK: ret i64 [[VCEQZ_I]] 12874 uint64_t test_vceqzd_s64(int64_t a) { 12875 return (uint64_t)vceqzd_s64(a); 12876 } 12877 12878 // CHECK-LABEL: @test_vceqzd_u64( 12879 // CHECK: [[TMP0:%.*]] = icmp eq i64 %a, 0 12880 // CHECK: [[VCEQZD_I:%.*]] = sext i1 [[TMP0]] to i64 12881 // CHECK: ret i64 [[VCEQZD_I]] 12882 int64_t test_vceqzd_u64(int64_t a) { 12883 return (int64_t)vceqzd_u64(a); 12884 } 12885 12886 // CHECK-LABEL: @test_vcged_s64( 12887 // CHECK: [[TMP0:%.*]] = icmp sge i64 %a, %b 12888 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 12889 // CHECK: ret i64 [[VCEQD_I]] 12890 uint64_t test_vcged_s64(int64_t a, int64_t b) { 12891 return (uint64_t)vcged_s64(a, b); 12892 } 12893 12894 // CHECK-LABEL: @test_vcged_u64( 12895 // CHECK: [[TMP0:%.*]] = icmp uge i64 %a, %b 12896 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 12897 // CHECK: ret i64 [[VCEQD_I]] 12898 uint64_t test_vcged_u64(uint64_t a, uint64_t b) { 12899 return (uint64_t)vcged_u64(a, b); 12900 } 12901 12902 // CHECK-LABEL: @test_vcgezd_s64( 12903 // CHECK: 
[[TMP0:%.*]] = icmp sge i64 %a, 0 12904 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64 12905 // CHECK: ret i64 [[VCGEZ_I]] 12906 uint64_t test_vcgezd_s64(int64_t a) { 12907 return (uint64_t)vcgezd_s64(a); 12908 } 12909 12910 // CHECK-LABEL: @test_vcgtd_s64( 12911 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, %b 12912 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 12913 // CHECK: ret i64 [[VCEQD_I]] 12914 uint64_t test_vcgtd_s64(int64_t a, int64_t b) { 12915 return (uint64_t)vcgtd_s64(a, b); 12916 } 12917 12918 // CHECK-LABEL: @test_vcgtd_u64( 12919 // CHECK: [[TMP0:%.*]] = icmp ugt i64 %a, %b 12920 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 12921 // CHECK: ret i64 [[VCEQD_I]] 12922 uint64_t test_vcgtd_u64(uint64_t a, uint64_t b) { 12923 return (uint64_t)vcgtd_u64(a, b); 12924 } 12925 12926 // CHECK-LABEL: @test_vcgtzd_s64( 12927 // CHECK: [[TMP0:%.*]] = icmp sgt i64 %a, 0 12928 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64 12929 // CHECK: ret i64 [[VCGTZ_I]] 12930 uint64_t test_vcgtzd_s64(int64_t a) { 12931 return (uint64_t)vcgtzd_s64(a); 12932 } 12933 12934 // CHECK-LABEL: @test_vcled_s64( 12935 // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, %b 12936 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 12937 // CHECK: ret i64 [[VCEQD_I]] 12938 uint64_t test_vcled_s64(int64_t a, int64_t b) { 12939 return (uint64_t)vcled_s64(a, b); 12940 } 12941 12942 // CHECK-LABEL: @test_vcled_u64( 12943 // CHECK: [[TMP0:%.*]] = icmp ule i64 %a, %b 12944 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 12945 // CHECK: ret i64 [[VCEQD_I]] 12946 uint64_t test_vcled_u64(uint64_t a, uint64_t b) { 12947 return (uint64_t)vcled_u64(a, b); 12948 } 12949 12950 // CHECK-LABEL: @test_vclezd_s64( 12951 // CHECK: [[TMP0:%.*]] = icmp sle i64 %a, 0 12952 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64 12953 // CHECK: ret i64 [[VCLEZ_I]] 12954 uint64_t test_vclezd_s64(int64_t a) { 12955 return (uint64_t)vclezd_s64(a); 12956 } 12957 12958 // CHECK-LABEL: @test_vcltd_s64( 12959 // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, %b 12960 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 12961 // CHECK: ret i64 [[VCEQD_I]] 12962 uint64_t test_vcltd_s64(int64_t a, int64_t b) { 12963 return (uint64_t)vcltd_s64(a, b); 12964 } 12965 12966 // CHECK-LABEL: @test_vcltd_u64( 12967 // CHECK: [[TMP0:%.*]] = icmp ult i64 %a, %b 12968 // CHECK: [[VCEQD_I:%.*]] = sext i1 [[TMP0]] to i64 12969 // CHECK: ret i64 [[VCEQD_I]] 12970 uint64_t test_vcltd_u64(uint64_t a, uint64_t b) { 12971 return (uint64_t)vcltd_u64(a, b); 12972 } 12973 12974 // CHECK-LABEL: @test_vcltzd_s64( 12975 // CHECK: [[TMP0:%.*]] = icmp slt i64 %a, 0 12976 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64 12977 // CHECK: ret i64 [[VCLTZ_I]] 12978 uint64_t test_vcltzd_s64(int64_t a) { 12979 return (uint64_t)vcltzd_s64(a); 12980 } 12981 12982 // CHECK-LABEL: @test_vtstd_s64( 12983 // CHECK: [[TMP0:%.*]] = and i64 %a, %b 12984 // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0 12985 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64 12986 // CHECK: ret i64 [[VTSTD_I]] 12987 uint64_t test_vtstd_s64(int64_t a, int64_t b) { 12988 return (uint64_t)vtstd_s64(a, b); 12989 } 12990 12991 // CHECK-LABEL: @test_vtstd_u64( 12992 // CHECK: [[TMP0:%.*]] = and i64 %a, %b 12993 // CHECK: [[TMP1:%.*]] = icmp ne i64 [[TMP0]], 0 12994 // CHECK: [[VTSTD_I:%.*]] = sext i1 [[TMP1]] to i64 12995 // CHECK: ret i64 [[VTSTD_I]] 12996 uint64_t test_vtstd_u64(uint64_t a, uint64_t b) { 12997 return (uint64_t)vtstd_u64(a, b); 12998 } 12999 13000 // CHECK-LABEL: @test_vabsd_s64( 13001 
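// vabsd_s64 maps directly onto the llvm.aarch64.neon.abs.i64 intrinsic, as the check below shows.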
// CHECK: [[VABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.abs.i64(i64 %a) 13002 // CHECK: ret i64 [[VABSD_S64_I]] 13003 int64_t test_vabsd_s64(int64_t a) { 13004 return (int64_t)vabsd_s64(a); 13005 } 13006 13007 // CHECK-LABEL: @test_vqabsb_s8( 13008 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 13009 // CHECK: [[VQABSB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqabs.v8i8(<8 x i8> [[TMP0]]) 13010 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQABSB_S8_I]], i64 0 13011 // CHECK: ret i8 [[TMP1]] 13012 int8_t test_vqabsb_s8(int8_t a) { 13013 return (int8_t)vqabsb_s8(a); 13014 } 13015 13016 // CHECK-LABEL: @test_vqabsh_s16( 13017 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 13018 // CHECK: [[VQABSH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqabs.v4i16(<4 x i16> [[TMP0]]) 13019 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQABSH_S16_I]], i64 0 13020 // CHECK: ret i16 [[TMP1]] 13021 int16_t test_vqabsh_s16(int16_t a) { 13022 return (int16_t)vqabsh_s16(a); 13023 } 13024 13025 // CHECK-LABEL: @test_vqabss_s32( 13026 // CHECK: [[VQABSS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqabs.i32(i32 %a) 13027 // CHECK: ret i32 [[VQABSS_S32_I]] 13028 int32_t test_vqabss_s32(int32_t a) { 13029 return (int32_t)vqabss_s32(a); 13030 } 13031 13032 // CHECK-LABEL: @test_vqabsd_s64( 13033 // CHECK: [[VQABSD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqabs.i64(i64 %a) 13034 // CHECK: ret i64 [[VQABSD_S64_I]] 13035 int64_t test_vqabsd_s64(int64_t a) { 13036 return (int64_t)vqabsd_s64(a); 13037 } 13038 13039 // CHECK-LABEL: @test_vnegd_s64( 13040 // CHECK: [[VNEGD_I:%.*]] = sub i64 0, %a 13041 // CHECK: ret i64 [[VNEGD_I]] 13042 int64_t test_vnegd_s64(int64_t a) { 13043 return (int64_t)vnegd_s64(a); 13044 } 13045 13046 // CHECK-LABEL: @test_vqnegb_s8( 13047 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 13048 // CHECK: [[VQNEGB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqneg.v8i8(<8 x i8> [[TMP0]]) 13049 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQNEGB_S8_I]], i64 0 13050 // CHECK: ret i8 [[TMP1]] 13051 int8_t test_vqnegb_s8(int8_t a) { 13052 return (int8_t)vqnegb_s8(a); 13053 } 13054 13055 // CHECK-LABEL: @test_vqnegh_s16( 13056 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 13057 // CHECK: [[VQNEGH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqneg.v4i16(<4 x i16> [[TMP0]]) 13058 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQNEGH_S16_I]], i64 0 13059 // CHECK: ret i16 [[TMP1]] 13060 int16_t test_vqnegh_s16(int16_t a) { 13061 return (int16_t)vqnegh_s16(a); 13062 } 13063 13064 // CHECK-LABEL: @test_vqnegs_s32( 13065 // CHECK: [[VQNEGS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqneg.i32(i32 %a) 13066 // CHECK: ret i32 [[VQNEGS_S32_I]] 13067 int32_t test_vqnegs_s32(int32_t a) { 13068 return (int32_t)vqnegs_s32(a); 13069 } 13070 13071 // CHECK-LABEL: @test_vqnegd_s64( 13072 // CHECK: [[VQNEGD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.sqneg.i64(i64 %a) 13073 // CHECK: ret i64 [[VQNEGD_S64_I]] 13074 int64_t test_vqnegd_s64(int64_t a) { 13075 return (int64_t)vqnegd_s64(a); 13076 } 13077 13078 // CHECK-LABEL: @test_vuqaddb_s8( 13079 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 13080 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 13081 // CHECK: [[VUQADDB_S8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) 13082 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VUQADDB_S8_I]], i64 0 13083 // 
CHECK: ret i8 [[TMP2]] 13084 int8_t test_vuqaddb_s8(int8_t a, uint8_t b) { 13085 return (int8_t)vuqaddb_s8(a, b); 13086 } 13087 13088 // CHECK-LABEL: @test_vuqaddh_s16( 13089 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 13090 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 13091 // CHECK: [[VUQADDH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 13092 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VUQADDH_S16_I]], i64 0 13093 // CHECK: ret i16 [[TMP2]] 13094 int16_t test_vuqaddh_s16(int16_t a, uint16_t b) { 13095 return (int16_t)vuqaddh_s16(a, b); 13096 } 13097 13098 // CHECK-LABEL: @test_vuqadds_s32( 13099 // CHECK: [[VUQADDS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.suqadd.i32(i32 %a, i32 %b) 13100 // CHECK: ret i32 [[VUQADDS_S32_I]] 13101 int32_t test_vuqadds_s32(int32_t a, uint32_t b) { 13102 return (int32_t)vuqadds_s32(a, b); 13103 } 13104 13105 // CHECK-LABEL: @test_vuqaddd_s64( 13106 // CHECK: [[VUQADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.suqadd.i64(i64 %a, i64 %b) 13107 // CHECK: ret i64 [[VUQADDD_S64_I]] 13108 int64_t test_vuqaddd_s64(int64_t a, uint64_t b) { 13109 return (int64_t)vuqaddd_s64(a, b); 13110 } 13111 13112 // CHECK-LABEL: @test_vsqaddb_u8( 13113 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 13114 // CHECK: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 %b, i64 0 13115 // CHECK: [[VSQADDB_U8_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]]) 13116 // CHECK: [[TMP2:%.*]] = extractelement <8 x i8> [[VSQADDB_U8_I]], i64 0 13117 // CHECK: ret i8 [[TMP2]] 13118 uint8_t test_vsqaddb_u8(uint8_t a, int8_t b) { 13119 return (uint8_t)vsqaddb_u8(a, b); 13120 } 13121 13122 // CHECK-LABEL: @test_vsqaddh_u16( 13123 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 13124 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 13125 // CHECK: [[VSQADDH_U16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 13126 // CHECK: [[TMP2:%.*]] = extractelement <4 x i16> [[VSQADDH_U16_I]], i64 0 13127 // CHECK: ret i16 [[TMP2]] 13128 uint16_t test_vsqaddh_u16(uint16_t a, int16_t b) { 13129 return (uint16_t)vsqaddh_u16(a, b); 13130 } 13131 13132 // CHECK-LABEL: @test_vsqadds_u32( 13133 // CHECK: [[VSQADDS_U32_I:%.*]] = call i32 @llvm.aarch64.neon.usqadd.i32(i32 %a, i32 %b) 13134 // CHECK: ret i32 [[VSQADDS_U32_I]] 13135 uint32_t test_vsqadds_u32(uint32_t a, int32_t b) { 13136 return (uint32_t)vsqadds_u32(a, b); 13137 } 13138 13139 // CHECK-LABEL: @test_vsqaddd_u64( 13140 // CHECK: [[VSQADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.usqadd.i64(i64 %a, i64 %b) 13141 // CHECK: ret i64 [[VSQADDD_U64_I]] 13142 uint64_t test_vsqaddd_u64(uint64_t a, int64_t b) { 13143 return (uint64_t)vsqaddd_u64(a, b); 13144 } 13145 13146 // CHECK-LABEL: @test_vqdmlalh_s16( 13147 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 13148 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0 13149 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 13150 // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0 13151 // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %a, i32 [[LANE0_I]]) 13152 // CHECK: ret i32 [[VQDMLXL1_I]] 13153 int32_t test_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) { 13154 return 
(int32_t)vqdmlalh_s16(a, b, c); 13155 } 13156 13157 // CHECK-LABEL: @test_vqdmlals_s32( 13158 // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) 13159 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL_I]]) 13160 // CHECK: ret i64 [[VQDMLXL1_I]] 13161 int64_t test_vqdmlals_s32(int64_t a, int32_t b, int32_t c) { 13162 return (int64_t)vqdmlals_s32(a, b, c); 13163 } 13164 13165 // CHECK-LABEL: @test_vqdmlslh_s16( 13166 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 13167 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %c, i64 0 13168 // CHECK: [[VQDMLXL_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 13169 // CHECK: [[LANE0_I:%.*]] = extractelement <4 x i32> [[VQDMLXL_I]], i64 0 13170 // CHECK: [[VQDMLXL1_I:%.*]] = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %a, i32 [[LANE0_I]]) 13171 // CHECK: ret i32 [[VQDMLXL1_I]] 13172 int32_t test_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) { 13173 return (int32_t)vqdmlslh_s16(a, b, c); 13174 } 13175 13176 // CHECK-LABEL: @test_vqdmlsls_s32( 13177 // CHECK: [[VQDMLXL_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 %c) 13178 // CHECK: [[VQDMLXL1_I:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL_I]]) 13179 // CHECK: ret i64 [[VQDMLXL1_I]] 13180 int64_t test_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) { 13181 return (int64_t)vqdmlsls_s32(a, b, c); 13182 } 13183 13184 // CHECK-LABEL: @test_vqdmullh_s16( 13185 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 13186 // CHECK: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 %b, i64 0 13187 // CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) 13188 // CHECK: [[TMP2:%.*]] = extractelement <4 x i32> [[VQDMULLH_S16_I]], i64 0 13189 // CHECK: ret i32 [[TMP2]] 13190 int32_t test_vqdmullh_s16(int16_t a, int16_t b) { 13191 return (int32_t)vqdmullh_s16(a, b); 13192 } 13193 13194 // CHECK-LABEL: @test_vqdmulls_s32( 13195 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 %b) 13196 // CHECK: ret i64 [[VQDMULLS_S32_I]] 13197 int64_t test_vqdmulls_s32(int32_t a, int32_t b) { 13198 return (int64_t)vqdmulls_s32(a, b); 13199 } 13200 13201 // CHECK-LABEL: @test_vqmovunh_s16( 13202 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 13203 // CHECK: [[VQMOVUNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtun.v8i8(<8 x i16> [[TMP0]]) 13204 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVUNH_S16_I]], i64 0 13205 // CHECK: ret i8 [[TMP1]] 13206 uint8_t test_vqmovunh_s16(int16_t a) { 13207 return (uint8_t)vqmovunh_s16(a); 13208 } 13209 13210 // CHECK-LABEL: @test_vqmovuns_s32( 13211 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 13212 // CHECK: [[VQMOVUNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtun.v4i16(<4 x i32> [[TMP0]]) 13213 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVUNS_S32_I]], i64 0 13214 // CHECK: ret i16 [[TMP1]] 13215 uint16_t test_vqmovuns_s32(int32_t a) { 13216 return (uint16_t)vqmovuns_s32(a); 13217 } 13218 13219 // CHECK-LABEL: @test_vqmovund_s64( 13220 // CHECK: [[VQMOVUND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %a) 13221 // CHECK: ret i32 [[VQMOVUND_S64_I]] 13222 uint32_t test_vqmovund_s64(int64_t a) { 13223 return (uint32_t)vqmovund_s64(a); 13224 } 13225 13226 // CHECK-LABEL: 
@test_vqmovnh_s16( 13227 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 13228 // CHECK: [[VQMOVNH_S16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqxtn.v8i8(<8 x i16> [[TMP0]]) 13229 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_S16_I]], i64 0 13230 // CHECK: ret i8 [[TMP1]] 13231 int8_t test_vqmovnh_s16(int16_t a) { 13232 return (int8_t)vqmovnh_s16(a); 13233 } 13234 13235 // CHECK-LABEL: @test_vqmovns_s32( 13236 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 13237 // CHECK: [[VQMOVNS_S32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqxtn.v4i16(<4 x i32> [[TMP0]]) 13238 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_S32_I]], i64 0 13239 // CHECK: ret i16 [[TMP1]] 13240 int16_t test_vqmovns_s32(int32_t a) { 13241 return (int16_t)vqmovns_s32(a); 13242 } 13243 13244 // CHECK-LABEL: @test_vqmovnd_s64( 13245 // CHECK: [[VQMOVND_S64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %a) 13246 // CHECK: ret i32 [[VQMOVND_S64_I]] 13247 int32_t test_vqmovnd_s64(int64_t a) { 13248 return (int32_t)vqmovnd_s64(a); 13249 } 13250 13251 // CHECK-LABEL: @test_vqmovnh_u16( 13252 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 13253 // CHECK: [[VQMOVNH_U16_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqxtn.v8i8(<8 x i16> [[TMP0]]) 13254 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQMOVNH_U16_I]], i64 0 13255 // CHECK: ret i8 [[TMP1]] 13256 int8_t test_vqmovnh_u16(int16_t a) { 13257 return (int8_t)vqmovnh_u16(a); 13258 } 13259 13260 // CHECK-LABEL: @test_vqmovns_u32( 13261 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 13262 // CHECK: [[VQMOVNS_U32_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqxtn.v4i16(<4 x i32> [[TMP0]]) 13263 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQMOVNS_U32_I]], i64 0 13264 // CHECK: ret i16 [[TMP1]] 13265 int16_t test_vqmovns_u32(int32_t a) { 13266 return (int16_t)vqmovns_u32(a); 13267 } 13268 13269 // CHECK-LABEL: @test_vqmovnd_u64( 13270 // CHECK: [[VQMOVND_U64_I:%.*]] = call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %a) 13271 // CHECK: ret i32 [[VQMOVND_U64_I]] 13272 int32_t test_vqmovnd_u64(int64_t a) { 13273 return (int32_t)vqmovnd_u64(a); 13274 } 13275 13276 // CHECK-LABEL: @test_vceqs_f32( 13277 // CHECK: [[TMP0:%.*]] = fcmp oeq float %a, %b 13278 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 13279 // CHECK: ret i32 [[VCMPD_I]] 13280 uint32_t test_vceqs_f32(float32_t a, float32_t b) { 13281 return (uint32_t)vceqs_f32(a, b); 13282 } 13283 13284 // CHECK-LABEL: @test_vceqd_f64( 13285 // CHECK: [[TMP0:%.*]] = fcmp oeq double %a, %b 13286 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 13287 // CHECK: ret i64 [[VCMPD_I]] 13288 uint64_t test_vceqd_f64(float64_t a, float64_t b) { 13289 return (uint64_t)vceqd_f64(a, b); 13290 } 13291 13292 // CHECK-LABEL: @test_vceqzs_f32( 13293 // CHECK: [[TMP0:%.*]] = fcmp oeq float %a, 0.000000e+00 13294 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i32 13295 // CHECK: ret i32 [[VCEQZ_I]] 13296 uint32_t test_vceqzs_f32(float32_t a) { 13297 return (uint32_t)vceqzs_f32(a); 13298 } 13299 13300 // CHECK-LABEL: @test_vceqzd_f64( 13301 // CHECK: [[TMP0:%.*]] = fcmp oeq double %a, 0.000000e+00 13302 // CHECK: [[VCEQZ_I:%.*]] = sext i1 [[TMP0]] to i64 13303 // CHECK: ret i64 [[VCEQZ_I]] 13304 uint64_t test_vceqzd_f64(float64_t a) { 13305 return (uint64_t)vceqzd_f64(a); 13306 } 13307 13308 // CHECK-LABEL: @test_vcges_f32( 13309 // CHECK: [[TMP0:%.*]] = fcmp oge float %a, %b 13310 // CHECK: 
[[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 13311 // CHECK: ret i32 [[VCMPD_I]] 13312 uint32_t test_vcges_f32(float32_t a, float32_t b) { 13313 return (uint32_t)vcges_f32(a, b); 13314 } 13315 13316 // CHECK-LABEL: @test_vcged_f64( 13317 // CHECK: [[TMP0:%.*]] = fcmp oge double %a, %b 13318 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 13319 // CHECK: ret i64 [[VCMPD_I]] 13320 uint64_t test_vcged_f64(float64_t a, float64_t b) { 13321 return (uint64_t)vcged_f64(a, b); 13322 } 13323 13324 // CHECK-LABEL: @test_vcgezs_f32( 13325 // CHECK: [[TMP0:%.*]] = fcmp oge float %a, 0.000000e+00 13326 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i32 13327 // CHECK: ret i32 [[VCGEZ_I]] 13328 uint32_t test_vcgezs_f32(float32_t a) { 13329 return (uint32_t)vcgezs_f32(a); 13330 } 13331 13332 // CHECK-LABEL: @test_vcgezd_f64( 13333 // CHECK: [[TMP0:%.*]] = fcmp oge double %a, 0.000000e+00 13334 // CHECK: [[VCGEZ_I:%.*]] = sext i1 [[TMP0]] to i64 13335 // CHECK: ret i64 [[VCGEZ_I]] 13336 uint64_t test_vcgezd_f64(float64_t a) { 13337 return (uint64_t)vcgezd_f64(a); 13338 } 13339 13340 // CHECK-LABEL: @test_vcgts_f32( 13341 // CHECK: [[TMP0:%.*]] = fcmp ogt float %a, %b 13342 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 13343 // CHECK: ret i32 [[VCMPD_I]] 13344 uint32_t test_vcgts_f32(float32_t a, float32_t b) { 13345 return (uint32_t)vcgts_f32(a, b); 13346 } 13347 13348 // CHECK-LABEL: @test_vcgtd_f64( 13349 // CHECK: [[TMP0:%.*]] = fcmp ogt double %a, %b 13350 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 13351 // CHECK: ret i64 [[VCMPD_I]] 13352 uint64_t test_vcgtd_f64(float64_t a, float64_t b) { 13353 return (uint64_t)vcgtd_f64(a, b); 13354 } 13355 13356 // CHECK-LABEL: @test_vcgtzs_f32( 13357 // CHECK: [[TMP0:%.*]] = fcmp ogt float %a, 0.000000e+00 13358 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i32 13359 // CHECK: ret i32 [[VCGTZ_I]] 13360 uint32_t test_vcgtzs_f32(float32_t a) { 13361 return (uint32_t)vcgtzs_f32(a); 13362 } 13363 13364 // CHECK-LABEL: @test_vcgtzd_f64( 13365 // CHECK: [[TMP0:%.*]] = fcmp ogt double %a, 0.000000e+00 13366 // CHECK: [[VCGTZ_I:%.*]] = sext i1 [[TMP0]] to i64 13367 // CHECK: ret i64 [[VCGTZ_I]] 13368 uint64_t test_vcgtzd_f64(float64_t a) { 13369 return (uint64_t)vcgtzd_f64(a); 13370 } 13371 13372 // CHECK-LABEL: @test_vcles_f32( 13373 // CHECK: [[TMP0:%.*]] = fcmp ole float %a, %b 13374 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 13375 // CHECK: ret i32 [[VCMPD_I]] 13376 uint32_t test_vcles_f32(float32_t a, float32_t b) { 13377 return (uint32_t)vcles_f32(a, b); 13378 } 13379 13380 // CHECK-LABEL: @test_vcled_f64( 13381 // CHECK: [[TMP0:%.*]] = fcmp ole double %a, %b 13382 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 13383 // CHECK: ret i64 [[VCMPD_I]] 13384 uint64_t test_vcled_f64(float64_t a, float64_t b) { 13385 return (uint64_t)vcled_f64(a, b); 13386 } 13387 13388 // CHECK-LABEL: @test_vclezs_f32( 13389 // CHECK: [[TMP0:%.*]] = fcmp ole float %a, 0.000000e+00 13390 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i32 13391 // CHECK: ret i32 [[VCLEZ_I]] 13392 uint32_t test_vclezs_f32(float32_t a) { 13393 return (uint32_t)vclezs_f32(a); 13394 } 13395 13396 // CHECK-LABEL: @test_vclezd_f64( 13397 // CHECK: [[TMP0:%.*]] = fcmp ole double %a, 0.000000e+00 13398 // CHECK: [[VCLEZ_I:%.*]] = sext i1 [[TMP0]] to i64 13399 // CHECK: ret i64 [[VCLEZ_I]] 13400 uint64_t test_vclezd_f64(float64_t a) { 13401 return (uint64_t)vclezd_f64(a); 13402 } 13403 13404 // CHECK-LABEL: @test_vclts_f32( 13405 // CHECK: [[TMP0:%.*]] = fcmp olt float %a, %b 13406 
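// As with the other scalar floating-point compares in this block, the i1 result of the fcmp is sign-extended into an all-ones or all-zeros integer mask.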
// CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i32 13407 // CHECK: ret i32 [[VCMPD_I]] 13408 uint32_t test_vclts_f32(float32_t a, float32_t b) { 13409 return (uint32_t)vclts_f32(a, b); 13410 } 13411 13412 // CHECK-LABEL: @test_vcltd_f64( 13413 // CHECK: [[TMP0:%.*]] = fcmp olt double %a, %b 13414 // CHECK: [[VCMPD_I:%.*]] = sext i1 [[TMP0]] to i64 13415 // CHECK: ret i64 [[VCMPD_I]] 13416 uint64_t test_vcltd_f64(float64_t a, float64_t b) { 13417 return (uint64_t)vcltd_f64(a, b); 13418 } 13419 13420 // CHECK-LABEL: @test_vcltzs_f32( 13421 // CHECK: [[TMP0:%.*]] = fcmp olt float %a, 0.000000e+00 13422 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i32 13423 // CHECK: ret i32 [[VCLTZ_I]] 13424 uint32_t test_vcltzs_f32(float32_t a) { 13425 return (uint32_t)vcltzs_f32(a); 13426 } 13427 13428 // CHECK-LABEL: @test_vcltzd_f64( 13429 // CHECK: [[TMP0:%.*]] = fcmp olt double %a, 0.000000e+00 13430 // CHECK: [[VCLTZ_I:%.*]] = sext i1 [[TMP0]] to i64 13431 // CHECK: ret i64 [[VCLTZ_I]] 13432 uint64_t test_vcltzd_f64(float64_t a) { 13433 return (uint64_t)vcltzd_f64(a); 13434 } 13435 13436 // CHECK-LABEL: @test_vcages_f32( 13437 // CHECK: [[VCAGES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %a, float %b) 13438 // CHECK: ret i32 [[VCAGES_F32_I]] 13439 uint32_t test_vcages_f32(float32_t a, float32_t b) { 13440 return (uint32_t)vcages_f32(a, b); 13441 } 13442 13443 // CHECK-LABEL: @test_vcaged_f64( 13444 // CHECK: [[VCAGED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %a, double %b) 13445 // CHECK: ret i64 [[VCAGED_F64_I]] 13446 uint64_t test_vcaged_f64(float64_t a, float64_t b) { 13447 return (uint64_t)vcaged_f64(a, b); 13448 } 13449 13450 // CHECK-LABEL: @test_vcagts_f32( 13451 // CHECK: [[VCAGTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %a, float %b) 13452 // CHECK: ret i32 [[VCAGTS_F32_I]] 13453 uint32_t test_vcagts_f32(float32_t a, float32_t b) { 13454 return (uint32_t)vcagts_f32(a, b); 13455 } 13456 13457 // CHECK-LABEL: @test_vcagtd_f64( 13458 // CHECK: [[VCAGTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %a, double %b) 13459 // CHECK: ret i64 [[VCAGTD_F64_I]] 13460 uint64_t test_vcagtd_f64(float64_t a, float64_t b) { 13461 return (uint64_t)vcagtd_f64(a, b); 13462 } 13463 13464 // CHECK-LABEL: @test_vcales_f32( 13465 // CHECK: [[VCALES_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facge.i32.f32(float %b, float %a) 13466 // CHECK: ret i32 [[VCALES_F32_I]] 13467 uint32_t test_vcales_f32(float32_t a, float32_t b) { 13468 return (uint32_t)vcales_f32(a, b); 13469 } 13470 13471 // CHECK-LABEL: @test_vcaled_f64( 13472 // CHECK: [[VCALED_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facge.i64.f64(double %b, double %a) 13473 // CHECK: ret i64 [[VCALED_F64_I]] 13474 uint64_t test_vcaled_f64(float64_t a, float64_t b) { 13475 return (uint64_t)vcaled_f64(a, b); 13476 } 13477 13478 // CHECK-LABEL: @test_vcalts_f32( 13479 // CHECK: [[VCALTS_F32_I:%.*]] = call i32 @llvm.aarch64.neon.facgt.i32.f32(float %b, float %a) 13480 // CHECK: ret i32 [[VCALTS_F32_I]] 13481 uint32_t test_vcalts_f32(float32_t a, float32_t b) { 13482 return (uint32_t)vcalts_f32(a, b); 13483 } 13484 13485 // CHECK-LABEL: @test_vcaltd_f64( 13486 // CHECK: [[VCALTD_F64_I:%.*]] = call i64 @llvm.aarch64.neon.facgt.i64.f64(double %b, double %a) 13487 // CHECK: ret i64 [[VCALTD_F64_I]] 13488 uint64_t test_vcaltd_f64(float64_t a, float64_t b) { 13489 return (uint64_t)vcaltd_f64(a, b); 13490 } 13491 13492 // CHECK-LABEL: @test_vshrd_n_s64( 13493 // CHECK: [[SHRD_N:%.*]] = ashr i64 %a, 1 
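// Scalar vshrd_n shifts need no intrinsic call: they fold to a plain ashr (signed) or lshr (unsigned), whereas the rounding vrshrd_n variants further down still call llvm.aarch64.neon.srshl/urshl.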
13494 // CHECK: ret i64 [[SHRD_N]] 13495 int64_t test_vshrd_n_s64(int64_t a) { 13496 return (int64_t)vshrd_n_s64(a, 1); 13497 } 13498 13499 // CHECK-LABEL: @test_vshr_n_s64( 13500 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13501 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13502 // CHECK: [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], splat (i64 1) 13503 // CHECK: ret <1 x i64> [[VSHR_N]] 13504 int64x1_t test_vshr_n_s64(int64x1_t a) { 13505 return vshr_n_s64(a, 1); 13506 } 13507 13508 // CHECK-LABEL: @test_vshrd_n_u64( 13509 // CHECK: ret i64 0 13510 uint64_t test_vshrd_n_u64(uint64_t a) { 13511 return (uint64_t)vshrd_n_u64(a, 64); 13512 } 13513 13514 // CHECK-LABEL: @test_vshrd_n_u64_2( 13515 // CHECK: ret i64 0 13516 uint64_t test_vshrd_n_u64_2() { 13517 uint64_t a = UINT64_C(0xf000000000000000); 13518 return vshrd_n_u64(a, 64); 13519 } 13520 13521 // CHECK-LABEL: @test_vshr_n_u64( 13522 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13523 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13524 // CHECK: [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], splat (i64 1) 13525 // CHECK: ret <1 x i64> [[VSHR_N]] 13526 uint64x1_t test_vshr_n_u64(uint64x1_t a) { 13527 return vshr_n_u64(a, 1); 13528 } 13529 13530 // CHECK-LABEL: @test_vrshrd_n_s64( 13531 // CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %a, i64 -63) 13532 // CHECK: ret i64 [[VRSHR_N]] 13533 int64_t test_vrshrd_n_s64(int64_t a) { 13534 return (int64_t)vrshrd_n_s64(a, 63); 13535 } 13536 13537 // CHECK-LABEL: @test_vrshr_n_s64( 13538 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13539 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13540 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) 13541 // CHECK: ret <1 x i64> [[VRSHR_N1]] 13542 int64x1_t test_vrshr_n_s64(int64x1_t a) { 13543 return vrshr_n_s64(a, 1); 13544 } 13545 13546 // CHECK-LABEL: @test_vrshrd_n_u64( 13547 // CHECK: [[VRSHR_N:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %a, i64 -63) 13548 // CHECK: ret i64 [[VRSHR_N]] 13549 uint64_t test_vrshrd_n_u64(uint64_t a) { 13550 return (uint64_t)vrshrd_n_u64(a, 63); 13551 } 13552 13553 // CHECK-LABEL: @test_vrshr_n_u64( 13554 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13555 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13556 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) 13557 // CHECK: ret <1 x i64> [[VRSHR_N1]] 13558 uint64x1_t test_vrshr_n_u64(uint64x1_t a) { 13559 return vrshr_n_u64(a, 1); 13560 } 13561 13562 // CHECK-LABEL: @test_vsrad_n_s64( 13563 // CHECK: [[SHRD_N:%.*]] = ashr i64 %b, 63 13564 // CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]] 13565 // CHECK: ret i64 [[TMP0]] 13566 int64_t test_vsrad_n_s64(int64_t a, int64_t b) { 13567 return (int64_t)vsrad_n_s64(a, b, 63); 13568 } 13569 13570 // CHECK-LABEL: @test_vsra_n_s64( 13571 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13572 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 13573 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13574 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 13575 // CHECK: [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], splat (i64 1) 13576 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] 13577 // CHECK: ret <1 x i64> [[TMP4]] 13578 int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) { 13579 return 
vsra_n_s64(a, b, 1); 13580 } 13581 13582 // CHECK-LABEL: @test_vsrad_n_u64( 13583 // CHECK: [[SHRD_N:%.*]] = lshr i64 %b, 63 13584 // CHECK: [[TMP0:%.*]] = add i64 %a, [[SHRD_N]] 13585 // CHECK: ret i64 [[TMP0]] 13586 uint64_t test_vsrad_n_u64(uint64_t a, uint64_t b) { 13587 return (uint64_t)vsrad_n_u64(a, b, 63); 13588 } 13589 13590 // CHECK-LABEL: @test_vsrad_n_u64_2( 13591 // CHECK: ret i64 %a 13592 uint64_t test_vsrad_n_u64_2(uint64_t a, uint64_t b) { 13593 return (uint64_t)vsrad_n_u64(a, b, 64); 13594 } 13595 13596 // CHECK-LABEL: @test_vsra_n_u64( 13597 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13598 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 13599 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13600 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 13601 // CHECK: [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], splat (i64 1) 13602 // CHECK: [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]] 13603 // CHECK: ret <1 x i64> [[TMP4]] 13604 uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) { 13605 return vsra_n_u64(a, b, 1); 13606 } 13607 13608 // CHECK-LABEL: @test_vrsrad_n_s64( 13609 // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 %b, i64 -63) 13610 // CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]] 13611 // CHECK: ret i64 [[TMP1]] 13612 int64_t test_vrsrad_n_s64(int64_t a, int64_t b) { 13613 return (int64_t)vrsrad_n_s64(a, b, 63); 13614 } 13615 13616 // CHECK-LABEL: @test_vrsra_n_s64( 13617 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13618 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 13619 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 13620 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) 13621 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13622 // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]] 13623 // CHECK: ret <1 x i64> [[TMP3]] 13624 int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) { 13625 return vrsra_n_s64(a, b, 1); 13626 } 13627 13628 // CHECK-LABEL: @test_vrsrad_n_u64( 13629 // CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 %b, i64 -63) 13630 // CHECK: [[TMP1:%.*]] = add i64 %a, [[TMP0]] 13631 // CHECK: ret i64 [[TMP1]] 13632 uint64_t test_vrsrad_n_u64(uint64_t a, uint64_t b) { 13633 return (uint64_t)vrsrad_n_u64(a, b, 63); 13634 } 13635 13636 // CHECK-LABEL: @test_vrsra_n_u64( 13637 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13638 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 13639 // CHECK: [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 13640 // CHECK: [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> splat (i64 -1)) 13641 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13642 // CHECK: [[TMP3:%.*]] = add <1 x i64> [[TMP2]], [[VRSHR_N1]] 13643 // CHECK: ret <1 x i64> [[TMP3]] 13644 uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) { 13645 return vrsra_n_u64(a, b, 1); 13646 } 13647 13648 // CHECK-LABEL: @test_vshld_n_s64( 13649 // CHECK: [[SHLD_N:%.*]] = shl i64 %a, 1 13650 // CHECK: ret i64 [[SHLD_N]] 13651 int64_t test_vshld_n_s64(int64_t a) { 13652 return (int64_t)vshld_n_s64(a, 1); 13653 } 13654 13655 // CHECK-LABEL: @test_vshl_n_s64( 13656 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13657 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13658 // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], 
splat (i64 1) 13659 // CHECK: ret <1 x i64> [[VSHL_N]] 13660 int64x1_t test_vshl_n_s64(int64x1_t a) { 13661 return vshl_n_s64(a, 1); 13662 } 13663 13664 // CHECK-LABEL: @test_vshld_n_u64( 13665 // CHECK: [[SHLD_N:%.*]] = shl i64 %a, 63 13666 // CHECK: ret i64 [[SHLD_N]] 13667 uint64_t test_vshld_n_u64(uint64_t a) { 13668 return (uint64_t)vshld_n_u64(a, 63); 13669 } 13670 13671 // CHECK-LABEL: @test_vshl_n_u64( 13672 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13673 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13674 // CHECK: [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], splat (i64 1) 13675 // CHECK: ret <1 x i64> [[VSHL_N]] 13676 uint64x1_t test_vshl_n_u64(uint64x1_t a) { 13677 return vshl_n_u64(a, 1); 13678 } 13679 13680 // CHECK-LABEL: @test_vqshlb_n_s8( 13681 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 13682 // CHECK: [[VQSHLB_N_S8:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>) 13683 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_S8]], i64 0 13684 // CHECK: ret i8 [[TMP1]] 13685 int8_t test_vqshlb_n_s8(int8_t a) { 13686 return (int8_t)vqshlb_n_s8(a, 7); 13687 } 13688 13689 // CHECK-LABEL: @test_vqshlh_n_s16( 13690 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 13691 // CHECK: [[VQSHLH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>) 13692 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_S16]], i64 0 13693 // CHECK: ret i16 [[TMP1]] 13694 int16_t test_vqshlh_n_s16(int16_t a) { 13695 return (int16_t)vqshlh_n_s16(a, 15); 13696 } 13697 13698 // CHECK-LABEL: @test_vqshls_n_s32( 13699 // CHECK: [[VQSHLS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshl.i32(i32 %a, i32 31) 13700 // CHECK: ret i32 [[VQSHLS_N_S32]] 13701 int32_t test_vqshls_n_s32(int32_t a) { 13702 return (int32_t)vqshls_n_s32(a, 31); 13703 } 13704 13705 // CHECK-LABEL: @test_vqshld_n_s64( 13706 // CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %a, i64 63) 13707 // CHECK: ret i64 [[VQSHL_N]] 13708 int64_t test_vqshld_n_s64(int64_t a) { 13709 return (int64_t)vqshld_n_s64(a, 63); 13710 } 13711 13712 // CHECK-LABEL: @test_vqshl_n_s8( 13713 // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) 13714 // CHECK: ret <8 x i8> [[VQSHL_N]] 13715 int8x8_t test_vqshl_n_s8(int8x8_t a) { 13716 return vqshl_n_s8(a, 0); 13717 } 13718 13719 // CHECK-LABEL: @test_vqshlq_n_s8( 13720 // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) 13721 // CHECK: ret <16 x i8> [[VQSHL_N]] 13722 int8x16_t test_vqshlq_n_s8(int8x16_t a) { 13723 return vqshlq_n_s8(a, 0); 13724 } 13725 13726 // CHECK-LABEL: @test_vqshl_n_s16( 13727 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13728 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 13729 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer) 13730 // CHECK: ret <4 x i16> [[VQSHL_N1]] 13731 int16x4_t test_vqshl_n_s16(int16x4_t a) { 13732 return vqshl_n_s16(a, 0); 13733 } 13734 13735 // CHECK-LABEL: @test_vqshlq_n_s16( 13736 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13737 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13738 // CHECK: 
[[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer) 13739 // CHECK: ret <8 x i16> [[VQSHL_N1]] 13740 int16x8_t test_vqshlq_n_s16(int16x8_t a) { 13741 return vqshlq_n_s16(a, 0); 13742 } 13743 13744 // CHECK-LABEL: @test_vqshl_n_s32( 13745 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13746 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 13747 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer) 13748 // CHECK: ret <2 x i32> [[VQSHL_N1]] 13749 int32x2_t test_vqshl_n_s32(int32x2_t a) { 13750 return vqshl_n_s32(a, 0); 13751 } 13752 13753 // CHECK-LABEL: @test_vqshlq_n_s32( 13754 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13755 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13756 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer) 13757 // CHECK: ret <4 x i32> [[VQSHL_N1]] 13758 int32x4_t test_vqshlq_n_s32(int32x4_t a) { 13759 return vqshlq_n_s32(a, 0); 13760 } 13761 13762 // CHECK-LABEL: @test_vqshlq_n_s64( 13763 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13764 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13765 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer) 13766 // CHECK: ret <2 x i64> [[VQSHL_N1]] 13767 int64x2_t test_vqshlq_n_s64(int64x2_t a) { 13768 return vqshlq_n_s64(a, 0); 13769 } 13770 13771 // CHECK-LABEL: @test_vqshl_n_u8( 13772 // CHECK: [[VQSHL_N:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %a, <8 x i8> zeroinitializer) 13773 // CHECK: ret <8 x i8> [[VQSHL_N]] 13774 uint8x8_t test_vqshl_n_u8(uint8x8_t a) { 13775 return vqshl_n_u8(a, 0); 13776 } 13777 13778 // CHECK-LABEL: @test_vqshlq_n_u8( 13779 // CHECK: [[VQSHL_N:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %a, <16 x i8> zeroinitializer) 13780 // CHECK: ret <16 x i8> [[VQSHL_N]] 13781 uint8x16_t test_vqshlq_n_u8(uint8x16_t a) { 13782 return vqshlq_n_u8(a, 0); 13783 } 13784 13785 // CHECK-LABEL: @test_vqshl_n_u16( 13786 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 13787 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> 13788 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> zeroinitializer) 13789 // CHECK: ret <4 x i16> [[VQSHL_N1]] 13790 uint16x4_t test_vqshl_n_u16(uint16x4_t a) { 13791 return vqshl_n_u16(a, 0); 13792 } 13793 13794 // CHECK-LABEL: @test_vqshlq_n_u16( 13795 // CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> 13796 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> 13797 // CHECK: [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> zeroinitializer) 13798 // CHECK: ret <8 x i16> [[VQSHL_N1]] 13799 uint16x8_t test_vqshlq_n_u16(uint16x8_t a) { 13800 return vqshlq_n_u16(a, 0); 13801 } 13802 13803 // CHECK-LABEL: @test_vqshl_n_u32( 13804 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 13805 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> 13806 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> zeroinitializer) 13807 // CHECK: ret <2 x i32> [[VQSHL_N1]] 13808 uint32x2_t test_vqshl_n_u32(uint32x2_t a) { 13809 return vqshl_n_u32(a, 0); 13810 } 13811 13812 // 
CHECK-LABEL: @test_vqshlq_n_u32( 13813 // CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> 13814 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> 13815 // CHECK: [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> zeroinitializer) 13816 // CHECK: ret <4 x i32> [[VQSHL_N1]] 13817 uint32x4_t test_vqshlq_n_u32(uint32x4_t a) { 13818 return vqshlq_n_u32(a, 0); 13819 } 13820 13821 // CHECK-LABEL: @test_vqshlq_n_u64( 13822 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> 13823 // CHECK: [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> 13824 // CHECK: [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> zeroinitializer) 13825 // CHECK: ret <2 x i64> [[VQSHL_N1]] 13826 uint64x2_t test_vqshlq_n_u64(uint64x2_t a) { 13827 return vqshlq_n_u64(a, 0); 13828 } 13829 13830 // CHECK-LABEL: @test_vqshl_n_s64( 13831 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13832 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13833 // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1)) 13834 // CHECK: ret <1 x i64> [[VQSHL_N1]] 13835 int64x1_t test_vqshl_n_s64(int64x1_t a) { 13836 return vqshl_n_s64(a, 1); 13837 } 13838 13839 // CHECK-LABEL: @test_vqshlb_n_u8( 13840 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 13841 // CHECK: [[VQSHLB_N_U8:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>) 13842 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLB_N_U8]], i64 0 13843 // CHECK: ret i8 [[TMP1]] 13844 uint8_t test_vqshlb_n_u8(uint8_t a) { 13845 return (uint8_t)vqshlb_n_u8(a, 7); 13846 } 13847 13848 // CHECK-LABEL: @test_vqshlh_n_u16( 13849 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 13850 // CHECK: [[VQSHLH_N_U16:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>) 13851 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLH_N_U16]], i64 0 13852 // CHECK: ret i16 [[TMP1]] 13853 uint16_t test_vqshlh_n_u16(uint16_t a) { 13854 return (uint16_t)vqshlh_n_u16(a, 15); 13855 } 13856 13857 // CHECK-LABEL: @test_vqshls_n_u32( 13858 // CHECK: [[VQSHLS_N_U32:%.*]] = call i32 @llvm.aarch64.neon.uqshl.i32(i32 %a, i32 31) 13859 // CHECK: ret i32 [[VQSHLS_N_U32]] 13860 uint32_t test_vqshls_n_u32(uint32_t a) { 13861 return (uint32_t)vqshls_n_u32(a, 31); 13862 } 13863 13864 // CHECK-LABEL: @test_vqshld_n_u64( 13865 // CHECK: [[VQSHL_N:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %a, i64 63) 13866 // CHECK: ret i64 [[VQSHL_N]] 13867 uint64_t test_vqshld_n_u64(uint64_t a) { 13868 return (uint64_t)vqshld_n_u64(a, 63); 13869 } 13870 13871 // CHECK-LABEL: @test_vqshl_n_u64( 13872 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13873 // CHECK: [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13874 // CHECK: [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> splat (i64 1)) 13875 // CHECK: ret <1 x i64> [[VQSHL_N1]] 13876 uint64x1_t test_vqshl_n_u64(uint64x1_t a) { 13877 return vqshl_n_u64(a, 1); 13878 } 13879 13880 // CHECK-LABEL: @test_vqshlub_n_s8( 13881 // CHECK: [[TMP0:%.*]] = insertelement <8 x i8> poison, i8 %a, i64 0 13882 // CHECK: [[VQSHLUB_N_S8:%.*]] = call <8 x i8> 
@llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP0]], <8 x i8> <i8 7, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>) 13883 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHLUB_N_S8]], i64 0 13884 // CHECK: ret i8 [[TMP1]] 13885 int8_t test_vqshlub_n_s8(int8_t a) { 13886 return (int8_t)vqshlub_n_s8(a, 7); 13887 } 13888 13889 // CHECK-LABEL: @test_vqshluh_n_s16( 13890 // CHECK: [[TMP0:%.*]] = insertelement <4 x i16> poison, i16 %a, i64 0 13891 // CHECK: [[VQSHLUH_N_S16:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP0]], <4 x i16> <i16 15, i16 poison, i16 poison, i16 poison>) 13892 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHLUH_N_S16]], i64 0 13893 // CHECK: ret i16 [[TMP1]] 13894 int16_t test_vqshluh_n_s16(int16_t a) { 13895 return (int16_t)vqshluh_n_s16(a, 15); 13896 } 13897 13898 // CHECK-LABEL: @test_vqshlus_n_s32( 13899 // CHECK: [[VQSHLUS_N_S32:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %a, i32 31) 13900 // CHECK: ret i32 [[VQSHLUS_N_S32]] 13901 int32_t test_vqshlus_n_s32(int32_t a) { 13902 return (int32_t)vqshlus_n_s32(a, 31); 13903 } 13904 13905 // CHECK-LABEL: @test_vqshlud_n_s64( 13906 // CHECK: [[VQSHLU_N:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %a, i64 63) 13907 // CHECK: ret i64 [[VQSHLU_N]] 13908 int64_t test_vqshlud_n_s64(int64_t a) { 13909 return (int64_t)vqshlud_n_s64(a, 63); 13910 } 13911 13912 // CHECK-LABEL: @test_vqshlu_n_s64( 13913 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13914 // CHECK: [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13915 // CHECK: [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> splat (i64 1)) 13916 // CHECK: ret <1 x i64> [[VQSHLU_N1]] 13917 uint64x1_t test_vqshlu_n_s64(int64x1_t a) { 13918 return vqshlu_n_s64(a, 1); 13919 } 13920 13921 // CHECK-LABEL: @test_vsrid_n_s64( 13922 // CHECK: [[VSRID_N_S64:%.*]] = bitcast i64 %a to <1 x i64> 13923 // CHECK: [[VSRID_N_S641:%.*]] = bitcast i64 %b to <1 x i64> 13924 // CHECK: [[VSRID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_S64]], <1 x i64> [[VSRID_N_S641]], i32 63) 13925 // CHECK: [[VSRID_N_S643:%.*]] = bitcast <1 x i64> [[VSRID_N_S642]] to i64 13926 // CHECK: ret i64 [[VSRID_N_S643]] 13927 int64_t test_vsrid_n_s64(int64_t a, int64_t b) { 13928 return (int64_t)vsrid_n_s64(a, b, 63); 13929 } 13930 13931 // CHECK-LABEL: @test_vsri_n_s64( 13932 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13933 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 13934 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13935 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 13936 // CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1) 13937 // CHECK: ret <1 x i64> [[VSRI_N2]] 13938 int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) { 13939 return vsri_n_s64(a, b, 1); 13940 } 13941 13942 // CHECK-LABEL: @test_vsrid_n_u64( 13943 // CHECK: [[VSRID_N_U64:%.*]] = bitcast i64 %a to <1 x i64> 13944 // CHECK: [[VSRID_N_U641:%.*]] = bitcast i64 %b to <1 x i64> 13945 // CHECK: [[VSRID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRID_N_U64]], <1 x i64> [[VSRID_N_U641]], i32 63) 13946 // CHECK: [[VSRID_N_U643:%.*]] = bitcast <1 x i64> [[VSRID_N_U642]] to i64 13947 // CHECK: ret i64 [[VSRID_N_U643]] 13948 uint64_t test_vsrid_n_u64(uint64_t a, uint64_t b) { 13949 return (uint64_t)vsrid_n_u64(a, 
b, 63); 13950 } 13951 13952 // CHECK-LABEL: @test_vsri_n_u64( 13953 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13954 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 13955 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13956 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 13957 // CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 1) 13958 // CHECK: ret <1 x i64> [[VSRI_N2]] 13959 uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) { 13960 return vsri_n_u64(a, b, 1); 13961 } 13962 13963 // CHECK-LABEL: @test_vslid_n_s64( 13964 // CHECK: [[VSLID_N_S64:%.*]] = bitcast i64 %a to <1 x i64> 13965 // CHECK: [[VSLID_N_S641:%.*]] = bitcast i64 %b to <1 x i64> 13966 // CHECK: [[VSLID_N_S642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_S64]], <1 x i64> [[VSLID_N_S641]], i32 63) 13967 // CHECK: [[VSLID_N_S643:%.*]] = bitcast <1 x i64> [[VSLID_N_S642]] to i64 13968 // CHECK: ret i64 [[VSLID_N_S643]] 13969 int64_t test_vslid_n_s64(int64_t a, int64_t b) { 13970 return (int64_t)vslid_n_s64(a, b, 63); 13971 } 13972 13973 // CHECK-LABEL: @test_vsli_n_s64( 13974 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13975 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 13976 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13977 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 13978 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1) 13979 // CHECK: ret <1 x i64> [[VSLI_N2]] 13980 int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) { 13981 return vsli_n_s64(a, b, 1); 13982 } 13983 13984 // CHECK-LABEL: @test_vslid_n_u64( 13985 // CHECK: [[VSLID_N_U64:%.*]] = bitcast i64 %a to <1 x i64> 13986 // CHECK: [[VSLID_N_U641:%.*]] = bitcast i64 %b to <1 x i64> 13987 // CHECK: [[VSLID_N_U642:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLID_N_U64]], <1 x i64> [[VSLID_N_U641]], i32 63) 13988 // CHECK: [[VSLID_N_U643:%.*]] = bitcast <1 x i64> [[VSLID_N_U642]] to i64 13989 // CHECK: ret i64 [[VSLID_N_U643]] 13990 uint64_t test_vslid_n_u64(uint64_t a, uint64_t b) { 13991 return (uint64_t)vslid_n_u64(a, b, 63); 13992 } 13993 13994 // CHECK-LABEL: @test_vsli_n_u64( 13995 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 13996 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8> 13997 // CHECK: [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> 13998 // CHECK: [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> 13999 // CHECK: [[VSLI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], i32 1) 14000 // CHECK: ret <1 x i64> [[VSLI_N2]] 14001 uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) { 14002 return vsli_n_u64(a, b, 1); 14003 } 14004 14005 // CHECK-LABEL: @test_vqshrnh_n_s16( 14006 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 14007 // CHECK: [[VQSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP0]], i32 8) 14008 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_S16]], i64 0 14009 // CHECK: ret i8 [[TMP1]] 14010 int8_t test_vqshrnh_n_s16(int16_t a) { 14011 return (int8_t)vqshrnh_n_s16(a, 8); 14012 } 14013 14014 // CHECK-LABEL: @test_vqshrns_n_s32( 14015 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 14016 // CHECK: [[VQSHRNS_N_S32:%.*]] = call <4 x i16> 
@llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP0]], i32 16) 14017 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_S32]], i64 0 14018 // CHECK: ret i16 [[TMP1]] 14019 int16_t test_vqshrns_n_s32(int32_t a) { 14020 return (int16_t)vqshrns_n_s32(a, 16); 14021 } 14022 14023 // CHECK-LABEL: @test_vqshrnd_n_s64( 14024 // CHECK: [[VQSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %a, i32 32) 14025 // CHECK: ret i32 [[VQSHRND_N_S64]] 14026 int32_t test_vqshrnd_n_s64(int64_t a) { 14027 return (int32_t)vqshrnd_n_s64(a, 32); 14028 } 14029 14030 // CHECK-LABEL: @test_vqshrnh_n_u16( 14031 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 14032 // CHECK: [[VQSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP0]], i32 8) 14033 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRNH_N_U16]], i64 0 14034 // CHECK: ret i8 [[TMP1]] 14035 uint8_t test_vqshrnh_n_u16(uint16_t a) { 14036 return (uint8_t)vqshrnh_n_u16(a, 8); 14037 } 14038 14039 // CHECK-LABEL: @test_vqshrns_n_u32( 14040 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 14041 // CHECK: [[VQSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP0]], i32 16) 14042 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRNS_N_U32]], i64 0 14043 // CHECK: ret i16 [[TMP1]] 14044 uint16_t test_vqshrns_n_u32(uint32_t a) { 14045 return (uint16_t)vqshrns_n_u32(a, 16); 14046 } 14047 14048 // CHECK-LABEL: @test_vqshrnd_n_u64( 14049 // CHECK: [[VQSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %a, i32 32) 14050 // CHECK: ret i32 [[VQSHRND_N_U64]] 14051 uint32_t test_vqshrnd_n_u64(uint64_t a) { 14052 return (uint32_t)vqshrnd_n_u64(a, 32); 14053 } 14054 14055 // CHECK-LABEL: @test_vqrshrnh_n_s16( 14056 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 14057 // CHECK: [[VQRSHRNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8) 14058 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_S16]], i64 0 14059 // CHECK: ret i8 [[TMP1]] 14060 int8_t test_vqrshrnh_n_s16(int16_t a) { 14061 return (int8_t)vqrshrnh_n_s16(a, 8); 14062 } 14063 14064 // CHECK-LABEL: @test_vqrshrns_n_s32( 14065 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 14066 // CHECK: [[VQRSHRNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16) 14067 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_S32]], i64 0 14068 // CHECK: ret i16 [[TMP1]] 14069 int16_t test_vqrshrns_n_s32(int32_t a) { 14070 return (int16_t)vqrshrns_n_s32(a, 16); 14071 } 14072 14073 // CHECK-LABEL: @test_vqrshrnd_n_s64( 14074 // CHECK: [[VQRSHRND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %a, i32 32) 14075 // CHECK: ret i32 [[VQRSHRND_N_S64]] 14076 int32_t test_vqrshrnd_n_s64(int64_t a) { 14077 return (int32_t)vqrshrnd_n_s64(a, 32); 14078 } 14079 14080 // CHECK-LABEL: @test_vqrshrnh_n_u16( 14081 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 14082 // CHECK: [[VQRSHRNH_N_U16:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP0]], i32 8) 14083 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRNH_N_U16]], i64 0 14084 // CHECK: ret i8 [[TMP1]] 14085 uint8_t test_vqrshrnh_n_u16(uint16_t a) { 14086 return (uint8_t)vqrshrnh_n_u16(a, 8); 14087 } 14088 14089 // CHECK-LABEL: @test_vqrshrns_n_u32( 14090 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 14091 // CHECK: 
[[VQRSHRNS_N_U32:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP0]], i32 16) 14092 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRNS_N_U32]], i64 0 14093 // CHECK: ret i16 [[TMP1]] 14094 uint16_t test_vqrshrns_n_u32(uint32_t a) { 14095 return (uint16_t)vqrshrns_n_u32(a, 16); 14096 } 14097 14098 // CHECK-LABEL: @test_vqrshrnd_n_u64( 14099 // CHECK: [[VQRSHRND_N_U64:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %a, i32 32) 14100 // CHECK: ret i32 [[VQRSHRND_N_U64]] 14101 uint32_t test_vqrshrnd_n_u64(uint64_t a) { 14102 return (uint32_t)vqrshrnd_n_u64(a, 32); 14103 } 14104 14105 // CHECK-LABEL: @test_vqshrunh_n_s16( 14106 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 14107 // CHECK: [[VQSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP0]], i32 8) 14108 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQSHRUNH_N_S16]], i64 0 14109 // CHECK: ret i8 [[TMP1]] 14110 int8_t test_vqshrunh_n_s16(int16_t a) { 14111 return (int8_t)vqshrunh_n_s16(a, 8); 14112 } 14113 14114 // CHECK-LABEL: @test_vqshruns_n_s32( 14115 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 14116 // CHECK: [[VQSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP0]], i32 16) 14117 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQSHRUNS_N_S32]], i64 0 14118 // CHECK: ret i16 [[TMP1]] 14119 int16_t test_vqshruns_n_s32(int32_t a) { 14120 return (int16_t)vqshruns_n_s32(a, 16); 14121 } 14122 14123 // CHECK-LABEL: @test_vqshrund_n_s64( 14124 // CHECK: [[VQSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %a, i32 32) 14125 // CHECK: ret i32 [[VQSHRUND_N_S64]] 14126 int32_t test_vqshrund_n_s64(int64_t a) { 14127 return (int32_t)vqshrund_n_s64(a, 32); 14128 } 14129 14130 // CHECK-LABEL: @test_vqrshrunh_n_s16( 14131 // CHECK: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 %a, i64 0 14132 // CHECK: [[VQRSHRUNH_N_S16:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP0]], i32 8) 14133 // CHECK: [[TMP1:%.*]] = extractelement <8 x i8> [[VQRSHRUNH_N_S16]], i64 0 14134 // CHECK: ret i8 [[TMP1]] 14135 uint8_t test_vqrshrunh_n_s16(int16_t a) { 14136 return (uint8_t)vqrshrunh_n_s16(a, 8); 14137 } 14138 14139 // CHECK-LABEL: @test_vqrshruns_n_s32( 14140 // CHECK: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 %a, i64 0 14141 // CHECK: [[VQRSHRUNS_N_S32:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP0]], i32 16) 14142 // CHECK: [[TMP1:%.*]] = extractelement <4 x i16> [[VQRSHRUNS_N_S32]], i64 0 14143 // CHECK: ret i16 [[TMP1]] 14144 uint16_t test_vqrshruns_n_s32(int32_t a) { 14145 return (uint16_t)vqrshruns_n_s32(a, 16); 14146 } 14147 14148 // CHECK-LABEL: @test_vqrshrund_n_s64( 14149 // CHECK: [[VQRSHRUND_N_S64:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %a, i32 32) 14150 // CHECK: ret i32 [[VQRSHRUND_N_S64]] 14151 uint32_t test_vqrshrund_n_s64(int64_t a) { 14152 return (uint32_t)vqrshrund_n_s64(a, 32); 14153 } 14154 14155 // CHECK-LABEL: @test_vcvts_n_f32_s32( 14156 // CHECK: [[VCVTS_N_F32_S32:%.*]] = call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %a, i32 1) 14157 // CHECK: ret float [[VCVTS_N_F32_S32]] 14158 float32_t test_vcvts_n_f32_s32(int32_t a) { 14159 return vcvts_n_f32_s32(a, 1); 14160 } 14161 14162 // CHECK-LABEL: @test_vcvtd_n_f64_s64( 14163 // CHECK: [[VCVTD_N_F64_S64:%.*]] = call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %a, i32 1) 14164 // CHECK: ret double [[VCVTD_N_F64_S64]] 14165 float64_t 
test_vcvtd_n_f64_s64(int64_t a) { 14166 return vcvtd_n_f64_s64(a, 1); 14167 } 14168 14169 // CHECK-LABEL: @test_vcvts_n_f32_u32( 14170 // CHECK: [[VCVTS_N_F32_U32:%.*]] = call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %a, i32 32) 14171 // CHECK: ret float [[VCVTS_N_F32_U32]] 14172 float32_t test_vcvts_n_f32_u32(uint32_t a) { 14173 return vcvts_n_f32_u32(a, 32); 14174 } 14175 14176 // CHECK-LABEL: @test_vcvtd_n_f64_u64( 14177 // CHECK: [[VCVTD_N_F64_U64:%.*]] = call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %a, i32 64) 14178 // CHECK: ret double [[VCVTD_N_F64_U64]] 14179 float64_t test_vcvtd_n_f64_u64(uint64_t a) { 14180 return vcvtd_n_f64_u64(a, 64); 14181 } 14182 14183 // CHECK-LABEL: @test_vcvts_n_s32_f32( 14184 // CHECK: [[VCVTS_N_S32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %a, i32 1) 14185 // CHECK: ret i32 [[VCVTS_N_S32_F32]] 14186 int32_t test_vcvts_n_s32_f32(float32_t a) { 14187 return (int32_t)vcvts_n_s32_f32(a, 1); 14188 } 14189 14190 // CHECK-LABEL: @test_vcvtd_n_s64_f64( 14191 // CHECK: [[VCVTD_N_S64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %a, i32 1) 14192 // CHECK: ret i64 [[VCVTD_N_S64_F64]] 14193 int64_t test_vcvtd_n_s64_f64(float64_t a) { 14194 return (int64_t)vcvtd_n_s64_f64(a, 1); 14195 } 14196 14197 // CHECK-LABEL: @test_vcvts_n_u32_f32( 14198 // CHECK: [[VCVTS_N_U32_F32:%.*]] = call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %a, i32 32) 14199 // CHECK: ret i32 [[VCVTS_N_U32_F32]] 14200 uint32_t test_vcvts_n_u32_f32(float32_t a) { 14201 return (uint32_t)vcvts_n_u32_f32(a, 32); 14202 } 14203 14204 // CHECK-LABEL: @test_vcvtd_n_u64_f64( 14205 // CHECK: [[VCVTD_N_U64_F64:%.*]] = call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %a, i32 64) 14206 // CHECK: ret i64 [[VCVTD_N_U64_F64]] 14207 uint64_t test_vcvtd_n_u64_f64(float64_t a) { 14208 return (uint64_t)vcvtd_n_u64_f64(a, 64); 14209 } 14210 14211 // CHECK-LABEL: @test_vreinterpret_s8_s16( 14212 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 14213 // CHECK: ret <8 x i8> [[TMP0]] 14214 int8x8_t test_vreinterpret_s8_s16(int16x4_t a) { 14215 return vreinterpret_s8_s16(a); 14216 } 14217 14218 // CHECK-LABEL: @test_vreinterpret_s8_s32( 14219 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 14220 // CHECK: ret <8 x i8> [[TMP0]] 14221 int8x8_t test_vreinterpret_s8_s32(int32x2_t a) { 14222 return vreinterpret_s8_s32(a); 14223 } 14224 14225 // CHECK-LABEL: @test_vreinterpret_s8_s64( 14226 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 14227 // CHECK: ret <8 x i8> [[TMP0]] 14228 int8x8_t test_vreinterpret_s8_s64(int64x1_t a) { 14229 return vreinterpret_s8_s64(a); 14230 } 14231 14232 // CHECK-LABEL: @test_vreinterpret_s8_u8( 14233 // CHECK: ret <8 x i8> %a 14234 int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) { 14235 return vreinterpret_s8_u8(a); 14236 } 14237 14238 // CHECK-LABEL: @test_vreinterpret_s8_u16( 14239 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 14240 // CHECK: ret <8 x i8> [[TMP0]] 14241 int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) { 14242 return vreinterpret_s8_u16(a); 14243 } 14244 14245 // CHECK-LABEL: @test_vreinterpret_s8_u32( 14246 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 14247 // CHECK: ret <8 x i8> [[TMP0]] 14248 int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) { 14249 return vreinterpret_s8_u32(a); 14250 } 14251 14252 // CHECK-LABEL: @test_vreinterpret_s8_u64( 14253 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 14254 // CHECK: ret <8 x i8> [[TMP0]] 14255 int8x8_t 
test_vreinterpret_s8_u64(uint64x1_t a) { 14256 return vreinterpret_s8_u64(a); 14257 } 14258 14259 // CHECK-LABEL: @test_vreinterpret_s8_f16( 14260 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 14261 // CHECK: ret <8 x i8> [[TMP0]] 14262 int8x8_t test_vreinterpret_s8_f16(float16x4_t a) { 14263 return vreinterpret_s8_f16(a); 14264 } 14265 14266 // CHECK-LABEL: @test_vreinterpret_s8_f32( 14267 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 14268 // CHECK: ret <8 x i8> [[TMP0]] 14269 int8x8_t test_vreinterpret_s8_f32(float32x2_t a) { 14270 return vreinterpret_s8_f32(a); 14271 } 14272 14273 // CHECK-LABEL: @test_vreinterpret_s8_f64( 14274 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 14275 // CHECK: ret <8 x i8> [[TMP0]] 14276 int8x8_t test_vreinterpret_s8_f64(float64x1_t a) { 14277 return vreinterpret_s8_f64(a); 14278 } 14279 14280 // CHECK-LABEL: @test_vreinterpret_s8_p8( 14281 // CHECK: ret <8 x i8> %a 14282 int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) { 14283 return vreinterpret_s8_p8(a); 14284 } 14285 14286 // CHECK-LABEL: @test_vreinterpret_s8_p16( 14287 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 14288 // CHECK: ret <8 x i8> [[TMP0]] 14289 int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) { 14290 return vreinterpret_s8_p16(a); 14291 } 14292 14293 // CHECK-LABEL: @test_vreinterpret_s8_p64( 14294 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 14295 // CHECK: ret <8 x i8> [[TMP0]] 14296 int8x8_t test_vreinterpret_s8_p64(poly64x1_t a) { 14297 return vreinterpret_s8_p64(a); 14298 } 14299 14300 // CHECK-LABEL: @test_vreinterpret_s16_s8( 14301 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 14302 // CHECK: ret <4 x i16> [[TMP0]] 14303 int16x4_t test_vreinterpret_s16_s8(int8x8_t a) { 14304 return vreinterpret_s16_s8(a); 14305 } 14306 14307 // CHECK-LABEL: @test_vreinterpret_s16_s32( 14308 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 14309 // CHECK: ret <4 x i16> [[TMP0]] 14310 int16x4_t test_vreinterpret_s16_s32(int32x2_t a) { 14311 return vreinterpret_s16_s32(a); 14312 } 14313 14314 // CHECK-LABEL: @test_vreinterpret_s16_s64( 14315 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 14316 // CHECK: ret <4 x i16> [[TMP0]] 14317 int16x4_t test_vreinterpret_s16_s64(int64x1_t a) { 14318 return vreinterpret_s16_s64(a); 14319 } 14320 14321 // CHECK-LABEL: @test_vreinterpret_s16_u8( 14322 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 14323 // CHECK: ret <4 x i16> [[TMP0]] 14324 int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) { 14325 return vreinterpret_s16_u8(a); 14326 } 14327 14328 // CHECK-LABEL: @test_vreinterpret_s16_u16( 14329 // CHECK: ret <4 x i16> %a 14330 int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) { 14331 return vreinterpret_s16_u16(a); 14332 } 14333 14334 // CHECK-LABEL: @test_vreinterpret_s16_u32( 14335 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 14336 // CHECK: ret <4 x i16> [[TMP0]] 14337 int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) { 14338 return vreinterpret_s16_u32(a); 14339 } 14340 14341 // CHECK-LABEL: @test_vreinterpret_s16_u64( 14342 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 14343 // CHECK: ret <4 x i16> [[TMP0]] 14344 int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) { 14345 return vreinterpret_s16_u64(a); 14346 } 14347 14348 // CHECK-LABEL: @test_vreinterpret_s16_f16( 14349 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> 14350 // CHECK: ret <4 x i16> [[TMP0]] 14351 int16x4_t test_vreinterpret_s16_f16(float16x4_t a) { 
14352 return vreinterpret_s16_f16(a); 14353 } 14354 14355 // CHECK-LABEL: @test_vreinterpret_s16_f32( 14356 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> 14357 // CHECK: ret <4 x i16> [[TMP0]] 14358 int16x4_t test_vreinterpret_s16_f32(float32x2_t a) { 14359 return vreinterpret_s16_f32(a); 14360 } 14361 14362 // CHECK-LABEL: @test_vreinterpret_s16_f64( 14363 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16> 14364 // CHECK: ret <4 x i16> [[TMP0]] 14365 int16x4_t test_vreinterpret_s16_f64(float64x1_t a) { 14366 return vreinterpret_s16_f64(a); 14367 } 14368 14369 // CHECK-LABEL: @test_vreinterpret_s16_p8( 14370 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 14371 // CHECK: ret <4 x i16> [[TMP0]] 14372 int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) { 14373 return vreinterpret_s16_p8(a); 14374 } 14375 14376 // CHECK-LABEL: @test_vreinterpret_s16_p16( 14377 // CHECK: ret <4 x i16> %a 14378 int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) { 14379 return vreinterpret_s16_p16(a); 14380 } 14381 14382 // CHECK-LABEL: @test_vreinterpret_s16_p64( 14383 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 14384 // CHECK: ret <4 x i16> [[TMP0]] 14385 int16x4_t test_vreinterpret_s16_p64(poly64x1_t a) { 14386 return vreinterpret_s16_p64(a); 14387 } 14388 14389 // CHECK-LABEL: @test_vreinterpret_s32_s8( 14390 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 14391 // CHECK: ret <2 x i32> [[TMP0]] 14392 int32x2_t test_vreinterpret_s32_s8(int8x8_t a) { 14393 return vreinterpret_s32_s8(a); 14394 } 14395 14396 // CHECK-LABEL: @test_vreinterpret_s32_s16( 14397 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 14398 // CHECK: ret <2 x i32> [[TMP0]] 14399 int32x2_t test_vreinterpret_s32_s16(int16x4_t a) { 14400 return vreinterpret_s32_s16(a); 14401 } 14402 14403 // CHECK-LABEL: @test_vreinterpret_s32_s64( 14404 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 14405 // CHECK: ret <2 x i32> [[TMP0]] 14406 int32x2_t test_vreinterpret_s32_s64(int64x1_t a) { 14407 return vreinterpret_s32_s64(a); 14408 } 14409 14410 // CHECK-LABEL: @test_vreinterpret_s32_u8( 14411 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 14412 // CHECK: ret <2 x i32> [[TMP0]] 14413 int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) { 14414 return vreinterpret_s32_u8(a); 14415 } 14416 14417 // CHECK-LABEL: @test_vreinterpret_s32_u16( 14418 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 14419 // CHECK: ret <2 x i32> [[TMP0]] 14420 int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) { 14421 return vreinterpret_s32_u16(a); 14422 } 14423 14424 // CHECK-LABEL: @test_vreinterpret_s32_u32( 14425 // CHECK: ret <2 x i32> %a 14426 int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) { 14427 return vreinterpret_s32_u32(a); 14428 } 14429 14430 // CHECK-LABEL: @test_vreinterpret_s32_u64( 14431 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 14432 // CHECK: ret <2 x i32> [[TMP0]] 14433 int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) { 14434 return vreinterpret_s32_u64(a); 14435 } 14436 14437 // CHECK-LABEL: @test_vreinterpret_s32_f16( 14438 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> 14439 // CHECK: ret <2 x i32> [[TMP0]] 14440 int32x2_t test_vreinterpret_s32_f16(float16x4_t a) { 14441 return vreinterpret_s32_f16(a); 14442 } 14443 14444 // CHECK-LABEL: @test_vreinterpret_s32_f32( 14445 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32> 14446 // CHECK: ret <2 x i32> [[TMP0]] 14447 int32x2_t test_vreinterpret_s32_f32(float32x2_t a) { 14448 
return vreinterpret_s32_f32(a); 14449 } 14450 14451 // CHECK-LABEL: @test_vreinterpret_s32_f64( 14452 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32> 14453 // CHECK: ret <2 x i32> [[TMP0]] 14454 int32x2_t test_vreinterpret_s32_f64(float64x1_t a) { 14455 return vreinterpret_s32_f64(a); 14456 } 14457 14458 // CHECK-LABEL: @test_vreinterpret_s32_p8( 14459 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 14460 // CHECK: ret <2 x i32> [[TMP0]] 14461 int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) { 14462 return vreinterpret_s32_p8(a); 14463 } 14464 14465 // CHECK-LABEL: @test_vreinterpret_s32_p16( 14466 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 14467 // CHECK: ret <2 x i32> [[TMP0]] 14468 int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) { 14469 return vreinterpret_s32_p16(a); 14470 } 14471 14472 // CHECK-LABEL: @test_vreinterpret_s32_p64( 14473 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 14474 // CHECK: ret <2 x i32> [[TMP0]] 14475 int32x2_t test_vreinterpret_s32_p64(poly64x1_t a) { 14476 return vreinterpret_s32_p64(a); 14477 } 14478 14479 // CHECK-LABEL: @test_vreinterpret_s64_s8( 14480 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 14481 // CHECK: ret <1 x i64> [[TMP0]] 14482 int64x1_t test_vreinterpret_s64_s8(int8x8_t a) { 14483 return vreinterpret_s64_s8(a); 14484 } 14485 14486 // CHECK-LABEL: @test_vreinterpret_s64_s16( 14487 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 14488 // CHECK: ret <1 x i64> [[TMP0]] 14489 int64x1_t test_vreinterpret_s64_s16(int16x4_t a) { 14490 return vreinterpret_s64_s16(a); 14491 } 14492 14493 // CHECK-LABEL: @test_vreinterpret_s64_s32( 14494 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> 14495 // CHECK: ret <1 x i64> [[TMP0]] 14496 int64x1_t test_vreinterpret_s64_s32(int32x2_t a) { 14497 return vreinterpret_s64_s32(a); 14498 } 14499 14500 // CHECK-LABEL: @test_vreinterpret_s64_u8( 14501 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 14502 // CHECK: ret <1 x i64> [[TMP0]] 14503 int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) { 14504 return vreinterpret_s64_u8(a); 14505 } 14506 14507 // CHECK-LABEL: @test_vreinterpret_s64_u16( 14508 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 14509 // CHECK: ret <1 x i64> [[TMP0]] 14510 int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) { 14511 return vreinterpret_s64_u16(a); 14512 } 14513 14514 // CHECK-LABEL: @test_vreinterpret_s64_u32( 14515 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> 14516 // CHECK: ret <1 x i64> [[TMP0]] 14517 int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) { 14518 return vreinterpret_s64_u32(a); 14519 } 14520 14521 // CHECK-LABEL: @test_vreinterpret_s64_u64( 14522 // CHECK: ret <1 x i64> %a 14523 int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) { 14524 return vreinterpret_s64_u64(a); 14525 } 14526 14527 // CHECK-LABEL: @test_vreinterpret_s64_f16( 14528 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> 14529 // CHECK: ret <1 x i64> [[TMP0]] 14530 int64x1_t test_vreinterpret_s64_f16(float16x4_t a) { 14531 return vreinterpret_s64_f16(a); 14532 } 14533 14534 // CHECK-LABEL: @test_vreinterpret_s64_f32( 14535 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> 14536 // CHECK: ret <1 x i64> [[TMP0]] 14537 int64x1_t test_vreinterpret_s64_f32(float32x2_t a) { 14538 return vreinterpret_s64_f32(a); 14539 } 14540 14541 // CHECK-LABEL: @test_vreinterpret_s64_f64( 14542 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64> 14543 // CHECK: ret <1 x i64> [[TMP0]] 
14544 int64x1_t test_vreinterpret_s64_f64(float64x1_t a) { 14545 return vreinterpret_s64_f64(a); 14546 } 14547 14548 // CHECK-LABEL: @test_vreinterpret_s64_p8( 14549 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 14550 // CHECK: ret <1 x i64> [[TMP0]] 14551 int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) { 14552 return vreinterpret_s64_p8(a); 14553 } 14554 14555 // CHECK-LABEL: @test_vreinterpret_s64_p16( 14556 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 14557 // CHECK: ret <1 x i64> [[TMP0]] 14558 int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) { 14559 return vreinterpret_s64_p16(a); 14560 } 14561 14562 // CHECK-LABEL: @test_vreinterpret_s64_p64( 14563 // CHECK: ret <1 x i64> %a 14564 int64x1_t test_vreinterpret_s64_p64(poly64x1_t a) { 14565 return vreinterpret_s64_p64(a); 14566 } 14567 14568 // CHECK-LABEL: @test_vreinterpret_u8_s8( 14569 // CHECK: ret <8 x i8> %a 14570 uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) { 14571 return vreinterpret_u8_s8(a); 14572 } 14573 14574 // CHECK-LABEL: @test_vreinterpret_u8_s16( 14575 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 14576 // CHECK: ret <8 x i8> [[TMP0]] 14577 uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) { 14578 return vreinterpret_u8_s16(a); 14579 } 14580 14581 // CHECK-LABEL: @test_vreinterpret_u8_s32( 14582 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 14583 // CHECK: ret <8 x i8> [[TMP0]] 14584 uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) { 14585 return vreinterpret_u8_s32(a); 14586 } 14587 14588 // CHECK-LABEL: @test_vreinterpret_u8_s64( 14589 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 14590 // CHECK: ret <8 x i8> [[TMP0]] 14591 uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) { 14592 return vreinterpret_u8_s64(a); 14593 } 14594 14595 // CHECK-LABEL: @test_vreinterpret_u8_u16( 14596 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 14597 // CHECK: ret <8 x i8> [[TMP0]] 14598 uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) { 14599 return vreinterpret_u8_u16(a); 14600 } 14601 14602 // CHECK-LABEL: @test_vreinterpret_u8_u32( 14603 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 14604 // CHECK: ret <8 x i8> [[TMP0]] 14605 uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) { 14606 return vreinterpret_u8_u32(a); 14607 } 14608 14609 // CHECK-LABEL: @test_vreinterpret_u8_u64( 14610 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 14611 // CHECK: ret <8 x i8> [[TMP0]] 14612 uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) { 14613 return vreinterpret_u8_u64(a); 14614 } 14615 14616 // CHECK-LABEL: @test_vreinterpret_u8_f16( 14617 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 14618 // CHECK: ret <8 x i8> [[TMP0]] 14619 uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) { 14620 return vreinterpret_u8_f16(a); 14621 } 14622 14623 // CHECK-LABEL: @test_vreinterpret_u8_f32( 14624 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 14625 // CHECK: ret <8 x i8> [[TMP0]] 14626 uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) { 14627 return vreinterpret_u8_f32(a); 14628 } 14629 14630 // CHECK-LABEL: @test_vreinterpret_u8_f64( 14631 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 14632 // CHECK: ret <8 x i8> [[TMP0]] 14633 uint8x8_t test_vreinterpret_u8_f64(float64x1_t a) { 14634 return vreinterpret_u8_f64(a); 14635 } 14636 14637 // CHECK-LABEL: @test_vreinterpret_u8_p8( 14638 // CHECK: ret <8 x i8> %a 14639 uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) { 14640 return vreinterpret_u8_p8(a); 14641 } 14642 14643 // CHECK-LABEL: 
@test_vreinterpret_u8_p16( 14644 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 14645 // CHECK: ret <8 x i8> [[TMP0]] 14646 uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) { 14647 return vreinterpret_u8_p16(a); 14648 } 14649 14650 // CHECK-LABEL: @test_vreinterpret_u8_p64( 14651 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 14652 // CHECK: ret <8 x i8> [[TMP0]] 14653 uint8x8_t test_vreinterpret_u8_p64(poly64x1_t a) { 14654 return vreinterpret_u8_p64(a); 14655 } 14656 14657 // CHECK-LABEL: @test_vreinterpret_u16_s8( 14658 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 14659 // CHECK: ret <4 x i16> [[TMP0]] 14660 uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) { 14661 return vreinterpret_u16_s8(a); 14662 } 14663 14664 // CHECK-LABEL: @test_vreinterpret_u16_s16( 14665 // CHECK: ret <4 x i16> %a 14666 uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) { 14667 return vreinterpret_u16_s16(a); 14668 } 14669 14670 // CHECK-LABEL: @test_vreinterpret_u16_s32( 14671 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 14672 // CHECK: ret <4 x i16> [[TMP0]] 14673 uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) { 14674 return vreinterpret_u16_s32(a); 14675 } 14676 14677 // CHECK-LABEL: @test_vreinterpret_u16_s64( 14678 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 14679 // CHECK: ret <4 x i16> [[TMP0]] 14680 uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) { 14681 return vreinterpret_u16_s64(a); 14682 } 14683 14684 // CHECK-LABEL: @test_vreinterpret_u16_u8( 14685 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 14686 // CHECK: ret <4 x i16> [[TMP0]] 14687 uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) { 14688 return vreinterpret_u16_u8(a); 14689 } 14690 14691 // CHECK-LABEL: @test_vreinterpret_u16_u32( 14692 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 14693 // CHECK: ret <4 x i16> [[TMP0]] 14694 uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) { 14695 return vreinterpret_u16_u32(a); 14696 } 14697 14698 // CHECK-LABEL: @test_vreinterpret_u16_u64( 14699 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 14700 // CHECK: ret <4 x i16> [[TMP0]] 14701 uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) { 14702 return vreinterpret_u16_u64(a); 14703 } 14704 14705 // CHECK-LABEL: @test_vreinterpret_u16_f16( 14706 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> 14707 // CHECK: ret <4 x i16> [[TMP0]] 14708 uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) { 14709 return vreinterpret_u16_f16(a); 14710 } 14711 14712 // CHECK-LABEL: @test_vreinterpret_u16_f32( 14713 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16> 14714 // CHECK: ret <4 x i16> [[TMP0]] 14715 uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) { 14716 return vreinterpret_u16_f32(a); 14717 } 14718 14719 // CHECK-LABEL: @test_vreinterpret_u16_f64( 14720 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16> 14721 // CHECK: ret <4 x i16> [[TMP0]] 14722 uint16x4_t test_vreinterpret_u16_f64(float64x1_t a) { 14723 return vreinterpret_u16_f64(a); 14724 } 14725 14726 // CHECK-LABEL: @test_vreinterpret_u16_p8( 14727 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 14728 // CHECK: ret <4 x i16> [[TMP0]] 14729 uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) { 14730 return vreinterpret_u16_p8(a); 14731 } 14732 14733 // CHECK-LABEL: @test_vreinterpret_u16_p16( 14734 // CHECK: ret <4 x i16> %a 14735 uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) { 14736 return vreinterpret_u16_p16(a); 14737 } 14738 14739 // CHECK-LABEL: 
@test_vreinterpret_u16_p64( 14740 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16> 14741 // CHECK: ret <4 x i16> [[TMP0]] 14742 uint16x4_t test_vreinterpret_u16_p64(poly64x1_t a) { 14743 return vreinterpret_u16_p64(a); 14744 } 14745 14746 // CHECK-LABEL: @test_vreinterpret_u32_s8( 14747 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 14748 // CHECK: ret <2 x i32> [[TMP0]] 14749 uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) { 14750 return vreinterpret_u32_s8(a); 14751 } 14752 14753 // CHECK-LABEL: @test_vreinterpret_u32_s16( 14754 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 14755 // CHECK: ret <2 x i32> [[TMP0]] 14756 uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) { 14757 return vreinterpret_u32_s16(a); 14758 } 14759 14760 // CHECK-LABEL: @test_vreinterpret_u32_s32( 14761 // CHECK: ret <2 x i32> %a 14762 uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) { 14763 return vreinterpret_u32_s32(a); 14764 } 14765 14766 // CHECK-LABEL: @test_vreinterpret_u32_s64( 14767 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 14768 // CHECK: ret <2 x i32> [[TMP0]] 14769 uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) { 14770 return vreinterpret_u32_s64(a); 14771 } 14772 14773 // CHECK-LABEL: @test_vreinterpret_u32_u8( 14774 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 14775 // CHECK: ret <2 x i32> [[TMP0]] 14776 uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) { 14777 return vreinterpret_u32_u8(a); 14778 } 14779 14780 // CHECK-LABEL: @test_vreinterpret_u32_u16( 14781 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 14782 // CHECK: ret <2 x i32> [[TMP0]] 14783 uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) { 14784 return vreinterpret_u32_u16(a); 14785 } 14786 14787 // CHECK-LABEL: @test_vreinterpret_u32_u64( 14788 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 14789 // CHECK: ret <2 x i32> [[TMP0]] 14790 uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) { 14791 return vreinterpret_u32_u64(a); 14792 } 14793 14794 // CHECK-LABEL: @test_vreinterpret_u32_f16( 14795 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32> 14796 // CHECK: ret <2 x i32> [[TMP0]] 14797 uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) { 14798 return vreinterpret_u32_f16(a); 14799 } 14800 14801 // CHECK-LABEL: @test_vreinterpret_u32_f32( 14802 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32> 14803 // CHECK: ret <2 x i32> [[TMP0]] 14804 uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) { 14805 return vreinterpret_u32_f32(a); 14806 } 14807 14808 // CHECK-LABEL: @test_vreinterpret_u32_f64( 14809 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x i32> 14810 // CHECK: ret <2 x i32> [[TMP0]] 14811 uint32x2_t test_vreinterpret_u32_f64(float64x1_t a) { 14812 return vreinterpret_u32_f64(a); 14813 } 14814 14815 // CHECK-LABEL: @test_vreinterpret_u32_p8( 14816 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32> 14817 // CHECK: ret <2 x i32> [[TMP0]] 14818 uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) { 14819 return vreinterpret_u32_p8(a); 14820 } 14821 14822 // CHECK-LABEL: @test_vreinterpret_u32_p16( 14823 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32> 14824 // CHECK: ret <2 x i32> [[TMP0]] 14825 uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) { 14826 return vreinterpret_u32_p16(a); 14827 } 14828 14829 // CHECK-LABEL: @test_vreinterpret_u32_p64( 14830 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32> 14831 // CHECK: ret <2 x i32> [[TMP0]] 14832 uint32x2_t test_vreinterpret_u32_p64(poly64x1_t a) { 
14833 return vreinterpret_u32_p64(a); 14834 } 14835 14836 // CHECK-LABEL: @test_vreinterpret_u64_s8( 14837 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 14838 // CHECK: ret <1 x i64> [[TMP0]] 14839 uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) { 14840 return vreinterpret_u64_s8(a); 14841 } 14842 14843 // CHECK-LABEL: @test_vreinterpret_u64_s16( 14844 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 14845 // CHECK: ret <1 x i64> [[TMP0]] 14846 uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) { 14847 return vreinterpret_u64_s16(a); 14848 } 14849 14850 // CHECK-LABEL: @test_vreinterpret_u64_s32( 14851 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> 14852 // CHECK: ret <1 x i64> [[TMP0]] 14853 uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) { 14854 return vreinterpret_u64_s32(a); 14855 } 14856 14857 // CHECK-LABEL: @test_vreinterpret_u64_s64( 14858 // CHECK: ret <1 x i64> %a 14859 uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) { 14860 return vreinterpret_u64_s64(a); 14861 } 14862 14863 // CHECK-LABEL: @test_vreinterpret_u64_u8( 14864 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 14865 // CHECK: ret <1 x i64> [[TMP0]] 14866 uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) { 14867 return vreinterpret_u64_u8(a); 14868 } 14869 14870 // CHECK-LABEL: @test_vreinterpret_u64_u16( 14871 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 14872 // CHECK: ret <1 x i64> [[TMP0]] 14873 uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) { 14874 return vreinterpret_u64_u16(a); 14875 } 14876 14877 // CHECK-LABEL: @test_vreinterpret_u64_u32( 14878 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64> 14879 // CHECK: ret <1 x i64> [[TMP0]] 14880 uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) { 14881 return vreinterpret_u64_u32(a); 14882 } 14883 14884 // CHECK-LABEL: @test_vreinterpret_u64_f16( 14885 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64> 14886 // CHECK: ret <1 x i64> [[TMP0]] 14887 uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) { 14888 return vreinterpret_u64_f16(a); 14889 } 14890 14891 // CHECK-LABEL: @test_vreinterpret_u64_f32( 14892 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64> 14893 // CHECK: ret <1 x i64> [[TMP0]] 14894 uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) { 14895 return vreinterpret_u64_f32(a); 14896 } 14897 14898 // CHECK-LABEL: @test_vreinterpret_u64_f64( 14899 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64> 14900 // CHECK: ret <1 x i64> [[TMP0]] 14901 uint64x1_t test_vreinterpret_u64_f64(float64x1_t a) { 14902 return vreinterpret_u64_f64(a); 14903 } 14904 14905 // CHECK-LABEL: @test_vreinterpret_u64_p8( 14906 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64> 14907 // CHECK: ret <1 x i64> [[TMP0]] 14908 uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) { 14909 return vreinterpret_u64_p8(a); 14910 } 14911 14912 // CHECK-LABEL: @test_vreinterpret_u64_p16( 14913 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64> 14914 // CHECK: ret <1 x i64> [[TMP0]] 14915 uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) { 14916 return vreinterpret_u64_p16(a); 14917 } 14918 14919 // CHECK-LABEL: @test_vreinterpret_u64_p64( 14920 // CHECK: ret <1 x i64> %a 14921 uint64x1_t test_vreinterpret_u64_p64(poly64x1_t a) { 14922 return vreinterpret_u64_p64(a); 14923 } 14924 14925 // CHECK-LABEL: @test_vreinterpret_f16_s8( 14926 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> 14927 // CHECK: ret <4 x half> [[TMP0]] 14928 float16x4_t test_vreinterpret_f16_s8(int8x8_t a) 
{ 14929 return vreinterpret_f16_s8(a); 14930 } 14931 14932 // CHECK-LABEL: @test_vreinterpret_f16_s16( 14933 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> 14934 // CHECK: ret <4 x half> [[TMP0]] 14935 float16x4_t test_vreinterpret_f16_s16(int16x4_t a) { 14936 return vreinterpret_f16_s16(a); 14937 } 14938 14939 // CHECK-LABEL: @test_vreinterpret_f16_s32( 14940 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half> 14941 // CHECK: ret <4 x half> [[TMP0]] 14942 float16x4_t test_vreinterpret_f16_s32(int32x2_t a) { 14943 return vreinterpret_f16_s32(a); 14944 } 14945 14946 // CHECK-LABEL: @test_vreinterpret_f16_s64( 14947 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> 14948 // CHECK: ret <4 x half> [[TMP0]] 14949 float16x4_t test_vreinterpret_f16_s64(int64x1_t a) { 14950 return vreinterpret_f16_s64(a); 14951 } 14952 14953 // CHECK-LABEL: @test_vreinterpret_f16_u8( 14954 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> 14955 // CHECK: ret <4 x half> [[TMP0]] 14956 float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) { 14957 return vreinterpret_f16_u8(a); 14958 } 14959 14960 // CHECK-LABEL: @test_vreinterpret_f16_u16( 14961 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> 14962 // CHECK: ret <4 x half> [[TMP0]] 14963 float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) { 14964 return vreinterpret_f16_u16(a); 14965 } 14966 14967 // CHECK-LABEL: @test_vreinterpret_f16_u32( 14968 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half> 14969 // CHECK: ret <4 x half> [[TMP0]] 14970 float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) { 14971 return vreinterpret_f16_u32(a); 14972 } 14973 14974 // CHECK-LABEL: @test_vreinterpret_f16_u64( 14975 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> 14976 // CHECK: ret <4 x half> [[TMP0]] 14977 float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) { 14978 return vreinterpret_f16_u64(a); 14979 } 14980 14981 // CHECK-LABEL: @test_vreinterpret_f16_f32( 14982 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half> 14983 // CHECK: ret <4 x half> [[TMP0]] 14984 float16x4_t test_vreinterpret_f16_f32(float32x2_t a) { 14985 return vreinterpret_f16_f32(a); 14986 } 14987 14988 // CHECK-LABEL: @test_vreinterpret_f16_f64( 14989 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x half> 14990 // CHECK: ret <4 x half> [[TMP0]] 14991 float16x4_t test_vreinterpret_f16_f64(float64x1_t a) { 14992 return vreinterpret_f16_f64(a); 14993 } 14994 14995 // CHECK-LABEL: @test_vreinterpret_f16_p8( 14996 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half> 14997 // CHECK: ret <4 x half> [[TMP0]] 14998 float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) { 14999 return vreinterpret_f16_p8(a); 15000 } 15001 15002 // CHECK-LABEL: @test_vreinterpret_f16_p16( 15003 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half> 15004 // CHECK: ret <4 x half> [[TMP0]] 15005 float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) { 15006 return vreinterpret_f16_p16(a); 15007 } 15008 15009 // CHECK-LABEL: @test_vreinterpret_f16_p64( 15010 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half> 15011 // CHECK: ret <4 x half> [[TMP0]] 15012 float16x4_t test_vreinterpret_f16_p64(poly64x1_t a) { 15013 return vreinterpret_f16_p64(a); 15014 } 15015 15016 // CHECK-LABEL: @test_vreinterpret_f32_s8( 15017 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> 15018 // CHECK: ret <2 x float> [[TMP0]] 15019 float32x2_t test_vreinterpret_f32_s8(int8x8_t a) { 15020 return vreinterpret_f32_s8(a); 15021 } 15022 15023 // CHECK-LABEL: 
@test_vreinterpret_f32_s16( 15024 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> 15025 // CHECK: ret <2 x float> [[TMP0]] 15026 float32x2_t test_vreinterpret_f32_s16(int16x4_t a) { 15027 return vreinterpret_f32_s16(a); 15028 } 15029 15030 // CHECK-LABEL: @test_vreinterpret_f32_s32( 15031 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float> 15032 // CHECK: ret <2 x float> [[TMP0]] 15033 float32x2_t test_vreinterpret_f32_s32(int32x2_t a) { 15034 return vreinterpret_f32_s32(a); 15035 } 15036 15037 // CHECK-LABEL: @test_vreinterpret_f32_s64( 15038 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> 15039 // CHECK: ret <2 x float> [[TMP0]] 15040 float32x2_t test_vreinterpret_f32_s64(int64x1_t a) { 15041 return vreinterpret_f32_s64(a); 15042 } 15043 15044 // CHECK-LABEL: @test_vreinterpret_f32_u8( 15045 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> 15046 // CHECK: ret <2 x float> [[TMP0]] 15047 float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) { 15048 return vreinterpret_f32_u8(a); 15049 } 15050 15051 // CHECK-LABEL: @test_vreinterpret_f32_u16( 15052 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> 15053 // CHECK: ret <2 x float> [[TMP0]] 15054 float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) { 15055 return vreinterpret_f32_u16(a); 15056 } 15057 15058 // CHECK-LABEL: @test_vreinterpret_f32_u32( 15059 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float> 15060 // CHECK: ret <2 x float> [[TMP0]] 15061 float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) { 15062 return vreinterpret_f32_u32(a); 15063 } 15064 15065 // CHECK-LABEL: @test_vreinterpret_f32_u64( 15066 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> 15067 // CHECK: ret <2 x float> [[TMP0]] 15068 float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) { 15069 return vreinterpret_f32_u64(a); 15070 } 15071 15072 // CHECK-LABEL: @test_vreinterpret_f32_f16( 15073 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float> 15074 // CHECK: ret <2 x float> [[TMP0]] 15075 float32x2_t test_vreinterpret_f32_f16(float16x4_t a) { 15076 return vreinterpret_f32_f16(a); 15077 } 15078 15079 // CHECK-LABEL: @test_vreinterpret_f32_f64( 15080 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <2 x float> 15081 // CHECK: ret <2 x float> [[TMP0]] 15082 float32x2_t test_vreinterpret_f32_f64(float64x1_t a) { 15083 return vreinterpret_f32_f64(a); 15084 } 15085 15086 // CHECK-LABEL: @test_vreinterpret_f32_p8( 15087 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float> 15088 // CHECK: ret <2 x float> [[TMP0]] 15089 float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) { 15090 return vreinterpret_f32_p8(a); 15091 } 15092 15093 // CHECK-LABEL: @test_vreinterpret_f32_p16( 15094 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float> 15095 // CHECK: ret <2 x float> [[TMP0]] 15096 float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) { 15097 return vreinterpret_f32_p16(a); 15098 } 15099 15100 // CHECK-LABEL: @test_vreinterpret_f32_p64( 15101 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float> 15102 // CHECK: ret <2 x float> [[TMP0]] 15103 float32x2_t test_vreinterpret_f32_p64(poly64x1_t a) { 15104 return vreinterpret_f32_p64(a); 15105 } 15106 15107 // CHECK-LABEL: @test_vreinterpret_f64_s8( 15108 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double> 15109 // CHECK: ret <1 x double> [[TMP0]] 15110 float64x1_t test_vreinterpret_f64_s8(int8x8_t a) { 15111 return vreinterpret_f64_s8(a); 15112 } 15113 15114 // CHECK-LABEL: @test_vreinterpret_f64_s16( 15115 // CHECK: 
[[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double> 15116 // CHECK: ret <1 x double> [[TMP0]] 15117 float64x1_t test_vreinterpret_f64_s16(int16x4_t a) { 15118 return vreinterpret_f64_s16(a); 15119 } 15120 15121 // CHECK-LABEL: @test_vreinterpret_f64_s32( 15122 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double> 15123 // CHECK: ret <1 x double> [[TMP0]] 15124 float64x1_t test_vreinterpret_f64_s32(int32x2_t a) { 15125 return vreinterpret_f64_s32(a); 15126 } 15127 15128 // CHECK-LABEL: @test_vreinterpret_f64_s64( 15129 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double> 15130 // CHECK: ret <1 x double> [[TMP0]] 15131 float64x1_t test_vreinterpret_f64_s64(int64x1_t a) { 15132 return vreinterpret_f64_s64(a); 15133 } 15134 15135 // CHECK-LABEL: @test_vreinterpret_f64_u8( 15136 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double> 15137 // CHECK: ret <1 x double> [[TMP0]] 15138 float64x1_t test_vreinterpret_f64_u8(uint8x8_t a) { 15139 return vreinterpret_f64_u8(a); 15140 } 15141 15142 // CHECK-LABEL: @test_vreinterpret_f64_u16( 15143 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double> 15144 // CHECK: ret <1 x double> [[TMP0]] 15145 float64x1_t test_vreinterpret_f64_u16(uint16x4_t a) { 15146 return vreinterpret_f64_u16(a); 15147 } 15148 15149 // CHECK-LABEL: @test_vreinterpret_f64_u32( 15150 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x double> 15151 // CHECK: ret <1 x double> [[TMP0]] 15152 float64x1_t test_vreinterpret_f64_u32(uint32x2_t a) { 15153 return vreinterpret_f64_u32(a); 15154 } 15155 15156 // CHECK-LABEL: @test_vreinterpret_f64_u64( 15157 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double> 15158 // CHECK: ret <1 x double> [[TMP0]] 15159 float64x1_t test_vreinterpret_f64_u64(uint64x1_t a) { 15160 return vreinterpret_f64_u64(a); 15161 } 15162 15163 // CHECK-LABEL: @test_vreinterpret_f64_f16( 15164 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x double> 15165 // CHECK: ret <1 x double> [[TMP0]] 15166 float64x1_t test_vreinterpret_f64_f16(float16x4_t a) { 15167 return vreinterpret_f64_f16(a); 15168 } 15169 15170 // CHECK-LABEL: @test_vreinterpret_f64_f32( 15171 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x double> 15172 // CHECK: ret <1 x double> [[TMP0]] 15173 float64x1_t test_vreinterpret_f64_f32(float32x2_t a) { 15174 return vreinterpret_f64_f32(a); 15175 } 15176 15177 // CHECK-LABEL: @test_vreinterpret_f64_p8( 15178 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x double> 15179 // CHECK: ret <1 x double> [[TMP0]] 15180 float64x1_t test_vreinterpret_f64_p8(poly8x8_t a) { 15181 return vreinterpret_f64_p8(a); 15182 } 15183 15184 // CHECK-LABEL: @test_vreinterpret_f64_p16( 15185 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x double> 15186 // CHECK: ret <1 x double> [[TMP0]] 15187 float64x1_t test_vreinterpret_f64_p16(poly16x4_t a) { 15188 return vreinterpret_f64_p16(a); 15189 } 15190 15191 // CHECK-LABEL: @test_vreinterpret_f64_p64( 15192 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <1 x double> 15193 // CHECK: ret <1 x double> [[TMP0]] 15194 float64x1_t test_vreinterpret_f64_p64(poly64x1_t a) { 15195 return vreinterpret_f64_p64(a); 15196 } 15197 15198 // CHECK-LABEL: @test_vreinterpret_p8_s8( 15199 // CHECK: ret <8 x i8> %a 15200 poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) { 15201 return vreinterpret_p8_s8(a); 15202 } 15203 15204 // CHECK-LABEL: @test_vreinterpret_p8_s16( 15205 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 15206 // CHECK: ret <8 x i8> [[TMP0]] 15207 poly8x8_t 
test_vreinterpret_p8_s16(int16x4_t a) { 15208 return vreinterpret_p8_s16(a); 15209 } 15210 15211 // CHECK-LABEL: @test_vreinterpret_p8_s32( 15212 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 15213 // CHECK: ret <8 x i8> [[TMP0]] 15214 poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) { 15215 return vreinterpret_p8_s32(a); 15216 } 15217 15218 // CHECK-LABEL: @test_vreinterpret_p8_s64( 15219 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 15220 // CHECK: ret <8 x i8> [[TMP0]] 15221 poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) { 15222 return vreinterpret_p8_s64(a); 15223 } 15224 15225 // CHECK-LABEL: @test_vreinterpret_p8_u8( 15226 // CHECK: ret <8 x i8> %a 15227 poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) { 15228 return vreinterpret_p8_u8(a); 15229 } 15230 15231 // CHECK-LABEL: @test_vreinterpret_p8_u16( 15232 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 15233 // CHECK: ret <8 x i8> [[TMP0]] 15234 poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) { 15235 return vreinterpret_p8_u16(a); 15236 } 15237 15238 // CHECK-LABEL: @test_vreinterpret_p8_u32( 15239 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> 15240 // CHECK: ret <8 x i8> [[TMP0]] 15241 poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) { 15242 return vreinterpret_p8_u32(a); 15243 } 15244 15245 // CHECK-LABEL: @test_vreinterpret_p8_u64( 15246 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 15247 // CHECK: ret <8 x i8> [[TMP0]] 15248 poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) { 15249 return vreinterpret_p8_u64(a); 15250 } 15251 15252 // CHECK-LABEL: @test_vreinterpret_p8_f16( 15253 // CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> 15254 // CHECK: ret <8 x i8> [[TMP0]] 15255 poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) { 15256 return vreinterpret_p8_f16(a); 15257 } 15258 15259 // CHECK-LABEL: @test_vreinterpret_p8_f32( 15260 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> 15261 // CHECK: ret <8 x i8> [[TMP0]] 15262 poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) { 15263 return vreinterpret_p8_f32(a); 15264 } 15265 15266 // CHECK-LABEL: @test_vreinterpret_p8_f64( 15267 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> 15268 // CHECK: ret <8 x i8> [[TMP0]] 15269 poly8x8_t test_vreinterpret_p8_f64(float64x1_t a) { 15270 return vreinterpret_p8_f64(a); 15271 } 15272 15273 // CHECK-LABEL: @test_vreinterpret_p8_p16( 15274 // CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> 15275 // CHECK: ret <8 x i8> [[TMP0]] 15276 poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) { 15277 return vreinterpret_p8_p16(a); 15278 } 15279 15280 // CHECK-LABEL: @test_vreinterpret_p8_p64( 15281 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> 15282 // CHECK: ret <8 x i8> [[TMP0]] 15283 poly8x8_t test_vreinterpret_p8_p64(poly64x1_t a) { 15284 return vreinterpret_p8_p64(a); 15285 } 15286 15287 // CHECK-LABEL: @test_vreinterpret_p16_s8( 15288 // CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16> 15289 // CHECK: ret <4 x i16> [[TMP0]] 15290 poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) { 15291 return vreinterpret_p16_s8(a); 15292 } 15293 15294 // CHECK-LABEL: @test_vreinterpret_p16_s16( 15295 // CHECK: ret <4 x i16> %a 15296 poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) { 15297 return vreinterpret_p16_s16(a); 15298 } 15299 15300 // CHECK-LABEL: @test_vreinterpret_p16_s32( 15301 // CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16> 15302 // CHECK: ret <4 x i16> [[TMP0]] 15303 poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) { 15304 return 
}

// CHECK-LABEL: @test_vreinterpret_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u16(
// CHECK: ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f64(float64x1_t a) {
  return vreinterpret_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK: ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p64(poly64x1_t a) {
  return vreinterpret_p16_p64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s8(int8x8_t a) {
  return vreinterpret_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s16(int16x4_t a) {
  return vreinterpret_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_s32(int32x2_t a) {
  return vreinterpret_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_s64(
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_s64(int64x1_t a) {
  return vreinterpret_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u8(uint8x8_t a) {
  return vreinterpret_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u16(uint16x4_t a) {
  return vreinterpret_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_u32(uint32x2_t a) {
  return vreinterpret_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_u64(
// CHECK: ret <1 x i64> %a
poly64x1_t test_vreinterpret_p64_u64(uint64x1_t a) {
  return vreinterpret_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f16(float16x4_t a) {
  return vreinterpret_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f32(float32x2_t a) {
  return vreinterpret_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_f64(float64x1_t a) {
  return vreinterpret_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p8(poly8x8_t a) {
  return vreinterpret_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpret_p64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
// CHECK: ret <1 x i64> [[TMP0]]
poly64x1_t test_vreinterpret_p64_p16(poly16x4_t a) {
  return vreinterpret_p64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u8(
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f64(float64x2_t a) {
  return vreinterpretq_s8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p8(
// CHECK: ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p64(poly64x2_t a) {
  return vreinterpretq_s8_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u16(
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f64(float64x2_t a) {
  return vreinterpretq_s16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p16(
// CHECK: ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p64(poly64x2_t a) {
  return vreinterpretq_s16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK: ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f64(float64x2_t a) {
  return vreinterpretq_s32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p64(poly64x2_t a) {
  return vreinterpretq_s32_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u64(
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f64(float64x2_t a) {
  return vreinterpretq_s64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p64(
// CHECK: ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_p64(poly64x2_t a) {
  return vreinterpretq_s64_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s8(
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}
// CHECK-LABEL: @test_vreinterpretq_u8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f64(float64x2_t a) {
  return vreinterpretq_u8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p8(
// CHECK: ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p64(poly64x2_t a) {
  return vreinterpretq_u8_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s16(
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f64(float64x2_t a) {
  return vreinterpretq_u16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p16(
// CHECK: ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p64(poly64x2_t a) {
  return vreinterpretq_u16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s32(
// CHECK: ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f64(float64x2_t a) {
  return vreinterpretq_u32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK: ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p64(poly64x2_t a) {
  return vreinterpretq_u32_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s64(
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f64(float64x2_t a) {
  return vreinterpretq_u64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p64(
// CHECK: ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_p64(poly64x2_t a) {
  return vreinterpretq_u64_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f64(float64x2_t a) {
  return vreinterpretq_f16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK: ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p64(poly64x2_t a) {
  return vreinterpretq_f16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f64(float64x2_t a) {
  return vreinterpretq_f32_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK: ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p64(poly64x2_t a) {
  return vreinterpretq_f32_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s8(int8x16_t a) {
  return vreinterpretq_f64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s16(int16x8_t a) {
  return vreinterpretq_f64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s32(int32x4_t a) {
  return vreinterpretq_f64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_s64(int64x2_t a) {
  return vreinterpretq_f64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u8(uint8x16_t a) {
  return vreinterpretq_f64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u16(uint16x8_t a) {
  return vreinterpretq_f64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u32(uint32x4_t a) {
  return vreinterpretq_f64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_u64(uint64x2_t a) {
  return vreinterpretq_f64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f16(float16x8_t a) {
  return vreinterpretq_f64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_f32(float32x4_t a) {
  return vreinterpretq_f64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p8(poly8x16_t a) {
  return vreinterpretq_f64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p16(poly16x8_t a) {
  return vreinterpretq_f64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f64_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <2 x double>
// CHECK: ret <2 x double> [[TMP0]]
float64x2_t test_vreinterpretq_f64_p64(poly64x2_t a) {
  return vreinterpretq_f64_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK: ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f64(float64x2_t a) {
  return vreinterpretq_p8_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p64(poly64x2_t a) {
  return vreinterpretq_p8_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u16(
// CHECK: ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f64(float64x2_t a) {
  return vreinterpretq_p16_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK: ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p64(poly64x2_t a) {
  return vreinterpretq_p16_p64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s8(int8x16_t a) {
  return vreinterpretq_p64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s16(int16x8_t a) {
  return vreinterpretq_p64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_s32(int32x4_t a) {
  return vreinterpretq_p64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_s64(
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_s64(int64x2_t a) {
  return vreinterpretq_p64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u8(uint8x16_t a) {
  return vreinterpretq_p64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u16(uint16x8_t a) {
  return vreinterpretq_p64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_u32(uint32x4_t a) {
  return vreinterpretq_p64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_u64(
// CHECK: ret <2 x i64> %a
poly64x2_t test_vreinterpretq_p64_u64(uint64x2_t a) {
  return vreinterpretq_p64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f16(float16x8_t a) {
  return vreinterpretq_p64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f32(float32x4_t a) {
  return vreinterpretq_p64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_f64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_f64(float64x2_t a) {
  return vreinterpretq_p64_f64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p8(
// CHECK: [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p8(poly8x16_t a) {
  return vreinterpretq_p64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p64_p16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK: ret <2 x i64> [[TMP0]]
poly64x2_t test_vreinterpretq_p64_p16(poly16x8_t a) {
  return vreinterpretq_p64_p16(a);
}

// CHECK-LABEL: @test_vabds_f32(
// CHECK: [[VABDS_F32_I:%.*]] = call float @llvm.aarch64.sisd.fabd.f32(float %a, float %b)
// CHECK: ret float [[VABDS_F32_I]]
float32_t test_vabds_f32(float32_t a, float32_t b) {
  return vabds_f32(a, b);
}

// CHECK-LABEL: @test_vabdd_f64(
// CHECK: [[VABDD_F64_I:%.*]] = call double @llvm.aarch64.sisd.fabd.f64(double %a, double %b)
// CHECK: ret double [[VABDD_F64_I]]
float64_t test_vabdd_f64(float64_t a, float64_t b) {
  return vabdd_f64(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s8(
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call <16 x i8> @llvm.aarch64.neon.suqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK-NEXT: ret <16 x i8> [[V]]
int8x16_t test_vuqaddq_s8(int8x16_t a, uint8x16_t b) {
  return vuqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s32(
// CHECK: [[V:%.*]] = call <4 x i32> @llvm.aarch64.neon.suqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK-NEXT: ret <4 x i32> [[V]]
int32x4_t test_vuqaddq_s32(int32x4_t a, uint32x4_t b) {
  return vuqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s64(
// CHECK: [[V:%.*]] = call <2 x i64> @llvm.aarch64.neon.suqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK-NEXT: ret <2 x i64> [[V]]
int64x2_t test_vuqaddq_s64(int64x2_t a, uint64x2_t b) {
  return vuqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vuqaddq_s16(
// CHECK: [[V:%.*]] = call <8 x i16> @llvm.aarch64.neon.suqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK-NEXT: ret <8 x i16> [[V]]
int16x8_t test_vuqaddq_s16(int16x8_t a, uint16x8_t b) {
  return vuqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vuqadd_s8(
// CHECK: entry:
// CHECK-NEXT: [[V:%.*]] = call <8 x i8> @llvm.aarch64.neon.suqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK-NEXT: ret <8 x i8> [[V]]
int8x8_t test_vuqadd_s8(int8x8_t a, uint8x8_t b) {
  return vuqadd_s8(a, b);
}

// CHECK-LABEL: @test_vuqadd_s32(
// CHECK: [[V:%.*]] = call <2 x i32> @llvm.aarch64.neon.suqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK-NEXT: ret <2 x i32> [[V]]
int32x2_t test_vuqadd_s32(int32x2_t a, uint32x2_t b) {
  return vuqadd_s32(a, b);
}

// CHECK-LABEL: @test_vuqadd_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VUQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.suqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: ret <1 x i64> [[VUQADD2_I]]
int64x1_t test_vuqadd_s64(int64x1_t a, uint64x1_t b) {
  return vuqadd_s64(a, b);
}

// CHECK-LABEL: @test_vuqadd_s16(
// CHECK: [[V:%.*]] = call <4 x i16> @llvm.aarch64.neon.suqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK-NEXT: ret <4 x i16> [[V]]
int16x4_t test_vuqadd_s16(int16x4_t a, uint16x4_t b) {
  return vuqadd_s16(a, b);
}

// CHECK-LABEL: @test_vsqadd_u64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.usqadd.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK: ret <1 x i64> [[VSQADD2_I]]
uint64x1_t test_vsqadd_u64(uint64x1_t a, int64x1_t b) {
  return vsqadd_u64(a, b);
}

// CHECK-LABEL: @test_vsqadd_u8(
// CHECK: [[VSQADD_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.usqadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK: ret <8 x i8> [[VSQADD_I]]
uint8x8_t test_vsqadd_u8(uint8x8_t a, int8x8_t b) {
  return vsqadd_u8(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u8(
// CHECK: [[VSQADD_I:%.*]] = call <16 x i8> @llvm.aarch64.neon.usqadd.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK: ret <16 x i8> [[VSQADD_I]]
uint8x16_t test_vsqaddq_u8(uint8x16_t a, int8x16_t b) {
  return vsqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vsqadd_u16(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.usqadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK: ret <4 x i16> [[VSQADD2_I]]
uint16x4_t test_vsqadd_u16(uint16x4_t a, int16x4_t b) {
  return vsqadd_u16(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u16(
// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <8 x i16> @llvm.aarch64.neon.usqadd.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK: ret <8 x i16> [[VSQADD2_I]]
uint16x8_t test_vsqaddq_u16(uint16x8_t a, int16x8_t b) {
  return vsqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vsqadd_u32(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i32> @llvm.aarch64.neon.usqadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK: ret <2 x i32> [[VSQADD2_I]]
uint32x2_t test_vsqadd_u32(uint32x2_t a, int32x2_t b) {
  return vsqadd_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u32(
// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.usqadd.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK: ret <4 x i32> [[VSQADD2_I]]
uint32x4_t test_vsqaddq_u32(uint32x4_t a, int32x4_t b) {
  return vsqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vsqaddq_u64(
// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK: [[VSQADD2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.usqadd.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK: ret <2 x i64> [[VSQADD2_I]]
uint64x2_t test_vsqaddq_u64(uint64x2_t a, int64x2_t b) {
  return vsqaddq_u64(a, b);
}

// CHECK-LABEL: @test_vabs_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VABS1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.abs.v1i64(<1 x i64> %a)
// CHECK: ret <1 x i64> [[VABS1_I]]
int64x1_t test_vabs_s64(int64x1_t a) {
  return vabs_s64(a);
}

// CHECK-LABEL: @test_vqabs_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQABS_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqabs.v1i64(<1 x i64> %a)
// CHECK: [[VQABS_V2_I:%.*]] = bitcast <1 x i64> [[VQABS_V1_I]] to <8 x i8>
// CHECK: ret <1 x i64> [[VQABS_V1_I]]
int64x1_t test_vqabs_s64(int64x1_t a) {
  return vqabs_s64(a);
}

// CHECK-LABEL: @test_vqneg_s64(
// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
16880 // CHECK: [[VQNEG_V1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqneg.v1i64(<1 x i64> %a)
16881 // CHECK: [[VQNEG_V2_I:%.*]] = bitcast <1 x i64> [[VQNEG_V1_I]] to <8 x i8>
16882 // CHECK: ret <1 x i64> [[VQNEG_V1_I]]
16883 int64x1_t test_vqneg_s64(int64x1_t a) {
16884 return vqneg_s64(a);
16885 }
16886
16887 // CHECK-LABEL: @test_vneg_s64(
16888 // CHECK: [[SUB_I:%.*]] = sub <1 x i64> zeroinitializer, %a
16889 // CHECK: ret <1 x i64> [[SUB_I]]
16890 int64x1_t test_vneg_s64(int64x1_t a) {
16891 return vneg_s64(a);
16892 }
16893
16894 // CHECK-LABEL: @test_vaddv_f32(
16895 // CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a)
16896 // CHECK: ret float [[VADDV_F32_I]]
16897 float32_t test_vaddv_f32(float32x2_t a) {
16898 return vaddv_f32(a);
16899 }
16900
16901 // CHECK-LABEL: @test_vaddvq_f32(
16902 // CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a)
16903 // CHECK: ret float [[VADDVQ_F32_I]]
16904 float32_t test_vaddvq_f32(float32x4_t a) {
16905 return vaddvq_f32(a);
16906 }
16907
16908 // CHECK-LABEL: @test_vaddvq_f64(
16909 // CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a)
16910 // CHECK: ret double [[VADDVQ_F64_I]]
16911 float64_t test_vaddvq_f64(float64x2_t a) {
16912 return vaddvq_f64(a);
16913 }
16914
16915 // CHECK-LABEL: @test_vmaxv_f32(
16916 // CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a)
16917 // CHECK: ret float [[VMAXV_F32_I]]
16918 float32_t test_vmaxv_f32(float32x2_t a) {
16919 return vmaxv_f32(a);
16920 }
16921
16922 // CHECK-LABEL: @test_vmaxvq_f64(
16923 // CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a)
16924 // CHECK: ret double [[VMAXVQ_F64_I]]
16925 float64_t test_vmaxvq_f64(float64x2_t a) {
16926 return vmaxvq_f64(a);
16927 }
16928
16929 // CHECK-LABEL: @test_vminv_f32(
16930 // CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a)
16931 // CHECK: ret float [[VMINV_F32_I]]
16932 float32_t test_vminv_f32(float32x2_t a) {
16933 return vminv_f32(a);
16934 }
16935
16936 // CHECK-LABEL: @test_vminvq_f64(
16937 // CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a)
16938 // CHECK: ret double [[VMINVQ_F64_I]]
16939 float64_t test_vminvq_f64(float64x2_t a) {
16940 return vminvq_f64(a);
16941 }
16942
16943 // CHECK-LABEL: @test_vmaxnmvq_f64(
16944 // CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a)
16945 // CHECK: ret double [[VMAXNMVQ_F64_I]]
16946 float64_t test_vmaxnmvq_f64(float64x2_t a) {
16947 return vmaxnmvq_f64(a);
16948 }
16949
16950 // CHECK-LABEL: @test_vmaxnmv_f32(
16951 // CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a)
16952 // CHECK: ret float [[VMAXNMV_F32_I]]
16953 float32_t test_vmaxnmv_f32(float32x2_t a) {
16954 return vmaxnmv_f32(a);
16955 }
16956
16957 // CHECK-LABEL: @test_vminnmvq_f64(
16958 // CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a)
16959 // CHECK: ret double [[VMINNMVQ_F64_I]]
16960 float64_t test_vminnmvq_f64(float64x2_t a) {
16961 return vminnmvq_f64(a);
16962 }
16963
16964 // CHECK-LABEL: @test_vminnmv_f32(
16965 // CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a)
16966 // CHECK: ret float [[VMINNMV_F32_I]]
16967 float32_t test_vminnmv_f32(float32x2_t a) {
16968 return vminnmv_f32(a);
16969 }
16970
16971 // CHECK-LABEL: @test_vpaddq_s64(
16972 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
16973 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
16974 // CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
16975 int64x2_t test_vpaddq_s64(int64x2_t a, int64x2_t b) {
16976 return vpaddq_s64(a, b);
16977 }
16978
16979 // CHECK-LABEL: @test_vpaddq_u64(
16980 // CHECK: [[VPADDQ_V2_I:%.*]] = call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
16981 // CHECK: [[VPADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VPADDQ_V2_I]] to <16 x i8>
16982 // CHECK: ret <2 x i64> [[VPADDQ_V2_I]]
16983 uint64x2_t test_vpaddq_u64(uint64x2_t a, uint64x2_t b) {
16984 return vpaddq_u64(a, b);
16985 }
16986
16987 // CHECK-LABEL: @test_vpaddd_u64(
16988 // CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
16989 // CHECK: ret i64 [[VPADDD_U64_I]]
16990 uint64_t test_vpaddd_u64(uint64x2_t a) {
16991 return vpaddd_u64(a);
16992 }
16993
16994 // CHECK-LABEL: @test_vaddvq_s64(
16995 // CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a)
16996 // CHECK: ret i64 [[VADDVQ_S64_I]]
16997 int64_t test_vaddvq_s64(int64x2_t a) {
16998 return vaddvq_s64(a);
16999 }
17000
17001 // CHECK-LABEL: @test_vaddvq_u64(
17002 // CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a)
17003 // CHECK: ret i64 [[VADDVQ_U64_I]]
17004 uint64_t test_vaddvq_u64(uint64x2_t a) {
17005 return vaddvq_u64(a);
17006 }
17007
17008 // CHECK-LABEL: @test_vadd_f64(
17009 // CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, %b
17010 // CHECK: ret <1 x double> [[ADD_I]]
17011 float64x1_t test_vadd_f64(float64x1_t a, float64x1_t b) {
17012 return vadd_f64(a, b);
17013 }
17014
17015 // CHECK-LABEL: @test_vmul_f64(
17016 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %a, %b
17017 // CHECK: ret <1 x double> [[MUL_I]]
17018 float64x1_t test_vmul_f64(float64x1_t a, float64x1_t b) {
17019 return vmul_f64(a, b);
17020 }
17021
17022 // CHECK-LABEL: @test_vdiv_f64(
17023 // CHECK: [[DIV_I:%.*]] = fdiv <1 x double> %a, %b
17024 // CHECK: ret <1 x double> [[DIV_I]]
17025 float64x1_t test_vdiv_f64(float64x1_t a, float64x1_t b) {
17026 return vdiv_f64(a, b);
17027 }
17028
17029 // CHECK-LABEL: @test_vmla_f64(
17030 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
17031 // CHECK: [[ADD_I:%.*]] = fadd <1 x double> %a, [[MUL_I]]
17032 // CHECK: ret <1 x double> [[ADD_I]]
17033 float64x1_t test_vmla_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17034 return vmla_f64(a, b, c);
17035 }
17036
17037 // CHECK-LABEL: @test_vmls_f64(
17038 // CHECK: [[MUL_I:%.*]] = fmul <1 x double> %b, %c
17039 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, [[MUL_I]]
17040 // CHECK: ret <1 x double> [[SUB_I]]
17041 float64x1_t test_vmls_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17042 return vmls_f64(a, b, c);
17043 }
17044
17045 // CHECK-LABEL: @test_vfma_f64(
17046 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17047 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17048 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
17049 // CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> %b, <1 x double> %c, <1 x double> %a)
17050 // CHECK: ret <1 x double> [[TMP3]]
17051 float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17052 return vfma_f64(a, b, c);
17053 }
17054
17055 // CHECK-LABEL: @test_vfms_f64(
17056 // CHECK: [[SUB_I:%.*]] = fneg <1 x double> %b
17057 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17058 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8>
17059 // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8>
17060 // CHECK: [[TMP3:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[SUB_I]], <1 x double> %c, <1 x double> %a)
17061 // CHECK: ret <1 x double> [[TMP3]]
17062 float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) {
17063 return vfms_f64(a, b, c);
17064 }
17065
17066 // CHECK-LABEL: @test_vsub_f64(
17067 // CHECK: [[SUB_I:%.*]] = fsub <1 x double> %a, %b
17068 // CHECK: ret <1 x double> [[SUB_I]]
17069 float64x1_t test_vsub_f64(float64x1_t a, float64x1_t b) {
17070 return vsub_f64(a, b);
17071 }
17072
17073 // CHECK-LABEL: @test_vabd_f64(
17074 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17075 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17076 // CHECK: [[VABD2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fabd.v1f64(<1 x double> %a, <1 x double> %b)
17077 // CHECK: ret <1 x double> [[VABD2_I]]
17078 float64x1_t test_vabd_f64(float64x1_t a, float64x1_t b) {
17079 return vabd_f64(a, b);
17080 }
17081
17082 // CHECK-LABEL: @test_vmax_f64(
17083 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17084 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17085 // CHECK: [[VMAX2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmax.v1f64(<1 x double> %a, <1 x double> %b)
17086 // CHECK: ret <1 x double> [[VMAX2_I]]
17087 float64x1_t test_vmax_f64(float64x1_t a, float64x1_t b) {
17088 return vmax_f64(a, b);
17089 }
17090
17091 // CHECK-LABEL: @test_vmin_f64(
17092 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17093 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17094 // CHECK: [[VMIN2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmin.v1f64(<1 x double> %a, <1 x double> %b)
17095 // CHECK: ret <1 x double> [[VMIN2_I]]
17096 float64x1_t test_vmin_f64(float64x1_t a, float64x1_t b) {
17097 return vmin_f64(a, b);
17098 }
17099
17100 // CHECK-LABEL: @test_vmaxnm_f64(
17101 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17102 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17103 // CHECK: [[VMAXNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fmaxnm.v1f64(<1 x double> %a, <1 x double> %b)
17104 // CHECK: ret <1 x double> [[VMAXNM2_I]]
17105 float64x1_t test_vmaxnm_f64(float64x1_t a, float64x1_t b) {
17106 return vmaxnm_f64(a, b);
17107 }
17108
17109 // CHECK-LABEL: @test_vminnm_f64(
17110 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17111 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17112 // CHECK: [[VMINNM2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.fminnm.v1f64(<1 x double> %a, <1 x double> %b)
17113 // CHECK: ret <1 x double> [[VMINNM2_I]]
17114 float64x1_t test_vminnm_f64(float64x1_t a, float64x1_t b) {
17115 return vminnm_f64(a, b);
17116 }
17117
17118 // CHECK-LABEL: @test_vabs_f64(
17119 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17120 // CHECK: [[VABS1_I:%.*]] = call <1 x double> @llvm.fabs.v1f64(<1 x double> %a)
17121 // CHECK: ret <1 x double> [[VABS1_I]]
17122 float64x1_t test_vabs_f64(float64x1_t a) {
17123 return vabs_f64(a);
17124 }
17125
17126 // CHECK-LABEL: @test_vneg_f64(
17127 // CHECK: [[SUB_I:%.*]] = fneg <1 x double> %a
17128 // CHECK: ret <1 x double> [[SUB_I]]
17129 float64x1_t test_vneg_f64(float64x1_t a) {
17130 return vneg_f64(a);
17131 }
17132
17133 // CHECK-LABEL: @test_vcvt_s64_f64(
17134 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17135 // CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a)
17136 // CHECK: ret <1 x i64> [[TMP1]]
17137 int64x1_t test_vcvt_s64_f64(float64x1_t a) {
17138 return vcvt_s64_f64(a);
17139 }
17140
17141 // CHECK-LABEL: @test_vcvt_u64_f64(
17142 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17143 // CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a)
17144 // CHECK: ret <1 x i64> [[TMP1]]
17145 uint64x1_t test_vcvt_u64_f64(float64x1_t a) {
17146 return vcvt_u64_f64(a);
17147 }
17148
17149 // CHECK-LABEL: @test_vcvtn_s64_f64(
17150 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17151 // CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtns.v1i64.v1f64(<1 x double> %a)
17152 // CHECK: ret <1 x i64> [[VCVTN1_I]]
17153 int64x1_t test_vcvtn_s64_f64(float64x1_t a) {
17154 return vcvtn_s64_f64(a);
17155 }
17156
17157 // CHECK-LABEL: @test_vcvtn_u64_f64(
17158 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17159 // CHECK: [[VCVTN1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtnu.v1i64.v1f64(<1 x double> %a)
17160 // CHECK: ret <1 x i64> [[VCVTN1_I]]
17161 uint64x1_t test_vcvtn_u64_f64(float64x1_t a) {
17162 return vcvtn_u64_f64(a);
17163 }
17164
17165 // CHECK-LABEL: @test_vcvtp_s64_f64(
17166 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17167 // CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtps.v1i64.v1f64(<1 x double> %a)
17168 // CHECK: ret <1 x i64> [[VCVTP1_I]]
17169 int64x1_t test_vcvtp_s64_f64(float64x1_t a) {
17170 return vcvtp_s64_f64(a);
17171 }
17172
17173 // CHECK-LABEL: @test_vcvtp_u64_f64(
17174 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17175 // CHECK: [[VCVTP1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtpu.v1i64.v1f64(<1 x double> %a)
17176 // CHECK: ret <1 x i64> [[VCVTP1_I]]
17177 uint64x1_t test_vcvtp_u64_f64(float64x1_t a) {
17178 return vcvtp_u64_f64(a);
17179 }
17180
17181 // CHECK-LABEL: @test_vcvtm_s64_f64(
17182 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17183 // CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtms.v1i64.v1f64(<1 x double> %a)
17184 // CHECK: ret <1 x i64> [[VCVTM1_I]]
17185 int64x1_t test_vcvtm_s64_f64(float64x1_t a) {
17186 return vcvtm_s64_f64(a);
17187 }
17188
17189 // CHECK-LABEL: @test_vcvtm_u64_f64(
17190 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17191 // CHECK: [[VCVTM1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtmu.v1i64.v1f64(<1 x double> %a)
17192 // CHECK: ret <1 x i64> [[VCVTM1_I]]
17193 uint64x1_t test_vcvtm_u64_f64(float64x1_t a) {
17194 return vcvtm_u64_f64(a);
17195 }
17196
17197 // CHECK-LABEL: @test_vcvta_s64_f64(
17198 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17199 // CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtas.v1i64.v1f64(<1 x double> %a)
17200 // CHECK: ret <1 x i64> [[VCVTA1_I]]
17201 int64x1_t test_vcvta_s64_f64(float64x1_t a) {
17202 return vcvta_s64_f64(a);
17203 }
17204
17205 // CHECK-LABEL: @test_vcvta_u64_f64(
17206 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17207 // CHECK: [[VCVTA1_I:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtau.v1i64.v1f64(<1 x double> %a)
17208 // CHECK: ret <1 x i64> [[VCVTA1_I]]
17209 uint64x1_t test_vcvta_u64_f64(float64x1_t a) {
17210 return vcvta_u64_f64(a);
17211 }
17212
17213 // CHECK-LABEL: @test_vcvt_f64_s64(
17214 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17215 // CHECK: [[VCVT_I:%.*]] = sitofp <1 x i64> %a to <1 x double>
17216 // CHECK: ret <1 x double> [[VCVT_I]]
17217 float64x1_t test_vcvt_f64_s64(int64x1_t a) {
17218 return vcvt_f64_s64(a);
17219 }
17220
17221 // CHECK-LABEL: @test_vcvt_f64_u64(
17222 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17223 // CHECK: [[VCVT_I:%.*]] = uitofp <1 x i64> %a to <1 x double>
17224 // CHECK: ret <1 x double> [[VCVT_I]]
17225 float64x1_t test_vcvt_f64_u64(uint64x1_t a) {
17226 return vcvt_f64_u64(a);
17227 }
17228
17229 // CHECK-LABEL: @test_vcvt_n_s64_f64(
17230 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17231 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
17232 // CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxs.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
17233 // CHECK: ret <1 x i64> [[VCVT_N1]]
17234 int64x1_t test_vcvt_n_s64_f64(float64x1_t a) {
17235 return vcvt_n_s64_f64(a, 64);
17236 }
17237
17238 // CHECK-LABEL: @test_vcvt_n_u64_f64(
17239 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17240 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double>
17241 // CHECK: [[VCVT_N1:%.*]] = call <1 x i64> @llvm.aarch64.neon.vcvtfp2fxu.v1i64.v1f64(<1 x double> [[VCVT_N]], i32 64)
17242 // CHECK: ret <1 x i64> [[VCVT_N1]]
17243 uint64x1_t test_vcvt_n_u64_f64(float64x1_t a) {
17244 return vcvt_n_u64_f64(a, 64);
17245 }
17246
17247 // CHECK-LABEL: @test_vcvt_n_f64_s64(
17248 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17249 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
17250 // CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxs2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
17251 // CHECK: ret <1 x double> [[VCVT_N1]]
17252 float64x1_t test_vcvt_n_f64_s64(int64x1_t a) {
17253 return vcvt_n_f64_s64(a, 64);
17254 }
17255
17256 // CHECK-LABEL: @test_vcvt_n_f64_u64(
17257 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
17258 // CHECK: [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
17259 // CHECK: [[VCVT_N1:%.*]] = call <1 x double> @llvm.aarch64.neon.vcvtfxu2fp.v1f64.v1i64(<1 x i64> [[VCVT_N]], i32 64)
17260 // CHECK: ret <1 x double> [[VCVT_N1]]
17261 float64x1_t test_vcvt_n_f64_u64(uint64x1_t a) {
17262 return vcvt_n_f64_u64(a, 64);
17263 }
17264
17265 // CHECK-LABEL: @test_vrndn_f64(
17266 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17267 // CHECK: [[VRNDN1_I:%.*]] = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %a)
17268 // CHECK: ret <1 x double> [[VRNDN1_I]]
17269 float64x1_t test_vrndn_f64(float64x1_t a) {
17270 return vrndn_f64(a);
17271 }
17272
17273 // CHECK-LABEL: @test_vrnda_f64(
17274 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17275 // CHECK: [[VRNDA1_I:%.*]] = call <1 x double> @llvm.round.v1f64(<1 x double> %a)
17276 // CHECK: ret <1 x double> [[VRNDA1_I]]
17277 float64x1_t test_vrnda_f64(float64x1_t a) {
17278 return vrnda_f64(a);
17279 }
17280
17281 // CHECK-LABEL: @test_vrndp_f64(
17282 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17283 // CHECK: [[VRNDP1_I:%.*]] = call <1 x double> @llvm.ceil.v1f64(<1 x double> %a)
17284 // CHECK: ret <1 x double> [[VRNDP1_I]]
17285 float64x1_t test_vrndp_f64(float64x1_t a) {
17286 return vrndp_f64(a);
17287 }
17288
17289 // CHECK-LABEL: @test_vrndm_f64(
17290 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17291 // CHECK: [[VRNDM1_I:%.*]] = call <1 x double> @llvm.floor.v1f64(<1 x double> %a)
17292 // CHECK: ret <1 x double> [[VRNDM1_I]]
17293 float64x1_t test_vrndm_f64(float64x1_t a) {
17294 return vrndm_f64(a);
17295 }
17296
17297 // CHECK-LABEL: @test_vrndx_f64(
17298 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17299 // CHECK: [[VRNDX1_I:%.*]] = call <1 x double> @llvm.rint.v1f64(<1 x double> %a)
17300 // CHECK: ret <1 x double> [[VRNDX1_I]]
17301 float64x1_t test_vrndx_f64(float64x1_t a) {
17302 return vrndx_f64(a);
17303 }
17304
17305 // CHECK-LABEL: @test_vrnd_f64(
17306 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17307 // CHECK: [[VRNDZ1_I:%.*]] = call <1 x double> @llvm.trunc.v1f64(<1 x double> %a)
17308 // CHECK: ret <1 x double> [[VRNDZ1_I]]
17309 float64x1_t test_vrnd_f64(float64x1_t a) {
17310 return vrnd_f64(a);
17311 }
17312
17313 // CHECK-LABEL: @test_vrndi_f64(
17314 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17315 // CHECK: [[VRNDI1_I:%.*]] = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %a)
17316 // CHECK: ret <1 x double> [[VRNDI1_I]]
17317 float64x1_t test_vrndi_f64(float64x1_t a) {
17318 return vrndi_f64(a);
17319 }
17320
17321 // CHECK-LABEL: @test_vrsqrte_f64(
17322 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17323 // CHECK: [[VRSQRTE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrte.v1f64(<1 x double> %a)
17324 // CHECK: ret <1 x double> [[VRSQRTE_V1_I]]
17325 float64x1_t test_vrsqrte_f64(float64x1_t a) {
17326 return vrsqrte_f64(a);
17327 }
17328
17329 // CHECK-LABEL: @test_vrecpe_f64(
17330 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17331 // CHECK: [[VRECPE_V1_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecpe.v1f64(<1 x double> %a)
17332 // CHECK: ret <1 x double> [[VRECPE_V1_I]]
17333 float64x1_t test_vrecpe_f64(float64x1_t a) {
17334 return vrecpe_f64(a);
17335 }
17336
17337 // CHECK-LABEL: @test_vsqrt_f64(
17338 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17339 // CHECK: [[VSQRT_I:%.*]] = call <1 x double> @llvm.sqrt.v1f64(<1 x double> %a)
17340 // CHECK: ret <1 x double> [[VSQRT_I]]
17341 float64x1_t test_vsqrt_f64(float64x1_t a) {
17342 return vsqrt_f64(a);
17343 }
17344
17345 // CHECK-LABEL: @test_vrecps_f64(
17346 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17347 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17348 // CHECK: [[VRECPS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frecps.v1f64(<1 x double> %a, <1 x double> %b)
17349 // CHECK: ret <1 x double> [[VRECPS_V2_I]]
17350 float64x1_t test_vrecps_f64(float64x1_t a, float64x1_t b) {
17351 return vrecps_f64(a, b);
17352 }
17353
17354 // CHECK-LABEL: @test_vrsqrts_f64(
17355 // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8>
17356 // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8>
17357 // CHECK: [[VRSQRTS_V2_I:%.*]] = call <1 x double> @llvm.aarch64.neon.frsqrts.v1f64(<1 x double> %a, <1 x double> %b)
17358 // CHECK: [[VRSQRTS_V3_I:%.*]] = bitcast <1 x double> [[VRSQRTS_V2_I]] to <8 x i8>
17359 // CHECK: ret <1 x double> [[VRSQRTS_V2_I]]
17360 float64x1_t test_vrsqrts_f64(float64x1_t a, float64x1_t b) {
17361 return vrsqrts_f64(a, b);
17362 }
17363
17364 // CHECK-LABEL: @test_vminv_s32(
17365 // CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a)
17366 // CHECK: ret i32 [[VMINV_S32_I]]
17367 int32_t test_vminv_s32(int32x2_t a) {
17368 return vminv_s32(a);
17369 }
17370
17371 // CHECK-LABEL: @test_vminv_u32(
17372 // CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a)
17373 // CHECK: ret i32 [[VMINV_U32_I]]
17374 uint32_t test_vminv_u32(uint32x2_t a) {
17375 return vminv_u32(a);
17376 }
17377
17378 // CHECK-LABEL: @test_vmaxv_s32(
17379 // CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a)
17380 // CHECK: ret i32 [[VMAXV_S32_I]]
17381 int32_t test_vmaxv_s32(int32x2_t a) {
17382 return vmaxv_s32(a);
17383 }
17384
17385 // CHECK-LABEL: @test_vmaxv_u32(
17386 // CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a)
17387 // CHECK: ret i32 [[VMAXV_U32_I]]
17388 uint32_t test_vmaxv_u32(uint32x2_t a) {
17389 return vmaxv_u32(a);
17390 }
17391
17392 // CHECK-LABEL: @test_vaddv_s32(
17393 // CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a)
17394 // CHECK: ret i32 [[VADDV_S32_I]]
17395 int32_t test_vaddv_s32(int32x2_t a) {
17396 return vaddv_s32(a);
17397 }
17398
17399 // CHECK-LABEL: @test_vaddv_u32(
17400 // CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a)
17401 // CHECK: ret i32 [[VADDV_U32_I]]
17402 uint32_t test_vaddv_u32(uint32x2_t a) {
17403 return vaddv_u32(a);
17404 }
17405
17406 // CHECK-LABEL: @test_vaddlv_s32(
17407 // CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a)
17408 // CHECK: ret i64 [[VADDLV_S32_I]]
17409 int64_t test_vaddlv_s32(int32x2_t a) {
17410 return vaddlv_s32(a);
17411 }
17412
17413 // CHECK-LABEL: @test_vaddlv_u32(
17414 // CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a)
17415 // CHECK: ret i64 [[VADDLV_U32_I]]
17416 uint64_t test_vaddlv_u32(uint32x2_t a) {
17417 return vaddlv_u32(a);
17418 }
17419