/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
   Copyright (C) 2018-2020 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "memmodel.h"
#include "insn-codes.h"
#include "optabs.h"
#include "recog.h"
#include "expr.h"
#include "basic-block.h"
#include "function.h"
#include "fold-const.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "explow.h"
#include "emit-rtl.h"
#include "tree-vector-builder.h"
#include "rtx-vector-builder.h"
#include "vec-perm-indices.h"
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-shapes.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-functions.h"

using namespace aarch64_sve;

namespace {

/* Return the UNSPEC_CMLA* unspec for rotation amount ROT.  */
static int
unspec_cmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_CMLA;
    case 90: return UNSPEC_CMLA90;
    case 180: return UNSPEC_CMLA180;
    case 270: return UNSPEC_CMLA270;
    default: gcc_unreachable ();
    }
}

/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_FCMLA;
    case 90: return UNSPEC_FCMLA90;
    case 180: return UNSPEC_FCMLA180;
    case 270: return UNSPEC_FCMLA270;
    default: gcc_unreachable ();
    }
}

/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_cond_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_COND_FCMLA;
    case 90: return UNSPEC_COND_FCMLA90;
    case 180: return UNSPEC_COND_FCMLA180;
    case 270: return UNSPEC_COND_FCMLA270;
    default: gcc_unreachable ();
    }
}

/* Expand a call to svmad, or svmla after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_mad (function_expander &e,
            unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
        icode = code_for_aarch64_pred_fma (e.vector_mode (0));
      else
        icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fma_optab);
  return e.use_cond_insn (icode, merge_argno);
}
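
/* Added explanatory note (not in the original sources): expand_mad and
   expand_msb are shared between the svmad/svmsb implementations, which pass
   their arguments through unchanged, and the svmla/svmls implementations,
   which first rotate the accumulator to the end.  For example, a call such
   as svmla_f32_m (pg, acc, a, b) is reordered to the svmad operand order
   (pg, a, b, acc) and then expanded with MERGE_ARGNO == 3, so the _m form
   still merges with the accumulator.  */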

/* Expand a call to svmla_lane or svmls_lane using floating-point unspec
   UNSPEC.  */
static rtx
expand_mla_mls_lane (function_expander &e, int unspec)
{
  /* Put the operands in the normal (fma ...) order, with the accumulator
     last.  This fits naturally since that's also the unprinted operand
     in the asm output.  */
  e.rotate_inputs_left (0, 4);
  insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
  return e.use_exact_insn (icode);
}

/* Expand a call to svmsb, or svmls after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_msb (function_expander &e,
            unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
        icode = code_for_aarch64_pred_fnma (e.vector_mode (0));
      else
        icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fnma_optab);
  return e.use_cond_insn (icode, merge_argno);
}

class svabd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The integer operations are represented as the subtraction of the
       minimum from the maximum, with the signedness of the instruction
       keyed off the signedness of the maximum operation.  */
    rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
    insn_code icode;
    if (e.pred == PRED_x)
      {
        if (e.type_suffix (0).integer_p)
          icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0));
        else
          icode = code_for_aarch64_pred_abd (e.vector_mode (0));
        return e.use_pred_x_insn (icode);
      }

    if (e.type_suffix (0).integer_p)
      icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0));
    else
      icode = code_for_aarch64_cond_abd (e.vector_mode (0));
    return e.use_cond_insn (icode);
  }
};

/* Implements svacge, svacgt, svacle and svaclt.  */
class svac_impl : public function_base
{
public:
  CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.add_ptrue_hint (0, e.gp_mode (0));
    insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The unspec code for the underlying comparison.  */
  int m_unspec;
};

class svadda_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Put the predicate last, as required by mask_fold_left_plus_optab.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements svadr[bhwd].  */
class svadr_bhwd_impl : public function_base
{
public:
  CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = GET_MODE (e.args[0]);
    if (m_shift == 0)
      return e.use_exact_insn (code_for_aarch64_adr (mode));

    /* Turn the access size into an extra shift argument.  */
    rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode));
    e.args.quick_push (expand_vector_broadcast (mode, shift));
    return e.use_exact_insn (code_for_aarch64_adr_shift (mode));
  }

  /* How many bits left to shift the vector displacement.  */
  unsigned int m_shift;
};

class svbic_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert svbic of a constant into svand of its inverse.  */
    if (CONST_INT_P (e.args[2]))
      {
        machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
        e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
        return e.map_to_rtx_codes (AND, AND, -1);
      }

    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
        gcc_assert (e.pred == PRED_z);
        return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z);
      }

    if (e.pred == PRED_x)
      return e.use_unpred_insn (code_for_aarch64_bic (e.vector_mode (0)));

    return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0)));
  }
};

/* Implements svbrkn, svbrkpa and svbrkpb.  */
class svbrk_binary_impl : public function_base
{
public:
  CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

/* Implements svbrka and svbrkb.  */
class svbrk_unary_impl : public function_base
{
public:
  CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_cond_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

class svcadd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    if (rot == 90)
      return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90,
                               UNSPEC_COND_FCADD90);
    if (rot == 270)
      return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270,
                               UNSPEC_COND_FCADD270);
    gcc_unreachable ();
  }
};

/* Implements svclasta and svclastb.  */
class svclast_impl : public quiet<function_base>
{
public:
  CONSTEXPR svclast_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Match the fold_extract_optab order.  */
    std::swap (e.args[0], e.args[1]);
    machine_mode mode = e.vector_mode (0);
    insn_code icode;
    if (e.mode_suffix_id == MODE_n)
      icode = code_for_fold_extract (m_unspec, mode);
    else
      icode = code_for_aarch64_fold_extract_vector (m_unspec, mode);
    return e.use_exact_insn (icode);
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

class svcmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    if (e.type_suffix (0).float_p)
      {
        /* Make the operand order the same as the one used by the fma optabs,
           with the accumulator last.  */
        e.rotate_inputs_left (1, 4);
        return e.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot), 3);
      }
    else
      {
        int cmla = unspec_cmla (rot);
        return e.map_to_unspecs (cmla, cmla, -1);
      }
  }
};

class svcmla_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    machine_mode mode = e.vector_mode (0);
    if (e.type_suffix (0).float_p)
      {
        /* Make the operand order the same as the one used by the fma optabs,
           with the accumulator last.  */
        e.rotate_inputs_left (0, 4);
        insn_code icode = code_for_aarch64_lane (unspec_fcmla (rot), mode);
        return e.use_exact_insn (icode);
      }
    else
      {
        insn_code icode = code_for_aarch64_lane (unspec_cmla (rot), mode);
        return e.use_exact_insn (icode);
      }
  }
};

/* Implements svcmp<cc> (except svcmpuo, which is handled separately).  */
class svcmp_impl : public function_base
{
public:
  CONSTEXPR svcmp_impl (tree_code code, int unspec_for_fp)
    : m_code (code), m_unspec_for_fp (unspec_for_fp) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree pg = gimple_call_arg (f.call, 0);
    tree rhs1 = gimple_call_arg (f.call, 1);
    tree rhs2 = gimple_call_arg (f.call, 2);

    /* Convert a ptrue-predicated integer comparison into the corresponding
       gimple-level operation.  */
    if (integer_all_onesp (pg)
        && f.type_suffix (0).element_bytes == 1
        && f.type_suffix (0).integer_p)
      {
        gimple_seq stmts = NULL;
        rhs2 = f.force_vector (stmts, TREE_TYPE (rhs1), rhs2);
        gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
        return gimple_build_assign (f.lhs, m_code, rhs1, rhs2);
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);

    /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
       operand.  */
    e.add_ptrue_hint (0, e.gp_mode (0));

    if (e.type_suffix (0).integer_p)
      {
        bool unsigned_p = e.type_suffix (0).unsigned_p;
        rtx_code code = get_rtx_code (m_code, unsigned_p);
        return e.use_exact_insn (code_for_aarch64_pred_cmp (code, mode));
      }

    insn_code icode = code_for_aarch64_pred_fcm (m_unspec_for_fp, mode);
    return e.use_exact_insn (icode);
  }

  /* The tree code associated with the comparison.  */
  tree_code m_code;

  /* The unspec code to use for floating-point comparisons.  */
  int m_unspec_for_fp;
};

/* Implements svcmp<cc>_wide.  */
class svcmp_wide_impl : public function_base
{
public:
  CONSTEXPR svcmp_wide_impl (tree_code code, int unspec_for_sint,
                             int unspec_for_uint)
    : m_code (code), m_unspec_for_sint (unspec_for_sint),
      m_unspec_for_uint (unspec_for_uint) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    bool unsigned_p = e.type_suffix (0).unsigned_p;
    rtx_code code = get_rtx_code (m_code, unsigned_p);

    /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
       operand.  */
    e.add_ptrue_hint (0, e.gp_mode (0));

    /* If the argument is a constant that the unwidened comparisons
       can handle directly, use them instead.  */
    insn_code icode = code_for_aarch64_pred_cmp (code, mode);
    rtx op2 = unwrap_const_vec_duplicate (e.args[3]);
    if (CONSTANT_P (op2)
        && insn_data[icode].operand[4].predicate (op2, DImode))
      {
        e.args[3] = op2;
        return e.use_exact_insn (icode);
      }

    int unspec = (unsigned_p ? m_unspec_for_uint : m_unspec_for_sint);
    return e.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec, mode));
  }

  /* The tree code associated with the comparison.  */
  tree_code m_code;

  /* The unspec codes for signed and unsigned wide comparisons
     respectively.  */
  int m_unspec_for_sint;
  int m_unspec_for_uint;
};

class svcmpuo_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.add_ptrue_hint (0, e.gp_mode (0));
    return e.use_exact_insn (code_for_aarch64_pred_fcmuo (e.vector_mode (0)));
  }
};

class svcnot_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    if (e.pred == PRED_x)
      {
        /* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
           a ptrue hint.  */
        e.add_ptrue_hint (0, e.gp_mode (0));
        return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
      }

    return e.use_cond_insn (code_for_cond_cnot (mode), 0);
  }
};

/* Implements svcnt[bhwd], which count the number of elements
   in a particular vector mode.  */
class svcnt_bhwd_impl : public function_base
{
public:
  CONSTEXPR svcnt_bhwd_impl (machine_mode ref_mode) : m_ref_mode (ref_mode) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree count = build_int_cstu (TREE_TYPE (f.lhs),
                                 GET_MODE_NUNITS (m_ref_mode));
    return gimple_build_assign (f.lhs, count);
  }

  rtx
  expand (function_expander &) const OVERRIDE
  {
    return gen_int_mode (GET_MODE_NUNITS (m_ref_mode), DImode);
  }

  /* The mode of the vector associated with the [bhwd] suffix.  */
  machine_mode m_ref_mode;
};

/* Implements svcnt[bhwd]_pat.  */
class svcnt_bhwd_pat_impl : public svcnt_bhwd_impl
{
public:
  CONSTEXPR svcnt_bhwd_pat_impl (machine_mode ref_mode)
    : svcnt_bhwd_impl (ref_mode) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree pattern_arg = gimple_call_arg (f.call, 0);
    aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);

    if (pattern == AARCH64_SV_ALL)
      /* svcnt[bhwd]_pat (SV_ALL) == svcnt[bhwd] ().  */
      return svcnt_bhwd_impl::fold (f);

    /* See whether we can count the number of elements in the pattern
       at compile time.  */
    unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
    HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq);
    if (value >= 0)
      {
        tree count = build_int_cstu (TREE_TYPE (f.lhs), value);
        return gimple_build_assign (f.lhs, count);
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
    e.args.quick_push (gen_int_mode (elements_per_vq, DImode));
    e.args.quick_push (const1_rtx);
    return e.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat);
  }
};

class svcntp_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    e.add_ptrue_hint (0, mode);
    return e.use_exact_insn (code_for_aarch64_pred_cntp (mode));
  }
};
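
/* Added example for reference (not in the original sources): with the folds
   above, a call such as svcntb () becomes a compile-time poly-int constant
   equal to the element count of the reference mode, and svcntb_pat (SV_VL16)
   folds to the constant 16 whenever aarch64_fold_sve_cnt_pat can evaluate
   the pattern at compile time.  svcntp, by contrast, always expands to a
   CNTP instruction, since its result depends on the predicate operand.  */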

/* Implements svcreate2, svcreate3 and svcreate4.  */
class svcreate_impl : public quiet<multi_vector_function>
{
public:
  CONSTEXPR svcreate_impl (unsigned int vectors_per_tuple)
    : quiet<multi_vector_function> (vectors_per_tuple) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    unsigned int nargs = gimple_call_num_args (f.call);
    tree lhs_type = TREE_TYPE (f.lhs);

    /* Replace the call with a clobber of the result (to prevent it from
       becoming upwards exposed) followed by stores into each individual
       vector of tuple.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the clobber statement
       rather than the final vector store.  */
    gassign *clobber = gimple_build_assign (f.lhs, build_clobber (lhs_type));

    for (unsigned int i = nargs; i-- > 0; )
      {
        tree rhs_vector = gimple_call_arg (f.call, i);
        tree field = tuple_type_field (TREE_TYPE (f.lhs));
        tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
                                 unshare_expr (f.lhs), field, NULL_TREE);
        tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
                                  lhs_array, size_int (i),
                                  NULL_TREE, NULL_TREE);
        gassign *assign = gimple_build_assign (lhs_vector, rhs_vector);
        gsi_insert_after (f.gsi, assign, GSI_SAME_STMT);
      }
    return clobber;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    rtx lhs_tuple = e.get_nonoverlapping_reg_target ();

    /* Record that LHS_TUPLE is dead before the first store.  */
    emit_clobber (lhs_tuple);
    for (unsigned int i = 0; i < e.args.length (); ++i)
      {
        /* Use an lvalue subreg to refer to vector I in LHS_TUPLE.  */
        rtx lhs_vector = simplify_gen_subreg (GET_MODE (e.args[i]),
                                              lhs_tuple, GET_MODE (lhs_tuple),
                                              i * BYTES_PER_SVE_VECTOR);
        emit_move_insn (lhs_vector, e.args[i]);
      }
    return lhs_tuple;
  }
};

class svcvt_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode0 = e.vector_mode (0);
    machine_mode mode1 = e.vector_mode (1);
    insn_code icode;
    /* All this complication comes from the need to select four things
       simultaneously:

       (1) the kind of conversion (int<-float, float<-int, float<-float)
       (2) signed vs. unsigned integers, where relevant
       (3) the predication mode, which must be the wider of the predication
           modes for MODE0 and MODE1
       (4) the predication type (m, x or z)

       The only supported int<->float conversions for which the integer is
       narrower than the float are SI<->DF.  It's therefore more convenient
       to handle (3) by defining two patterns for int<->float conversions:
       one in which the integer is at least as wide as the float and so
       determines the predication mode, and another single SI<->DF pattern
       in which the float's mode determines the predication mode (which is
       always VNx2BI in that case).

       The names of the patterns follow the optab convention of giving
       the source mode before the destination mode.  */
    if (e.type_suffix (1).integer_p)
      {
        int unspec = (e.type_suffix (1).unsigned_p
                      ? UNSPEC_COND_UCVTF
                      : UNSPEC_COND_SCVTF);
        if (e.type_suffix (0).element_bytes <= e.type_suffix (1).element_bytes)
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_nonextend (unspec, mode1, mode0)
                   : code_for_cond_nonextend (unspec, mode1, mode0));
        else
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_extend (unspec, mode1, mode0)
                   : code_for_cond_extend (unspec, mode1, mode0));
      }
    else
      {
        int unspec = (!e.type_suffix (0).integer_p ? UNSPEC_COND_FCVT
                      : e.type_suffix (0).unsigned_p ? UNSPEC_COND_FCVTZU
                      : UNSPEC_COND_FCVTZS);
        if (e.type_suffix (0).element_bytes >= e.type_suffix (1).element_bytes)
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_nontrunc (unspec, mode1, mode0)
                   : code_for_cond_nontrunc (unspec, mode1, mode0));
        else
          icode = (e.pred == PRED_x
                   ? code_for_aarch64_sve_trunc (unspec, mode1, mode0)
                   : code_for_cond_trunc (unspec, mode1, mode0));
      }

    if (e.pred == PRED_x)
      return e.use_pred_x_insn (icode);
    return e.use_cond_insn (icode);
  }
};

class svdot_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* In the optab, the multiplication operands come before the accumulator
       operand.  The optab is keyed off the multiplication mode.  */
    e.rotate_inputs_left (0, 3);
    insn_code icode
      = e.direct_optab_handler_for_sign (sdot_prod_optab, udot_prod_optab,
                                         0, GET_MODE (e.args[0]));
    return e.use_unpred_insn (icode);
  }
};

class svdotprod_lane_impl : public unspec_based_function_base
{
public:
  CONSTEXPR svdotprod_lane_impl (int unspec_for_sint,
                                 int unspec_for_uint,
                                 int unspec_for_float)
    : unspec_based_function_base (unspec_for_sint,
                                  unspec_for_uint,
                                  unspec_for_float) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Use the same ordering as the dot_prod_optab, with the
       accumulator last.  */
    e.rotate_inputs_left (0, 4);
    int unspec = unspec_for (e);
    machine_mode mode = e.vector_mode (0);
    return e.use_exact_insn (code_for_aarch64_dot_prod_lane (unspec, mode));
  }
};

class svdup_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vec_type = TREE_TYPE (f.lhs);
    tree rhs = gimple_call_arg (f.call, f.pred == PRED_none ? 0 : 1);

    if (f.pred == PRED_none || f.pred == PRED_x)
      {
        if (CONSTANT_CLASS_P (rhs))
          {
            if (f.type_suffix (0).bool_p)
              return (tree_to_shwi (rhs)
                      ? f.fold_to_ptrue ()
                      : f.fold_to_pfalse ());

            tree rhs_vector = build_vector_from_val (vec_type, rhs);
            return gimple_build_assign (f.lhs, rhs_vector);
          }

        /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we
           would need to introduce an extra and unwanted conversion to
           the truth vector element type.  */
        if (!f.type_suffix (0).bool_p)
          return gimple_build_assign (f.lhs, VEC_DUPLICATE_EXPR, rhs);
      }

    /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>.  */
    if (f.pred == PRED_z)
      {
        gimple_seq stmts = NULL;
        tree pred = f.convert_pred (stmts, vec_type, 0);
        rhs = f.force_vector (stmts, vec_type, rhs);
        gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
        return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, rhs,
                                    build_zero_cst (vec_type));
      }

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.pred == PRED_none || e.pred == PRED_x)
      /* There's no benefit to using predicated instructions for _x here.  */
      return e.use_unpred_insn (e.direct_optab_handler (vec_duplicate_optab));

    /* Model predicated svdups as a SEL in which the "true" value is
       the duplicate of the function argument and the "false" value
       is the value of inactive lanes.  */
    insn_code icode;
    machine_mode mode = e.vector_mode (0);
    if (valid_for_const_vector_p (GET_MODE_INNER (mode), e.args.last ()))
      /* Duplicate the constant to fill a vector.  The pattern optimizes
         various cases involving constant operands, falling back to SEL
         if necessary.  */
      icode = code_for_vcond_mask (mode, mode);
    else
      /* Use the pattern for selecting between a duplicated scalar
         variable and a vector fallback.  */
      icode = code_for_aarch64_sel_dup (mode);
    return e.use_vcond_mask_insn (icode);
  }
};

class svdup_lane_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The native DUP lane has an index range of 64 bytes.  */
    machine_mode mode = e.vector_mode (0);
    if (CONST_INT_P (e.args[1])
        && IN_RANGE (INTVAL (e.args[1]) * GET_MODE_UNIT_SIZE (mode), 0, 63))
      return e.use_exact_insn (code_for_aarch64_sve_dup_lane (mode));

    /* Treat svdup_lane as if it were svtbl_n.  */
    return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0)));
  }
};

class svdupq_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vec_type = TREE_TYPE (f.lhs);
    unsigned int nargs = gimple_call_num_args (f.call);
    /* For predicates, pad out each argument so that we have one element
       per bit.  */
    unsigned int factor = (f.type_suffix (0).bool_p
                           ? f.type_suffix (0).element_bytes : 1);
    tree_vector_builder builder (vec_type, nargs * factor, 1);
    for (unsigned int i = 0; i < nargs; ++i)
      {
        tree elt = gimple_call_arg (f.call, i);
        if (!CONSTANT_CLASS_P (elt))
          return NULL;
        builder.quick_push (elt);
        for (unsigned int j = 1; j < factor; ++j)
          builder.quick_push (build_zero_cst (TREE_TYPE (vec_type)));
      }
    return gimple_build_assign (f.lhs, builder.build ());
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    unsigned int elements_per_vq = e.args.length ();
    if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
      {
        /* Construct a vector of integers so that we can compare them against
           zero below.  Zero vs. nonzero is the only distinction that
           matters.  */
        mode = aarch64_sve_int_mode (mode);
        for (unsigned int i = 0; i < elements_per_vq; ++i)
          e.args[i] = simplify_gen_unary (ZERO_EXTEND, GET_MODE_INNER (mode),
                                          e.args[i], QImode);
      }

    /* Get the 128-bit Advanced SIMD vector for this data size.  */
    scalar_mode element_mode = GET_MODE_INNER (mode);
    machine_mode vq_mode = aarch64_vq_mode (element_mode).require ();
    gcc_assert (known_eq (elements_per_vq, GET_MODE_NUNITS (vq_mode)));

    /* Put the arguments into a 128-bit Advanced SIMD vector.  We want
       argument N to go into architectural lane N, whereas Advanced SIMD
       vectors are loaded memory lsb to register lsb.  We therefore need
       to reverse the elements for big-endian targets.  */
    rtx vq_reg = gen_reg_rtx (vq_mode);
    rtvec vec = rtvec_alloc (elements_per_vq);
    for (unsigned int i = 0; i < elements_per_vq; ++i)
      {
        unsigned int argno = BYTES_BIG_ENDIAN ? elements_per_vq - i - 1 : i;
        RTVEC_ELT (vec, i) = e.args[argno];
      }
    aarch64_expand_vector_init (vq_reg, gen_rtx_PARALLEL (vq_mode, vec));

    /* If the result is a boolean, compare the data vector against zero.  */
    if (mode != e.vector_mode (0))
      {
        rtx data_dupq = aarch64_expand_sve_dupq (NULL, mode, vq_reg);
        return aarch64_convert_sve_data_to_pred (e.possible_target,
                                                 e.vector_mode (0), data_dupq);
      }

    return aarch64_expand_sve_dupq (e.possible_target, mode, vq_reg);
  }
};

class svdupq_lane_impl : public quiet<function_base>
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    rtx index = e.args[1];
    if (CONST_INT_P (index) && IN_RANGE (INTVAL (index), 0, 3))
      {
        /* Use the .Q form of DUP, which is the native instruction for
           this function.  */
        insn_code icode = code_for_aarch64_sve_dupq_lane (mode);
        unsigned int num_indices = e.elements_per_vq (0);
        rtx indices = aarch64_gen_stepped_int_parallel
          (num_indices, INTVAL (index) * num_indices, 1);

        e.add_output_operand (icode);
        e.add_input_operand (icode, e.args[0]);
        e.add_fixed_operand (indices);
        return e.generate_insn (icode);
      }

    /* Build a .D TBL index for the pairs of doublewords that we want to
       duplicate.  */
    if (CONST_INT_P (index))
      {
        /* The index vector is a constant.  */
        rtx_vector_builder builder (VNx2DImode, 2, 1);
        builder.quick_push (gen_int_mode (INTVAL (index) * 2, DImode));
        builder.quick_push (gen_int_mode (INTVAL (index) * 2 + 1, DImode));
        index = builder.build ();
      }
    else
      {
        /* Duplicate INDEX * 2 to fill a DImode vector.  The ACLE spec
           explicitly allows the top of the index to be dropped.  */
        index = force_reg (DImode, simplify_gen_binary (ASHIFT, DImode,
                                                        index, const1_rtx));
        index = expand_vector_broadcast (VNx2DImode, index);

        /* Get an alternating 0, 1 predicate.  */
        rtx_vector_builder builder (VNx2BImode, 2, 1);
        builder.quick_push (const0_rtx);
        builder.quick_push (constm1_rtx);
        rtx pg = force_reg (VNx2BImode, builder.build ());

        /* Add one to the odd elements of the index.  */
        rtx one = force_reg (VNx2DImode, CONST1_RTX (VNx2DImode));
        rtx target = gen_reg_rtx (VNx2DImode);
        emit_insn (gen_cond_addvnx2di (target, pg, index, one, index));
        index = target;
      }

    e.args[0] = gen_lowpart (VNx2DImode, e.args[0]);
    e.args[1] = index;
    return e.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di);
  }
};

/* Implements svextb, svexth and svextw.  */
class svext_bhw_impl : public function_base
{
public:
  CONSTEXPR svext_bhw_impl (scalar_int_mode from_mode)
    : m_from_mode (from_mode) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix (0).unsigned_p)
      {
        /* Convert to an AND.  The widest we go is 0xffffffff, which fits
           in a CONST_INT.  */
        e.args.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode)));
        if (e.pred == PRED_m)
          /* We now have arguments "(inactive, pg, op, mask)".  Convert this
             to "(pg, op, mask, inactive)" so that the order matches svand_m
             with an extra argument on the end.  Take the inactive elements
             from this extra argument.  */
          e.rotate_inputs_left (0, 4);
        return e.map_to_rtx_codes (AND, AND, -1, 3);
      }

    machine_mode wide_mode = e.vector_mode (0);
    poly_uint64 nunits = GET_MODE_NUNITS (wide_mode);
    machine_mode narrow_mode
      = aarch64_sve_data_mode (m_from_mode, nunits).require ();
    if (e.pred == PRED_x)
      {
        insn_code icode = code_for_aarch64_pred_sxt (wide_mode, narrow_mode);
        return e.use_pred_x_insn (icode);
      }

    insn_code icode = code_for_aarch64_cond_sxt (wide_mode, narrow_mode);
    return e.use_cond_insn (icode);
  }

  /* The element mode that we're extending from.  */
  scalar_int_mode m_from_mode;
};

/* Implements svget2, svget3 and svget4.  */
class svget_impl : public quiet<multi_vector_function>
{
public:
  CONSTEXPR svget_impl (unsigned int vectors_per_tuple)
    : quiet<multi_vector_function> (vectors_per_tuple) {}

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Fold into a normal gimple component access.  */
    tree rhs_tuple = gimple_call_arg (f.call, 0);
    tree index = gimple_call_arg (f.call, 1);
    tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
    tree rhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
                             rhs_tuple, field, NULL_TREE);
    tree rhs_vector = build4 (ARRAY_REF, TREE_TYPE (f.lhs),
                              rhs_array, index, NULL_TREE, NULL_TREE);
    return gimple_build_assign (f.lhs, rhs_vector);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Fold the access into a subreg rvalue.  */
    return simplify_gen_subreg (e.vector_mode (0), e.args[0],
                                GET_MODE (e.args[0]),
                                INTVAL (e.args[1]) * BYTES_PER_SVE_VECTOR);
  }
};

class svindex_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (e.direct_optab_handler (vec_series_optab));
  }
};

class svinsr_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    gcall *new_call = gimple_build_call_internal (IFN_VEC_SHL_INSERT, 2,
                                                  gimple_call_arg (f.call, 0),
                                                  gimple_call_arg (f.call, 1));
    gimple_call_set_lhs (new_call, f.lhs);
    return new_call;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = direct_optab_handler (vec_shl_insert_optab,
                                            e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements svlasta and svlastb.  */
class svlast_impl : public quiet<function_base>
{
public:
  CONSTEXPR svlast_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return e.use_exact_insn (code_for_extract (m_unspec, e.vector_mode (0)));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};

class svld1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
                                                  base, cookie, pred);
    gimple_call_set_lhs (new_call, f.lhs);
    return new_call;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = convert_optab_handler (maskload_optab,
                                             e.vector_mode (0), e.gp_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements extending contiguous forms of svld1.  */
class svld1_extend_impl : public extending_load
{
public:
  CONSTEXPR svld1_extend_impl (type_suffix_index memory_type)
    : extending_load (memory_type) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_load (extend_rtx_code (),
                                             e.vector_mode (0),
                                             e.memory_vector_mode ());
    return e.use_contiguous_load_insn (icode);
  }
};

class svld1_gather_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, as required by mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    machine_mode mem_mode = e.memory_vector_mode ();
    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
    insn_code icode = convert_optab_handler (mask_gather_load_optab,
                                             mem_mode, int_mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements extending forms of svld1_gather.  */
class svld1_gather_extend_impl : public extending_load
{
public:
  CONSTEXPR svld1_gather_extend_impl (type_suffix_index memory_type)
    : extending_load (memory_type) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since the extending gathers use the same
       operand order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    /* Add a constant predicate for the extension rtx.  */
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    insn_code icode = code_for_aarch64_gather_load (extend_rtx_code (),
                                                    e.vector_mode (0),
                                                    e.memory_vector_mode ());
    return e.use_exact_insn (icode);
  }
};

class load_replicate : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  tree
  memory_scalar_type (const function_instance &fi) const OVERRIDE
  {
    return fi.scalar_type (0);
  }
};

class svld1rq_impl : public load_replicate
{
public:
  machine_mode
  memory_vector_mode (const function_instance &fi) const OVERRIDE
  {
    return aarch64_vq_mode (GET_MODE_INNER (fi.vector_mode (0))).require ();
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_sve_ld1rq (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

class svld1ro_impl : public load_replicate
{
public:
  machine_mode
  memory_vector_mode (const function_instance &) const OVERRIDE
  {
    return OImode;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_sve_ld1ro (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements svld2, svld3 and svld4.  */
class svld234_impl : public full_width_access
{
public:
  CONSTEXPR svld234_impl (unsigned int vectors_per_tuple)
    : full_width_access (vectors_per_tuple) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree tuple_type = TREE_TYPE (f.lhs);
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    /* Emit two statements: a clobber of the lhs, so that it isn't
       upwards exposed, and then the load itself.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the clobber statement
       rather than the load.  */
    gimple *clobber = gimple_build_assign (f.lhs, build_clobber (tuple_type));

    /* View the loaded data as an array of vectors.  */
    tree field = tuple_type_field (tuple_type);
    tree lhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field),
                             unshare_expr (f.lhs));

    /* Emit the load itself.  */
    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    gcall *new_call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
                                                  base, cookie, pred);
    gimple_call_set_lhs (new_call, lhs_array);
    gsi_insert_after (f.gsi, new_call, GSI_SAME_STMT);

    return clobber;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr));
    insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
                                             tuple_mode, e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

class svldff1_gather_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    machine_mode mem_mode = e.memory_vector_mode ();
    return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode));
  }
};

/* Implements extending forms of svldff1_gather.  */
class svldff1_gather_extend : public extending_load
{
public:
  CONSTEXPR svldff1_gather_extend (type_suffix_index memory_type)
    : extending_load (memory_type) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    /* Add a constant predicate for the extension rtx.  */
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (),
                                                     e.vector_mode (0),
                                                     e.memory_vector_mode ());
    return e.use_exact_insn (icode);
  }
};

class svldnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode = code_for_aarch64_ldnt1 (e.vector_mode (0));
    return e.use_contiguous_load_insn (icode);
  }
};

/* Implements svldff1 and svldnf1.  */
class svldxf1_impl : public full_width_access
{
public:
  CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    machine_mode mode = e.vector_mode (0);
    return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode));
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

/* Implements extending contiguous forms of svldff1 and svldnf1.  */
class svldxf1_extend_impl : public extending_load
{
public:
  CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec)
    : extending_load (memory_type), m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (),
                                             e.vector_mode (0),
                                             e.memory_vector_mode ());
    return e.use_contiguous_load_insn (icode);
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

class svlen_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* The argument only exists for its type.  */
    tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0));
    tree count = build_int_cstu (TREE_TYPE (f.lhs),
                                 TYPE_VECTOR_SUBPARTS (rhs_type));
    return gimple_build_assign (f.lhs, count);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The argument only exists for its type.  */
    return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode);
  }
};

class svmad_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return expand_mad (e);
  }
};

class svmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Put the accumulator at the end (argument 3), but keep it as the
       merge input for _m functions.  */
    e.rotate_inputs_left (1, 4);
    return expand_mad (e, 3);
  }
};

class svmla_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix (0).integer_p)
      {
        machine_mode mode = e.vector_mode (0);
        return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLA);
  }
};

class svmls_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Put the accumulator at the end (argument 3), but keep it as the
       merge input for _m functions.  */
    e.rotate_inputs_left (1, 4);
    return expand_msb (e, 3);
  }
};

class svmov_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    return gimple_build_assign (f.lhs, BIT_AND_EXPR,
                                gimple_call_arg (f.call, 0),
                                gimple_call_arg (f.call, 1));
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B"
       is "AND Pa.B, Pb/Z, Pc.B, Pc.B".  */
    gcc_assert (e.pred == PRED_z);
    e.args.quick_push (e.args[1]);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z);
  }
};

class svmls_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix (0).integer_p)
      {
        machine_mode mode = e.vector_mode (0);
        return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLS);
  }
};

class svmmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    insn_code icode;
    if (e.type_suffix (0).integer_p)
      {
        if (e.type_suffix (0).unsigned_p)
          icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0));
        else
          icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0));
      }
    else
      icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

class svmsb_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return expand_msb (e);
  }
};

class svnand_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z);
  }
};

class svnor_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z);
  }
};

class svnot_impl : public rtx_code_function
{
public:
  CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
        /* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B"
           is "EOR Pa.B, Pb/Z, Pb.B, Pc.B".  */
        gcc_assert (e.pred == PRED_z);
        e.args.quick_insert (1, e.args[0]);
        return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z);
      }
    return rtx_code_function::expand (e);
  }
};

class svorn_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z);
  }
};

class svpfalse_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    return f.fold_to_pfalse ();
  }

  rtx
  expand (function_expander &) const OVERRIDE
  {
    return CONST0_RTX (VNx16BImode);
  }
};

/* Implements svpfirst and svpnext, which share the same .md patterns.  */
class svpfirst_svpnext_impl : public function_base
{
public:
  CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    e.add_ptrue_hint (0, mode);
    return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode));
  }

  /* The unspec associated with the operation.  */
  int m_unspec;
};
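
/* Added note (not in the original sources): the predicate "alias" operations
   above are lowered to their canonical three-operand forms, e.g. svmov_b_z
   (pg, p) becomes the AND used for the MOV alias with the source operand
   repeated, and svnot_b_z (pg, p) becomes the EOR used for the NOT alias
   with the governing predicate repeated, as described in the comments.  */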

/* Implements contiguous forms of svprf[bhwd].  */
class svprf_bhwd_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_PREFETCH_MEMORY;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_prefetch_operands ();
    insn_code icode = code_for_aarch64_sve_prefetch (m_mode);
    return e.use_contiguous_prefetch_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};

/* Implements svprf[bhwd]_gather.  */
class svprf_bhwd_gather_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_PREFETCH_MEMORY;
  }

  machine_mode
  memory_vector_mode (const function_instance &) const OVERRIDE
  {
    return m_mode;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    e.prepare_prefetch_operands ();
    e.prepare_gather_address_operands (1);

    /* Insert a zero operand to identify the mode of the memory being
       accessed.  This goes between the gather operands and prefetch
       operands created above.  */
    e.args.quick_insert (5, CONST0_RTX (m_mode));

    machine_mode reg_mode = GET_MODE (e.args[2]);
    insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode);
    return e.use_exact_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};

/* Implements svptest_any, svptest_first and svptest_last.  */
class svptest_impl : public function_base
{
public:
  CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See whether GP is an exact ptrue for some predicate mode;
       i.e. whether converting the GP to that mode will not drop
       set bits and will leave all significant bits set.  */
    machine_mode wide_mode;
    int hint;
    if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode))
      hint = SVE_KNOWN_PTRUE;
    else
      {
        hint = SVE_MAYBE_NOT_PTRUE;
        wide_mode = VNx16BImode;
      }

    /* Generate the PTEST itself.  */
    rtx pg = force_reg (VNx16BImode, e.args[0]);
    rtx wide_pg = gen_lowpart (wide_mode, pg);
    rtx hint_rtx = gen_int_mode (hint, DImode);
    rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1]));
    emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op));

    /* Get the location of the boolean result.  We can provide SImode and
       DImode values directly; rely on generic code to convert others.  */
    rtx target = e.possible_target;
    if (!target
        || !REG_P (target)
        || (GET_MODE (target) != SImode && GET_MODE (target) != DImode))
      target = gen_reg_rtx (DImode);

    /* Generate a CSET to convert the CC result of the PTEST to a boolean.  */
    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
    rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target),
                                  cc_reg, const0_rtx);
    emit_insn (gen_rtx_SET (target, compare));
    return target;
  }

  /* The comparison code associated with ptest condition.  */
  rtx_code m_compare;
};

class svptrue_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    return f.fold_to_ptrue ();
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    return aarch64_ptrue_all (e.type_suffix (0).element_bytes);
  }
};

class svptrue_pat_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    tree pattern_arg = gimple_call_arg (f.call, 0);
    aarch64_svpattern pattern = (aarch64_svpattern) tree_to_shwi (pattern_arg);

    if (pattern == AARCH64_SV_ALL)
      /* svptrue_pat_bN (SV_ALL) == svptrue_bN ().  */
      return f.fold_to_ptrue ();

    /* See whether we can count the number of elements in the pattern
       at compile time.  If so, construct a predicate with that number
       of 1s followed by all 0s.  */
    int nelts_per_vq = f.elements_per_vq (0);
    HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq);
    if (value >= 0)
      return f.fold_to_vl_pred (value);

    return NULL;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* In rtl, the predicate is represented as the constant:

	 (const:V16BI (unspec:V16BI [(const_int PATTERN)
				     (const_vector:VnnBI [zeros])]
			UNSPEC_PTRUE))

       where nn determines the element size.  */
    rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0)));
    return gen_rtx_CONST (VNx16BImode,
                          gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE));
  }
};

/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}.  */
class svqdec_svqinc_bhwd_impl : public function_base
{
public:
  CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint,
                                     rtx_code code_for_uint,
                                     scalar_int_mode elem_mode)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint),
      m_elem_mode (elem_mode)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* Treat non-_pat functions in the same way as _pat functions with
       an SV_ALL argument.  */
    if (e.args.length () == 2)
      e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode));

    /* Insert the number of elements per 128-bit block as a fake argument,
       between the pattern and the multiplier.  Arguments 1, 2 and 3 then
       correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see
       aarch64_sve_cnt_pat for details.  */
    unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode);
    e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode));

    rtx_code code = (e.type_suffix (0).unsigned_p
                     ? m_code_for_uint
                     : m_code_for_sint);

    /* Choose between operating on integer scalars or integer vectors.  */
    machine_mode mode = e.vector_mode (0);
    if (e.mode_suffix_id == MODE_n)
      mode = GET_MODE_INNER (mode);
    return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode));
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;

  /* The integer mode associated with the [bhwd] suffix.  */
  scalar_int_mode m_elem_mode;
};
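
/* Added worked example (not in the original sources): for the non-_pat form
   svqdecw (x, 2), the expander above first inserts SV_ALL as the pattern,
   giving arguments (x, SV_ALL, 2), and then inserts the per-128-bit element
   count 4, giving (x, SV_ALL, 4, 2) -- i.e. a saturating subtraction from x
   of twice the number of 32-bit elements in a vector.  */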

/* Implements svqdec[bhwd]{,_pat}.  */
class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {}
};

/* Implements svqinc[bhwd]{,_pat}.  */
class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {}
};

/* Implements svqdecp and svqincp.  */
class svqdecp_svqincp_impl : public function_base
{
public:
  CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint,
                                  rtx_code code_for_uint)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint)
  {}

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    rtx_code code = (e.type_suffix (0).unsigned_p
                     ? m_code_for_uint
                     : m_code_for_sint);
    insn_code icode;
    if (e.mode_suffix_id == MODE_n)
      {
        /* Increment or decrement a scalar (whose mode is given by the first
           type suffix) by the number of active elements in a predicate
           (whose mode is given by the second type suffix).  */
        machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
        icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1));
      }
    else
      /* Increment a vector by the number of active elements in a predicate,
         with the vector mode determining the predicate mode.  */
      icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;
};

class svrdffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const OVERRIDE
  {
    return CP_READ_FFR;
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* See the block comment in aarch64-sve.md for details about the
       FFR handling.  */
    emit_insn (gen_aarch64_copy_ffr_to_ffrt ());
    rtx result = e.use_exact_insn (e.pred == PRED_z
                                   ? CODE_FOR_aarch64_rdffr_z
                                   : CODE_FOR_aarch64_rdffr);
    emit_insn (gen_aarch64_update_ffrt ());
    return result;
  }
};

class svreinterpret_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Punt to rtl if the effect of the reinterpret on registers does not
       conform to GCC's endianness model.  */
    if (!targetm.can_change_mode_class (f.vector_mode (0),
                                        f.vector_mode (1), FP_REGS))
      return NULL;

    /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR
       reinterpretation.  */
    tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs),
                       gimple_call_arg (f.call, 0));
    return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs);
  }

  rtx
  expand (function_expander &e) const OVERRIDE
  {
    machine_mode mode = e.vector_mode (0);
    return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode));
  }
};

class svrev_impl : public permute
{
public:
  gimple *
  fold (gimple_folder &f) const OVERRIDE
  {
    /* Punt for now on _b16 and wider; we'd need more complex evpc logic
       to rerecognize the result.  */
*/ 1965 if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8) 1966 return NULL; 1967 1968 /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }. */ 1969 poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); 1970 vec_perm_builder builder (nelts, 1, 3); 1971 for (int i = 0; i < 3; ++i) 1972 builder.quick_push (nelts - i - 1); 1973 return fold_permute (f, builder); 1974 } 1975 1976 rtx 1977 expand (function_expander &e) const OVERRIDE 1978 { 1979 return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0))); 1980 } 1981 }; 1982 1983 class svsel_impl : public quiet<function_base> 1984 { 1985 public: 1986 gimple * 1987 fold (gimple_folder &f) const OVERRIDE 1988 { 1989 /* svsel corresponds exactly to VEC_COND_EXPR. */ 1990 gimple_seq stmts = NULL; 1991 tree pred = f.convert_pred (stmts, f.vector_type (0), 0); 1992 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); 1993 return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred, 1994 gimple_call_arg (f.call, 1), 1995 gimple_call_arg (f.call, 2)); 1996 } 1997 1998 rtx 1999 expand (function_expander &e) const OVERRIDE 2000 { 2001 /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond). */ 2002 e.rotate_inputs_left (0, 3); 2003 insn_code icode = convert_optab_handler (vcond_mask_optab, 2004 e.vector_mode (0), 2005 e.gp_mode (0)); 2006 return e.use_exact_insn (icode); 2007 } 2008 }; 2009 2010 /* Implements svset2, svset3 and svset4. */ 2011 class svset_impl : public quiet<multi_vector_function> 2012 { 2013 public: 2014 CONSTEXPR svset_impl (unsigned int vectors_per_tuple) 2015 : quiet<multi_vector_function> (vectors_per_tuple) {} 2016 2017 gimple * 2018 fold (gimple_folder &f) const OVERRIDE 2019 { 2020 tree rhs_tuple = gimple_call_arg (f.call, 0); 2021 tree index = gimple_call_arg (f.call, 1); 2022 tree rhs_vector = gimple_call_arg (f.call, 2); 2023 2024 /* Replace the call with two statements: a copy of the full tuple 2025 to the call result, followed by an update of the individual vector. 2026 2027 The fold routines expect the replacement statement to have the 2028 same lhs as the original call, so return the copy statement 2029 rather than the field update. */ 2030 gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple); 2031 2032 /* Get a reference to the individual vector. */ 2033 tree field = tuple_type_field (TREE_TYPE (f.lhs)); 2034 tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field), 2035 f.lhs, field, NULL_TREE); 2036 tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector), 2037 lhs_array, index, NULL_TREE, NULL_TREE); 2038 gassign *update = gimple_build_assign (lhs_vector, rhs_vector); 2039 gsi_insert_after (f.gsi, update, GSI_SAME_STMT); 2040 2041 return copy; 2042 } 2043 2044 rtx 2045 expand (function_expander &e) const OVERRIDE 2046 { 2047 rtx rhs_tuple = e.args[0]; 2048 unsigned int index = INTVAL (e.args[1]); 2049 rtx rhs_vector = e.args[2]; 2050 2051 /* First copy the full tuple to the target register. */ 2052 rtx lhs_tuple = e.get_nonoverlapping_reg_target (); 2053 emit_move_insn (lhs_tuple, rhs_tuple); 2054 2055 /* ...then update the individual vector. 
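The vectors in a tuple are stored consecutively, so vector number INDEX starts at byte offset INDEX * BYTES_PER_SVE_VECTOR within the tuple.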
*/ 2056 rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector), 2057 lhs_tuple, GET_MODE (lhs_tuple), 2058 index * BYTES_PER_SVE_VECTOR); 2059 emit_move_insn (lhs_vector, rhs_vector); 2060 return lhs_vector; 2061 } 2062 }; 2063 2064 class svsetffr_impl : public function_base 2065 { 2066 public: 2067 unsigned int 2068 call_properties (const function_instance &) const OVERRIDE 2069 { 2070 return CP_WRITE_FFR; 2071 } 2072 2073 rtx 2074 expand (function_expander &e) const OVERRIDE 2075 { 2076 e.args.quick_push (CONSTM1_RTX (VNx16BImode)); 2077 return e.use_exact_insn (CODE_FOR_aarch64_wrffr); 2078 } 2079 }; 2080 2081 class svst1_impl : public full_width_access 2082 { 2083 public: 2084 unsigned int 2085 call_properties (const function_instance &) const OVERRIDE 2086 { 2087 return CP_WRITE_MEMORY; 2088 } 2089 2090 gimple * 2091 fold (gimple_folder &f) const OVERRIDE 2092 { 2093 tree vectype = f.vector_type (0); 2094 2095 /* Get the predicate and base pointer. */ 2096 gimple_seq stmts = NULL; 2097 tree pred = f.convert_pred (stmts, vectype, 0); 2098 tree base = f.fold_contiguous_base (stmts, vectype); 2099 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); 2100 2101 tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); 2102 tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1); 2103 return gimple_build_call_internal (IFN_MASK_STORE, 4, 2104 base, cookie, pred, rhs); 2105 } 2106 2107 rtx 2108 expand (function_expander &e) const OVERRIDE 2109 { 2110 insn_code icode = convert_optab_handler (maskstore_optab, 2111 e.vector_mode (0), e.gp_mode (0)); 2112 return e.use_contiguous_store_insn (icode); 2113 } 2114 }; 2115 2116 class svst1_scatter_impl : public full_width_access 2117 { 2118 public: 2119 unsigned int 2120 call_properties (const function_instance &) const OVERRIDE 2121 { 2122 return CP_WRITE_MEMORY; 2123 } 2124 2125 rtx 2126 expand (function_expander &e) const OVERRIDE 2127 { 2128 e.prepare_gather_address_operands (1); 2129 /* Put the predicate last, as required by mask_scatter_store_optab. */ 2130 e.rotate_inputs_left (0, 6); 2131 machine_mode mem_mode = e.memory_vector_mode (); 2132 machine_mode int_mode = aarch64_sve_int_mode (mem_mode); 2133 insn_code icode = convert_optab_handler (mask_scatter_store_optab, 2134 mem_mode, int_mode); 2135 return e.use_exact_insn (icode); 2136 } 2137 }; 2138 2139 /* Implements truncating forms of svst1_scatter. */ 2140 class svst1_scatter_truncate_impl : public truncating_store 2141 { 2142 public: 2143 CONSTEXPR svst1_scatter_truncate_impl (scalar_int_mode to_mode) 2144 : truncating_store (to_mode) {} 2145 2146 rtx 2147 expand (function_expander &e) const OVERRIDE 2148 { 2149 e.prepare_gather_address_operands (1); 2150 /* Put the predicate last, since the truncating scatters use the same 2151 operand order as mask_scatter_store_optab. */ 2152 e.rotate_inputs_left (0, 6); 2153 insn_code icode = code_for_aarch64_scatter_store_trunc 2154 (e.memory_vector_mode (), e.vector_mode (0)); 2155 return e.use_exact_insn (icode); 2156 } 2157 }; 2158 2159 /* Implements truncating contiguous forms of svst1. 
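For example, svst1b_s32 stores just the low 8 bits of each 32-bit element.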
*/ 2160 class svst1_truncate_impl : public truncating_store 2161 { 2162 public: 2163 CONSTEXPR svst1_truncate_impl (scalar_int_mode to_mode) 2164 : truncating_store (to_mode) {} 2165 2166 rtx 2167 expand (function_expander &e) const OVERRIDE 2168 { 2169 insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (), 2170 e.vector_mode (0)); 2171 return e.use_contiguous_store_insn (icode); 2172 } 2173 }; 2174 2175 /* Implements svst2, svst3 and svst4. */ 2176 class svst234_impl : public full_width_access 2177 { 2178 public: 2179 CONSTEXPR svst234_impl (unsigned int vectors_per_tuple) 2180 : full_width_access (vectors_per_tuple) {} 2181 2182 unsigned int 2183 call_properties (const function_instance &) const OVERRIDE 2184 { 2185 return CP_WRITE_MEMORY; 2186 } 2187 2188 gimple * 2189 fold (gimple_folder &f) const OVERRIDE 2190 { 2191 tree vectype = f.vector_type (0); 2192 2193 /* Get the predicate and base pointer. */ 2194 gimple_seq stmts = NULL; 2195 tree pred = f.convert_pred (stmts, vectype, 0); 2196 tree base = f.fold_contiguous_base (stmts, vectype); 2197 gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT); 2198 2199 /* View the stored data as an array of vectors. */ 2200 unsigned int num_args = gimple_call_num_args (f.call); 2201 tree rhs_tuple = gimple_call_arg (f.call, num_args - 1); 2202 tree field = tuple_type_field (TREE_TYPE (rhs_tuple)); 2203 tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple); 2204 2205 tree cookie = f.load_store_cookie (TREE_TYPE (vectype)); 2206 return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4, 2207 base, cookie, pred, rhs_array); 2208 } 2209 2210 rtx 2211 expand (function_expander &e) const OVERRIDE 2212 { 2213 machine_mode tuple_mode = GET_MODE (e.args.last ()); 2214 insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab, 2215 tuple_mode, e.vector_mode (0)); 2216 return e.use_contiguous_store_insn (icode); 2217 } 2218 }; 2219 2220 class svstnt1_impl : public full_width_access 2221 { 2222 public: 2223 unsigned int 2224 call_properties (const function_instance &) const OVERRIDE 2225 { 2226 return CP_WRITE_MEMORY; 2227 } 2228 2229 rtx 2230 expand (function_expander &e) const OVERRIDE 2231 { 2232 insn_code icode = code_for_aarch64_stnt1 (e.vector_mode (0)); 2233 return e.use_contiguous_store_insn (icode); 2234 } 2235 }; 2236 2237 class svsub_impl : public rtx_code_function 2238 { 2239 public: 2240 CONSTEXPR svsub_impl () 2241 : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {} 2242 2243 rtx 2244 expand (function_expander &e) const OVERRIDE 2245 { 2246 /* Canonicalize subtractions of constants to additions. */ 2247 machine_mode mode = e.vector_mode (0); 2248 if (e.try_negating_argument (2, mode)) 2249 return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD); 2250 2251 return rtx_code_function::expand (e); 2252 } 2253 }; 2254 2255 class svtbl_impl : public permute 2256 { 2257 public: 2258 rtx 2259 expand (function_expander &e) const OVERRIDE 2260 { 2261 return e.use_exact_insn (code_for_aarch64_sve_tbl (e.vector_mode (0))); 2262 } 2263 }; 2264 2265 /* Implements svtrn1 and svtrn2. */ 2266 class svtrn_impl : public binary_permute 2267 { 2268 public: 2269 CONSTEXPR svtrn_impl (int base) 2270 : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {} 2271 2272 gimple * 2273 fold (gimple_folder &f) const OVERRIDE 2274 { 2275 /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... } 2276 svtrn2: as for svtrn1, but with 1 added to each index. 
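For example, with 4 elements per vector, svtrn1 uses the selector { 0, 4, 2, 6 } and svtrn2 uses { 1, 5, 3, 7 }.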
*/ 2277 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); 2278 vec_perm_builder builder (nelts, 2, 3); 2279 for (unsigned int i = 0; i < 3; ++i) 2280 { 2281 builder.quick_push (m_base + i * 2); 2282 builder.quick_push (m_base + i * 2 + nelts); 2283 } 2284 return fold_permute (f, builder); 2285 } 2286 2287 /* 0 for svtrn1, 1 for svtrn2. */ 2288 unsigned int m_base; 2289 }; 2290 2291 /* Implements svundef{,2,3,4}. */ 2292 class svundef_impl : public quiet<multi_vector_function> 2293 { 2294 public: 2295 CONSTEXPR svundef_impl (unsigned int vectors_per_tuple) 2296 : quiet<multi_vector_function> (vectors_per_tuple) {} 2297 2298 gimple * 2299 fold (gimple_folder &f) const OVERRIDE 2300 { 2301 /* Don't fold svundef at the gimple level. There's no exact 2302 correspondence for SSA_NAMEs, and we explicitly don't want 2303 to generate a specific value (like an all-zeros vector). */ 2304 if (vectors_per_tuple () == 1) 2305 return NULL; 2306 return gimple_build_assign (f.lhs, build_clobber (TREE_TYPE (f.lhs))); 2307 } 2308 2309 rtx 2310 expand (function_expander &e) const OVERRIDE 2311 { 2312 rtx target = e.get_reg_target (); 2313 emit_clobber (copy_rtx (target)); 2314 return target; 2315 } 2316 }; 2317 2318 /* Implements svunpklo and svunpkhi. */ 2319 class svunpk_impl : public quiet<function_base> 2320 { 2321 public: 2322 CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {} 2323 2324 gimple * 2325 fold (gimple_folder &f) const OVERRIDE 2326 { 2327 /* Don't fold the predicate ops, since every bit of the svbool_t 2328 result is significant. */ 2329 if (f.type_suffix_ids[0] == TYPE_SUFFIX_b) 2330 return NULL; 2331 2332 /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian 2333 and VEC_UNPACK_HI_EXPR for big-endian. */ 2334 bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p; 2335 tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR; 2336 return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0)); 2337 } 2338 2339 rtx 2340 expand (function_expander &e) const OVERRIDE 2341 { 2342 machine_mode mode = GET_MODE (e.args[0]); 2343 unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO; 2344 unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO; 2345 insn_code icode; 2346 if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) 2347 icode = code_for_aarch64_sve_punpk (unpacku, mode); 2348 else 2349 { 2350 int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks; 2351 icode = code_for_aarch64_sve_unpk (unspec, unspec, mode); 2352 } 2353 return e.use_exact_insn (icode); 2354 } 2355 2356 /* True for svunpkhi, false for svunpklo. */ 2357 bool m_high_p; 2358 }; 2359 2360 /* Implements svusdot and svsudot. */ 2361 class svusdot_impl : public function_base 2362 { 2363 public: 2364 CONSTEXPR svusdot_impl (bool su) : m_su (su) {} 2365 2366 rtx 2367 expand (function_expander &e) const OVERRIDE 2368 { 2369 /* The implementation of the ACLE function svsudot (for the non-lane 2370 version) is through the USDOT instruction but with the second and third 2371 inputs swapped. */ 2372 if (m_su) 2373 e.rotate_inputs_left (1, 2); 2374 /* The ACLE function has the same order requirements as for svdot. 2375 While there's no requirement for the RTL pattern to have the same sort 2376 of order as that for <sur>dot_prod, it's easier to read. 2377 Hence we do the same rotation on arguments as svdot_impl does.
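That is, the accumulator moves from the first position to the last, matching the operand order of the dot_prod patterns.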
*/ 2378 e.rotate_inputs_left (0, 3); 2379 machine_mode mode = e.vector_mode (0); 2380 insn_code icode = code_for_aarch64_dot_prod (UNSPEC_USDOT, mode); 2381 return e.use_exact_insn (icode); 2382 } 2383 2384 private: 2385 bool m_su; 2386 }; 2387 2388 /* Implements svuzp1 and svuzp2. */ 2389 class svuzp_impl : public binary_permute 2390 { 2391 public: 2392 CONSTEXPR svuzp_impl (unsigned int base) 2393 : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {} 2394 2395 gimple * 2396 fold (gimple_folder &f) const OVERRIDE 2397 { 2398 /* svuzp1: { 0, 2, 4, 6, ... } 2399 svuzp2: { 1, 3, 5, 7, ... }. */ 2400 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); 2401 vec_perm_builder builder (nelts, 1, 3); 2402 for (unsigned int i = 0; i < 3; ++i) 2403 builder.quick_push (m_base + i * 2); 2404 return fold_permute (f, builder); 2405 } 2406 2407 /* 0 for svuzp1, 1 for svuzp2. */ 2408 unsigned int m_base; 2409 }; 2410 2411 /* A function_base for svwhilele and svwhilelt functions. */ 2412 class svwhilelx_impl : public while_comparison 2413 { 2414 public: 2415 CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint, bool eq_p) 2416 : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p) 2417 {} 2418 2419 /* Try to fold a call by treating its arguments as constants of type T. */ 2420 template<typename T> 2421 gimple * 2422 fold_type (gimple_folder &f) const 2423 { 2424 /* Only handle cases in which both operands are constant. */ 2425 T arg0, arg1; 2426 if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0) 2427 || !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1)) 2428 return NULL; 2429 2430 /* Check whether the result is known to be all-false. */ 2431 if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1)) 2432 return f.fold_to_pfalse (); 2433 2434 /* Punt if we can't tell at compile time whether the result 2435 is all-false. */ 2436 if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1)) 2437 return NULL; 2438 2439 /* At this point we know the result has at least one set element. */ 2440 poly_uint64 diff = arg1 - arg0; 2441 poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0)); 2442 2443 /* Canonicalize the svwhilele form to the svwhilelt form. Subtract 2444 from NELTS rather than adding to DIFF, to prevent overflow. */ 2445 if (m_eq_p) 2446 nelts -= 1; 2447 2448 /* Check whether the result is known to be all-true. */ 2449 if (known_ge (diff, nelts)) 2450 return f.fold_to_ptrue (); 2451 2452 /* Punt if DIFF might not be the actual number of set elements 2453 in the result. Conditional equality is fine. */ 2454 if (maybe_gt (diff, nelts)) 2455 return NULL; 2456 2457 /* At this point we know that the predicate will have DIFF set elements 2458 for svwhilelt and DIFF + 1 set elements for svwhilele (which stops 2459 after rather than before ARG1 is reached). See if we can create 2460 the predicate at compile time. */ 2461 unsigned HOST_WIDE_INT vl; 2462 if (diff.is_constant (&vl)) 2463 /* Overflow is no longer possible after the checks above. */ 2464 return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl); 2465 2466 return NULL; 2467 } 2468 2469 gimple * 2470 fold (gimple_folder &f) const OVERRIDE 2471 { 2472 if (f.type_suffix (1).unsigned_p) 2473 return fold_type<poly_uint64> (f); 2474 else 2475 return fold_type<poly_int64> (f); 2476 } 2477 2478 /* True for svwhilele, false for svwhilelt.
*/ 2479 bool m_eq_p; 2480 }; 2481 2482 class svwrffr_impl : public function_base 2483 { 2484 public: 2485 unsigned int 2486 call_properties (const function_instance &) const OVERRIDE 2487 { 2488 return CP_WRITE_FFR; 2489 } 2490 2491 rtx 2492 expand (function_expander &e) const OVERRIDE 2493 { 2494 return e.use_exact_insn (CODE_FOR_aarch64_wrffr); 2495 } 2496 }; 2497 2498 /* Implements svzip1 and svzip2. */ 2499 class svzip_impl : public binary_permute 2500 { 2501 public: 2502 CONSTEXPR svzip_impl (unsigned int base) 2503 : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {} 2504 2505 gimple * 2506 fold (gimple_folder &f) const OVERRIDE 2507 { 2508 /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... } 2509 svzip2: as for svzip1, but with nelts / 2 added to each index. */ 2510 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs)); 2511 poly_uint64 base = m_base * exact_div (nelts, 2); 2512 vec_perm_builder builder (nelts, 2, 3); 2513 for (unsigned int i = 0; i < 3; ++i) 2514 { 2515 builder.quick_push (base + i); 2516 builder.quick_push (base + i + nelts); 2517 } 2518 return fold_permute (f, builder); 2519 } 2520 2521 /* 0 for svzip1, 1 for svzip2. */ 2522 unsigned int m_base; 2523 }; 2524 2525 } /* end anonymous namespace */ 2526 2527 namespace aarch64_sve { 2528 2529 FUNCTION (svabd, svabd_impl,) 2530 FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS)) 2531 FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE)) 2532 FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT)) 2533 FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE)) 2534 FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT)) 2535 FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD)) 2536 FUNCTION (svadda, svadda_impl,) 2537 FUNCTION (svaddv, reduction, (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV)) 2538 FUNCTION (svadrb, svadr_bhwd_impl, (0)) 2539 FUNCTION (svadrd, svadr_bhwd_impl, (3)) 2540 FUNCTION (svadrh, svadr_bhwd_impl, (1)) 2541 FUNCTION (svadrw, svadr_bhwd_impl, (2)) 2542 FUNCTION (svand, rtx_code_function, (AND, AND)) 2543 FUNCTION (svandv, reduction, (UNSPEC_ANDV)) 2544 FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT)) 2545 FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE)) 2546 FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1)) 2547 FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf)) 2548 FUNCTION (svbfdot_lane, fixed_insn_function, 2549 (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf)) 2550 FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf)) 2551 FUNCTION (svbfmlalb_lane, fixed_insn_function, 2552 (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf)) 2553 FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf)) 2554 FUNCTION (svbfmlalt_lane, fixed_insn_function, 2555 (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf)) 2556 FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf)) 2557 FUNCTION (svbic, svbic_impl,) 2558 FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA)) 2559 FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB)) 2560 FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN)) 2561 FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA)) 2562 FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB)) 2563 FUNCTION (svcadd, svcadd_impl,) 2564 FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA)) 2565 FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB)) 2566 FUNCTION (svcls, unary_count, (CLRSB)) 2567 FUNCTION (svclz, unary_count, (CLZ)) 2568 FUNCTION (svcmla, svcmla_impl,) 2569 FUNCTION 
(svcmla_lane, svcmla_lane_impl,) 2570 FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ)) 2571 FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE, 2572 UNSPEC_COND_CMPEQ_WIDE)) 2573 FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE)) 2574 FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE, 2575 UNSPEC_COND_CMPHS_WIDE)) 2576 FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT)) 2577 FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE, 2578 UNSPEC_COND_CMPHI_WIDE)) 2579 FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE)) 2580 FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE, 2581 UNSPEC_COND_CMPLS_WIDE)) 2582 FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT)) 2583 FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE, 2584 UNSPEC_COND_CMPLO_WIDE)) 2585 FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE)) 2586 FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE, 2587 UNSPEC_COND_CMPNE_WIDE)) 2588 FUNCTION (svcmpuo, svcmpuo_impl,) 2589 FUNCTION (svcnot, svcnot_impl,) 2590 FUNCTION (svcnt, unary_count, (POPCOUNT)) 2591 FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode)) 2592 FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode)) 2593 FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode)) 2594 FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode)) 2595 FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode)) 2596 FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode)) 2597 FUNCTION (svcntp, svcntp_impl,) 2598 FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode)) 2599 FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode)) 2600 FUNCTION (svcompact, QUIET_CODE_FOR_MODE0 (aarch64_sve_compact),) 2601 FUNCTION (svcreate2, svcreate_impl, (2)) 2602 FUNCTION (svcreate3, svcreate_impl, (3)) 2603 FUNCTION (svcreate4, svcreate_impl, (4)) 2604 FUNCTION (svcvt, svcvt_impl,) 2605 FUNCTION (svcvtnt, CODE_FOR_MODE0 (aarch64_sve_cvtnt),) 2606 FUNCTION (svdiv, rtx_code_function, (DIV, UDIV, UNSPEC_COND_FDIV)) 2607 FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV)) 2608 FUNCTION (svdot, svdot_impl,) 2609 FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT, -1)) 2610 FUNCTION (svdup, svdup_impl,) 2611 FUNCTION (svdup_lane, svdup_lane_impl,) 2612 FUNCTION (svdupq, svdupq_impl,) 2613 FUNCTION (svdupq_lane, svdupq_lane_impl,) 2614 FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1)) 2615 FUNCTION (sveorv, reduction, (UNSPEC_XORV)) 2616 FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA)) 2617 FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),) 2618 FUNCTION (svextb, svext_bhw_impl, (QImode)) 2619 FUNCTION (svexth, svext_bhw_impl, (HImode)) 2620 FUNCTION (svextw, svext_bhw_impl, (SImode)) 2621 FUNCTION (svget2, svget_impl, (2)) 2622 FUNCTION (svget3, svget_impl, (3)) 2623 FUNCTION (svget4, svget_impl, (4)) 2624 FUNCTION (svindex, svindex_impl,) 2625 FUNCTION (svinsr, svinsr_impl,) 2626 FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA)) 2627 FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB)) 2628 FUNCTION (svld1, svld1_impl,) 2629 FUNCTION (svld1_gather, svld1_gather_impl,) 2630 FUNCTION (svld1ro, svld1ro_impl,) 2631 FUNCTION (svld1rq, svld1rq_impl,) 2632 FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8)) 2633 FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8)) 2634 FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16)) 2635 FUNCTION (svld1sh_gather, 
svld1_gather_extend_impl, (TYPE_SUFFIX_s16)) 2636 FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32)) 2637 FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32)) 2638 FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8)) 2639 FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8)) 2640 FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16)) 2641 FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16)) 2642 FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32)) 2643 FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32)) 2644 FUNCTION (svld2, svld234_impl, (2)) 2645 FUNCTION (svld3, svld234_impl, (3)) 2646 FUNCTION (svld4, svld234_impl, (4)) 2647 FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1)) 2648 FUNCTION (svldff1_gather, svldff1_gather_impl,) 2649 FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1)) 2650 FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8)) 2651 FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1)) 2652 FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16)) 2653 FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1)) 2654 FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32)) 2655 FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1)) 2656 FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8)) 2657 FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1)) 2658 FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16)) 2659 FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1)) 2660 FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32)) 2661 FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1)) 2662 FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1)) 2663 FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1)) 2664 FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1)) 2665 FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1)) 2666 FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1)) 2667 FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1)) 2668 FUNCTION (svldnt1, svldnt1_impl,) 2669 FUNCTION (svlen, svlen_impl,) 2670 FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT)) 2671 FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE)) 2672 FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT)) 2673 FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE)) 2674 FUNCTION (svmad, svmad_impl,) 2675 FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX)) 2676 FUNCTION (svmaxnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMAXNM)) 2677 FUNCTION (svmaxnmv, reduction, (UNSPEC_FMAXNMV)) 2678 FUNCTION (svmaxv, reduction, (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV)) 2679 FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN)) 2680 FUNCTION (svminnm, unspec_based_function, (-1, -1, UNSPEC_COND_FMINNM)) 2681 FUNCTION (svminnmv, reduction, (UNSPEC_FMINNMV)) 2682 FUNCTION (svminv, reduction, (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV)) 2683 FUNCTION (svmla, svmla_impl,) 2684 FUNCTION (svmla_lane, svmla_lane_impl,) 2685 FUNCTION (svmls, svmls_impl,) 2686 FUNCTION (svmls_lane, svmls_lane_impl,) 2687 FUNCTION (svmmla, svmmla_impl,) 2688 FUNCTION (svmov, svmov_impl,) 2689 FUNCTION (svmsb, svmsb_impl,) 2690 FUNCTION (svmul, rtx_code_function, (MULT, MULT, 
UNSPEC_COND_FMUL)) 2691 FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),) 2692 FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART, 2693 UNSPEC_UMUL_HIGHPART, -1)) 2694 FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX)) 2695 FUNCTION (svnand, svnand_impl,) 2696 FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG)) 2697 FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA)) 2698 FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA)) 2699 FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS)) 2700 FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS)) 2701 FUNCTION (svnor, svnor_impl,) 2702 FUNCTION (svnot, svnot_impl,) 2703 FUNCTION (svorn, svorn_impl,) 2704 FUNCTION (svorr, rtx_code_function, (IOR, IOR)) 2705 FUNCTION (svorv, reduction, (UNSPEC_IORV)) 2706 FUNCTION (svpfalse, svpfalse_impl,) 2707 FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST)) 2708 FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT)) 2709 FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode)) 2710 FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode)) 2711 FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode)) 2712 FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode)) 2713 FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode)) 2714 FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode)) 2715 FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode)) 2716 FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode)) 2717 FUNCTION (svptest_any, svptest_impl, (NE)) 2718 FUNCTION (svptest_first, svptest_impl, (LT)) 2719 FUNCTION (svptest_last, svptest_impl, (LTU)) 2720 FUNCTION (svptrue, svptrue_impl,) 2721 FUNCTION (svptrue_pat, svptrue_pat_impl,) 2722 FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1)) 2723 FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode)) 2724 FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode)) 2725 FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode)) 2726 FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode)) 2727 FUNCTION (svqdech, svqdec_bhwd_impl, (HImode)) 2728 FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode)) 2729 FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS)) 2730 FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode)) 2731 FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode)) 2732 FUNCTION (svqincb, svqinc_bhwd_impl, (QImode)) 2733 FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode)) 2734 FUNCTION (svqincd, svqinc_bhwd_impl, (DImode)) 2735 FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode)) 2736 FUNCTION (svqinch, svqinc_bhwd_impl, (HImode)) 2737 FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode)) 2738 FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS)) 2739 FUNCTION (svqincw, svqinc_bhwd_impl, (SImode)) 2740 FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode)) 2741 FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1)) 2742 FUNCTION (svrbit, unspec_based_function, (UNSPEC_RBIT, UNSPEC_RBIT, -1)) 2743 FUNCTION (svrdffr, svrdffr_impl,) 2744 FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE)) 2745 FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS)) 2746 FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX)) 2747 FUNCTION (svreinterpret, svreinterpret_impl,) 2748 FUNCTION (svrev, svrev_impl,) 2749 FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1)) 2750 FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1)) 2751 FUNCTION (svrevw, 
unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1)) 2752 FUNCTION (svrinta, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTA)) 2753 FUNCTION (svrinti, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTI)) 2754 FUNCTION (svrintm, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTM)) 2755 FUNCTION (svrintn, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTN)) 2756 FUNCTION (svrintp, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTP)) 2757 FUNCTION (svrintx, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTX)) 2758 FUNCTION (svrintz, unspec_based_function, (-1, -1, UNSPEC_COND_FRINTZ)) 2759 FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE)) 2760 FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS)) 2761 FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE)) 2762 FUNCTION (svsel, svsel_impl,) 2763 FUNCTION (svset2, svset_impl, (2)) 2764 FUNCTION (svset3, svset_impl, (3)) 2765 FUNCTION (svset4, svset_impl, (4)) 2766 FUNCTION (svsetffr, svsetffr_impl,) 2767 FUNCTION (svsplice, QUIET_CODE_FOR_MODE0 (aarch64_sve_splice),) 2768 FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT)) 2769 FUNCTION (svst1, svst1_impl,) 2770 FUNCTION (svst1_scatter, svst1_scatter_impl,) 2771 FUNCTION (svst1b, svst1_truncate_impl, (QImode)) 2772 FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode)) 2773 FUNCTION (svst1h, svst1_truncate_impl, (HImode)) 2774 FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode)) 2775 FUNCTION (svst1w, svst1_truncate_impl, (SImode)) 2776 FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode)) 2777 FUNCTION (svst2, svst234_impl, (2)) 2778 FUNCTION (svst3, svst234_impl, (3)) 2779 FUNCTION (svst4, svst234_impl, (4)) 2780 FUNCTION (svstnt1, svstnt1_impl,) 2781 FUNCTION (svsub, svsub_impl,) 2782 FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB)) 2783 FUNCTION (svsudot, svusdot_impl, (true)) 2784 FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1)) 2785 FUNCTION (svtbl, svtbl_impl,) 2786 FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),) 2787 FUNCTION (svtrn1, svtrn_impl, (0)) 2788 FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q, 2789 UNSPEC_TRN1Q)) 2790 FUNCTION (svtrn2, svtrn_impl, (1)) 2791 FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q, 2792 UNSPEC_TRN2Q)) 2793 FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL)) 2794 FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL)) 2795 FUNCTION (svundef, svundef_impl, (1)) 2796 FUNCTION (svundef2, svundef_impl, (2)) 2797 FUNCTION (svundef3, svundef_impl, (3)) 2798 FUNCTION (svundef4, svundef_impl, (4)) 2799 FUNCTION (svunpkhi, svunpk_impl, (true)) 2800 FUNCTION (svunpklo, svunpk_impl, (false)) 2801 FUNCTION (svusdot, svusdot_impl, (false)) 2802 FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1)) 2803 FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1)) 2804 FUNCTION (svuzp1, svuzp_impl, (0)) 2805 FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q, 2806 UNSPEC_UZP1Q)) 2807 FUNCTION (svuzp2, svuzp_impl, (1)) 2808 FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q, 2809 UNSPEC_UZP2Q)) 2810 FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true)) 2811 FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false)) 2812 FUNCTION (svwrffr, svwrffr_impl,) 2813 FUNCTION (svzip1, svzip_impl, (0)) 2814 FUNCTION (svzip1q, 
unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q, 2815 UNSPEC_ZIP1Q)) 2816 FUNCTION (svzip2, svzip_impl, (1)) 2817 FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q, 2818 UNSPEC_ZIP2Q)) 2819 2820 } /* end namespace aarch64_sve */ 2821