1 /* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. 2 3 This file is free software; you can redistribute it and/or modify it under 4 the terms of the GNU General Public License as published by the Free 5 Software Foundation; either version 3 of the License, or (at your option) 6 any later version. 7 8 This file is distributed in the hope that it will be useful, but WITHOUT 9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 11 for more details. 12 13 You should have received a copy of the GNU General Public License 14 along with GCC; see the file COPYING3. If not see 15 <http://www.gnu.org/licenses/>. */ 16 17 #include "config.h" 18 #include "system.h" 19 #include "coretypes.h" 20 #include "tm.h" 21 #include "rtl.h" 22 #include "regs.h" 23 #include "hard-reg-set.h" 24 #include "real.h" 25 #include "insn-config.h" 26 #include "conditions.h" 27 #include "insn-attr.h" 28 #include "flags.h" 29 #include "recog.h" 30 #include "obstack.h" 31 #include "tree.h" 32 #include "expr.h" 33 #include "optabs.h" 34 #include "except.h" 35 #include "function.h" 36 #include "output.h" 37 #include "basic-block.h" 38 #include "integrate.h" 39 #include "toplev.h" 40 #include "ggc.h" 41 #include "hashtab.h" 42 #include "tm_p.h" 43 #include "target.h" 44 #include "target-def.h" 45 #include "langhooks.h" 46 #include "reload.h" 47 #include "cfglayout.h" 48 #include "sched-int.h" 49 #include "params.h" 50 #include "assert.h" 51 #include "machmode.h" 52 #include "gimple.h" 53 #include "tm-constrs.h" 54 #include "ddg.h" 55 #include "sbitmap.h" 56 #include "timevar.h" 57 #include "df.h" 58 59 /* Builtin types, data and prototypes. */ 60 61 enum spu_builtin_type_index 62 { 63 SPU_BTI_END_OF_PARAMS, 64 65 /* We create new type nodes for these. */ 66 SPU_BTI_V16QI, 67 SPU_BTI_V8HI, 68 SPU_BTI_V4SI, 69 SPU_BTI_V2DI, 70 SPU_BTI_V4SF, 71 SPU_BTI_V2DF, 72 SPU_BTI_UV16QI, 73 SPU_BTI_UV8HI, 74 SPU_BTI_UV4SI, 75 SPU_BTI_UV2DI, 76 77 /* A 16-byte type. 
(Implemented with V16QI_type_node) */ 78 SPU_BTI_QUADWORD, 79 80 /* These all correspond to intSI_type_node */ 81 SPU_BTI_7, 82 SPU_BTI_S7, 83 SPU_BTI_U7, 84 SPU_BTI_S10, 85 SPU_BTI_S10_4, 86 SPU_BTI_U14, 87 SPU_BTI_16, 88 SPU_BTI_S16, 89 SPU_BTI_S16_2, 90 SPU_BTI_U16, 91 SPU_BTI_U16_2, 92 SPU_BTI_U18, 93 94 /* These correspond to the standard types */ 95 SPU_BTI_INTQI, 96 SPU_BTI_INTHI, 97 SPU_BTI_INTSI, 98 SPU_BTI_INTDI, 99 100 SPU_BTI_UINTQI, 101 SPU_BTI_UINTHI, 102 SPU_BTI_UINTSI, 103 SPU_BTI_UINTDI, 104 105 SPU_BTI_FLOAT, 106 SPU_BTI_DOUBLE, 107 108 SPU_BTI_VOID, 109 SPU_BTI_PTR, 110 111 SPU_BTI_MAX 112 }; 113 114 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI]) 115 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI]) 116 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI]) 117 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI]) 118 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF]) 119 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF]) 120 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI]) 121 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI]) 122 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI]) 123 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI]) 124 125 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX]; 126 127 struct spu_builtin_range 128 { 129 int low, high; 130 }; 131 132 static struct spu_builtin_range spu_builtin_range[] = { 133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */ 134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */ 135 {0ll, 0x7fll}, /* SPU_BTI_U7 */ 136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */ 137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */ 138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */ 139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */ 140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */ 141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */ 142 {0ll, 0xffffll}, /* SPU_BTI_U16 */ 143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */ 144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */ 145 }; 146 147 148 /* Target specific attribute specifications. */ 149 char regs_ever_allocated[FIRST_PSEUDO_REGISTER]; 150 151 /* Prototypes and external defs. 
*/ 152 static void spu_init_builtins (void); 153 static tree spu_builtin_decl (unsigned, bool); 154 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode); 155 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode); 156 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool); 157 static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx, 158 bool, addr_space_t); 159 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start); 160 static rtx get_pic_reg (void); 161 static int need_to_save_reg (int regno, int saving); 162 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset); 163 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset); 164 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, 165 rtx scratch); 166 static void emit_nop_for_insn (rtx insn); 167 static bool insn_clobbers_hbr (rtx insn); 168 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target, 169 int distance, sbitmap blocks); 170 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1, 171 enum machine_mode dmode); 172 static rtx get_branch_target (rtx branch); 173 static void spu_machine_dependent_reorg (void); 174 static int spu_sched_issue_rate (void); 175 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn, 176 int can_issue_more); 177 static int get_pipe (rtx insn); 178 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost); 179 static void spu_sched_init_global (FILE *, int, int); 180 static void spu_sched_init (FILE *, int, int); 181 static int spu_sched_reorder (FILE *, int, rtx *, int *, int); 182 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args, 183 int flags, 184 unsigned char *no_add_attrs); 185 static tree spu_handle_vector_attribute (tree * node, tree name, tree args, 186 int flags, 187 unsigned char *no_add_attrs); 188 static int spu_naked_function_p (tree func); 189 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode, 190 const_tree type, unsigned char named); 191 static tree spu_build_builtin_va_list (void); 192 static void spu_va_start (tree, rtx); 193 static tree spu_gimplify_va_arg_expr (tree valist, tree type, 194 gimple_seq * pre_p, gimple_seq * post_p); 195 static int store_with_one_insn_p (rtx mem); 196 static int mem_is_padded_component_ref (rtx x); 197 static int reg_aligned_for_addr (rtx x); 198 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p); 199 static void spu_asm_globalize_label (FILE * file, const char *name); 200 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code, 201 int *total, bool speed); 202 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp); 203 static void spu_init_libfuncs (void); 204 static bool spu_return_in_memory (const_tree type, const_tree fntype); 205 static void fix_range (const char *); 206 static void spu_encode_section_info (tree, rtx, int); 207 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode); 208 static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode, 209 addr_space_t); 210 static tree spu_builtin_mul_widen_even (tree); 211 static tree spu_builtin_mul_widen_odd (tree); 212 static tree spu_builtin_mask_for_load (void); 213 static int spu_builtin_vectorization_cost (bool); 214 static bool spu_vector_alignment_reachable (const_tree, bool); 215 static tree spu_builtin_vec_perm (tree, tree *); 216 static enum machine_mode 
spu_addr_space_pointer_mode (addr_space_t); 217 static enum machine_mode spu_addr_space_address_mode (addr_space_t); 218 static bool spu_addr_space_subset_p (addr_space_t, addr_space_t); 219 static rtx spu_addr_space_convert (rtx, tree, tree); 220 static int spu_sms_res_mii (struct ddg *g); 221 static void asm_file_start (void); 222 static unsigned int spu_section_type_flags (tree, const char *, int); 223 static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT); 224 static void spu_unique_section (tree, int); 225 static rtx spu_expand_load (rtx, rtx, rtx, int); 226 static void spu_trampoline_init (rtx, tree, rtx); 227 228 extern const char *reg_names[]; 229 230 /* Which instruction set architecture to use. */ 231 int spu_arch; 232 /* Which cpu are we tuning for. */ 233 int spu_tune; 234 235 /* The hardware requires 8 insns between a hint and the branch it 236 effects. This variable describes how many rtl instructions the 237 compiler needs to see before inserting a hint, and then the compiler 238 will insert enough nops to make it at least 8 insns. The default is 239 for the compiler to allow up to 2 nops be emitted. The nops are 240 inserted in pairs, so we round down. */ 241 int spu_hint_dist = (8*4) - (2*4); 242 243 /* Determines whether we run variable tracking in machine dependent 244 reorganization. */ 245 static int spu_flag_var_tracking; 246 247 enum spu_immediate { 248 SPU_NONE, 249 SPU_IL, 250 SPU_ILA, 251 SPU_ILH, 252 SPU_ILHU, 253 SPU_ORI, 254 SPU_ORHI, 255 SPU_ORBI, 256 SPU_IOHL 257 }; 258 enum immediate_class 259 { 260 IC_POOL, /* constant pool */ 261 IC_IL1, /* one il* instruction */ 262 IC_IL2, /* both ilhu and iohl instructions */ 263 IC_IL1s, /* one il* instruction */ 264 IC_IL2s, /* both ilhu and iohl instructions */ 265 IC_FSMBI, /* the fsmbi instruction */ 266 IC_CPAT, /* one of the c*d instructions */ 267 IC_FSMBI2 /* fsmbi plus 1 other instruction */ 268 }; 269 270 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val); 271 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val); 272 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart); 273 static enum immediate_class classify_immediate (rtx op, 274 enum machine_mode mode); 275 276 static enum machine_mode spu_unwind_word_mode (void); 277 278 static enum machine_mode 279 spu_libgcc_cmp_return_mode (void); 280 281 static enum machine_mode 282 spu_libgcc_shift_count_mode (void); 283 284 /* Pointer mode for __ea references. */ 285 #define EAmode (spu_ea_model != 32 ? DImode : SImode) 286 287 288 /* Table of machine attributes. */ 289 static const struct attribute_spec spu_attribute_table[] = 290 { 291 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */ 292 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute }, 293 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute }, 294 { NULL, 0, 0, false, false, false, NULL } 295 }; 296 297 /* TARGET overrides. 
*/ 298 299 #undef TARGET_ADDR_SPACE_POINTER_MODE 300 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode 301 302 #undef TARGET_ADDR_SPACE_ADDRESS_MODE 303 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode 304 305 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P 306 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \ 307 spu_addr_space_legitimate_address_p 308 309 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS 310 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address 311 312 #undef TARGET_ADDR_SPACE_SUBSET_P 313 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p 314 315 #undef TARGET_ADDR_SPACE_CONVERT 316 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert 317 318 #undef TARGET_INIT_BUILTINS 319 #define TARGET_INIT_BUILTINS spu_init_builtins 320 #undef TARGET_BUILTIN_DECL 321 #define TARGET_BUILTIN_DECL spu_builtin_decl 322 323 #undef TARGET_EXPAND_BUILTIN 324 #define TARGET_EXPAND_BUILTIN spu_expand_builtin 325 326 #undef TARGET_UNWIND_WORD_MODE 327 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode 328 329 #undef TARGET_LEGITIMIZE_ADDRESS 330 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address 331 332 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long 333 and .quad for the debugger. When it is known that the assembler is fixed, 334 these can be removed. */ 335 #undef TARGET_ASM_UNALIGNED_SI_OP 336 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t" 337 338 #undef TARGET_ASM_ALIGNED_DI_OP 339 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" 340 341 /* The .8byte directive doesn't seem to work well for a 32 bit 342 architecture. */ 343 #undef TARGET_ASM_UNALIGNED_DI_OP 344 #define TARGET_ASM_UNALIGNED_DI_OP NULL 345 346 #undef TARGET_RTX_COSTS 347 #define TARGET_RTX_COSTS spu_rtx_costs 348 349 #undef TARGET_ADDRESS_COST 350 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0 351 352 #undef TARGET_SCHED_ISSUE_RATE 353 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate 354 355 #undef TARGET_SCHED_INIT_GLOBAL 356 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global 357 358 #undef TARGET_SCHED_INIT 359 #define TARGET_SCHED_INIT spu_sched_init 360 361 #undef TARGET_SCHED_VARIABLE_ISSUE 362 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue 363 364 #undef TARGET_SCHED_REORDER 365 #define TARGET_SCHED_REORDER spu_sched_reorder 366 367 #undef TARGET_SCHED_REORDER2 368 #define TARGET_SCHED_REORDER2 spu_sched_reorder 369 370 #undef TARGET_SCHED_ADJUST_COST 371 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost 372 373 #undef TARGET_ATTRIBUTE_TABLE 374 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table 375 376 #undef TARGET_ASM_INTEGER 377 #define TARGET_ASM_INTEGER spu_assemble_integer 378 379 #undef TARGET_SCALAR_MODE_SUPPORTED_P 380 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p 381 382 #undef TARGET_VECTOR_MODE_SUPPORTED_P 383 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p 384 385 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 386 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall 387 388 #undef TARGET_ASM_GLOBALIZE_LABEL 389 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label 390 391 #undef TARGET_PASS_BY_REFERENCE 392 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference 393 394 #undef TARGET_MUST_PASS_IN_STACK 395 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size 396 397 #undef TARGET_BUILD_BUILTIN_VA_LIST 398 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list 399 400 #undef TARGET_EXPAND_BUILTIN_VA_START 401 #define 
TARGET_EXPAND_BUILTIN_VA_START spu_va_start 402 403 #undef TARGET_SETUP_INCOMING_VARARGS 404 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs 405 406 #undef TARGET_MACHINE_DEPENDENT_REORG 407 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg 408 409 #undef TARGET_GIMPLIFY_VA_ARG_EXPR 410 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr 411 412 #undef TARGET_DEFAULT_TARGET_FLAGS 413 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT) 414 415 #undef TARGET_INIT_LIBFUNCS 416 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs 417 418 #undef TARGET_RETURN_IN_MEMORY 419 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory 420 421 #undef TARGET_ENCODE_SECTION_INFO 422 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info 423 424 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 425 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even 426 427 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 428 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd 429 430 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 431 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load 432 433 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 434 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost 435 436 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE 437 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable 438 439 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM 440 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm 441 442 #undef TARGET_LIBGCC_CMP_RETURN_MODE 443 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode 444 445 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE 446 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode 447 448 #undef TARGET_SCHED_SMS_RES_MII 449 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii 450 451 #undef TARGET_ASM_FILE_START 452 #define TARGET_ASM_FILE_START asm_file_start 453 454 #undef TARGET_SECTION_TYPE_FLAGS 455 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags 456 457 #undef TARGET_ASM_SELECT_SECTION 458 #define TARGET_ASM_SELECT_SECTION spu_select_section 459 460 #undef TARGET_ASM_UNIQUE_SECTION 461 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section 462 463 #undef TARGET_LEGITIMATE_ADDRESS_P 464 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p 465 466 #undef TARGET_TRAMPOLINE_INIT 467 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init 468 469 struct gcc_target targetm = TARGET_INITIALIZER; 470 471 void 472 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED) 473 { 474 /* Override some of the default param values. With so many registers 475 larger values are better for these params. */ 476 MAX_PENDING_LIST_LENGTH = 128; 477 478 /* With so many registers this is better on by default. */ 479 flag_rename_registers = 1; 480 } 481 482 /* Sometimes certain combinations of command options do not make sense 483 on a particular target machine. You can define a macro 484 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is 485 executed once just after all the command options have been parsed. */ 486 void 487 spu_override_options (void) 488 { 489 /* Small loops will be unpeeled at -O3. For SPU it is more important 490 to keep code small by default. 
*/ 491 if (!flag_unroll_loops && !flag_peel_loops 492 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES)) 493 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1; 494 495 flag_omit_frame_pointer = 1; 496 497 /* Functions must be 8 byte aligned so we correctly handle dual issue */ 498 if (align_functions < 8) 499 align_functions = 8; 500 501 spu_hint_dist = 8*4 - spu_max_nops*4; 502 if (spu_hint_dist < 0) 503 spu_hint_dist = 0; 504 505 if (spu_fixed_range_string) 506 fix_range (spu_fixed_range_string); 507 508 /* Determine processor architectural level. */ 509 if (spu_arch_string) 510 { 511 if (strcmp (&spu_arch_string[0], "cell") == 0) 512 spu_arch = PROCESSOR_CELL; 513 else if (strcmp (&spu_arch_string[0], "celledp") == 0) 514 spu_arch = PROCESSOR_CELLEDP; 515 else 516 error ("Unknown architecture '%s'", &spu_arch_string[0]); 517 } 518 519 /* Determine processor to tune for. */ 520 if (spu_tune_string) 521 { 522 if (strcmp (&spu_tune_string[0], "cell") == 0) 523 spu_tune = PROCESSOR_CELL; 524 else if (strcmp (&spu_tune_string[0], "celledp") == 0) 525 spu_tune = PROCESSOR_CELLEDP; 526 else 527 error ("Unknown architecture '%s'", &spu_tune_string[0]); 528 } 529 530 /* Change defaults according to the processor architecture. */ 531 if (spu_arch == PROCESSOR_CELLEDP) 532 { 533 /* If no command line option has been otherwise specified, change 534 the default to -mno-safe-hints on celledp -- only the original 535 Cell/B.E. processors require this workaround. */ 536 if (!(target_flags_explicit & MASK_SAFE_HINTS)) 537 target_flags &= ~MASK_SAFE_HINTS; 538 } 539 540 REAL_MODE_FORMAT (SFmode) = &spu_single_format; 541 } 542 543 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in 544 struct attribute_spec.handler. */ 545 546 /* True if MODE is valid for the target. By "valid", we mean able to 547 be manipulated in non-trivial ways. In particular, this means all 548 the arithmetic is supported. */ 549 static bool 550 spu_scalar_mode_supported_p (enum machine_mode mode) 551 { 552 switch (mode) 553 { 554 case QImode: 555 case HImode: 556 case SImode: 557 case SFmode: 558 case DImode: 559 case TImode: 560 case DFmode: 561 return true; 562 563 default: 564 return false; 565 } 566 } 567 568 /* Similarly for vector modes. "Supported" here is less strict. At 569 least some operations are supported; need to check optabs or builtins 570 for further details. */ 571 static bool 572 spu_vector_mode_supported_p (enum machine_mode mode) 573 { 574 switch (mode) 575 { 576 case V16QImode: 577 case V8HImode: 578 case V4SImode: 579 case V2DImode: 580 case V4SFmode: 581 case V2DFmode: 582 return true; 583 584 default: 585 return false; 586 } 587 } 588 589 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the 590 least significant bytes of the outer mode. This function returns 591 TRUE for the SUBREG's where this is correct. */ 592 int 593 valid_subreg (rtx op) 594 { 595 enum machine_mode om = GET_MODE (op); 596 enum machine_mode im = GET_MODE (SUBREG_REG (op)); 597 return om != VOIDmode && im != VOIDmode 598 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om) 599 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4) 600 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16)); 601 } 602 603 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off 604 and adjust the start offset. */ 605 static rtx 606 adjust_operand (rtx op, HOST_WIDE_INT * start) 607 { 608 enum machine_mode mode; 609 int op_size; 610 /* Strip any paradoxical SUBREG. 
*/ 611 if (GET_CODE (op) == SUBREG 612 && (GET_MODE_BITSIZE (GET_MODE (op)) 613 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op))))) 614 { 615 if (start) 616 *start -= 617 GET_MODE_BITSIZE (GET_MODE (op)) - 618 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op))); 619 op = SUBREG_REG (op); 620 } 621 /* If it is smaller than SI, assure a SUBREG */ 622 op_size = GET_MODE_BITSIZE (GET_MODE (op)); 623 if (op_size < 32) 624 { 625 if (start) 626 *start += 32 - op_size; 627 op_size = 32; 628 } 629 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */ 630 mode = mode_for_size (op_size, MODE_INT, 0); 631 if (mode != GET_MODE (op)) 632 op = gen_rtx_SUBREG (mode, op, 0); 633 return op; 634 } 635 636 void 637 spu_expand_extv (rtx ops[], int unsignedp) 638 { 639 rtx dst = ops[0], src = ops[1]; 640 HOST_WIDE_INT width = INTVAL (ops[2]); 641 HOST_WIDE_INT start = INTVAL (ops[3]); 642 HOST_WIDE_INT align_mask; 643 rtx s0, s1, mask, r0; 644 645 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode); 646 647 if (MEM_P (src)) 648 { 649 /* First, determine if we need 1 TImode load or 2. We need only 1 650 if the bits being extracted do not cross the alignment boundary 651 as determined by the MEM and its address. */ 652 653 align_mask = -MEM_ALIGN (src); 654 if ((start & align_mask) == ((start + width - 1) & align_mask)) 655 { 656 /* Alignment is sufficient for 1 load. */ 657 s0 = gen_reg_rtx (TImode); 658 r0 = spu_expand_load (s0, 0, src, start / 8); 659 start &= 7; 660 if (r0) 661 emit_insn (gen_rotqby_ti (s0, s0, r0)); 662 } 663 else 664 { 665 /* Need 2 loads. */ 666 s0 = gen_reg_rtx (TImode); 667 s1 = gen_reg_rtx (TImode); 668 r0 = spu_expand_load (s0, s1, src, start / 8); 669 start &= 7; 670 671 gcc_assert (start + width <= 128); 672 if (r0) 673 { 674 rtx r1 = gen_reg_rtx (SImode); 675 mask = gen_reg_rtx (TImode); 676 emit_move_insn (mask, GEN_INT (-1)); 677 emit_insn (gen_rotqby_ti (s0, s0, r0)); 678 emit_insn (gen_rotqby_ti (s1, s1, r0)); 679 if (GET_CODE (r0) == CONST_INT) 680 r1 = GEN_INT (INTVAL (r0) & 15); 681 else 682 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15))); 683 emit_insn (gen_shlqby_ti (mask, mask, r1)); 684 emit_insn (gen_selb (s0, s1, s0, mask)); 685 } 686 } 687 688 } 689 else if (GET_CODE (src) == SUBREG) 690 { 691 rtx r = SUBREG_REG (src); 692 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r))); 693 s0 = gen_reg_rtx (TImode); 694 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode)) 695 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r))); 696 else 697 emit_move_insn (s0, src); 698 } 699 else 700 { 701 gcc_assert (REG_P (src) && GET_MODE (src) == TImode); 702 s0 = gen_reg_rtx (TImode); 703 emit_move_insn (s0, src); 704 } 705 706 /* Now s0 is TImode and contains the bits to extract at start. 
*/ 707 708 if (start) 709 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start))); 710 711 if (128 - width) 712 { 713 tree c = build_int_cst (NULL_TREE, 128 - width); 714 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp); 715 } 716 717 emit_move_insn (dst, s0); 718 } 719 720 void 721 spu_expand_insv (rtx ops[]) 722 { 723 HOST_WIDE_INT width = INTVAL (ops[1]); 724 HOST_WIDE_INT start = INTVAL (ops[2]); 725 HOST_WIDE_INT maskbits; 726 enum machine_mode dst_mode, src_mode; 727 rtx dst = ops[0], src = ops[3]; 728 int dst_size, src_size; 729 rtx mask; 730 rtx shift_reg; 731 int shift; 732 733 734 if (GET_CODE (ops[0]) == MEM) 735 dst = gen_reg_rtx (TImode); 736 else 737 dst = adjust_operand (dst, &start); 738 dst_mode = GET_MODE (dst); 739 dst_size = GET_MODE_BITSIZE (GET_MODE (dst)); 740 741 if (CONSTANT_P (src)) 742 { 743 enum machine_mode m = 744 (width <= 32 ? SImode : width <= 64 ? DImode : TImode); 745 src = force_reg (m, convert_to_mode (m, src, 0)); 746 } 747 src = adjust_operand (src, 0); 748 src_mode = GET_MODE (src); 749 src_size = GET_MODE_BITSIZE (GET_MODE (src)); 750 751 mask = gen_reg_rtx (dst_mode); 752 shift_reg = gen_reg_rtx (dst_mode); 753 shift = dst_size - start - width; 754 755 /* It's not safe to use subreg here because the compiler assumes 756 that the SUBREG_REG is right justified in the SUBREG. */ 757 convert_move (shift_reg, src, 1); 758 759 if (shift > 0) 760 { 761 switch (dst_mode) 762 { 763 case SImode: 764 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift))); 765 break; 766 case DImode: 767 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift))); 768 break; 769 case TImode: 770 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift))); 771 break; 772 default: 773 abort (); 774 } 775 } 776 else if (shift < 0) 777 abort (); 778 779 switch (dst_size) 780 { 781 case 32: 782 maskbits = (-1ll << (32 - width - start)); 783 if (start) 784 maskbits += (1ll << (32 - start)); 785 emit_move_insn (mask, GEN_INT (maskbits)); 786 break; 787 case 64: 788 maskbits = (-1ll << (64 - width - start)); 789 if (start) 790 maskbits += (1ll << (64 - start)); 791 emit_move_insn (mask, GEN_INT (maskbits)); 792 break; 793 case 128: 794 { 795 unsigned char arr[16]; 796 int i = start / 8; 797 memset (arr, 0, sizeof (arr)); 798 arr[i] = 0xff >> (start & 7); 799 for (i++; i <= (start + width - 1) / 8; i++) 800 arr[i] = 0xff; 801 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7)); 802 emit_move_insn (mask, array_to_constant (TImode, arr)); 803 } 804 break; 805 default: 806 abort (); 807 } 808 if (GET_CODE (ops[0]) == MEM) 809 { 810 rtx low = gen_reg_rtx (SImode); 811 rtx rotl = gen_reg_rtx (SImode); 812 rtx mask0 = gen_reg_rtx (TImode); 813 rtx addr; 814 rtx addr0; 815 rtx addr1; 816 rtx mem; 817 818 addr = force_reg (Pmode, XEXP (ops[0], 0)); 819 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16)); 820 emit_insn (gen_andsi3 (low, addr, GEN_INT (15))); 821 emit_insn (gen_negsi2 (rotl, low)); 822 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl)); 823 emit_insn (gen_rotqmby_ti (mask0, mask, rotl)); 824 mem = change_address (ops[0], TImode, addr0); 825 set_mem_alias_set (mem, 0); 826 emit_move_insn (dst, mem); 827 emit_insn (gen_selb (dst, dst, shift_reg, mask0)); 828 if (start + width > MEM_ALIGN (ops[0])) 829 { 830 rtx shl = gen_reg_rtx (SImode); 831 rtx mask1 = gen_reg_rtx (TImode); 832 rtx dst1 = gen_reg_rtx (TImode); 833 rtx mem1; 834 addr1 = plus_constant (addr, 16); 835 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16)); 836 emit_insn (gen_subsi3 (shl, 
GEN_INT (16), low)); 837 emit_insn (gen_shlqby_ti (mask1, mask, shl)); 838 mem1 = change_address (ops[0], TImode, addr1); 839 set_mem_alias_set (mem1, 0); 840 emit_move_insn (dst1, mem1); 841 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1)); 842 emit_move_insn (mem1, dst1); 843 } 844 emit_move_insn (mem, dst); 845 } 846 else 847 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask)); 848 } 849 850 851 int 852 spu_expand_block_move (rtx ops[]) 853 { 854 HOST_WIDE_INT bytes, align, offset; 855 rtx src, dst, sreg, dreg, target; 856 int i; 857 if (GET_CODE (ops[2]) != CONST_INT 858 || GET_CODE (ops[3]) != CONST_INT 859 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8)) 860 return 0; 861 862 bytes = INTVAL (ops[2]); 863 align = INTVAL (ops[3]); 864 865 if (bytes <= 0) 866 return 1; 867 868 dst = ops[0]; 869 src = ops[1]; 870 871 if (align == 16) 872 { 873 for (offset = 0; offset + 16 <= bytes; offset += 16) 874 { 875 dst = adjust_address (ops[0], V16QImode, offset); 876 src = adjust_address (ops[1], V16QImode, offset); 877 emit_move_insn (dst, src); 878 } 879 if (offset < bytes) 880 { 881 rtx mask; 882 unsigned char arr[16] = { 0 }; 883 for (i = 0; i < bytes - offset; i++) 884 arr[i] = 0xff; 885 dst = adjust_address (ops[0], V16QImode, offset); 886 src = adjust_address (ops[1], V16QImode, offset); 887 mask = gen_reg_rtx (V16QImode); 888 sreg = gen_reg_rtx (V16QImode); 889 dreg = gen_reg_rtx (V16QImode); 890 target = gen_reg_rtx (V16QImode); 891 emit_move_insn (mask, array_to_constant (V16QImode, arr)); 892 emit_move_insn (dreg, dst); 893 emit_move_insn (sreg, src); 894 emit_insn (gen_selb (target, dreg, sreg, mask)); 895 emit_move_insn (dst, target); 896 } 897 return 1; 898 } 899 return 0; 900 } 901 902 enum spu_comp_code 903 { SPU_EQ, SPU_GT, SPU_GTU }; 904 905 int spu_comp_icode[12][3] = { 906 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi}, 907 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi}, 908 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si}, 909 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di}, 910 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti}, 911 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0}, 912 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0}, 913 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi}, 914 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi}, 915 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si}, 916 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0}, 917 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0}, 918 }; 919 920 /* Generate a compare for CODE. Return a brand-new rtx that represents 921 the result of the compare. GCC can figure this out too if we don't 922 provide all variations of compares, but GCC always wants to use 923 WORD_MODE, we can generate better code in most cases if we do it 924 ourselves. */ 925 void 926 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[]) 927 { 928 int reverse_compare = 0; 929 int reverse_test = 0; 930 rtx compare_result, eq_result; 931 rtx comp_rtx, eq_rtx; 932 enum machine_mode comp_mode; 933 enum machine_mode op_mode; 934 enum spu_comp_code scode, eq_code; 935 enum insn_code ior_code; 936 enum rtx_code code = GET_CODE (cmp); 937 rtx op0 = XEXP (cmp, 0); 938 rtx op1 = XEXP (cmp, 1); 939 int index; 940 int eq_test = 0; 941 942 /* When op1 is a CONST_INT change (X >= C) to (X > C-1), 943 and so on, to keep the constant in operand 1. 
*/ 944 if (GET_CODE (op1) == CONST_INT) 945 { 946 HOST_WIDE_INT val = INTVAL (op1) - 1; 947 if (trunc_int_for_mode (val, GET_MODE (op0)) == val) 948 switch (code) 949 { 950 case GE: 951 op1 = GEN_INT (val); 952 code = GT; 953 break; 954 case LT: 955 op1 = GEN_INT (val); 956 code = LE; 957 break; 958 case GEU: 959 op1 = GEN_INT (val); 960 code = GTU; 961 break; 962 case LTU: 963 op1 = GEN_INT (val); 964 code = LEU; 965 break; 966 default: 967 break; 968 } 969 } 970 971 comp_mode = SImode; 972 op_mode = GET_MODE (op0); 973 974 switch (code) 975 { 976 case GE: 977 scode = SPU_GT; 978 if (HONOR_NANS (op_mode)) 979 { 980 reverse_compare = 0; 981 reverse_test = 0; 982 eq_test = 1; 983 eq_code = SPU_EQ; 984 } 985 else 986 { 987 reverse_compare = 1; 988 reverse_test = 1; 989 } 990 break; 991 case LE: 992 scode = SPU_GT; 993 if (HONOR_NANS (op_mode)) 994 { 995 reverse_compare = 1; 996 reverse_test = 0; 997 eq_test = 1; 998 eq_code = SPU_EQ; 999 } 1000 else 1001 { 1002 reverse_compare = 0; 1003 reverse_test = 1; 1004 } 1005 break; 1006 case LT: 1007 reverse_compare = 1; 1008 reverse_test = 0; 1009 scode = SPU_GT; 1010 break; 1011 case GEU: 1012 reverse_compare = 1; 1013 reverse_test = 1; 1014 scode = SPU_GTU; 1015 break; 1016 case LEU: 1017 reverse_compare = 0; 1018 reverse_test = 1; 1019 scode = SPU_GTU; 1020 break; 1021 case LTU: 1022 reverse_compare = 1; 1023 reverse_test = 0; 1024 scode = SPU_GTU; 1025 break; 1026 case NE: 1027 reverse_compare = 0; 1028 reverse_test = 1; 1029 scode = SPU_EQ; 1030 break; 1031 1032 case EQ: 1033 scode = SPU_EQ; 1034 break; 1035 case GT: 1036 scode = SPU_GT; 1037 break; 1038 case GTU: 1039 scode = SPU_GTU; 1040 break; 1041 default: 1042 scode = SPU_EQ; 1043 break; 1044 } 1045 1046 switch (op_mode) 1047 { 1048 case QImode: 1049 index = 0; 1050 comp_mode = QImode; 1051 break; 1052 case HImode: 1053 index = 1; 1054 comp_mode = HImode; 1055 break; 1056 case SImode: 1057 index = 2; 1058 break; 1059 case DImode: 1060 index = 3; 1061 break; 1062 case TImode: 1063 index = 4; 1064 break; 1065 case SFmode: 1066 index = 5; 1067 break; 1068 case DFmode: 1069 index = 6; 1070 break; 1071 case V16QImode: 1072 index = 7; 1073 comp_mode = op_mode; 1074 break; 1075 case V8HImode: 1076 index = 8; 1077 comp_mode = op_mode; 1078 break; 1079 case V4SImode: 1080 index = 9; 1081 comp_mode = op_mode; 1082 break; 1083 case V4SFmode: 1084 index = 10; 1085 comp_mode = V4SImode; 1086 break; 1087 case V2DFmode: 1088 index = 11; 1089 comp_mode = V2DImode; 1090 break; 1091 case V2DImode: 1092 default: 1093 abort (); 1094 } 1095 1096 if (GET_MODE (op1) == DFmode 1097 && (scode != SPU_GT && scode != SPU_EQ)) 1098 abort (); 1099 1100 if (is_set == 0 && op1 == const0_rtx 1101 && (GET_MODE (op0) == SImode 1102 || GET_MODE (op0) == HImode) && scode == SPU_EQ) 1103 { 1104 /* Don't need to set a register with the result when we are 1105 comparing against zero and branching. 
*/ 1106 reverse_test = !reverse_test; 1107 compare_result = op0; 1108 } 1109 else 1110 { 1111 compare_result = gen_reg_rtx (comp_mode); 1112 1113 if (reverse_compare) 1114 { 1115 rtx t = op1; 1116 op1 = op0; 1117 op0 = t; 1118 } 1119 1120 if (spu_comp_icode[index][scode] == 0) 1121 abort (); 1122 1123 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate) 1124 (op0, op_mode)) 1125 op0 = force_reg (op_mode, op0); 1126 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate) 1127 (op1, op_mode)) 1128 op1 = force_reg (op_mode, op1); 1129 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result, 1130 op0, op1); 1131 if (comp_rtx == 0) 1132 abort (); 1133 emit_insn (comp_rtx); 1134 1135 if (eq_test) 1136 { 1137 eq_result = gen_reg_rtx (comp_mode); 1138 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result, 1139 op0, op1); 1140 if (eq_rtx == 0) 1141 abort (); 1142 emit_insn (eq_rtx); 1143 ior_code = ior_optab->handlers[(int)comp_mode].insn_code; 1144 gcc_assert (ior_code != CODE_FOR_nothing); 1145 emit_insn (GEN_FCN (ior_code) 1146 (compare_result, compare_result, eq_result)); 1147 } 1148 } 1149 1150 if (is_set == 0) 1151 { 1152 rtx bcomp; 1153 rtx loc_ref; 1154 1155 /* We don't have branch on QI compare insns, so we convert the 1156 QI compare result to a HI result. */ 1157 if (comp_mode == QImode) 1158 { 1159 rtx old_res = compare_result; 1160 compare_result = gen_reg_rtx (HImode); 1161 comp_mode = HImode; 1162 emit_insn (gen_extendqihi2 (compare_result, old_res)); 1163 } 1164 1165 if (reverse_test) 1166 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx); 1167 else 1168 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx); 1169 1170 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]); 1171 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, 1172 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, 1173 loc_ref, pc_rtx))); 1174 } 1175 else if (is_set == 2) 1176 { 1177 rtx target = operands[0]; 1178 int compare_size = GET_MODE_BITSIZE (comp_mode); 1179 int target_size = GET_MODE_BITSIZE (GET_MODE (target)); 1180 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0); 1181 rtx select_mask; 1182 rtx op_t = operands[2]; 1183 rtx op_f = operands[3]; 1184 1185 /* The result of the comparison can be SI, HI or QI mode. Create a 1186 mask based on that result. 
*/ 1187 if (target_size > compare_size) 1188 { 1189 select_mask = gen_reg_rtx (mode); 1190 emit_insn (gen_extend_compare (select_mask, compare_result)); 1191 } 1192 else if (target_size < compare_size) 1193 select_mask = 1194 gen_rtx_SUBREG (mode, compare_result, 1195 (compare_size - target_size) / BITS_PER_UNIT); 1196 else if (comp_mode != mode) 1197 select_mask = gen_rtx_SUBREG (mode, compare_result, 0); 1198 else 1199 select_mask = compare_result; 1200 1201 if (GET_MODE (target) != GET_MODE (op_t) 1202 || GET_MODE (target) != GET_MODE (op_f)) 1203 abort (); 1204 1205 if (reverse_test) 1206 emit_insn (gen_selb (target, op_t, op_f, select_mask)); 1207 else 1208 emit_insn (gen_selb (target, op_f, op_t, select_mask)); 1209 } 1210 else 1211 { 1212 rtx target = operands[0]; 1213 if (reverse_test) 1214 emit_insn (gen_rtx_SET (VOIDmode, compare_result, 1215 gen_rtx_NOT (comp_mode, compare_result))); 1216 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode) 1217 emit_insn (gen_extendhisi2 (target, compare_result)); 1218 else if (GET_MODE (target) == SImode 1219 && GET_MODE (compare_result) == QImode) 1220 emit_insn (gen_extend_compare (target, compare_result)); 1221 else 1222 emit_move_insn (target, compare_result); 1223 } 1224 } 1225 1226 HOST_WIDE_INT 1227 const_double_to_hwint (rtx x) 1228 { 1229 HOST_WIDE_INT val; 1230 REAL_VALUE_TYPE rv; 1231 if (GET_MODE (x) == SFmode) 1232 { 1233 REAL_VALUE_FROM_CONST_DOUBLE (rv, x); 1234 REAL_VALUE_TO_TARGET_SINGLE (rv, val); 1235 } 1236 else if (GET_MODE (x) == DFmode) 1237 { 1238 long l[2]; 1239 REAL_VALUE_FROM_CONST_DOUBLE (rv, x); 1240 REAL_VALUE_TO_TARGET_DOUBLE (rv, l); 1241 val = l[0]; 1242 val = (val << 32) | (l[1] & 0xffffffff); 1243 } 1244 else 1245 abort (); 1246 return val; 1247 } 1248 1249 rtx 1250 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v) 1251 { 1252 long tv[2]; 1253 REAL_VALUE_TYPE rv; 1254 gcc_assert (mode == SFmode || mode == DFmode); 1255 1256 if (mode == SFmode) 1257 tv[0] = (v << 32) >> 32; 1258 else if (mode == DFmode) 1259 { 1260 tv[1] = (v << 32) >> 32; 1261 tv[0] = v >> 32; 1262 } 1263 real_from_target (&rv, tv, mode); 1264 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode); 1265 } 1266 1267 void 1268 print_operand_address (FILE * file, register rtx addr) 1269 { 1270 rtx reg; 1271 rtx offset; 1272 1273 if (GET_CODE (addr) == AND 1274 && GET_CODE (XEXP (addr, 1)) == CONST_INT 1275 && INTVAL (XEXP (addr, 1)) == -16) 1276 addr = XEXP (addr, 0); 1277 1278 switch (GET_CODE (addr)) 1279 { 1280 case REG: 1281 fprintf (file, "0(%s)", reg_names[REGNO (addr)]); 1282 break; 1283 1284 case PLUS: 1285 reg = XEXP (addr, 0); 1286 offset = XEXP (addr, 1); 1287 if (GET_CODE (offset) == REG) 1288 { 1289 fprintf (file, "%s,%s", reg_names[REGNO (reg)], 1290 reg_names[REGNO (offset)]); 1291 } 1292 else if (GET_CODE (offset) == CONST_INT) 1293 { 1294 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)", 1295 INTVAL (offset), reg_names[REGNO (reg)]); 1296 } 1297 else 1298 abort (); 1299 break; 1300 1301 case CONST: 1302 case LABEL_REF: 1303 case SYMBOL_REF: 1304 case CONST_INT: 1305 output_addr_const (file, addr); 1306 break; 1307 1308 default: 1309 debug_rtx (addr); 1310 abort (); 1311 } 1312 } 1313 1314 void 1315 print_operand (FILE * file, rtx x, int code) 1316 { 1317 enum machine_mode mode = GET_MODE (x); 1318 HOST_WIDE_INT val; 1319 unsigned char arr[16]; 1320 int xcode = GET_CODE (x); 1321 int i, info; 1322 if (GET_MODE (x) == VOIDmode) 1323 switch (code) 1324 { 1325 case 'L': /* 128 bits, signed */ 1326 case 'm': /* 
128 bits, signed */ 1327 case 'T': /* 128 bits, signed */ 1328 case 't': /* 128 bits, signed */ 1329 mode = TImode; 1330 break; 1331 case 'K': /* 64 bits, signed */ 1332 case 'k': /* 64 bits, signed */ 1333 case 'D': /* 64 bits, signed */ 1334 case 'd': /* 64 bits, signed */ 1335 mode = DImode; 1336 break; 1337 case 'J': /* 32 bits, signed */ 1338 case 'j': /* 32 bits, signed */ 1339 case 's': /* 32 bits, signed */ 1340 case 'S': /* 32 bits, signed */ 1341 mode = SImode; 1342 break; 1343 } 1344 switch (code) 1345 { 1346 1347 case 'j': /* 32 bits, signed */ 1348 case 'k': /* 64 bits, signed */ 1349 case 'm': /* 128 bits, signed */ 1350 if (xcode == CONST_INT 1351 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR) 1352 { 1353 gcc_assert (logical_immediate_p (x, mode)); 1354 constant_to_array (mode, x, arr); 1355 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 1356 val = trunc_int_for_mode (val, SImode); 1357 switch (which_logical_immediate (val)) 1358 { 1359 case SPU_ORI: 1360 break; 1361 case SPU_ORHI: 1362 fprintf (file, "h"); 1363 break; 1364 case SPU_ORBI: 1365 fprintf (file, "b"); 1366 break; 1367 default: 1368 gcc_unreachable(); 1369 } 1370 } 1371 else 1372 gcc_unreachable(); 1373 return; 1374 1375 case 'J': /* 32 bits, signed */ 1376 case 'K': /* 64 bits, signed */ 1377 case 'L': /* 128 bits, signed */ 1378 if (xcode == CONST_INT 1379 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR) 1380 { 1381 gcc_assert (logical_immediate_p (x, mode) 1382 || iohl_immediate_p (x, mode)); 1383 constant_to_array (mode, x, arr); 1384 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 1385 val = trunc_int_for_mode (val, SImode); 1386 switch (which_logical_immediate (val)) 1387 { 1388 case SPU_ORI: 1389 case SPU_IOHL: 1390 break; 1391 case SPU_ORHI: 1392 val = trunc_int_for_mode (val, HImode); 1393 break; 1394 case SPU_ORBI: 1395 val = trunc_int_for_mode (val, QImode); 1396 break; 1397 default: 1398 gcc_unreachable(); 1399 } 1400 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val); 1401 } 1402 else 1403 gcc_unreachable(); 1404 return; 1405 1406 case 't': /* 128 bits, signed */ 1407 case 'd': /* 64 bits, signed */ 1408 case 's': /* 32 bits, signed */ 1409 if (CONSTANT_P (x)) 1410 { 1411 enum immediate_class c = classify_immediate (x, mode); 1412 switch (c) 1413 { 1414 case IC_IL1: 1415 constant_to_array (mode, x, arr); 1416 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 1417 val = trunc_int_for_mode (val, SImode); 1418 switch (which_immediate_load (val)) 1419 { 1420 case SPU_IL: 1421 break; 1422 case SPU_ILA: 1423 fprintf (file, "a"); 1424 break; 1425 case SPU_ILH: 1426 fprintf (file, "h"); 1427 break; 1428 case SPU_ILHU: 1429 fprintf (file, "hu"); 1430 break; 1431 default: 1432 gcc_unreachable (); 1433 } 1434 break; 1435 case IC_CPAT: 1436 constant_to_array (mode, x, arr); 1437 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0); 1438 if (info == 1) 1439 fprintf (file, "b"); 1440 else if (info == 2) 1441 fprintf (file, "h"); 1442 else if (info == 4) 1443 fprintf (file, "w"); 1444 else if (info == 8) 1445 fprintf (file, "d"); 1446 break; 1447 case IC_IL1s: 1448 if (xcode == CONST_VECTOR) 1449 { 1450 x = CONST_VECTOR_ELT (x, 0); 1451 xcode = GET_CODE (x); 1452 } 1453 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST) 1454 fprintf (file, "a"); 1455 else if (xcode == HIGH) 1456 fprintf (file, "hu"); 1457 break; 1458 case IC_FSMBI: 1459 case IC_FSMBI2: 1460 case IC_IL2: 1461 case IC_IL2s: 1462 case IC_POOL: 1463 abort (); 1464 } 1465 } 1466 else 1467 
gcc_unreachable (); 1468 return; 1469 1470 case 'T': /* 128 bits, signed */ 1471 case 'D': /* 64 bits, signed */ 1472 case 'S': /* 32 bits, signed */ 1473 if (CONSTANT_P (x)) 1474 { 1475 enum immediate_class c = classify_immediate (x, mode); 1476 switch (c) 1477 { 1478 case IC_IL1: 1479 constant_to_array (mode, x, arr); 1480 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 1481 val = trunc_int_for_mode (val, SImode); 1482 switch (which_immediate_load (val)) 1483 { 1484 case SPU_IL: 1485 case SPU_ILA: 1486 break; 1487 case SPU_ILH: 1488 case SPU_ILHU: 1489 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode); 1490 break; 1491 default: 1492 gcc_unreachable (); 1493 } 1494 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val); 1495 break; 1496 case IC_FSMBI: 1497 constant_to_array (mode, x, arr); 1498 val = 0; 1499 for (i = 0; i < 16; i++) 1500 { 1501 val <<= 1; 1502 val |= arr[i] & 1; 1503 } 1504 print_operand (file, GEN_INT (val), 0); 1505 break; 1506 case IC_CPAT: 1507 constant_to_array (mode, x, arr); 1508 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info); 1509 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info); 1510 break; 1511 case IC_IL1s: 1512 if (xcode == HIGH) 1513 x = XEXP (x, 0); 1514 if (GET_CODE (x) == CONST_VECTOR) 1515 x = CONST_VECTOR_ELT (x, 0); 1516 output_addr_const (file, x); 1517 if (xcode == HIGH) 1518 fprintf (file, "@h"); 1519 break; 1520 case IC_IL2: 1521 case IC_IL2s: 1522 case IC_FSMBI2: 1523 case IC_POOL: 1524 abort (); 1525 } 1526 } 1527 else 1528 gcc_unreachable (); 1529 return; 1530 1531 case 'C': 1532 if (xcode == CONST_INT) 1533 { 1534 /* Only 4 least significant bits are relevant for generate 1535 control word instructions. */ 1536 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15); 1537 return; 1538 } 1539 break; 1540 1541 case 'M': /* print code for c*d */ 1542 if (GET_CODE (x) == CONST_INT) 1543 switch (INTVAL (x)) 1544 { 1545 case 1: 1546 fprintf (file, "b"); 1547 break; 1548 case 2: 1549 fprintf (file, "h"); 1550 break; 1551 case 4: 1552 fprintf (file, "w"); 1553 break; 1554 case 8: 1555 fprintf (file, "d"); 1556 break; 1557 default: 1558 gcc_unreachable(); 1559 } 1560 else 1561 gcc_unreachable(); 1562 return; 1563 1564 case 'N': /* Negate the operand */ 1565 if (xcode == CONST_INT) 1566 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x)); 1567 else if (xcode == CONST_VECTOR) 1568 fprintf (file, HOST_WIDE_INT_PRINT_DEC, 1569 -INTVAL (CONST_VECTOR_ELT (x, 0))); 1570 return; 1571 1572 case 'I': /* enable/disable interrupts */ 1573 if (xcode == CONST_INT) 1574 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e"); 1575 return; 1576 1577 case 'b': /* branch modifiers */ 1578 if (xcode == REG) 1579 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : ""); 1580 else if (COMPARISON_P (x)) 1581 fprintf (file, "%s", xcode == NE ? "n" : ""); 1582 return; 1583 1584 case 'i': /* indirect call */ 1585 if (xcode == MEM) 1586 { 1587 if (GET_CODE (XEXP (x, 0)) == REG) 1588 /* Used in indirect function calls. 
*/ 1589 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]); 1590 else 1591 output_address (XEXP (x, 0)); 1592 } 1593 return; 1594 1595 case 'p': /* load/store */ 1596 if (xcode == MEM) 1597 { 1598 x = XEXP (x, 0); 1599 xcode = GET_CODE (x); 1600 } 1601 if (xcode == AND) 1602 { 1603 x = XEXP (x, 0); 1604 xcode = GET_CODE (x); 1605 } 1606 if (xcode == REG) 1607 fprintf (file, "d"); 1608 else if (xcode == CONST_INT) 1609 fprintf (file, "a"); 1610 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF) 1611 fprintf (file, "r"); 1612 else if (xcode == PLUS || xcode == LO_SUM) 1613 { 1614 if (GET_CODE (XEXP (x, 1)) == REG) 1615 fprintf (file, "x"); 1616 else 1617 fprintf (file, "d"); 1618 } 1619 return; 1620 1621 case 'e': 1622 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1623 val &= 0x7; 1624 output_addr_const (file, GEN_INT (val)); 1625 return; 1626 1627 case 'f': 1628 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1629 val &= 0x1f; 1630 output_addr_const (file, GEN_INT (val)); 1631 return; 1632 1633 case 'g': 1634 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1635 val &= 0x3f; 1636 output_addr_const (file, GEN_INT (val)); 1637 return; 1638 1639 case 'h': 1640 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1641 val = (val >> 3) & 0x1f; 1642 output_addr_const (file, GEN_INT (val)); 1643 return; 1644 1645 case 'E': 1646 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1647 val = -val; 1648 val &= 0x7; 1649 output_addr_const (file, GEN_INT (val)); 1650 return; 1651 1652 case 'F': 1653 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1654 val = -val; 1655 val &= 0x1f; 1656 output_addr_const (file, GEN_INT (val)); 1657 return; 1658 1659 case 'G': 1660 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1661 val = -val; 1662 val &= 0x3f; 1663 output_addr_const (file, GEN_INT (val)); 1664 return; 1665 1666 case 'H': 1667 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0)); 1668 val = -(val & -8ll); 1669 val = (val >> 3) & 0x1f; 1670 output_addr_const (file, GEN_INT (val)); 1671 return; 1672 1673 case 'v': 1674 case 'w': 1675 constant_to_array (mode, x, arr); 1676 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127; 1677 output_addr_const (file, GEN_INT (code == 'w' ? -val : val)); 1678 return; 1679 1680 case 0: 1681 if (xcode == REG) 1682 fprintf (file, "%s", reg_names[REGNO (x)]); 1683 else if (xcode == MEM) 1684 output_address (XEXP (x, 0)); 1685 else if (xcode == CONST_VECTOR) 1686 print_operand (file, CONST_VECTOR_ELT (x, 0), 0); 1687 else 1688 output_addr_const (file, x); 1689 return; 1690 1691 /* unused letters 1692 o qr u yz 1693 AB OPQR UVWXYZ */ 1694 default: 1695 output_operand_lossage ("invalid %%xn code"); 1696 } 1697 gcc_unreachable (); 1698 } 1699 1700 extern char call_used_regs[]; 1701 1702 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a 1703 caller saved register. For leaf functions it is more efficient to 1704 use a volatile register because we won't need to save and restore the 1705 pic register. This routine is only valid after register allocation 1706 is completed, so we can pick an unused register. 
*/ 1707 static rtx 1708 get_pic_reg (void) 1709 { 1710 rtx pic_reg = pic_offset_table_rtx; 1711 if (!reload_completed && !reload_in_progress) 1712 abort (); 1713 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM)) 1714 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM); 1715 return pic_reg; 1716 } 1717 1718 /* Split constant addresses to handle cases that are too large. 1719 Add in the pic register when in PIC mode. 1720 Split immediates that require more than 1 instruction. */ 1721 int 1722 spu_split_immediate (rtx * ops) 1723 { 1724 enum machine_mode mode = GET_MODE (ops[0]); 1725 enum immediate_class c = classify_immediate (ops[1], mode); 1726 1727 switch (c) 1728 { 1729 case IC_IL2: 1730 { 1731 unsigned char arrhi[16]; 1732 unsigned char arrlo[16]; 1733 rtx to, temp, hi, lo; 1734 int i; 1735 enum machine_mode imode = mode; 1736 /* We need to do reals as ints because the constant used in the 1737 IOR might not be a legitimate real constant. */ 1738 imode = int_mode_for_mode (mode); 1739 constant_to_array (mode, ops[1], arrhi); 1740 if (imode != mode) 1741 to = simplify_gen_subreg (imode, ops[0], mode, 0); 1742 else 1743 to = ops[0]; 1744 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode); 1745 for (i = 0; i < 16; i += 4) 1746 { 1747 arrlo[i + 2] = arrhi[i + 2]; 1748 arrlo[i + 3] = arrhi[i + 3]; 1749 arrlo[i + 0] = arrlo[i + 1] = 0; 1750 arrhi[i + 2] = arrhi[i + 3] = 0; 1751 } 1752 hi = array_to_constant (imode, arrhi); 1753 lo = array_to_constant (imode, arrlo); 1754 emit_move_insn (temp, hi); 1755 emit_insn (gen_rtx_SET 1756 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo))); 1757 return 1; 1758 } 1759 case IC_FSMBI2: 1760 { 1761 unsigned char arr_fsmbi[16]; 1762 unsigned char arr_andbi[16]; 1763 rtx to, reg_fsmbi, reg_and; 1764 int i; 1765 enum machine_mode imode = mode; 1766 /* We need to do reals as ints because the constant used in the 1767 * AND might not be a legitimate real constant. 
*/ 1768 imode = int_mode_for_mode (mode); 1769 constant_to_array (mode, ops[1], arr_fsmbi); 1770 if (imode != mode) 1771 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0); 1772 else 1773 to = ops[0]; 1774 for (i = 0; i < 16; i++) 1775 if (arr_fsmbi[i] != 0) 1776 { 1777 arr_andbi[0] = arr_fsmbi[i]; 1778 arr_fsmbi[i] = 0xff; 1779 } 1780 for (i = 1; i < 16; i++) 1781 arr_andbi[i] = arr_andbi[0]; 1782 reg_fsmbi = array_to_constant (imode, arr_fsmbi); 1783 reg_and = array_to_constant (imode, arr_andbi); 1784 emit_move_insn (to, reg_fsmbi); 1785 emit_insn (gen_rtx_SET 1786 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and))); 1787 return 1; 1788 } 1789 case IC_POOL: 1790 if (reload_in_progress || reload_completed) 1791 { 1792 rtx mem = force_const_mem (mode, ops[1]); 1793 if (TARGET_LARGE_MEM) 1794 { 1795 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0])); 1796 emit_move_insn (addr, XEXP (mem, 0)); 1797 mem = replace_equiv_address (mem, addr); 1798 } 1799 emit_move_insn (ops[0], mem); 1800 return 1; 1801 } 1802 break; 1803 case IC_IL1s: 1804 case IC_IL2s: 1805 if (reload_completed && GET_CODE (ops[1]) != HIGH) 1806 { 1807 if (c == IC_IL2s) 1808 { 1809 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1])); 1810 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1])); 1811 } 1812 else if (flag_pic) 1813 emit_insn (gen_pic (ops[0], ops[1])); 1814 if (flag_pic) 1815 { 1816 rtx pic_reg = get_pic_reg (); 1817 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg)); 1818 crtl->uses_pic_offset_table = 1; 1819 } 1820 return flag_pic || c == IC_IL2s; 1821 } 1822 break; 1823 case IC_IL1: 1824 case IC_FSMBI: 1825 case IC_CPAT: 1826 break; 1827 } 1828 return 0; 1829 } 1830 1831 /* SAVING is TRUE when we are generating the actual load and store 1832 instructions for REGNO. When determining the size of the stack 1833 needed for saving register we must allocate enough space for the 1834 worst case, because we don't always have the information early enough 1835 to not allocate it. But we can at least eliminate the actual loads 1836 and stores during the prologue/epilogue. */ 1837 static int 1838 need_to_save_reg (int regno, int saving) 1839 { 1840 if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) 1841 return 1; 1842 if (flag_pic 1843 && regno == PIC_OFFSET_TABLE_REGNUM 1844 && (!saving || crtl->uses_pic_offset_table) 1845 && (!saving 1846 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM))) 1847 return 1; 1848 return 0; 1849 } 1850 1851 /* This function is only correct starting with local register 1852 allocation */ 1853 int 1854 spu_saved_regs_size (void) 1855 { 1856 int reg_save_size = 0; 1857 int regno; 1858 1859 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno) 1860 if (need_to_save_reg (regno, 0)) 1861 reg_save_size += 0x10; 1862 return reg_save_size; 1863 } 1864 1865 static rtx 1866 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset) 1867 { 1868 rtx reg = gen_rtx_REG (V4SImode, regno); 1869 rtx mem = 1870 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset))); 1871 return emit_insn (gen_movv4si (mem, reg)); 1872 } 1873 1874 static rtx 1875 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset) 1876 { 1877 rtx reg = gen_rtx_REG (V4SImode, regno); 1878 rtx mem = 1879 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset))); 1880 return emit_insn (gen_movv4si (reg, mem)); 1881 } 1882 1883 /* This happens after reload, so we need to expand it. 
*/ 1884 static rtx 1885 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch) 1886 { 1887 rtx insn; 1888 if (satisfies_constraint_K (GEN_INT (imm))) 1889 { 1890 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm))); 1891 } 1892 else 1893 { 1894 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode))); 1895 insn = emit_insn (gen_addsi3 (dst, src, scratch)); 1896 if (REGNO (src) == REGNO (scratch)) 1897 abort (); 1898 } 1899 return insn; 1900 } 1901 1902 /* Return nonzero if this function is known to have a null epilogue. */ 1903 1904 int 1905 direct_return (void) 1906 { 1907 if (reload_completed) 1908 { 1909 if (cfun->static_chain_decl == 0 1910 && (spu_saved_regs_size () 1911 + get_frame_size () 1912 + crtl->outgoing_args_size 1913 + crtl->args.pretend_args_size == 0) 1914 && current_function_is_leaf) 1915 return 1; 1916 } 1917 return 0; 1918 } 1919 1920 /* 1921 The stack frame looks like this: 1922 +-------------+ 1923 | incoming | 1924 | args | 1925 AP -> +-------------+ 1926 | $lr save | 1927 +-------------+ 1928 prev SP | back chain | 1929 +-------------+ 1930 | var args | 1931 | reg save | crtl->args.pretend_args_size bytes 1932 +-------------+ 1933 | ... | 1934 | saved regs | spu_saved_regs_size() bytes 1935 FP -> +-------------+ 1936 | ... | 1937 | vars | get_frame_size() bytes 1938 HFP -> +-------------+ 1939 | ... | 1940 | outgoing | 1941 | args | crtl->outgoing_args_size bytes 1942 +-------------+ 1943 | $lr of next | 1944 | frame | 1945 +-------------+ 1946 | back chain | 1947 SP -> +-------------+ 1948 1949 */ 1950 void 1951 spu_expand_prologue (void) 1952 { 1953 HOST_WIDE_INT size = get_frame_size (), offset, regno; 1954 HOST_WIDE_INT total_size; 1955 HOST_WIDE_INT saved_regs_size; 1956 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); 1957 rtx scratch_reg_0, scratch_reg_1; 1958 rtx insn, real; 1959 1960 if (flag_pic && optimize == 0) 1961 crtl->uses_pic_offset_table = 1; 1962 1963 if (spu_naked_function_p (current_function_decl)) 1964 return; 1965 1966 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1); 1967 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2); 1968 1969 saved_regs_size = spu_saved_regs_size (); 1970 total_size = size + saved_regs_size 1971 + crtl->outgoing_args_size 1972 + crtl->args.pretend_args_size; 1973 1974 if (!current_function_is_leaf 1975 || cfun->calls_alloca || total_size > 0) 1976 total_size += STACK_POINTER_OFFSET; 1977 1978 /* Save this first because code after this might use the link 1979 register as a scratch register. 
*/ 1980 if (!current_function_is_leaf) 1981 { 1982 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16); 1983 RTX_FRAME_RELATED_P (insn) = 1; 1984 } 1985 1986 if (total_size > 0) 1987 { 1988 offset = -crtl->args.pretend_args_size; 1989 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) 1990 if (need_to_save_reg (regno, 1)) 1991 { 1992 offset -= 16; 1993 insn = frame_emit_store (regno, sp_reg, offset); 1994 RTX_FRAME_RELATED_P (insn) = 1; 1995 } 1996 } 1997 1998 if (flag_pic && crtl->uses_pic_offset_table) 1999 { 2000 rtx pic_reg = get_pic_reg (); 2001 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0)); 2002 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0)); 2003 } 2004 2005 if (total_size > 0) 2006 { 2007 if (flag_stack_check) 2008 { 2009 /* We compare against total_size-1 because 2010 ($sp >= total_size) <=> ($sp > total_size-1) */ 2011 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0)); 2012 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM); 2013 rtx size_v4si = spu_const (V4SImode, total_size - 1); 2014 if (!satisfies_constraint_K (GEN_INT (total_size - 1))) 2015 { 2016 emit_move_insn (scratch_v4si, size_v4si); 2017 size_v4si = scratch_v4si; 2018 } 2019 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si)); 2020 emit_insn (gen_vec_extractv4si 2021 (scratch_reg_0, scratch_v4si, GEN_INT (1))); 2022 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0))); 2023 } 2024 2025 /* Adjust the stack pointer, and make sure scratch_reg_0 contains 2026 the value of the previous $sp because we save it as the back 2027 chain. */ 2028 if (total_size <= 2000) 2029 { 2030 /* In this case we save the back chain first. */ 2031 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size); 2032 insn = 2033 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0); 2034 } 2035 else 2036 { 2037 insn = emit_move_insn (scratch_reg_0, sp_reg); 2038 insn = 2039 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1); 2040 } 2041 RTX_FRAME_RELATED_P (insn) = 1; 2042 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)); 2043 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real); 2044 2045 if (total_size > 2000) 2046 { 2047 /* Save the back chain ptr */ 2048 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0); 2049 } 2050 2051 if (frame_pointer_needed) 2052 { 2053 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM); 2054 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET 2055 + crtl->outgoing_args_size; 2056 /* Set the new frame_pointer */ 2057 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0); 2058 RTX_FRAME_RELATED_P (insn) = 1; 2059 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset)); 2060 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real); 2061 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY; 2062 } 2063 } 2064 2065 } 2066 2067 void 2068 spu_expand_epilogue (bool sibcall_p) 2069 { 2070 int size = get_frame_size (), offset, regno; 2071 HOST_WIDE_INT saved_regs_size, total_size; 2072 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); 2073 rtx jump, scratch_reg_0; 2074 2075 if (spu_naked_function_p (current_function_decl)) 2076 return; 2077 2078 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1); 2079 2080 saved_regs_size = spu_saved_regs_size (); 2081 total_size = size + saved_regs_size 2082 + crtl->outgoing_args_size 2083 + crtl->args.pretend_args_size; 2084 2085 if (!current_function_is_leaf 2086 || cfun->calls_alloca || total_size > 0) 2087 total_size += 
STACK_POINTER_OFFSET; 2088 2089 if (total_size > 0) 2090 { 2091 if (cfun->calls_alloca) 2092 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0); 2093 else 2094 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0); 2095 2096 2097 if (saved_regs_size > 0) 2098 { 2099 offset = -crtl->args.pretend_args_size; 2100 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno) 2101 if (need_to_save_reg (regno, 1)) 2102 { 2103 offset -= 0x10; 2104 frame_emit_load (regno, sp_reg, offset); 2105 } 2106 } 2107 } 2108 2109 if (!current_function_is_leaf) 2110 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16); 2111 2112 if (!sibcall_p) 2113 { 2114 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM)); 2115 jump = emit_jump_insn (gen__return ()); 2116 emit_barrier_after (jump); 2117 } 2118 2119 } 2120 2121 rtx 2122 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) 2123 { 2124 if (count != 0) 2125 return 0; 2126 /* This is inefficient because it ends up copying to a save-register 2127 which then gets saved even though $lr has already been saved. But 2128 it does generate better code for leaf functions and we don't need 2129 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only 2130 used for __builtin_return_address anyway, so maybe we don't care if 2131 it's inefficient. */ 2132 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM); 2133 } 2134 2135 2136 /* Given VAL, generate a constant appropriate for MODE. 2137 If MODE is a vector mode, every element will be VAL. 2138 For TImode, VAL will be zero extended to 128 bits. */ 2139 rtx 2140 spu_const (enum machine_mode mode, HOST_WIDE_INT val) 2141 { 2142 rtx inner; 2143 rtvec v; 2144 int units, i; 2145 2146 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT 2147 || GET_MODE_CLASS (mode) == MODE_FLOAT 2148 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT 2149 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT); 2150 2151 if (GET_MODE_CLASS (mode) == MODE_INT) 2152 return immed_double_const (val, 0, mode); 2153 2154 /* val is the bit representation of the float */ 2155 if (GET_MODE_CLASS (mode) == MODE_FLOAT) 2156 return hwint_to_const_double (mode, val); 2157 2158 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 2159 inner = immed_double_const (val, 0, GET_MODE_INNER (mode)); 2160 else 2161 inner = hwint_to_const_double (GET_MODE_INNER (mode), val); 2162 2163 units = GET_MODE_NUNITS (mode); 2164 2165 v = rtvec_alloc (units); 2166 2167 for (i = 0; i < units; ++i) 2168 RTVEC_ELT (v, i) = inner; 2169 2170 return gen_rtx_CONST_VECTOR (mode, v); 2171 } 2172 2173 /* Create a MODE vector constant from 4 ints. */ 2174 rtx 2175 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d) 2176 { 2177 unsigned char arr[16]; 2178 arr[0] = (a >> 24) & 0xff; 2179 arr[1] = (a >> 16) & 0xff; 2180 arr[2] = (a >> 8) & 0xff; 2181 arr[3] = (a >> 0) & 0xff; 2182 arr[4] = (b >> 24) & 0xff; 2183 arr[5] = (b >> 16) & 0xff; 2184 arr[6] = (b >> 8) & 0xff; 2185 arr[7] = (b >> 0) & 0xff; 2186 arr[8] = (c >> 24) & 0xff; 2187 arr[9] = (c >> 16) & 0xff; 2188 arr[10] = (c >> 8) & 0xff; 2189 arr[11] = (c >> 0) & 0xff; 2190 arr[12] = (d >> 24) & 0xff; 2191 arr[13] = (d >> 16) & 0xff; 2192 arr[14] = (d >> 8) & 0xff; 2193 arr[15] = (d >> 0) & 0xff; 2194 return array_to_constant(mode, arr); 2195 } 2196 2197 /* branch hint stuff */ 2198 2199 /* An array of these is used to propagate hints to predecessor blocks. */ 2200 struct spu_bb_info 2201 { 2202 rtx prop_jump; /* propagated from another block */ 2203 int bb_index; /* the original block. 
*/ 2204 }; 2205 static struct spu_bb_info *spu_bb_info; 2206 2207 #define STOP_HINT_P(INSN) \ 2208 (GET_CODE(INSN) == CALL_INSN \ 2209 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \ 2210 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4) 2211 2212 /* 1 when RTX is a hinted branch or its target. We keep track of 2213 what has been hinted so the safe-hint code can test it easily. */ 2214 #define HINTED_P(RTX) \ 2215 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging) 2216 2217 /* 1 when RTX is an insn that must be scheduled on an even boundary. */ 2218 #define SCHED_ON_EVEN_P(RTX) \ 2219 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct) 2220 2221 /* Emit a nop for INSN such that the two will dual issue. This assumes 2222 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop. 2223 We check for TImode to handle a MULTI1 insn which has dual issued its 2224 first instruction. get_pipe returns -1 for MULTI0, inline asm, or 2225 ADDR_VEC insns. */ 2226 static void 2227 emit_nop_for_insn (rtx insn) 2228 { 2229 int p; 2230 rtx new_insn; 2231 p = get_pipe (insn); 2232 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn)) 2233 new_insn = emit_insn_after (gen_lnop (), insn); 2234 else if (p == 1 && GET_MODE (insn) == TImode) 2235 { 2236 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn); 2237 PUT_MODE (new_insn, TImode); 2238 PUT_MODE (insn, VOIDmode); 2239 } 2240 else 2241 new_insn = emit_insn_after (gen_lnop (), insn); 2242 recog_memoized (new_insn); 2243 } 2244 2245 /* Insert nops in basic blocks to meet dual issue alignment 2246 requirements. Also make sure hbrp and hint instructions are at least 2247 one cycle apart, possibly inserting a nop. */ 2248 static void 2249 pad_bb(void) 2250 { 2251 rtx insn, next_insn, prev_insn, hbr_insn = 0; 2252 int length; 2253 int addr; 2254 2255 /* This sets up INSN_ADDRESSES. */ 2256 shorten_branches (get_insns ()); 2257 2258 /* Keep track of length added by nops. */ 2259 length = 0; 2260 2261 prev_insn = 0; 2262 insn = get_insns (); 2263 if (!active_insn_p (insn)) 2264 insn = next_active_insn (insn); 2265 for (; insn; insn = next_insn) 2266 { 2267 next_insn = next_active_insn (insn); 2268 if (INSN_CODE (insn) == CODE_FOR_iprefetch 2269 || INSN_CODE (insn) == CODE_FOR_hbr) 2270 { 2271 if (hbr_insn) 2272 { 2273 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn)); 2274 int a1 = INSN_ADDRESSES (INSN_UID (insn)); 2275 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode) 2276 || (a1 - a0 == 4)) 2277 { 2278 prev_insn = emit_insn_before (gen_lnop (), insn); 2279 PUT_MODE (prev_insn, GET_MODE (insn)); 2280 PUT_MODE (insn, TImode); 2281 length += 4; 2282 } 2283 } 2284 hbr_insn = insn; 2285 } 2286 if (INSN_CODE (insn) == CODE_FOR_blockage) 2287 { 2288 if (GET_MODE (insn) == TImode) 2289 PUT_MODE (next_insn, TImode); 2290 insn = next_insn; 2291 next_insn = next_active_insn (insn); 2292 } 2293 addr = INSN_ADDRESSES (INSN_UID (insn)); 2294 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn)) 2295 { 2296 if (((addr + length) & 7) != 0) 2297 { 2298 emit_nop_for_insn (prev_insn); 2299 length += 4; 2300 } 2301 } 2302 else if (GET_MODE (insn) == TImode 2303 && ((next_insn && GET_MODE (next_insn) != TImode) 2304 || get_attr_type (insn) == TYPE_MULTI0) 2305 && ((addr + length) & 7) != 0) 2306 { 2307 /* prev_insn will always be set because the first insn is 2308 always 8-byte aligned. 
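   Reaching this point means ((addr + length) & 7) is 4, i.e. INSN would start
   4 bytes past an 8-byte boundary, so the single 4-byte nop added next to
   PREV_INSN is enough to restore even alignment.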
*/ 2309 emit_nop_for_insn (prev_insn); 2310 length += 4; 2311 } 2312 prev_insn = insn; 2313 } 2314 } 2315 2316 2317 /* Routines for branch hints. */ 2318 2319 static void 2320 spu_emit_branch_hint (rtx before, rtx branch, rtx target, 2321 int distance, sbitmap blocks) 2322 { 2323 rtx branch_label = 0; 2324 rtx hint; 2325 rtx insn; 2326 rtx table; 2327 2328 if (before == 0 || branch == 0 || target == 0) 2329 return; 2330 2331 /* While scheduling we require hints to be no further than 600, so 2332 we need to enforce that here too */ 2333 if (distance > 600) 2334 return; 2335 2336 /* If we have a Basic block note, emit it after the basic block note. */ 2337 if (NOTE_INSN_BASIC_BLOCK_P (before)) 2338 before = NEXT_INSN (before); 2339 2340 branch_label = gen_label_rtx (); 2341 LABEL_NUSES (branch_label)++; 2342 LABEL_PRESERVE_P (branch_label) = 1; 2343 insn = emit_label_before (branch_label, branch); 2344 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label); 2345 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index); 2346 2347 hint = emit_insn_before (gen_hbr (branch_label, target), before); 2348 recog_memoized (hint); 2349 HINTED_P (branch) = 1; 2350 2351 if (GET_CODE (target) == LABEL_REF) 2352 HINTED_P (XEXP (target, 0)) = 1; 2353 else if (tablejump_p (branch, 0, &table)) 2354 { 2355 rtvec vec; 2356 int j; 2357 if (GET_CODE (PATTERN (table)) == ADDR_VEC) 2358 vec = XVEC (PATTERN (table), 0); 2359 else 2360 vec = XVEC (PATTERN (table), 1); 2361 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j) 2362 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1; 2363 } 2364 2365 if (distance >= 588) 2366 { 2367 /* Make sure the hint isn't scheduled any earlier than this point, 2368 which could make it too far for the branch offest to fit */ 2369 recog_memoized (emit_insn_before (gen_blockage (), hint)); 2370 } 2371 else if (distance <= 8 * 4) 2372 { 2373 /* To guarantee at least 8 insns between the hint and branch we 2374 insert nops. */ 2375 int d; 2376 for (d = distance; d < 8 * 4; d += 4) 2377 { 2378 insn = 2379 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint); 2380 recog_memoized (insn); 2381 } 2382 2383 /* Make sure any nops inserted aren't scheduled before the hint. */ 2384 recog_memoized (emit_insn_after (gen_blockage (), hint)); 2385 2386 /* Make sure any nops inserted aren't scheduled after the call. */ 2387 if (CALL_P (branch) && distance < 8 * 4) 2388 recog_memoized (emit_insn_before (gen_blockage (), branch)); 2389 } 2390 } 2391 2392 /* Returns 0 if we don't want a hint for this branch. Otherwise return 2393 the rtx for the branch target. */ 2394 static rtx 2395 get_branch_target (rtx branch) 2396 { 2397 if (GET_CODE (branch) == JUMP_INSN) 2398 { 2399 rtx set, src; 2400 2401 /* Return statements */ 2402 if (GET_CODE (PATTERN (branch)) == RETURN) 2403 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM); 2404 2405 /* jump table */ 2406 if (GET_CODE (PATTERN (branch)) == ADDR_VEC 2407 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC) 2408 return 0; 2409 2410 /* ASM GOTOs. */ 2411 if (extract_asm_operands (PATTERN (branch)) != NULL) 2412 return NULL; 2413 2414 set = single_set (branch); 2415 src = SET_SRC (set); 2416 if (GET_CODE (SET_DEST (set)) != PC) 2417 abort (); 2418 2419 if (GET_CODE (src) == IF_THEN_ELSE) 2420 { 2421 rtx lab = 0; 2422 rtx note = find_reg_note (branch, REG_BR_PROB, 0); 2423 if (note) 2424 { 2425 /* If the more probable case is not a fall through, then 2426 try a branch hint. 
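   For example, with REG_BR_PROB_BASE of 10000, a probability note above 6000
   picks the first arm of the IF_THEN_ELSE and one below 4000 picks the second,
   provided the chosen arm is not the fall-through PC; anything in between is
   considered too close to call and no hint is emitted.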
*/ 2427 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0)); 2428 if (prob > (REG_BR_PROB_BASE * 6 / 10) 2429 && GET_CODE (XEXP (src, 1)) != PC) 2430 lab = XEXP (src, 1); 2431 else if (prob < (REG_BR_PROB_BASE * 4 / 10) 2432 && GET_CODE (XEXP (src, 2)) != PC) 2433 lab = XEXP (src, 2); 2434 } 2435 if (lab) 2436 { 2437 if (GET_CODE (lab) == RETURN) 2438 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM); 2439 return lab; 2440 } 2441 return 0; 2442 } 2443 2444 return src; 2445 } 2446 else if (GET_CODE (branch) == CALL_INSN) 2447 { 2448 rtx call; 2449 /* All of our call patterns are in a PARALLEL and the CALL is 2450 the first pattern in the PARALLEL. */ 2451 if (GET_CODE (PATTERN (branch)) != PARALLEL) 2452 abort (); 2453 call = XVECEXP (PATTERN (branch), 0, 0); 2454 if (GET_CODE (call) == SET) 2455 call = SET_SRC (call); 2456 if (GET_CODE (call) != CALL) 2457 abort (); 2458 return XEXP (XEXP (call, 0), 0); 2459 } 2460 return 0; 2461 } 2462 2463 /* The special $hbr register is used to prevent the insn scheduler from 2464 moving hbr insns across instructions which invalidate them. It 2465 should only be used in a clobber, and this function searches for 2466 insns which clobber it. */ 2467 static bool 2468 insn_clobbers_hbr (rtx insn) 2469 { 2470 if (INSN_P (insn) 2471 && GET_CODE (PATTERN (insn)) == PARALLEL) 2472 { 2473 rtx parallel = PATTERN (insn); 2474 rtx clobber; 2475 int j; 2476 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--) 2477 { 2478 clobber = XVECEXP (parallel, 0, j); 2479 if (GET_CODE (clobber) == CLOBBER 2480 && GET_CODE (XEXP (clobber, 0)) == REG 2481 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM) 2482 return 1; 2483 } 2484 } 2485 return 0; 2486 } 2487 2488 /* Search up to 32 insns starting at FIRST: 2489 - at any kind of hinted branch, just return 2490 - at any unconditional branch in the first 15 insns, just return 2491 - at a call or indirect branch, after the first 15 insns, force it to 2492 an even address and return 2493 - at any unconditional branch, after the first 15 insns, force it to 2494 an even address. 2495 At then end of the search, insert an hbrp within 4 insns of FIRST, 2496 and an hbrp within 16 instructions of FIRST. 2497 */ 2498 static void 2499 insert_hbrp_for_ilb_runout (rtx first) 2500 { 2501 rtx insn, before_4 = 0, before_16 = 0; 2502 int addr = 0, length, first_addr = -1; 2503 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4; 2504 int insert_lnop_after = 0; 2505 for (insn = first; insn; insn = NEXT_INSN (insn)) 2506 if (INSN_P (insn)) 2507 { 2508 if (first_addr == -1) 2509 first_addr = INSN_ADDRESSES (INSN_UID (insn)); 2510 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr; 2511 length = get_attr_length (insn); 2512 2513 if (before_4 == 0 && addr + length >= 4 * 4) 2514 before_4 = insn; 2515 /* We test for 14 instructions because the first hbrp will add 2516 up to 2 instructions. */ 2517 if (before_16 == 0 && addr + length >= 14 * 4) 2518 before_16 = insn; 2519 2520 if (INSN_CODE (insn) == CODE_FOR_hbr) 2521 { 2522 /* Make sure an hbrp is at least 2 cycles away from a hint. 2523 Insert an lnop after the hbrp when necessary. 
*/ 2524 if (before_4 == 0 && addr > 0) 2525 { 2526 before_4 = insn; 2527 insert_lnop_after |= 1; 2528 } 2529 else if (before_4 && addr <= 4 * 4) 2530 insert_lnop_after |= 1; 2531 if (before_16 == 0 && addr > 10 * 4) 2532 { 2533 before_16 = insn; 2534 insert_lnop_after |= 2; 2535 } 2536 else if (before_16 && addr <= 14 * 4) 2537 insert_lnop_after |= 2; 2538 } 2539 2540 if (INSN_CODE (insn) == CODE_FOR_iprefetch) 2541 { 2542 if (addr < hbrp_addr0) 2543 hbrp_addr0 = addr; 2544 else if (addr < hbrp_addr1) 2545 hbrp_addr1 = addr; 2546 } 2547 2548 if (CALL_P (insn) || JUMP_P (insn)) 2549 { 2550 if (HINTED_P (insn)) 2551 return; 2552 2553 /* Any branch after the first 15 insns should be on an even 2554 address to avoid a special case branch. There might be 2555 some nops and/or hbrps inserted, so we test after 10 2556 insns. */ 2557 if (addr > 10 * 4) 2558 SCHED_ON_EVEN_P (insn) = 1; 2559 } 2560 2561 if (CALL_P (insn) || tablejump_p (insn, 0, 0)) 2562 return; 2563 2564 2565 if (addr + length >= 32 * 4) 2566 { 2567 gcc_assert (before_4 && before_16); 2568 if (hbrp_addr0 > 4 * 4) 2569 { 2570 insn = 2571 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4); 2572 recog_memoized (insn); 2573 INSN_ADDRESSES_NEW (insn, 2574 INSN_ADDRESSES (INSN_UID (before_4))); 2575 PUT_MODE (insn, GET_MODE (before_4)); 2576 PUT_MODE (before_4, TImode); 2577 if (insert_lnop_after & 1) 2578 { 2579 insn = emit_insn_before (gen_lnop (), before_4); 2580 recog_memoized (insn); 2581 INSN_ADDRESSES_NEW (insn, 2582 INSN_ADDRESSES (INSN_UID (before_4))); 2583 PUT_MODE (insn, TImode); 2584 } 2585 } 2586 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4) 2587 && hbrp_addr1 > 16 * 4) 2588 { 2589 insn = 2590 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16); 2591 recog_memoized (insn); 2592 INSN_ADDRESSES_NEW (insn, 2593 INSN_ADDRESSES (INSN_UID (before_16))); 2594 PUT_MODE (insn, GET_MODE (before_16)); 2595 PUT_MODE (before_16, TImode); 2596 if (insert_lnop_after & 2) 2597 { 2598 insn = emit_insn_before (gen_lnop (), before_16); 2599 recog_memoized (insn); 2600 INSN_ADDRESSES_NEW (insn, 2601 INSN_ADDRESSES (INSN_UID 2602 (before_16))); 2603 PUT_MODE (insn, TImode); 2604 } 2605 } 2606 return; 2607 } 2608 } 2609 else if (BARRIER_P (insn)) 2610 return; 2611 2612 } 2613 2614 /* The SPU might hang when it executes 48 inline instructions after a 2615 hinted branch jumps to its hinted target. The beginning of a 2616 function and the return from a call might have been hinted, and must 2617 be handled as well. To prevent a hang we insert 2 hbrps. The first 2618 should be within 6 insns of the branch target. The second should be 2619 within 22 insns of the branch target. When determining if hbrps are 2620 necessary, we look for only 32 inline instructions, because up to to 2621 12 nops and 4 hbrps could be inserted. Similarily, when inserting 2622 new hbrps, we insert them within 4 and 16 insns of the target. */ 2623 static void 2624 insert_hbrp (void) 2625 { 2626 rtx insn; 2627 if (TARGET_SAFE_HINTS) 2628 { 2629 shorten_branches (get_insns ()); 2630 /* Insert hbrp at beginning of function */ 2631 insn = next_active_insn (get_insns ()); 2632 if (insn) 2633 insert_hbrp_for_ilb_runout (insn); 2634 /* Insert hbrp after hinted targets. */ 2635 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 2636 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn)) 2637 insert_hbrp_for_ilb_runout (next_active_insn (insn)); 2638 } 2639 } 2640 2641 static int in_spu_reorg; 2642 2643 /* Insert branch hints. 
There are no branch optimizations after this 2644 pass, so it's safe to set our branch hints now. */ 2645 static void 2646 spu_machine_dependent_reorg (void) 2647 { 2648 sbitmap blocks; 2649 basic_block bb; 2650 rtx branch, insn; 2651 rtx branch_target = 0; 2652 int branch_addr = 0, insn_addr, required_dist = 0; 2653 int i; 2654 unsigned int j; 2655 2656 if (!TARGET_BRANCH_HINTS || optimize == 0) 2657 { 2658 /* We still do it for unoptimized code because an external 2659 function might have hinted a call or return. */ 2660 insert_hbrp (); 2661 pad_bb (); 2662 return; 2663 } 2664 2665 blocks = sbitmap_alloc (last_basic_block); 2666 sbitmap_zero (blocks); 2667 2668 in_spu_reorg = 1; 2669 compute_bb_for_insn (); 2670 2671 compact_blocks (); 2672 2673 spu_bb_info = 2674 (struct spu_bb_info *) xcalloc (n_basic_blocks, 2675 sizeof (struct spu_bb_info)); 2676 2677 /* We need exact insn addresses and lengths. */ 2678 shorten_branches (get_insns ()); 2679 2680 for (i = n_basic_blocks - 1; i >= 0; i--) 2681 { 2682 bb = BASIC_BLOCK (i); 2683 branch = 0; 2684 if (spu_bb_info[i].prop_jump) 2685 { 2686 branch = spu_bb_info[i].prop_jump; 2687 branch_target = get_branch_target (branch); 2688 branch_addr = INSN_ADDRESSES (INSN_UID (branch)); 2689 required_dist = spu_hint_dist; 2690 } 2691 /* Search from end of a block to beginning. In this loop, find 2692 jumps which need a branch and emit them only when: 2693 - it's an indirect branch and we're at the insn which sets 2694 the register 2695 - we're at an insn that will invalidate the hint. e.g., a 2696 call, another hint insn, inline asm that clobbers $hbr, and 2697 some inlined operations (divmodsi4). Don't consider jumps 2698 because they are only at the end of a block and are 2699 considered when we are deciding whether to propagate 2700 - we're getting too far away from the branch. The hbr insns 2701 only have a signed 10 bit offset 2702 We go back as far as possible so the branch will be considered 2703 for propagation when we get to the beginning of the block. */ 2704 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn)) 2705 { 2706 if (INSN_P (insn)) 2707 { 2708 insn_addr = INSN_ADDRESSES (INSN_UID (insn)); 2709 if (branch 2710 && ((GET_CODE (branch_target) == REG 2711 && set_of (branch_target, insn) != NULL_RTX) 2712 || insn_clobbers_hbr (insn) 2713 || branch_addr - insn_addr > 600)) 2714 { 2715 rtx next = NEXT_INSN (insn); 2716 int next_addr = INSN_ADDRESSES (INSN_UID (next)); 2717 if (insn != BB_END (bb) 2718 && branch_addr - next_addr >= required_dist) 2719 { 2720 if (dump_file) 2721 fprintf (dump_file, 2722 "hint for %i in block %i before %i\n", 2723 INSN_UID (branch), bb->index, 2724 INSN_UID (next)); 2725 spu_emit_branch_hint (next, branch, branch_target, 2726 branch_addr - next_addr, blocks); 2727 } 2728 branch = 0; 2729 } 2730 2731 /* JUMP_P will only be true at the end of a block. When 2732 branch is already set it means we've previously decided 2733 to propagate a hint for that branch into this block. */ 2734 if (CALL_P (insn) || (JUMP_P (insn) && !branch)) 2735 { 2736 branch = 0; 2737 if ((branch_target = get_branch_target (insn))) 2738 { 2739 branch = insn; 2740 branch_addr = insn_addr; 2741 required_dist = spu_hint_dist; 2742 } 2743 } 2744 } 2745 if (insn == BB_HEAD (bb)) 2746 break; 2747 } 2748 2749 if (branch) 2750 { 2751 /* If we haven't emitted a hint for this branch yet, it might 2752 be profitable to emit it in one of the predecessor blocks, 2753 especially for loops. 
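   The typical win is a small loop: the conditional branch at the bottom of the
   block is often too close to the top of its own block for a hint placed there
   to meet the required distance, while the fall-through predecessor that
   enters the loop has room to spare.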
*/ 2754 rtx bbend; 2755 basic_block prev = 0, prop = 0, prev2 = 0; 2756 int loop_exit = 0, simple_loop = 0; 2757 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn))); 2758 2759 for (j = 0; j < EDGE_COUNT (bb->preds); j++) 2760 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU) 2761 prev = EDGE_PRED (bb, j)->src; 2762 else 2763 prev2 = EDGE_PRED (bb, j)->src; 2764 2765 for (j = 0; j < EDGE_COUNT (bb->succs); j++) 2766 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT) 2767 loop_exit = 1; 2768 else if (EDGE_SUCC (bb, j)->dest == bb) 2769 simple_loop = 1; 2770 2771 /* If this branch is a loop exit then propagate to previous 2772 fallthru block. This catches the cases when it is a simple 2773 loop or when there is an initial branch into the loop. */ 2774 if (prev && (loop_exit || simple_loop) 2775 && prev->loop_depth <= bb->loop_depth) 2776 prop = prev; 2777 2778 /* If there is only one adjacent predecessor. Don't propagate 2779 outside this loop. This loop_depth test isn't perfect, but 2780 I'm not sure the loop_father member is valid at this point. */ 2781 else if (prev && single_pred_p (bb) 2782 && prev->loop_depth == bb->loop_depth) 2783 prop = prev; 2784 2785 /* If this is the JOIN block of a simple IF-THEN then 2786 propogate the hint to the HEADER block. */ 2787 else if (prev && prev2 2788 && EDGE_COUNT (bb->preds) == 2 2789 && EDGE_COUNT (prev->preds) == 1 2790 && EDGE_PRED (prev, 0)->src == prev2 2791 && prev2->loop_depth == bb->loop_depth 2792 && GET_CODE (branch_target) != REG) 2793 prop = prev; 2794 2795 /* Don't propagate when: 2796 - this is a simple loop and the hint would be too far 2797 - this is not a simple loop and there are 16 insns in 2798 this block already 2799 - the predecessor block ends in a branch that will be 2800 hinted 2801 - the predecessor block ends in an insn that invalidates 2802 the hint */ 2803 if (prop 2804 && prop->index >= 0 2805 && (bbend = BB_END (prop)) 2806 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) < 2807 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0 2808 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend))) 2809 { 2810 if (dump_file) 2811 fprintf (dump_file, "propagate from %i to %i (loop depth %i) " 2812 "for %i (loop_exit %i simple_loop %i dist %i)\n", 2813 bb->index, prop->index, bb->loop_depth, 2814 INSN_UID (branch), loop_exit, simple_loop, 2815 branch_addr - INSN_ADDRESSES (INSN_UID (bbend))); 2816 2817 spu_bb_info[prop->index].prop_jump = branch; 2818 spu_bb_info[prop->index].bb_index = i; 2819 } 2820 else if (branch_addr - next_addr >= required_dist) 2821 { 2822 if (dump_file) 2823 fprintf (dump_file, "hint for %i in block %i before %i\n", 2824 INSN_UID (branch), bb->index, 2825 INSN_UID (NEXT_INSN (insn))); 2826 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target, 2827 branch_addr - next_addr, blocks); 2828 } 2829 branch = 0; 2830 } 2831 } 2832 free (spu_bb_info); 2833 2834 if (!sbitmap_empty_p (blocks)) 2835 find_many_sub_basic_blocks (blocks); 2836 2837 /* We have to schedule to make sure alignment is ok. */ 2838 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE; 2839 2840 /* The hints need to be scheduled, so call it again. */ 2841 schedule_insns (); 2842 2843 insert_hbrp (); 2844 2845 pad_bb (); 2846 2847 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 2848 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr) 2849 { 2850 /* Adjust the LABEL_REF in a hint when we have inserted a nop 2851 between its branch label and the branch . 
We don't move the 2852 label because GCC expects it at the beginning of the block. */ 2853 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); 2854 rtx label_ref = XVECEXP (unspec, 0, 0); 2855 rtx label = XEXP (label_ref, 0); 2856 rtx branch; 2857 int offset = 0; 2858 for (branch = NEXT_INSN (label); 2859 !JUMP_P (branch) && !CALL_P (branch); 2860 branch = NEXT_INSN (branch)) 2861 if (NONJUMP_INSN_P (branch)) 2862 offset += get_attr_length (branch); 2863 if (offset > 0) 2864 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset); 2865 } 2866 2867 if (spu_flag_var_tracking) 2868 { 2869 df_analyze (); 2870 timevar_push (TV_VAR_TRACKING); 2871 variable_tracking_main (); 2872 timevar_pop (TV_VAR_TRACKING); 2873 df_finish_pass (false); 2874 } 2875 2876 free_bb_for_insn (); 2877 2878 in_spu_reorg = 0; 2879 } 2880 2881 2882 /* Insn scheduling routines, primarily for dual issue. */ 2883 static int 2884 spu_sched_issue_rate (void) 2885 { 2886 return 2; 2887 } 2888 2889 static int 2890 uses_ls_unit(rtx insn) 2891 { 2892 rtx set = single_set (insn); 2893 if (set != 0 2894 && (GET_CODE (SET_DEST (set)) == MEM 2895 || GET_CODE (SET_SRC (set)) == MEM)) 2896 return 1; 2897 return 0; 2898 } 2899 2900 static int 2901 get_pipe (rtx insn) 2902 { 2903 enum attr_type t; 2904 /* Handle inline asm */ 2905 if (INSN_CODE (insn) == -1) 2906 return -1; 2907 t = get_attr_type (insn); 2908 switch (t) 2909 { 2910 case TYPE_CONVERT: 2911 return -2; 2912 case TYPE_MULTI0: 2913 return -1; 2914 2915 case TYPE_FX2: 2916 case TYPE_FX3: 2917 case TYPE_SPR: 2918 case TYPE_NOP: 2919 case TYPE_FXB: 2920 case TYPE_FPD: 2921 case TYPE_FP6: 2922 case TYPE_FP7: 2923 return 0; 2924 2925 case TYPE_LNOP: 2926 case TYPE_SHUF: 2927 case TYPE_LOAD: 2928 case TYPE_STORE: 2929 case TYPE_BR: 2930 case TYPE_MULTI1: 2931 case TYPE_HBR: 2932 case TYPE_IPREFETCH: 2933 return 1; 2934 default: 2935 abort (); 2936 } 2937 } 2938 2939 2940 /* haifa-sched.c has a static variable that keeps track of the current 2941 cycle. It is passed to spu_sched_reorder, and we record it here for 2942 use by spu_sched_variable_issue. It won't be accurate if the 2943 scheduler updates it's clock_var between the two calls. */ 2944 static int clock_var; 2945 2946 /* This is used to keep track of insn alignment. Set to 0 at the 2947 beginning of each block and increased by the "length" attr of each 2948 insn scheduled. */ 2949 static int spu_sched_length; 2950 2951 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the 2952 ready list appropriately in spu_sched_reorder(). */ 2953 static int pipe0_clock; 2954 static int pipe1_clock; 2955 2956 static int prev_clock_var; 2957 2958 static int prev_priority; 2959 2960 /* The SPU needs to load the next ilb sometime during the execution of 2961 the previous ilb. There is a potential conflict if every cycle has a 2962 load or store. To avoid the conflict we make sure the load/store 2963 unit is free for at least one cycle during the execution of insns in 2964 the previous ilb. 
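   Concretely, spu_ls_first below records where the current run of load/store
   cycles began (as an offset into spu_sched_length), and once that run covers
   15 instructions with yet another load/store ready to issue,
   spu_sched_reorder emits an hbrp instead so the next instruction-line fetch
   gets a free cycle.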
*/ 2965 static int spu_ls_first; 2966 static int prev_ls_clock; 2967 2968 static void 2969 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, 2970 int max_ready ATTRIBUTE_UNUSED) 2971 { 2972 spu_sched_length = 0; 2973 } 2974 2975 static void 2976 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, 2977 int max_ready ATTRIBUTE_UNUSED) 2978 { 2979 if (align_labels > 4 || align_loops > 4 || align_jumps > 4) 2980 { 2981 /* When any block might be at least 8-byte aligned, assume they 2982 will all be at least 8-byte aligned to make sure dual issue 2983 works out correctly. */ 2984 spu_sched_length = 0; 2985 } 2986 spu_ls_first = INT_MAX; 2987 clock_var = -1; 2988 prev_ls_clock = -1; 2989 pipe0_clock = -1; 2990 pipe1_clock = -1; 2991 prev_clock_var = -1; 2992 prev_priority = -1; 2993 } 2994 2995 static int 2996 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED, 2997 int verbose ATTRIBUTE_UNUSED, rtx insn, int more) 2998 { 2999 int len; 3000 int p; 3001 if (GET_CODE (PATTERN (insn)) == USE 3002 || GET_CODE (PATTERN (insn)) == CLOBBER 3003 || (len = get_attr_length (insn)) == 0) 3004 return more; 3005 3006 spu_sched_length += len; 3007 3008 /* Reset on inline asm */ 3009 if (INSN_CODE (insn) == -1) 3010 { 3011 spu_ls_first = INT_MAX; 3012 pipe0_clock = -1; 3013 pipe1_clock = -1; 3014 return 0; 3015 } 3016 p = get_pipe (insn); 3017 if (p == 0) 3018 pipe0_clock = clock_var; 3019 else 3020 pipe1_clock = clock_var; 3021 3022 if (in_spu_reorg) 3023 { 3024 if (clock_var - prev_ls_clock > 1 3025 || INSN_CODE (insn) == CODE_FOR_iprefetch) 3026 spu_ls_first = INT_MAX; 3027 if (uses_ls_unit (insn)) 3028 { 3029 if (spu_ls_first == INT_MAX) 3030 spu_ls_first = spu_sched_length; 3031 prev_ls_clock = clock_var; 3032 } 3033 3034 /* The scheduler hasn't inserted the nop, but we will later on. 3035 Include those nops in spu_sched_length. */ 3036 if (prev_clock_var == clock_var && (spu_sched_length & 7)) 3037 spu_sched_length += 4; 3038 prev_clock_var = clock_var; 3039 3040 /* more is -1 when called from spu_sched_reorder for new insns 3041 that don't have INSN_PRIORITY */ 3042 if (more >= 0) 3043 prev_priority = INSN_PRIORITY (insn); 3044 } 3045 3046 /* Always try issueing more insns. spu_sched_reorder will decide 3047 when the cycle should be advanced. */ 3048 return 1; 3049 } 3050 3051 /* This function is called for both TARGET_SCHED_REORDER and 3052 TARGET_SCHED_REORDER2. */ 3053 static int 3054 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, 3055 rtx *ready, int *nreadyp, int clock) 3056 { 3057 int i, nready = *nreadyp; 3058 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i; 3059 rtx insn; 3060 3061 clock_var = clock; 3062 3063 if (nready <= 0 || pipe1_clock >= clock) 3064 return 0; 3065 3066 /* Find any rtl insns that don't generate assembly insns and schedule 3067 them first. 
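   That means insns with no recognized code, the blockage pattern, and anything
   whose length attribute is zero, as tested in the loop below; issuing them
   immediately keeps them from perturbing the dual-issue bookkeeping done for
   real instructions.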
*/ 3068 for (i = nready - 1; i >= 0; i--) 3069 { 3070 insn = ready[i]; 3071 if (INSN_CODE (insn) == -1 3072 || INSN_CODE (insn) == CODE_FOR_blockage 3073 || (INSN_P (insn) && get_attr_length (insn) == 0)) 3074 { 3075 ready[i] = ready[nready - 1]; 3076 ready[nready - 1] = insn; 3077 return 1; 3078 } 3079 } 3080 3081 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1; 3082 for (i = 0; i < nready; i++) 3083 if (INSN_CODE (ready[i]) != -1) 3084 { 3085 insn = ready[i]; 3086 switch (get_attr_type (insn)) 3087 { 3088 default: 3089 case TYPE_MULTI0: 3090 case TYPE_CONVERT: 3091 case TYPE_FX2: 3092 case TYPE_FX3: 3093 case TYPE_SPR: 3094 case TYPE_NOP: 3095 case TYPE_FXB: 3096 case TYPE_FPD: 3097 case TYPE_FP6: 3098 case TYPE_FP7: 3099 pipe_0 = i; 3100 break; 3101 case TYPE_LOAD: 3102 case TYPE_STORE: 3103 pipe_ls = i; 3104 case TYPE_LNOP: 3105 case TYPE_SHUF: 3106 case TYPE_BR: 3107 case TYPE_MULTI1: 3108 case TYPE_HBR: 3109 pipe_1 = i; 3110 break; 3111 case TYPE_IPREFETCH: 3112 pipe_hbrp = i; 3113 break; 3114 } 3115 } 3116 3117 /* In the first scheduling phase, schedule loads and stores together 3118 to increase the chance they will get merged during postreload CSE. */ 3119 if (!reload_completed && pipe_ls >= 0) 3120 { 3121 insn = ready[pipe_ls]; 3122 ready[pipe_ls] = ready[nready - 1]; 3123 ready[nready - 1] = insn; 3124 return 1; 3125 } 3126 3127 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */ 3128 if (pipe_hbrp >= 0) 3129 pipe_1 = pipe_hbrp; 3130 3131 /* When we have loads/stores in every cycle of the last 15 insns and 3132 we are about to schedule another load/store, emit an hbrp insn 3133 instead. */ 3134 if (in_spu_reorg 3135 && spu_sched_length - spu_ls_first >= 4 * 15 3136 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls) 3137 { 3138 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3))); 3139 recog_memoized (insn); 3140 if (pipe0_clock < clock) 3141 PUT_MODE (insn, TImode); 3142 spu_sched_variable_issue (file, verbose, insn, -1); 3143 return 0; 3144 } 3145 3146 /* In general, we want to emit nops to increase dual issue, but dual 3147 issue isn't faster when one of the insns could be scheduled later 3148 without effecting the critical path. We look at INSN_PRIORITY to 3149 make a good guess, but it isn't perfect so -mdual-nops=n can be 3150 used to effect it. */ 3151 if (in_spu_reorg && spu_dual_nops < 10) 3152 { 3153 /* When we are at an even address and we are not issueing nops to 3154 improve scheduling then we need to advance the cycle. */ 3155 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock 3156 && (spu_dual_nops == 0 3157 || (pipe_1 != -1 3158 && prev_priority > 3159 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops))) 3160 return 0; 3161 3162 /* When at an odd address, schedule the highest priority insn 3163 without considering pipeline. */ 3164 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock 3165 && (spu_dual_nops == 0 3166 || (prev_priority > 3167 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops))) 3168 return 1; 3169 } 3170 3171 3172 /* We haven't issued a pipe0 insn yet this cycle, if there is a 3173 pipe0 insn in the ready list, schedule it. */ 3174 if (pipe0_clock < clock && pipe_0 >= 0) 3175 schedule_i = pipe_0; 3176 3177 /* Either we've scheduled a pipe0 insn already or there is no pipe0 3178 insn to schedule. Put a pipe1 insn at the front of the ready list. 
*/
  else
    schedule_i = pipe_1;

  if (schedule_i > -1)
    {
      insn = ready[schedule_i];
      ready[schedule_i] = ready[nready - 1];
      ready[nready - 1] = insn;
      return 1;
    }
  return 0;
}

/* INSN is dependent on DEP_INSN. */
static int
spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  rtx set;

  /* The blockage pattern is used to prevent instructions from being
     moved across it and has no cost. */
  if (INSN_CODE (insn) == CODE_FOR_blockage
      || INSN_CODE (dep_insn) == CODE_FOR_blockage)
    return 0;

  if ((INSN_P (insn) && get_attr_length (insn) == 0)
      || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
    return 0;

  /* Make sure hbrps are spread out. */
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
      && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
    return 8;

  /* Make sure hints and hbrps are 2 cycles apart. */
  if ((INSN_CODE (insn) == CODE_FOR_iprefetch
       || INSN_CODE (insn) == CODE_FOR_hbr)
      && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
          || INSN_CODE (dep_insn) == CODE_FOR_hbr))
    return 2;

  /* An hbrp has no real dependency on other insns. */
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
      || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
    return 0;

  /* Assuming that it is unlikely an argument register will be used in
     the first cycle of the called function, we reduce the cost for
     slightly better scheduling of dep_insn.  When not hinted, the
     mispredicted branch would hide the cost as well. */
  if (CALL_P (insn))
    {
      rtx target = get_branch_target (insn);
      if (GET_CODE (target) != REG || !set_of (target, insn))
        return cost - 2;
      return cost;
    }

  /* And when returning from a function, let's assume the return values
     are completed sooner too. */
  if (CALL_P (dep_insn))
    return cost - 2;

  /* Make sure an instruction that loads from the back chain is scheduled
     away from the return instruction so a hint is more likely to get
     issued. */
  if (INSN_CODE (insn) == CODE_FOR__return
      && (set = single_set (dep_insn))
      && GET_CODE (SET_DEST (set)) == REG
      && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
    return 20;

  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
     scheduler makes every insn in a block anti-dependent on the final
     jump_insn.  We adjust here so higher cost insns will get scheduled
     earlier. */
  if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return insn_cost (dep_insn) - 3;

  return cost;
}

/* Create a CONST_DOUBLE from a string.
*/ 3262 struct rtx_def * 3263 spu_float_const (const char *string, enum machine_mode mode) 3264 { 3265 REAL_VALUE_TYPE value; 3266 value = REAL_VALUE_ATOF (string, mode); 3267 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode); 3268 } 3269 3270 int 3271 spu_constant_address_p (rtx x) 3272 { 3273 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF 3274 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST 3275 || GET_CODE (x) == HIGH); 3276 } 3277 3278 static enum spu_immediate 3279 which_immediate_load (HOST_WIDE_INT val) 3280 { 3281 gcc_assert (val == trunc_int_for_mode (val, SImode)); 3282 3283 if (val >= -0x8000 && val <= 0x7fff) 3284 return SPU_IL; 3285 if (val >= 0 && val <= 0x3ffff) 3286 return SPU_ILA; 3287 if ((val & 0xffff) == ((val >> 16) & 0xffff)) 3288 return SPU_ILH; 3289 if ((val & 0xffff) == 0) 3290 return SPU_ILHU; 3291 3292 return SPU_NONE; 3293 } 3294 3295 /* Return true when OP can be loaded by one of the il instructions, or 3296 when flow2 is not completed and OP can be loaded using ilhu and iohl. */ 3297 int 3298 immediate_load_p (rtx op, enum machine_mode mode) 3299 { 3300 if (CONSTANT_P (op)) 3301 { 3302 enum immediate_class c = classify_immediate (op, mode); 3303 return c == IC_IL1 || c == IC_IL1s 3304 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s)); 3305 } 3306 return 0; 3307 } 3308 3309 /* Return true if the first SIZE bytes of arr is a constant that can be 3310 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART 3311 represent the size and offset of the instruction to use. */ 3312 static int 3313 cpat_info(unsigned char *arr, int size, int *prun, int *pstart) 3314 { 3315 int cpat, run, i, start; 3316 cpat = 1; 3317 run = 0; 3318 start = -1; 3319 for (i = 0; i < size && cpat; i++) 3320 if (arr[i] != i+16) 3321 { 3322 if (!run) 3323 { 3324 start = i; 3325 if (arr[i] == 3) 3326 run = 1; 3327 else if (arr[i] == 2 && arr[i+1] == 3) 3328 run = 2; 3329 else if (arr[i] == 0) 3330 { 3331 while (arr[i+run] == run && i+run < 16) 3332 run++; 3333 if (run != 4 && run != 8) 3334 cpat = 0; 3335 } 3336 else 3337 cpat = 0; 3338 if ((i & (run-1)) != 0) 3339 cpat = 0; 3340 i += run; 3341 } 3342 else 3343 cpat = 0; 3344 } 3345 if (cpat && (run || size < 16)) 3346 { 3347 if (run == 0) 3348 run = 1; 3349 if (prun) 3350 *prun = run; 3351 if (pstart) 3352 *pstart = start == -1 ? 16-run : start; 3353 return 1; 3354 } 3355 return 0; 3356 } 3357 3358 /* OP is a CONSTANT_P. Determine what instructions can be used to load 3359 it into a register. MODE is only valid when OP is a CONST_INT. */ 3360 static enum immediate_class 3361 classify_immediate (rtx op, enum machine_mode mode) 3362 { 3363 HOST_WIDE_INT val; 3364 unsigned char arr[16]; 3365 int i, j, repeated, fsmbi, repeat; 3366 3367 gcc_assert (CONSTANT_P (op)); 3368 3369 if (GET_MODE (op) != VOIDmode) 3370 mode = GET_MODE (op); 3371 3372 /* A V4SI const_vector with all identical symbols is ok. */ 3373 if (!flag_pic 3374 && mode == V4SImode 3375 && GET_CODE (op) == CONST_VECTOR 3376 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT 3377 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE 3378 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1) 3379 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2) 3380 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3)) 3381 op = CONST_VECTOR_ELT (op, 0); 3382 3383 switch (GET_CODE (op)) 3384 { 3385 case SYMBOL_REF: 3386 case LABEL_REF: 3387 return TARGET_LARGE_MEM ? 
IC_IL2s : IC_IL1s; 3388 3389 case CONST: 3390 /* We can never know if the resulting address fits in 18 bits and can be 3391 loaded with ila. For now, assume the address will not overflow if 3392 the displacement is "small" (fits 'K' constraint). */ 3393 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS) 3394 { 3395 rtx sym = XEXP (XEXP (op, 0), 0); 3396 rtx cst = XEXP (XEXP (op, 0), 1); 3397 3398 if (GET_CODE (sym) == SYMBOL_REF 3399 && GET_CODE (cst) == CONST_INT 3400 && satisfies_constraint_K (cst)) 3401 return IC_IL1s; 3402 } 3403 return IC_IL2s; 3404 3405 case HIGH: 3406 return IC_IL1s; 3407 3408 case CONST_VECTOR: 3409 for (i = 0; i < GET_MODE_NUNITS (mode); i++) 3410 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT 3411 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE) 3412 return IC_POOL; 3413 /* Fall through. */ 3414 3415 case CONST_INT: 3416 case CONST_DOUBLE: 3417 constant_to_array (mode, op, arr); 3418 3419 /* Check that each 4-byte slot is identical. */ 3420 repeated = 1; 3421 for (i = 4; i < 16; i += 4) 3422 for (j = 0; j < 4; j++) 3423 if (arr[j] != arr[i + j]) 3424 repeated = 0; 3425 3426 if (repeated) 3427 { 3428 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 3429 val = trunc_int_for_mode (val, SImode); 3430 3431 if (which_immediate_load (val) != SPU_NONE) 3432 return IC_IL1; 3433 } 3434 3435 /* Any mode of 2 bytes or smaller can be loaded with an il 3436 instruction. */ 3437 gcc_assert (GET_MODE_SIZE (mode) > 2); 3438 3439 fsmbi = 1; 3440 repeat = 0; 3441 for (i = 0; i < 16 && fsmbi; i++) 3442 if (arr[i] != 0 && repeat == 0) 3443 repeat = arr[i]; 3444 else if (arr[i] != 0 && arr[i] != repeat) 3445 fsmbi = 0; 3446 if (fsmbi) 3447 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2; 3448 3449 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0)) 3450 return IC_CPAT; 3451 3452 if (repeated) 3453 return IC_IL2; 3454 3455 return IC_POOL; 3456 default: 3457 break; 3458 } 3459 gcc_unreachable (); 3460 } 3461 3462 static enum spu_immediate 3463 which_logical_immediate (HOST_WIDE_INT val) 3464 { 3465 gcc_assert (val == trunc_int_for_mode (val, SImode)); 3466 3467 if (val >= -0x200 && val <= 0x1ff) 3468 return SPU_ORI; 3469 if (val >= 0 && val <= 0xffff) 3470 return SPU_IOHL; 3471 if ((val & 0xffff) == ((val >> 16) & 0xffff)) 3472 { 3473 val = trunc_int_for_mode (val, HImode); 3474 if (val >= -0x200 && val <= 0x1ff) 3475 return SPU_ORHI; 3476 if ((val & 0xff) == ((val >> 8) & 0xff)) 3477 { 3478 val = trunc_int_for_mode (val, QImode); 3479 if (val >= -0x200 && val <= 0x1ff) 3480 return SPU_ORBI; 3481 } 3482 } 3483 return SPU_NONE; 3484 } 3485 3486 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or 3487 CONST_DOUBLEs. 
*/ 3488 static int 3489 const_vector_immediate_p (rtx x) 3490 { 3491 int i; 3492 gcc_assert (GET_CODE (x) == CONST_VECTOR); 3493 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++) 3494 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT 3495 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE) 3496 return 0; 3497 return 1; 3498 } 3499 3500 int 3501 logical_immediate_p (rtx op, enum machine_mode mode) 3502 { 3503 HOST_WIDE_INT val; 3504 unsigned char arr[16]; 3505 int i, j; 3506 3507 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE 3508 || GET_CODE (op) == CONST_VECTOR); 3509 3510 if (GET_CODE (op) == CONST_VECTOR 3511 && !const_vector_immediate_p (op)) 3512 return 0; 3513 3514 if (GET_MODE (op) != VOIDmode) 3515 mode = GET_MODE (op); 3516 3517 constant_to_array (mode, op, arr); 3518 3519 /* Check that bytes are repeated. */ 3520 for (i = 4; i < 16; i += 4) 3521 for (j = 0; j < 4; j++) 3522 if (arr[j] != arr[i + j]) 3523 return 0; 3524 3525 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 3526 val = trunc_int_for_mode (val, SImode); 3527 3528 i = which_logical_immediate (val); 3529 return i != SPU_NONE && i != SPU_IOHL; 3530 } 3531 3532 int 3533 iohl_immediate_p (rtx op, enum machine_mode mode) 3534 { 3535 HOST_WIDE_INT val; 3536 unsigned char arr[16]; 3537 int i, j; 3538 3539 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE 3540 || GET_CODE (op) == CONST_VECTOR); 3541 3542 if (GET_CODE (op) == CONST_VECTOR 3543 && !const_vector_immediate_p (op)) 3544 return 0; 3545 3546 if (GET_MODE (op) != VOIDmode) 3547 mode = GET_MODE (op); 3548 3549 constant_to_array (mode, op, arr); 3550 3551 /* Check that bytes are repeated. */ 3552 for (i = 4; i < 16; i += 4) 3553 for (j = 0; j < 4; j++) 3554 if (arr[j] != arr[i + j]) 3555 return 0; 3556 3557 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 3558 val = trunc_int_for_mode (val, SImode); 3559 3560 return val >= 0 && val <= 0xffff; 3561 } 3562 3563 int 3564 arith_immediate_p (rtx op, enum machine_mode mode, 3565 HOST_WIDE_INT low, HOST_WIDE_INT high) 3566 { 3567 HOST_WIDE_INT val; 3568 unsigned char arr[16]; 3569 int bytes, i, j; 3570 3571 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE 3572 || GET_CODE (op) == CONST_VECTOR); 3573 3574 if (GET_CODE (op) == CONST_VECTOR 3575 && !const_vector_immediate_p (op)) 3576 return 0; 3577 3578 if (GET_MODE (op) != VOIDmode) 3579 mode = GET_MODE (op); 3580 3581 constant_to_array (mode, op, arr); 3582 3583 if (VECTOR_MODE_P (mode)) 3584 mode = GET_MODE_INNER (mode); 3585 3586 bytes = GET_MODE_SIZE (mode); 3587 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0); 3588 3589 /* Check that bytes are repeated. */ 3590 for (i = bytes; i < 16; i += bytes) 3591 for (j = 0; j < bytes; j++) 3592 if (arr[j] != arr[i + j]) 3593 return 0; 3594 3595 val = arr[0]; 3596 for (j = 1; j < bytes; j++) 3597 val = (val << 8) | arr[j]; 3598 3599 val = trunc_int_for_mode (val, mode); 3600 3601 return val >= low && val <= high; 3602 } 3603 3604 /* TRUE when op is an immediate and an exact power of 2, and given that 3605 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector, 3606 all entries must be the same. 
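   For example, the SFmode bit pattern 0x41000000 (8.0f) has a zero mantissa
   and a biased exponent of 130, so exp below evaluates to 130 - 127 = 3 and
   the test succeeds whenever 3 lies within [LOW, HIGH].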
*/ 3607 bool 3608 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high) 3609 { 3610 enum machine_mode int_mode; 3611 HOST_WIDE_INT val; 3612 unsigned char arr[16]; 3613 int bytes, i, j; 3614 3615 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE 3616 || GET_CODE (op) == CONST_VECTOR); 3617 3618 if (GET_CODE (op) == CONST_VECTOR 3619 && !const_vector_immediate_p (op)) 3620 return 0; 3621 3622 if (GET_MODE (op) != VOIDmode) 3623 mode = GET_MODE (op); 3624 3625 constant_to_array (mode, op, arr); 3626 3627 if (VECTOR_MODE_P (mode)) 3628 mode = GET_MODE_INNER (mode); 3629 3630 bytes = GET_MODE_SIZE (mode); 3631 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0); 3632 3633 /* Check that bytes are repeated. */ 3634 for (i = bytes; i < 16; i += bytes) 3635 for (j = 0; j < bytes; j++) 3636 if (arr[j] != arr[i + j]) 3637 return 0; 3638 3639 val = arr[0]; 3640 for (j = 1; j < bytes; j++) 3641 val = (val << 8) | arr[j]; 3642 3643 val = trunc_int_for_mode (val, int_mode); 3644 3645 /* Currently, we only handle SFmode */ 3646 gcc_assert (mode == SFmode); 3647 if (mode == SFmode) 3648 { 3649 int exp = (val >> 23) - 127; 3650 return val > 0 && (val & 0x007fffff) == 0 3651 && exp >= low && exp <= high; 3652 } 3653 return FALSE; 3654 } 3655 3656 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */ 3657 3658 static int 3659 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED) 3660 { 3661 rtx x = *px; 3662 tree decl; 3663 3664 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) 3665 { 3666 rtx plus = XEXP (x, 0); 3667 rtx op0 = XEXP (plus, 0); 3668 rtx op1 = XEXP (plus, 1); 3669 if (GET_CODE (op1) == CONST_INT) 3670 x = op0; 3671 } 3672 3673 return (GET_CODE (x) == SYMBOL_REF 3674 && (decl = SYMBOL_REF_DECL (x)) != 0 3675 && TREE_CODE (decl) == VAR_DECL 3676 && TYPE_ADDR_SPACE (TREE_TYPE (decl))); 3677 } 3678 3679 /* We accept: 3680 - any 32-bit constant (SImode, SFmode) 3681 - any constant that can be generated with fsmbi (any mode) 3682 - a 64-bit constant where the high and low bits are identical 3683 (DImode, DFmode) 3684 - a 128-bit constant where the four 32-bit words match. */ 3685 int 3686 spu_legitimate_constant_p (rtx x) 3687 { 3688 if (GET_CODE (x) == HIGH) 3689 x = XEXP (x, 0); 3690 3691 /* Reject any __ea qualified reference. These can't appear in 3692 instructions but must be forced to the constant pool. */ 3693 if (for_each_rtx (&x, ea_symbol_ref, 0)) 3694 return 0; 3695 3696 /* V4SI with all identical symbols is valid. */ 3697 if (!flag_pic 3698 && GET_MODE (x) == V4SImode 3699 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF 3700 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF 3701 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST)) 3702 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1) 3703 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2) 3704 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3); 3705 3706 if (GET_CODE (x) == CONST_VECTOR 3707 && !const_vector_immediate_p (x)) 3708 return 0; 3709 return 1; 3710 } 3711 3712 /* Valid address are: 3713 - symbol_ref, label_ref, const 3714 - reg 3715 - reg + const_int, where const_int is 16 byte aligned 3716 - reg + reg, alignment doesn't matter 3717 The alignment matters in the reg+const case because lqd and stqd 3718 ignore the 4 least significant bits of the const. We only care about 3719 16 byte modes because the expand phase will change all smaller MEM 3720 references to TImode. 
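   For example, an lqd of a 16-byte value at reg + 36 would silently behave
   like reg + 32, so for such modes only offsets that are a multiple of 16 are
   accepted here.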
*/ 3721 static bool 3722 spu_legitimate_address_p (enum machine_mode mode, 3723 rtx x, bool reg_ok_strict) 3724 { 3725 int aligned = GET_MODE_SIZE (mode) >= 16; 3726 if (aligned 3727 && GET_CODE (x) == AND 3728 && GET_CODE (XEXP (x, 1)) == CONST_INT 3729 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16) 3730 x = XEXP (x, 0); 3731 switch (GET_CODE (x)) 3732 { 3733 case LABEL_REF: 3734 return !TARGET_LARGE_MEM; 3735 3736 case SYMBOL_REF: 3737 case CONST: 3738 /* Keep __ea references until reload so that spu_expand_mov can see them 3739 in MEMs. */ 3740 if (ea_symbol_ref (&x, 0)) 3741 return !reload_in_progress && !reload_completed; 3742 return !TARGET_LARGE_MEM; 3743 3744 case CONST_INT: 3745 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff; 3746 3747 case SUBREG: 3748 x = XEXP (x, 0); 3749 if (REG_P (x)) 3750 return 0; 3751 3752 case REG: 3753 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict); 3754 3755 case PLUS: 3756 case LO_SUM: 3757 { 3758 rtx op0 = XEXP (x, 0); 3759 rtx op1 = XEXP (x, 1); 3760 if (GET_CODE (op0) == SUBREG) 3761 op0 = XEXP (op0, 0); 3762 if (GET_CODE (op1) == SUBREG) 3763 op1 = XEXP (op1, 0); 3764 if (GET_CODE (op0) == REG 3765 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) 3766 && GET_CODE (op1) == CONST_INT 3767 && INTVAL (op1) >= -0x2000 3768 && INTVAL (op1) <= 0x1fff 3769 && (!aligned || (INTVAL (op1) & 15) == 0)) 3770 return TRUE; 3771 if (GET_CODE (op0) == REG 3772 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) 3773 && GET_CODE (op1) == REG 3774 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict)) 3775 return TRUE; 3776 } 3777 break; 3778 3779 default: 3780 break; 3781 } 3782 return FALSE; 3783 } 3784 3785 /* Like spu_legitimate_address_p, except with named addresses. */ 3786 static bool 3787 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x, 3788 bool reg_ok_strict, addr_space_t as) 3789 { 3790 if (as == ADDR_SPACE_EA) 3791 return (REG_P (x) && (GET_MODE (x) == EAmode)); 3792 3793 else if (as != ADDR_SPACE_GENERIC) 3794 gcc_unreachable (); 3795 3796 return spu_legitimate_address_p (mode, x, reg_ok_strict); 3797 } 3798 3799 /* When the address is reg + const_int, force the const_int into a 3800 register. */ 3801 rtx 3802 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 3803 enum machine_mode mode ATTRIBUTE_UNUSED) 3804 { 3805 rtx op0, op1; 3806 /* Make sure both operands are registers. */ 3807 if (GET_CODE (x) == PLUS) 3808 { 3809 op0 = XEXP (x, 0); 3810 op1 = XEXP (x, 1); 3811 if (ALIGNED_SYMBOL_REF_P (op0)) 3812 { 3813 op0 = force_reg (Pmode, op0); 3814 mark_reg_pointer (op0, 128); 3815 } 3816 else if (GET_CODE (op0) != REG) 3817 op0 = force_reg (Pmode, op0); 3818 if (ALIGNED_SYMBOL_REF_P (op1)) 3819 { 3820 op1 = force_reg (Pmode, op1); 3821 mark_reg_pointer (op1, 128); 3822 } 3823 else if (GET_CODE (op1) != REG) 3824 op1 = force_reg (Pmode, op1); 3825 x = gen_rtx_PLUS (Pmode, op0, op1); 3826 } 3827 return x; 3828 } 3829 3830 /* Like spu_legitimate_address, except with named address support. */ 3831 static rtx 3832 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode, 3833 addr_space_t as) 3834 { 3835 if (as != ADDR_SPACE_GENERIC) 3836 return x; 3837 3838 return spu_legitimize_address (x, oldx, mode); 3839 } 3840 3841 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in 3842 struct attribute_spec.handler. 
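   Presumably this is the handler used for function-only attributes such as
   "naked" (tested by spu_naked_function_p below), e.g.
       void trap_handler (void) __attribute__ ((naked));
   where applying the attribute to a non-function only draws the warning.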
*/ 3843 static tree 3844 spu_handle_fndecl_attribute (tree * node, 3845 tree name, 3846 tree args ATTRIBUTE_UNUSED, 3847 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs) 3848 { 3849 if (TREE_CODE (*node) != FUNCTION_DECL) 3850 { 3851 warning (0, "%qE attribute only applies to functions", 3852 name); 3853 *no_add_attrs = true; 3854 } 3855 3856 return NULL_TREE; 3857 } 3858 3859 /* Handle the "vector" attribute. */ 3860 static tree 3861 spu_handle_vector_attribute (tree * node, tree name, 3862 tree args ATTRIBUTE_UNUSED, 3863 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs) 3864 { 3865 tree type = *node, result = NULL_TREE; 3866 enum machine_mode mode; 3867 int unsigned_p; 3868 3869 while (POINTER_TYPE_P (type) 3870 || TREE_CODE (type) == FUNCTION_TYPE 3871 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE) 3872 type = TREE_TYPE (type); 3873 3874 mode = TYPE_MODE (type); 3875 3876 unsigned_p = TYPE_UNSIGNED (type); 3877 switch (mode) 3878 { 3879 case DImode: 3880 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node); 3881 break; 3882 case SImode: 3883 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node); 3884 break; 3885 case HImode: 3886 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node); 3887 break; 3888 case QImode: 3889 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node); 3890 break; 3891 case SFmode: 3892 result = V4SF_type_node; 3893 break; 3894 case DFmode: 3895 result = V2DF_type_node; 3896 break; 3897 default: 3898 break; 3899 } 3900 3901 /* Propagate qualifiers attached to the element type 3902 onto the vector type. */ 3903 if (result && result != type && TYPE_QUALS (type)) 3904 result = build_qualified_type (result, TYPE_QUALS (type)); 3905 3906 *no_add_attrs = true; /* No need to hang on to the attribute. */ 3907 3908 if (!result) 3909 warning (0, "%qE attribute ignored", name); 3910 else 3911 *node = lang_hooks.types.reconstruct_complex_type (*node, result); 3912 3913 return NULL_TREE; 3914 } 3915 3916 /* Return nonzero if FUNC is a naked function. */ 3917 static int 3918 spu_naked_function_p (tree func) 3919 { 3920 tree a; 3921 3922 if (TREE_CODE (func) != FUNCTION_DECL) 3923 abort (); 3924 3925 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func)); 3926 return a != NULL_TREE; 3927 } 3928 3929 int 3930 spu_initial_elimination_offset (int from, int to) 3931 { 3932 int saved_regs_size = spu_saved_regs_size (); 3933 int sp_offset = 0; 3934 if (!current_function_is_leaf || crtl->outgoing_args_size 3935 || get_frame_size () || saved_regs_size) 3936 sp_offset = STACK_POINTER_OFFSET; 3937 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 3938 return get_frame_size () + crtl->outgoing_args_size + sp_offset; 3939 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 3940 return get_frame_size (); 3941 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) 3942 return sp_offset + crtl->outgoing_args_size 3943 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET; 3944 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 3945 return get_frame_size () + saved_regs_size + sp_offset; 3946 else 3947 gcc_unreachable (); 3948 } 3949 3950 rtx 3951 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED) 3952 { 3953 enum machine_mode mode = TYPE_MODE (type); 3954 int byte_size = ((mode == BLKmode) 3955 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 3956 3957 /* Make sure small structs are left justified in a register. 
*/ 3958 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) 3959 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0) 3960 { 3961 enum machine_mode smode; 3962 rtvec v; 3963 int i; 3964 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 3965 int n = byte_size / UNITS_PER_WORD; 3966 v = rtvec_alloc (nregs); 3967 for (i = 0; i < n; i++) 3968 { 3969 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode, 3970 gen_rtx_REG (TImode, 3971 FIRST_RETURN_REGNUM 3972 + i), 3973 GEN_INT (UNITS_PER_WORD * i)); 3974 byte_size -= UNITS_PER_WORD; 3975 } 3976 3977 if (n < nregs) 3978 { 3979 if (byte_size < 4) 3980 byte_size = 4; 3981 smode = 3982 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT); 3983 RTVEC_ELT (v, n) = 3984 gen_rtx_EXPR_LIST (VOIDmode, 3985 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n), 3986 GEN_INT (UNITS_PER_WORD * n)); 3987 } 3988 return gen_rtx_PARALLEL (mode, v); 3989 } 3990 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM); 3991 } 3992 3993 rtx 3994 spu_function_arg (CUMULATIVE_ARGS cum, 3995 enum machine_mode mode, 3996 tree type, int named ATTRIBUTE_UNUSED) 3997 { 3998 int byte_size; 3999 4000 if (cum >= MAX_REGISTER_ARGS) 4001 return 0; 4002 4003 byte_size = ((mode == BLKmode) 4004 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode)); 4005 4006 /* The ABI does not allow parameters to be passed partially in 4007 reg and partially in stack. */ 4008 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS) 4009 return 0; 4010 4011 /* Make sure small structs are left justified in a register. */ 4012 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type))) 4013 && byte_size < UNITS_PER_WORD && byte_size > 0) 4014 { 4015 enum machine_mode smode; 4016 rtx gr_reg; 4017 if (byte_size < 4) 4018 byte_size = 4; 4019 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT); 4020 gr_reg = gen_rtx_EXPR_LIST (VOIDmode, 4021 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum), 4022 const0_rtx); 4023 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg)); 4024 } 4025 else 4026 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum); 4027 } 4028 4029 /* Variable sized types are passed by reference. */ 4030 static bool 4031 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED, 4032 enum machine_mode mode ATTRIBUTE_UNUSED, 4033 const_tree type, bool named ATTRIBUTE_UNUSED) 4034 { 4035 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; 4036 } 4037 4038 4039 /* Var args. */ 4040 4041 /* Create and return the va_list datatype. 4042 4043 On SPU, va_list is an array type equivalent to 4044 4045 typedef struct __va_list_tag 4046 { 4047 void *__args __attribute__((__aligned(16))); 4048 void *__skip __attribute__((__aligned(16))); 4049 4050 } va_list[1]; 4051 4052 where __args points to the arg that will be returned by the next 4053 va_arg(), and __skip points to the previous stack frame such that 4054 when __args == __skip we should advance __args by 32 bytes. 
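   A varargs callee then consumes the structure in the usual way; an
   illustrative user-level example (names are arbitrary) is

     int
     sum (int n, ...)
     {
       va_list ap;
       int i, total = 0;
       va_start (ap, n);
       for (i = 0; i < n; i++)
         total += va_arg (ap, int);
       va_end (ap);
       return total;
     }

   where each va_arg expands to the __args/__skip update sequence built
   by spu_gimplify_va_arg_expr below.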
*/ 4055 static tree 4056 spu_build_builtin_va_list (void) 4057 { 4058 tree f_args, f_skip, record, type_decl; 4059 bool owp; 4060 4061 record = (*lang_hooks.types.make_type) (RECORD_TYPE); 4062 4063 type_decl = 4064 build_decl (BUILTINS_LOCATION, 4065 TYPE_DECL, get_identifier ("__va_list_tag"), record); 4066 4067 f_args = build_decl (BUILTINS_LOCATION, 4068 FIELD_DECL, get_identifier ("__args"), ptr_type_node); 4069 f_skip = build_decl (BUILTINS_LOCATION, 4070 FIELD_DECL, get_identifier ("__skip"), ptr_type_node); 4071 4072 DECL_FIELD_CONTEXT (f_args) = record; 4073 DECL_ALIGN (f_args) = 128; 4074 DECL_USER_ALIGN (f_args) = 1; 4075 4076 DECL_FIELD_CONTEXT (f_skip) = record; 4077 DECL_ALIGN (f_skip) = 128; 4078 DECL_USER_ALIGN (f_skip) = 1; 4079 4080 TREE_CHAIN (record) = type_decl; 4081 TYPE_NAME (record) = type_decl; 4082 TYPE_FIELDS (record) = f_args; 4083 TREE_CHAIN (f_args) = f_skip; 4084 4085 /* We know this is being padded and we want it too. It is an internal 4086 type so hide the warnings from the user. */ 4087 owp = warn_padded; 4088 warn_padded = false; 4089 4090 layout_type (record); 4091 4092 warn_padded = owp; 4093 4094 /* The correct type is an array type of one element. */ 4095 return build_array_type (record, build_index_type (size_zero_node)); 4096 } 4097 4098 /* Implement va_start by filling the va_list structure VALIST. 4099 NEXTARG points to the first anonymous stack argument. 4100 4101 The following global variables are used to initialize 4102 the va_list structure: 4103 4104 crtl->args.info; 4105 the CUMULATIVE_ARGS for this function 4106 4107 crtl->args.arg_offset_rtx: 4108 holds the offset of the first anonymous stack argument 4109 (relative to the virtual arg pointer). */ 4110 4111 static void 4112 spu_va_start (tree valist, rtx nextarg) 4113 { 4114 tree f_args, f_skip; 4115 tree args, skip, t; 4116 4117 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 4118 f_skip = TREE_CHAIN (f_args); 4119 4120 valist = build_va_arg_indirect_ref (valist); 4121 args = 4122 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE); 4123 skip = 4124 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE); 4125 4126 /* Find the __args area. */ 4127 t = make_tree (TREE_TYPE (args), nextarg); 4128 if (crtl->args.pretend_args_size > 0) 4129 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t, 4130 size_int (-STACK_POINTER_OFFSET)); 4131 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t); 4132 TREE_SIDE_EFFECTS (t) = 1; 4133 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 4134 4135 /* Find the __skip area. */ 4136 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx); 4137 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t, 4138 size_int (crtl->args.pretend_args_size 4139 - STACK_POINTER_OFFSET)); 4140 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t); 4141 TREE_SIDE_EFFECTS (t) = 1; 4142 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 4143 } 4144 4145 /* Gimplify va_arg by updating the va_list structure 4146 VALIST as required to retrieve an argument of type 4147 TYPE, and returning that argument. 
4148 4149 ret = va_arg(VALIST, TYPE); 4150 4151 generates code equivalent to: 4152 4153 paddedsize = (sizeof(TYPE) + 15) & -16; 4154 if (VALIST.__args + paddedsize > VALIST.__skip 4155 && VALIST.__args <= VALIST.__skip) 4156 addr = VALIST.__skip + 32; 4157 else 4158 addr = VALIST.__args; 4159 VALIST.__args = addr + paddedsize; 4160 ret = *(TYPE *)addr; 4161 */ 4162 static tree 4163 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p, 4164 gimple_seq * post_p ATTRIBUTE_UNUSED) 4165 { 4166 tree f_args, f_skip; 4167 tree args, skip; 4168 HOST_WIDE_INT size, rsize; 4169 tree paddedsize, addr, tmp; 4170 bool pass_by_reference_p; 4171 4172 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node)); 4173 f_skip = TREE_CHAIN (f_args); 4174 4175 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist); 4176 args = 4177 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE); 4178 skip = 4179 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE); 4180 4181 addr = create_tmp_var (ptr_type_node, "va_arg"); 4182 4183 /* if an object is dynamically sized, a pointer to it is passed 4184 instead of the object itself. */ 4185 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type, 4186 false); 4187 if (pass_by_reference_p) 4188 type = build_pointer_type (type); 4189 size = int_size_in_bytes (type); 4190 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD; 4191 4192 /* build conditional expression to calculate addr. The expression 4193 will be gimplified later. */ 4194 paddedsize = size_int (rsize); 4195 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize); 4196 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node, 4197 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)), 4198 build2 (LE_EXPR, boolean_type_node, unshare_expr (args), 4199 unshare_expr (skip))); 4200 4201 tmp = build3 (COND_EXPR, ptr_type_node, tmp, 4202 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip), 4203 size_int (32)), unshare_expr (args)); 4204 4205 gimplify_assign (addr, tmp, pre_p); 4206 4207 /* update VALIST.__args */ 4208 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize); 4209 gimplify_assign (unshare_expr (args), tmp, pre_p); 4210 4211 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true), 4212 addr); 4213 4214 if (pass_by_reference_p) 4215 addr = build_va_arg_indirect_ref (addr); 4216 4217 return build_va_arg_indirect_ref (addr); 4218 } 4219 4220 /* Save parameter registers starting with the register that corresponds 4221 to the first unnamed parameters. If the first unnamed parameter is 4222 in the stack then save no registers. Set pretend_args_size to the 4223 amount of space needed to save the registers. */ 4224 void 4225 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode, 4226 tree type, int *pretend_size, int no_rtl) 4227 { 4228 if (!no_rtl) 4229 { 4230 rtx tmp; 4231 int regno; 4232 int offset; 4233 int ncum = *cum; 4234 4235 /* cum currently points to the last named argument, we want to 4236 start at the next argument. 
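     For example (the declaration is illustrative), for a callee such as

       int f (int a, ...);

     the single named argument has already been accounted for, so ncum
     advances past it and every remaining argument register up to
     MAX_REGISTER_ARGS is spilled to the pretend-args area below.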
*/ 4237 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1); 4238 4239 offset = -STACK_POINTER_OFFSET; 4240 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++) 4241 { 4242 tmp = gen_frame_mem (V4SImode, 4243 plus_constant (virtual_incoming_args_rtx, 4244 offset)); 4245 emit_move_insn (tmp, 4246 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno)); 4247 offset += 16; 4248 } 4249 *pretend_size = offset + STACK_POINTER_OFFSET; 4250 } 4251 } 4252 4253 void 4254 spu_conditional_register_usage (void) 4255 { 4256 if (flag_pic) 4257 { 4258 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 4259 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 4260 } 4261 } 4262 4263 /* This is called any time we inspect the alignment of a register for 4264 addresses. */ 4265 static int 4266 reg_aligned_for_addr (rtx x) 4267 { 4268 int regno = 4269 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x); 4270 return REGNO_POINTER_ALIGN (regno) >= 128; 4271 } 4272 4273 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF 4274 into its SYMBOL_REF_FLAGS. */ 4275 static void 4276 spu_encode_section_info (tree decl, rtx rtl, int first) 4277 { 4278 default_encode_section_info (decl, rtl, first); 4279 4280 /* If a variable has a forced alignment to < 16 bytes, mark it with 4281 SYMBOL_FLAG_ALIGN1. */ 4282 if (TREE_CODE (decl) == VAR_DECL 4283 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128) 4284 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1; 4285 } 4286 4287 /* Return TRUE if we are certain the mem refers to a complete object 4288 which is both 16-byte aligned and padded to a 16-byte boundary. This 4289 would make it safe to store with a single instruction. 4290 We guarantee the alignment and padding for static objects by aligning 4291 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.) 4292 FIXME: We currently cannot guarantee this for objects on the stack 4293 because assign_parm_setup_stack calls assign_stack_local with the 4294 alignment of the parameter mode and in that case the alignment never 4295 gets adjusted by LOCAL_ALIGNMENT. */ 4296 static int 4297 store_with_one_insn_p (rtx mem) 4298 { 4299 enum machine_mode mode = GET_MODE (mem); 4300 rtx addr = XEXP (mem, 0); 4301 if (mode == BLKmode) 4302 return 0; 4303 if (GET_MODE_SIZE (mode) >= 16) 4304 return 1; 4305 /* Only static objects. */ 4306 if (GET_CODE (addr) == SYMBOL_REF) 4307 { 4308 /* We use the associated declaration to make sure the access is 4309 referring to the whole object. 4310 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure 4311 if it is necessary. Will there be cases where one exists, and 4312 the other does not? Will there be cases where both exist, but 4313 have different types? */ 4314 tree decl = MEM_EXPR (mem); 4315 if (decl 4316 && TREE_CODE (decl) == VAR_DECL 4317 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl))) 4318 return 1; 4319 decl = SYMBOL_REF_DECL (addr); 4320 if (decl 4321 && TREE_CODE (decl) == VAR_DECL 4322 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl))) 4323 return 1; 4324 } 4325 return 0; 4326 } 4327 4328 /* Return 1 when the address is not valid for a simple load and store as 4329 required by the '_mov*' patterns. We could make this less strict 4330 for loads, but we prefer mem's to look the same so they are more 4331 likely to be merged. 
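   Illustrative cases, assuming the usual alignment of static objects
   described above store_with_one_insn_p: a QImode or HImode access always
   needs splitting, while an SImode access to a plain file-scope

     static int counter;

   does not, because the whole 16-byte aligned and padded object can be
   loaded or stored with a single quadword instruction.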
*/ 4332 static int 4333 address_needs_split (rtx mem) 4334 { 4335 if (GET_MODE_SIZE (GET_MODE (mem)) < 16 4336 && (GET_MODE_SIZE (GET_MODE (mem)) < 4 4337 || !(store_with_one_insn_p (mem) 4338 || mem_is_padded_component_ref (mem)))) 4339 return 1; 4340 4341 return 0; 4342 } 4343 4344 static GTY(()) rtx cache_fetch; /* __cache_fetch function */ 4345 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */ 4346 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */ 4347 4348 /* MEM is known to be an __ea qualified memory access. Emit a call to 4349 fetch the ppu memory to local store, and return its address in local 4350 store. */ 4351 4352 static void 4353 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr) 4354 { 4355 if (is_store) 4356 { 4357 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem))); 4358 if (!cache_fetch_dirty) 4359 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty"); 4360 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode, 4361 2, ea_addr, EAmode, ndirty, SImode); 4362 } 4363 else 4364 { 4365 if (!cache_fetch) 4366 cache_fetch = init_one_libfunc ("__cache_fetch"); 4367 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode, 4368 1, ea_addr, EAmode); 4369 } 4370 } 4371 4372 /* Like ea_load_store, but do the cache tag comparison and, for stores, 4373 dirty bit marking, inline. 4374 4375 The cache control data structure is an array of 4376 4377 struct __cache_tag_array 4378 { 4379 unsigned int tag_lo[4]; 4380 unsigned int tag_hi[4]; 4381 void *data_pointer[4]; 4382 int reserved[4]; 4383 vector unsigned short dirty_bits[4]; 4384 } */ 4385 4386 static void 4387 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr) 4388 { 4389 rtx ea_addr_si; 4390 HOST_WIDE_INT v; 4391 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size"); 4392 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array"); 4393 rtx index_mask = gen_reg_rtx (SImode); 4394 rtx tag_arr = gen_reg_rtx (Pmode); 4395 rtx splat_mask = gen_reg_rtx (TImode); 4396 rtx splat = gen_reg_rtx (V4SImode); 4397 rtx splat_hi = NULL_RTX; 4398 rtx tag_index = gen_reg_rtx (Pmode); 4399 rtx block_off = gen_reg_rtx (SImode); 4400 rtx tag_addr = gen_reg_rtx (Pmode); 4401 rtx tag = gen_reg_rtx (V4SImode); 4402 rtx cache_tag = gen_reg_rtx (V4SImode); 4403 rtx cache_tag_hi = NULL_RTX; 4404 rtx cache_ptrs = gen_reg_rtx (TImode); 4405 rtx cache_ptrs_si = gen_reg_rtx (SImode); 4406 rtx tag_equal = gen_reg_rtx (V4SImode); 4407 rtx tag_equal_hi = NULL_RTX; 4408 rtx tag_eq_pack = gen_reg_rtx (V4SImode); 4409 rtx tag_eq_pack_si = gen_reg_rtx (SImode); 4410 rtx eq_index = gen_reg_rtx (SImode); 4411 rtx bcomp, hit_label, hit_ref, cont_label, insn; 4412 4413 if (spu_ea_model != 32) 4414 { 4415 splat_hi = gen_reg_rtx (V4SImode); 4416 cache_tag_hi = gen_reg_rtx (V4SImode); 4417 tag_equal_hi = gen_reg_rtx (V4SImode); 4418 } 4419 4420 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128)); 4421 emit_move_insn (tag_arr, tag_arr_sym); 4422 v = 0x0001020300010203LL; 4423 emit_move_insn (splat_mask, immed_double_const (v, v, TImode)); 4424 ea_addr_si = ea_addr; 4425 if (spu_ea_model != 32) 4426 ea_addr_si = convert_to_mode (SImode, ea_addr, 1); 4427 4428 /* tag_index = ea_addr & (tag_array_size - 128) */ 4429 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask)); 4430 4431 /* splat ea_addr to all 4 slots. 
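      (The 0x00010203 byte pattern loaded into splat_mask directs shufb to
      copy bytes 0-3 of its first operand, i.e. the preferred slot holding
      ea_addr_si, into every 32-bit slot of the result.)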
*/ 4432 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask)); 4433 /* Similarly for high 32 bits of ea_addr. */ 4434 if (spu_ea_model != 32) 4435 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask)); 4436 4437 /* block_off = ea_addr & 127 */ 4438 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127))); 4439 4440 /* tag_addr = tag_arr + tag_index */ 4441 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index)); 4442 4443 /* Read cache tags. */ 4444 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr)); 4445 if (spu_ea_model != 32) 4446 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode, 4447 plus_constant (tag_addr, 16))); 4448 4449 /* tag = ea_addr & -128 */ 4450 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128))); 4451 4452 /* Read all four cache data pointers. */ 4453 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode, 4454 plus_constant (tag_addr, 32))); 4455 4456 /* Compare tags. */ 4457 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag)); 4458 if (spu_ea_model != 32) 4459 { 4460 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi)); 4461 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi)); 4462 } 4463 4464 /* At most one of the tags compare equal, so tag_equal has one 4465 32-bit slot set to all 1's, with the other slots all zero. 4466 gbb picks off low bit from each byte in the 128-bit registers, 4467 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming 4468 we have a hit. */ 4469 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal))); 4470 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack)); 4471 4472 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */ 4473 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si)); 4474 4475 /* Allowing us to rotate the corresponding cache data pointer to slot0. 4476 (rotating eq_index mod 16 bytes). */ 4477 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index)); 4478 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs)); 4479 4480 /* Add block offset to form final data address. */ 4481 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off)); 4482 4483 /* Check that we did hit. */ 4484 hit_label = gen_label_rtx (); 4485 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label); 4486 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx); 4487 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, 4488 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp, 4489 hit_ref, pc_rtx))); 4490 /* Say that this branch is very likely to happen. */ 4491 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1; 4492 REG_NOTES (insn) 4493 = gen_rtx_EXPR_LIST (REG_BR_PROB, GEN_INT (v), REG_NOTES (insn)); 4494 4495 ea_load_store (mem, is_store, ea_addr, data_addr); 4496 cont_label = gen_label_rtx (); 4497 emit_jump_insn (gen_jump (cont_label)); 4498 emit_barrier (); 4499 4500 emit_label (hit_label); 4501 4502 if (is_store) 4503 { 4504 HOST_WIDE_INT v_hi; 4505 rtx dirty_bits = gen_reg_rtx (TImode); 4506 rtx dirty_off = gen_reg_rtx (SImode); 4507 rtx dirty_128 = gen_reg_rtx (TImode); 4508 rtx neg_block_off = gen_reg_rtx (SImode); 4509 4510 /* Set up mask with one dirty bit per byte of the mem we are 4511 writing, starting from top bit. */ 4512 v_hi = v = -1; 4513 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63; 4514 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64) 4515 { 4516 v_hi = v; 4517 v = 0; 4518 } 4519 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode)); 4520 4521 /* Form index into cache dirty_bits. 
eq_index is one of 4522 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us 4523 0x40, 0x50, 0x60 or 0x70 which just happens to be the 4524 offset to each of the four dirty_bits elements. */ 4525 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2))); 4526 4527 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off)); 4528 4529 /* Rotate bit mask to proper bit. */ 4530 emit_insn (gen_negsi2 (neg_block_off, block_off)); 4531 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off)); 4532 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off)); 4533 4534 /* Or in the new dirty bits. */ 4535 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128)); 4536 4537 /* Store. */ 4538 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off)); 4539 } 4540 4541 emit_label (cont_label); 4542 } 4543 4544 static rtx 4545 expand_ea_mem (rtx mem, bool is_store) 4546 { 4547 rtx ea_addr; 4548 rtx data_addr = gen_reg_rtx (Pmode); 4549 rtx new_mem; 4550 4551 ea_addr = force_reg (EAmode, XEXP (mem, 0)); 4552 if (optimize_size || optimize == 0) 4553 ea_load_store (mem, is_store, ea_addr, data_addr); 4554 else 4555 ea_load_store_inline (mem, is_store, ea_addr, data_addr); 4556 4557 if (ea_alias_set == -1) 4558 ea_alias_set = new_alias_set (); 4559 4560 /* We generate a new MEM RTX to refer to the copy of the data 4561 in the cache. We do not copy memory attributes (except the 4562 alignment) from the original MEM, as they may no longer apply 4563 to the cache copy. */ 4564 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr); 4565 set_mem_alias_set (new_mem, ea_alias_set); 4566 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8)); 4567 4568 return new_mem; 4569 } 4570 4571 int 4572 spu_expand_mov (rtx * ops, enum machine_mode mode) 4573 { 4574 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0])) 4575 abort (); 4576 4577 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1])) 4578 { 4579 rtx from = SUBREG_REG (ops[1]); 4580 enum machine_mode imode = int_mode_for_mode (GET_MODE (from)); 4581 4582 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT 4583 && GET_MODE_CLASS (imode) == MODE_INT 4584 && subreg_lowpart_p (ops[1])); 4585 4586 if (GET_MODE_SIZE (imode) < 4) 4587 imode = SImode; 4588 if (imode != GET_MODE (from)) 4589 from = gen_rtx_SUBREG (imode, from, 0); 4590 4591 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode)) 4592 { 4593 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code; 4594 emit_insn (GEN_FCN (icode) (ops[0], from)); 4595 } 4596 else 4597 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1)); 4598 return 1; 4599 } 4600 4601 /* At least one of the operands needs to be a register. */ 4602 if ((reload_in_progress | reload_completed) == 0 4603 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode)) 4604 { 4605 rtx temp = force_reg (mode, ops[1]); 4606 emit_move_insn (ops[0], temp); 4607 return 1; 4608 } 4609 if (reload_in_progress || reload_completed) 4610 { 4611 if (CONSTANT_P (ops[1])) 4612 return spu_split_immediate (ops); 4613 return 0; 4614 } 4615 4616 /* Catch the SImode immediates greater than 0x7fffffff, and sign 4617 extend them. 
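      For example, a source operand of (const_int 0xffffffff) in SImode is
      canonicalized by trunc_int_for_mode to (const_int -1), and the move
      is re-emitted with that canonical value.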
*/ 4618 if (GET_CODE (ops[1]) == CONST_INT) 4619 { 4620 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode); 4621 if (val != INTVAL (ops[1])) 4622 { 4623 emit_move_insn (ops[0], GEN_INT (val)); 4624 return 1; 4625 } 4626 } 4627 if (MEM_P (ops[0])) 4628 { 4629 if (MEM_ADDR_SPACE (ops[0])) 4630 ops[0] = expand_ea_mem (ops[0], true); 4631 return spu_split_store (ops); 4632 } 4633 if (MEM_P (ops[1])) 4634 { 4635 if (MEM_ADDR_SPACE (ops[1])) 4636 ops[1] = expand_ea_mem (ops[1], false); 4637 return spu_split_load (ops); 4638 } 4639 4640 return 0; 4641 } 4642 4643 static void 4644 spu_convert_move (rtx dst, rtx src) 4645 { 4646 enum machine_mode mode = GET_MODE (dst); 4647 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0); 4648 rtx reg; 4649 gcc_assert (GET_MODE (src) == TImode); 4650 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst; 4651 emit_insn (gen_rtx_SET (VOIDmode, reg, 4652 gen_rtx_TRUNCATE (int_mode, 4653 gen_rtx_LSHIFTRT (TImode, src, 4654 GEN_INT (int_mode == DImode ? 64 : 96))))); 4655 if (int_mode != mode) 4656 { 4657 reg = simplify_gen_subreg (mode, reg, int_mode, 0); 4658 emit_move_insn (dst, reg); 4659 } 4660 } 4661 4662 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using 4663 the address from SRC and SRC+16. Return a REG or CONST_INT that 4664 specifies how many bytes to rotate the loaded registers, plus any 4665 extra from EXTRA_ROTQBY. The address and rotate amounts are 4666 normalized to improve merging of loads and rotate computations. */ 4667 static rtx 4668 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby) 4669 { 4670 rtx addr = XEXP (src, 0); 4671 rtx p0, p1, rot, addr0, addr1; 4672 int rot_amt; 4673 4674 rot = 0; 4675 rot_amt = 0; 4676 4677 if (MEM_ALIGN (src) >= 128) 4678 /* Address is already aligned; simply perform a TImode load. 
*/ ; 4679 else if (GET_CODE (addr) == PLUS) 4680 { 4681 /* 8 cases: 4682 aligned reg + aligned reg => lqx 4683 aligned reg + unaligned reg => lqx, rotqby 4684 aligned reg + aligned const => lqd 4685 aligned reg + unaligned const => lqd, rotqbyi 4686 unaligned reg + aligned reg => lqx, rotqby 4687 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch) 4688 unaligned reg + aligned const => lqd, rotqby 4689 unaligned reg + unaligned const -> not allowed by legitimate address 4690 */ 4691 p0 = XEXP (addr, 0); 4692 p1 = XEXP (addr, 1); 4693 if (!reg_aligned_for_addr (p0)) 4694 { 4695 if (REG_P (p1) && !reg_aligned_for_addr (p1)) 4696 { 4697 rot = gen_reg_rtx (SImode); 4698 emit_insn (gen_addsi3 (rot, p0, p1)); 4699 } 4700 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) 4701 { 4702 if (INTVAL (p1) > 0 4703 && REG_POINTER (p0) 4704 && INTVAL (p1) * BITS_PER_UNIT 4705 < REGNO_POINTER_ALIGN (REGNO (p0))) 4706 { 4707 rot = gen_reg_rtx (SImode); 4708 emit_insn (gen_addsi3 (rot, p0, p1)); 4709 addr = p0; 4710 } 4711 else 4712 { 4713 rtx x = gen_reg_rtx (SImode); 4714 emit_move_insn (x, p1); 4715 if (!spu_arith_operand (p1, SImode)) 4716 p1 = x; 4717 rot = gen_reg_rtx (SImode); 4718 emit_insn (gen_addsi3 (rot, p0, p1)); 4719 addr = gen_rtx_PLUS (Pmode, p0, x); 4720 } 4721 } 4722 else 4723 rot = p0; 4724 } 4725 else 4726 { 4727 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) 4728 { 4729 rot_amt = INTVAL (p1) & 15; 4730 if (INTVAL (p1) & -16) 4731 { 4732 p1 = GEN_INT (INTVAL (p1) & -16); 4733 addr = gen_rtx_PLUS (SImode, p0, p1); 4734 } 4735 else 4736 addr = p0; 4737 } 4738 else if (REG_P (p1) && !reg_aligned_for_addr (p1)) 4739 rot = p1; 4740 } 4741 } 4742 else if (REG_P (addr)) 4743 { 4744 if (!reg_aligned_for_addr (addr)) 4745 rot = addr; 4746 } 4747 else if (GET_CODE (addr) == CONST) 4748 { 4749 if (GET_CODE (XEXP (addr, 0)) == PLUS 4750 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0)) 4751 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) 4752 { 4753 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1)); 4754 if (rot_amt & -16) 4755 addr = gen_rtx_CONST (Pmode, 4756 gen_rtx_PLUS (Pmode, 4757 XEXP (XEXP (addr, 0), 0), 4758 GEN_INT (rot_amt & -16))); 4759 else 4760 addr = XEXP (XEXP (addr, 0), 0); 4761 } 4762 else 4763 { 4764 rot = gen_reg_rtx (Pmode); 4765 emit_move_insn (rot, addr); 4766 } 4767 } 4768 else if (GET_CODE (addr) == CONST_INT) 4769 { 4770 rot_amt = INTVAL (addr); 4771 addr = GEN_INT (rot_amt & -16); 4772 } 4773 else if (!ALIGNED_SYMBOL_REF_P (addr)) 4774 { 4775 rot = gen_reg_rtx (Pmode); 4776 emit_move_insn (rot, addr); 4777 } 4778 4779 rot_amt += extra_rotby; 4780 4781 rot_amt &= 15; 4782 4783 if (rot && rot_amt) 4784 { 4785 rtx x = gen_reg_rtx (SImode); 4786 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt))); 4787 rot = x; 4788 rot_amt = 0; 4789 } 4790 if (!rot && rot_amt) 4791 rot = GEN_INT (rot_amt); 4792 4793 addr0 = copy_rtx (addr); 4794 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16)); 4795 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0))); 4796 4797 if (dst1) 4798 { 4799 addr1 = plus_constant (copy_rtx (addr), 16); 4800 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16)); 4801 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1))); 4802 } 4803 4804 return rot; 4805 } 4806 4807 int 4808 spu_split_load (rtx * ops) 4809 { 4810 enum machine_mode mode = GET_MODE (ops[0]); 4811 rtx addr, load, rot; 4812 int rot_amt; 4813 4814 if (GET_MODE_SIZE (mode) >= 16) 4815 return 0; 4816 4817 addr = XEXP (ops[1], 0); 4818 gcc_assert 
(GET_CODE (addr) != AND); 4819 4820 if (!address_needs_split (ops[1])) 4821 { 4822 ops[1] = change_address (ops[1], TImode, addr); 4823 load = gen_reg_rtx (TImode); 4824 emit_insn (gen__movti (load, ops[1])); 4825 spu_convert_move (ops[0], load); 4826 return 1; 4827 } 4828 4829 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0; 4830 4831 load = gen_reg_rtx (TImode); 4832 rot = spu_expand_load (load, 0, ops[1], rot_amt); 4833 4834 if (rot) 4835 emit_insn (gen_rotqby_ti (load, load, rot)); 4836 4837 spu_convert_move (ops[0], load); 4838 return 1; 4839 } 4840 4841 int 4842 spu_split_store (rtx * ops) 4843 { 4844 enum machine_mode mode = GET_MODE (ops[0]); 4845 rtx reg; 4846 rtx addr, p0, p1, p1_lo, smem; 4847 int aform; 4848 int scalar; 4849 4850 if (GET_MODE_SIZE (mode) >= 16) 4851 return 0; 4852 4853 addr = XEXP (ops[0], 0); 4854 gcc_assert (GET_CODE (addr) != AND); 4855 4856 if (!address_needs_split (ops[0])) 4857 { 4858 reg = gen_reg_rtx (TImode); 4859 emit_insn (gen_spu_convert (reg, ops[1])); 4860 ops[0] = change_address (ops[0], TImode, addr); 4861 emit_move_insn (ops[0], reg); 4862 return 1; 4863 } 4864 4865 if (GET_CODE (addr) == PLUS) 4866 { 4867 /* 8 cases: 4868 aligned reg + aligned reg => lqx, c?x, shuf, stqx 4869 aligned reg + unaligned reg => lqx, c?x, shuf, stqx 4870 aligned reg + aligned const => lqd, c?d, shuf, stqx 4871 aligned reg + unaligned const => lqd, c?d, shuf, stqx 4872 unaligned reg + aligned reg => lqx, c?x, shuf, stqx 4873 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx 4874 unaligned reg + aligned const => lqd, c?d, shuf, stqx 4875 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx 4876 */ 4877 aform = 0; 4878 p0 = XEXP (addr, 0); 4879 p1 = p1_lo = XEXP (addr, 1); 4880 if (REG_P (p0) && GET_CODE (p1) == CONST_INT) 4881 { 4882 p1_lo = GEN_INT (INTVAL (p1) & 15); 4883 if (reg_aligned_for_addr (p0)) 4884 { 4885 p1 = GEN_INT (INTVAL (p1) & -16); 4886 if (p1 == const0_rtx) 4887 addr = p0; 4888 else 4889 addr = gen_rtx_PLUS (SImode, p0, p1); 4890 } 4891 else 4892 { 4893 rtx x = gen_reg_rtx (SImode); 4894 emit_move_insn (x, p1); 4895 addr = gen_rtx_PLUS (SImode, p0, x); 4896 } 4897 } 4898 } 4899 else if (REG_P (addr)) 4900 { 4901 aform = 0; 4902 p0 = addr; 4903 p1 = p1_lo = const0_rtx; 4904 } 4905 else 4906 { 4907 aform = 1; 4908 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); 4909 p1 = 0; /* aform doesn't use p1 */ 4910 p1_lo = addr; 4911 if (ALIGNED_SYMBOL_REF_P (addr)) 4912 p1_lo = const0_rtx; 4913 else if (GET_CODE (addr) == CONST 4914 && GET_CODE (XEXP (addr, 0)) == PLUS 4915 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0)) 4916 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) 4917 { 4918 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1)); 4919 if ((v & -16) != 0) 4920 addr = gen_rtx_CONST (Pmode, 4921 gen_rtx_PLUS (Pmode, 4922 XEXP (XEXP (addr, 0), 0), 4923 GEN_INT (v & -16))); 4924 else 4925 addr = XEXP (XEXP (addr, 0), 0); 4926 p1_lo = GEN_INT (v & 15); 4927 } 4928 else if (GET_CODE (addr) == CONST_INT) 4929 { 4930 p1_lo = GEN_INT (INTVAL (addr) & 15); 4931 addr = GEN_INT (INTVAL (addr) & -16); 4932 } 4933 else 4934 { 4935 p1_lo = gen_reg_rtx (SImode); 4936 emit_move_insn (p1_lo, addr); 4937 } 4938 } 4939 4940 reg = gen_reg_rtx (TImode); 4941 4942 scalar = store_with_one_insn_p (ops[0]); 4943 if (!scalar) 4944 { 4945 /* We could copy the flags from the ops[0] MEM to mem here, 4946 We don't because we want this load to be optimized away if 4947 possible, and copying the flags will prevent that in certain 4948 cases, e.g. 
consider the volatile flag. */ 4949 4950 rtx pat = gen_reg_rtx (TImode); 4951 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr)); 4952 set_mem_alias_set (lmem, 0); 4953 emit_insn (gen_movti (reg, lmem)); 4954 4955 if (!p0 || reg_aligned_for_addr (p0)) 4956 p0 = stack_pointer_rtx; 4957 if (!p1_lo) 4958 p1_lo = const0_rtx; 4959 4960 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode)))); 4961 emit_insn (gen_shufb (reg, ops[1], reg, pat)); 4962 } 4963 else 4964 { 4965 if (GET_CODE (ops[1]) == REG) 4966 emit_insn (gen_spu_convert (reg, ops[1])); 4967 else if (GET_CODE (ops[1]) == SUBREG) 4968 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1]))); 4969 else 4970 abort (); 4971 } 4972 4973 if (GET_MODE_SIZE (mode) < 4 && scalar) 4974 emit_insn (gen_ashlti3 4975 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode)))); 4976 4977 smem = change_address (ops[0], TImode, copy_rtx (addr)); 4978 /* We can't use the previous alias set because the memory has changed 4979 size and can potentially overlap objects of other types. */ 4980 set_mem_alias_set (smem, 0); 4981 4982 emit_insn (gen_movti (smem, reg)); 4983 return 1; 4984 } 4985 4986 /* Return TRUE if X is MEM which is a struct member reference 4987 and the member can safely be loaded and stored with a single 4988 instruction because it is padded. */ 4989 static int 4990 mem_is_padded_component_ref (rtx x) 4991 { 4992 tree t = MEM_EXPR (x); 4993 tree r; 4994 if (!t || TREE_CODE (t) != COMPONENT_REF) 4995 return 0; 4996 t = TREE_OPERAND (t, 1); 4997 if (!t || TREE_CODE (t) != FIELD_DECL 4998 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t))) 4999 return 0; 5000 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */ 5001 r = DECL_FIELD_CONTEXT (t); 5002 if (!r || TREE_CODE (r) != RECORD_TYPE) 5003 return 0; 5004 /* Make sure they are the same mode */ 5005 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t))) 5006 return 0; 5007 /* If there are no following fields then the field alignment assures 5008 the structure is padded to the alignment which means this field is 5009 padded too. */ 5010 if (TREE_CHAIN (t) == 0) 5011 return 1; 5012 /* If the following field is also aligned then this field will be 5013 padded. */ 5014 t = TREE_CHAIN (t); 5015 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128) 5016 return 1; 5017 return 0; 5018 } 5019 5020 /* Parse the -mfixed-range= option string. */ 5021 static void 5022 fix_range (const char *const_str) 5023 { 5024 int i, first, last; 5025 char *str, *dash, *comma; 5026 5027 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and 5028 REG2 are either register names or register numbers. The effect 5029 of this option is to mark the registers in the range from REG1 to 5030 REG2 as ``fixed'' so they won't be used by the compiler. 
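     An illustrative use (the register names here are only an example) is

       -mfixed-range=$80-$90,$100-$110

     which removes $80 through $90 and $100 through $110 from the
     compiler's allocatable set.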
*/ 5031 5032 i = strlen (const_str); 5033 str = (char *) alloca (i + 1); 5034 memcpy (str, const_str, i + 1); 5035 5036 while (1) 5037 { 5038 dash = strchr (str, '-'); 5039 if (!dash) 5040 { 5041 warning (0, "value of -mfixed-range must have form REG1-REG2"); 5042 return; 5043 } 5044 *dash = '\0'; 5045 comma = strchr (dash + 1, ','); 5046 if (comma) 5047 *comma = '\0'; 5048 5049 first = decode_reg_name (str); 5050 if (first < 0) 5051 { 5052 warning (0, "unknown register name: %s", str); 5053 return; 5054 } 5055 5056 last = decode_reg_name (dash + 1); 5057 if (last < 0) 5058 { 5059 warning (0, "unknown register name: %s", dash + 1); 5060 return; 5061 } 5062 5063 *dash = '-'; 5064 5065 if (first > last) 5066 { 5067 warning (0, "%s-%s is an empty range", str, dash + 1); 5068 return; 5069 } 5070 5071 for (i = first; i <= last; ++i) 5072 fixed_regs[i] = call_used_regs[i] = 1; 5073 5074 if (!comma) 5075 break; 5076 5077 *comma = ','; 5078 str = comma + 1; 5079 } 5080 } 5081 5082 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that 5083 can be generated using the fsmbi instruction. */ 5084 int 5085 fsmbi_const_p (rtx x) 5086 { 5087 if (CONSTANT_P (x)) 5088 { 5089 /* We can always choose TImode for CONST_INT because the high bits 5090 of an SImode will always be all 1s, i.e., valid for fsmbi. */ 5091 enum immediate_class c = classify_immediate (x, TImode); 5092 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2); 5093 } 5094 return 0; 5095 } 5096 5097 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that 5098 can be generated using the cbd, chd, cwd or cdd instruction. */ 5099 int 5100 cpat_const_p (rtx x, enum machine_mode mode) 5101 { 5102 if (CONSTANT_P (x)) 5103 { 5104 enum immediate_class c = classify_immediate (x, mode); 5105 return c == IC_CPAT; 5106 } 5107 return 0; 5108 } 5109 5110 rtx 5111 gen_cpat_const (rtx * ops) 5112 { 5113 unsigned char dst[16]; 5114 int i, offset, shift, isize; 5115 if (GET_CODE (ops[3]) != CONST_INT 5116 || GET_CODE (ops[2]) != CONST_INT 5117 || (GET_CODE (ops[1]) != CONST_INT 5118 && GET_CODE (ops[1]) != REG)) 5119 return 0; 5120 if (GET_CODE (ops[1]) == REG 5121 && (!REG_POINTER (ops[1]) 5122 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128)) 5123 return 0; 5124 5125 for (i = 0; i < 16; i++) 5126 dst[i] = i + 16; 5127 isize = INTVAL (ops[3]); 5128 if (isize == 1) 5129 shift = 3; 5130 else if (isize == 2) 5131 shift = 2; 5132 else 5133 shift = 0; 5134 offset = (INTVAL (ops[2]) + 5135 (GET_CODE (ops[1]) == 5136 CONST_INT ? INTVAL (ops[1]) : 0)) & 15; 5137 for (i = 0; i < isize; i++) 5138 dst[offset + i] = i + shift; 5139 return array_to_constant (TImode, dst); 5140 } 5141 5142 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte 5143 array. Use MODE for CONST_INT's. When the constant's mode is smaller 5144 than 16 bytes, the value is repeated across the rest of the array. */ 5145 void 5146 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16]) 5147 { 5148 HOST_WIDE_INT val; 5149 int i, j, first; 5150 5151 memset (arr, 0, 16); 5152 mode = GET_MODE (x) != VOIDmode ? 
GET_MODE (x) : mode; 5153 if (GET_CODE (x) == CONST_INT 5154 || (GET_CODE (x) == CONST_DOUBLE 5155 && (mode == SFmode || mode == DFmode))) 5156 { 5157 gcc_assert (mode != VOIDmode && mode != BLKmode); 5158 5159 if (GET_CODE (x) == CONST_DOUBLE) 5160 val = const_double_to_hwint (x); 5161 else 5162 val = INTVAL (x); 5163 first = GET_MODE_SIZE (mode) - 1; 5164 for (i = first; i >= 0; i--) 5165 { 5166 arr[i] = val & 0xff; 5167 val >>= 8; 5168 } 5169 /* Splat the constant across the whole array. */ 5170 for (j = 0, i = first + 1; i < 16; i++) 5171 { 5172 arr[i] = arr[j]; 5173 j = (j == first) ? 0 : j + 1; 5174 } 5175 } 5176 else if (GET_CODE (x) == CONST_DOUBLE) 5177 { 5178 val = CONST_DOUBLE_LOW (x); 5179 for (i = 15; i >= 8; i--) 5180 { 5181 arr[i] = val & 0xff; 5182 val >>= 8; 5183 } 5184 val = CONST_DOUBLE_HIGH (x); 5185 for (i = 7; i >= 0; i--) 5186 { 5187 arr[i] = val & 0xff; 5188 val >>= 8; 5189 } 5190 } 5191 else if (GET_CODE (x) == CONST_VECTOR) 5192 { 5193 int units; 5194 rtx elt; 5195 mode = GET_MODE_INNER (mode); 5196 units = CONST_VECTOR_NUNITS (x); 5197 for (i = 0; i < units; i++) 5198 { 5199 elt = CONST_VECTOR_ELT (x, i); 5200 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE) 5201 { 5202 if (GET_CODE (elt) == CONST_DOUBLE) 5203 val = const_double_to_hwint (elt); 5204 else 5205 val = INTVAL (elt); 5206 first = GET_MODE_SIZE (mode) - 1; 5207 if (first + i * GET_MODE_SIZE (mode) > 16) 5208 abort (); 5209 for (j = first; j >= 0; j--) 5210 { 5211 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff; 5212 val >>= 8; 5213 } 5214 } 5215 } 5216 } 5217 else 5218 gcc_unreachable(); 5219 } 5220 5221 /* Convert a 16 byte array to a constant of mode MODE. When MODE is 5222 smaller than 16 bytes, use the bytes that would represent that value 5223 in a register, e.g., for QImode return the value of arr[3]. */ 5224 rtx 5225 array_to_constant (enum machine_mode mode, const unsigned char arr[16]) 5226 { 5227 enum machine_mode inner_mode; 5228 rtvec v; 5229 int units, size, i, j, k; 5230 HOST_WIDE_INT val; 5231 5232 if (GET_MODE_CLASS (mode) == MODE_INT 5233 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) 5234 { 5235 j = GET_MODE_SIZE (mode); 5236 i = j < 4 ? 
4 - j : 0; 5237 for (val = 0; i < j; i++) 5238 val = (val << 8) | arr[i]; 5239 val = trunc_int_for_mode (val, mode); 5240 return GEN_INT (val); 5241 } 5242 5243 if (mode == TImode) 5244 { 5245 HOST_WIDE_INT high; 5246 for (i = high = 0; i < 8; i++) 5247 high = (high << 8) | arr[i]; 5248 for (i = 8, val = 0; i < 16; i++) 5249 val = (val << 8) | arr[i]; 5250 return immed_double_const (val, high, TImode); 5251 } 5252 if (mode == SFmode) 5253 { 5254 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]; 5255 val = trunc_int_for_mode (val, SImode); 5256 return hwint_to_const_double (SFmode, val); 5257 } 5258 if (mode == DFmode) 5259 { 5260 for (i = 0, val = 0; i < 8; i++) 5261 val = (val << 8) | arr[i]; 5262 return hwint_to_const_double (DFmode, val); 5263 } 5264 5265 if (!VECTOR_MODE_P (mode)) 5266 abort (); 5267 5268 units = GET_MODE_NUNITS (mode); 5269 size = GET_MODE_UNIT_SIZE (mode); 5270 inner_mode = GET_MODE_INNER (mode); 5271 v = rtvec_alloc (units); 5272 5273 for (k = i = 0; i < units; ++i) 5274 { 5275 val = 0; 5276 for (j = 0; j < size; j++, k++) 5277 val = (val << 8) | arr[k]; 5278 5279 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT) 5280 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val); 5281 else 5282 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode)); 5283 } 5284 if (k > 16) 5285 abort (); 5286 5287 return gen_rtx_CONST_VECTOR (mode, v); 5288 } 5289 5290 static void 5291 reloc_diagnostic (rtx x) 5292 { 5293 tree decl = 0; 5294 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC)) 5295 return; 5296 5297 if (GET_CODE (x) == SYMBOL_REF) 5298 decl = SYMBOL_REF_DECL (x); 5299 else if (GET_CODE (x) == CONST 5300 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) 5301 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0)); 5302 5303 /* SYMBOL_REF_DECL is not necessarily a DECL. */ 5304 if (decl && !DECL_P (decl)) 5305 decl = 0; 5306 5307 /* The decl could be a string constant. */ 5308 if (decl && DECL_P (decl)) 5309 { 5310 location_t loc; 5311 /* We use last_assemble_variable_decl to get line information. It's 5312 not always going to be right and might not even be close, but will 5313 be right for the more common cases. */ 5314 if (!last_assemble_variable_decl || in_section == ctors_section) 5315 loc = DECL_SOURCE_LOCATION (decl); 5316 else 5317 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl); 5318 5319 if (TARGET_WARN_RELOC) 5320 warning_at (loc, 0, 5321 "creating run-time relocation for %qD", decl); 5322 else 5323 error_at (loc, 5324 "creating run-time relocation for %qD", decl); 5325 } 5326 else 5327 { 5328 if (TARGET_WARN_RELOC) 5329 warning_at (input_location, 0, "creating run-time relocation"); 5330 else 5331 error_at (input_location, "creating run-time relocation"); 5332 } 5333 } 5334 5335 /* Hook into assemble_integer so we can generate an error for run-time 5336 relocations. The SPU ABI disallows them. */ 5337 static bool 5338 spu_assemble_integer (rtx x, unsigned int size, int aligned_p) 5339 { 5340 /* By default run-time relocations aren't supported, but we allow them 5341 in case users support it in their own run-time loader. And we provide 5342 a warning for those users that don't. 
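   An illustrative trigger, assuming -fpic together with the target's
   warn- or error-on-relocation flag (TARGET_WARN_RELOC or
   TARGET_ERROR_RELOC): a file-scope initializer that needs a load-time
   address, such as

     extern int x;
     int *p = &x;

   is assembled through this hook as a SYMBOL_REF and reported by
   reloc_diagnostic above.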
*/ 5343 if ((GET_CODE (x) == SYMBOL_REF) 5344 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST) 5345 reloc_diagnostic (x); 5346 5347 return default_assemble_integer (x, size, aligned_p); 5348 } 5349 5350 static void 5351 spu_asm_globalize_label (FILE * file, const char *name) 5352 { 5353 fputs ("\t.global\t", file); 5354 assemble_name (file, name); 5355 fputs ("\n", file); 5356 } 5357 5358 static bool 5359 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total, 5360 bool speed ATTRIBUTE_UNUSED) 5361 { 5362 enum machine_mode mode = GET_MODE (x); 5363 int cost = COSTS_N_INSNS (2); 5364 5365 /* Folding to a CONST_VECTOR will use extra space but there might 5366 be only a small savings in cycles. We'd like to use a CONST_VECTOR 5367 only if it allows us to fold away multiple insns. Changing the cost 5368 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though 5369 because this cost will only be compared against a single insn. 5370 if (code == CONST_VECTOR) 5371 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6); 5372 */ 5373 5374 /* Use defaults for float operations. Not accurate but good enough. */ 5375 if (mode == DFmode) 5376 { 5377 *total = COSTS_N_INSNS (13); 5378 return true; 5379 } 5380 if (mode == SFmode) 5381 { 5382 *total = COSTS_N_INSNS (6); 5383 return true; 5384 } 5385 switch (code) 5386 { 5387 case CONST_INT: 5388 if (satisfies_constraint_K (x)) 5389 *total = 0; 5390 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll) 5391 *total = COSTS_N_INSNS (1); 5392 else 5393 *total = COSTS_N_INSNS (3); 5394 return true; 5395 5396 case CONST: 5397 *total = COSTS_N_INSNS (3); 5398 return true; 5399 5400 case LABEL_REF: 5401 case SYMBOL_REF: 5402 *total = COSTS_N_INSNS (0); 5403 return true; 5404 5405 case CONST_DOUBLE: 5406 *total = COSTS_N_INSNS (5); 5407 return true; 5408 5409 case FLOAT_EXTEND: 5410 case FLOAT_TRUNCATE: 5411 case FLOAT: 5412 case UNSIGNED_FLOAT: 5413 case FIX: 5414 case UNSIGNED_FIX: 5415 *total = COSTS_N_INSNS (7); 5416 return true; 5417 5418 case PLUS: 5419 if (mode == TImode) 5420 { 5421 *total = COSTS_N_INSNS (9); 5422 return true; 5423 } 5424 break; 5425 5426 case MULT: 5427 cost = 5428 GET_CODE (XEXP (x, 0)) == 5429 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7); 5430 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG) 5431 { 5432 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 5433 { 5434 HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); 5435 cost = COSTS_N_INSNS (14); 5436 if ((val & 0xffff) == 0) 5437 cost = COSTS_N_INSNS (9); 5438 else if (val > 0 && val < 0x10000) 5439 cost = COSTS_N_INSNS (11); 5440 } 5441 } 5442 *total = cost; 5443 return true; 5444 case DIV: 5445 case UDIV: 5446 case MOD: 5447 case UMOD: 5448 *total = COSTS_N_INSNS (20); 5449 return true; 5450 case ROTATE: 5451 case ROTATERT: 5452 case ASHIFT: 5453 case ASHIFTRT: 5454 case LSHIFTRT: 5455 *total = COSTS_N_INSNS (4); 5456 return true; 5457 case UNSPEC: 5458 if (XINT (x, 1) == UNSPEC_CONVERT) 5459 *total = COSTS_N_INSNS (0); 5460 else 5461 *total = COSTS_N_INSNS (4); 5462 return true; 5463 } 5464 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). 
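      For instance, an integer operation in DImode (twice the width of
      SImode) has its cost multiplied by 2 * 2 = 4, and one in TImode by
      4 * 4 = 16.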
*/ 5465 if (GET_MODE_CLASS (mode) == MODE_INT 5466 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl) 5467 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)) 5468 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode)); 5469 *total = cost; 5470 return true; 5471 } 5472 5473 static enum machine_mode 5474 spu_unwind_word_mode (void) 5475 { 5476 return SImode; 5477 } 5478 5479 /* Decide whether we can make a sibling call to a function. DECL is the 5480 declaration of the function being targeted by the call and EXP is the 5481 CALL_EXPR representing the call. */ 5482 static bool 5483 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 5484 { 5485 return decl && !TARGET_LARGE_MEM; 5486 } 5487 5488 /* We need to correctly update the back chain pointer and the Available 5489 Stack Size (which is in the second slot of the sp register.) */ 5490 void 5491 spu_allocate_stack (rtx op0, rtx op1) 5492 { 5493 HOST_WIDE_INT v; 5494 rtx chain = gen_reg_rtx (V4SImode); 5495 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx); 5496 rtx sp = gen_reg_rtx (V4SImode); 5497 rtx splatted = gen_reg_rtx (V4SImode); 5498 rtx pat = gen_reg_rtx (TImode); 5499 5500 /* copy the back chain so we can save it back again. */ 5501 emit_move_insn (chain, stack_bot); 5502 5503 op1 = force_reg (SImode, op1); 5504 5505 v = 0x1020300010203ll; 5506 emit_move_insn (pat, immed_double_const (v, v, TImode)); 5507 emit_insn (gen_shufb (splatted, op1, op1, pat)); 5508 5509 emit_insn (gen_spu_convert (sp, stack_pointer_rtx)); 5510 emit_insn (gen_subv4si3 (sp, sp, splatted)); 5511 5512 if (flag_stack_check) 5513 { 5514 rtx avail = gen_reg_rtx(SImode); 5515 rtx result = gen_reg_rtx(SImode); 5516 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1))); 5517 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1))); 5518 emit_insn (gen_spu_heq (result, GEN_INT(0) )); 5519 } 5520 5521 emit_insn (gen_spu_convert (stack_pointer_rtx, sp)); 5522 5523 emit_move_insn (stack_bot, chain); 5524 5525 emit_move_insn (op0, virtual_stack_dynamic_rtx); 5526 } 5527 5528 void 5529 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1) 5530 { 5531 static unsigned char arr[16] = 5532 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }; 5533 rtx temp = gen_reg_rtx (SImode); 5534 rtx temp2 = gen_reg_rtx (SImode); 5535 rtx temp3 = gen_reg_rtx (V4SImode); 5536 rtx temp4 = gen_reg_rtx (V4SImode); 5537 rtx pat = gen_reg_rtx (TImode); 5538 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM); 5539 5540 /* Restore the backchain from the first word, sp from the second. 
*/ 5541 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0)); 5542 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4)); 5543 5544 emit_move_insn (pat, array_to_constant (TImode, arr)); 5545 5546 /* Compute Available Stack Size for sp */ 5547 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx)); 5548 emit_insn (gen_shufb (temp3, temp, temp, pat)); 5549 5550 /* Compute Available Stack Size for back chain */ 5551 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx)); 5552 emit_insn (gen_shufb (temp4, temp2, temp2, pat)); 5553 emit_insn (gen_addv4si3 (temp4, sp, temp4)); 5554 5555 emit_insn (gen_addv4si3 (sp, sp, temp3)); 5556 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4); 5557 } 5558 5559 static void 5560 spu_init_libfuncs (void) 5561 { 5562 set_optab_libfunc (smul_optab, DImode, "__muldi3"); 5563 set_optab_libfunc (sdiv_optab, DImode, "__divdi3"); 5564 set_optab_libfunc (smod_optab, DImode, "__moddi3"); 5565 set_optab_libfunc (udiv_optab, DImode, "__udivdi3"); 5566 set_optab_libfunc (umod_optab, DImode, "__umoddi3"); 5567 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4"); 5568 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2"); 5569 set_optab_libfunc (clz_optab, DImode, "__clzdi2"); 5570 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2"); 5571 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2"); 5572 set_optab_libfunc (parity_optab, DImode, "__paritydi2"); 5573 5574 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf"); 5575 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf"); 5576 5577 set_optab_libfunc (smul_optab, TImode, "__multi3"); 5578 set_optab_libfunc (sdiv_optab, TImode, "__divti3"); 5579 set_optab_libfunc (smod_optab, TImode, "__modti3"); 5580 set_optab_libfunc (udiv_optab, TImode, "__udivti3"); 5581 set_optab_libfunc (umod_optab, TImode, "__umodti3"); 5582 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4"); 5583 } 5584 5585 /* Make a subreg, stripping any existing subreg. We could possibly just 5586 call simplify_subreg, but in this case we know what we want. */ 5587 rtx 5588 spu_gen_subreg (enum machine_mode mode, rtx x) 5589 { 5590 if (GET_CODE (x) == SUBREG) 5591 x = SUBREG_REG (x); 5592 if (GET_MODE (x) == mode) 5593 return x; 5594 return gen_rtx_SUBREG (mode, x, 0); 5595 } 5596 5597 static bool 5598 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 5599 { 5600 return (TYPE_MODE (type) == BLKmode 5601 && ((type) == 0 5602 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST 5603 || int_size_in_bytes (type) > 5604 (MAX_REGISTER_RETURN * UNITS_PER_WORD))); 5605 } 5606 5607 /* Create the built-in types and functions. */ 5608 5609 enum spu_function_code 5610 { 5611 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode, 5612 #include "spu-builtins.def" 5613 #undef DEF_BUILTIN 5614 NUM_SPU_BUILTINS 5615 }; 5616 5617 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS]; 5618 5619 struct spu_builtin_description spu_builtins[] = { 5620 #define DEF_BUILTIN(fcode, icode, name, type, params) \ 5621 {fcode, icode, name, type, params, NULL_TREE}, 5622 #include "spu-builtins.def" 5623 #undef DEF_BUILTIN 5624 }; 5625 5626 /* Return the SPU builtin decl for CODE. 
*/ 5627 5628 static tree 5629 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 5630 { 5631 if (code >= NUM_SPU_BUILTINS) 5632 return error_mark_node; 5633 5634 return spu_builtins[code].fndecl; 5635 } 5636 5637 5638 static void 5639 spu_init_builtins (void) 5640 { 5641 struct spu_builtin_description *d; 5642 unsigned int i; 5643 5644 V16QI_type_node = build_vector_type (intQI_type_node, 16); 5645 V8HI_type_node = build_vector_type (intHI_type_node, 8); 5646 V4SI_type_node = build_vector_type (intSI_type_node, 4); 5647 V2DI_type_node = build_vector_type (intDI_type_node, 2); 5648 V4SF_type_node = build_vector_type (float_type_node, 4); 5649 V2DF_type_node = build_vector_type (double_type_node, 2); 5650 5651 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16); 5652 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8); 5653 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4); 5654 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2); 5655 5656 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node; 5657 5658 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE]; 5659 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE]; 5660 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE]; 5661 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE]; 5662 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE]; 5663 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE]; 5664 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE]; 5665 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE]; 5666 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE]; 5667 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE]; 5668 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE]; 5669 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE]; 5670 5671 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE]; 5672 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE]; 5673 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE]; 5674 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE]; 5675 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE]; 5676 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE]; 5677 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE]; 5678 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE]; 5679 5680 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE]; 5681 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE]; 5682 5683 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE]; 5684 5685 spu_builtin_types[SPU_BTI_PTR] = 5686 build_pointer_type (build_qualified_type 5687 (void_type_node, 5688 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE)); 5689 5690 /* For each builtin we build a new prototype. The tree code will make 5691 sure nodes are shared. */ 5692 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++) 5693 { 5694 tree p; 5695 char name[64]; /* build_function will make a copy. */ 5696 int parm; 5697 5698 if (d->name == 0) 5699 continue; 5700 5701 /* Find last parm. 
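         That is, count the entries of d->parm[] up to the
         SPU_BTI_END_OF_PARAMS sentinel so the argument list below can be
         built from the last parameter backwards.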
*/ 5702 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++) 5703 ; 5704 5705 p = void_list_node; 5706 while (parm > 1) 5707 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p); 5708 5709 p = build_function_type (spu_builtin_types[d->parm[0]], p); 5710 5711 sprintf (name, "__builtin_%s", d->name); 5712 d->fndecl = 5713 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD, 5714 NULL, NULL_TREE); 5715 if (d->fcode == SPU_MASK_FOR_LOAD) 5716 TREE_READONLY (d->fndecl) = 1; 5717 5718 /* These builtins don't throw. */ 5719 TREE_NOTHROW (d->fndecl) = 1; 5720 } 5721 } 5722 5723 void 5724 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1) 5725 { 5726 static unsigned char arr[16] = 5727 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 }; 5728 5729 rtx temp = gen_reg_rtx (Pmode); 5730 rtx temp2 = gen_reg_rtx (V4SImode); 5731 rtx temp3 = gen_reg_rtx (V4SImode); 5732 rtx pat = gen_reg_rtx (TImode); 5733 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM); 5734 5735 emit_move_insn (pat, array_to_constant (TImode, arr)); 5736 5737 /* Restore the sp. */ 5738 emit_move_insn (temp, op1); 5739 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx)); 5740 5741 /* Compute available stack size for sp. */ 5742 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx)); 5743 emit_insn (gen_shufb (temp3, temp, temp, pat)); 5744 5745 emit_insn (gen_addv4si3 (sp, sp, temp3)); 5746 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2); 5747 } 5748 5749 int 5750 spu_safe_dma (HOST_WIDE_INT channel) 5751 { 5752 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27; 5753 } 5754 5755 void 5756 spu_builtin_splats (rtx ops[]) 5757 { 5758 enum machine_mode mode = GET_MODE (ops[0]); 5759 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE) 5760 { 5761 unsigned char arr[16]; 5762 constant_to_array (GET_MODE_INNER (mode), ops[1], arr); 5763 emit_move_insn (ops[0], array_to_constant (mode, arr)); 5764 } 5765 else 5766 { 5767 rtx reg = gen_reg_rtx (TImode); 5768 rtx shuf; 5769 if (GET_CODE (ops[1]) != REG 5770 && GET_CODE (ops[1]) != SUBREG) 5771 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]); 5772 switch (mode) 5773 { 5774 case V2DImode: 5775 case V2DFmode: 5776 shuf = 5777 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll, 5778 TImode); 5779 break; 5780 case V4SImode: 5781 case V4SFmode: 5782 shuf = 5783 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll, 5784 TImode); 5785 break; 5786 case V8HImode: 5787 shuf = 5788 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll, 5789 TImode); 5790 break; 5791 case V16QImode: 5792 shuf = 5793 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll, 5794 TImode); 5795 break; 5796 default: 5797 abort (); 5798 } 5799 emit_move_insn (reg, shuf); 5800 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg)); 5801 } 5802 } 5803 5804 void 5805 spu_builtin_extract (rtx ops[]) 5806 { 5807 enum machine_mode mode; 5808 rtx rot, from, tmp; 5809 5810 mode = GET_MODE (ops[1]); 5811 5812 if (GET_CODE (ops[2]) == CONST_INT) 5813 { 5814 switch (mode) 5815 { 5816 case V16QImode: 5817 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2])); 5818 break; 5819 case V8HImode: 5820 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2])); 5821 break; 5822 case V4SFmode: 5823 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2])); 5824 break; 5825 case V4SImode: 5826 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2])); 5827 break; 5828 case V2DImode: 5829 emit_insn 

void
spu_builtin_extract (rtx ops[])
{
  enum machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
          break;
        case V4SFmode:
          emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
          break;
        case V4SImode:
          emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
          break;
        case V2DImode:
          emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
          break;
        case V2DFmode:
          emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
          break;
        default:
          abort ();
        }
      return;
    }

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case V4SFmode:
    case V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case V2DImode:
    case V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }
  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}

void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
                 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
             (mask, stack_pointer_rtx, offset,
              GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}

void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
        pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
          emit_insn (gen_addsi3 (offset, offset, offset));
          break;
        case V4SFmode:
        case V4SImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
          emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
          break;
        case V2DImode:
        case V2DFmode:
          emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
          break;
        default:
          abort ();
        }
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}
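
/* For the variable-index cases above, a small worked example (the index
   is chosen for illustration only): extracting element 2 of a V4SI
   vector computes tmp = 2 << 2 = 8, and rotating the quadword by 8
   bytes moves bytes 8-11 -- element 2 -- into the preferred slot where
   spu_convert can read it out as an SImode value.  */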

static void
spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      static unsigned char const shufa[16] = {
        2, 3, 0, 1, 18, 19, 16, 17,
        0, 1, 2, 3, 16, 17, 18, 19
      };
      static unsigned char const insna[16] = {
        0x41, 0, 0, 79,
        0x41, 0, 0, STATIC_CHAIN_REGNUM,
        0x60, 0x80, 0, 79,
        0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = adjust_address (m_tramp, V4SImode, 0);
      emit_move_insn (mem, insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (mem, bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      static unsigned char const insna[16] = {
        0x42, 0, 0, STATIC_CHAIN_REGNUM,
        0x30, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
         fits 18 bits and the last 4 are zeros.  This will be true if
         the stack pointer is initialized to 0x3fff0 at program start,
         otherwise the ila instruction will be garbage.  */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
                 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = adjust_address (m_tramp, V4SImode, 0);
      emit_move_insn (mem, insn);
    }
  emit_insn (gen_sync ());
}

void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
        arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
        arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
        {
        case HImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_extendhisi2 (sign, ops[1]));
          arr[last] = 0x03;
          arr[last - 1] = 0x02;
          break;
        case SImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
          for (i = 0; i < 4; i++)
            arr[last - i] = 3 - i;
          break;
        case DImode:
          sign = gen_reg_rtx (SImode);
          c = gen_reg_rtx (SImode);
          emit_insn (gen_spu_convert (c, ops[1]));
          emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
          for (i = 0; i < 8; i++)
            arr[last - i] = 7 - i;
          break;
        default:
          abort ();
        }
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
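
/* Sketch of the sign-extension masks built above (again assuming shufb
   control semantics: 0x00-0x0f select bytes from ops[1], 0x10-0x1f from
   the SIGN register).  For an SImode source extended to TImode, arr
   becomes twelve copies of 0x10 followed by 00 01 02 03, i.e. the
   result is twelve bytes of the sign word (all sign bits, since SIGN is
   ops[1] shifted right arithmetically by 31) followed by the original
   four bytes of the value.  */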

/* Expand vector initialization.  If there are any constant parts,
   load constant parts first.  Then load any non-constant parts.  */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
            || GET_CODE (x) == CONST_DOUBLE
            || GET_CODE (x) == CONST_FIXED))
        ++n_var;
      else
        {
          if (first_constant == NULL_RTX)
            first_constant = x;
        }
      if (i > 0 && !rtx_equal_p (x, first))
        all_same = false;
    }

  /* If all elements are the same, use splats to repeat elements.  */
  if (all_same)
    {
      if (!CONSTANT_P (first)
          && !register_operand (first, GET_MODE (x)))
        first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* Load constant parts.  */
  if (n_var != n_elts)
    {
      if (n_var == 0)
        {
          emit_move_insn (target,
                          gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
        }
      else
        {
          rtx constant_parts_rtx = copy_rtx (vals);

          gcc_assert (first_constant != NULL_RTX);
          /* Fill empty slots with the first constant; this increases
             our chance of using splats in the recursive call below.  */
          for (i = 0; i < n_elts; ++i)
            {
              x = XVECEXP (constant_parts_rtx, 0, i);
              if (!(CONST_INT_P (x)
                    || GET_CODE (x) == CONST_DOUBLE
                    || GET_CODE (x) == CONST_FIXED))
                XVECEXP (constant_parts_rtx, 0, i) = first_constant;
            }

          spu_expand_vector_init (target, constant_parts_rtx);
        }
    }

  /* Load variable parts.  */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
        {
          x = XVECEXP (vals, 0, i);
          if (!(CONST_INT_P (x)
                || GET_CODE (x) == CONST_DOUBLE
                || GET_CODE (x) == CONST_FIXED))
            {
              if (!register_operand (x, GET_MODE (x)))
                x = force_reg (GET_MODE (x), x);
              insert_operands[1] = x;
              insert_operands[3] = GEN_INT (i);
              spu_builtin_insert (insert_operands);
            }
        }
    }
}
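
/* Example of the strategy above (element values chosen only for
   illustration): initializing a V4SI vector with { x, 1, 2, 3 }, where
   x is in a register, first emits a move of the constant vector
   { 1, 1, 2, 3 } -- the variable slot is filled with the first constant
   in the recursive call -- and then a single spu_builtin_insert of x
   into element 0.  */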

/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE.  Return -1 if valid insn is not available.  */

static int
get_vec_cmp_insn (enum rtx_code code,
                  enum machine_mode dest_mode,
                  enum machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}
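
/* Note that only EQ, GT and GTU have direct ceq/cgt/clgt patterns; the
   remaining comparisons (LT, LTU, NE, GE, GEU, LE, LEU) are synthesized
   from these by spu_emit_vector_compare below, either by swapping the
   operands or by combining an EQ result with a GT/GTU result.  */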

/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */

static rtx
spu_emit_vector_compare (enum rtx_code rcode,
                         rtx op0, rtx op1,
                         enum machine_mode dmode)
{
  int vec_cmp_insn;
  rtx mask;
  enum machine_mode dest_mode;
  enum machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Single precision floating point vector compare instructions use
     destination V4SImode; double precision ones use destination
     V2DImode.  Move the destination to the appropriate mode later.  */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;
  else
    dest_mode = dmode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
    {
      bool swap_operands = false;
      bool try_again = false;
      switch (rcode)
        {
        case LT:
          rcode = GT;
          swap_operands = true;
          try_again = true;
          break;
        case LTU:
          rcode = GTU;
          swap_operands = true;
          try_again = true;
          break;
        case NE:
          /* Treat A != B as ~(A==B).  */
          {
            enum insn_code nor_code;
            rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
            nor_code = optab_handler (one_cmpl_optab, (int) dest_mode)->insn_code;
            gcc_assert (nor_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
            if (dmode != dest_mode)
              {
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);
                return temp;
              }
            return mask;
          }
          break;
        case GE:
        case GEU:
        case LE:
        case LEU:
          /* Try GT/GTU/LT/LTU OR EQ.  */
          {
            rtx c_rtx, eq_rtx;
            enum insn_code ior_code;
            enum rtx_code new_code;

            switch (rcode)
              {
              case GE:  new_code = GT;  break;
              case GEU: new_code = GTU; break;
              case LE:  new_code = LT;  break;
              case LEU: new_code = LTU; break;
              default:
                gcc_unreachable ();
              }

            c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
            eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

            ior_code = optab_handler (ior_optab, (int) dest_mode)->insn_code;
            gcc_assert (ior_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
            if (dmode != dest_mode)
              {
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);
                return temp;
              }
            return mask;
          }
          break;
        default:
          gcc_unreachable ();
        }

      /* You only get two chances.  */
      if (try_again)
        vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

      if (swap_operands)
        {
          rtx tmp;
          tmp = op0;
          op0 = op1;
          op1 = tmp;
        }
    }

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
    {
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
      return temp;
    }
  return mask;
}


/* Emit vector conditional expression.
   DEST is destination.  OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */

int
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
                           rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operations.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return 1;
}
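
/* The operand order in the selb above follows from the (assumed) selb
   semantics rt = (ra & ~rc) | (rb & rc): MASK has all-ones lanes where
   COND holds, so DEST receives OP1 in those lanes and OP2 elsewhere,
   which matches the VEC_COND_EXPR meaning of "cond ? op1 : op2".  */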

static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
          || GET_MODE (op) == BLKmode)
        return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
        return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}

static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;
  /* Check the range of immediate operands.  */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
        error ("%s expects an integer literal in the range [%d, %d].",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
          && (GET_CODE (XEXP (op, 0)) == PLUS
              || GET_CODE (XEXP (op, 0)) == MINUS))
        {
          v = INTVAL (XEXP (XEXP (op, 0), 1));
          op = XEXP (XEXP (op, 0), 0);
        }
      else if (GET_CODE (op) == CONST_INT)
        v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
               && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
        v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range.  */
      if (v < spu_builtin_range[range].low
          || v > spu_builtin_range[range].high)
        error ("%s expects an integer literal in the range [%d, %d]. ("
               HOST_WIDE_INT_PRINT_DEC ")",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high,
               v);

      switch (p)
        {
        case SPU_BTI_S10_4:
          lsbits = 4;
          break;
        case SPU_BTI_U16_2:
          /* This is only used in lqa, and stqa.  Even though the insns
             encode 16 bits of the address (all but the 2 least
             significant), only 14 bits are used because it is masked to
             be 16 byte aligned.  */
          lsbits = 4;
          break;
        case SPU_BTI_S16_2:
          /* This is used for lqr and stqr.  */
          lsbits = 2;
          break;
        default:
          lsbits = 0;
        }

      if (GET_CODE (op) == LABEL_REF
          || (GET_CODE (op) == SYMBOL_REF
              && SYMBOL_REF_FUNCTION_P (op))
          || (v & ((1 << lsbits) - 1)) != 0)
        warning (0, "%d least significant bits of %s are ignored.", lsbits,
                 d->name);
    }
}


static int
expand_builtin_args (struct spu_builtin_description *d, tree exp,
                     rtx target, rtx ops[])
{
  enum insn_code icode = (enum insn_code) d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl.  */

  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      if (arg == 0)
        abort ();
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
    }

  /* The insn pattern may have additional operands (SCRATCH).
     Return the number of actual non-SCRATCH operands.  */
  gcc_assert (i <= insn_data[icode].n_operands);
  return i;
}

static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
                      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = (enum insn_code) d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  int n_operands;
  tree return_type;

  /* Set up ops[] with values from arglist.  */
  n_operands = expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0.  */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {
      /* We prefer the mode specified for the match_operand, otherwise
         use the mode from the builtin function prototype.  */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
        tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target, because not using it can lead to extra copies,
         and when all of the registers are in use extra copies lead
         to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
        ops[0] = target;
      else
        target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
        abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
                              gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects.  */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands.  */
  for (p = 1; i < n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
        mode = insn_data[d->icode].operand[i].mode;
      else
        mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels.  */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar.  */
      if (VECTOR_MODE_P (mode)
          && (GET_CODE (ops[i]) == CONST_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
        {
          if (GET_CODE (ops[i]) == CONST_INT)
            ops[i] = spu_const (mode, INTVAL (ops[i]));
          else
            {
              rtx reg = gen_reg_rtx (mode);
              enum machine_mode imode = GET_MODE_INNER (mode);
              if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
                ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
              if (imode != GET_MODE (ops[i]))
                ops[i] = convert_to_mode (imode, ops[i],
                                          TYPE_UNSIGNED (spu_builtin_types
                                                         [d->parm[i]]));
              emit_insn (gen_spu_splats (reg, ops[i]));
              ops[i] = reg;
            }
        }

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
        ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
         the builtin function prototype.  */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}

rtx
spu_expand_builtin (tree exp,
                    rtx target,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}

/* Implement targetm.vectorize.builtin_mul_widen_even.  */
static tree
spu_builtin_mul_widen_even (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtins[SPU_MULE_0].fndecl;
      else
        return spu_builtins[SPU_MULE_1].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
static tree
spu_builtin_mul_widen_odd (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtins[SPU_MULO_1].fndecl;
      else
        return spu_builtins[SPU_MULO_0].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mask_for_load.  */
static tree
spu_builtin_mask_for_load (void)
{
  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
  gcc_assert (d);
  return d->fndecl;
}

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.  */
  if (runtime_test)
    return -19;
  else
    return 0;
}

/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N iterations.  This routine does not determine how many
   iterations are required to reach desired alignment.  */

static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned.  */
  return true;
}

/* Implement targetm.vectorize.builtin_vec_perm.  */
tree
spu_builtin_vec_perm (tree type, tree *mask_element_type)
{
  struct spu_builtin_description *d;

  *mask_element_type = unsigned_char_type_node;

  switch (TYPE_MODE (type))
    {
    case V16QImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_0];
      else
        d = &spu_builtins[SPU_SHUFFLE_1];
      break;

    case V8HImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_2];
      else
        d = &spu_builtins[SPU_SHUFFLE_3];
      break;

    case V4SImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_4];
      else
        d = &spu_builtins[SPU_SHUFFLE_5];
      break;

    case V2DImode:
      if (TYPE_UNSIGNED (type))
        d = &spu_builtins[SPU_SHUFFLE_6];
      else
        d = &spu_builtins[SPU_SHUFFLE_7];
      break;

    case V4SFmode:
      d = &spu_builtins[SPU_SHUFFLE_8];
      break;

    case V2DFmode:
      d = &spu_builtins[SPU_SHUFFLE_9];
      break;

    default:
      return NULL_TREE;
    }

  gcc_assert (d);
  return d->fndecl;
}

/* Return the appropriate mode for a named address pointer.  */
static enum machine_mode
spu_addr_space_pointer_mode (addr_space_t addrspace)
{
  switch (addrspace)
    {
    case ADDR_SPACE_GENERIC:
      return ptr_mode;
    case ADDR_SPACE_EA:
      return EAmode;
    default:
      gcc_unreachable ();
    }
}

/* Return the appropriate mode for a named address address.  */
static enum machine_mode
spu_addr_space_address_mode (addr_space_t addrspace)
{
  switch (addrspace)
    {
    case ADDR_SPACE_GENERIC:
      return Pmode;
    case ADDR_SPACE_EA:
      return EAmode;
    default:
      gcc_unreachable ();
    }
}

/* Determine if one named address space is a subset of another.  */

static bool
spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
{
  gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
  gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);

  if (subset == superset)
    return true;

  /* If we have -mno-address-space-conversion, treat __ea and generic as not
     being subsets but instead as disjoint address spaces.  */
  else if (!TARGET_ADDRESS_SPACE_CONVERSION)
    return false;

  else
    return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
}
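
/* In other words, with the default -maddress-space-conversion the
   generic (local store) space is treated as a subset of __ea, so
   spu_addr_space_subset_p (ADDR_SPACE_GENERIC, ADDR_SPACE_EA) is true
   while the reverse query is false; with -mno-address-space-conversion
   both queries return false and the two spaces are disjoint.  */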

/* Convert from one address space to another.  */
static rtx
spu_addr_space_convert (rtx op, tree from_type, tree to_type)
{
  addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
  addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));

  gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
  gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);

  if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
    {
      rtx result, ls;

      ls = gen_const_mem (DImode,
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (Pmode);
      ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
      op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
                                  ls, const0_rtx, Pmode, 1);

      emit_insn (gen_subsi3 (result, op, ls));

      return result;
    }

  else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
    {
      rtx result, ls;

      ls = gen_const_mem (DImode,
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (EAmode);
      ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
      op = force_reg (Pmode, op);
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
                                  ls, const0_rtx, EAmode, 1);
      op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));

      if (EAmode == SImode)
        emit_insn (gen_addsi3 (result, op, ls));
      else
        emit_insn (gen_adddi3 (result, op, ls));

      return result;
    }

  else
    gcc_unreachable ();
}


/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
   -2 marks instructions that can go in pipe0 or pipe1.  */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      assert (p >= 0);
      assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
        fprintf (dump_file, "i%d %s %d %d\n",
                 INSN_UID (insn),
                 insn_data[INSN_CODE (insn)].name,
                 p, t[p]);
    }
  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}
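
/* A small worked example of the MII bound above, with pipe counts made
   up for illustration: if a loop body has t = {2, 0, 3, 1} (two
   either-pipe instructions, three pipe0 instructions and one pipe1
   instruction), the result is
   MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) = MAX (3, 3) = 3.  */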

void
spu_init_expanders (void)
{
  if (cfun)
    {
      rtx r0, r1;
      /* The hard frame pointer is only 128 bit aligned when
         frame_pointer_needed is true.  We don't know that until we're
         expanding the prologue.  */
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;

      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
         LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
         to be treated as aligned, so generate them here.  */
      r0 = gen_reg_rtx (SImode);
      r1 = gen_reg_rtx (SImode);
      mark_reg_pointer (r0, 128);
      mark_reg_pointer (r1, 128);
      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
                  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
    }
}

static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* On SPU, word mode is TImode, so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* On SPU, word mode is TImode, so it is better to use SImode
     for shift counts.  */
  return SImode;
}

/* An early place to adjust some flags after GCC has finished processing
   them.  */
static void
asm_file_start (void)
{
  /* Variable tracking should be run after all optimizations which
     change order of insns.  It also needs a valid CFG.  */
  spu_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  default_file_start ();
}

/* Implement targetm.section_type_flags.  */
static unsigned int
spu_section_type_flags (tree decl, const char *name, int reloc)
{
  /* .toe needs to have type @nobits.  */
  if (strcmp (name, ".toe") == 0)
    return SECTION_BSS;
  /* Don't load _ea into the current address space.  */
  if (strcmp (name, "._ea") == 0)
    return SECTION_WRITE | SECTION_DEBUG;
  return default_section_type_flags (decl, name, reloc);
}

/* Implement targetm.select_section.  */
static section *
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
{
  /* Variables and constants defined in the __ea address space
     go into a special section named "._ea".  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
    {
      /* We might get called with string constants, but get_named_section
         doesn't like them as they are not DECLs.  Also, we need to set
         flags in that case.  */
      if (!DECL_P (decl))
        return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);

      return get_named_section (decl, "._ea", reloc);
    }

  return default_elf_select_section (decl, reloc, align);
}

/* Implement targetm.unique_section.  */
static void
spu_unique_section (tree decl, int reloc)
{
  /* We don't support unique section names in the __ea address
     space for now.  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
    return;

  default_unique_section (decl, reloc);
}

/* Generate a constant or register which contains 2^SCALE.  We assume
   the result is valid for MODE.  Currently, MODE must be V4SFmode and
   SCALE must be SImode.  */
rtx
spu_gen_exp2 (enum machine_mode mode, rtx scale)
{
  gcc_assert (mode == V4SFmode);
  gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
  if (GET_CODE (scale) != CONST_INT)
    {
      /* unsigned int exp = (127 + scale) << 23;
         __vector float m = (__vector float) spu_splats (exp);  */
      rtx reg = force_reg (SImode, scale);
      rtx exp = gen_reg_rtx (SImode);
      rtx mul = gen_reg_rtx (mode);
      emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
      emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
      emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
      return mul;
    }
  else
    {
      HOST_WIDE_INT exp = 127 + INTVAL (scale);
      unsigned char arr[16];
      arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
      arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
      arr[2] = arr[6] = arr[10] = arr[14] = 0;
      arr[3] = arr[7] = arr[11] = arr[15] = 0;
      return array_to_constant (mode, arr);
    }
}
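
/* Worked example for the constant case above: with SCALE = 3,
   exp = 127 + 3 = 130, so each word of the constant is built from the
   bytes { 130 >> 1, (130 << 7) & 0xff, 0, 0 } = { 0x41, 0x00, 0x00, 0x00 },
   i.e. 0x41000000, which is the IEEE single-precision encoding of
   2^3 = 8.0f.  */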

/* After reload, just change the convert into a move instruction
   or a dead instruction.  */
void
spu_split_convert (rtx ops[])
{
  if (REGNO (ops[0]) == REGNO (ops[1]))
    emit_note (NOTE_INSN_DELETED);
  else
    {
      /* Use TImode always as this might help hard reg copyprop.  */
      rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
      rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
      emit_insn (gen_move_insn (op0, op1));
    }
}

void
spu_function_profiler (FILE * file, int labelno)
{
  fprintf (file, "# profile\n");
  fprintf (file, "brsl $75, _mcount\n");
}

#include "gt-spu.h"