/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2013 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "gimple.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "opts.h"
#include "tree-pass.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     not variable.  */
  const int int_mul_bit_factor;
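
  /* For illustration (added commentary, not part of the original
     sources): with the ultrasparc_costs table below, int_mul is
     COSTS_N_INSNS (4) and int_mul_bit_factor is 2, so a multiply by a
     constant such as 0x7ff (highest set bit = 10) is estimated as
     COSTS_N_INSNS (4) + (10 - 3) / 2, i.e. the base cost plus 3 extra
     cost units.  COSTS_N_INSNS converts an instruction count into the
     cost units used by the RTL cost hooks.  */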

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};
static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;
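
/* Added commentary: sparc_costs starts out pointing at cypress_costs
   (the v7 default) and is re-pointed to the table matching sparc_cpu
   by the switch statement in sparc_option_override below.  */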

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out
   whether anything branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
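
/* For illustration (added commentary): entries 24-29 above map the
   incoming registers %i0-%i5 (hard regs 24-29) onto the outgoing
   registers %o0-%o5 (hard regs 8-13), and entry 31 maps %i7 onto %o7
   (hard reg 15); %fp (hard reg 30) is deliberately left unmapped.
   This renaming is what lets a leaf function run in its caller's
   register window.  */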

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Some local-dynamic TLS symbol name.  */
  const char *some_ld_name;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, enum machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx, rtx, rtx, int);
static int hypersparc_adjust_cost (rtx, rtx, rtx, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (enum machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (enum machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx, rtx, rtx, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_vis_init_builtins (void);
static rtx sparc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static int sparc_vis_mul8x16 (int, int);
static void sparc_handle_vis_mul8x16 (tree *, int, tree, tree, tree);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static int sparc_register_move_cost (enum machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static enum machine_mode sparc_promote_function_mode (const_tree, enum machine_mode,
						      int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (enum machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, enum machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     enum machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 enum machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       enum machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					enum machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (enum machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    enum machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static enum machine_mode sparc_preferred_simd_mode (enum machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   enum machine_mode,
					   secondary_reload_info *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}
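
/* For illustration (added commentary): given either (mem:SI ...) or
   (zero_extend:SI (mem:HI ...)), mem_ref returns the inner MEM; for
   anything else, e.g. (reg:SI ...), it returns NULL_RTX.  */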

/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static bool
sparc_gate_work_around_errata (void)
{
  /* The only errata we handle are those of the AT697F and UT699.  */
  return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
}

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx insn, next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
	insn = XVECEXP (PATTERN (insn), 0, 1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
			ld [address], %fx+1
			FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			ld [address], %fx+1
			FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		    ld [address], %fx+1
		    fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL_RTX;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (GET_CODE (PATTERN (after)) == SEQUENCE)
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = XVECEXP (PATTERN (after), 0, 1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

struct rtl_opt_pass pass_work_around_errata =
{
 {
  RTL_PASS,
  "errata",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  sparc_gate_work_around_errata,	/* gate */
  sparc_do_work_around_errata,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_MACH_DEP,				/* tv_id */
  0,					/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_verify_rtl_sharing,		/* todo_flags_finish */
 }
};

struct register_pass_info insert_pass_work_around_errata =
{
  &pass_work_around_errata.pass,	/* pass */
  "dbr",				/* reference_pass_name */
  1,					/* ref_pass_instance_number */
  PASS_POS_INSERT_AFTER			/* pos_op */
};
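
/* For illustration (added commentary, sequence derived from the case
   comments above): on the AT697F, a sequence such as

	ld	[address], %f3
	faddd	%f2, %f4, %f2

   matches case [1-4] (the load targets the odd register %f3, so the
   false dependency is on the enclosing double register %f2), and the
   pass inserts a nop between the load and the FP operation.  */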

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  register_pass (&insert_pass_work_around_errata);
}

/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}
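
/* For illustration (added commentary): the single-precision bit
   pattern of 1.0f is 0x3f800000, whose low 10 bits are clear, so it
   satisfies SPARC_SETHI_P and hence fp_sethi_p: a single
   "sethi %hi(0x3f800000), %reg" loads it.  A pattern that fits in a
   signed 13-bit immediate would instead satisfy SPARC_SIMM13_P and
   fp_mov_p.  */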

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}

/* Return true if the address of LABEL can be loaded by means of the
   mov{si,di}_pic_label_ref patterns in PIC mode.  */

static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* VxWorks does not impose a fixed gap between segments; the run-time
     gap can be different from the object-file gap.  We therefore can't
     assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
     are absolutely sure that X is in the same segment as the GOT.
     Unfortunately, the flexibility of linker scripts means that we
     can't be sure of that in general, so assume that GOT-relative
     accesses are never valid on VxWorks.  */
  if (TARGET_VXWORKS_RTP)
    return false;

  /* Similarly, if the label is non-local, it might end up being placed
     in a different section than the current one, and mov_pic_label_ref
     requires the label and the code to be in the same section.  */
  if (LABEL_REF_NONLOCAL_P (label))
    return false;

  /* Finally, if we are reordering basic blocks and partitioning into
     hot and cold sections, this might happen for any label.  */
  if (flag_reorder_blocks_and_partition)
    return false;

  return true;
}
/* Expand a move instruction.  Return true if all work is done.  */

bool
sparc_expand_move (enum machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fixup TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands[1]))
    {
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.  */
      if (GET_CODE (operands[1]) == LABEL_REF
	  && can_use_mov_pic_label_ref (operands[1]))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      gcc_assert (TARGET_ARCH64);
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
	 not storing directly into memory.  So fix this up to avoid
	 crashes in output_constant_pool.  */
      if (operands[1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* With TARGET_VIS we can clear FP registers or set them to
	 all-ones; other registers can always be handled this way.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers.  */
	      || (mode == DFmode
		  && ! can_create_pseudo_p ())))
	return false;

      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  switch (mode)
    {
    case QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case HImode:
    case SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case TImode:
      {
	rtx high, low;
	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Load OP1, a 32-bit constant, into OP0, a register.
   We know it can't be done in one insn when we get
   here, the move expander guarantees this.  */

static void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx temp = op0;

  if (can_create_pseudo_p ())
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      GEN_INT (INTVAL (op1)
				       & ~(HOST_WIDE_INT)0x3ff)));

      emit_insn (gen_rtx_SET (VOIDmode,
			      op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (VOIDmode, temp,
			      gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode,
			      op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
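/* Worked example (illustrative, not part of the original sources):
   loading 0x12345678 splits on the 10-bit boundary used above:

     0x12345678 & ~0x3ff = 0x12345400	->  sethi %hi(0x12345678), %o0
     0x12345678 &  0x3ff = 0x278	->  or    %o0, 0x278, %o0

   and 0x12345400 | 0x278 recombines to 0x12345678.  */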
/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
   If TEMP is nonzero, we are forbidden to use any other scratch
   registers.  Otherwise, we are allowed to generate them as needed.

   Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
   or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns).  */

void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code-model support.  */
  switch (sparc_cmodel)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;
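      /* Illustrative breakdown (not in the original sources): for a
	 44-bit address A, the relocations above select
	   %h44(A) = bits 43..22, %m44(A) = bits 21..12, %l44(A) = bits 11..0,
	 so sethi/or build bits 43..12, the sllx by 12 shifts them into
	 place, and the final or fills in the low 12 bits.  */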
    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (VOIDmode, temp5,
			      gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (VOIDmode, temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (VOIDmode, temp5,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}
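/* Summary for orientation (illustrative, not in the original sources):
   the code models above trade address range for sequence length --
   medlow needs 2 insns, medmid 4, medany and embmedany text references 6,
   while embmedany data references need 3 plus the reserved base register.  */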
#if HOST_BITS_PER_WIDE_INT == 32
static void
sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED)
{
  gcc_unreachable ();
}
#else
/* These avoid problems when cross compiling.  If we do not
   go through all this hair then the optimizer will see
   invalid REG_EQUAL notes or in some cases none at all.  */
static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);

/* The optimizer is not to assume anything about exactly
   which bits are set for a HIGH, they are unspecified.
   Unfortunately this leads to many missed optimizations
   during CSE.  We mask out the non-HIGH bits, so that the
   result matches a plain movdi, to alleviate this problem.  */
static rtx
gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
}

static rtx
gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
{
  return gen_rtx_SET (VOIDmode, dest, GEN_INT (val));
}

static rtx
gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_IOR (DImode, src, GEN_INT (val));
}

static rtx
gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
{
  return gen_rtx_XOR (DImode, src, GEN_INT (val));
}

/* Worker routines for 64-bit constant formation on arch64.
   One of the key things to be doing in these emissions is
   to create as many temp REGs as possible.  This makes it
   possible for half-built constants to be used later when
   such values are similar to something required later on.
   Without doing this, the optimizer cannot see such
   opportunities.  */

static void sparc_emit_set_const64_quick1 (rtx, rtx,
					   unsigned HOST_WIDE_INT, int);

static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_safe_XOR64 (temp,
						  (-(HOST_WIDE_INT)0x400
						   | (low_bits & 0x3ff)))));
	}
    }
}
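/* Worked example (illustrative, not in the original sources): for the
   negative constant 0xffffffff12345678, quick1 is entered with
   low_bits = 0x12345678 and is_neg set, so high_bits = ~low_bits =
   0xedcba987 and we emit

	sethi	%hi(0xedcba987), %temp	! temp = 0x00000000edcba800
	xor	%temp, -0x188, %reg	! -0x188 == -0x400 | 0x278

   where the sign-extended xor immediate both fixes the low 10 bits
   and flips the upper word back on.  */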
static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
					   unsigned HOST_WIDE_INT, int);

static void
sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT high_bits,
			       unsigned HOST_WIDE_INT low_immediate,
			       int shift_count)
{
  rtx temp2 = op0;

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (VOIDmode, op0,
			  gen_rtx_ASHIFT (DImode, temp2,
					  GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (VOIDmode, op0,
			    gen_safe_OR64 (op0, low_immediate)));
}

static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
					    unsigned HOST_WIDE_INT);

/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
				unsigned HOST_WIDE_INT high_bits,
				unsigned HOST_WIDE_INT low_bits)
{
  rtx sub_temp = op0;

  if (can_create_pseudo_p ())
    sub_temp = gen_reg_rtx (DImode);

  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (can_create_pseudo_p ())
    {
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (VOIDmode, temp4,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (32))));

      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_ASHIFT (DImode, sub_temp,
						  GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (VOIDmode, op0,
			      gen_rtx_ASHIFT (DImode, sub_temp,
					      GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (VOIDmode, op0,
				gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
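/* Worked example (illustrative, not in the original sources): with
   pseudos available, 0x123456789abcdef0 decomposes above into

	sethi	%hi(0x12345678), %t1	! 0x12345400
	or	%t1, 0x278, %t2		! 0x12345678
	sllx	%t2, 32, %t4
	sethi	%hi(0x9abcdef0), %t3	! 0x9abcdc00
	or	%t3, 0x2f0, %t5		! 0x9abcdef0
	add	%t4, %t5, %reg

   i.e. both words are built independently and then glued together with
   a shift and an add.  */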
/* Analyze a 64-bit constant for certain properties.  */
static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
				    unsigned HOST_WIDE_INT,
				    int *, int *, int *);

static void
analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
			unsigned HOST_WIDE_INT low_bits,
			int *hbsp, int *lbsp, int *abbasp)
{
  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
  int i;

  lowest_bit_set = highest_bit_set = -1;
  i = 0;
  do
    {
      if ((lowest_bit_set == -1)
	  && ((low_bits >> i) & 1))
	lowest_bit_set = i;
      if ((highest_bit_set == -1)
	  && ((high_bits >> (32 - i - 1)) & 1))
	highest_bit_set = (64 - i - 1);
    }
  while (++i < 32
	 && ((highest_bit_set == -1)
	     || (lowest_bit_set == -1)));
  if (i == 32)
    {
      i = 0;
      do
	{
	  if ((lowest_bit_set == -1)
	      && ((high_bits >> i) & 1))
	    lowest_bit_set = i + 32;
	  if ((highest_bit_set == -1)
	      && ((low_bits >> (32 - i - 1)) & 1))
	    highest_bit_set = 32 - i - 1;
	}
      while (++i < 32
	     && ((highest_bit_set == -1)
		 || (lowest_bit_set == -1)));
    }
  /* If there are no bits set this should have gone out
     as one instruction!  */
  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
  all_bits_between_are_set = 1;
  for (i = lowest_bit_set; i <= highest_bit_set; i++)
    {
      if (i < 32)
	{
	  if ((low_bits & (1 << i)) != 0)
	    continue;
	}
      else
	{
	  if ((high_bits & (1 << (i - 32))) != 0)
	    continue;
	}
      all_bits_between_are_set = 0;
      break;
    }
  *hbsp = highest_bit_set;
  *lbsp = lowest_bit_set;
  *abbasp = all_bits_between_are_set;
}

static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);

static int
const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
		   unsigned HOST_WIDE_INT low_bits)
{
  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;

  if (high_bits == 0
      || high_bits == 0xffffffff)
    return 1;

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  if ((highest_bit_set == 63
       || lowest_bit_set == 0)
      && all_bits_between_are_set != 0)
    return 1;

  if ((highest_bit_set - lowest_bit_set) < 21)
    return 1;

  return 0;
}

static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
							unsigned HOST_WIDE_INT,
							int, int);

static unsigned HOST_WIDE_INT
create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
			  unsigned HOST_WIDE_INT low_bits,
			  int lowest_bit_set, int shift)
{
  HOST_WIDE_INT hi, lo;

  if (lowest_bit_set < 32)
    {
      lo = (low_bits >> lowest_bit_set) << shift;
      hi = ((high_bits << (32 - lowest_bit_set)) << shift);
    }
  else
    {
      lo = 0;
      hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
    }
  gcc_assert (! (hi & lo));
  return (hi | lo);
}
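/* For illustration only (not part of the original sources): on a host
   with 64-bit long longs the same analysis can be written with GCC
   builtins; the hand-rolled 32-bit-word loops above avoid assuming
   that such builtins or a 64-bit host type exist.  */
#if 0 /* illustrative sketch, not built */
static void
example_analyze (unsigned long long v, int *lo, int *hi, int *contig)
{
  *lo = __builtin_ctzll (v);		/* lowest set bit */
  *hi = 63 - __builtin_clzll (v);	/* highest set bit */
  /* One contiguous run iff shifting out the low zeros leaves 2^n - 1.  */
  *contig = ((v >> *lo) & ((v >> *lo) + 1)) == 0;
}
#endif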
/* Here we are sure to be arch64 and this is an integer constant
   being loaded into a register.  Emit the most efficient
   insn sequence possible.  Detection of all the 1-insn cases
   has been done already.  */
static void
sparc_emit_set_const64 (rtx op0, rtx op1)
{
  unsigned HOST_WIDE_INT high_bits, low_bits;
  int lowest_bit_set, highest_bit_set;
  int all_bits_between_are_set;
  rtx temp = 0;

  /* Sanity check that we know what we are working with.  */
  gcc_assert (TARGET_ARCH64
	      && (GET_CODE (op0) == SUBREG
		  || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));

  if (! can_create_pseudo_p ())
    temp = op0;

  if (GET_CODE (op1) != CONST_INT)
    {
      sparc_emit_set_symbolic_const64 (op0, op1, temp);
      return;
    }

  if (! temp)
    temp = gen_reg_rtx (DImode);

  high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
  low_bits = (INTVAL (op1) & 0xffffffff);

  /* low_bits	bits 0  --> 31
     high_bits	bits 32 --> 63  */

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* First try for a 2-insn sequence.  */

  /* These situations are preferred because the optimizer can
   * do more things with them:
   * 1) mov	-1, %reg
   *    sllx	%reg, shift, %reg
   * 2) mov	-1, %reg
   *    srlx	%reg, shift, %reg
   * 3) mov	some_small_const, %reg
   *    sllx	%reg, shift, %reg
   */
  if (((highest_bit_set == 63
	|| lowest_bit_set == 0)
       && all_bits_between_are_set != 0)
      || ((highest_bit_set - lowest_bit_set) < 12))
    {
      HOST_WIDE_INT the_const = -1;
      int shift = lowest_bit_set;

      if ((highest_bit_set != 63
	   && lowest_bit_set != 0)
	  || all_bits_between_are_set == 0)
	{
	  the_const =
	    create_simple_focus_bits (high_bits, low_bits,
				      lowest_bit_set, 0);
	}
      else if (lowest_bit_set == 0)
	shift = -(63 - highest_bit_set);

      gcc_assert (SPARC_SIMM13_P (the_const));
      gcc_assert (shift != 0);

      emit_insn (gen_safe_SET64 (temp, the_const));
      if (shift > 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_ASHIFT (DImode,
						temp,
						GEN_INT (shift))));
      else if (shift < 0)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_LSHIFTRT (DImode,
						  temp,
						  GEN_INT (-shift))));
      return;
    }
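  /* Worked example (illustrative, not in the original sources):
     0xfff0000000000000 has lowest_bit_set == 52, highest_bit_set == 63
     and a contiguous run of set bits, so case 1 above applies:

	mov	-1, %reg
	sllx	%reg, 52, %reg  */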
  /* Now a range of 22 or fewer bits set somewhere.
   * 1) sethi	%hi(focus_bits), %reg
   *    sllx	%reg, shift, %reg
   * 2) sethi	%hi(focus_bits), %reg
   *    srlx	%reg, shift, %reg
   */
  if ((highest_bit_set - lowest_bit_set) < 21)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 10);

      gcc_assert (SPARC_SETHI_P (focus_bits));
      gcc_assert (lowest_bit_set != 10);

      emit_insn (gen_safe_HIGH64 (temp, focus_bits));

      /* If lowest_bit_set == 10 then a sethi alone could have done it.  */
      if (lowest_bit_set < 10)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_LSHIFTRT (DImode, temp,
						  GEN_INT (10 - lowest_bit_set))));
      else if (lowest_bit_set > 10)
	emit_insn (gen_rtx_SET (VOIDmode,
				op0,
				gen_rtx_ASHIFT (DImode, temp,
						GEN_INT (lowest_bit_set - 10))));
      return;
    }

  /* 1) sethi	%hi(low_bits), %reg
   *    or	%reg, %lo(low_bits), %reg
   * 2) sethi	%hi(~low_bits), %reg
   *    xor	%reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
   */
  if (high_bits == 0
      || high_bits == 0xffffffff)
    {
      sparc_emit_set_const64_quick1 (op0, temp, low_bits,
				     (high_bits == 0xffffffff));
      return;
    }

  /* Now, try 3-insn sequences.  */

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   */
  if (low_bits == 0)
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
      return;
    }

  /* We may be able to do something quick
     when the constant is negated, so try that.  */
  if (const64_is_2insns ((~high_bits) & 0xffffffff,
			 (~low_bits) & 0xfffffc00))
    {
      /* NOTE: The trailing bits get XOR'd so we need the
	 non-negated bits, not the negated ones.  */
      unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;

      if ((((~high_bits) & 0xffffffff) == 0
	   && ((~low_bits) & 0x80000000) == 0)
	  || (((~high_bits) & 0xffffffff) == 0xffffffff
	      && ((~low_bits) & 0x80000000) != 0))
	{
	  unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);

	  if ((SPARC_SETHI_P (fast_int)
	       && (~high_bits & 0xffffffff) == 0)
	      || SPARC_SIMM13_P (fast_int))
	    emit_insn (gen_safe_SET64 (temp, fast_int));
	  else
	    sparc_emit_set_const64 (temp, GEN_INT (fast_int));
	}
      else
	{
	  rtx negated_const;
	  negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
				   (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
	  sparc_emit_set_const64 (temp, negated_const);
	}

      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if (trailing_bits == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (VOIDmode, op0,
				  gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (VOIDmode,
				  op0,
				  gen_safe_XOR64 (temp,
						  (-0x400 | trailing_bits))));
	}
      return;
    }
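  /* Worked example (illustrative, not in the original sources):
     0xfffff7ffffffffff (all ones except bit 43) fails every case above,
     but its complement 0x0000080000000000 is a single bit, so we build
     that in two insns and flip it back:

	mov	1, %temp
	sllx	%temp, 43, %temp
	xnor	%temp, %g0, %reg	! one's complement  */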
  /* 1) sethi	%hi(xxx), %reg
   *    or	%reg, %lo(xxx), %reg
   *    sllx	%reg, yyy, %reg
   *
   * ??? This is just a generalized version of the low_bits==0
   * thing above, FIXME...
   */
  if ((highest_bit_set - lowest_bit_set) < 32)
    {
      unsigned HOST_WIDE_INT focus_bits =
	create_simple_focus_bits (high_bits, low_bits,
				  lowest_bit_set, 0);

      /* We can't get here in this state.  */
      gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);

      /* So what we know is that the set bits straddle the
	 middle of the 64-bit word.  */
      sparc_emit_set_const64_quick2 (op0, temp,
				     focus_bits, 0,
				     lowest_bit_set);
      return;
    }

  /* 1) sethi	%hi(high_bits), %reg
   *    or	%reg, %lo(high_bits), %reg
   *    sllx	%reg, 32, %reg
   *    or	%reg, low_bits, %reg
   */
  if (SPARC_SIMM13_P (low_bits)
      && ((int)low_bits > 0))
    {
      sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
      return;
    }

  /* The easiest way when all else fails is full decomposition.  */
  sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}
#endif /* HOST_BITS_PER_WIDE_INT == 32 */

/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
   return the mode to be used for the comparison.  For floating-point,
   CCFP[E]mode is used.  CC_NOOVmode should be used when the first operand
   is a PLUS, MINUS, NEG, or ASHIFT.  CCmode should be used when no special
   processing is needed.  */

enum machine_mode
select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED)
{
  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    {
      switch (op)
	{
	case EQ:
	case NE:
	case UNORDERED:
	case ORDERED:
	case UNLT:
	case UNLE:
	case UNGT:
	case UNGE:
	case UNEQ:
	case LTGT:
	  return CCFPmode;

	case LT:
	case LE:
	case GT:
	case GE:
	  return CCFPEmode;

	default:
	  gcc_unreachable ();
	}
    }
  else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
	   || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
    {
      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
	return CCX_NOOVmode;
      else
	return CC_NOOVmode;
    }
  else
    {
      if (TARGET_ARCH64 && GET_MODE (x) == DImode)
	return CCXmode;
      else
	return CCmode;
    }
}
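/* Usage sketch (illustrative, not in the original sources): comparing the
   result of an addition against zero yields the no-overflow modes, e.g.
   select_cc_mode (NE, gen_rtx_PLUS (SImode, a, b), const0_rtx) returns
   CC_NOOVmode, matching what addcc actually sets; LT/LE/GT/GE on floats
   select CCFPEmode because they must use the signaling fcmpe, while the
   NaN-tolerant codes get the quiet CCFPmode.  */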
/* Emit the compare insn and return the CC reg for a CODE comparison
   with operands X and Y.  */

static rtx
gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
{
  enum machine_mode mode;
  rtx cc_reg;

  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
    return x;

  mode = SELECT_CC_MODE (code, x, y);

  /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
     fcc regs (cse can't tell they're really call clobbered regs and will
     remove a duplicate comparison even if there is an intervening function
     call - it will then try to reload the cc reg via an int reg which is why
     we need the movcc patterns).  It is possible to provide the movcc
     patterns by using the ldxfsr/stxfsr v9 insns.  I tried it: you need two
     registers (say %g1,%g5) and it takes about 6 insns.  A better fix would be
     to tell cse that CCFPE mode registers (even pseudos) are call
     clobbered.  */

  /* ??? This is an experiment.  Rather than making changes to cse which may
     or may not be easy/clean, we do our own cse.  This is possible because
     we will generate hard registers.  Cse knows they're call clobbered (it
     doesn't know the same thing about pseudos).  If we guess wrong, no big
     deal, but if we win, great!  */

  if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
#if 1 /* experiment */
    {
      int reg;
      /* We cycle through the registers to ensure they're all exercised.  */
      static int next_fcc_reg = 0;
      /* Previous x,y for each fcc reg.  */
      static rtx prev_args[4][2];

      /* Scan prev_args for x,y.  */
      for (reg = 0; reg < 4; reg++)
	if (prev_args[reg][0] == x && prev_args[reg][1] == y)
	  break;
      if (reg == 4)
	{
	  reg = next_fcc_reg;
	  prev_args[reg][0] = x;
	  prev_args[reg][1] = y;
	  next_fcc_reg = (next_fcc_reg + 1) & 3;
	}
      cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
    }
#else
    cc_reg = gen_reg_rtx (mode);
#endif /* ! experiment */
  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
    cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
  else
    cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);

  /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD.  If we do, this
     will only result in an unrecognizable insn so no point in asserting.  */
  emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)));

  return cc_reg;
}


/* Emit the compare insn and return the CC reg for the comparison in CMP.  */

rtx
gen_compare_reg (rtx cmp)
{
  return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
}
/* This function is used for v9 only.
   DEST is the target of the Scc insn.
   CODE is the code for an Scc's comparison.
   X and Y are the values we compare.

   This function is needed to turn

	   (set (reg:SI 110)
	       (gt (reg:CCX 100 %icc)
	           (const_int 0)))
   into
	   (set (reg:SI 110)
	       (gt:DI (reg:CCX 100 %icc)
	           (const_int 0)))

   I.e., the instruction recognizer needs to see the mode of the comparison to
   find the right instruction.  We could use "gt:DI" right in the
   define_expand, but leaving it out allows us to handle DI, SI, etc.  */

static int
gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
{
  if (! TARGET_ARCH64
      && (GET_MODE (x) == DImode
	  || GET_MODE (dest) == DImode))
    return 0;

  /* Try to use the movrCC insns.  */
  if (TARGET_ARCH64
      && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
      && y == const0_rtx
      && v9_regcmp_p (compare_code))
    {
      rtx op0 = x;
      rtx temp;

      /* Special case for op0 != 0.  This can be done with one instruction if
	 dest == x.  */
      if (compare_code == NE
	  && GET_MODE (dest) == DImode
	  && rtx_equal_p (op0, dest))
	{
	  emit_insn (gen_rtx_SET (VOIDmode, dest,
				  gen_rtx_IF_THEN_ELSE (DImode,
					   gen_rtx_fmt_ee (compare_code, DImode,
							   op0, const0_rtx),
					   const1_rtx,
					   dest)));
	  return 1;
	}

      if (reg_overlap_mentioned_p (dest, op0))
	{
	  /* Handle the case where dest == x.
	     We "early clobber" the result.  */
	  op0 = gen_reg_rtx (GET_MODE (x));
	  emit_move_insn (op0, x);
	}

      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
      if (GET_MODE (op0) != DImode)
	{
	  temp = gen_reg_rtx (DImode);
	  convert_move (temp, op0, 0);
	}
      else
	temp = op0;
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				       gen_rtx_fmt_ee (compare_code, DImode,
						       temp, const0_rtx),
				       const1_rtx,
				       dest)));
      return 1;
    }
  else
    {
      x = gen_compare_reg_1 (compare_code, x, y);
      y = const0_rtx;

      gcc_assert (GET_MODE (x) != CC_NOOVmode
		  && GET_MODE (x) != CCX_NOOVmode);

      emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
      emit_insn (gen_rtx_SET (VOIDmode, dest,
			      gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
				       gen_rtx_fmt_ee (compare_code,
						       GET_MODE (x), x, y),
				       const1_rtx, dest)));
      return 1;
    }
}
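/* Illustrative note (not in the original sources): the dest == x special
   case above boils down to a single v9 conditional move on register
   contents, e.g. for "d = (d != 0)" it emits the equivalent of

	movrnz	%d, 1, %d	! if d is nonzero, d = 1  */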
/* Emit an scc insn.  For seq, sne, sgeu, and sltu, we can do this
   without jumps using the addx/subx instructions.  */

bool
emit_scc_insn (rtx operands[])
{
  rtx tem;
  rtx x;
  rtx y;
  enum rtx_code code;

  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
					      GET_CODE (operands[1]));
      operands[2] = XEXP (operands[1], 0);
      operands[3] = XEXP (operands[1], 1);
    }

  code = GET_CODE (operands[1]);
  x = operands[2];
  y = operands[3];

  /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
     more applications).  The exception to this is "reg != 0" which can
     be done in one instruction on v9 (so we do it).  */
  if (code == EQ)
    {
      if (GET_MODE (x) == SImode)
	{
	  rtx pat = gen_seqsi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
      else if (GET_MODE (x) == DImode)
	{
	  rtx pat = gen_seqdi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
    }

  if (code == NE)
    {
      if (GET_MODE (x) == SImode)
	{
	  rtx pat = gen_snesi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
      else if (GET_MODE (x) == DImode)
	{
	  rtx pat;
	  if (TARGET_VIS3)
	    pat = gen_snedi_special_vis3 (operands[0], x, y);
	  else
	    pat = gen_snedi_special (operands[0], x, y);
	  emit_insn (pat);
	  return true;
	}
    }

  if (TARGET_V9
      && TARGET_ARCH64
      && GET_MODE (x) == DImode
      && !(TARGET_VIS3
	   && (code == GTU || code == LTU))
      && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* We can do LTU and GEU using the addx/subx instructions too.  And
     for GTU/LEU, if both operands are registers swap them and fall
     back to the easy case.  */
  if (code == GTU || code == LEU)
    {
      if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	  && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
	{
	  tem = x;
	  x = y;
	  y = tem;
	  code = swap_condition (code);
	}
    }

  if (code == LTU
      || (!TARGET_VIS3 && code == GEU))
    {
      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			      gen_rtx_fmt_ee (code, SImode,
					      gen_compare_reg_1 (code, x, y),
					      const0_rtx)));
      return true;
    }

  /* All the possibilities to use addx/subx-based sequences have been
     exhausted, try for a 3 instruction sequence using v9 conditional
     moves.  */
  if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
    return true;

  /* Nope, do branches.  */
  return false;
}

/* Emit a conditional jump insn for the v9 architecture using comparison code
   CODE and jump target LABEL.
   This function exists to take advantage of the v9 brxx insns.  */

static void
emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
{
  emit_jump_insn (gen_rtx_SET (VOIDmode,
			       pc_rtx,
			       gen_rtx_IF_THEN_ELSE (VOIDmode,
					gen_rtx_fmt_ee (code, GET_MODE (op0),
							op0, const0_rtx),
					gen_rtx_LABEL_REF (VOIDmode, label),
					pc_rtx)));
}

/* Emit a conditional jump insn for the UA2011 architecture using
   comparison code CODE and jump target LABEL.  This function exists
   to take advantage of the UA2011 Compare and Branch insns.  */

static void
emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
{
  rtx if_then_else;

  if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
				       gen_rtx_fmt_ee (code, GET_MODE (op0),
						       op0, op1),
				       gen_rtx_LABEL_REF (VOIDmode, label),
				       pc_rtx);

  emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else));
}
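/* Illustrative examples (not in the original sources) of what the two
   emitters above ultimately produce: the v9 branch on register contents,

	brnz,pt	%o0, .Llabel
	 nop

   and the UA2011 compare-and-branch, which has no delay slot at all,
   e.g. (assuming binutils-style mnemonics):

	cwbne	%o0, %o1, .Llabel  */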
void
emit_conditional_branch_insn (rtx operands[])
{
  /* The quad-word fp compare library routines all return nonzero to indicate
     true, which is different from the equivalent libgcc routines, so we must
     handle them specially here.  */
  if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
    {
      operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
					      GET_CODE (operands[0]));
      operands[1] = XEXP (operands[0], 0);
      operands[2] = XEXP (operands[0], 1);
    }

  /* If we can tell early on that the comparison is against a constant
     that won't fit in the 5-bit signed immediate field of a cbcond,
     use one of the other v9 conditional branch sequences.  */
  if (TARGET_CBCOND
      && GET_CODE (operands[1]) == REG
      && (GET_MODE (operands[1]) == SImode
	  || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
      && (GET_CODE (operands[2]) != CONST_INT
	  || SPARC_SIMM5_P (INTVAL (operands[2]))))
    {
      emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2],
			operands[3]);
      return;
    }

  if (TARGET_ARCH64 && operands[2] == const0_rtx
      && GET_CODE (operands[1]) == REG
      && GET_MODE (operands[1]) == DImode)
    {
      emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
      return;
    }

  operands[1] = gen_compare_reg (operands[0]);
  operands[2] = const0_rtx;
  operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
				operands[1], operands[2]);
  emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
				  operands[3]));
}


/* Generate a DFmode part of a hard TFmode register.
   REG is the TFmode hard register, LOW is 1 for the
   low 64 bits of the register and 0 otherwise.  */

rtx
gen_df_reg (rtx reg, int low)
{
  int regno = REGNO (reg);

  if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
    regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
  return gen_rtx_REG (DFmode, regno);
}
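/* Worked example (illustrative, not in the original sources): for a
   TFmode value living in %f4 on this big-endian target, gen_df_reg
   returns %f4 for LOW == 0 (the most significant DFmode half) and
   %f6 for LOW == 1 (the least significant half, two FP regs up).  */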
/* Generate a call to FUNC with OPERANDS.  Operand 0 is the return value.
   Unlike normal calls, TFmode operands are passed by reference.  It is
   assumed that no more than 3 operands are required.  */

static void
emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
{
  rtx ret_slot = NULL, arg[3], func_sym;
  int i;

  /* We only expect to be called for conversions, unary, and binary ops.  */
  gcc_assert (nargs == 2 || nargs == 3);

  for (i = 0; i < nargs; ++i)
    {
      rtx this_arg = operands[i];
      rtx this_slot;

      /* TFmode arguments and return values are passed by reference.  */
      if (GET_MODE (this_arg) == TFmode)
	{
	  int force_stack_temp;

	  force_stack_temp = 0;
	  if (TARGET_BUGGY_QP_LIB && i == 0)
	    force_stack_temp = 1;

	  if (GET_CODE (this_arg) == MEM
	      && ! force_stack_temp)
	    {
	      tree expr = MEM_EXPR (this_arg);
	      if (expr)
		mark_addressable (expr);
	      this_arg = XEXP (this_arg, 0);
	    }
	  else if (CONSTANT_P (this_arg)
		   && ! force_stack_temp)
	    {
	      this_slot = force_const_mem (TFmode, this_arg);
	      this_arg = XEXP (this_slot, 0);
	    }
	  else
	    {
	      this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));

	      /* Operand 0 is the return value.  We'll copy it out later.  */
	      if (i > 0)
		emit_move_insn (this_slot, this_arg);
	      else
		ret_slot = this_slot;

	      this_arg = XEXP (this_slot, 0);
	    }
	}

      arg[i] = this_arg;
    }

  func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);

  if (GET_MODE (operands[0]) == TFmode)
    {
      if (nargs == 2)
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]));
      else
	emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
			   arg[0], GET_MODE (arg[0]),
			   arg[1], GET_MODE (arg[1]),
			   arg[2], GET_MODE (arg[2]));

      if (ret_slot)
	emit_move_insn (operands[0], ret_slot);
    }
  else
    {
      rtx ret;

      gcc_assert (nargs == 2);

      ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
				     GET_MODE (operands[0]), 1,
				     arg[1], GET_MODE (arg[1]));

      if (ret != operands[0])
	emit_move_insn (operands[0], ret);
    }
}

/* Expand soft-float TFmode calls to SPARC ABI routines.  */

static void
emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
{
  const char *func;

  switch (code)
    {
    case PLUS:
      func = "_Qp_add";
      break;
    case MINUS:
      func = "_Qp_sub";
      break;
    case MULT:
      func = "_Qp_mul";
      break;
    case DIV:
      func = "_Qp_div";
      break;
    default:
      gcc_unreachable ();
    }

  emit_soft_tfmode_libcall (func, 3, operands);
}

static void
emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
{
  const char *func;

  gcc_assert (code == SQRT);
  func = "_Qp_sqrt";

  emit_soft_tfmode_libcall (func, 2, operands);
}

static void
emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
{
  const char *func;

  switch (code)
    {
    case FLOAT_EXTEND:
      switch (GET_MODE (operands[1]))
	{
	case SFmode:
	  func = "_Qp_stoq";
	  break;
	case DFmode:
	  func = "_Qp_dtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT_TRUNCATE:
      switch (GET_MODE (operands[0]))
	{
	case SFmode:
	  func = "_Qp_qtos";
	  break;
	case DFmode:
	  func = "_Qp_qtod";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case SImode:
	  func = "_Qp_itoq";
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
	  break;
	case DImode:
	  func = "_Qp_xtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FLOAT:
      switch (GET_MODE (operands[1]))
	{
	case SImode:
	  func = "_Qp_uitoq";
	  if (TARGET_ARCH64)
	    operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
	  break;
	case DImode:
	  func = "_Qp_uxtoq";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case FIX:
      switch (GET_MODE (operands[0]))
	{
	case SImode:
	  func = "_Qp_qtoi";
	  break;
	case DImode:
	  func = "_Qp_qtox";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case UNSIGNED_FIX:
      switch (GET_MODE (operands[0]))
	{
	case SImode:
	  func = "_Qp_qtoui";
	  break;
	case DImode:
	  func = "_Qp_qtoux";
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  emit_soft_tfmode_libcall (func, 2, operands);
}
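/* Illustrative note (not in the original sources): with the quad ABI
   used above, a source-level "c = a + b" on long double lowers to the
   equivalent of "_Qp_add (&c, &a, &b);" -- all TFmode values are passed
   and returned by reference, which is why operand 0 gets a stack slot
   above instead of a return register.  */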
/* Expand a hard-float tfmode operation.  All arguments must be in
   registers.  */

static void
emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
{
  rtx op, dest;

  if (GET_RTX_CLASS (code) == RTX_UNARY)
    {
      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
      op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
    }
  else
    {
      operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
      operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
      op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
			   operands[1], operands[2]);
    }

  if (register_operand (operands[0], VOIDmode))
    dest = operands[0];
  else
    dest = gen_reg_rtx (GET_MODE (operands[0]));

  emit_insn (gen_rtx_SET (VOIDmode, dest, op));

  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
}

void
emit_tfmode_binop (enum rtx_code code, rtx *operands)
{
  if (TARGET_HARD_QUAD)
    emit_hard_tfmode_operation (code, operands);
  else
    emit_soft_tfmode_binop (code, operands);
}

void
emit_tfmode_unop (enum rtx_code code, rtx *operands)
{
  if (TARGET_HARD_QUAD)
    emit_hard_tfmode_operation (code, operands);
  else
    emit_soft_tfmode_unop (code, operands);
}

void
emit_tfmode_cvt (enum rtx_code code, rtx *operands)
{
  if (TARGET_HARD_QUAD)
    emit_hard_tfmode_operation (code, operands);
  else
    emit_soft_tfmode_cvt (code, operands);
}

/* Return nonzero if a branch/jump/call instruction will be emitting a nop
   into its delay slot.  */

int
empty_delay_slot (rtx insn)
{
  rtx seq;

  /* If no previous instruction (should not happen), return true.  */
  if (PREV_INSN (insn) == NULL)
    return 1;

  seq = NEXT_INSN (PREV_INSN (insn));
  if (GET_CODE (PATTERN (seq)) == SEQUENCE)
    return 0;

  return 1;
}

/* Return nonzero if we should emit a nop after a cbcond instruction.
   The cbcond instruction does not have a delay slot, however there is
   a severe performance penalty if a control transfer appears right
   after a cbcond.  Therefore we emit a nop when we detect this
   situation.  */

int
emit_cbcond_nop (rtx insn)
{
  rtx next = next_active_insn (insn);

  if (!next)
    return 1;

  if (GET_CODE (next) == INSN
      && GET_CODE (PATTERN (next)) == SEQUENCE)
    next = XVECEXP (PATTERN (next), 0, 0);
  else if (GET_CODE (next) == CALL_INSN
	   && GET_CODE (PATTERN (next)) == PARALLEL)
    {
      rtx delay = XVECEXP (PATTERN (next), 0, 1);

      if (GET_CODE (delay) == RETURN)
	{
	  /* It's a sibling call.  Do not emit the nop if we're going
	     to emit something other than the jump itself as the first
	     instruction of the sibcall sequence.  */
	  if (sparc_leaf_function_p || TARGET_FLAT)
	    return 0;
	}
    }

  if (NONJUMP_INSN_P (next))
    return 0;

  return 1;
}
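/* Illustrative note (not in the original sources): once a delay slot has
   been filled, the branch and its slot insn are wrapped together into a
   SEQUENCE rtx, e.g. for

	call	foo
	 mov	%l0, %o0	! filled slot, part of the SEQUENCE

   so empty_delay_slot above answers "will this branch output a nop?"
   simply by checking whether the insn sits inside such a SEQUENCE.  */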
/* Return nonzero if TRIAL can go into the call delay slot.  */

int
eligible_for_call_delay (rtx trial)
{
  rtx pat;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* Binutils allows
       call __tls_get_addr, %tgd_call (foo)
        add %l7, %o0, %o0, %tgd_add (foo)
     while Sun as/ld does not.  */
  if (TARGET_GNU_TLS || !TARGET_TLS)
    return 1;

  pat = PATTERN (trial);

  /* We must reject tgd_add{32|64}, i.e.
       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
     and tldm_add{32|64}, i.e.
       (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
     for Sun as/ld.  */
  if (GET_CODE (pat) == SET
      && GET_CODE (SET_SRC (pat)) == PLUS)
    {
      rtx unspec = XEXP (SET_SRC (pat), 1);

      if (GET_CODE (unspec) == UNSPEC
	  && (XINT (unspec, 1) == UNSPEC_TLSGD
	      || XINT (unspec, 1) == UNSPEC_TLSLDM))
	return 0;
    }

  return 1;
}
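/* Illustrative note (not in the original sources): the predicate below
   serves epilogues of the form

	ret
	 restore %o0, 0, %o0	! move the result while popping the window

   where the last computation rides along with the register-window
   restore in the delay slot of the return jump.  */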
/* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
   instruction.  RETURN_P is true if the v9 variant 'return' is to be
   considered in the test too.

   TRIAL must be a SET whose destination is a REG appropriate for the
   'restore' instruction or, if RETURN_P is true, for the 'return'
   instruction.  */

static int
eligible_for_restore_insn (rtx trial, bool return_p)
{
  rtx pat = PATTERN (trial);
  rtx src = SET_SRC (pat);
  bool src_is_freg = false;
  rtx src_reg;

  /* Since we now can do moves between float and integer registers when
     VIS3 is enabled, we have to catch this case.  We can allow such
     moves when doing a 'return' however.  */
  src_reg = src;
  if (GET_CODE (src_reg) == SUBREG)
    src_reg = SUBREG_REG (src_reg);
  if (GET_CODE (src_reg) == REG
      && SPARC_FP_REG_P (REGNO (src_reg)))
    src_is_freg = true;

  /* The 'restore src,%g0,dest' pattern for word mode and below.  */
  if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
      && arith_operand (src, GET_MODE (src))
      && ! src_is_freg)
    {
      if (TARGET_ARCH64)
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
      else
	return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
    }

  /* The 'restore src,%g0,dest' pattern for double-word mode.  */
  else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
	   && arith_double_operand (src, GET_MODE (src))
	   && ! src_is_freg)
    return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);

  /* The 'restore src,%g0,dest' pattern for float if no FPU.  */
  else if (! TARGET_FPU && register_operand (src, SFmode))
    return 1;

  /* The 'restore src,%g0,dest' pattern for double if no FPU.  */
  else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
    return 1;

  /* If we have the 'return' instruction, anything that does not use
     local or output registers and can go into a delay slot wins.  */
  else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
    return 1;

  /* The 'restore src1,src2,dest' pattern for SImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), SImode)
	   && arith_operand (XEXP (src, 1), SImode))
    return 1;

  /* The 'restore src1,src2,dest' pattern for DImode.  */
  else if (GET_CODE (src) == PLUS
	   && register_operand (XEXP (src, 0), DImode)
	   && arith_double_operand (XEXP (src, 1), DImode))
    return 1;

  /* The 'restore src1,%lo(src2),dest' pattern.  */
  else if (GET_CODE (src) == LO_SUM
	   && ! TARGET_CM_MEDMID
	   && ((register_operand (XEXP (src, 0), SImode)
		&& immediate_operand (XEXP (src, 1), SImode))
	       || (TARGET_ARCH64
		   && register_operand (XEXP (src, 0), DImode)
		   && immediate_operand (XEXP (src, 1), DImode))))
    return 1;

  /* The 'restore src,src,dest' pattern.  */
  else if (GET_CODE (src) == ASHIFT
	   && (register_operand (XEXP (src, 0), SImode)
	       || register_operand (XEXP (src, 0), DImode))
	   && XEXP (src, 1) == const1_rtx)
    return 1;

  return 0;
}

/* Return nonzero if TRIAL can go into the function return's delay slot.  */

int
eligible_for_return_delay (rtx trial)
{
  int regno;
  rtx pat;

  /* If the function uses __builtin_eh_return, the eh_return machinery
     occupies the delay slot.  */
  if (crtl->calls_eh_return)
    return 0;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  /* In the case of a leaf or flat function, anything can go into the slot.  */
  if (sparc_leaf_function_p || TARGET_FLAT)
    return 1;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);
  if (GET_CODE (pat) == PARALLEL)
    {
      int i;

      if (! TARGET_V9)
	return 0;
      for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
	{
	  rtx expr = XVECEXP (pat, 0, i);
	  if (GET_CODE (expr) != SET)
	    return 0;
	  if (GET_CODE (SET_DEST (expr)) != REG)
	    return 0;
	  regno = REGNO (SET_DEST (expr));
	  if (regno >= 8 && regno < 24)
	    return 0;
	}
      return !epilogue_renumber (&pat, 1);
    }

  if (GET_CODE (pat) != SET)
    return 0;

  if (GET_CODE (SET_DEST (pat)) != REG)
    return 0;

  regno = REGNO (SET_DEST (pat));

  /* Otherwise, only operations which can be done in tandem with
     a `restore' or `return' insn can go into the delay slot.  */
  if (regno >= 8 && regno < 24)
    return 0;

  /* If this instruction sets up a floating-point register and we have a
     return instruction, it can probably go in.  But restore will not work
     with FP_REGS.  */
  if (! SPARC_INT_REG_P (regno))
    return TARGET_V9 && !epilogue_renumber (&pat, 1);

  return eligible_for_restore_insn (trial, true);
}
/* Return nonzero if TRIAL can go into the sibling call's delay slot.  */

int
eligible_for_sibcall_delay (rtx trial)
{
  rtx pat;

  if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
    return 0;

  if (!NONJUMP_INSN_P (trial))
    return 0;

  pat = PATTERN (trial);

  if (sparc_leaf_function_p || TARGET_FLAT)
    {
      /* If the tail call is done using the call instruction,
	 we have to restore %o7 in the delay slot.  */
      if (LEAF_SIBCALL_SLOT_RESERVED_P)
	return 0;

      /* %g1 is used to build the function address.  */
      if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
	return 0;

      return 1;
    }

  if (GET_CODE (pat) != SET)
    return 0;

  /* Otherwise, only operations which can be done in tandem with
     a `restore' insn can go into the delay slot.  */
  if (GET_CODE (SET_DEST (pat)) != REG
      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
    return 0;

  /* If it mentions %o7, it can't go in, because sibcall will clobber it
     in most cases.  */
  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
    return 0;

  return eligible_for_restore_insn (trial, false);
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible if X contains the address of a symbol that is
   not constant (TLS) or not known at final link time (PIC).  */

static bool
sparc_cannot_force_const_mem (enum machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      /* Accept all non-symbolic constants.  */
      return false;

    case LABEL_REF:
      /* Labels are OK iff we are non-PIC.  */
      return flag_pic != 0;

    case SYMBOL_REF:
      /* 'Naked' TLS symbol references are never OK,
	 non-TLS symbols are OK iff we are non-PIC.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return true;
      else
	return flag_pic != 0;

    case CONST:
      return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
    case PLUS:
    case MINUS:
      return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
	     || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
    case UNSPEC:
      return true;
    default:
      gcc_unreachable ();
    }
}

/* Global Offset Table support.  */
static GTY(()) rtx got_helper_rtx = NULL_RTX;
static GTY(()) rtx global_offset_table_rtx = NULL_RTX;

/* Return the SYMBOL_REF for the Global Offset Table.  */

static GTY(()) rtx sparc_got_symbol = NULL_RTX;

static rtx
sparc_got (void)
{
  if (!sparc_got_symbol)
    sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  return sparc_got_symbol;
}

/* Ensure that we are not using patterns that are not OK with PIC.  */

int
check_pic (int i)
{
  rtx op;

  switch (flag_pic)
    {
    case 1:
      op = recog_data.operand[i];
      gcc_assert (GET_CODE (op) != SYMBOL_REF
		  && (GET_CODE (op) != CONST
		      || (GET_CODE (XEXP (op, 0)) == MINUS
			  && XEXP (XEXP (op, 0), 0) == sparc_got ()
			  && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
      /* fall through */
    case 2:
    default:
      return 1;
    }
}

/* Return true if X is an address which needs a temporary register when
   reloaded while generating PIC code.  */

int
pic_address_needs_scratch (rtx x)
{
  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
    return 1;

  return 0;
}
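/* Illustrative example (not in the original sources): "sym + 0x12345"
   needs a scratch register under PIC because 0x12345 does not fit in a
   13-bit signed immediate, so the offset must be built separately and
   added, whereas "sym + 64" can be folded into the access directly.  */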
3785 The only exception is 0.0 and all-ones in VIS. */ 3786 if (TARGET_VIS 3787 && SCALAR_FLOAT_MODE_P (mode) 3788 && (const_zero_operand (x, mode) 3789 || const_all_ones_operand (x, mode))) 3790 return true; 3791 3792 return false; 3793 3794 case CONST_VECTOR: 3795 /* Vector constants are generally not ok. 3796 The only exception is 0 or -1 in VIS. */ 3797 if (TARGET_VIS 3798 && (const_zero_operand (x, mode) 3799 || const_all_ones_operand (x, mode))) 3800 return true; 3801 3802 return false; 3803 3804 default: 3805 break; 3806 } 3807 3808 return true; 3809 } 3810 3811 /* Determine if a given RTX is a valid constant address. */ 3812 3813 bool 3814 constant_address_p (rtx x) 3815 { 3816 switch (GET_CODE (x)) 3817 { 3818 case LABEL_REF: 3819 case CONST_INT: 3820 case HIGH: 3821 return true; 3822 3823 case CONST: 3824 if (flag_pic && pic_address_needs_scratch (x)) 3825 return false; 3826 return sparc_legitimate_constant_p (Pmode, x); 3827 3828 case SYMBOL_REF: 3829 return !flag_pic && sparc_legitimate_constant_p (Pmode, x); 3830 3831 default: 3832 return false; 3833 } 3834 } 3835 3836 /* Nonzero if the constant value X is a legitimate general operand 3837 when generating PIC code. It is given that flag_pic is on and 3838 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ 3839 3840 bool 3841 legitimate_pic_operand_p (rtx x) 3842 { 3843 if (pic_address_needs_scratch (x)) 3844 return false; 3845 if (sparc_tls_referenced_p (x)) 3846 return false; 3847 return true; 3848 } 3849 3850 #define RTX_OK_FOR_OFFSET_P(X, MODE) \ 3851 (CONST_INT_P (X) \ 3852 && INTVAL (X) >= -0x1000 \ 3853 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE))) 3854 3855 #define RTX_OK_FOR_OLO10_P(X, MODE) \ 3856 (CONST_INT_P (X) \ 3857 && INTVAL (X) >= -0x1000 \ 3858 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE))) 3859 3860 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook. 3861 3862 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT 3863 ordinarily. This changes a bit when generating PIC. */ 3864 3865 static bool 3866 sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict) 3867 { 3868 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL; 3869 3870 if (REG_P (addr) || GET_CODE (addr) == SUBREG) 3871 rs1 = addr; 3872 else if (GET_CODE (addr) == PLUS) 3873 { 3874 rs1 = XEXP (addr, 0); 3875 rs2 = XEXP (addr, 1); 3876 3877 /* Canonicalize. REG comes first, if there are no regs, 3878 LO_SUM comes first. */ 3879 if (!REG_P (rs1) 3880 && GET_CODE (rs1) != SUBREG 3881 && (REG_P (rs2) 3882 || GET_CODE (rs2) == SUBREG 3883 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM))) 3884 { 3885 rs1 = XEXP (addr, 1); 3886 rs2 = XEXP (addr, 0); 3887 } 3888 3889 if ((flag_pic == 1 3890 && rs1 == pic_offset_table_rtx 3891 && !REG_P (rs2) 3892 && GET_CODE (rs2) != SUBREG 3893 && GET_CODE (rs2) != LO_SUM 3894 && GET_CODE (rs2) != MEM 3895 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2)) 3896 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode) 3897 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2))) 3898 || ((REG_P (rs1) 3899 || GET_CODE (rs1) == SUBREG) 3900 && RTX_OK_FOR_OFFSET_P (rs2, mode))) 3901 { 3902 imm1 = rs2; 3903 rs2 = NULL; 3904 } 3905 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG) 3906 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG)) 3907 { 3908 /* We prohibit REG + REG for TFmode when there are no quad move insns 3909 and we consequently need to split. We do this because REG+REG 3910 is not an offsettable address.
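(An offsettable address is one to which a small constant can legitimately be added to reach the remaining words of a multi-word operand.)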
If we get the situation in reload 3911 where source and destination of a movtf pattern are both MEMs with 3912 REG+REG address, then only one of them gets converted to an 3913 offsettable address. */ 3914 if (mode == TFmode 3915 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD)) 3916 return 0; 3917 3918 /* Likewise for TImode, but in all cases. */ 3919 if (mode == TImode) 3920 return 0; 3921 3922 /* We prohibit REG + REG on ARCH32 if not optimizing for 3923 DFmode/DImode because then mem_min_alignment is likely to be zero 3924 after reload and the forced split would lack a matching splitter 3925 pattern. */ 3926 if (TARGET_ARCH32 && !optimize 3927 && (mode == DFmode || mode == DImode)) 3928 return 0; 3929 } 3930 else if (USE_AS_OFFSETABLE_LO10 3931 && GET_CODE (rs1) == LO_SUM 3932 && TARGET_ARCH64 3933 && ! TARGET_CM_MEDMID 3934 && RTX_OK_FOR_OLO10_P (rs2, mode)) 3935 { 3936 rs2 = NULL; 3937 imm1 = XEXP (rs1, 1); 3938 rs1 = XEXP (rs1, 0); 3939 if (!CONSTANT_P (imm1) 3940 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1))) 3941 return 0; 3942 } 3943 } 3944 else if (GET_CODE (addr) == LO_SUM) 3945 { 3946 rs1 = XEXP (addr, 0); 3947 imm1 = XEXP (addr, 1); 3948 3949 if (!CONSTANT_P (imm1) 3950 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1))) 3951 return 0; 3952 3953 /* We can't allow TFmode in 32-bit mode, because an offset greater 3954 than the alignment (8) may cause the LO_SUM to overflow. */ 3955 if (mode == TFmode && TARGET_ARCH32) 3956 return 0; 3957 } 3958 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr)) 3959 return 1; 3960 else 3961 return 0; 3962 3963 if (GET_CODE (rs1) == SUBREG) 3964 rs1 = SUBREG_REG (rs1); 3965 if (!REG_P (rs1)) 3966 return 0; 3967 3968 if (rs2) 3969 { 3970 if (GET_CODE (rs2) == SUBREG) 3971 rs2 = SUBREG_REG (rs2); 3972 if (!REG_P (rs2)) 3973 return 0; 3974 } 3975 3976 if (strict) 3977 { 3978 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1)) 3979 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2)))) 3980 return 0; 3981 } 3982 else 3983 { 3984 if ((! SPARC_INT_REG_P (REGNO (rs1)) 3985 && REGNO (rs1) != FRAME_POINTER_REGNUM 3986 && REGNO (rs1) < FIRST_PSEUDO_REGISTER) 3987 || (rs2 3988 && (! SPARC_INT_REG_P (REGNO (rs2)) 3989 && REGNO (rs2) != FRAME_POINTER_REGNUM 3990 && REGNO (rs2) < FIRST_PSEUDO_REGISTER))) 3991 return 0; 3992 } 3993 return 1; 3994 } 3995 3996 /* Return the SYMBOL_REF for the tls_get_addr function. */ 3997 3998 static GTY(()) rtx sparc_tls_symbol = NULL_RTX; 3999 4000 static rtx 4001 sparc_tls_get_addr (void) 4002 { 4003 if (!sparc_tls_symbol) 4004 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr"); 4005 4006 return sparc_tls_symbol; 4007 } 4008 4009 /* Return the Global Offset Table to be used in TLS mode. */ 4010 4011 static rtx 4012 sparc_tls_got (void) 4013 { 4014 /* In PIC mode, this is just the PIC offset table. */ 4015 if (flag_pic) 4016 { 4017 crtl->uses_pic_offset_table = 1; 4018 return pic_offset_table_rtx; 4019 } 4020 4021 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for 4022 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */ 4023 if (TARGET_SUN_TLS && TARGET_ARCH32) 4024 { 4025 load_got_register (); 4026 return global_offset_table_rtx; 4027 } 4028 4029 /* In all other cases, we load a new pseudo with the GOT symbol. */ 4030 return copy_to_reg (sparc_got ()); 4031 } 4032 4033 /* Return true if X contains a thread-local symbol. 
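For example, the address of a __thread variable is a SYMBOL_REF with a nonzero SYMBOL_REF_TLS_MODEL, possibly wrapped in (const (plus (symbol_ref ...) (const_int ...))); those are exactly the two shapes checked below.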
*/ 4034 4035 static bool 4036 sparc_tls_referenced_p (rtx x) 4037 { 4038 if (!TARGET_HAVE_TLS) 4039 return false; 4040 4041 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) 4042 x = XEXP (XEXP (x, 0), 0); 4043 4044 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) 4045 return true; 4046 4047 /* That's all we handle in sparc_legitimize_tls_address for now. */ 4048 return false; 4049 } 4050 4051 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute 4052 this (thread-local) address. */ 4053 4054 static rtx 4055 sparc_legitimize_tls_address (rtx addr) 4056 { 4057 rtx temp1, temp2, temp3, ret, o0, got, insn; 4058 4059 gcc_assert (can_create_pseudo_p ()); 4060 4061 if (GET_CODE (addr) == SYMBOL_REF) 4062 switch (SYMBOL_REF_TLS_MODEL (addr)) 4063 { 4064 case TLS_MODEL_GLOBAL_DYNAMIC: 4065 start_sequence (); 4066 temp1 = gen_reg_rtx (SImode); 4067 temp2 = gen_reg_rtx (SImode); 4068 ret = gen_reg_rtx (Pmode); 4069 o0 = gen_rtx_REG (Pmode, 8); 4070 got = sparc_tls_got (); 4071 emit_insn (gen_tgd_hi22 (temp1, addr)); 4072 emit_insn (gen_tgd_lo10 (temp2, temp1, addr)); 4073 if (TARGET_ARCH32) 4074 { 4075 emit_insn (gen_tgd_add32 (o0, got, temp2, addr)); 4076 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (), 4077 addr, const1_rtx)); 4078 } 4079 else 4080 { 4081 emit_insn (gen_tgd_add64 (o0, got, temp2, addr)); 4082 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (), 4083 addr, const1_rtx)); 4084 } 4085 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0); 4086 insn = get_insns (); 4087 end_sequence (); 4088 emit_libcall_block (insn, ret, o0, addr); 4089 break; 4090 4091 case TLS_MODEL_LOCAL_DYNAMIC: 4092 start_sequence (); 4093 temp1 = gen_reg_rtx (SImode); 4094 temp2 = gen_reg_rtx (SImode); 4095 temp3 = gen_reg_rtx (Pmode); 4096 ret = gen_reg_rtx (Pmode); 4097 o0 = gen_rtx_REG (Pmode, 8); 4098 got = sparc_tls_got (); 4099 emit_insn (gen_tldm_hi22 (temp1)); 4100 emit_insn (gen_tldm_lo10 (temp2, temp1)); 4101 if (TARGET_ARCH32) 4102 { 4103 emit_insn (gen_tldm_add32 (o0, got, temp2)); 4104 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (), 4105 const1_rtx)); 4106 } 4107 else 4108 { 4109 emit_insn (gen_tldm_add64 (o0, got, temp2)); 4110 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (), 4111 const1_rtx)); 4112 } 4113 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0); 4114 insn = get_insns (); 4115 end_sequence (); 4116 emit_libcall_block (insn, temp3, o0, 4117 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), 4118 UNSPEC_TLSLD_BASE)); 4119 temp1 = gen_reg_rtx (SImode); 4120 temp2 = gen_reg_rtx (SImode); 4121 emit_insn (gen_tldo_hix22 (temp1, addr)); 4122 emit_insn (gen_tldo_lox10 (temp2, temp1, addr)); 4123 if (TARGET_ARCH32) 4124 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr)); 4125 else 4126 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr)); 4127 break; 4128 4129 case TLS_MODEL_INITIAL_EXEC: 4130 temp1 = gen_reg_rtx (SImode); 4131 temp2 = gen_reg_rtx (SImode); 4132 temp3 = gen_reg_rtx (Pmode); 4133 got = sparc_tls_got (); 4134 emit_insn (gen_tie_hi22 (temp1, addr)); 4135 emit_insn (gen_tie_lo10 (temp2, temp1, addr)); 4136 if (TARGET_ARCH32) 4137 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr)); 4138 else 4139 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr)); 4140 if (TARGET_SUN_TLS) 4141 { 4142 ret = gen_reg_rtx (Pmode); 4143 if (TARGET_ARCH32) 4144 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7), 4145 temp3, addr)); 4146 else 4147 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7), 4148 temp3, 
addr)); 4149 } 4150 else 4151 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3); 4152 break; 4153 4154 case TLS_MODEL_LOCAL_EXEC: 4155 temp1 = gen_reg_rtx (Pmode); 4156 temp2 = gen_reg_rtx (Pmode); 4157 if (TARGET_ARCH32) 4158 { 4159 emit_insn (gen_tle_hix22_sp32 (temp1, addr)); 4160 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr)); 4161 } 4162 else 4163 { 4164 emit_insn (gen_tle_hix22_sp64 (temp1, addr)); 4165 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr)); 4166 } 4167 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2); 4168 break; 4169 4170 default: 4171 gcc_unreachable (); 4172 } 4173 4174 else if (GET_CODE (addr) == CONST) 4175 { 4176 rtx base, offset; 4177 4178 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS); 4179 4180 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0)); 4181 offset = XEXP (XEXP (addr, 0), 1); 4182 4183 base = force_operand (base, NULL_RTX); 4184 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset))) 4185 offset = force_reg (Pmode, offset); 4186 ret = gen_rtx_PLUS (Pmode, base, offset); 4187 } 4188 4189 else 4190 gcc_unreachable (); /* for now ... */ 4191 4192 return ret; 4193 } 4194 4195 /* Legitimize PIC addresses. If the address is already position-independent, 4196 we return ORIG. Newly generated position-independent addresses go into a 4197 reg. This is REG if nonzero, otherwise we allocate register(s) as 4198 necessary. */ 4199 4200 static rtx 4201 sparc_legitimize_pic_address (rtx orig, rtx reg) 4202 { 4203 bool gotdata_op = false; 4204 4205 if (GET_CODE (orig) == SYMBOL_REF 4206 /* See the comment in sparc_expand_move. */ 4207 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig))) 4208 { 4209 rtx pic_ref, address; 4210 rtx insn; 4211 4212 if (reg == 0) 4213 { 4214 gcc_assert (can_create_pseudo_p ()); 4215 reg = gen_reg_rtx (Pmode); 4216 } 4217 4218 if (flag_pic == 2) 4219 { 4220 /* If not during reload, allocate another temp reg here for loading 4221 in the address, so that these instructions can be optimized 4222 properly. */ 4223 rtx temp_reg = (! can_create_pseudo_p () 4224 ? reg : gen_reg_rtx (Pmode)); 4225 4226 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse 4227 won't get confused into thinking that these two instructions 4228 are loading in the true address of the symbol. If in the 4229 future a PIC rtx exists, that should be used instead. */ 4230 if (TARGET_ARCH64) 4231 { 4232 emit_insn (gen_movdi_high_pic (temp_reg, orig)); 4233 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig)); 4234 } 4235 else 4236 { 4237 emit_insn (gen_movsi_high_pic (temp_reg, orig)); 4238 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig)); 4239 } 4240 address = temp_reg; 4241 gotdata_op = true; 4242 } 4243 else 4244 address = orig; 4245 4246 crtl->uses_pic_offset_table = 1; 4247 if (gotdata_op) 4248 { 4249 if (TARGET_ARCH64) 4250 insn = emit_insn (gen_movdi_pic_gotdata_op (reg, 4251 pic_offset_table_rtx, 4252 address, orig)); 4253 else 4254 insn = emit_insn (gen_movsi_pic_gotdata_op (reg, 4255 pic_offset_table_rtx, 4256 address, orig)); 4257 } 4258 else 4259 { 4260 pic_ref 4261 = gen_const_mem (Pmode, 4262 gen_rtx_PLUS (Pmode, 4263 pic_offset_table_rtx, address)); 4264 insn = emit_move_insn (reg, pic_ref); 4265 } 4266 4267 /* Put a REG_EQUAL note on this insn, so that it can be optimized 4268 by loop. 
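The note records ORIG, the original symbolic address, as the value that REG is known to contain from this point on.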
*/ 4269 set_unique_reg_note (insn, REG_EQUAL, orig); 4270 return reg; 4271 } 4272 else if (GET_CODE (orig) == CONST) 4273 { 4274 rtx base, offset; 4275 4276 if (GET_CODE (XEXP (orig, 0)) == PLUS 4277 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) 4278 return orig; 4279 4280 if (reg == 0) 4281 { 4282 gcc_assert (can_create_pseudo_p ()); 4283 reg = gen_reg_rtx (Pmode); 4284 } 4285 4286 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); 4287 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg); 4288 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), 4289 base == reg ? NULL_RTX : reg); 4290 4291 if (GET_CODE (offset) == CONST_INT) 4292 { 4293 if (SMALL_INT (offset)) 4294 return plus_constant (Pmode, base, INTVAL (offset)); 4295 else if (can_create_pseudo_p ()) 4296 offset = force_reg (Pmode, offset); 4297 else 4298 /* If we reach here, then something is seriously wrong. */ 4299 gcc_unreachable (); 4300 } 4301 return gen_rtx_PLUS (Pmode, base, offset); 4302 } 4303 else if (GET_CODE (orig) == LABEL_REF) 4304 /* ??? We ought to be checking that the register is live instead, in case 4305 it is eliminated. */ 4306 crtl->uses_pic_offset_table = 1; 4307 4308 return orig; 4309 } 4310 4311 /* Try machine-dependent ways of modifying an illegitimate address X 4312 to be legitimate. If we find one, return the new, valid address. 4313 4314 OLDX is the address as it was before break_out_memory_refs was called. 4315 In some cases it is useful to look at this to decide what needs to be done. 4316 4317 MODE is the mode of the operand pointed to by X. 4318 4319 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */ 4320 4321 static rtx 4322 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 4323 enum machine_mode mode) 4324 { 4325 rtx orig_x = x; 4326 4327 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT) 4328 x = gen_rtx_PLUS (Pmode, XEXP (x, 1), 4329 force_operand (XEXP (x, 0), NULL_RTX)); 4330 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT) 4331 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), 4332 force_operand (XEXP (x, 1), NULL_RTX)); 4333 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS) 4334 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX), 4335 XEXP (x, 1)); 4336 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS) 4337 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), 4338 force_operand (XEXP (x, 1), NULL_RTX)); 4339 4340 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE)) 4341 return x; 4342 4343 if (sparc_tls_referenced_p (x)) 4344 x = sparc_legitimize_tls_address (x); 4345 else if (flag_pic) 4346 x = sparc_legitimize_pic_address (x, NULL_RTX); 4347 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1))) 4348 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), 4349 copy_to_mode_reg (Pmode, XEXP (x, 1))); 4350 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0))) 4351 x = gen_rtx_PLUS (Pmode, XEXP (x, 1), 4352 copy_to_mode_reg (Pmode, XEXP (x, 0))); 4353 else if (GET_CODE (x) == SYMBOL_REF 4354 || GET_CODE (x) == CONST 4355 || GET_CODE (x) == LABEL_REF) 4356 x = copy_to_suggested_reg (x, NULL_RTX, Pmode); 4357 4358 return x; 4359 } 4360 4361 /* Delegitimize an address that was legitimized by the above function. 
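That is, undo the PIC/TLS wrapping and recover the bare SYMBOL_REF or LABEL_REF, which debug info generation and alias analysis want to see.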
*/ 4362 4363 static rtx 4364 sparc_delegitimize_address (rtx x) 4365 { 4366 x = delegitimize_mem_from_attrs (x); 4367 4368 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC) 4369 switch (XINT (XEXP (x, 1), 1)) 4370 { 4371 case UNSPEC_MOVE_PIC: 4372 case UNSPEC_TLSLE: 4373 x = XVECEXP (XEXP (x, 1), 0, 0); 4374 gcc_assert (GET_CODE (x) == SYMBOL_REF); 4375 break; 4376 default: 4377 break; 4378 } 4379 4380 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */ 4381 if (GET_CODE (x) == MINUS 4382 && REG_P (XEXP (x, 0)) 4383 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM 4384 && GET_CODE (XEXP (x, 1)) == LO_SUM 4385 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC 4386 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL) 4387 { 4388 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0); 4389 gcc_assert (GET_CODE (x) == LABEL_REF); 4390 } 4391 4392 return x; 4393 } 4394 4395 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to 4396 replace the input X, or the original X if no replacement is called for. 4397 The output parameter *WIN is 1 if the calling macro should goto WIN, 4398 0 if it should not. 4399 4400 For SPARC, we wish to handle addresses by splitting them into 4401 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference. 4402 This cuts the number of extra insns by one. 4403 4404 Do nothing when generating PIC code and the address is a symbolic 4405 operand or requires a scratch register. */ 4406 4407 rtx 4408 sparc_legitimize_reload_address (rtx x, enum machine_mode mode, 4409 int opnum, int type, 4410 int ind_levels ATTRIBUTE_UNUSED, int *win) 4411 { 4412 /* Decompose SImode constants into HIGH+LO_SUM. */ 4413 if (CONSTANT_P (x) 4414 && (mode != TFmode || TARGET_ARCH64) 4415 && GET_MODE (x) == SImode 4416 && GET_CODE (x) != LO_SUM 4417 && GET_CODE (x) != HIGH 4418 && sparc_cmodel <= CM_MEDLOW 4419 && !(flag_pic 4420 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x)))) 4421 { 4422 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x); 4423 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, 4424 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, 4425 opnum, (enum reload_type)type); 4426 *win = 1; 4427 return x; 4428 } 4429 4430 /* We have to recognize what we have already generated above. */ 4431 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH) 4432 { 4433 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, 4434 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, 4435 opnum, (enum reload_type)type); 4436 *win = 1; 4437 return x; 4438 } 4439 4440 *win = 0; 4441 return x; 4442 } 4443 4444 /* Return true if ADDR (a legitimate address expression) 4445 has an effect that depends on the machine mode it is used for. 4446 4447 In PIC mode, 4448 4449 (mem:HI [%l7+a]) 4450 4451 is not equivalent to 4452 4453 (mem:QI [%l7+a]) (mem:QI [%l7+a+1]) 4454 4455 because [%l7+a+1] is interpreted as the address of (a+1). 
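(The constant is folded into the relocation against the symbol, thereby designating a different GOT entry rather than an offset into the original one.)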
*/ 4456 4457 4458 static bool 4459 sparc_mode_dependent_address_p (const_rtx addr, 4460 addr_space_t as ATTRIBUTE_UNUSED) 4461 { 4462 if (flag_pic && GET_CODE (addr) == PLUS) 4463 { 4464 rtx op0 = XEXP (addr, 0); 4465 rtx op1 = XEXP (addr, 1); 4466 if (op0 == pic_offset_table_rtx 4467 && symbolic_operand (op1, VOIDmode)) 4468 return true; 4469 } 4470 4471 return false; 4472 } 4473 4474 #ifdef HAVE_GAS_HIDDEN 4475 # define USE_HIDDEN_LINKONCE 1 4476 #else 4477 # define USE_HIDDEN_LINKONCE 0 4478 #endif 4479 4480 static void 4481 get_pc_thunk_name (char name[32], unsigned int regno) 4482 { 4483 const char *reg_name = reg_names[regno]; 4484 4485 /* Skip the leading '%' as that cannot be used in a 4486 symbol name. */ 4487 reg_name += 1; 4488 4489 if (USE_HIDDEN_LINKONCE) 4490 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name); 4491 else 4492 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno); 4493 } 4494 4495 /* Wrapper around the load_pcrel_sym{si,di} patterns. */ 4496 4497 static rtx 4498 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3) 4499 { 4500 int orig_flag_pic = flag_pic; 4501 rtx insn; 4502 4503 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */ 4504 flag_pic = 0; 4505 if (TARGET_ARCH64) 4506 insn = gen_load_pcrel_symdi (op0, op1, op2, op3); 4507 else 4508 insn = gen_load_pcrel_symsi (op0, op1, op2, op3); 4509 flag_pic = orig_flag_pic; 4510 4511 return insn; 4512 } 4513 4514 /* Emit code to load the GOT register. */ 4515 4516 void 4517 load_got_register (void) 4518 { 4519 /* In PIC mode, this will retrieve pic_offset_table_rtx. */ 4520 if (!global_offset_table_rtx) 4521 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM); 4522 4523 if (TARGET_VXWORKS_RTP) 4524 emit_insn (gen_vxworks_load_got ()); 4525 else 4526 { 4527 /* The GOT symbol is subject to a PC-relative relocation so we need a 4528 helper function to add the PC value and thus get the final value. */ 4529 if (!got_helper_rtx) 4530 { 4531 char name[32]; 4532 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM); 4533 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 4534 } 4535 4536 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (), 4537 got_helper_rtx, 4538 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM))); 4539 } 4540 4541 /* Need to emit this whether or not we obey regdecls, 4542 since setjmp/longjmp can cause life info to screw up. 4543 ??? In the case where we don't obey regdecls, this is not sufficient 4544 since we may not fall out the bottom. */ 4545 emit_use (global_offset_table_rtx); 4546 } 4547 4548 /* Emit a call instruction with the pattern given by PAT. ADDR is the 4549 address of the call target. */ 4550 4551 void 4552 sparc_emit_call_insn (rtx pat, rtx addr) 4553 { 4554 rtx insn; 4555 4556 insn = emit_call_insn (pat); 4557 4558 /* The PIC register is live on entry to VxWorks PIC PLT entries. */ 4559 if (TARGET_VXWORKS_RTP 4560 && flag_pic 4561 && GET_CODE (addr) == SYMBOL_REF 4562 && (SYMBOL_REF_DECL (addr) 4563 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) 4564 : !SYMBOL_REF_LOCAL_P (addr))) 4565 { 4566 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); 4567 crtl->uses_pic_offset_table = 1; 4568 } 4569 } 4570 4571 /* Return 1 if RTX is a MEM which is known to be aligned to at 4572 least a DESIRED byte boundary. */ 4573 4574 int 4575 mem_min_alignment (rtx mem, int desired) 4576 { 4577 rtx addr, base, offset; 4578 4579 /* If it's not a MEM we can't accept it. 
*/ 4580 if (GET_CODE (mem) != MEM) 4581 return 0; 4582 4583 /* Obviously... */ 4584 if (!TARGET_UNALIGNED_DOUBLES 4585 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired) 4586 return 1; 4587 4588 /* ??? The rest of the function predates MEM_ALIGN so 4589 there is probably a bit of redundancy. */ 4590 addr = XEXP (mem, 0); 4591 base = offset = NULL_RTX; 4592 if (GET_CODE (addr) == PLUS) 4593 { 4594 if (GET_CODE (XEXP (addr, 0)) == REG) 4595 { 4596 base = XEXP (addr, 0); 4597 4598 /* What we are saying here is that if the base 4599 REG is aligned properly, the compiler will make 4600 sure any REG based index upon it will be so 4601 as well. */ 4602 if (GET_CODE (XEXP (addr, 1)) == CONST_INT) 4603 offset = XEXP (addr, 1); 4604 else 4605 offset = const0_rtx; 4606 } 4607 } 4608 else if (GET_CODE (addr) == REG) 4609 { 4610 base = addr; 4611 offset = const0_rtx; 4612 } 4613 4614 if (base != NULL_RTX) 4615 { 4616 int regno = REGNO (base); 4617 4618 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM) 4619 { 4620 /* Check if the compiler has recorded some information 4621 about the alignment of the base REG. If reload has 4622 completed, we already matched with proper alignments. 4623 If not running global_alloc, reload might give us 4624 unaligned pointer to local stack though. */ 4625 if (((cfun != 0 4626 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT) 4627 || (optimize && reload_completed)) 4628 && (INTVAL (offset) & (desired - 1)) == 0) 4629 return 1; 4630 } 4631 else 4632 { 4633 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0) 4634 return 1; 4635 } 4636 } 4637 else if (! TARGET_UNALIGNED_DOUBLES 4638 || CONSTANT_P (addr) 4639 || GET_CODE (addr) == LO_SUM) 4640 { 4641 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES 4642 is true, in which case we can only assume that an access is aligned if 4643 it is to a constant address, or the address involves a LO_SUM. */ 4644 return 1; 4645 } 4646 4647 /* An obviously unaligned address. */ 4648 return 0; 4649 } 4650 4651 4652 /* Vectors to keep interesting information about registers where it can easily 4653 be got. We used to use the actual mode value as the bit number, but there 4654 are more than 32 modes now. Instead we use two tables: one indexed by 4655 hard register number, and one indexed by mode. */ 4656 4657 /* The purpose of sparc_mode_class is to shrink the range of modes so that 4658 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is 4659 mapped into one sparc_mode_class mode. */ 4660 4661 enum sparc_mode_class { 4662 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE, 4663 SF_MODE, DF_MODE, TF_MODE, OF_MODE, 4664 CC_MODE, CCFP_MODE 4665 }; 4666 4667 /* Modes for single-word and smaller quantities. */ 4668 #define S_MODES \ 4669 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE)) 4670 4671 /* Modes for double-word and smaller quantities. */ 4672 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE)) 4673 4674 /* Modes for quad-word and smaller quantities. */ 4675 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE)) 4676 4677 /* Modes for 8-word and smaller quantities. */ 4678 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE)) 4679 4680 /* Modes for single-float quantities. */ 4681 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) 4682 4683 /* Modes for double-float and smaller quantities. 
*/ 4684 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE)) 4685 4686 /* Modes for quad-float and smaller quantities. */ 4687 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE)) 4688 4689 /* Modes for quad-float pairs and smaller quantities. */ 4690 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE)) 4691 4692 /* Modes for double-float only quantities. */ 4693 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE)) 4694 4695 /* Modes for quad-float and double-float only quantities. */ 4696 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE)) 4697 4698 /* Modes for quad-float pairs and double-float only quantities. */ 4699 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE)) 4700 4701 /* Modes for condition codes. */ 4702 #define CC_MODES (1 << (int) CC_MODE) 4703 #define CCFP_MODES (1 << (int) CCFP_MODE) 4704 4705 /* Value is 1 if register/mode pair is acceptable on sparc. 4706 The funny mixture of D and T modes is because integer operations 4707 do not specially operate on tetra quantities, so non-quad-aligned 4708 registers can hold quadword quantities (except %o4 and %i4 because 4709 they cross fixed registers). */ 4710 4711 /* This points to either the 32 bit or the 64 bit version. */ 4712 const int *hard_regno_mode_classes; 4713 4714 static const int hard_32bit_mode_classes[] = { 4715 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, 4716 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES, 4717 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, 4718 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES, 4719 4720 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 4721 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 4722 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 4723 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES, 4724 4725 /* FP regs f32 to f63. Only the even numbered registers actually exist, 4726 and none can hold SFmode/SImode values. */ 4727 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 4728 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 4729 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 4730 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 4731 4732 /* %fcc[0123] */ 4733 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES, 4734 4735 /* %icc, %sfp, %gsr */ 4736 CC_MODES, 0, D_MODES 4737 }; 4738 4739 static const int hard_64bit_mode_classes[] = { 4740 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 4741 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 4742 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 4743 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 4744 4745 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 4746 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 4747 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 4748 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES, 4749 4750 /* FP regs f32 to f63. Only the even numbered registers actually exist, 4751 and none can hold SFmode/SImode values. 
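A mode is allowed in a given register iff its sparc_mode_class bit intersects the register's mask here; HARD_REGNO_MODE_OK presumably just tests that intersection.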
*/ 4752 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 4753 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 4754 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 4755 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 4756 4757 /* %fcc[0123] */ 4758 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES, 4759 4760 /* %icc, %sfp, %gsr */ 4761 CC_MODES, 0, D_MODES 4762 }; 4763 4764 int sparc_mode_class [NUM_MACHINE_MODES]; 4765 4766 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER]; 4767 4768 static void 4769 sparc_init_modes (void) 4770 { 4771 int i; 4772 4773 for (i = 0; i < NUM_MACHINE_MODES; i++) 4774 { 4775 switch (GET_MODE_CLASS (i)) 4776 { 4777 case MODE_INT: 4778 case MODE_PARTIAL_INT: 4779 case MODE_COMPLEX_INT: 4780 if (GET_MODE_SIZE (i) < 4) 4781 sparc_mode_class[i] = 1 << (int) H_MODE; 4782 else if (GET_MODE_SIZE (i) == 4) 4783 sparc_mode_class[i] = 1 << (int) S_MODE; 4784 else if (GET_MODE_SIZE (i) == 8) 4785 sparc_mode_class[i] = 1 << (int) D_MODE; 4786 else if (GET_MODE_SIZE (i) == 16) 4787 sparc_mode_class[i] = 1 << (int) T_MODE; 4788 else if (GET_MODE_SIZE (i) == 32) 4789 sparc_mode_class[i] = 1 << (int) O_MODE; 4790 else 4791 sparc_mode_class[i] = 0; 4792 break; 4793 case MODE_VECTOR_INT: 4794 if (GET_MODE_SIZE (i) == 4) 4795 sparc_mode_class[i] = 1 << (int) SF_MODE; 4796 else if (GET_MODE_SIZE (i) == 8) 4797 sparc_mode_class[i] = 1 << (int) DF_MODE; 4798 else 4799 sparc_mode_class[i] = 0; 4800 break; 4801 case MODE_FLOAT: 4802 case MODE_COMPLEX_FLOAT: 4803 if (GET_MODE_SIZE (i) == 4) 4804 sparc_mode_class[i] = 1 << (int) SF_MODE; 4805 else if (GET_MODE_SIZE (i) == 8) 4806 sparc_mode_class[i] = 1 << (int) DF_MODE; 4807 else if (GET_MODE_SIZE (i) == 16) 4808 sparc_mode_class[i] = 1 << (int) TF_MODE; 4809 else if (GET_MODE_SIZE (i) == 32) 4810 sparc_mode_class[i] = 1 << (int) OF_MODE; 4811 else 4812 sparc_mode_class[i] = 0; 4813 break; 4814 case MODE_CC: 4815 if (i == (int) CCFPmode || i == (int) CCFPEmode) 4816 sparc_mode_class[i] = 1 << (int) CCFP_MODE; 4817 else 4818 sparc_mode_class[i] = 1 << (int) CC_MODE; 4819 break; 4820 default: 4821 sparc_mode_class[i] = 0; 4822 break; 4823 } 4824 } 4825 4826 if (TARGET_ARCH64) 4827 hard_regno_mode_classes = hard_64bit_mode_classes; 4828 else 4829 hard_regno_mode_classes = hard_32bit_mode_classes; 4830 4831 /* Initialize the array used by REGNO_REG_CLASS. */ 4832 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 4833 { 4834 if (i < 16 && TARGET_V8PLUS) 4835 sparc_regno_reg_class[i] = I64_REGS; 4836 else if (i < 32 || i == FRAME_POINTER_REGNUM) 4837 sparc_regno_reg_class[i] = GENERAL_REGS; 4838 else if (i < 64) 4839 sparc_regno_reg_class[i] = FP_REGS; 4840 else if (i < 96) 4841 sparc_regno_reg_class[i] = EXTRA_FP_REGS; 4842 else if (i < 100) 4843 sparc_regno_reg_class[i] = FPCC_REGS; 4844 else 4845 sparc_regno_reg_class[i] = NO_REGS; 4846 } 4847 } 4848 4849 /* Return whether REGNO, a global or FP register, must be saved/restored. */ 4850 4851 static inline bool 4852 save_global_or_fp_reg_p (unsigned int regno, 4853 int leaf_function ATTRIBUTE_UNUSED) 4854 { 4855 return !call_used_regs[regno] && df_regs_ever_live_p (regno); 4856 } 4857 4858 /* Return whether the return address register (%i7) is needed. */ 4859 4860 static inline bool 4861 return_addr_reg_needed_p (int leaf_function) 4862 { 4863 /* If it is live, for example because of __builtin_return_address (0). 
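Such a use reaches the dataflow machinery as an ordinary read of %i7, which is what the df_regs_ever_live_p test below detects.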
*/ 4864 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM)) 4865 return true; 4866 4867 /* Otherwise, it is needed as save register if %o7 is clobbered. */ 4868 if (!leaf_function 4869 /* Loading the GOT register clobbers %o7. */ 4870 || crtl->uses_pic_offset_table 4871 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM)) 4872 return true; 4873 4874 return false; 4875 } 4876 4877 /* Return whether REGNO, a local or in register, must be saved/restored. */ 4878 4879 static bool 4880 save_local_or_in_reg_p (unsigned int regno, int leaf_function) 4881 { 4882 /* General case: call-saved registers live at some point. */ 4883 if (!call_used_regs[regno] && df_regs_ever_live_p (regno)) 4884 return true; 4885 4886 /* Frame pointer register (%fp) if needed. */ 4887 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) 4888 return true; 4889 4890 /* Return address register (%i7) if needed. */ 4891 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function)) 4892 return true; 4893 4894 /* GOT register (%l7) if needed. */ 4895 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table) 4896 return true; 4897 4898 /* If the function accesses prior frames, the frame pointer and the return 4899 address of the previous frame must be saved on the stack. */ 4900 if (crtl->accesses_prior_frames 4901 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM)) 4902 return true; 4903 4904 return false; 4905 } 4906 4907 /* Compute the frame size required by the function. This function is called 4908 during the reload pass and also by sparc_expand_prologue. */ 4909 4910 HOST_WIDE_INT 4911 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function) 4912 { 4913 HOST_WIDE_INT frame_size, apparent_frame_size; 4914 int args_size, n_global_fp_regs = 0; 4915 bool save_local_in_regs_p = false; 4916 unsigned int i; 4917 4918 /* If the function allocates dynamic stack space, the dynamic offset is 4919 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */ 4920 if (leaf_function && !cfun->calls_alloca) 4921 args_size = 0; 4922 else 4923 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl); 4924 4925 /* Calculate space needed for global registers. */ 4926 if (TARGET_ARCH64) { 4927 for (i = 0; i < 8; i++) 4928 if (save_global_or_fp_reg_p (i, 0)) 4929 n_global_fp_regs += 2; } 4930 else { 4931 for (i = 0; i < 8; i += 2) 4932 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0)) 4933 n_global_fp_regs += 2; } 4934 4935 /* In the flat window model, find out which local and in registers need to 4936 be saved. We don't reserve space in the current frame for them as they 4937 will be spilled into the register window save area of the caller's frame. 4938 However, as soon as we use this register window save area, we must create 4939 that of the current frame to make it the live one. */ 4940 if (TARGET_FLAT) 4941 for (i = 16; i < 32; i++) 4942 if (save_local_or_in_reg_p (i, leaf_function)) 4943 { 4944 save_local_in_regs_p = true; 4945 break; 4946 } 4947 4948 /* Calculate space needed for FP registers. */ 4949 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2) 4950 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0)) 4951 n_global_fp_regs += 2; 4952 4953 if (size == 0 4954 && n_global_fp_regs == 0 4955 && args_size == 0 4956 && !save_local_in_regs_p) 4957 frame_size = apparent_frame_size = 0; 4958 else 4959 { 4960 /* We subtract STARTING_FRAME_OFFSET, remember it's negative.
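Purely for illustration, with made-up numbers: if SIZE is 40 and STARTING_FRAME_OFFSET were -8, this would compute (40 + 8 + 7) & -8 == 48, i.e. the local frame rounded up to a multiple of 8 bytes.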
*/ 4961 apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8; 4962 apparent_frame_size += n_global_fp_regs * 4; 4963 4964 /* We need to add the size of the outgoing argument area. */ 4965 frame_size = apparent_frame_size + ((args_size + 7) & -8); 4966 4967 /* And that of the register window save area. */ 4968 frame_size += FIRST_PARM_OFFSET (cfun->decl); 4969 4970 /* Finally, bump to the appropriate alignment. */ 4971 frame_size = SPARC_STACK_ALIGN (frame_size); 4972 } 4973 4974 /* Set up values for use in prologue and epilogue. */ 4975 sparc_frame_size = frame_size; 4976 sparc_apparent_frame_size = apparent_frame_size; 4977 sparc_n_global_fp_regs = n_global_fp_regs; 4978 sparc_save_local_in_regs_p = save_local_in_regs_p; 4979 4980 return frame_size; 4981 } 4982 4983 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */ 4984 4985 int 4986 sparc_initial_elimination_offset (int to) 4987 { 4988 int offset; 4989 4990 if (to == STACK_POINTER_REGNUM) 4991 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf); 4992 else 4993 offset = 0; 4994 4995 offset += SPARC_STACK_BIAS; 4996 return offset; 4997 } 4998 4999 /* Output any necessary .register pseudo-ops. */ 5000 5001 void 5002 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED) 5003 { 5004 #ifdef HAVE_AS_REGISTER_PSEUDO_OP 5005 int i; 5006 5007 if (TARGET_ARCH32) 5008 return; 5009 5010 /* Check if %g[2367] were used without 5011 .register being printed for them already. */ 5012 for (i = 2; i < 8; i++) 5013 { 5014 if (df_regs_ever_live_p (i) 5015 && ! sparc_hard_reg_printed [i]) 5016 { 5017 sparc_hard_reg_printed [i] = 1; 5018 /* %g7 is used as TLS base register, use #ignore 5019 for it instead of #scratch. */ 5020 fprintf (file, "\t.register\t%%g%d, #%s\n", i, 5021 i == 7 ? "ignore" : "scratch"); 5022 } 5023 if (i == 3) i = 5; 5024 } 5025 #endif 5026 } 5027 5028 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) 5029 5030 #if PROBE_INTERVAL > 4096 5031 #error Cannot use indexed addressing mode for stack probing 5032 #endif 5033 5034 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, 5035 inclusive. These are offsets from the current stack pointer. 5036 5037 Note that we don't use the REG+REG addressing mode for the probes because 5038 of the stack bias in 64-bit mode. And it doesn't really buy us anything 5039 so the advantages of having a single code win here. */ 5040 5041 static void 5042 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) 5043 { 5044 rtx g1 = gen_rtx_REG (Pmode, 1); 5045 5046 /* See if we have a constant small number of probes to generate. If so, 5047 that's the easy case. */ 5048 if (size <= PROBE_INTERVAL) 5049 { 5050 emit_move_insn (g1, GEN_INT (first)); 5051 emit_insn (gen_rtx_SET (VOIDmode, g1, 5052 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); 5053 emit_stack_probe (plus_constant (Pmode, g1, -size)); 5054 } 5055 5056 /* The run-time loop is made up of 10 insns in the generic case while the 5057 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */ 5058 else if (size <= 5 * PROBE_INTERVAL) 5059 { 5060 HOST_WIDE_INT i; 5061 5062 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL)); 5063 emit_insn (gen_rtx_SET (VOIDmode, g1, 5064 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); 5065 emit_stack_probe (g1); 5066 5067 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until 5068 it exceeds SIZE. If only two probes are needed, this will not 5069 generate any code. Then probe at FIRST + SIZE. 
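As an illustration (made-up numbers): with PROBE_INTERVAL == 4096, FIRST == 4096 and SIZE == 10000, probes end up at offsets 8192, 12288 and finally 14096 below the stack pointer.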
*/ 5070 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) 5071 { 5072 emit_insn (gen_rtx_SET (VOIDmode, g1, 5073 plus_constant (Pmode, g1, -PROBE_INTERVAL))); 5074 emit_stack_probe (g1); 5075 } 5076 5077 emit_stack_probe (plus_constant (Pmode, g1, 5078 (i - PROBE_INTERVAL) - size)); 5079 } 5080 5081 /* Otherwise, do the same as above, but in a loop. Note that we must be 5082 extra careful with variables wrapping around because we might be at 5083 the very top (or the very bottom) of the address space and we have 5084 to be able to handle this case properly; in particular, we use an 5085 equality test for the loop condition. */ 5086 else 5087 { 5088 HOST_WIDE_INT rounded_size; 5089 rtx g4 = gen_rtx_REG (Pmode, 4); 5090 5091 emit_move_insn (g1, GEN_INT (first)); 5092 5093 5094 /* Step 1: round SIZE to the previous multiple of the interval. */ 5095 5096 rounded_size = size & -PROBE_INTERVAL; 5097 emit_move_insn (g4, GEN_INT (rounded_size)); 5098 5099 5100 /* Step 2: compute initial and final value of the loop counter. */ 5101 5102 /* TEST_ADDR = SP + FIRST. */ 5103 emit_insn (gen_rtx_SET (VOIDmode, g1, 5104 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); 5105 5106 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ 5107 emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4))); 5108 5109 5110 /* Step 3: the loop 5111 5112 while (TEST_ADDR != LAST_ADDR) 5113 { 5114 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL 5115 probe at TEST_ADDR 5116 } 5117 5118 probes at FIRST + N * PROBE_INTERVAL for values of N from 1 5119 until it is equal to ROUNDED_SIZE. */ 5120 5121 if (TARGET_ARCH64) 5122 emit_insn (gen_probe_stack_rangedi (g1, g1, g4)); 5123 else 5124 emit_insn (gen_probe_stack_rangesi (g1, g1, g4)); 5125 5126 5127 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time 5128 that SIZE is equal to ROUNDED_SIZE. */ 5129 5130 if (size != rounded_size) 5131 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size)); 5132 } 5133 5134 /* Make sure nothing is scheduled before we are done. */ 5135 emit_insn (gen_blockage ()); 5136 } 5137 5138 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are 5139 absolute addresses. */ 5140 5141 const char * 5142 output_probe_stack_range (rtx reg1, rtx reg2) 5143 { 5144 static int labelno = 0; 5145 char loop_lab[32], end_lab[32]; 5146 rtx xops[2]; 5147 5148 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno); 5149 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++); 5150 5151 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); 5152 5153 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */ 5154 xops[0] = reg1; 5155 xops[1] = reg2; 5156 output_asm_insn ("cmp\t%0, %1", xops); 5157 if (TARGET_ARCH64) 5158 fputs ("\tbe,pn\t%xcc,", asm_out_file); 5159 else 5160 fputs ("\tbe\t", asm_out_file); 5161 assemble_name_raw (asm_out_file, end_lab); 5162 fputc ('\n', asm_out_file); 5163 5164 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ 5165 xops[1] = GEN_INT (-PROBE_INTERVAL); 5166 output_asm_insn (" add\t%0, %1, %0", xops); 5167 5168 /* Probe at TEST_ADDR and branch.
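The branch is output first and the store of %g0 below lands in its delay slot, hence the template starting with a space instead of a tab.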
*/ 5169 if (TARGET_ARCH64) 5170 fputs ("\tba,pt\t%xcc,", asm_out_file); 5171 else 5172 fputs ("\tba\t", asm_out_file); 5173 assemble_name_raw (asm_out_file, loop_lab); 5174 fputc ('\n', asm_out_file); 5175 xops[1] = GEN_INT (SPARC_STACK_BIAS); 5176 output_asm_insn (" st\t%%g0, [%0+%1]", xops); 5177 5178 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab); 5179 5180 return ""; 5181 } 5182 5183 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as 5184 needed. LOW is supposed to be double-word aligned for 32-bit registers. 5185 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE 5186 is the action to be performed if SAVE_P returns true and ACTION_FALSE 5187 the action to be performed if it returns false. Return the new offset. */ 5188 5189 typedef bool (*sorr_pred_t) (unsigned int, int); 5190 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t; 5191 5192 static int 5193 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base, 5194 int offset, int leaf_function, sorr_pred_t save_p, 5195 sorr_act_t action_true, sorr_act_t action_false) 5196 { 5197 unsigned int i; 5198 rtx mem, insn; 5199 5200 if (TARGET_ARCH64 && high <= 32) 5201 { 5202 int fp_offset = -1; 5203 5204 for (i = low; i < high; i++) 5205 { 5206 if (save_p (i, leaf_function)) 5207 { 5208 mem = gen_frame_mem (DImode, plus_constant (Pmode, 5209 base, offset)); 5210 if (action_true == SORR_SAVE) 5211 { 5212 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i)); 5213 RTX_FRAME_RELATED_P (insn) = 1; 5214 } 5215 else /* action_true == SORR_RESTORE */ 5216 { 5217 /* The frame pointer must be restored last since its old 5218 value may be used as base address for the frame. This 5219 is problematic in 64-bit mode only because of the lack 5220 of double-word load instruction. */ 5221 if (i == HARD_FRAME_POINTER_REGNUM) 5222 fp_offset = offset; 5223 else 5224 emit_move_insn (gen_rtx_REG (DImode, i), mem); 5225 } 5226 offset += 8; 5227 } 5228 else if (action_false == SORR_ADVANCE) 5229 offset += 8; 5230 } 5231 5232 if (fp_offset >= 0) 5233 { 5234 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset)); 5235 emit_move_insn (hard_frame_pointer_rtx, mem); 5236 } 5237 } 5238 else 5239 { 5240 for (i = low; i < high; i += 2) 5241 { 5242 bool reg0 = save_p (i, leaf_function); 5243 bool reg1 = save_p (i + 1, leaf_function); 5244 enum machine_mode mode; 5245 int regno; 5246 5247 if (reg0 && reg1) 5248 { 5249 mode = SPARC_INT_REG_P (i) ? DImode : DFmode; 5250 regno = i; 5251 } 5252 else if (reg0) 5253 { 5254 mode = SPARC_INT_REG_P (i) ? SImode : SFmode; 5255 regno = i; 5256 } 5257 else if (reg1) 5258 { 5259 mode = SPARC_INT_REG_P (i) ? 
SImode : SFmode; 5260 regno = i + 1; 5261 offset += 4; 5262 } 5263 else 5264 { 5265 if (action_false == SORR_ADVANCE) 5266 offset += 8; 5267 continue; 5268 } 5269 5270 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset)); 5271 if (action_true == SORR_SAVE) 5272 { 5273 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno)); 5274 RTX_FRAME_RELATED_P (insn) = 1; 5275 if (mode == DImode) 5276 { 5277 rtx set1, set2; 5278 mem = gen_frame_mem (SImode, plus_constant (Pmode, base, 5279 offset)); 5280 set1 = gen_rtx_SET (VOIDmode, mem, 5281 gen_rtx_REG (SImode, regno)); 5282 RTX_FRAME_RELATED_P (set1) = 1; 5283 mem 5284 = gen_frame_mem (SImode, plus_constant (Pmode, base, 5285 offset + 4)); 5286 set2 = gen_rtx_SET (VOIDmode, mem, 5287 gen_rtx_REG (SImode, regno + 1)); 5288 RTX_FRAME_RELATED_P (set2) = 1; 5289 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 5290 gen_rtx_PARALLEL (VOIDmode, 5291 gen_rtvec (2, set1, set2))); 5292 } 5293 } 5294 else /* action_true == SORR_RESTORE */ 5295 emit_move_insn (gen_rtx_REG (mode, regno), mem); 5296 5297 /* Always preserve double-word alignment. */ 5298 offset = (offset + 8) & -8; 5299 } 5300 } 5301 5302 return offset; 5303 } 5304 5305 /* Emit code to adjust BASE to OFFSET. Return the new base. */ 5306 5307 static rtx 5308 emit_adjust_base_to_offset (rtx base, int offset) 5309 { 5310 /* ??? This might be optimized a little as %g1 might already have a 5311 value close enough that a single add insn will do. */ 5312 /* ??? Although, all of this is probably only a temporary fix because 5313 if %g1 can hold a function result, then sparc_expand_epilogue will 5314 lose (the result will be clobbered). */ 5315 rtx new_base = gen_rtx_REG (Pmode, 1); 5316 emit_move_insn (new_base, GEN_INT (offset)); 5317 emit_insn (gen_rtx_SET (VOIDmode, 5318 new_base, gen_rtx_PLUS (Pmode, base, new_base))); 5319 return new_base; 5320 } 5321 5322 /* Emit code to save/restore call-saved global and FP registers. */ 5323 5324 static void 5325 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action) 5326 { 5327 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095) 5328 { 5329 base = emit_adjust_base_to_offset (base, offset); 5330 offset = 0; 5331 } 5332 5333 offset 5334 = emit_save_or_restore_regs (0, 8, base, offset, 0, 5335 save_global_or_fp_reg_p, action, SORR_NONE); 5336 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0, 5337 save_global_or_fp_reg_p, action, SORR_NONE); 5338 } 5339 5340 /* Emit code to save/restore call-saved local and in registers. */ 5341 5342 static void 5343 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action) 5344 { 5345 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095) 5346 { 5347 base = emit_adjust_base_to_offset (base, offset); 5348 offset = 0; 5349 } 5350 5351 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p, 5352 save_local_or_in_reg_p, action, SORR_ADVANCE); 5353 } 5354 5355 /* Emit a window_save insn. */ 5356 5357 static rtx 5358 emit_window_save (rtx increment) 5359 { 5360 rtx insn = emit_insn (gen_window_save (increment)); 5361 RTX_FRAME_RELATED_P (insn) = 1; 5362 5363 /* The incoming return address (%o7) is saved in %i7. */ 5364 add_reg_note (insn, REG_CFA_REGISTER, 5365 gen_rtx_SET (VOIDmode, 5366 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM), 5367 gen_rtx_REG (Pmode, 5368 INCOMING_RETURN_ADDR_REGNUM))); 5369 5370 /* The window save event. */ 5371 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx); 5372 5373 /* The CFA is %fp, the hard frame pointer. 
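After the save instruction the register window has shifted, so %fp now holds the incoming %sp, and defining the CFA relative to %fp keeps it at the same location.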
*/ 5374 add_reg_note (insn, REG_CFA_DEF_CFA, 5375 plus_constant (Pmode, hard_frame_pointer_rtx, 5376 INCOMING_FRAME_SP_OFFSET)); 5377 5378 return insn; 5379 } 5380 5381 /* Generate an increment for the stack pointer. */ 5382 5383 static rtx 5384 gen_stack_pointer_inc (rtx increment) 5385 { 5386 return gen_rtx_SET (VOIDmode, 5387 stack_pointer_rtx, 5388 gen_rtx_PLUS (Pmode, 5389 stack_pointer_rtx, 5390 increment)); 5391 } 5392 5393 /* Expand the function prologue. The prologue is responsible for reserving 5394 storage for the frame, saving the call-saved registers and loading the 5395 GOT register if needed. */ 5396 5397 void 5398 sparc_expand_prologue (void) 5399 { 5400 HOST_WIDE_INT size; 5401 rtx insn; 5402 5403 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying 5404 on the final value of the flag means deferring the prologue/epilogue 5405 expansion until just before the second scheduling pass, which is too 5406 late to emit multiple epilogues or return insns. 5407 5408 Of course we are making the assumption that the value of the flag 5409 will not change between now and its final value. Of the three parts 5410 of the formula, only the last one can reasonably vary. Let's take a 5411 closer look, after assuming that the first two ones are set to true 5412 (otherwise the last value is effectively silenced). 5413 5414 If only_leaf_regs_used returns false, the global predicate will also 5415 be false so the actual frame size calculated below will be positive. 5416 As a consequence, the save_register_window insn will be emitted in 5417 the instruction stream; now this insn explicitly references %fp 5418 which is not a leaf register so only_leaf_regs_used will always 5419 return false subsequently. 5420 5421 If only_leaf_regs_used returns true, we hope that the subsequent 5422 optimization passes won't cause non-leaf registers to pop up. For 5423 example, the regrename pass has special provisions to not rename to 5424 non-leaf registers in a leaf function. */ 5425 sparc_leaf_function_p 5426 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used (); 5427 5428 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p); 5429 5430 if (flag_stack_usage_info) 5431 current_function_static_stack_size = size; 5432 5433 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size) 5434 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size); 5435 5436 if (size == 0) 5437 ; /* do nothing. */ 5438 else if (sparc_leaf_function_p) 5439 { 5440 rtx size_int_rtx = GEN_INT (-size); 5441 5442 if (size <= 4096) 5443 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx)); 5444 else if (size <= 8192) 5445 { 5446 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096))); 5447 RTX_FRAME_RELATED_P (insn) = 1; 5448 5449 /* %sp is still the CFA register. */ 5450 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 5451 } 5452 else 5453 { 5454 rtx size_rtx = gen_rtx_REG (Pmode, 1); 5455 emit_move_insn (size_rtx, size_int_rtx); 5456 insn = emit_insn (gen_stack_pointer_inc (size_rtx)); 5457 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 5458 gen_stack_pointer_inc (size_int_rtx)); 5459 } 5460 5461 RTX_FRAME_RELATED_P (insn) = 1; 5462 } 5463 else 5464 { 5465 rtx size_int_rtx = GEN_INT (-size); 5466 5467 if (size <= 4096) 5468 emit_window_save (size_int_rtx); 5469 else if (size <= 8192) 5470 { 5471 emit_window_save (GEN_INT (-4096)); 5472 5473 /* %sp is not the CFA register anymore. 
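The window_save emitted just above switched the CFA to %fp, so this second decrement of %sp needs no frame-related annotation.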
*/ 5474 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 5475 5476 /* Make sure no %fp-based store is issued until after the frame is 5477 established. The offset between the frame pointer and the stack 5478 pointer is calculated relative to the value of the stack pointer 5479 at the end of the function prologue, and moving instructions that 5480 access the stack via the frame pointer between the instructions 5481 that decrement the stack pointer could result in accessing the 5482 register window save area, which is volatile. */ 5483 emit_insn (gen_frame_blockage ()); 5484 } 5485 else 5486 { 5487 rtx size_rtx = gen_rtx_REG (Pmode, 1); 5488 emit_move_insn (size_rtx, size_int_rtx); 5489 emit_window_save (size_rtx); 5490 } 5491 } 5492 5493 if (sparc_leaf_function_p) 5494 { 5495 sparc_frame_base_reg = stack_pointer_rtx; 5496 sparc_frame_base_offset = size + SPARC_STACK_BIAS; 5497 } 5498 else 5499 { 5500 sparc_frame_base_reg = hard_frame_pointer_rtx; 5501 sparc_frame_base_offset = SPARC_STACK_BIAS; 5502 } 5503 5504 if (sparc_n_global_fp_regs > 0) 5505 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 5506 sparc_frame_base_offset 5507 - sparc_apparent_frame_size, 5508 SORR_SAVE); 5509 5510 /* Load the GOT register if needed. */ 5511 if (crtl->uses_pic_offset_table) 5512 load_got_register (); 5513 5514 /* Advertise that the data calculated just above are now valid. */ 5515 sparc_prologue_data_valid_p = true; 5516 } 5517 5518 /* Expand the function prologue. The prologue is responsible for reserving 5519 storage for the frame, saving the call-saved registers and loading the 5520 GOT register if needed. */ 5521 5522 void 5523 sparc_flat_expand_prologue (void) 5524 { 5525 HOST_WIDE_INT size; 5526 rtx insn; 5527 5528 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf; 5529 5530 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p); 5531 5532 if (flag_stack_usage_info) 5533 current_function_static_stack_size = size; 5534 5535 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size) 5536 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size); 5537 5538 if (sparc_save_local_in_regs_p) 5539 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS, 5540 SORR_SAVE); 5541 5542 if (size == 0) 5543 ; /* do nothing. */ 5544 else 5545 { 5546 rtx size_int_rtx, size_rtx; 5547 5548 size_rtx = size_int_rtx = GEN_INT (-size); 5549 5550 /* We establish the frame (i.e. decrement the stack pointer) first, even 5551 if we use a frame pointer, because we cannot clobber any call-saved 5552 registers, including the frame pointer, if we haven't created a new 5553 register save area, for the sake of compatibility with the ABI. */ 5554 if (size <= 4096) 5555 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx)); 5556 else if (size <= 8192 && !frame_pointer_needed) 5557 { 5558 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096))); 5559 RTX_FRAME_RELATED_P (insn) = 1; 5560 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 5561 } 5562 else 5563 { 5564 size_rtx = gen_rtx_REG (Pmode, 1); 5565 emit_move_insn (size_rtx, size_int_rtx); 5566 insn = emit_insn (gen_stack_pointer_inc (size_rtx)); 5567 add_reg_note (insn, REG_CFA_ADJUST_CFA, 5568 gen_stack_pointer_inc (size_int_rtx)); 5569 } 5570 RTX_FRAME_RELATED_P (insn) = 1; 5571 5572 /* Ensure nothing is scheduled until after the frame is established. 
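The blockage insn is an opaque volatile barrier which the scheduler and other passes will not move instructions across.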
*/ 5573 emit_insn (gen_blockage ()); 5574 5575 if (frame_pointer_needed) 5576 { 5577 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, 5578 gen_rtx_MINUS (Pmode, 5579 stack_pointer_rtx, 5580 size_rtx))); 5581 RTX_FRAME_RELATED_P (insn) = 1; 5582 5583 add_reg_note (insn, REG_CFA_ADJUST_CFA, 5584 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, 5585 plus_constant (Pmode, stack_pointer_rtx, 5586 size))); 5587 } 5588 5589 if (return_addr_reg_needed_p (sparc_leaf_function_p)) 5590 { 5591 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM); 5592 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); 5593 5594 insn = emit_move_insn (i7, o7); 5595 RTX_FRAME_RELATED_P (insn) = 1; 5596 5597 add_reg_note (insn, REG_CFA_REGISTER, 5598 gen_rtx_SET (VOIDmode, i7, o7)); 5599 5600 /* Prevent this instruction from ever being considered dead, 5601 even if this function has no epilogue. */ 5602 emit_use (i7); 5603 } 5604 } 5605 5606 if (frame_pointer_needed) 5607 { 5608 sparc_frame_base_reg = hard_frame_pointer_rtx; 5609 sparc_frame_base_offset = SPARC_STACK_BIAS; 5610 } 5611 else 5612 { 5613 sparc_frame_base_reg = stack_pointer_rtx; 5614 sparc_frame_base_offset = size + SPARC_STACK_BIAS; 5615 } 5616 5617 if (sparc_n_global_fp_regs > 0) 5618 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 5619 sparc_frame_base_offset 5620 - sparc_apparent_frame_size, 5621 SORR_SAVE); 5622 5623 /* Load the GOT register if needed. */ 5624 if (crtl->uses_pic_offset_table) 5625 load_got_register (); 5626 5627 /* Advertise that the data calculated just above are now valid. */ 5628 sparc_prologue_data_valid_p = true; 5629 } 5630 5631 /* This function generates the assembly code for function entry, which boils 5632 down to emitting the necessary .register directives. */ 5633 5634 static void 5635 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 5636 { 5637 /* Check that the assumption we made in sparc_expand_prologue is valid. */ 5638 if (!TARGET_FLAT) 5639 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs); 5640 5641 sparc_output_scratch_registers (file); 5642 } 5643 5644 /* Expand the function epilogue, either normal or part of a sibcall. 5645 We emit all the instructions except the return or the call. */ 5646 5647 void 5648 sparc_expand_epilogue (bool for_eh) 5649 { 5650 HOST_WIDE_INT size = sparc_frame_size; 5651 5652 if (sparc_n_global_fp_regs > 0) 5653 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 5654 sparc_frame_base_offset 5655 - sparc_apparent_frame_size, 5656 SORR_RESTORE); 5657 5658 if (size == 0 || for_eh) 5659 ; /* do nothing. */ 5660 else if (sparc_leaf_function_p) 5661 { 5662 if (size <= 4096) 5663 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 5664 else if (size <= 8192) 5665 { 5666 emit_insn (gen_stack_pointer_inc (GEN_INT (4096))); 5667 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096))); 5668 } 5669 else 5670 { 5671 rtx reg = gen_rtx_REG (Pmode, 1); 5672 emit_move_insn (reg, GEN_INT (size)); 5673 emit_insn (gen_stack_pointer_inc (reg)); 5674 } 5675 } 5676 } 5677 5678 /* Expand the function epilogue, either normal or part of a sibcall. 5679 We emit all the instructions except the return or the call. 
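This is the variant for the flat register window model; see sparc_expand_epilogue above for the regular model.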
*/ 5680 5681 void 5682 sparc_flat_expand_epilogue (bool for_eh) 5683 { 5684 HOST_WIDE_INT size = sparc_frame_size; 5685 5686 if (sparc_n_global_fp_regs > 0) 5687 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 5688 sparc_frame_base_offset 5689 - sparc_apparent_frame_size, 5690 SORR_RESTORE); 5691 5692 /* If we have a frame pointer, we'll need both to restore it before the 5693 frame is destroyed and use its current value in destroying the frame. 5694 Since we don't have an atomic way to do that in the flat window model, 5695 we save the current value into a temporary register (%g1). */ 5696 if (frame_pointer_needed && !for_eh) 5697 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx); 5698 5699 if (return_addr_reg_needed_p (sparc_leaf_function_p)) 5700 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM), 5701 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM)); 5702 5703 if (sparc_save_local_in_regs_p) 5704 emit_save_or_restore_local_in_regs (sparc_frame_base_reg, 5705 sparc_frame_base_offset, 5706 SORR_RESTORE); 5707 5708 if (size == 0 || for_eh) 5709 ; /* do nothing. */ 5710 else if (frame_pointer_needed) 5711 { 5712 /* Make sure the frame is destroyed after everything else is done. */ 5713 emit_insn (gen_blockage ()); 5714 5715 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1)); 5716 } 5717 else 5718 { 5719 /* Likewise. */ 5720 emit_insn (gen_blockage ()); 5721 5722 if (size <= 4096) 5723 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 5724 else if (size <= 8192) 5725 { 5726 emit_insn (gen_stack_pointer_inc (GEN_INT (4096))); 5727 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096))); 5728 } 5729 else 5730 { 5731 rtx reg = gen_rtx_REG (Pmode, 1); 5732 emit_move_insn (reg, GEN_INT (size)); 5733 emit_insn (gen_stack_pointer_inc (reg)); 5734 } 5735 } 5736 } 5737 5738 /* Return true if it is appropriate to emit `return' instructions in the 5739 body of a function. */ 5740 5741 bool 5742 sparc_can_use_return_insn_p (void) 5743 { 5744 return sparc_prologue_data_valid_p 5745 && sparc_n_global_fp_regs == 0 5746 && TARGET_FLAT 5747 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p) 5748 : (sparc_frame_size == 0 || !sparc_leaf_function_p); 5749 } 5750 5751 /* This function generates the assembly code for function exit. */ 5752 5753 static void 5754 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 5755 { 5756 /* If the last two instructions of a function are "call foo; dslot;" 5757 the return address might point to the first instruction in the next 5758 function and we have to output a dummy nop for the sake of sane 5759 backtraces in such cases. This is pointless for sibling calls since 5760 the return address is explicitly adjusted. */ 5761 5762 rtx insn, last_real_insn; 5763 5764 insn = get_last_insn (); 5765 5766 last_real_insn = prev_real_insn (insn); 5767 if (last_real_insn 5768 && GET_CODE (last_real_insn) == INSN 5769 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE) 5770 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0); 5771 5772 if (last_real_insn 5773 && CALL_P (last_real_insn) 5774 && !SIBLING_CALL_P (last_real_insn)) 5775 fputs("\tnop\n", file); 5776 5777 sparc_output_deferred_case_vectors (); 5778 } 5779 5780 /* Output a 'restore' instruction. */ 5781 5782 static void 5783 output_restore (rtx pat) 5784 { 5785 rtx operands[3]; 5786 5787 if (! 
pat) 5788 { 5789 fputs ("\t restore\n", asm_out_file); 5790 return; 5791 } 5792 5793 gcc_assert (GET_CODE (pat) == SET); 5794 5795 operands[0] = SET_DEST (pat); 5796 pat = SET_SRC (pat); 5797 5798 switch (GET_CODE (pat)) 5799 { 5800 case PLUS: 5801 operands[1] = XEXP (pat, 0); 5802 operands[2] = XEXP (pat, 1); 5803 output_asm_insn (" restore %r1, %2, %Y0", operands); 5804 break; 5805 case LO_SUM: 5806 operands[1] = XEXP (pat, 0); 5807 operands[2] = XEXP (pat, 1); 5808 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands); 5809 break; 5810 case ASHIFT: 5811 operands[1] = XEXP (pat, 0); 5812 gcc_assert (XEXP (pat, 1) == const1_rtx); 5813 output_asm_insn (" restore %r1, %r1, %Y0", operands); 5814 break; 5815 default: 5816 operands[1] = pat; 5817 output_asm_insn (" restore %%g0, %1, %Y0", operands); 5818 break; 5819 } 5820 } 5821 5822 /* Output a return. */ 5823 5824 const char * 5825 output_return (rtx insn) 5826 { 5827 if (crtl->calls_eh_return) 5828 { 5829 /* If the function uses __builtin_eh_return, the eh_return 5830 machinery occupies the delay slot. */ 5831 gcc_assert (!final_sequence); 5832 5833 if (flag_delayed_branch) 5834 { 5835 if (!TARGET_FLAT && TARGET_V9) 5836 fputs ("\treturn\t%i7+8\n", asm_out_file); 5837 else 5838 { 5839 if (!TARGET_FLAT) 5840 fputs ("\trestore\n", asm_out_file); 5841 5842 fputs ("\tjmp\t%o7+8\n", asm_out_file); 5843 } 5844 5845 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file); 5846 } 5847 else 5848 { 5849 if (!TARGET_FLAT) 5850 fputs ("\trestore\n", asm_out_file); 5851 5852 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file); 5853 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file); 5854 } 5855 } 5856 else if (sparc_leaf_function_p || TARGET_FLAT) 5857 { 5858 /* This is a leaf or flat function so we don't have to bother restoring 5859 the register window, which frees us from dealing with the convoluted 5860 semantics of restore/return. We simply output the jump to the 5861 return address and the insn in the delay slot (if any). */ 5862 5863 return "jmp\t%%o7+%)%#"; 5864 } 5865 else 5866 { 5867 /* This is a regular function so we have to restore the register window. 5868 We may have a pending insn for the delay slot, which will be either 5869 combined with the 'restore' instruction or put in the delay slot of 5870 the 'return' instruction. */ 5871 5872 if (final_sequence) 5873 { 5874 rtx delay, pat; 5875 5876 delay = NEXT_INSN (insn); 5877 gcc_assert (delay); 5878 5879 pat = PATTERN (delay); 5880 5881 if (TARGET_V9 && ! epilogue_renumber (&pat, 1)) 5882 { 5883 epilogue_renumber (&pat, 0); 5884 return "return\t%%i7+%)%#"; 5885 } 5886 else 5887 { 5888 output_asm_insn ("jmp\t%%i7+%)", NULL); 5889 output_restore (pat); 5890 PATTERN (delay) = gen_blockage (); 5891 INSN_CODE (delay) = -1; 5892 } 5893 } 5894 else 5895 { 5896 /* The delay slot is empty. */ 5897 if (TARGET_V9) 5898 return "return\t%%i7+%)\n\t nop"; 5899 else if (flag_delayed_branch) 5900 return "jmp\t%%i7+%)\n\t restore"; 5901 else 5902 return "restore\n\tjmp\t%%o7+%)\n\t nop"; 5903 } 5904 } 5905 5906 return ""; 5907 } 5908 5909 /* Output a sibling call. */ 5910 5911 const char * 5912 output_sibcall (rtx insn, rtx call_operand) 5913 { 5914 rtx operands[1]; 5915 5916 gcc_assert (flag_delayed_branch); 5917 5918 operands[0] = call_operand; 5919 5920 if (sparc_leaf_function_p || TARGET_FLAT) 5921 { 5922 /* This is a leaf or flat function so we don't have to bother restoring 5923 the register window. We simply output the jump to the function and 5924 the insn in the delay slot (if any). 
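For instance, when a delay-slot insn is pending, this typically assembles to a sethi/jmp pair that builds the target address in %g1, as the template below shows.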
*/ 5925 5926 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence)); 5927 5928 if (final_sequence) 5929 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#", 5930 operands); 5931 else 5932 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize 5933 it into branch if possible. */ 5934 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7", 5935 operands); 5936 } 5937 else 5938 { 5939 /* This is a regular function so we have to restore the register window. 5940 We may have a pending insn for the delay slot, which will be combined 5941 with the 'restore' instruction. */ 5942 5943 output_asm_insn ("call\t%a0, 0", operands); 5944 5945 if (final_sequence) 5946 { 5947 rtx delay = NEXT_INSN (insn); 5948 gcc_assert (delay); 5949 5950 output_restore (PATTERN (delay)); 5951 5952 PATTERN (delay) = gen_blockage (); 5953 INSN_CODE (delay) = -1; 5954 } 5955 else 5956 output_restore (NULL_RTX); 5957 } 5958 5959 return ""; 5960 } 5961 5962 /* Functions for handling argument passing. 5963 5964 For 32-bit, the first 6 args are normally in registers and the rest are 5965 pushed. Any arg that starts within the first 6 words is at least 5966 partially passed in a register unless its data type forbids. 5967 5968 For 64-bit, the argument registers are laid out as an array of 16 elements 5969 and arguments are added sequentially. The first 6 int args and up to the 5970 first 16 fp args (depending on size) are passed in regs. 5971 5972 Slot Stack Integral Float Float in structure Double Long Double 5973 ---- ----- -------- ----- ------------------ ------ ----------- 5974 15 [SP+248] %f31 %f30,%f31 %d30 5975 14 [SP+240] %f29 %f28,%f29 %d28 %q28 5976 13 [SP+232] %f27 %f26,%f27 %d26 5977 12 [SP+224] %f25 %f24,%f25 %d24 %q24 5978 11 [SP+216] %f23 %f22,%f23 %d22 5979 10 [SP+208] %f21 %f20,%f21 %d20 %q20 5980 9 [SP+200] %f19 %f18,%f19 %d18 5981 8 [SP+192] %f17 %f16,%f17 %d16 %q16 5982 7 [SP+184] %f15 %f14,%f15 %d14 5983 6 [SP+176] %f13 %f12,%f13 %d12 %q12 5984 5 [SP+168] %o5 %f11 %f10,%f11 %d10 5985 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8 5986 3 [SP+152] %o3 %f7 %f6,%f7 %d6 5987 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4 5988 1 [SP+136] %o1 %f3 %f2,%f3 %d2 5989 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0 5990 5991 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise. 5992 5993 Integral arguments are always passed as 64-bit quantities appropriately 5994 extended. 5995 5996 Passing of floating point values is handled as follows. 5997 If a prototype is in scope: 5998 If the value is in a named argument (i.e. not a stdarg function or a 5999 value not part of the `...') then the value is passed in the appropriate 6000 fp reg. 6001 If the value is part of the `...' and is passed in one of the first 6 6002 slots then the value is passed in the appropriate int reg. 6003 If the value is part of the `...' and is not passed in one of the first 6 6004 slots then the value is passed in memory. 6005 If a prototype is not in scope: 6006 If the value is one of the first 6 arguments the value is passed in the 6007 appropriate integer reg and the appropriate fp reg. 6008 If the value is not one of the first 6 arguments the value is passed in 6009 the appropriate fp reg and in memory. 6010 6011 6012 Summary of the calling conventions implemented by GCC on the SPARC: 6013 6014 32-bit ABI: 6015 size argument return value 6016 6017 small integer <4 int. reg. int. reg. 6018 word 4 int. reg. int. reg. 6019 double word 8 int. reg. int. reg. 6020 6021 _Complex small integer <8 int. reg. int. reg. 
6022 _Complex word 8 int. reg. int. reg. 6023 _Complex double word 16 memory int. reg. 6024 6025 vector integer <=8 int. reg. FP reg. 6026 vector integer >8 memory memory 6027 6028 float 4 int. reg. FP reg. 6029 double 8 int. reg. FP reg. 6030 long double 16 memory memory 6031 6032 _Complex float 8 memory FP reg. 6033 _Complex double 16 memory FP reg. 6034 _Complex long double 32 memory FP reg. 6035 6036 vector float any memory memory 6037 6038 aggregate any memory memory 6039 6040 6041 6042 64-bit ABI: 6043 size argument return value 6044 6045 small integer <8 int. reg. int. reg. 6046 word 8 int. reg. int. reg. 6047 double word 16 int. reg. int. reg. 6048 6049 _Complex small integer <16 int. reg. int. reg. 6050 _Complex word 16 int. reg. int. reg. 6051 _Complex double word 32 memory int. reg. 6052 6053 vector integer <=16 FP reg. FP reg. 6054 vector integer 16<s<=32 memory FP reg. 6055 vector integer >32 memory memory 6056 6057 float 4 FP reg. FP reg. 6058 double 8 FP reg. FP reg. 6059 long double 16 FP reg. FP reg. 6060 6061 _Complex float 8 FP reg. FP reg. 6062 _Complex double 16 FP reg. FP reg. 6063 _Complex long double 32 memory FP reg. 6064 6065 vector float <=16 FP reg. FP reg. 6066 vector float 16<s<=32 memory FP reg. 6067 vector float >32 memory memory 6068 6069 aggregate <=16 reg. reg. 6070 aggregate 16<s<=32 memory reg. 6071 aggregate >32 memory memory 6072 6073 6074 6075 Note #1: complex floating-point types follow the extended SPARC ABIs as 6076 implemented by the Sun compiler. 6077 6078 Note #2: integral vector types follow the scalar floating-point types 6079 conventions to match what is implemented by the Sun VIS SDK. 6080 6081 Note #3: floating-point vector types follow the aggregate types 6082 conventions. */ 6083 6084 6085 /* Maximum number of int regs for args. */ 6086 #define SPARC_INT_ARG_MAX 6 6087 /* Maximum number of fp regs for args. */ 6088 #define SPARC_FP_ARG_MAX 16 6089 6090 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) 6091 6092 /* Handle the INIT_CUMULATIVE_ARGS macro. 6093 Initialize a variable CUM of type CUMULATIVE_ARGS 6094 for a call to a function whose data type is FNTYPE. 6095 For a library call, FNTYPE is 0. */ 6096 6097 void 6098 init_cumulative_args (struct sparc_args *cum, tree fntype, 6099 rtx libname ATTRIBUTE_UNUSED, 6100 tree fndecl ATTRIBUTE_UNUSED) 6101 { 6102 cum->words = 0; 6103 cum->prototype_p = fntype && prototype_p (fntype); 6104 cum->libcall_p = fntype == 0; 6105 } 6106 6107 /* Handle promotion of pointer and integer arguments. */ 6108 6109 static enum machine_mode 6110 sparc_promote_function_mode (const_tree type, 6111 enum machine_mode mode, 6112 int *punsignedp, 6113 const_tree fntype ATTRIBUTE_UNUSED, 6114 int for_return ATTRIBUTE_UNUSED) 6115 { 6116 if (type != NULL_TREE && POINTER_TYPE_P (type)) 6117 { 6118 *punsignedp = POINTERS_EXTEND_UNSIGNED; 6119 return Pmode; 6120 } 6121 6122 /* Integral arguments are passed as full words, as per the ABI. */ 6123 if (GET_MODE_CLASS (mode) == MODE_INT 6124 && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 6125 return word_mode; 6126 6127 return mode; 6128 } 6129 6130 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */ 6131 6132 static bool 6133 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED) 6134 { 6135 return TARGET_ARCH64 ? 
true : false; 6136 } 6137 6138 /* Scan the record type TYPE and return the following predicates: 6139 - INTREGS_P: the record contains at least one field or sub-field 6140 that is eligible for promotion in integer registers. 6141 - FP_REGS_P: the record contains at least one field or sub-field 6142 that is eligible for promotion in floating-point registers. 6143 - PACKED_P: the record contains at least one field that is packed. 6144 6145 Sub-fields are not taken into account for the PACKED_P predicate. */ 6146 6147 static void 6148 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p, 6149 int *packed_p) 6150 { 6151 tree field; 6152 6153 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6154 { 6155 if (TREE_CODE (field) == FIELD_DECL) 6156 { 6157 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) 6158 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0); 6159 else if ((FLOAT_TYPE_P (TREE_TYPE (field)) 6160 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) 6161 && TARGET_FPU) 6162 *fpregs_p = 1; 6163 else 6164 *intregs_p = 1; 6165 6166 if (packed_p && DECL_PACKED (field)) 6167 *packed_p = 1; 6168 } 6169 } 6170 } 6171 6172 /* Compute the slot number to pass an argument in. 6173 Return the slot number or -1 if passing on the stack. 6174 6175 CUM is a variable of type CUMULATIVE_ARGS which gives info about 6176 the preceding args and about the function being called. 6177 MODE is the argument's machine mode. 6178 TYPE is the data type of the argument (as a tree). 6179 This is null for libcalls where that information may 6180 not be available. 6181 NAMED is nonzero if this argument is a named parameter 6182 (otherwise it is an extra parameter matching an ellipsis). 6183 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. 6184 *PREGNO records the register number to use if scalar type. 6185 *PPADDING records the amount of padding needed in words. */ 6186 6187 static int 6188 function_arg_slotno (const struct sparc_args *cum, enum machine_mode mode, 6189 const_tree type, bool named, bool incoming_p, 6190 int *pregno, int *ppadding) 6191 { 6192 int regbase = (incoming_p 6193 ? SPARC_INCOMING_INT_ARG_FIRST 6194 : SPARC_OUTGOING_INT_ARG_FIRST); 6195 int slotno = cum->words; 6196 enum mode_class mclass; 6197 int regno; 6198 6199 *ppadding = 0; 6200 6201 if (type && TREE_ADDRESSABLE (type)) 6202 return -1; 6203 6204 if (TARGET_ARCH32 6205 && mode == BLKmode 6206 && type 6207 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0) 6208 return -1; 6209 6210 /* For SPARC64, objects requiring 16-byte alignment get it. */ 6211 if (TARGET_ARCH64 6212 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128 6213 && (slotno & 1) != 0) 6214 slotno++, *ppadding = 1; 6215 6216 mclass = GET_MODE_CLASS (mode); 6217 if (type && TREE_CODE (type) == VECTOR_TYPE) 6218 { 6219 /* Vector types deserve special treatment because they are 6220 polymorphic wrt their mode, depending upon whether VIS 6221 instructions are enabled. */ 6222 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) 6223 { 6224 /* The SPARC port defines no floating-point vector modes. */ 6225 gcc_assert (mode == BLKmode); 6226 } 6227 else 6228 { 6229 /* Integral vector types should either have a vector 6230 mode or an integral mode, because we are guaranteed 6231 by pass_by_reference that their size is not greater 6232 than 16 bytes and TImode is 16-byte wide. */ 6233 gcc_assert (mode != BLKmode); 6234 6235 /* Vector integers are handled like floats according to 6236 the Sun VIS SDK. 
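We therefore override the mode class and fall through to the MODE_FLOAT handling below.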
*/ 6237 mclass = MODE_FLOAT; 6238 } 6239 } 6240 6241 switch (mclass) 6242 { 6243 case MODE_FLOAT: 6244 case MODE_COMPLEX_FLOAT: 6245 case MODE_VECTOR_INT: 6246 if (TARGET_ARCH64 && TARGET_FPU && named) 6247 { 6248 if (slotno >= SPARC_FP_ARG_MAX) 6249 return -1; 6250 regno = SPARC_FP_ARG_FIRST + slotno * 2; 6251 /* Arguments filling only one single FP register are 6252 right-justified in the outer double FP register. */ 6253 if (GET_MODE_SIZE (mode) <= 4) 6254 regno++; 6255 break; 6256 } 6257 /* fallthrough */ 6258 6259 case MODE_INT: 6260 case MODE_COMPLEX_INT: 6261 if (slotno >= SPARC_INT_ARG_MAX) 6262 return -1; 6263 regno = regbase + slotno; 6264 break; 6265 6266 case MODE_RANDOM: 6267 if (mode == VOIDmode) 6268 /* MODE is VOIDmode when generating the actual call. */ 6269 return -1; 6270 6271 gcc_assert (mode == BLKmode); 6272 6273 if (TARGET_ARCH32 6274 || !type 6275 || (TREE_CODE (type) != VECTOR_TYPE 6276 && TREE_CODE (type) != RECORD_TYPE)) 6277 { 6278 if (slotno >= SPARC_INT_ARG_MAX) 6279 return -1; 6280 regno = regbase + slotno; 6281 } 6282 else /* TARGET_ARCH64 && type */ 6283 { 6284 int intregs_p = 0, fpregs_p = 0, packed_p = 0; 6285 6286 /* First see what kinds of registers we would need. */ 6287 if (TREE_CODE (type) == VECTOR_TYPE) 6288 fpregs_p = 1; 6289 else 6290 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p); 6291 6292 /* The ABI obviously doesn't specify how packed structures 6293 are passed. These are defined to be passed in int regs 6294 if possible, otherwise memory. */ 6295 if (packed_p || !named) 6296 fpregs_p = 0, intregs_p = 1; 6297 6298 /* If all arg slots are filled, then must pass on stack. */ 6299 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX) 6300 return -1; 6301 6302 /* If there are only int args and all int arg slots are filled, 6303 then must pass on stack. */ 6304 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX) 6305 return -1; 6306 6307 /* Note that even if all int arg slots are filled, fp members may 6308 still be passed in regs if such regs are available. 6309 *PREGNO isn't set because there may be more than one, it's up 6310 to the caller to compute them. */ 6311 return slotno; 6312 } 6313 break; 6314 6315 default : 6316 gcc_unreachable (); 6317 } 6318 6319 *pregno = regno; 6320 return slotno; 6321 } 6322 6323 /* Handle recursive register counting for structure field layout. */ 6324 6325 struct function_arg_record_value_parms 6326 { 6327 rtx ret; /* return expression being built. */ 6328 int slotno; /* slot number of the argument. */ 6329 int named; /* whether the argument is named. */ 6330 int regbase; /* regno of the base register. */ 6331 int stack; /* 1 if part of the argument is on the stack. */ 6332 int intoffset; /* offset of the first pending integer field. */ 6333 unsigned int nregs; /* number of words passed in registers. */ 6334 }; 6335 6336 static void function_arg_record_value_3 6337 (HOST_WIDE_INT, struct function_arg_record_value_parms *); 6338 static void function_arg_record_value_2 6339 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool); 6340 static void function_arg_record_value_1 6341 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool); 6342 static rtx function_arg_record_value (const_tree, enum machine_mode, int, int, int); 6343 static rtx function_arg_union_value (int, enum machine_mode, int, int); 6344 6345 /* A subroutine of function_arg_record_value. Traverse the structure 6346 recursively and determine how many registers will be required. 
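For example (purely illustrative), a record such as { int i; double d; } needs one integer register for I and one FP register for D, so parms->nregs ends up as 2 after this pass.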
*/ 6347 6348 static void 6349 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos, 6350 struct function_arg_record_value_parms *parms, 6351 bool packed_p) 6352 { 6353 tree field; 6354 6355 /* We need to compute how many registers are needed so we can 6356 allocate the PARALLEL, but before we can do that we need to know 6357 whether there are any packed fields. The ABI obviously doesn't 6358 specify how structures are passed in this case, so they are 6359 defined to be passed in int regs if possible, otherwise memory, 6360 regardless of whether there are fp values present. */ 6361 6362 if (! packed_p) 6363 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6364 { 6365 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field)) 6366 { 6367 packed_p = true; 6368 break; 6369 } 6370 } 6371 6372 /* Compute how many registers we need. */ 6373 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6374 { 6375 if (TREE_CODE (field) == FIELD_DECL) 6376 { 6377 HOST_WIDE_INT bitpos = startbitpos; 6378 6379 if (DECL_SIZE (field) != 0) 6380 { 6381 if (integer_zerop (DECL_SIZE (field))) 6382 continue; 6383 6384 if (host_integerp (bit_position (field), 1)) 6385 bitpos += int_bit_position (field); 6386 } 6387 6388 /* ??? FIXME: else assume zero offset. */ 6389 6390 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) 6391 function_arg_record_value_1 (TREE_TYPE (field), 6392 bitpos, 6393 parms, 6394 packed_p); 6395 else if ((FLOAT_TYPE_P (TREE_TYPE (field)) 6396 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) 6397 && TARGET_FPU 6398 && parms->named 6399 && ! packed_p) 6400 { 6401 if (parms->intoffset != -1) 6402 { 6403 unsigned int startbit, endbit; 6404 int intslots, this_slotno; 6405 6406 startbit = parms->intoffset & -BITS_PER_WORD; 6407 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD; 6408 6409 intslots = (endbit - startbit) / BITS_PER_WORD; 6410 this_slotno = parms->slotno + parms->intoffset 6411 / BITS_PER_WORD; 6412 6413 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno) 6414 { 6415 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno); 6416 /* We need to pass this field on the stack. */ 6417 parms->stack = 1; 6418 } 6419 6420 parms->nregs += intslots; 6421 parms->intoffset = -1; 6422 } 6423 6424 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX. 6425 If it wasn't true we wouldn't be here. */ 6426 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE 6427 && DECL_MODE (field) == BLKmode) 6428 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field)); 6429 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) 6430 parms->nregs += 2; 6431 else 6432 parms->nregs += 1; 6433 } 6434 else 6435 { 6436 if (parms->intoffset == -1) 6437 parms->intoffset = bitpos; 6438 } 6439 } 6440 } 6441 } 6442 6443 /* A subroutine of function_arg_record_value. Assign the bits of the 6444 structure between parms->intoffset and bitpos to integer registers.
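Each full word gets its own integer register; a chunk that starts in mid-word is assigned in the smallest integer mode that covers the rest of its word, as computed below.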
*/ 6445 6446 static void 6447 function_arg_record_value_3 (HOST_WIDE_INT bitpos, 6448 struct function_arg_record_value_parms *parms) 6449 { 6450 enum machine_mode mode; 6451 unsigned int regno; 6452 unsigned int startbit, endbit; 6453 int this_slotno, intslots, intoffset; 6454 rtx reg; 6455 6456 if (parms->intoffset == -1) 6457 return; 6458 6459 intoffset = parms->intoffset; 6460 parms->intoffset = -1; 6461 6462 startbit = intoffset & -BITS_PER_WORD; 6463 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD; 6464 intslots = (endbit - startbit) / BITS_PER_WORD; 6465 this_slotno = parms->slotno + intoffset / BITS_PER_WORD; 6466 6467 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno); 6468 if (intslots <= 0) 6469 return; 6470 6471 /* If this is the trailing part of a word, only load that much into 6472 the register. Otherwise load the whole register. Note that in 6473 the latter case we may pick up unwanted bits. It's not a problem 6474 at the moment but may wish to revisit. */ 6475 6476 if (intoffset % BITS_PER_WORD != 0) 6477 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD, 6478 MODE_INT); 6479 else 6480 mode = word_mode; 6481 6482 intoffset /= BITS_PER_UNIT; 6483 do 6484 { 6485 regno = parms->regbase + this_slotno; 6486 reg = gen_rtx_REG (mode, regno); 6487 XVECEXP (parms->ret, 0, parms->stack + parms->nregs) 6488 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset)); 6489 6490 this_slotno += 1; 6491 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1; 6492 mode = word_mode; 6493 parms->nregs += 1; 6494 intslots -= 1; 6495 } 6496 while (intslots > 0); 6497 } 6498 6499 /* A subroutine of function_arg_record_value. Traverse the structure 6500 recursively and assign bits to floating point registers. Track which 6501 bits in between need integer registers; invoke function_arg_record_value_3 6502 to make that happen. */ 6503 6504 static void 6505 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos, 6506 struct function_arg_record_value_parms *parms, 6507 bool packed_p) 6508 { 6509 tree field; 6510 6511 if (! packed_p) 6512 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6513 { 6514 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field)) 6515 { 6516 packed_p = true; 6517 break; 6518 } 6519 } 6520 6521 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6522 { 6523 if (TREE_CODE (field) == FIELD_DECL) 6524 { 6525 HOST_WIDE_INT bitpos = startbitpos; 6526 6527 if (DECL_SIZE (field) != 0) 6528 { 6529 if (integer_zerop (DECL_SIZE (field))) 6530 continue; 6531 6532 if (host_integerp (bit_position (field), 1)) 6533 bitpos += int_bit_position (field); 6534 } 6535 6536 /* ??? FIXME: else assume zero offset. */ 6537 6538 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) 6539 function_arg_record_value_2 (TREE_TYPE (field), 6540 bitpos, 6541 parms, 6542 packed_p); 6543 else if ((FLOAT_TYPE_P (TREE_TYPE (field)) 6544 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) 6545 && TARGET_FPU 6546 && parms->named 6547 && ! 
packed_p) 6548 { 6549 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD; 6550 int regno, nregs, pos; 6551 enum machine_mode mode = DECL_MODE (field); 6552 rtx reg; 6553 6554 function_arg_record_value_3 (bitpos, parms); 6555 6556 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE 6557 && mode == BLKmode) 6558 { 6559 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); 6560 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field)); 6561 } 6562 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) 6563 { 6564 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); 6565 nregs = 2; 6566 } 6567 else 6568 nregs = 1; 6569 6570 regno = SPARC_FP_ARG_FIRST + this_slotno * 2; 6571 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0) 6572 regno++; 6573 reg = gen_rtx_REG (mode, regno); 6574 pos = bitpos / BITS_PER_UNIT; 6575 XVECEXP (parms->ret, 0, parms->stack + parms->nregs) 6576 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos)); 6577 parms->nregs += 1; 6578 while (--nregs > 0) 6579 { 6580 regno += GET_MODE_SIZE (mode) / 4; 6581 reg = gen_rtx_REG (mode, regno); 6582 pos += GET_MODE_SIZE (mode); 6583 XVECEXP (parms->ret, 0, parms->stack + parms->nregs) 6584 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos)); 6585 parms->nregs += 1; 6586 } 6587 } 6588 else 6589 { 6590 if (parms->intoffset == -1) 6591 parms->intoffset = bitpos; 6592 } 6593 } 6594 } 6595 } 6596 6597 /* Used by function_arg and sparc_function_value_1 to implement the complex 6598 conventions of the 64-bit ABI for passing and returning structures. 6599 Return an expression valid as a return value for the FUNCTION_ARG 6600 and TARGET_FUNCTION_VALUE. 6601 6602 TYPE is the data type of the argument (as a tree). 6603 This is null for libcalls where that information may 6604 not be available. 6605 MODE is the argument's machine mode. 6606 SLOTNO is the index number of the argument's slot in the parameter array. 6607 NAMED is nonzero if this argument is a named parameter 6608 (otherwise it is an extra parameter matching an ellipsis). 6609 REGBASE is the regno of the base register for the parameter array. */ 6610 6611 static rtx 6612 function_arg_record_value (const_tree type, enum machine_mode mode, 6613 int slotno, int named, int regbase) 6614 { 6615 HOST_WIDE_INT typesize = int_size_in_bytes (type); 6616 struct function_arg_record_value_parms parms; 6617 unsigned int nregs; 6618 6619 parms.ret = NULL_RTX; 6620 parms.slotno = slotno; 6621 parms.named = named; 6622 parms.regbase = regbase; 6623 parms.stack = 0; 6624 6625 /* Compute how many registers we need. */ 6626 parms.nregs = 0; 6627 parms.intoffset = 0; 6628 function_arg_record_value_1 (type, 0, &parms, false); 6629 6630 /* Take into account pending integer fields. */ 6631 if (parms.intoffset != -1) 6632 { 6633 unsigned int startbit, endbit; 6634 int intslots, this_slotno; 6635 6636 startbit = parms.intoffset & -BITS_PER_WORD; 6637 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD; 6638 intslots = (endbit - startbit) / BITS_PER_WORD; 6639 this_slotno = slotno + parms.intoffset / BITS_PER_WORD; 6640 6641 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno) 6642 { 6643 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno); 6644 /* We need to pass this field on the stack. */ 6645 parms.stack = 1; 6646 } 6647 6648 parms.nregs += intslots; 6649 } 6650 nregs = parms.nregs; 6651 6652 /* Allocate the vector and handle some annoying special cases. */ 6653 if (nregs == 0) 6654 { 6655 /* ??? Empty structure has no value? Duh? 
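At any rate, a zero-sized or fieldless record still needs some RTL representation, which the code below contrives to provide.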
*/ 6656 if (typesize <= 0) 6657 { 6658 /* Though there's nothing really to store, return a word register 6659 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL 6660 leads to breakage due to the fact that there are zero bytes to 6661 load. */ 6662 return gen_rtx_REG (mode, regbase); 6663 } 6664 else 6665 { 6666 /* ??? C++ has structures with no fields, and yet a size. Give up 6667 for now and pass everything back in integer registers. */ 6668 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 6669 } 6670 if (nregs + slotno > SPARC_INT_ARG_MAX) 6671 nregs = SPARC_INT_ARG_MAX - slotno; 6672 } 6673 gcc_assert (nregs != 0); 6674 6675 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs)); 6676 6677 /* If at least one field must be passed on the stack, generate 6678 (parallel [(expr_list (nil) ...) ...]) so that all fields will 6679 also be passed on the stack. We can't do much better because the 6680 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case 6681 of structures for which the fields passed exclusively in registers 6682 are not at the beginning of the structure. */ 6683 if (parms.stack) 6684 XVECEXP (parms.ret, 0, 0) 6685 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); 6686 6687 /* Fill in the entries. */ 6688 parms.nregs = 0; 6689 parms.intoffset = 0; 6690 function_arg_record_value_2 (type, 0, &parms, false); 6691 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms); 6692 6693 gcc_assert (parms.nregs == nregs); 6694 6695 return parms.ret; 6696 } 6697 6698 /* Used by function_arg and sparc_function_value_1 to implement the conventions 6699 of the 64-bit ABI for passing and returning unions. 6700 Return an expression valid as a return value for the FUNCTION_ARG 6701 and TARGET_FUNCTION_VALUE. 6702 6703 SIZE is the size in bytes of the union. 6704 MODE is the argument's machine mode. 6705 REGNO is the hard register the union will be passed in. */ 6706 6707 static rtx 6708 function_arg_union_value (int size, enum machine_mode mode, int slotno, 6709 int regno) 6710 { 6711 int nwords = ROUND_ADVANCE (size), i; 6712 rtx regs; 6713 6714 /* See comment in previous function for empty structures. */ 6715 if (nwords == 0) 6716 return gen_rtx_REG (mode, regno); 6717 6718 if (slotno == SPARC_INT_ARG_MAX - 1) 6719 nwords = 1; 6720 6721 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords)); 6722 6723 for (i = 0; i < nwords; i++) 6724 { 6725 /* Unions are passed left-justified. */ 6726 XVECEXP (regs, 0, i) 6727 = gen_rtx_EXPR_LIST (VOIDmode, 6728 gen_rtx_REG (word_mode, regno), 6729 GEN_INT (UNITS_PER_WORD * i)); 6730 regno++; 6731 } 6732 6733 return regs; 6734 } 6735 6736 /* Used by function_arg and sparc_function_value_1 to implement the conventions 6737 for passing and returning large (BLKmode) vectors. 6738 Return an expression valid as a return value for the FUNCTION_ARG 6739 and TARGET_FUNCTION_VALUE. 6740 6741 SIZE is the size in bytes of the vector (at least 8 bytes). 6742 REGNO is the FP hard register the vector will be passed in. */ 6743 6744 static rtx 6745 function_arg_vector_value (int size, int regno) 6746 { 6747 int i, nregs = size / 8; 6748 rtx regs; 6749 6750 regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs)); 6751 6752 for (i = 0; i < nregs; i++) 6753 { 6754 XVECEXP (regs, 0, i) 6755 = gen_rtx_EXPR_LIST (VOIDmode, 6756 gen_rtx_REG (DImode, regno + 2*i), 6757 GEN_INT (i*8)); 6758 } 6759 6760 return regs; 6761 } 6762 6763 /* Determine where to put an argument to a function. 
6764 Value is zero to push the argument on the stack, 6765 or a hard register in which to store the argument. 6766 6767 CUM is a variable of type CUMULATIVE_ARGS which gives info about 6768 the preceding args and about the function being called. 6769 MODE is the argument's machine mode. 6770 TYPE is the data type of the argument (as a tree). 6771 This is null for libcalls where that information may 6772 not be available. 6773 NAMED is true if this argument is a named parameter 6774 (otherwise it is an extra parameter matching an ellipsis). 6775 INCOMING_P is false for TARGET_FUNCTION_ARG, true for 6776 TARGET_FUNCTION_INCOMING_ARG. */ 6777 6778 static rtx 6779 sparc_function_arg_1 (cumulative_args_t cum_v, enum machine_mode mode, 6780 const_tree type, bool named, bool incoming_p) 6781 { 6782 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 6783 6784 int regbase = (incoming_p 6785 ? SPARC_INCOMING_INT_ARG_FIRST 6786 : SPARC_OUTGOING_INT_ARG_FIRST); 6787 int slotno, regno, padding; 6788 enum mode_class mclass = GET_MODE_CLASS (mode); 6789 6790 slotno = function_arg_slotno (cum, mode, type, named, incoming_p, 6791 &regno, &padding); 6792 if (slotno == -1) 6793 return 0; 6794 6795 /* Vector types deserve special treatment because they are polymorphic wrt 6796 their mode, depending upon whether VIS instructions are enabled. */ 6797 if (type && TREE_CODE (type) == VECTOR_TYPE) 6798 { 6799 HOST_WIDE_INT size = int_size_in_bytes (type); 6800 gcc_assert ((TARGET_ARCH32 && size <= 8) 6801 || (TARGET_ARCH64 && size <= 16)); 6802 6803 if (mode == BLKmode) 6804 return function_arg_vector_value (size, 6805 SPARC_FP_ARG_FIRST + 2*slotno); 6806 else 6807 mclass = MODE_FLOAT; 6808 } 6809 6810 if (TARGET_ARCH32) 6811 return gen_rtx_REG (mode, regno); 6812 6813 /* Structures up to 16 bytes in size are passed in arg slots on the stack 6814 and are promoted to registers if possible. */ 6815 if (type && TREE_CODE (type) == RECORD_TYPE) 6816 { 6817 HOST_WIDE_INT size = int_size_in_bytes (type); 6818 gcc_assert (size <= 16); 6819 6820 return function_arg_record_value (type, mode, slotno, named, regbase); 6821 } 6822 6823 /* Unions up to 16 bytes in size are passed in integer registers. */ 6824 else if (type && TREE_CODE (type) == UNION_TYPE) 6825 { 6826 HOST_WIDE_INT size = int_size_in_bytes (type); 6827 gcc_assert (size <= 16); 6828 6829 return function_arg_union_value (size, mode, slotno, regno); 6830 } 6831 6832 /* v9 fp args in reg slots beyond the int reg slots get passed in regs 6833 but also have the slot allocated for them. 6834 If no prototype is in scope, fp values in register slots get passed 6835 in two places, either fp regs and int regs or fp regs and memory. */ 6836 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) 6837 && SPARC_FP_REG_P (regno)) 6838 { 6839 rtx reg = gen_rtx_REG (mode, regno); 6840 if (cum->prototype_p || cum->libcall_p) 6841 { 6842 /* "* 2" because fp reg numbers are recorded in 4 byte 6843 quantities. */ 6844 #if 0 6845 /* ??? This will cause the value to be passed in the fp reg and 6846 in the stack. When a prototype exists we want to pass the 6847 value in the reg but reserve space on the stack. That's an 6848 optimization, and is deferred [for a bit]. */ 6849 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2) 6850 return gen_rtx_PARALLEL (mode, 6851 gen_rtvec (2, 6852 gen_rtx_EXPR_LIST (VOIDmode, 6853 NULL_RTX, const0_rtx), 6854 gen_rtx_EXPR_LIST (VOIDmode, 6855 reg, const0_rtx))); 6856 else 6857 #else 6858 /* ???
It seems that passing back a register even when past 6859 the area declared by REG_PARM_STACK_SPACE will allocate 6860 space appropriately, and will not copy the data onto the 6861 stack, exactly as we desire. 6862 6863 This is due to locate_and_pad_parm being called in 6864 expand_call whenever reg_parm_stack_space > 0, which 6865 while beneficial to our example here, would seem to be 6866 in error from what had been intended. Ho hum... -- r~ */ 6867 #endif 6868 return reg; 6869 } 6870 else 6871 { 6872 rtx v0, v1; 6873 6874 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2) 6875 { 6876 int intreg; 6877 6878 /* On incoming, we don't need to know that the value 6879 is passed in %f0 and %i0, and it confuses other parts 6880 causing needless spillage even on the simplest cases. */ 6881 if (incoming_p) 6882 return reg; 6883 6884 intreg = (SPARC_OUTGOING_INT_ARG_FIRST 6885 + (regno - SPARC_FP_ARG_FIRST) / 2); 6886 6887 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); 6888 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg), 6889 const0_rtx); 6890 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); 6891 } 6892 else 6893 { 6894 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); 6895 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); 6896 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); 6897 } 6898 } 6899 } 6900 6901 /* All other aggregate types are passed in an integer register in a mode 6902 corresponding to the size of the type. */ 6903 else if (type && AGGREGATE_TYPE_P (type)) 6904 { 6905 HOST_WIDE_INT size = int_size_in_bytes (type); 6906 gcc_assert (size <= 16); 6907 6908 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); 6909 } 6910 6911 return gen_rtx_REG (mode, regno); 6912 } 6913 6914 /* Handle the TARGET_FUNCTION_ARG target hook. */ 6915 6916 static rtx 6917 sparc_function_arg (cumulative_args_t cum, enum machine_mode mode, 6918 const_tree type, bool named) 6919 { 6920 return sparc_function_arg_1 (cum, mode, type, named, false); 6921 } 6922 6923 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */ 6924 6925 static rtx 6926 sparc_function_incoming_arg (cumulative_args_t cum, enum machine_mode mode, 6927 const_tree type, bool named) 6928 { 6929 return sparc_function_arg_1 (cum, mode, type, named, true); 6930 } 6931 6932 /* For sparc64, objects requiring 16 byte alignment are passed that way. */ 6933 6934 static unsigned int 6935 sparc_function_arg_boundary (enum machine_mode mode, const_tree type) 6936 { 6937 return ((TARGET_ARCH64 6938 && (GET_MODE_ALIGNMENT (mode) == 128 6939 || (type && TYPE_ALIGN (type) == 128))) 6940 ? 128 6941 : PARM_BOUNDARY); 6942 } 6943 6944 /* For an arg passed partly in registers and partly in memory, 6945 this is the number of bytes of registers used. 6946 For args passed entirely in registers or entirely in memory, zero. 6947 6948 Any arg that starts in the first 6 regs but won't entirely fit in them 6949 needs partial registers on v8. On v9, structures with integer 6950 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp 6951 values that begin in the last fp reg [where "last fp reg" varies with the 6952 mode] will be split between that reg and memory. */ 6953 6954 static int 6955 sparc_arg_partial_bytes (cumulative_args_t cum, enum machine_mode mode, 6956 tree type, bool named) 6957 { 6958 int slotno, regno, padding; 6959 6960 /* We pass false for incoming_p here, it doesn't matter. 
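INCOMING_P only affects which register base is picked, and only the slot number is used here.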
*/ 6961 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named, 6962 false, &regno, &padding); 6963 6964 if (slotno == -1) 6965 return 0; 6966 6967 if (TARGET_ARCH32) 6968 { 6969 if ((slotno + (mode == BLKmode 6970 ? ROUND_ADVANCE (int_size_in_bytes (type)) 6971 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))) 6972 > SPARC_INT_ARG_MAX) 6973 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD; 6974 } 6975 else 6976 { 6977 /* We are guaranteed by pass_by_reference that the size of the 6978 argument is not greater than 16 bytes, so we only need to return 6979 one word if the argument is partially passed in registers. */ 6980 6981 if (type && AGGREGATE_TYPE_P (type)) 6982 { 6983 int size = int_size_in_bytes (type); 6984 6985 if (size > UNITS_PER_WORD 6986 && slotno == SPARC_INT_ARG_MAX - 1) 6987 return UNITS_PER_WORD; 6988 } 6989 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT 6990 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT 6991 && ! (TARGET_FPU && named))) 6992 { 6993 /* The complex types are passed as packed types. */ 6994 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD 6995 && slotno == SPARC_INT_ARG_MAX - 1) 6996 return UNITS_PER_WORD; 6997 } 6998 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 6999 { 7000 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD) 7001 > SPARC_FP_ARG_MAX) 7002 return UNITS_PER_WORD; 7003 } 7004 } 7005 7006 return 0; 7007 } 7008 7009 /* Handle the TARGET_PASS_BY_REFERENCE target hook. 7010 Specify whether to pass the argument by reference. */ 7011 7012 static bool 7013 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, 7014 enum machine_mode mode, const_tree type, 7015 bool named ATTRIBUTE_UNUSED) 7016 { 7017 if (TARGET_ARCH32) 7018 /* Original SPARC 32-bit ABI says that structures and unions, 7019 and quad-precision floats are passed by reference. For Pascal, 7020 also pass arrays by reference. All other base types are passed 7021 in registers. 7022 7023 Extended ABI (as implemented by the Sun compiler) says that all 7024 complex floats are passed by reference. Pass complex integers 7025 in registers up to 8 bytes. More generally, enforce the 2-word 7026 cap for passing arguments in registers. 7027 7028 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7029 integers are passed like floats of the same size, that is in 7030 registers up to 8 bytes. Pass all vector floats by reference 7031 like structures and unions. */ 7032 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))) 7033 || mode == SCmode 7034 /* Catch CDImode, TFmode, DCmode and TCmode. */ 7035 || GET_MODE_SIZE (mode) > 8 7036 || (type 7037 && TREE_CODE (type) == VECTOR_TYPE 7038 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); 7039 else 7040 /* Original SPARC 64-bit ABI says that structures and unions 7041 smaller than 16 bytes are passed in registers, as well as 7042 all other base types. 7043 7044 Extended ABI (as implemented by the Sun compiler) says that 7045 complex floats are passed in registers up to 16 bytes. Pass 7046 all complex integers in registers up to 16 bytes. More generally, 7047 enforce the 2-word cap for passing arguments in registers. 7048 7049 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7050 integers are passed like floats of the same size, that is in 7051 registers (up to 16 bytes). Pass all vector floats like structures 7052 and unions.
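For example (illustrative), a 32-byte '_Complex long double' has TCmode, whose size exceeds 16 bytes, so it is passed by reference below.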
*/ 7053 return ((type 7054 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE) 7055 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16) 7056 /* Catch CTImode and TCmode. */ 7057 || GET_MODE_SIZE (mode) > 16); 7058 } 7059 7060 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook. 7061 Update the data in CUM to advance over an argument 7062 of mode MODE and data type TYPE. 7063 TYPE is null for libcalls where that information may not be available. */ 7064 7065 static void 7066 sparc_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode, 7067 const_tree type, bool named) 7068 { 7069 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 7070 int regno, padding; 7071 7072 /* We pass false for incoming_p here, it doesn't matter. */ 7073 function_arg_slotno (cum, mode, type, named, false, &regno, &padding); 7074 7075 /* If argument requires leading padding, add it. */ 7076 cum->words += padding; 7077 7078 if (TARGET_ARCH32) 7079 { 7080 cum->words += (mode != BLKmode 7081 ? ROUND_ADVANCE (GET_MODE_SIZE (mode)) 7082 : ROUND_ADVANCE (int_size_in_bytes (type))); 7083 } 7084 else 7085 { 7086 if (type && AGGREGATE_TYPE_P (type)) 7087 { 7088 int size = int_size_in_bytes (type); 7089 7090 if (size <= 8) 7091 ++cum->words; 7092 else if (size <= 16) 7093 cum->words += 2; 7094 else /* passed by reference */ 7095 ++cum->words; 7096 } 7097 else 7098 { 7099 cum->words += (mode != BLKmode 7100 ? ROUND_ADVANCE (GET_MODE_SIZE (mode)) 7101 : ROUND_ADVANCE (int_size_in_bytes (type))); 7102 } 7103 } 7104 } 7105 7106 /* Handle the FUNCTION_ARG_PADDING macro. 7107 For the 64-bit ABI, structs are always stored left-justified in their 7108 argument slot. */ 7109 7110 enum direction 7111 function_arg_padding (enum machine_mode mode, const_tree type) 7112 { 7113 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type)) 7114 return upward; 7115 7116 /* Fall back to the default. */ 7117 return DEFAULT_FUNCTION_ARG_PADDING (mode, type); 7118 } 7119 7120 /* Handle the TARGET_RETURN_IN_MEMORY target hook. 7121 Specify whether to return the return value in memory. */ 7122 7123 static bool 7124 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 7125 { 7126 if (TARGET_ARCH32) 7127 /* Original SPARC 32-bit ABI says that structures and unions, 7128 and quad-precision floats are returned in memory. All other 7129 base types are returned in registers. 7130 7131 Extended ABI (as implemented by the Sun compiler) says that 7132 all complex floats are returned in registers (8 FP registers 7133 at most for '_Complex long double'). Return all complex integers 7134 in registers (4 at most for '_Complex long long'). 7135 7136 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7137 integers are returned like floats of the same size, that is in 7138 registers up to 8 bytes and in memory otherwise. Return all 7139 vector floats in memory like structures and unions; note that 7140 they always have BLKmode like the latter. */ 7141 return (TYPE_MODE (type) == BLKmode 7142 || TYPE_MODE (type) == TFmode 7143 || (TREE_CODE (type) == VECTOR_TYPE 7144 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); 7145 else 7146 /* Original SPARC 64-bit ABI says that structures and unions 7147 smaller than 32 bytes are returned in registers, as well as 7148 all other base types. 7149 7150 Extended ABI (as implemented by the Sun compiler) says that all 7151 complex floats are returned in registers (8 FP registers at most 7152 for '_Complex long double').
Return all complex integers in 7153 registers (4 at most for '_Complex TItype'). 7154 7155 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7156 integers are returned like floats of the same size, that is in 7157 registers. Return all vector floats like structure and unions; 7158 note that they always have BLKmode like the latter. */ 7159 return (TYPE_MODE (type) == BLKmode 7160 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32); 7161 } 7162 7163 /* Handle the TARGET_STRUCT_VALUE target hook. 7164 Return where to find the structure return value address. */ 7165 7166 static rtx 7167 sparc_struct_value_rtx (tree fndecl, int incoming) 7168 { 7169 if (TARGET_ARCH64) 7170 return 0; 7171 else 7172 { 7173 rtx mem; 7174 7175 if (incoming) 7176 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx, 7177 STRUCT_VALUE_OFFSET)); 7178 else 7179 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx, 7180 STRUCT_VALUE_OFFSET)); 7181 7182 /* Only follow the SPARC ABI for fixed-size structure returns. 7183 Variable size structure returns are handled per the normal 7184 procedures in GCC. This is enabled by -mstd-struct-return */ 7185 if (incoming == 2 7186 && sparc_std_struct_return 7187 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl)) 7188 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST) 7189 { 7190 /* We must check and adjust the return address, as it is 7191 optional as to whether the return object is really 7192 provided. */ 7193 rtx ret_reg = gen_rtx_REG (Pmode, 31); 7194 rtx scratch = gen_reg_rtx (SImode); 7195 rtx endlab = gen_label_rtx (); 7196 7197 /* Calculate the return object size */ 7198 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl)); 7199 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff); 7200 /* Construct a temporary return value */ 7201 rtx temp_val 7202 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0); 7203 7204 /* Implement SPARC 32-bit psABI callee return struct checking: 7205 7206 Fetch the instruction where we will return to and see if 7207 it's an unimp instruction (the most significant 10 bits 7208 will be zero). */ 7209 emit_move_insn (scratch, gen_rtx_MEM (SImode, 7210 plus_constant (Pmode, 7211 ret_reg, 8))); 7212 /* Assume the size is valid and pre-adjust */ 7213 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4))); 7214 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 7215 0, endlab); 7216 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4))); 7217 /* Write the address of the memory pointed to by temp_val into 7218 the memory pointed to by mem */ 7219 emit_move_insn (mem, XEXP (temp_val, 0)); 7220 emit_label (endlab); 7221 } 7222 7223 return mem; 7224 } 7225 } 7226 7227 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook. 7228 For v9, function return values are subject to the same rules as arguments, 7229 except that up to 32 bytes may be returned in registers. */ 7230 7231 static rtx 7232 sparc_function_value_1 (const_tree type, enum machine_mode mode, 7233 bool outgoing) 7234 { 7235 /* Beware that the two values are swapped here wrt function_arg. */ 7236 int regbase = (outgoing 7237 ? SPARC_INCOMING_INT_ARG_FIRST 7238 : SPARC_OUTGOING_INT_ARG_FIRST); 7239 enum mode_class mclass = GET_MODE_CLASS (mode); 7240 int regno; 7241 7242 /* Vector types deserve special treatment because they are polymorphic wrt 7243 their mode, depending upon whether VIS instructions are enabled. 
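If the type has ended up with BLKmode, which is always the case for vector floats, the value is returned via function_arg_vector_value below; otherwise it is handled like a float.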
*/ 7244 if (type && TREE_CODE (type) == VECTOR_TYPE) 7245 { 7246 HOST_WIDE_INT size = int_size_in_bytes (type); 7247 gcc_assert ((TARGET_ARCH32 && size <= 8) 7248 || (TARGET_ARCH64 && size <= 32)); 7249 7250 if (mode == BLKmode) 7251 return function_arg_vector_value (size, 7252 SPARC_FP_ARG_FIRST); 7253 else 7254 mclass = MODE_FLOAT; 7255 } 7256 7257 if (TARGET_ARCH64 && type) 7258 { 7259 /* Structures up to 32 bytes in size are returned in registers. */ 7260 if (TREE_CODE (type) == RECORD_TYPE) 7261 { 7262 HOST_WIDE_INT size = int_size_in_bytes (type); 7263 gcc_assert (size <= 32); 7264 7265 return function_arg_record_value (type, mode, 0, 1, regbase); 7266 } 7267 7268 /* Unions up to 32 bytes in size are returned in integer registers. */ 7269 else if (TREE_CODE (type) == UNION_TYPE) 7270 { 7271 HOST_WIDE_INT size = int_size_in_bytes (type); 7272 gcc_assert (size <= 32); 7273 7274 return function_arg_union_value (size, mode, 0, regbase); 7275 } 7276 7277 /* Objects that require it are returned in FP registers. */ 7278 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) 7279 ; 7280 7281 /* All other aggregate types are returned in an integer register in a 7282 mode corresponding to the size of the type. */ 7283 else if (AGGREGATE_TYPE_P (type)) 7284 { 7285 /* All other aggregate types are passed in an integer register 7286 in a mode corresponding to the size of the type. */ 7287 HOST_WIDE_INT size = int_size_in_bytes (type); 7288 gcc_assert (size <= 32); 7289 7290 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); 7291 7292 /* ??? We probably should have made the same ABI change in 7293 3.4.0 as the one we made for unions. The latter was 7294 required by the SCD though, while the former is not 7295 specified, so we favored compatibility and efficiency. 7296 7297 Now we're stuck for aggregates larger than 16 bytes, 7298 because OImode vanished in the meantime. Let's not 7299 try to be unduly clever, and simply follow the ABI 7300 for unions in that case. */ 7301 if (mode == BLKmode) 7302 return function_arg_union_value (size, mode, 0, regbase); 7303 else 7304 mclass = MODE_INT; 7305 } 7306 7307 /* We should only have pointer and integer types at this point. This 7308 must match sparc_promote_function_mode. */ 7309 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 7310 mode = word_mode; 7311 } 7312 7313 /* We should only have pointer and integer types at this point. This must 7314 match sparc_promote_function_mode. */ 7315 else if (TARGET_ARCH32 7316 && mclass == MODE_INT 7317 && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 7318 mode = word_mode; 7319 7320 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU) 7321 regno = SPARC_FP_ARG_FIRST; 7322 else 7323 regno = regbase; 7324 7325 return gen_rtx_REG (mode, regno); 7326 } 7327 7328 /* Handle TARGET_FUNCTION_VALUE. 7329 On the SPARC, the value is found in the first "output" register, but the 7330 called function leaves it in the first "input" register. */ 7331 7332 static rtx 7333 sparc_function_value (const_tree valtype, 7334 const_tree fn_decl_or_type ATTRIBUTE_UNUSED, 7335 bool outgoing) 7336 { 7337 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing); 7338 } 7339 7340 /* Handle TARGET_LIBCALL_VALUE. */ 7341 7342 static rtx 7343 sparc_libcall_value (enum machine_mode mode, 7344 const_rtx fun ATTRIBUTE_UNUSED) 7345 { 7346 return sparc_function_value_1 (NULL_TREE, mode, false); 7347 } 7348 7349 /* Handle FUNCTION_VALUE_REGNO_P. 
7350 On the SPARC, the first "output" reg is used for integer values, and the 7351 first floating point register is used for floating point values. */ 7352 7353 static bool 7354 sparc_function_value_regno_p (const unsigned int regno) 7355 { 7356 return (regno == 8 || regno == 32); 7357 } 7358 7359 /* Do what is necessary for `va_start'. We look at the current function 7360 to determine if stdarg or varargs is used and return the address of 7361 the first unnamed parameter. */ 7362 7363 static rtx 7364 sparc_builtin_saveregs (void) 7365 { 7366 int first_reg = crtl->args.info.words; 7367 rtx address; 7368 int regno; 7369 7370 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++) 7371 emit_move_insn (gen_rtx_MEM (word_mode, 7372 gen_rtx_PLUS (Pmode, 7373 frame_pointer_rtx, 7374 GEN_INT (FIRST_PARM_OFFSET (0) 7375 + (UNITS_PER_WORD 7376 * regno)))), 7377 gen_rtx_REG (word_mode, 7378 SPARC_INCOMING_INT_ARG_FIRST + regno)); 7379 7380 address = gen_rtx_PLUS (Pmode, 7381 frame_pointer_rtx, 7382 GEN_INT (FIRST_PARM_OFFSET (0) 7383 + UNITS_PER_WORD * first_reg)); 7384 7385 return address; 7386 } 7387 7388 /* Implement `va_start' for stdarg. */ 7389 7390 static void 7391 sparc_va_start (tree valist, rtx nextarg) 7392 { 7393 nextarg = expand_builtin_saveregs (); 7394 std_expand_builtin_va_start (valist, nextarg); 7395 } 7396 7397 /* Implement `va_arg' for stdarg. */ 7398 7399 static tree 7400 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, 7401 gimple_seq *post_p) 7402 { 7403 HOST_WIDE_INT size, rsize, align; 7404 tree addr, incr; 7405 bool indirect; 7406 tree ptrtype = build_pointer_type (type); 7407 7408 if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) 7409 { 7410 indirect = true; 7411 size = rsize = UNITS_PER_WORD; 7412 align = 0; 7413 } 7414 else 7415 { 7416 indirect = false; 7417 size = int_size_in_bytes (type); 7418 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; 7419 align = 0; 7420 7421 if (TARGET_ARCH64) 7422 { 7423 /* For SPARC64, objects requiring 16-byte alignment get it. */ 7424 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD) 7425 align = 2 * UNITS_PER_WORD; 7426 7427 /* SPARC-V9 ABI states that structures up to 16 bytes in size 7428 are left-justified in their slots. */ 7429 if (AGGREGATE_TYPE_P (type)) 7430 { 7431 if (size == 0) 7432 size = rsize = UNITS_PER_WORD; 7433 else 7434 size = rsize; 7435 } 7436 } 7437 } 7438 7439 incr = valist; 7440 if (align) 7441 { 7442 incr = fold_build_pointer_plus_hwi (incr, align - 1); 7443 incr = fold_convert (sizetype, incr); 7444 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr, 7445 size_int (-align)); 7446 incr = fold_convert (ptr_type_node, incr); 7447 } 7448 7449 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue); 7450 addr = incr; 7451 7452 if (BYTES_BIG_ENDIAN && size < rsize) 7453 addr = fold_build_pointer_plus_hwi (incr, rsize - size); 7454 7455 if (indirect) 7456 { 7457 addr = fold_convert (build_pointer_type (ptrtype), addr); 7458 addr = build_va_arg_indirect_ref (addr); 7459 } 7460 7461 /* If the address isn't aligned properly for the type, we need a temporary. 7462 FIXME: This is inefficient, usually we can do this in registers. 
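A sketch of the problem (hypothetical example): a 16-byte-aligned type sitting in an 8-byte-aligned va_list slot cannot be accessed in place without risking an alignment trap, so the code below memcpy's it into a suitably aligned temporary and hands back the address of that copy.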
*/ 7463 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD) 7464 { 7465 tree tmp = create_tmp_var (type, "va_arg_tmp"); 7466 tree dest_addr = build_fold_addr_expr (tmp); 7467 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), 7468 3, dest_addr, addr, size_int (rsize)); 7469 TREE_ADDRESSABLE (tmp) = 1; 7470 gimplify_and_add (copy, pre_p); 7471 addr = dest_addr; 7472 } 7473 7474 else 7475 addr = fold_convert (ptrtype, addr); 7476 7477 incr = fold_build_pointer_plus_hwi (incr, rsize); 7478 gimplify_assign (valist, incr, post_p); 7479 7480 return build_va_arg_indirect_ref (addr); 7481 } 7482 7483 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook. 7484 Specify whether the vector mode is supported by the hardware. */ 7485 7486 static bool 7487 sparc_vector_mode_supported_p (enum machine_mode mode) 7488 { 7489 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false; 7490 } 7491 7492 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */ 7493 7494 static enum machine_mode 7495 sparc_preferred_simd_mode (enum machine_mode mode) 7496 { 7497 if (TARGET_VIS) 7498 switch (mode) 7499 { 7500 case SImode: 7501 return V2SImode; 7502 case HImode: 7503 return V4HImode; 7504 case QImode: 7505 return V8QImode; 7506 7507 default:; 7508 } 7509 7510 return word_mode; 7511 } 7512 7513 /* Return the string to output an unconditional branch to LABEL, which is 7514 the operand number of the label. 7515 7516 DEST is the destination insn (i.e. the label), INSN is the source. */ 7517 7518 const char * 7519 output_ubranch (rtx dest, rtx insn) 7520 { 7521 static char string[64]; 7522 bool v9_form = false; 7523 int delta; 7524 char *p; 7525 7526 /* Even if we are trying to use cbcond for this, evaluate 7527 whether we can use V9 branches as our backup plan. */ 7528 7529 delta = 5000000; 7530 if (INSN_ADDRESSES_SET_P ()) 7531 delta = (INSN_ADDRESSES (INSN_UID (dest)) 7532 - INSN_ADDRESSES (INSN_UID (insn))); 7533 7534 /* Leave some instructions for "slop". */ 7535 if (TARGET_V9 && delta >= -260000 && delta < 260000) 7536 v9_form = true; 7537 7538 if (TARGET_CBCOND) 7539 { 7540 bool emit_nop = emit_cbcond_nop (insn); 7541 bool far = false; 7542 const char *rval; 7543 7544 if (delta < -500 || delta > 500) 7545 far = true; 7546 7547 if (far) 7548 { 7549 if (v9_form) 7550 rval = "ba,a,pt\t%%xcc, %l0"; 7551 else 7552 rval = "b,a\t%l0"; 7553 } 7554 else 7555 { 7556 if (emit_nop) 7557 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop"; 7558 else 7559 rval = "cwbe\t%%g0, %%g0, %l0"; 7560 } 7561 return rval; 7562 } 7563 7564 if (v9_form) 7565 strcpy (string, "ba%*,pt\t%%xcc, "); 7566 else 7567 strcpy (string, "b%*\t"); 7568 7569 p = strchr (string, '\0'); 7570 *p++ = '%'; 7571 *p++ = 'l'; 7572 *p++ = '0'; 7573 *p++ = '%'; 7574 *p++ = '('; 7575 *p = '\0'; 7576 7577 return string; 7578 } 7579 7580 /* Return the string to output a conditional branch to LABEL, which is 7581 the operand number of the label. OP is the conditional expression. 7582 XEXP (OP, 0) is assumed to be a condition code register (integer or 7583 floating point) and its mode specifies what kind of comparison we made. 7584 7585 DEST is the destination insn (i.e. the label), INSN is the source. 7586 7587 REVERSED is nonzero if we should reverse the sense of the comparison. 7588 7589 ANNUL is nonzero if we should generate an annulling branch. 
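For illustration (one assumed case, not an exhaustive list): an EQ test of %icc against a nearby label in operand 3, carrying a taken-branch probability note, would come back as something like "be,pt\t%%icc, %l3%#".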
*/ 7590 7591 const char * 7592 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul, 7593 rtx insn) 7594 { 7595 static char string[64]; 7596 enum rtx_code code = GET_CODE (op); 7597 rtx cc_reg = XEXP (op, 0); 7598 enum machine_mode mode = GET_MODE (cc_reg); 7599 const char *labelno, *branch; 7600 int spaces = 8, far; 7601 char *p; 7602 7603 /* v9 branches are limited to +-1MB. If it is too far away, 7604 change 7605 7606 bne,pt %xcc, .LC30 7607 7608 to 7609 7610 be,pn %xcc, .+12 7611 nop 7612 ba .LC30 7613 7614 and 7615 7616 fbne,a,pn %fcc2, .LC29 7617 7618 to 7619 7620 fbe,pt %fcc2, .+16 7621 nop 7622 ba .LC29 */ 7623 7624 far = TARGET_V9 && (get_attr_length (insn) >= 3); 7625 if (reversed ^ far) 7626 { 7627 /* Reversal of FP compares takes care -- an ordered compare 7628 becomes an unordered compare and vice versa. */ 7629 if (mode == CCFPmode || mode == CCFPEmode) 7630 code = reverse_condition_maybe_unordered (code); 7631 else 7632 code = reverse_condition (code); 7633 } 7634 7635 /* Start by writing the branch condition. */ 7636 if (mode == CCFPmode || mode == CCFPEmode) 7637 { 7638 switch (code) 7639 { 7640 case NE: 7641 branch = "fbne"; 7642 break; 7643 case EQ: 7644 branch = "fbe"; 7645 break; 7646 case GE: 7647 branch = "fbge"; 7648 break; 7649 case GT: 7650 branch = "fbg"; 7651 break; 7652 case LE: 7653 branch = "fble"; 7654 break; 7655 case LT: 7656 branch = "fbl"; 7657 break; 7658 case UNORDERED: 7659 branch = "fbu"; 7660 break; 7661 case ORDERED: 7662 branch = "fbo"; 7663 break; 7664 case UNGT: 7665 branch = "fbug"; 7666 break; 7667 case UNLT: 7668 branch = "fbul"; 7669 break; 7670 case UNEQ: 7671 branch = "fbue"; 7672 break; 7673 case UNGE: 7674 branch = "fbuge"; 7675 break; 7676 case UNLE: 7677 branch = "fbule"; 7678 break; 7679 case LTGT: 7680 branch = "fblg"; 7681 break; 7682 7683 default: 7684 gcc_unreachable (); 7685 } 7686 7687 /* ??? !v9: FP branches cannot be preceded by another floating point 7688 insn. Because there is currently no concept of pre-delay slots, 7689 we can fix this only by always emitting a nop before a floating 7690 point branch. */ 7691 7692 string[0] = '\0'; 7693 if (! TARGET_V9) 7694 strcpy (string, "nop\n\t"); 7695 strcat (string, branch); 7696 } 7697 else 7698 { 7699 switch (code) 7700 { 7701 case NE: 7702 branch = "bne"; 7703 break; 7704 case EQ: 7705 branch = "be"; 7706 break; 7707 case GE: 7708 if (mode == CC_NOOVmode || mode == CCX_NOOVmode) 7709 branch = "bpos"; 7710 else 7711 branch = "bge"; 7712 break; 7713 case GT: 7714 branch = "bg"; 7715 break; 7716 case LE: 7717 branch = "ble"; 7718 break; 7719 case LT: 7720 if (mode == CC_NOOVmode || mode == CCX_NOOVmode) 7721 branch = "bneg"; 7722 else 7723 branch = "bl"; 7724 break; 7725 case GEU: 7726 branch = "bgeu"; 7727 break; 7728 case GTU: 7729 branch = "bgu"; 7730 break; 7731 case LEU: 7732 branch = "bleu"; 7733 break; 7734 case LTU: 7735 branch = "blu"; 7736 break; 7737 7738 default: 7739 gcc_unreachable (); 7740 } 7741 strcpy (string, branch); 7742 } 7743 spaces -= strlen (branch); 7744 p = strchr (string, '\0'); 7745 7746 /* Now add the annulling, the label, and a possible noop. */ 7747 if (annul && ! far) 7748 { 7749 strcpy (p, ",a"); 7750 p += 2; 7751 spaces -= 2; 7752 } 7753 7754 if (TARGET_V9) 7755 { 7756 rtx note; 7757 int v8 = 0; 7758 7759 if (! far && insn && INSN_ADDRESSES_SET_P ()) 7760 { 7761 int delta = (INSN_ADDRESSES (INSN_UID (dest)) 7762 - INSN_ADDRESSES (INSN_UID (insn))); 7763 /* Leave some instructions for "slop". 
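(That is, stay well inside the architectural +-1MB displacement of the V9 form, a deliberate safety margin, because the insn addresses used here are estimates that can still shift before final output.)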
*/ 7764 if (delta < -260000 || delta >= 260000) 7765 v8 = 1; 7766 } 7767 7768 if (mode == CCFPmode || mode == CCFPEmode) 7769 { 7770 static char v9_fcc_labelno[] = "%%fccX, "; 7771 /* Set the char indicating the number of the fcc reg to use. */ 7772 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0'; 7773 labelno = v9_fcc_labelno; 7774 if (v8) 7775 { 7776 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG); 7777 labelno = ""; 7778 } 7779 } 7780 else if (mode == CCXmode || mode == CCX_NOOVmode) 7781 { 7782 labelno = "%%xcc, "; 7783 gcc_assert (! v8); 7784 } 7785 else 7786 { 7787 labelno = "%%icc, "; 7788 if (v8) 7789 labelno = ""; 7790 } 7791 7792 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) 7793 { 7794 strcpy (p, 7795 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far) 7796 ? ",pt" : ",pn"); 7797 p += 3; 7798 spaces -= 3; 7799 } 7800 } 7801 else 7802 labelno = ""; 7803 7804 if (spaces > 0) 7805 *p++ = '\t'; 7806 else 7807 *p++ = ' '; 7808 strcpy (p, labelno); 7809 p = strchr (p, '\0'); 7810 if (far) 7811 { 7812 strcpy (p, ".+12\n\t nop\n\tb\t"); 7813 /* Skip the next insn if requested or 7814 if we know that it will be a nop. */ 7815 if (annul || ! final_sequence) 7816 p[3] = '6'; 7817 p += 14; 7818 } 7819 *p++ = '%'; 7820 *p++ = 'l'; 7821 *p++ = label + '0'; 7822 *p++ = '%'; 7823 *p++ = '#'; 7824 *p = '\0'; 7825 7826 return string; 7827 } 7828 7829 /* Emit a library call comparison between floating point X and Y. 7830 COMPARISON is the operator to compare with (EQ, NE, GT, etc). 7831 Return the new operator to be used in the comparison sequence. 7832 7833 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode 7834 values as arguments instead of the TFmode registers themselves, 7835 that's why we cannot call emit_float_lib_cmp. */ 7836 7837 rtx 7838 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison) 7839 { 7840 const char *qpfunc; 7841 rtx slot0, slot1, result, tem, tem2, libfunc; 7842 enum machine_mode mode; 7843 enum rtx_code new_comparison; 7844 7845 switch (comparison) 7846 { 7847 case EQ: 7848 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq"); 7849 break; 7850 7851 case NE: 7852 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne"); 7853 break; 7854 7855 case GT: 7856 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt"); 7857 break; 7858 7859 case GE: 7860 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge"); 7861 break; 7862 7863 case LT: 7864 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt"); 7865 break; 7866 7867 case LE: 7868 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle"); 7869 break; 7870 7871 case ORDERED: 7872 case UNORDERED: 7873 case UNGT: 7874 case UNLT: 7875 case UNEQ: 7876 case UNGE: 7877 case UNLE: 7878 case LTGT: 7879 qpfunc = (TARGET_ARCH64 ? 
"_Qp_cmp" : "_Q_cmp"); 7880 break; 7881 7882 default: 7883 gcc_unreachable (); 7884 } 7885 7886 if (TARGET_ARCH64) 7887 { 7888 if (MEM_P (x)) 7889 { 7890 tree expr = MEM_EXPR (x); 7891 if (expr) 7892 mark_addressable (expr); 7893 slot0 = x; 7894 } 7895 else 7896 { 7897 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode)); 7898 emit_move_insn (slot0, x); 7899 } 7900 7901 if (MEM_P (y)) 7902 { 7903 tree expr = MEM_EXPR (y); 7904 if (expr) 7905 mark_addressable (expr); 7906 slot1 = y; 7907 } 7908 else 7909 { 7910 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode)); 7911 emit_move_insn (slot1, y); 7912 } 7913 7914 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc); 7915 emit_library_call (libfunc, LCT_NORMAL, 7916 DImode, 2, 7917 XEXP (slot0, 0), Pmode, 7918 XEXP (slot1, 0), Pmode); 7919 mode = DImode; 7920 } 7921 else 7922 { 7923 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc); 7924 emit_library_call (libfunc, LCT_NORMAL, 7925 SImode, 2, 7926 x, TFmode, y, TFmode); 7927 mode = SImode; 7928 } 7929 7930 7931 /* Immediately move the result of the libcall into a pseudo 7932 register so reload doesn't clobber the value if it needs 7933 the return register for a spill reg. */ 7934 result = gen_reg_rtx (mode); 7935 emit_move_insn (result, hard_libcall_value (mode, libfunc)); 7936 7937 switch (comparison) 7938 { 7939 default: 7940 return gen_rtx_NE (VOIDmode, result, const0_rtx); 7941 case ORDERED: 7942 case UNORDERED: 7943 new_comparison = (comparison == UNORDERED ? EQ : NE); 7944 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3)); 7945 case UNGT: 7946 case UNGE: 7947 new_comparison = (comparison == UNGT ? GT : NE); 7948 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx); 7949 case UNLE: 7950 return gen_rtx_NE (VOIDmode, result, const2_rtx); 7951 case UNLT: 7952 tem = gen_reg_rtx (mode); 7953 if (TARGET_ARCH32) 7954 emit_insn (gen_andsi3 (tem, result, const1_rtx)); 7955 else 7956 emit_insn (gen_anddi3 (tem, result, const1_rtx)); 7957 return gen_rtx_NE (VOIDmode, tem, const0_rtx); 7958 case UNEQ: 7959 case LTGT: 7960 tem = gen_reg_rtx (mode); 7961 if (TARGET_ARCH32) 7962 emit_insn (gen_addsi3 (tem, result, const1_rtx)); 7963 else 7964 emit_insn (gen_adddi3 (tem, result, const1_rtx)); 7965 tem2 = gen_reg_rtx (mode); 7966 if (TARGET_ARCH32) 7967 emit_insn (gen_andsi3 (tem2, tem, const2_rtx)); 7968 else 7969 emit_insn (gen_anddi3 (tem2, tem, const2_rtx)); 7970 new_comparison = (comparison == UNEQ ? EQ : NE); 7971 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx); 7972 } 7973 7974 gcc_unreachable (); 7975 } 7976 7977 /* Generate an unsigned DImode to FP conversion. This is the same code 7978 optabs would emit if we didn't have TFmode patterns. 
*/ 7979 7980 void 7981 sparc_emit_floatunsdi (rtx *operands, enum machine_mode mode) 7982 { 7983 rtx neglab, donelab, i0, i1, f0, in, out; 7984 7985 out = operands[0]; 7986 in = force_reg (DImode, operands[1]); 7987 neglab = gen_label_rtx (); 7988 donelab = gen_label_rtx (); 7989 i0 = gen_reg_rtx (DImode); 7990 i1 = gen_reg_rtx (DImode); 7991 f0 = gen_reg_rtx (mode); 7992 7993 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); 7994 7995 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); 7996 emit_jump_insn (gen_jump (donelab)); 7997 emit_barrier (); 7998 7999 emit_label (neglab); 8000 8001 emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); 8002 emit_insn (gen_anddi3 (i1, in, const1_rtx)); 8003 emit_insn (gen_iordi3 (i0, i0, i1)); 8004 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0))); 8005 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); 8006 8007 emit_label (donelab); 8008 } 8009 8010 /* Generate an FP to unsigned DImode conversion. This is the same code 8011 optabs would emit if we didn't have TFmode patterns. */ 8012 8013 void 8014 sparc_emit_fixunsdi (rtx *operands, enum machine_mode mode) 8015 { 8016 rtx neglab, donelab, i0, i1, f0, in, out, limit; 8017 8018 out = operands[0]; 8019 in = force_reg (mode, operands[1]); 8020 neglab = gen_label_rtx (); 8021 donelab = gen_label_rtx (); 8022 i0 = gen_reg_rtx (DImode); 8023 i1 = gen_reg_rtx (DImode); 8024 limit = gen_reg_rtx (mode); 8025 f0 = gen_reg_rtx (mode); 8026 8027 emit_move_insn (limit, 8028 CONST_DOUBLE_FROM_REAL_VALUE ( 8029 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode)); 8030 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab); 8031 8032 emit_insn (gen_rtx_SET (VOIDmode, 8033 out, 8034 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in)))); 8035 emit_jump_insn (gen_jump (donelab)); 8036 emit_barrier (); 8037 8038 emit_label (neglab); 8039 8040 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit))); 8041 emit_insn (gen_rtx_SET (VOIDmode, 8042 i0, 8043 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0)))); 8044 emit_insn (gen_movdi (i1, const1_rtx)); 8045 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63))); 8046 emit_insn (gen_xordi3 (out, i0, i1)); 8047 8048 emit_label (donelab); 8049 } 8050 8051 /* Return the string to output a compare and branch instruction to DEST. 8052 DEST is the destination insn (i.e. the label), INSN is the source, 8053 and OP is the conditional expression. */ 8054 8055 const char * 8056 output_cbcond (rtx op, rtx dest, rtx insn) 8057 { 8058 enum machine_mode mode = GET_MODE (XEXP (op, 0)); 8059 enum rtx_code code = GET_CODE (op); 8060 const char *cond_str, *tmpl; 8061 int far, emit_nop, len; 8062 static char string[64]; 8063 char size_char; 8064 8065 /* Compare and Branch is limited to +-2KB. If it is too far away, 8066 change 8067 8068 cxbne X, Y, .LC30 8069 8070 to 8071 8072 cxbe X, Y, .+16 8073 nop 8074 ba,pt xcc, .LC30 8075 nop */ 8076 8077 len = get_attr_length (insn); 8078 8079 far = len == 4; 8080 emit_nop = len == 2; 8081 8082 if (far) 8083 code = reverse_condition (code); 8084 8085 size_char = ((mode == SImode) ? 
'w' : 'x'); 8086 8087 switch (code) 8088 { 8089 case NE: 8090 cond_str = "ne"; 8091 break; 8092 8093 case EQ: 8094 cond_str = "e"; 8095 break; 8096 8097 case GE: 8098 if (mode == CC_NOOVmode || mode == CCX_NOOVmode) 8099 cond_str = "pos"; 8100 else 8101 cond_str = "ge"; 8102 break; 8103 8104 case GT: 8105 cond_str = "g"; 8106 break; 8107 8108 case LE: 8109 cond_str = "le"; 8110 break; 8111 8112 case LT: 8113 if (mode == CC_NOOVmode || mode == CCX_NOOVmode) 8114 cond_str = "neg"; 8115 else 8116 cond_str = "l"; 8117 break; 8118 8119 case GEU: 8120 cond_str = "cc"; 8121 break; 8122 8123 case GTU: 8124 cond_str = "gu"; 8125 break; 8126 8127 case LEU: 8128 cond_str = "leu"; 8129 break; 8130 8131 case LTU: 8132 cond_str = "cs"; 8133 break; 8134 8135 default: 8136 gcc_unreachable (); 8137 } 8138 8139 if (far) 8140 { 8141 int veryfar = 1, delta; 8142 8143 if (INSN_ADDRESSES_SET_P ()) 8144 { 8145 delta = (INSN_ADDRESSES (INSN_UID (dest)) 8146 - INSN_ADDRESSES (INSN_UID (insn))); 8147 /* Leave some instructions for "slop". */ 8148 if (delta >= -260000 && delta < 260000) 8149 veryfar = 0; 8150 } 8151 8152 if (veryfar) 8153 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop"; 8154 else 8155 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop"; 8156 } 8157 else 8158 { 8159 if (emit_nop) 8160 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop"; 8161 else 8162 tmpl = "c%cb%s\t%%1, %%2, %%3"; 8163 } 8164 8165 snprintf (string, sizeof (string), tmpl, size_char, cond_str); 8166 8167 return string; 8168 } 8169 8170 /* Return the string to output a conditional branch to LABEL, testing 8171 register REG. LABEL is the operand number of the label; REG is the 8172 operand number of the reg. OP is the conditional expression. The mode 8173 of REG says what kind of comparison we made. 8174 8175 DEST is the destination insn (i.e. the label), INSN is the source. 8176 8177 REVERSED is nonzero if we should reverse the sense of the comparison. 8178 8179 ANNUL is nonzero if we should generate an annulling branch. */ 8180 8181 const char * 8182 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed, 8183 int annul, rtx insn) 8184 { 8185 static char string[64]; 8186 enum rtx_code code = GET_CODE (op); 8187 enum machine_mode mode = GET_MODE (XEXP (op, 0)); 8188 rtx note; 8189 int far; 8190 char *p; 8191 8192 /* Branches on a register are limited to +-128KB. If it is too far away, 8193 change 8194 8195 brnz,pt %g1, .LC30 8196 8197 to 8198 8199 brz,pn %g1, .+12 8200 nop 8201 ba,pt %xcc, .LC30 8202 8203 and 8204 8205 brgez,a,pn %o1, .LC29 8206 8207 to 8208 8209 brlz,pt %o1, .+16 8210 nop 8211 ba,pt %xcc, .LC29 */ 8212 8213 far = get_attr_length (insn) >= 3; 8214 8215 /* If not floating-point or if EQ or NE, we can just reverse the code. */ 8216 if (reversed ^ far) 8217 code = reverse_condition (code); 8218 8219 /* Only 64-bit versions of these instructions exist. */ 8220 gcc_assert (mode == DImode); 8221 8222 /* Start by writing the branch condition. */ 8223 8224 switch (code) 8225 { 8226 case NE: 8227 strcpy (string, "brnz"); 8228 break; 8229 8230 case EQ: 8231 strcpy (string, "brz"); 8232 break; 8233 8234 case GE: 8235 strcpy (string, "brgez"); 8236 break; 8237 8238 case LT: 8239 strcpy (string, "brlz"); 8240 break; 8241 8242 case LE: 8243 strcpy (string, "brlez"); 8244 break; 8245 8246 case GT: 8247 strcpy (string, "brgz"); 8248 break; 8249 8250 default: 8251 gcc_unreachable (); 8252 } 8253 8254 p = strchr (string, '\0'); 8255 8256 /* Now add the annulling, reg, label, and nop. */ 8257 if (annul && !
far) 8258 { 8259 strcpy (p, ",a"); 8260 p += 2; 8261 } 8262 8263 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) 8264 { 8265 strcpy (p, 8266 ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far) 8267 ? ",pt" : ",pn"); 8268 p += 3; 8269 } 8270 8271 *p = p < string + 8 ? '\t' : ' '; 8272 p++; 8273 *p++ = '%'; 8274 *p++ = '0' + reg; 8275 *p++ = ','; 8276 *p++ = ' '; 8277 if (far) 8278 { 8279 int veryfar = 1, delta; 8280 8281 if (INSN_ADDRESSES_SET_P ()) 8282 { 8283 delta = (INSN_ADDRESSES (INSN_UID (dest)) 8284 - INSN_ADDRESSES (INSN_UID (insn))); 8285 /* Leave some instructions for "slop". */ 8286 if (delta >= -260000 && delta < 260000) 8287 veryfar = 0; 8288 } 8289 8290 strcpy (p, ".+12\n\t nop\n\t"); 8291 /* Skip the next insn if requested or 8292 if we know that it will be a nop. */ 8293 if (annul || ! final_sequence) 8294 p[3] = '6'; 8295 p += 12; 8296 if (veryfar) 8297 { 8298 strcpy (p, "b\t"); 8299 p += 2; 8300 } 8301 else 8302 { 8303 strcpy (p, "ba,pt\t%%xcc, "); 8304 p += 13; 8305 } 8306 } 8307 *p++ = '%'; 8308 *p++ = 'l'; 8309 *p++ = '0' + label; 8310 *p++ = '%'; 8311 *p++ = '#'; 8312 *p = '\0'; 8313 8314 return string; 8315 } 8316 8317 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7]. 8318 Such instructions cannot be used in the delay slot of the return insn on v9. 8319 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts. 8320 */ 8321 8322 static int 8323 epilogue_renumber (register rtx *where, int test) 8324 { 8325 register const char *fmt; 8326 register int i; 8327 register enum rtx_code code; 8328 8329 if (*where == 0) 8330 return 0; 8331 8332 code = GET_CODE (*where); 8333 8334 switch (code) 8335 { 8336 case REG: 8337 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */ 8338 return 1; 8339 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32) 8340 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where))); 8341 case SCRATCH: 8342 case CC0: 8343 case PC: 8344 case CONST_INT: 8345 case CONST_DOUBLE: 8346 return 0; 8347 8348 /* Do not replace the frame pointer with the stack pointer because 8349 it can cause the delayed instruction to load below the stack. 8350 This occurs when instructions like: 8351 8352 (set (reg/i:SI 24 %i0) 8353 (mem/f:SI (plus:SI (reg/f:SI 30 %fp) 8354 (const_int -20 [0xffffffec])) 0)) 8355 8356 are in the return delay slot. */ 8357 case PLUS: 8358 if (GET_CODE (XEXP (*where, 0)) == REG 8359 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM 8360 && (GET_CODE (XEXP (*where, 1)) != CONST_INT 8361 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS)) 8362 return 1; 8363 break; 8364 8365 case MEM: 8366 if (SPARC_STACK_BIAS 8367 && GET_CODE (XEXP (*where, 0)) == REG 8368 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM) 8369 return 1; 8370 break; 8371 8372 default: 8373 break; 8374 } 8375 8376 fmt = GET_RTX_FORMAT (code); 8377 8378 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 8379 { 8380 if (fmt[i] == 'E') 8381 { 8382 register int j; 8383 for (j = XVECLEN (*where, i) - 1; j >= 0; j--) 8384 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test)) 8385 return 1; 8386 } 8387 else if (fmt[i] == 'e' 8388 && epilogue_renumber (&(XEXP (*where, i)), test)) 8389 return 1; 8390 } 8391 return 0; 8392 } 8393 8394 /* Leaf functions and non-leaf functions have different needs.
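The test used below in order_regs_for_local_alloc is whether %o7 (register 15) is ever live: calls clobber %o7, so a function where it stays dead can use the more compact leaf allocation order (a heuristic, stated here for illustration).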
*/ 8395 8396 static const int 8397 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER; 8398 8399 static const int 8400 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER; 8401 8402 static const int *const reg_alloc_orders[] = { 8403 reg_leaf_alloc_order, 8404 reg_nonleaf_alloc_order}; 8405 8406 void 8407 order_regs_for_local_alloc (void) 8408 { 8409 static int last_order_nonleaf = 1; 8410 8411 if (df_regs_ever_live_p (15) != last_order_nonleaf) 8412 { 8413 last_order_nonleaf = !last_order_nonleaf; 8414 memcpy ((char *) reg_alloc_order, 8415 (const char *) reg_alloc_orders[last_order_nonleaf], 8416 FIRST_PSEUDO_REGISTER * sizeof (int)); 8417 } 8418 } 8419 8420 /* Return 1 if REG and MEM are legitimate enough to allow the various 8421 mem<-->reg splits to be run. */ 8422 8423 int 8424 sparc_splitdi_legitimate (rtx reg, rtx mem) 8425 { 8426 /* Punt if we are here by mistake. */ 8427 gcc_assert (reload_completed); 8428 8429 /* We must have an offsettable memory reference. */ 8430 if (! offsettable_memref_p (mem)) 8431 return 0; 8432 8433 /* If we have legitimate args for ldd/std, we do not want 8434 the split to happen. */ 8435 if ((REGNO (reg) % 2) == 0 8436 && mem_min_alignment (mem, 8)) 8437 return 0; 8438 8439 /* Success. */ 8440 return 1; 8441 } 8442 8443 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */ 8444 8445 int 8446 sparc_split_regreg_legitimate (rtx reg1, rtx reg2) 8447 { 8448 int regno1, regno2; 8449 8450 if (GET_CODE (reg1) == SUBREG) 8451 reg1 = SUBREG_REG (reg1); 8452 if (GET_CODE (reg1) != REG) 8453 return 0; 8454 regno1 = REGNO (reg1); 8455 8456 if (GET_CODE (reg2) == SUBREG) 8457 reg2 = SUBREG_REG (reg2); 8458 if (GET_CODE (reg2) != REG) 8459 return 0; 8460 regno2 = REGNO (reg2); 8461 8462 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2)) 8463 return 1; 8464 8465 if (TARGET_VIS3) 8466 { 8467 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2)) 8468 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2))) 8469 return 1; 8470 } 8471 8472 return 0; 8473 } 8474 8475 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1. 8476 This makes them candidates for using ldd and std insns. 8477 8478 Note reg1 and reg2 *must* be hard registers. */ 8479 8480 int 8481 registers_ok_for_ldd_peep (rtx reg1, rtx reg2) 8482 { 8483 /* We might have been passed a SUBREG. */ 8484 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG) 8485 return 0; 8486 8487 if (REGNO (reg1) % 2 != 0) 8488 return 0; 8489 8490 /* Integer ldd is deprecated in SPARC V9 */ 8491 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1))) 8492 return 0; 8493 8494 return (REGNO (reg1) == REGNO (reg2) - 1); 8495 } 8496 8497 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in 8498 an ldd or std insn. 8499 8500 This can only happen when addr1 and addr2, the addresses in mem1 8501 and mem2, are consecutive memory locations (addr1 + 4 == addr2). 8502 addr1 must also be aligned on a 64-bit boundary. 8503 8504 Also iff dependent_reg_rtx is not null it should not be used to 8505 compute the address for mem1, i.e. we cannot optimize a sequence 8506 like: 8507 ld [%o0], %o0 8508 ld [%o0 + 4], %o1 8509 to 8510 ldd [%o0], %o0 8511 nor: 8512 ld [%g3 + 4], %g3 8513 ld [%g3], %g2 8514 to 8515 ldd [%g3], %g2 8516 8517 But, note that the transformation from: 8518 ld [%g2 + 4], %g3 8519 ld [%g2], %g2 8520 to 8521 ldd [%g2], %g2 8522 is perfectly fine. Thus, the peephole2 patterns always pass us 8523 the destination register of the first load, never the second one. 
8524 8525 For stores we don't have a similar problem, so dependent_reg_rtx is 8526 NULL_RTX. */ 8527 8528 int 8529 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx) 8530 { 8531 rtx addr1, addr2; 8532 unsigned int reg1; 8533 HOST_WIDE_INT offset1; 8534 8535 /* The mems cannot be volatile. */ 8536 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) 8537 return 0; 8538 8539 /* MEM1 should be aligned on a 64-bit boundary. */ 8540 if (MEM_ALIGN (mem1) < 64) 8541 return 0; 8542 8543 addr1 = XEXP (mem1, 0); 8544 addr2 = XEXP (mem2, 0); 8545 8546 /* Extract a register number and offset (if used) from the first addr. */ 8547 if (GET_CODE (addr1) == PLUS) 8548 { 8549 /* If not a REG, return zero. */ 8550 if (GET_CODE (XEXP (addr1, 0)) != REG) 8551 return 0; 8552 else 8553 { 8554 reg1 = REGNO (XEXP (addr1, 0)); 8555 /* The offset must be constant! */ 8556 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT) 8557 return 0; 8558 offset1 = INTVAL (XEXP (addr1, 1)); 8559 } 8560 } 8561 else if (GET_CODE (addr1) != REG) 8562 return 0; 8563 else 8564 { 8565 reg1 = REGNO (addr1); 8566 /* This was a simple (mem (reg)) expression. Offset is 0. */ 8567 offset1 = 0; 8568 } 8569 8570 /* Make sure the second address is a (mem (plus (reg) (const_int))). */ 8571 if (GET_CODE (addr2) != PLUS) 8572 return 0; 8573 8574 if (GET_CODE (XEXP (addr2, 0)) != REG 8575 || GET_CODE (XEXP (addr2, 1)) != CONST_INT) 8576 return 0; 8577 8578 if (reg1 != REGNO (XEXP (addr2, 0))) 8579 return 0; 8580 8581 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx)) 8582 return 0; 8583 8584 /* The first offset must be evenly divisible by 8 to ensure the 8585 address is 64-bit aligned. */ 8586 if (offset1 % 8 != 0) 8587 return 0; 8588 8589 /* The offset for the second addr must be 4 more than the first addr. */ 8590 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4) 8591 return 0; 8592 8593 /* All the tests passed. addr1 and addr2 are valid for ldd and std 8594 instructions. */ 8595 return 1; 8596 } 8597 8598 /* Return 1 if reg is a pseudo, or is the first register in 8599 a hard register pair. This makes it suitable for use in 8600 ldd and std insns. */ 8601 8602 int 8603 register_ok_for_ldd (rtx reg) 8604 { 8605 /* We might have been passed a SUBREG. */ 8606 if (!REG_P (reg)) 8607 return 0; 8608 8609 if (REGNO (reg) < FIRST_PSEUDO_REGISTER) 8610 return (REGNO (reg) % 2 == 0); 8611 8612 return 1; 8613 } 8614 8615 /* Return 1 if OP, a MEM, has an address which is known to be 8616 aligned to an 8-byte boundary. */ 8617 8618 int 8619 memory_ok_for_ldd (rtx op) 8620 { 8621 /* In 64-bit mode, we assume that the address is word-aligned. */ 8622 if (TARGET_ARCH32 && !mem_min_alignment (op, 8)) 8623 return 0; 8624 8625 if (! can_create_pseudo_p () 8626 && !strict_memory_address_p (Pmode, XEXP (op, 0))) 8627 return 0; 8628 8629 return 1; 8630 } 8631 8632 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ 8633 8634 static bool 8635 sparc_print_operand_punct_valid_p (unsigned char code) 8636 { 8637 if (code == '#' 8638 || code == '*' 8639 || code == '(' 8640 || code == ')' 8641 || code == '_' 8642 || code == '&') 8643 return true; 8644 8645 return false; 8646 } 8647 8648 /* Implement TARGET_PRINT_OPERAND. 8649 Print operand X (an rtx) in assembler syntax to file FILE. 8650 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. 8651 For `%' followed by punctuation, CODE is the punctuation and X is null.
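A few illustrative cases from the switch below (examples assumed, not exhaustive): %r0 prints operand 0 but substitutes %g0 when it is zero, %L0/%H0 print the low/high word register of a pair, and %C0 prints the condition name used by conditional moves.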
*/ 8652 8653 static void 8654 sparc_print_operand (FILE *file, rtx x, int code) 8655 { 8656 switch (code) 8657 { 8658 case '#': 8659 /* Output an insn in a delay slot. */ 8660 if (final_sequence) 8661 sparc_indent_opcode = 1; 8662 else 8663 fputs ("\n\t nop", file); 8664 return; 8665 case '*': 8666 /* Output an annul flag if there's nothing for the delay slot and we 8667 are optimizing. This is always used with '(' below. 8668 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch; 8669 this is a dbx bug. So, we only do this when optimizing. 8670 On UltraSPARC, a branch in a delay slot causes a pipeline flush. 8671 Always emit a nop in case the next instruction is a branch. */ 8672 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9)) 8673 fputs (",a", file); 8674 return; 8675 case '(': 8676 /* Output a 'nop' if there's nothing for the delay slot and we are 8677 not optimizing. This is always used with '*' above. */ 8678 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9)) 8679 fputs ("\n\t nop", file); 8680 else if (final_sequence) 8681 sparc_indent_opcode = 1; 8682 return; 8683 case ')': 8684 /* Output the right displacement from the saved PC on function return. 8685 The caller may have placed an "unimp" insn immediately after the call 8686 so we have to account for it. This insn is used in the 32-bit ABI 8687 when calling a function that returns a non zero-sized structure. The 8688 64-bit ABI doesn't have it. Be careful to have this test be the same 8689 as that for the call. The exception is when sparc_std_struct_return 8690 is enabled, the psABI is followed exactly and the adjustment is made 8691 by the code in sparc_struct_value_rtx. The call emitted is the same 8692 when sparc_std_struct_return is enabled. */ 8693 if (!TARGET_ARCH64 8694 && cfun->returns_struct 8695 && !sparc_std_struct_return 8696 && DECL_SIZE (DECL_RESULT (current_function_decl)) 8697 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl))) 8698 == INTEGER_CST 8699 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))) 8700 fputs ("12", file); 8701 else 8702 fputc ('8', file); 8703 return; 8704 case '_': 8705 /* Output the Embedded Medium/Anywhere code model base register. */ 8706 fputs (EMBMEDANY_BASE_REG, file); 8707 return; 8708 case '&': 8709 /* Print some local dynamic TLS name. */ 8710 assemble_name (file, get_some_local_dynamic_name ()); 8711 return; 8712 8713 case 'Y': 8714 /* Adjust the operand to take into account a RESTORE operation. */ 8715 if (GET_CODE (x) == CONST_INT) 8716 break; 8717 else if (GET_CODE (x) != REG) 8718 output_operand_lossage ("invalid %%Y operand"); 8719 else if (REGNO (x) < 8) 8720 fputs (reg_names[REGNO (x)], file); 8721 else if (REGNO (x) >= 24 && REGNO (x) < 32) 8722 fputs (reg_names[REGNO (x)-16], file); 8723 else 8724 output_operand_lossage ("invalid %%Y operand"); 8725 return; 8726 case 'L': 8727 /* Print out the low order register name of a register pair. */ 8728 if (WORDS_BIG_ENDIAN) 8729 fputs (reg_names[REGNO (x)+1], file); 8730 else 8731 fputs (reg_names[REGNO (x)], file); 8732 return; 8733 case 'H': 8734 /* Print out the high order register name of a register pair. */ 8735 if (WORDS_BIG_ENDIAN) 8736 fputs (reg_names[REGNO (x)], file); 8737 else 8738 fputs (reg_names[REGNO (x)+1], file); 8739 return; 8740 case 'R': 8741 /* Print out the second register name of a register pair or quad. 8742 I.e., R (%o0) => %o1. 
*/ 8743 fputs (reg_names[REGNO (x)+1], file); 8744 return; 8745 case 'S': 8746 /* Print out the third register name of a register quad. 8747 I.e., S (%o0) => %o2. */ 8748 fputs (reg_names[REGNO (x)+2], file); 8749 return; 8750 case 'T': 8751 /* Print out the fourth register name of a register quad. 8752 I.e., T (%o0) => %o3. */ 8753 fputs (reg_names[REGNO (x)+3], file); 8754 return; 8755 case 'x': 8756 /* Print a condition code register. */ 8757 if (REGNO (x) == SPARC_ICC_REG) 8758 { 8759 /* We don't handle CC[X]_NOOVmode because they're not supposed 8760 to occur here. */ 8761 if (GET_MODE (x) == CCmode) 8762 fputs ("%icc", file); 8763 else if (GET_MODE (x) == CCXmode) 8764 fputs ("%xcc", file); 8765 else 8766 gcc_unreachable (); 8767 } 8768 else 8769 /* %fccN register */ 8770 fputs (reg_names[REGNO (x)], file); 8771 return; 8772 case 'm': 8773 /* Print the operand's address only. */ 8774 output_address (XEXP (x, 0)); 8775 return; 8776 case 'r': 8777 /* In this case we need a register. Use %g0 if the 8778 operand is const0_rtx. */ 8779 if (x == const0_rtx 8780 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x)))) 8781 { 8782 fputs ("%g0", file); 8783 return; 8784 } 8785 else 8786 break; 8787 8788 case 'A': 8789 switch (GET_CODE (x)) 8790 { 8791 case IOR: fputs ("or", file); break; 8792 case AND: fputs ("and", file); break; 8793 case XOR: fputs ("xor", file); break; 8794 default: output_operand_lossage ("invalid %%A operand"); 8795 } 8796 return; 8797 8798 case 'B': 8799 switch (GET_CODE (x)) 8800 { 8801 case IOR: fputs ("orn", file); break; 8802 case AND: fputs ("andn", file); break; 8803 case XOR: fputs ("xnor", file); break; 8804 default: output_operand_lossage ("invalid %%B operand"); 8805 } 8806 return; 8807 8808 /* This is used by the conditional move instructions. */ 8809 case 'C': 8810 { 8811 enum rtx_code rc = GET_CODE (x); 8812 8813 switch (rc) 8814 { 8815 case NE: fputs ("ne", file); break; 8816 case EQ: fputs ("e", file); break; 8817 case GE: fputs ("ge", file); break; 8818 case GT: fputs ("g", file); break; 8819 case LE: fputs ("le", file); break; 8820 case LT: fputs ("l", file); break; 8821 case GEU: fputs ("geu", file); break; 8822 case GTU: fputs ("gu", file); break; 8823 case LEU: fputs ("leu", file); break; 8824 case LTU: fputs ("lu", file); break; 8825 case LTGT: fputs ("lg", file); break; 8826 case UNORDERED: fputs ("u", file); break; 8827 case ORDERED: fputs ("o", file); break; 8828 case UNLT: fputs ("ul", file); break; 8829 case UNLE: fputs ("ule", file); break; 8830 case UNGT: fputs ("ug", file); break; 8831 case UNGE: fputs ("uge", file); break; 8832 case UNEQ: fputs ("ue", file); break; 8833 default: output_operand_lossage ("invalid %%C operand"); 8834 } 8835 return; 8836 } 8837 8838 /* These are used by the movr instruction pattern. */ 8839 case 'D': 8840 { 8841 enum rtx_code rc = GET_CODE (x); 8842 switch (rc) 8843 { 8844 case NE: fputs ("ne", file); break; 8845 case EQ: fputs ("e", file); break; 8846 case GE: fputs ("gez", file); break; 8847 case LT: fputs ("lz", file); break; 8848 case LE: fputs ("lez", file); break; 8849 case GT: fputs ("gz", file); break; 8850 default: output_operand_lossage ("invalid %%D operand"); 8851 } 8852 return; 8853 } 8854 8855 case 'b': 8856 { 8857 /* Print a sign-extended character. */ 8858 int i = trunc_int_for_mode (INTVAL (x), QImode); 8859 fprintf (file, "%d", i); 8860 return; 8861 } 8862 8863 case 'f': 8864 /* Operand must be a MEM; write its address.
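(For a hypothetical operand (mem:SI (plus:SI (reg %o0) (const_int 8))) this prints "%o0+8" with no surrounding brackets, unlike the generic MEM case at the bottom of this function.)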
*/ 8865 if (GET_CODE (x) != MEM) 8866 output_operand_lossage ("invalid %%f operand"); 8867 output_address (XEXP (x, 0)); 8868 return; 8869 8870 case 's': 8871 { 8872 /* Print a sign-extended 32-bit value. */ 8873 HOST_WIDE_INT i; 8874 if (GET_CODE(x) == CONST_INT) 8875 i = INTVAL (x); 8876 else if (GET_CODE(x) == CONST_DOUBLE) 8877 i = CONST_DOUBLE_LOW (x); 8878 else 8879 { 8880 output_operand_lossage ("invalid %%s operand"); 8881 return; 8882 } 8883 i = trunc_int_for_mode (i, SImode); 8884 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i); 8885 return; 8886 } 8887 8888 case 0: 8889 /* Do nothing special. */ 8890 break; 8891 8892 default: 8893 /* Undocumented flag. */ 8894 output_operand_lossage ("invalid operand output code"); 8895 } 8896 8897 if (GET_CODE (x) == REG) 8898 fputs (reg_names[REGNO (x)], file); 8899 else if (GET_CODE (x) == MEM) 8900 { 8901 fputc ('[', file); 8902 /* Poor Sun assembler doesn't understand absolute addressing. */ 8903 if (CONSTANT_P (XEXP (x, 0))) 8904 fputs ("%g0+", file); 8905 output_address (XEXP (x, 0)); 8906 fputc (']', file); 8907 } 8908 else if (GET_CODE (x) == HIGH) 8909 { 8910 fputs ("%hi(", file); 8911 output_addr_const (file, XEXP (x, 0)); 8912 fputc (')', file); 8913 } 8914 else if (GET_CODE (x) == LO_SUM) 8915 { 8916 sparc_print_operand (file, XEXP (x, 0), 0); 8917 if (TARGET_CM_MEDMID) 8918 fputs ("+%l44(", file); 8919 else 8920 fputs ("+%lo(", file); 8921 output_addr_const (file, XEXP (x, 1)); 8922 fputc (')', file); 8923 } 8924 else if (GET_CODE (x) == CONST_DOUBLE 8925 && (GET_MODE (x) == VOIDmode 8926 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)) 8927 { 8928 if (CONST_DOUBLE_HIGH (x) == 0) 8929 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x)); 8930 else if (CONST_DOUBLE_HIGH (x) == -1 8931 && CONST_DOUBLE_LOW (x) < 0) 8932 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x)); 8933 else 8934 output_operand_lossage ("long long constant not a valid immediate operand"); 8935 } 8936 else if (GET_CODE (x) == CONST_DOUBLE) 8937 output_operand_lossage ("floating point constant not a valid immediate operand"); 8938 else { output_addr_const (file, x); } 8939 } 8940 8941 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */ 8942 8943 static void 8944 sparc_print_operand_address (FILE *file, rtx x) 8945 { 8946 register rtx base, index = 0; 8947 int offset = 0; 8948 register rtx addr = x; 8949 8950 if (REG_P (addr)) 8951 fputs (reg_names[REGNO (addr)], file); 8952 else if (GET_CODE (addr) == PLUS) 8953 { 8954 if (CONST_INT_P (XEXP (addr, 0))) 8955 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1); 8956 else if (CONST_INT_P (XEXP (addr, 1))) 8957 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0); 8958 else 8959 base = XEXP (addr, 0), index = XEXP (addr, 1); 8960 if (GET_CODE (base) == LO_SUM) 8961 { 8962 gcc_assert (USE_AS_OFFSETABLE_LO10 8963 && TARGET_ARCH64 8964 && ! 
TARGET_CM_MEDMID); 8965 output_operand (XEXP (base, 0), 0); 8966 fputs ("+%lo(", file); 8967 output_address (XEXP (base, 1)); 8968 fprintf (file, ")+%d", offset); 8969 } 8970 else 8971 { 8972 fputs (reg_names[REGNO (base)], file); 8973 if (index == 0) 8974 fprintf (file, "%+d", offset); 8975 else if (REG_P (index)) 8976 fprintf (file, "+%s", reg_names[REGNO (index)]); 8977 else if (GET_CODE (index) == SYMBOL_REF 8978 || GET_CODE (index) == LABEL_REF 8979 || GET_CODE (index) == CONST) 8980 fputc ('+', file), output_addr_const (file, index); 8981 else gcc_unreachable (); 8982 } 8983 } 8984 else if (GET_CODE (addr) == MINUS 8985 && GET_CODE (XEXP (addr, 1)) == LABEL_REF) 8986 { 8987 output_addr_const (file, XEXP (addr, 0)); 8988 fputs ("-(", file); 8989 output_addr_const (file, XEXP (addr, 1)); 8990 fputs ("-.)", file); 8991 } 8992 else if (GET_CODE (addr) == LO_SUM) 8993 { 8994 output_operand (XEXP (addr, 0), 0); 8995 if (TARGET_CM_MEDMID) 8996 fputs ("+%l44(", file); 8997 else 8998 fputs ("+%lo(", file); 8999 output_address (XEXP (addr, 1)); 9000 fputc (')', file); 9001 } 9002 else if (flag_pic 9003 && GET_CODE (addr) == CONST 9004 && GET_CODE (XEXP (addr, 0)) == MINUS 9005 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST 9006 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS 9007 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx) 9008 { 9009 addr = XEXP (addr, 0); 9010 output_addr_const (file, XEXP (addr, 0)); 9011 /* Group the args of the second CONST in parenthesis. */ 9012 fputs ("-(", file); 9013 /* Skip past the second CONST--it does nothing for us. */ 9014 output_addr_const (file, XEXP (XEXP (addr, 1), 0)); 9015 /* Close the parenthesis. */ 9016 fputc (')', file); 9017 } 9018 else 9019 { 9020 output_addr_const (file, addr); 9021 } 9022 } 9023 9024 /* Target hook for assembling integer objects. The sparc version has 9025 special handling for aligned DI-mode objects. */ 9026 9027 static bool 9028 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p) 9029 { 9030 /* ??? We only output .xword's for symbols and only then in environments 9031 where the assembler can handle them. */ 9032 if (aligned_p && size == 8 9033 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE)) 9034 { 9035 if (TARGET_V9) 9036 { 9037 assemble_integer_with_op ("\t.xword\t", x); 9038 return true; 9039 } 9040 else 9041 { 9042 assemble_aligned_integer (4, const0_rtx); 9043 assemble_aligned_integer (4, x); 9044 return true; 9045 } 9046 } 9047 return default_assemble_integer (x, size, aligned_p); 9048 } 9049 9050 /* Return the value of a code used in the .proc pseudo-op that says 9051 what kind of result this function returns. For non-C types, we pick 9052 the closest C type. 
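Worked examples under the default sizes defined below (assumed, for illustration only): plain 'int' yields code 4, 'unsigned short' yields 13, and 'int *' yields 0x44, i.e. the pointer qualifier 1 shifted into bit 6 or'ed with the base code 4.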
*/ 9053 9054 #ifndef SHORT_TYPE_SIZE 9055 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2) 9056 #endif 9057 9058 #ifndef INT_TYPE_SIZE 9059 #define INT_TYPE_SIZE BITS_PER_WORD 9060 #endif 9061 9062 #ifndef LONG_TYPE_SIZE 9063 #define LONG_TYPE_SIZE BITS_PER_WORD 9064 #endif 9065 9066 #ifndef LONG_LONG_TYPE_SIZE 9067 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2) 9068 #endif 9069 9070 #ifndef FLOAT_TYPE_SIZE 9071 #define FLOAT_TYPE_SIZE BITS_PER_WORD 9072 #endif 9073 9074 #ifndef DOUBLE_TYPE_SIZE 9075 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) 9076 #endif 9077 9078 #ifndef LONG_DOUBLE_TYPE_SIZE 9079 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) 9080 #endif 9081 9082 unsigned long 9083 sparc_type_code (register tree type) 9084 { 9085 register unsigned long qualifiers = 0; 9086 register unsigned shift; 9087 9088 /* Only the first 30 bits of the qualifier are valid. We must refrain from 9089 setting more, since some assemblers will give an error for this. Also, 9090 we must be careful to avoid shifts of 32 bits or more to avoid getting 9091 unpredictable results. */ 9092 9093 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type)) 9094 { 9095 switch (TREE_CODE (type)) 9096 { 9097 case ERROR_MARK: 9098 return qualifiers; 9099 9100 case ARRAY_TYPE: 9101 qualifiers |= (3 << shift); 9102 break; 9103 9104 case FUNCTION_TYPE: 9105 case METHOD_TYPE: 9106 qualifiers |= (2 << shift); 9107 break; 9108 9109 case POINTER_TYPE: 9110 case REFERENCE_TYPE: 9111 case OFFSET_TYPE: 9112 qualifiers |= (1 << shift); 9113 break; 9114 9115 case RECORD_TYPE: 9116 return (qualifiers | 8); 9117 9118 case UNION_TYPE: 9119 case QUAL_UNION_TYPE: 9120 return (qualifiers | 9); 9121 9122 case ENUMERAL_TYPE: 9123 return (qualifiers | 10); 9124 9125 case VOID_TYPE: 9126 return (qualifiers | 16); 9127 9128 case INTEGER_TYPE: 9129 /* If this is a range type, consider it to be the underlying 9130 type. */ 9131 if (TREE_TYPE (type) != 0) 9132 break; 9133 9134 /* Carefully distinguish all the standard types of C, 9135 without messing up if the language is not C. We do this by 9136 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to 9137 look at both the names and the above fields, but that's redundant. 9138 Any type whose size is between two C types will be considered 9139 to be the wider of the two types. Also, we do not have a 9140 special code to use for "long long", so anything wider than 9141 long is treated the same. Note that we can't distinguish 9142 between "int" and "long" in this code if they are the same 9143 size, but that's fine, since neither can the assembler. */ 9144 9145 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE) 9146 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2)); 9147 9148 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE) 9149 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3)); 9150 9151 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE) 9152 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4)); 9153 9154 else 9155 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5)); 9156 9157 case REAL_TYPE: 9158 /* If this is a range type, consider it to be the underlying 9159 type. */ 9160 if (TREE_TYPE (type) != 0) 9161 break; 9162 9163 /* Carefully distinguish all the standard types of C, 9164 without messing up if the language is not C. */ 9165 9166 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE) 9167 return (qualifiers | 6); 9168 9169 else 9170 return (qualifiers | 7); 9171 9172 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */ 9173 /* ??? 
We need to distinguish between double and float complex types, 9174 but I don't know how yet because I can't reach this code from 9175 existing front-ends. */ 9176 return (qualifiers | 7); /* Who knows? */ 9177 9178 case VECTOR_TYPE: 9179 case BOOLEAN_TYPE: /* Boolean truth value type. */ 9180 case LANG_TYPE: 9181 case NULLPTR_TYPE: 9182 return qualifiers; 9183 9184 default: 9185 gcc_unreachable (); /* Not a type! */ 9186 } 9187 } 9188 9189 return qualifiers; 9190 } 9191 9192 /* Nested function support. */ 9193 9194 /* Emit RTL insns to initialize the variable parts of a trampoline. 9195 FNADDR is an RTX for the address of the function's pure code. 9196 CXT is an RTX for the static chain value for the function. 9197 9198 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi 9199 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes 9200 (to store insns). This is a bit excessive. Perhaps a different 9201 mechanism would be better here. 9202 9203 Emit enough FLUSH insns to synchronize the data and instruction caches. */ 9204 9205 static void 9206 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt) 9207 { 9208 /* SPARC 32-bit trampoline: 9209 9210 sethi %hi(fn), %g1 9211 sethi %hi(static), %g2 9212 jmp %g1+%lo(fn) 9213 or %g2, %lo(static), %g2 9214 9215 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii 9216 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii 9217 */ 9218 9219 emit_move_insn 9220 (adjust_address (m_tramp, SImode, 0), 9221 expand_binop (SImode, ior_optab, 9222 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1), 9223 GEN_INT (trunc_int_for_mode (0x03000000, SImode)), 9224 NULL_RTX, 1, OPTAB_DIRECT)); 9225 9226 emit_move_insn 9227 (adjust_address (m_tramp, SImode, 4), 9228 expand_binop (SImode, ior_optab, 9229 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1), 9230 GEN_INT (trunc_int_for_mode (0x05000000, SImode)), 9231 NULL_RTX, 1, OPTAB_DIRECT)); 9232 9233 emit_move_insn 9234 (adjust_address (m_tramp, SImode, 8), 9235 expand_binop (SImode, ior_optab, 9236 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX), 9237 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)), 9238 NULL_RTX, 1, OPTAB_DIRECT)); 9239 9240 emit_move_insn 9241 (adjust_address (m_tramp, SImode, 12), 9242 expand_binop (SImode, ior_optab, 9243 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX), 9244 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)), 9245 NULL_RTX, 1, OPTAB_DIRECT)); 9246 9247 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is 9248 aligned on a 16 byte boundary so one flush clears it all. */ 9249 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 0)))); 9250 if (sparc_cpu != PROCESSOR_ULTRASPARC 9251 && sparc_cpu != PROCESSOR_ULTRASPARC3 9252 && sparc_cpu != PROCESSOR_NIAGARA 9253 && sparc_cpu != PROCESSOR_NIAGARA2 9254 && sparc_cpu != PROCESSOR_NIAGARA3 9255 && sparc_cpu != PROCESSOR_NIAGARA4) 9256 emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8)))); 9257 9258 /* Call __enable_execute_stack after writing onto the stack to make sure 9259 the stack address is accessible. */ 9260 #ifdef HAVE_ENABLE_EXECUTE_STACK 9261 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 9262 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); 9263 #endif 9264 9265 } 9266 9267 /* The 64-bit version is simpler because it makes more sense to load the 9268 values as "immediate" data out of the trampoline. It's also easier since 9269 we can read the PC without clobbering a register. 
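For reference, the opcode words stored below decode as (hand-decoded, worth double-checking against the V9 manual): 0x83414000 = rd %pc, %g1; 0xca586018 = ldx [%g1+24], %g5; 0x81c14000 = jmp %g5; 0xca586010 = ldx [%g1+16], %g5 in the delay slot.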
*/ 9270 9271 static void 9272 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt) 9273 { 9274 /* SPARC 64-bit trampoline: 9275 9276 rd %pc, %g1 9277 ldx [%g1+24], %g5 9278 jmp %g5 9279 ldx [%g1+16], %g5 9280 +16 bytes data 9281 */ 9282 9283 emit_move_insn (adjust_address (m_tramp, SImode, 0), 9284 GEN_INT (trunc_int_for_mode (0x83414000, SImode))); 9285 emit_move_insn (adjust_address (m_tramp, SImode, 4), 9286 GEN_INT (trunc_int_for_mode (0xca586018, SImode))); 9287 emit_move_insn (adjust_address (m_tramp, SImode, 8), 9288 GEN_INT (trunc_int_for_mode (0x81c14000, SImode))); 9289 emit_move_insn (adjust_address (m_tramp, SImode, 12), 9290 GEN_INT (trunc_int_for_mode (0xca586010, SImode))); 9291 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt); 9292 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr); 9293 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0)))); 9294 9295 if (sparc_cpu != PROCESSOR_ULTRASPARC 9296 && sparc_cpu != PROCESSOR_ULTRASPARC3 9297 && sparc_cpu != PROCESSOR_NIAGARA 9298 && sparc_cpu != PROCESSOR_NIAGARA2 9299 && sparc_cpu != PROCESSOR_NIAGARA3 9300 && sparc_cpu != PROCESSOR_NIAGARA4) 9301 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8)))); 9302 9303 /* Call __enable_execute_stack after writing onto the stack to make sure 9304 the stack address is accessible. */ 9305 #ifdef HAVE_ENABLE_EXECUTE_STACK 9306 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 9307 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); 9308 #endif 9309 } 9310 9311 /* Worker for TARGET_TRAMPOLINE_INIT. */ 9312 9313 static void 9314 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) 9315 { 9316 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0)); 9317 cxt = force_reg (Pmode, cxt); 9318 if (TARGET_ARCH64) 9319 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt); 9320 else 9321 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt); 9322 } 9323 9324 /* Adjust the cost of a scheduling dependency. Return the new cost of 9325 the dependency LINK of INSN on DEP_INSN. COST is the current cost. */ 9326 9327 static int 9328 supersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 9329 { 9330 enum attr_type insn_type; 9331 9332 if (! recog_memoized (insn)) 9333 return 0; 9334 9335 insn_type = get_attr_type (insn); 9336 9337 if (REG_NOTE_KIND (link) == 0) 9338 { 9339 /* Data dependency; DEP_INSN writes a register that INSN reads some 9340 cycles later. */ 9341 9342 /* If a load, then the dependence must be on the memory address; 9343 add an extra "cycle". Note that the cost could be two cycles 9344 if the reg was written late in an instruction group; we cannot tell 9345 here. */ 9346 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD) 9347 return cost + 3; 9348 9349 /* Get the delay only if the address of the store is the dependence. */ 9350 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE) 9351 { 9352 rtx pat = PATTERN (insn); 9353 rtx dep_pat = PATTERN (dep_insn); 9354 9355 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 9356 return cost; /* This should not happen! */ 9357 9358 /* The dependency between the two instructions was on the data that 9359 is being stored. Assume that this implies that the address of the 9360 store is not dependent. */ 9361 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) 9362 return cost; 9363 9364 return cost + 3; /* An approximation.
*/ 9365 } 9366 9367 /* A shift instruction cannot receive its data from an instruction 9368 in the same cycle; add a one cycle penalty. */ 9369 if (insn_type == TYPE_SHIFT) 9370 return cost + 3; /* Split before cascade into shift. */ 9371 } 9372 else 9373 { 9374 /* Anti- or output- dependency; DEP_INSN reads/writes a register that 9375 INSN writes some cycles later. */ 9376 9377 /* These are only significant for the fpu unit; writing a fp reg before 9378 the fpu has finished with it stalls the processor. */ 9379 9380 /* Reusing an integer register causes no problems. */ 9381 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT) 9382 return 0; 9383 } 9384 9385 return cost; 9386 } 9387 9388 static int 9389 hypersparc_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 9390 { 9391 enum attr_type insn_type, dep_type; 9392 rtx pat = PATTERN(insn); 9393 rtx dep_pat = PATTERN (dep_insn); 9394 9395 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) 9396 return cost; 9397 9398 insn_type = get_attr_type (insn); 9399 dep_type = get_attr_type (dep_insn); 9400 9401 switch (REG_NOTE_KIND (link)) 9402 { 9403 case 0: 9404 /* Data dependency; DEP_INSN writes a register that INSN reads some 9405 cycles later. */ 9406 9407 switch (insn_type) 9408 { 9409 case TYPE_STORE: 9410 case TYPE_FPSTORE: 9411 /* Get the delay iff the address of the store is the dependence. */ 9412 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 9413 return cost; 9414 9415 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) 9416 return cost; 9417 return cost + 3; 9418 9419 case TYPE_LOAD: 9420 case TYPE_SLOAD: 9421 case TYPE_FPLOAD: 9422 /* If a load, then the dependence must be on the memory address. If 9423 the addresses aren't equal, then it might be a false dependency */ 9424 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE) 9425 { 9426 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET 9427 || GET_CODE (SET_DEST (dep_pat)) != MEM 9428 || GET_CODE (SET_SRC (pat)) != MEM 9429 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0), 9430 XEXP (SET_SRC (pat), 0))) 9431 return cost + 2; 9432 9433 return cost + 8; 9434 } 9435 break; 9436 9437 case TYPE_BRANCH: 9438 /* Compare to branch latency is 0. There is no benefit from 9439 separating compare and branch. */ 9440 if (dep_type == TYPE_COMPARE) 9441 return 0; 9442 /* Floating point compare to branch latency is less than 9443 compare to conditional move. */ 9444 if (dep_type == TYPE_FPCMP) 9445 return cost - 1; 9446 break; 9447 default: 9448 break; 9449 } 9450 break; 9451 9452 case REG_DEP_ANTI: 9453 /* Anti-dependencies only penalize the fpu unit. 
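(Rewriting an integer register early is harmless on this pipeline, which is the assumption behind the IALU/SHIFT early-out just below; overwriting an FP register before the FPU is done with it is what stalls.)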
*/ 9454 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT) 9455 return 0; 9456 break; 9457 9458 default: 9459 break; 9460 } 9461 9462 return cost; 9463 } 9464 9465 static int 9466 sparc_adjust_cost (rtx insn, rtx link, rtx dep, int cost) 9467 { 9468 switch (sparc_cpu) 9469 { 9470 case PROCESSOR_SUPERSPARC: 9471 cost = supersparc_adjust_cost (insn, link, dep, cost); 9472 break; 9473 case PROCESSOR_HYPERSPARC: 9474 case PROCESSOR_SPARCLITE86X: 9475 cost = hypersparc_adjust_cost (insn, link, dep, cost); 9476 break; 9477 default: 9478 break; 9479 } 9480 return cost; 9481 } 9482 9483 static void 9484 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED, 9485 int sched_verbose ATTRIBUTE_UNUSED, 9486 int max_ready ATTRIBUTE_UNUSED) 9487 {} 9488 9489 static int 9490 sparc_use_sched_lookahead (void) 9491 { 9492 if (sparc_cpu == PROCESSOR_NIAGARA 9493 || sparc_cpu == PROCESSOR_NIAGARA2 9494 || sparc_cpu == PROCESSOR_NIAGARA3) 9495 return 0; 9496 if (sparc_cpu == PROCESSOR_NIAGARA4) 9497 return 2; 9498 if (sparc_cpu == PROCESSOR_ULTRASPARC 9499 || sparc_cpu == PROCESSOR_ULTRASPARC3) 9500 return 4; 9501 if ((1 << sparc_cpu) & 9502 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) | 9503 (1 << PROCESSOR_SPARCLITE86X))) 9504 return 3; 9505 return 0; 9506 } 9507 9508 static int 9509 sparc_issue_rate (void) 9510 { 9511 switch (sparc_cpu) 9512 { 9513 case PROCESSOR_NIAGARA: 9514 case PROCESSOR_NIAGARA2: 9515 case PROCESSOR_NIAGARA3: 9516 default: 9517 return 1; 9518 case PROCESSOR_NIAGARA4: 9519 case PROCESSOR_V9: 9520 /* Assume V9 processors are capable of at least dual-issue. */ 9521 return 2; 9522 case PROCESSOR_SUPERSPARC: 9523 return 3; 9524 case PROCESSOR_HYPERSPARC: 9525 case PROCESSOR_SPARCLITE86X: 9526 return 2; 9527 case PROCESSOR_ULTRASPARC: 9528 case PROCESSOR_ULTRASPARC3: 9529 return 4; 9530 } 9531 } 9532 9533 static int 9534 set_extends (rtx insn) 9535 { 9536 register rtx pat = PATTERN (insn); 9537 9538 switch (GET_CODE (SET_SRC (pat))) 9539 { 9540 /* Load and some shift instructions zero extend. */ 9541 case MEM: 9542 case ZERO_EXTEND: 9543 /* sethi clears the high bits. */ 9544 case HIGH: 9545 /* LO_SUM is used with sethi. sethi cleared the high 9546 bits and the values used with lo_sum are positive. */ 9547 case LO_SUM: 9548 /* Store flag stores 0 or 1. */ 9549 case LT: case LTU: 9550 case GT: case GTU: 9551 case LE: case LEU: 9552 case GE: case GEU: 9553 case EQ: 9554 case NE: 9555 return 1; 9556 case AND: 9557 { 9558 rtx op0 = XEXP (SET_SRC (pat), 0); 9559 rtx op1 = XEXP (SET_SRC (pat), 1); 9560 if (GET_CODE (op1) == CONST_INT) 9561 return INTVAL (op1) >= 0; 9562 if (GET_CODE (op0) != REG) 9563 return 0; 9564 if (sparc_check_64 (op0, insn) == 1) 9565 return 1; 9566 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1); 9567 } 9568 case IOR: 9569 case XOR: 9570 { 9571 rtx op0 = XEXP (SET_SRC (pat), 0); 9572 rtx op1 = XEXP (SET_SRC (pat), 1); 9573 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0) 9574 return 0; 9575 if (GET_CODE (op1) == CONST_INT) 9576 return INTVAL (op1) >= 0; 9577 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1); 9578 } 9579 case LSHIFTRT: 9580 return GET_MODE (SET_SRC (pat)) == SImode; 9581 /* Positive integers leave the high bits zero. */ 9582 case CONST_DOUBLE: 9583 return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000); 9584 case CONST_INT: 9585 return ! 
(INTVAL (SET_SRC (pat)) & 0x80000000); 9586 case ASHIFTRT: 9587 case SIGN_EXTEND: 9588 return - (GET_MODE (SET_SRC (pat)) == SImode); 9589 case REG: 9590 return sparc_check_64 (SET_SRC (pat), insn); 9591 default: 9592 return 0; 9593 } 9594 } 9595 9596 /* We _ought_ to have only one kind per function, but... */ 9597 static GTY(()) rtx sparc_addr_diff_list; 9598 static GTY(()) rtx sparc_addr_list; 9599 9600 void 9601 sparc_defer_case_vector (rtx lab, rtx vec, int diff) 9602 { 9603 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec); 9604 if (diff) 9605 sparc_addr_diff_list 9606 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list); 9607 else 9608 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list); 9609 } 9610 9611 static void 9612 sparc_output_addr_vec (rtx vec) 9613 { 9614 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); 9615 int idx, vlen = XVECLEN (body, 0); 9616 9617 #ifdef ASM_OUTPUT_ADDR_VEC_START 9618 ASM_OUTPUT_ADDR_VEC_START (asm_out_file); 9619 #endif 9620 9621 #ifdef ASM_OUTPUT_CASE_LABEL 9622 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), 9623 NEXT_INSN (lab)); 9624 #else 9625 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); 9626 #endif 9627 9628 for (idx = 0; idx < vlen; idx++) 9629 { 9630 ASM_OUTPUT_ADDR_VEC_ELT 9631 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0))); 9632 } 9633 9634 #ifdef ASM_OUTPUT_ADDR_VEC_END 9635 ASM_OUTPUT_ADDR_VEC_END (asm_out_file); 9636 #endif 9637 } 9638 9639 static void 9640 sparc_output_addr_diff_vec (rtx vec) 9641 { 9642 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); 9643 rtx base = XEXP (XEXP (body, 0), 0); 9644 int idx, vlen = XVECLEN (body, 1); 9645 9646 #ifdef ASM_OUTPUT_ADDR_VEC_START 9647 ASM_OUTPUT_ADDR_VEC_START (asm_out_file); 9648 #endif 9649 9650 #ifdef ASM_OUTPUT_CASE_LABEL 9651 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), 9652 NEXT_INSN (lab)); 9653 #else 9654 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); 9655 #endif 9656 9657 for (idx = 0; idx < vlen; idx++) 9658 { 9659 ASM_OUTPUT_ADDR_DIFF_ELT 9660 (asm_out_file, 9661 body, 9662 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)), 9663 CODE_LABEL_NUMBER (base)); 9664 } 9665 9666 #ifdef ASM_OUTPUT_ADDR_VEC_END 9667 ASM_OUTPUT_ADDR_VEC_END (asm_out_file); 9668 #endif 9669 } 9670 9671 static void 9672 sparc_output_deferred_case_vectors (void) 9673 { 9674 rtx t; 9675 int align; 9676 9677 if (sparc_addr_list == NULL_RTX 9678 && sparc_addr_diff_list == NULL_RTX) 9679 return; 9680 9681 /* Align to cache line in the function's code section. */ 9682 switch_to_section (current_function_section ()); 9683 9684 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); 9685 if (align > 0) 9686 ASM_OUTPUT_ALIGN (asm_out_file, align); 9687 9688 for (t = sparc_addr_list; t ; t = XEXP (t, 1)) 9689 sparc_output_addr_vec (XEXP (t, 0)); 9690 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1)) 9691 sparc_output_addr_diff_vec (XEXP (t, 0)); 9692 9693 sparc_addr_list = sparc_addr_diff_list = NULL_RTX; 9694 } 9695 9696 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are 9697 unknown. Return 1 if the high bits are zero, -1 if the register is 9698 sign extended. */ 9699 int 9700 sparc_check_64 (rtx x, rtx insn) 9701 { 9702 /* If a register is set only once it is safe to ignore insns this 9703 code does not know how to handle. The loop will either recognize 9704 the single set and return the correct value or fail to recognize 9705 it and return 0. 
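For example, if the sole definition of the register is a zero-extending load, the backward walk below will reach that SET and set_extends will report 1 (high bits zero); without the single-set guarantee, hitting a CODE_LABEL or CALL_INSN first forces the conservative answer 0.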
*/ 9706 int set_once = 0; 9707 rtx y = x; 9708 9709 gcc_assert (GET_CODE (x) == REG); 9710 9711 if (GET_MODE (x) == DImode) 9712 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN); 9713 9714 if (flag_expensive_optimizations 9715 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1) 9716 set_once = 1; 9717 9718 if (insn == 0) 9719 { 9720 if (set_once) 9721 insn = get_last_insn_anywhere (); 9722 else 9723 return 0; 9724 } 9725 9726 while ((insn = PREV_INSN (insn))) 9727 { 9728 switch (GET_CODE (insn)) 9729 { 9730 case JUMP_INSN: 9731 case NOTE: 9732 break; 9733 case CODE_LABEL: 9734 case CALL_INSN: 9735 default: 9736 if (! set_once) 9737 return 0; 9738 break; 9739 case INSN: 9740 { 9741 rtx pat = PATTERN (insn); 9742 if (GET_CODE (pat) != SET) 9743 return 0; 9744 if (rtx_equal_p (x, SET_DEST (pat))) 9745 return set_extends (insn); 9746 if (y && rtx_equal_p (y, SET_DEST (pat))) 9747 return set_extends (insn); 9748 if (reg_overlap_mentioned_p (SET_DEST (pat), y)) 9749 return 0; 9750 } 9751 } 9752 } 9753 return 0; 9754 } 9755 9756 /* Output a wide shift instruction in V8+ mode. INSN is the instruction, 9757 OPERANDS are its operands and OPCODE is the mnemonic to be used. */ 9758 9759 const char * 9760 output_v8plus_shift (rtx insn, rtx *operands, const char *opcode) 9761 { 9762 static char asm_code[60]; 9763 9764 /* The scratch register is only required when the destination 9765 register is not a 64-bit global or out register. */ 9766 if (which_alternative != 2) 9767 operands[3] = operands[0]; 9768 9769 /* We can only shift by constants <= 63. */ 9770 if (GET_CODE (operands[2]) == CONST_INT) 9771 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); 9772 9773 if (GET_CODE (operands[1]) == CONST_INT) 9774 { 9775 output_asm_insn ("mov\t%1, %3", operands); 9776 } 9777 else 9778 { 9779 output_asm_insn ("sllx\t%H1, 32, %3", operands); 9780 if (sparc_check_64 (operands[1], insn) <= 0) 9781 output_asm_insn ("srl\t%L1, 0, %L1", operands); 9782 output_asm_insn ("or\t%L1, %3, %3", operands); 9783 } 9784 9785 strcpy (asm_code, opcode); 9786 9787 if (which_alternative != 2) 9788 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0"); 9789 else 9790 return 9791 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"); 9792 } 9793 9794 /* Output rtl to increment the profiler label LABELNO 9795 for profiling a function entry. */ 9796 9797 void 9798 sparc_profile_hook (int labelno) 9799 { 9800 char buf[32]; 9801 rtx lab, fun; 9802 9803 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION); 9804 if (NO_PROFILE_COUNTERS) 9805 { 9806 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0); 9807 } 9808 else 9809 { 9810 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 9811 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); 9812 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode); 9813 } 9814 } 9815 9816 #ifdef TARGET_SOLARIS 9817 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. 
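As an example of the output, a writable data section named .mydata comes out as .section ".mydata",#alloc,#write, with ,#progbits (or ,#nobits for BSS) appended only when the assembler supports it; see HAVE_AS_SPARC_NOBITS below.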
*/ 9818 9819 static void 9820 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags, 9821 tree decl ATTRIBUTE_UNUSED) 9822 { 9823 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) 9824 { 9825 solaris_elf_asm_comdat_section (name, flags, decl); 9826 return; 9827 } 9828 9829 fprintf (asm_out_file, "\t.section\t\"%s\"", name); 9830 9831 if (!(flags & SECTION_DEBUG)) 9832 fputs (",#alloc", asm_out_file); 9833 if (flags & SECTION_WRITE) 9834 fputs (",#write", asm_out_file); 9835 if (flags & SECTION_TLS) 9836 fputs (",#tls", asm_out_file); 9837 if (flags & SECTION_CODE) 9838 fputs (",#execinstr", asm_out_file); 9839 9840 /* Sun as only supports #nobits/#progbits since Solaris 10. */ 9841 if (HAVE_AS_SPARC_NOBITS) 9842 { 9843 if (flags & SECTION_BSS) 9844 fputs (",#nobits", asm_out_file); 9845 else 9846 fputs (",#progbits", asm_out_file); 9847 } 9848 9849 fputc ('\n', asm_out_file); 9850 } 9851 #endif /* TARGET_SOLARIS */ 9852 9853 /* We do not allow indirect calls to be optimized into sibling calls. 9854 9855 We cannot use sibling calls when delayed branches are disabled 9856 because they will likely require the call delay slot to be filled. 9857 9858 Also, on SPARC 32-bit we cannot emit a sibling call when the 9859 current function returns a structure. This is because the "unimp 9860 after call" convention would cause the callee to return to the 9861 wrong place. The generic code already disallows cases where the 9862 function being called returns a structure. 9863 9864 It may seem strange how this last case could occur. Usually there 9865 is code after the call which jumps to epilogue code which dumps the 9866 return value into the struct return area. That ought to invalidate 9867 the sibling call right? Well, in the C++ case we can end up passing 9868 the pointer to the struct return area to a constructor (which returns 9869 void) and then nothing else happens. Such a sibling call would look 9870 valid without the added check here. 9871 9872 VxWorks PIC PLT entries require the global pointer to be initialized 9873 on entry. We therefore can't emit sibling calls to them. */ 9874 static bool 9875 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 9876 { 9877 return (decl 9878 && flag_delayed_branch 9879 && (TARGET_ARCH64 || ! cfun->returns_struct) 9880 && !(TARGET_VXWORKS_RTP 9881 && flag_pic 9882 && !targetm.binds_local_p (decl))); 9883 } 9884 9885 /* libfunc renaming. */ 9886 9887 static void 9888 sparc_init_libfuncs (void) 9889 { 9890 if (TARGET_ARCH32) 9891 { 9892 /* Use the subroutines that Sun's library provides for integer 9893 multiply and divide. The `*' prevents an underscore from 9894 being prepended by the compiler. .umul is a little faster 9895 than .mul. */ 9896 set_optab_libfunc (smul_optab, SImode, "*.umul"); 9897 set_optab_libfunc (sdiv_optab, SImode, "*.div"); 9898 set_optab_libfunc (udiv_optab, SImode, "*.udiv"); 9899 set_optab_libfunc (smod_optab, SImode, "*.rem"); 9900 set_optab_libfunc (umod_optab, SImode, "*.urem"); 9901 9902 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */ 9903 set_optab_libfunc (add_optab, TFmode, "_Q_add"); 9904 set_optab_libfunc (sub_optab, TFmode, "_Q_sub"); 9905 set_optab_libfunc (neg_optab, TFmode, "_Q_neg"); 9906 set_optab_libfunc (smul_optab, TFmode, "_Q_mul"); 9907 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div"); 9908 9909 /* We can define the TFmode sqrt optab only if TARGET_FPU. 
This 9910 is because with soft-float, the SFmode and DFmode sqrt 9911 instructions will be absent, and the compiler will notice and 9912 try to use the TFmode sqrt instruction for calls to the 9913 builtin function sqrt, but this fails. */ 9914 if (TARGET_FPU) 9915 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt"); 9916 9917 set_optab_libfunc (eq_optab, TFmode, "_Q_feq"); 9918 set_optab_libfunc (ne_optab, TFmode, "_Q_fne"); 9919 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt"); 9920 set_optab_libfunc (ge_optab, TFmode, "_Q_fge"); 9921 set_optab_libfunc (lt_optab, TFmode, "_Q_flt"); 9922 set_optab_libfunc (le_optab, TFmode, "_Q_fle"); 9923 9924 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq"); 9925 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq"); 9926 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos"); 9927 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod"); 9928 9929 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi"); 9930 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou"); 9931 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq"); 9932 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq"); 9933 9934 if (DITF_CONVERSION_LIBFUNCS) 9935 { 9936 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll"); 9937 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull"); 9938 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq"); 9939 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq"); 9940 } 9941 9942 if (SUN_CONVERSION_LIBFUNCS) 9943 { 9944 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll"); 9945 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull"); 9946 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll"); 9947 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull"); 9948 } 9949 } 9950 if (TARGET_ARCH64) 9951 { 9952 /* In the SPARC 64bit ABI, SImode multiply and divide functions 9953 do not exist in the library. Make sure the compiler does not 9954 emit calls to them by accident. (It should always use the 9955 hardware instructions.) */ 9956 set_optab_libfunc (smul_optab, SImode, 0); 9957 set_optab_libfunc (sdiv_optab, SImode, 0); 9958 set_optab_libfunc (udiv_optab, SImode, 0); 9959 set_optab_libfunc (smod_optab, SImode, 0); 9960 set_optab_libfunc (umod_optab, SImode, 0); 9961 9962 if (SUN_INTEGER_MULTIPLY_64) 9963 { 9964 set_optab_libfunc (smul_optab, DImode, "__mul64"); 9965 set_optab_libfunc (sdiv_optab, DImode, "__div64"); 9966 set_optab_libfunc (udiv_optab, DImode, "__udiv64"); 9967 set_optab_libfunc (smod_optab, DImode, "__rem64"); 9968 set_optab_libfunc (umod_optab, DImode, "__urem64"); 9969 } 9970 9971 if (SUN_CONVERSION_LIBFUNCS) 9972 { 9973 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol"); 9974 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul"); 9975 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol"); 9976 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul"); 9977 } 9978 } 9979 } 9980 9981 static tree def_builtin (const char *name, int code, tree type) 9982 { 9983 return add_builtin_function (name, type, code, BUILT_IN_MD, NULL, 9984 NULL_TREE); 9985 } 9986 9987 static tree def_builtin_const (const char *name, int code, tree type) 9988 { 9989 tree t = def_builtin (name, code, type); 9990 9991 if (t) 9992 TREE_READONLY (t) = 1; 9993 9994 return t; 9995 } 9996 9997 /* Implement the TARGET_INIT_BUILTINS target hook. 9998 Create builtin functions for special SPARC instructions.
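As a usage sketch (assuming -mvis and the GNU vector extensions; the function name is illustrative only):

typedef short v4hi __attribute__ ((vector_size (8)));
typedef unsigned char v4qi __attribute__ ((vector_size (4)));

v4qi pack (v4hi x) { return __builtin_vis_fpack16 (x); }

The call reaches sparc_expand_builtin below with the insn code recorded here, CODE_FOR_fpack16_vis.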
*/ 9999 10000 static void 10001 sparc_init_builtins (void) 10002 { 10003 if (TARGET_VIS) 10004 sparc_vis_init_builtins (); 10005 } 10006 10007 /* Create builtin functions for VIS 1.0 instructions. */ 10008 10009 static void 10010 sparc_vis_init_builtins (void) 10011 { 10012 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4); 10013 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8); 10014 tree v4hi = build_vector_type (intHI_type_node, 4); 10015 tree v2hi = build_vector_type (intHI_type_node, 2); 10016 tree v2si = build_vector_type (intSI_type_node, 2); 10017 tree v1si = build_vector_type (intSI_type_node, 1); 10018 10019 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0); 10020 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0); 10021 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0); 10022 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0); 10023 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0); 10024 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0); 10025 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0); 10026 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0); 10027 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0); 10028 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0); 10029 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0); 10030 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0); 10031 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0); 10032 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0); 10033 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node, 10034 v8qi, v8qi, 10035 intDI_type_node, 0); 10036 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node, 10037 v8qi, v8qi, 0); 10038 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node, 10039 v8qi, v8qi, 0); 10040 tree di_ftype_di_di = build_function_type_list (intDI_type_node, 10041 intDI_type_node, 10042 intDI_type_node, 0); 10043 tree si_ftype_si_si = build_function_type_list (intSI_type_node, 10044 intSI_type_node, 10045 intSI_type_node, 0); 10046 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node, 10047 ptr_type_node, 10048 intSI_type_node, 0); 10049 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node, 10050 ptr_type_node, 10051 intDI_type_node, 0); 10052 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node, 10053 ptr_type_node, 10054 ptr_type_node, 0); 10055 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node, 10056 ptr_type_node, 10057 ptr_type_node, 0); 10058 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node, 10059 v4hi, v4hi, 0); 10060 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node, 10061 v2si, v2si, 0); 10062 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node, 10063 v4hi, v4hi, 0); 10064 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node, 10065 v2si, v2si, 0); 10066 tree void_ftype_di = build_function_type_list (void_type_node, 10067 intDI_type_node, 0); 10068 tree di_ftype_void = build_function_type_list (intDI_type_node, 10069 void_type_node, 0); 10070 tree void_ftype_si = build_function_type_list (void_type_node, 10071 intSI_type_node, 0); 10072 tree sf_ftype_sf_sf = build_function_type_list (float_type_node, 10073 
float_type_node, 10074 float_type_node, 0); 10075 tree df_ftype_df_df = build_function_type_list (double_type_node, 10076 double_type_node, 10077 double_type_node, 0); 10078 10079 /* Packing and expanding vectors. */ 10080 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, 10081 v4qi_ftype_v4hi); 10082 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis, 10083 v8qi_ftype_v2si_v8qi); 10084 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis, 10085 v2hi_ftype_v2si); 10086 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, 10087 v4hi_ftype_v4qi); 10088 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis, 10089 v8qi_ftype_v4qi_v4qi); 10090 10091 /* Multiplications. */ 10092 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis, 10093 v4hi_ftype_v4qi_v4hi); 10094 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis, 10095 v4hi_ftype_v4qi_v2hi); 10096 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis, 10097 v4hi_ftype_v4qi_v2hi); 10098 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis, 10099 v4hi_ftype_v8qi_v4hi); 10100 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis, 10101 v4hi_ftype_v8qi_v4hi); 10102 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis, 10103 v2si_ftype_v4qi_v2hi); 10104 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis, 10105 v2si_ftype_v4qi_v2hi); 10106 10107 /* Data aligning. */ 10108 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis, 10109 v4hi_ftype_v4hi_v4hi); 10110 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis, 10111 v8qi_ftype_v8qi_v8qi); 10112 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis, 10113 v2si_ftype_v2si_v2si); 10114 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis, 10115 di_ftype_di_di); 10116 10117 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis, 10118 void_ftype_di); 10119 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis, 10120 di_ftype_void); 10121 10122 if (TARGET_ARCH64) 10123 { 10124 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis, 10125 ptr_ftype_ptr_di); 10126 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis, 10127 ptr_ftype_ptr_di); 10128 } 10129 else 10130 { 10131 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis, 10132 ptr_ftype_ptr_si); 10133 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis, 10134 ptr_ftype_ptr_si); 10135 } 10136 10137 /* Pixel distance. */ 10138 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis, 10139 di_ftype_v8qi_v8qi_di); 10140 10141 /* Edge handling. 
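The edge8/16/32 builtins take pointers to the first and last byte of a row and yield a mask of the lanes that lie within it, so loops over image rows can mask the partial vectors at either end; the 'l' forms compute the little-endian mask and the VIS2 'n' forms additionally leave the condition codes alone.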
*/ 10142 if (TARGET_ARCH64) 10143 { 10144 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis, 10145 di_ftype_ptr_ptr); 10146 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis, 10147 di_ftype_ptr_ptr); 10148 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis, 10149 di_ftype_ptr_ptr); 10150 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis, 10151 di_ftype_ptr_ptr); 10152 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis, 10153 di_ftype_ptr_ptr); 10154 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis, 10155 di_ftype_ptr_ptr); 10156 if (TARGET_VIS2) 10157 { 10158 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis, 10159 di_ftype_ptr_ptr); 10160 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis, 10161 di_ftype_ptr_ptr); 10162 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis, 10163 di_ftype_ptr_ptr); 10164 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis, 10165 di_ftype_ptr_ptr); 10166 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis, 10167 di_ftype_ptr_ptr); 10168 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis, 10169 di_ftype_ptr_ptr); 10170 } 10171 } 10172 else 10173 { 10174 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis, 10175 si_ftype_ptr_ptr); 10176 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis, 10177 si_ftype_ptr_ptr); 10178 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis, 10179 si_ftype_ptr_ptr); 10180 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis, 10181 si_ftype_ptr_ptr); 10182 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis, 10183 si_ftype_ptr_ptr); 10184 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis, 10185 si_ftype_ptr_ptr); 10186 if (TARGET_VIS2) 10187 { 10188 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis, 10189 si_ftype_ptr_ptr); 10190 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis, 10191 si_ftype_ptr_ptr); 10192 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis, 10193 si_ftype_ptr_ptr); 10194 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis, 10195 si_ftype_ptr_ptr); 10196 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis, 10197 si_ftype_ptr_ptr); 10198 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis, 10199 si_ftype_ptr_ptr); 10200 } 10201 } 10202 10203 /* Pixel compare. 
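Unlike the scalar FP compares, fcmple16 and friends compare each 16-bit or 32-bit lane of their two operands and return the result as a small bitmask in an integer register, one bit per lane, which is why the return type here is an integer rather than a vector.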
*/ 10204 if (TARGET_ARCH64) 10205 { 10206 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis, 10207 di_ftype_v4hi_v4hi); 10208 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis, 10209 di_ftype_v2si_v2si); 10210 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis, 10211 di_ftype_v4hi_v4hi); 10212 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis, 10213 di_ftype_v2si_v2si); 10214 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis, 10215 di_ftype_v4hi_v4hi); 10216 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis, 10217 di_ftype_v2si_v2si); 10218 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis, 10219 di_ftype_v4hi_v4hi); 10220 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis, 10221 di_ftype_v2si_v2si); 10222 } 10223 else 10224 { 10225 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis, 10226 si_ftype_v4hi_v4hi); 10227 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis, 10228 si_ftype_v2si_v2si); 10229 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis, 10230 si_ftype_v4hi_v4hi); 10231 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis, 10232 si_ftype_v2si_v2si); 10233 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis, 10234 si_ftype_v4hi_v4hi); 10235 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis, 10236 si_ftype_v2si_v2si); 10237 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis, 10238 si_ftype_v4hi_v4hi); 10239 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis, 10240 si_ftype_v2si_v2si); 10241 } 10242 10243 /* Addition and subtraction. */ 10244 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3, 10245 v4hi_ftype_v4hi_v4hi); 10246 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3, 10247 v2hi_ftype_v2hi_v2hi); 10248 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3, 10249 v2si_ftype_v2si_v2si); 10250 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3, 10251 v1si_ftype_v1si_v1si); 10252 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3, 10253 v4hi_ftype_v4hi_v4hi); 10254 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3, 10255 v2hi_ftype_v2hi_v2hi); 10256 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3, 10257 v2si_ftype_v2si_v2si); 10258 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3, 10259 v1si_ftype_v1si_v1si); 10260 10261 /* Three-dimensional array addressing. 
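Roughly, array8/16/32 take packed fixed-point x/y/z coordinates plus a size operand and interleave the coordinate bits into a blocked memory offset, so that walks through a 3-D data set stay within cache-friendly blocks; the 16 and 32 forms scale the offset for the element size.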
*/ 10262 if (TARGET_ARCH64) 10263 { 10264 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis, 10265 di_ftype_di_di); 10266 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis, 10267 di_ftype_di_di); 10268 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis, 10269 di_ftype_di_di); 10270 } 10271 else 10272 { 10273 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis, 10274 si_ftype_si_si); 10275 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis, 10276 si_ftype_si_si); 10277 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis, 10278 si_ftype_si_si); 10279 } 10280 10281 if (TARGET_VIS2) 10282 { 10283 /* Byte mask and shuffle */ 10284 if (TARGET_ARCH64) 10285 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis, 10286 di_ftype_di_di); 10287 else 10288 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis, 10289 si_ftype_si_si); 10290 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis, 10291 v4hi_ftype_v4hi_v4hi); 10292 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis, 10293 v8qi_ftype_v8qi_v8qi); 10294 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis, 10295 v2si_ftype_v2si_v2si); 10296 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis, 10297 di_ftype_di_di); 10298 } 10299 10300 if (TARGET_VIS3) 10301 { 10302 if (TARGET_ARCH64) 10303 { 10304 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis, 10305 void_ftype_di); 10306 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis, 10307 void_ftype_di); 10308 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis, 10309 void_ftype_di); 10310 } 10311 else 10312 { 10313 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis, 10314 void_ftype_si); 10315 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis, 10316 void_ftype_si); 10317 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis, 10318 void_ftype_si); 10319 } 10320 10321 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis, 10322 v4hi_ftype_v4hi_v4hi); 10323 10324 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3, 10325 v4hi_ftype_v4hi_v4hi); 10326 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3, 10327 v4hi_ftype_v4hi_v4hi); 10328 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3, 10329 v4hi_ftype_v4hi_v4hi); 10330 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3, 10331 v4hi_ftype_v4hi_v4hi); 10332 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3, 10333 v2si_ftype_v2si_v2si); 10334 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3, 10335 v2si_ftype_v2si_v2si); 10336 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3, 10337 v2si_ftype_v2si_v2si); 10338 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3, 10339 v2si_ftype_v2si_v2si); 10340 10341 if (TARGET_ARCH64) 10342 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis, 10343 di_ftype_v8qi_v8qi); 10344 else 10345 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis, 10346 si_ftype_v8qi_v8qi); 10347 10348 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis, 10349 v4hi_ftype_v4hi_v4hi); 10350 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis, 10351 di_ftype_di_di); 10352 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis, 10353 di_ftype_di_di); 10354 10355 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3, 10356 v4hi_ftype_v4hi_v4hi); 10357 
def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3, 10358 v2hi_ftype_v2hi_v2hi); 10359 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3, 10360 v4hi_ftype_v4hi_v4hi); 10361 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3, 10362 v2hi_ftype_v2hi_v2hi); 10363 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3, 10364 v2si_ftype_v2si_v2si); 10365 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3, 10366 v1si_ftype_v1si_v1si); 10367 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3, 10368 v2si_ftype_v2si_v2si); 10369 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3, 10370 v1si_ftype_v1si_v1si); 10371 10372 if (TARGET_ARCH64) 10373 { 10374 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis, 10375 di_ftype_v8qi_v8qi); 10376 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis, 10377 di_ftype_v8qi_v8qi); 10378 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis, 10379 di_ftype_v8qi_v8qi); 10380 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis, 10381 di_ftype_v8qi_v8qi); 10382 } 10383 else 10384 { 10385 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis, 10386 si_ftype_v8qi_v8qi); 10387 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis, 10388 si_ftype_v8qi_v8qi); 10389 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis, 10390 si_ftype_v8qi_v8qi); 10391 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis, 10392 si_ftype_v8qi_v8qi); 10393 } 10394 10395 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis, 10396 sf_ftype_sf_sf); 10397 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis, 10398 df_ftype_df_df); 10399 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis, 10400 sf_ftype_sf_sf); 10401 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis, 10402 df_ftype_df_df); 10403 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis, 10404 sf_ftype_sf_sf); 10405 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis, 10406 df_ftype_df_df); 10407 10408 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis, 10409 di_ftype_di_di); 10410 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis, 10411 di_ftype_di_di); 10412 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis, 10413 di_ftype_di_di); 10414 } 10415 } 10416 10417 /* Handle TARGET_EXPAND_BUILTIN target hook. 10418 Expand builtin functions for sparc intrinsics. */ 10419 10420 static rtx 10421 sparc_expand_builtin (tree exp, rtx target, 10422 rtx subtarget ATTRIBUTE_UNUSED, 10423 enum machine_mode tmode ATTRIBUTE_UNUSED, 10424 int ignore ATTRIBUTE_UNUSED) 10425 { 10426 tree arg; 10427 call_expr_arg_iterator iter; 10428 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 10429 unsigned int icode = DECL_FUNCTION_CODE (fndecl); 10430 rtx pat, op[4]; 10431 int arg_count = 0; 10432 bool nonvoid; 10433 10434 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; 10435 10436 if (nonvoid) 10437 { 10438 enum machine_mode tmode = insn_data[icode].operand[0].mode; 10439 if (!target 10440 || GET_MODE (target) != tmode 10441 || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) 10442 op[0] = gen_reg_rtx (tmode); 10443 else 10444 op[0] = target; 10445 } 10446 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) 10447 { 10448 const struct insn_operand_data *insn_op; 10449 int idx; 10450 10451 if (arg == error_mark_node) 10452 return NULL_RTX; 10453 10454 arg_count++; 10455 idx = arg_count - !nonvoid; 10456 insn_op = &insn_data[icode].operand[idx]; 10457 op[arg_count] = expand_normal (arg); 10458 10459 if (insn_op->mode == V1DImode 10460 && GET_MODE (op[arg_count]) == DImode) 10461 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]); 10462 else if (insn_op->mode == V1SImode 10463 && GET_MODE (op[arg_count]) == SImode) 10464 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]); 10465 10466 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count], 10467 insn_op->mode)) 10468 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]); 10469 } 10470 10471 switch (arg_count) 10472 { 10473 case 0: 10474 pat = GEN_FCN (icode) (op[0]); 10475 break; 10476 case 1: 10477 if (nonvoid) 10478 pat = GEN_FCN (icode) (op[0], op[1]); 10479 else 10480 pat = GEN_FCN (icode) (op[1]); 10481 break; 10482 case 2: 10483 pat = GEN_FCN (icode) (op[0], op[1], op[2]); 10484 break; 10485 case 3: 10486 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); 10487 break; 10488 default: 10489 gcc_unreachable (); 10490 } 10491 10492 if (!pat) 10493 return NULL_RTX; 10494 10495 emit_insn (pat); 10496 10497 if (nonvoid) 10498 return op[0]; 10499 else 10500 return const0_rtx; 10501 } 10502 10503 static int 10504 sparc_vis_mul8x16 (int e8, int e16) 10505 { 10506 return (e8 * e16 + 128) / 256; 10507 } 10508 10509 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put 10510 the result into the array N_ELTS, whose elements are of INNER_TYPE. */ 10511 10512 static void 10513 sparc_handle_vis_mul8x16 (tree *n_elts, int fncode, tree inner_type, 10514 tree cst0, tree cst1) 10515 { 10516 unsigned i, num = VECTOR_CST_NELTS (cst0); 10517 int scale; 10518 10519 switch (fncode) 10520 { 10521 case CODE_FOR_fmul8x16_vis: 10522 for (i = 0; i < num; ++i) 10523 { 10524 int val 10525 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 10526 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i))); 10527 n_elts[i] = build_int_cst (inner_type, val); 10528 } 10529 break; 10530 10531 case CODE_FOR_fmul8x16au_vis: 10532 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0)); 10533 10534 for (i = 0; i < num; ++i) 10535 { 10536 int val 10537 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 10538 scale); 10539 n_elts[i] = build_int_cst (inner_type, val); 10540 } 10541 break; 10542 10543 case CODE_FOR_fmul8x16al_vis: 10544 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1)); 10545 10546 for (i = 0; i < num; ++i) 10547 { 10548 int val 10549 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 10550 scale); 10551 n_elts[i] = build_int_cst (inner_type, val); 10552 } 10553 break; 10554 10555 default: 10556 gcc_unreachable (); 10557 } 10558 } 10559 10560 /* Handle TARGET_FOLD_BUILTIN target hook. 10561 Fold builtin functions for SPARC intrinsics. If IGNORE is true the 10562 result of the function call is ignored. NULL_TREE is returned if the 10563 function could not be folded. 
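For example, __builtin_vis_fexpand applied to a VECTOR_CST folds at compile time: each 8-bit element is widened and shifted left by 4, mirroring what the instruction itself would compute (see the CODE_FOR_fexpand_vis case below).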
*/ 10564 10565 static tree 10566 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, 10567 tree *args, bool ignore) 10568 { 10569 tree arg0, arg1, arg2; 10570 tree rtype = TREE_TYPE (TREE_TYPE (fndecl)); 10571 enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl); 10572 10573 if (ignore) 10574 { 10575 /* Note that a switch statement instead of the sequence of tests would 10576 be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing 10577 and that would yield multiple alternatives with identical values. */ 10578 if (icode == CODE_FOR_alignaddrsi_vis 10579 || icode == CODE_FOR_alignaddrdi_vis 10580 || icode == CODE_FOR_wrgsr_vis 10581 || icode == CODE_FOR_bmasksi_vis 10582 || icode == CODE_FOR_bmaskdi_vis 10583 || icode == CODE_FOR_cmask8si_vis 10584 || icode == CODE_FOR_cmask8di_vis 10585 || icode == CODE_FOR_cmask16si_vis 10586 || icode == CODE_FOR_cmask16di_vis 10587 || icode == CODE_FOR_cmask32si_vis 10588 || icode == CODE_FOR_cmask32di_vis) 10589 ; 10590 else 10591 return build_zero_cst (rtype); 10592 } 10593 10594 switch (icode) 10595 { 10596 case CODE_FOR_fexpand_vis: 10597 arg0 = args[0]; 10598 STRIP_NOPS (arg0); 10599 10600 if (TREE_CODE (arg0) == VECTOR_CST) 10601 { 10602 tree inner_type = TREE_TYPE (rtype); 10603 tree *n_elts; 10604 unsigned i; 10605 10606 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0)); 10607 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 10608 n_elts[i] = build_int_cst (inner_type, 10609 TREE_INT_CST_LOW 10610 (VECTOR_CST_ELT (arg0, i)) << 4); 10611 return build_vector (rtype, n_elts); 10612 } 10613 break; 10614 10615 case CODE_FOR_fmul8x16_vis: 10616 case CODE_FOR_fmul8x16au_vis: 10617 case CODE_FOR_fmul8x16al_vis: 10618 arg0 = args[0]; 10619 arg1 = args[1]; 10620 STRIP_NOPS (arg0); 10621 STRIP_NOPS (arg1); 10622 10623 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) 10624 { 10625 tree inner_type = TREE_TYPE (rtype); 10626 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0)); 10627 sparc_handle_vis_mul8x16 (n_elts, icode, inner_type, arg0, arg1); 10628 return build_vector (rtype, n_elts); 10629 } 10630 break; 10631 10632 case CODE_FOR_fpmerge_vis: 10633 arg0 = args[0]; 10634 arg1 = args[1]; 10635 STRIP_NOPS (arg0); 10636 STRIP_NOPS (arg1); 10637 10638 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) 10639 { 10640 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0)); 10641 unsigned i; 10642 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 10643 { 10644 n_elts[2*i] = VECTOR_CST_ELT (arg0, i); 10645 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i); 10646 } 10647 10648 return build_vector (rtype, n_elts); 10649 } 10650 break; 10651 10652 case CODE_FOR_pdist_vis: 10653 arg0 = args[0]; 10654 arg1 = args[1]; 10655 arg2 = args[2]; 10656 STRIP_NOPS (arg0); 10657 STRIP_NOPS (arg1); 10658 STRIP_NOPS (arg2); 10659 10660 if (TREE_CODE (arg0) == VECTOR_CST 10661 && TREE_CODE (arg1) == VECTOR_CST 10662 && TREE_CODE (arg2) == INTEGER_CST) 10663 { 10664 bool overflow = false; 10665 double_int result = TREE_INT_CST (arg2); 10666 double_int tmp; 10667 unsigned i; 10668 10669 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 10670 { 10671 double_int e0 = TREE_INT_CST (VECTOR_CST_ELT (arg0, i)); 10672 double_int e1 = TREE_INT_CST (VECTOR_CST_ELT (arg1, i)); 10673 10674 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf; 10675 10676 tmp = e1.neg_with_overflow (&neg1_ovf); 10677 tmp = e0.add_with_sign (tmp, false, &add1_ovf); 10678 if (tmp.is_negative ()) 10679 tmp = tmp.neg_with_overflow (&neg2_ovf); 10680 
else 10681 neg2_ovf = false; 10682 result = result.add_with_sign (tmp, false, &add2_ovf); 10683 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf; 10684 } 10685 10686 gcc_assert (!overflow); 10687 10688 return build_int_cst_wide (rtype, result.low, result.high); 10689 } 10690 10691 default: 10692 break; 10693 } 10694 10695 return NULL_TREE; 10696 } 10697 10698 /* ??? This duplicates information provided to the compiler by the 10699 ??? scheduler description. Some day, teach genautomata to output 10700 ??? the latencies and then CSE will just use that. */ 10701 10702 static bool 10703 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, 10704 int *total, bool speed ATTRIBUTE_UNUSED) 10705 { 10706 enum machine_mode mode = GET_MODE (x); 10707 bool float_mode_p = FLOAT_MODE_P (mode); 10708 10709 switch (code) 10710 { 10711 case CONST_INT: 10712 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000) 10713 { 10714 *total = 0; 10715 return true; 10716 } 10717 /* FALLTHRU */ 10718 10719 case HIGH: 10720 *total = 2; 10721 return true; 10722 10723 case CONST: 10724 case LABEL_REF: 10725 case SYMBOL_REF: 10726 *total = 4; 10727 return true; 10728 10729 case CONST_DOUBLE: 10730 if (GET_MODE (x) == VOIDmode 10731 && ((CONST_DOUBLE_HIGH (x) == 0 10732 && CONST_DOUBLE_LOW (x) < 0x1000) 10733 || (CONST_DOUBLE_HIGH (x) == -1 10734 && CONST_DOUBLE_LOW (x) < 0 10735 && CONST_DOUBLE_LOW (x) >= -0x1000))) 10736 *total = 0; 10737 else 10738 *total = 8; 10739 return true; 10740 10741 case MEM: 10742 /* If outer-code was a sign or zero extension, a cost 10743 of COSTS_N_INSNS (1) was already added in. This is 10744 why we are subtracting it back out. */ 10745 if (outer_code == ZERO_EXTEND) 10746 { 10747 *total = sparc_costs->int_zload - COSTS_N_INSNS (1); 10748 } 10749 else if (outer_code == SIGN_EXTEND) 10750 { 10751 *total = sparc_costs->int_sload - COSTS_N_INSNS (1); 10752 } 10753 else if (float_mode_p) 10754 { 10755 *total = sparc_costs->float_load; 10756 } 10757 else 10758 { 10759 *total = sparc_costs->int_load; 10760 } 10761 10762 return true; 10763 10764 case PLUS: 10765 case MINUS: 10766 if (float_mode_p) 10767 *total = sparc_costs->float_plusminus; 10768 else 10769 *total = COSTS_N_INSNS (1); 10770 return false; 10771 10772 case FMA: 10773 { 10774 rtx sub; 10775 10776 gcc_assert (float_mode_p); 10777 *total = sparc_costs->float_mul; 10778 10779 sub = XEXP (x, 0); 10780 if (GET_CODE (sub) == NEG) 10781 sub = XEXP (sub, 0); 10782 *total += rtx_cost (sub, FMA, 0, speed); 10783 10784 sub = XEXP (x, 2); 10785 if (GET_CODE (sub) == NEG) 10786 sub = XEXP (sub, 0); 10787 *total += rtx_cost (sub, FMA, 2, speed); 10788 return true; 10789 } 10790 10791 case MULT: 10792 if (float_mode_p) 10793 *total = sparc_costs->float_mul; 10794 else if (! 
TARGET_HARD_MUL) 10795 *total = COSTS_N_INSNS (25); 10796 else 10797 { 10798 int bit_cost; 10799 10800 bit_cost = 0; 10801 if (sparc_costs->int_mul_bit_factor) 10802 { 10803 int nbits; 10804 10805 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 10806 { 10807 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 10808 for (nbits = 0; value != 0; value &= value - 1) 10809 nbits++; 10810 } 10811 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE 10812 && GET_MODE (XEXP (x, 1)) == VOIDmode) 10813 { 10814 rtx x1 = XEXP (x, 1); 10815 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1); 10816 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1); 10817 10818 for (nbits = 0; value1 != 0; value1 &= value1 - 1) 10819 nbits++; 10820 for (; value2 != 0; value2 &= value2 - 1) 10821 nbits++; 10822 } 10823 else 10824 nbits = 7; 10825 10826 if (nbits < 3) 10827 nbits = 3; 10828 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor; 10829 bit_cost = COSTS_N_INSNS (bit_cost); 10830 } 10831 10832 if (mode == DImode) 10833 *total = sparc_costs->int_mulX + bit_cost; 10834 else 10835 *total = sparc_costs->int_mul + bit_cost; 10836 } 10837 return false; 10838 10839 case ASHIFT: 10840 case ASHIFTRT: 10841 case LSHIFTRT: 10842 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty; 10843 return false; 10844 10845 case DIV: 10846 case UDIV: 10847 case MOD: 10848 case UMOD: 10849 if (float_mode_p) 10850 { 10851 if (mode == DFmode) 10852 *total = sparc_costs->float_div_df; 10853 else 10854 *total = sparc_costs->float_div_sf; 10855 } 10856 else 10857 { 10858 if (mode == DImode) 10859 *total = sparc_costs->int_divX; 10860 else 10861 *total = sparc_costs->int_div; 10862 } 10863 return false; 10864 10865 case NEG: 10866 if (! float_mode_p) 10867 { 10868 *total = COSTS_N_INSNS (1); 10869 return false; 10870 } 10871 /* FALLTHRU */ 10872 10873 case ABS: 10874 case FLOAT: 10875 case UNSIGNED_FLOAT: 10876 case FIX: 10877 case UNSIGNED_FIX: 10878 case FLOAT_EXTEND: 10879 case FLOAT_TRUNCATE: 10880 *total = sparc_costs->float_move; 10881 return false; 10882 10883 case SQRT: 10884 if (mode == DFmode) 10885 *total = sparc_costs->float_sqrt_df; 10886 else 10887 *total = sparc_costs->float_sqrt_sf; 10888 return false; 10889 10890 case COMPARE: 10891 if (float_mode_p) 10892 *total = sparc_costs->float_cmp; 10893 else 10894 *total = COSTS_N_INSNS (1); 10895 return false; 10896 10897 case IF_THEN_ELSE: 10898 if (float_mode_p) 10899 *total = sparc_costs->float_cmove; 10900 else 10901 *total = sparc_costs->int_cmove; 10902 return false; 10903 10904 case IOR: 10905 /* Handle the NAND vector patterns. */ 10906 if (sparc_vector_mode_supported_p (GET_MODE (x)) 10907 && GET_CODE (XEXP (x, 0)) == NOT 10908 && GET_CODE (XEXP (x, 1)) == NOT) 10909 { 10910 *total = COSTS_N_INSNS (1); 10911 return true; 10912 } 10913 else 10914 return false; 10915 10916 default: 10917 return false; 10918 } 10919 } 10920 10921 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */ 10922 10923 static inline bool 10924 general_or_i64_p (reg_class_t rclass) 10925 { 10926 return (rclass == GENERAL_REGS || rclass == I64_REGS); 10927 } 10928 10929 /* Implement TARGET_REGISTER_MOVE_COST. 
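The baseline cost of 2 covers a move within a class; direct FP/integer moves, available with VIS3, cost 4 (6 when a 64-bit value must travel as two halves in 32-bit mode), and anything that has to go through memory costs 6, or 12 on the UltraSPARC and Niagara families.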
*/ 10930 10931 static int 10932 sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED, 10933 reg_class_t from, reg_class_t to) 10934 { 10935 bool need_memory = false; 10936 10937 if (from == FPCC_REGS || to == FPCC_REGS) 10938 need_memory = true; 10939 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to)) 10940 || (general_or_i64_p (from) && FP_REG_CLASS_P (to))) 10941 { 10942 if (TARGET_VIS3) 10943 { 10944 int size = GET_MODE_SIZE (mode); 10945 if (size == 8 || size == 4) 10946 { 10947 if (! TARGET_ARCH32 || size == 4) 10948 return 4; 10949 else 10950 return 6; 10951 } 10952 } 10953 need_memory = true; 10954 } 10955 10956 if (need_memory) 10957 { 10958 if (sparc_cpu == PROCESSOR_ULTRASPARC 10959 || sparc_cpu == PROCESSOR_ULTRASPARC3 10960 || sparc_cpu == PROCESSOR_NIAGARA 10961 || sparc_cpu == PROCESSOR_NIAGARA2 10962 || sparc_cpu == PROCESSOR_NIAGARA3 10963 || sparc_cpu == PROCESSOR_NIAGARA4) 10964 return 12; 10965 10966 return 6; 10967 } 10968 10969 return 2; 10970 } 10971 10972 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2. 10973 This is achieved by means of a manual dynamic stack space allocation in 10974 the current frame. We make the assumption that SEQ doesn't contain any 10975 function calls, with the possible exception of calls to the GOT helper. */ 10976 10977 static void 10978 emit_and_preserve (rtx seq, rtx reg, rtx reg2) 10979 { 10980 /* We must preserve the lowest 16 words for the register save area. */ 10981 HOST_WIDE_INT offset = 16*UNITS_PER_WORD; 10982 /* We really need only 2 words of fresh stack space. */ 10983 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD); 10984 10985 rtx slot 10986 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx, 10987 SPARC_STACK_BIAS + offset)); 10988 10989 emit_insn (gen_stack_pointer_inc (GEN_INT (-size))); 10990 emit_insn (gen_rtx_SET (VOIDmode, slot, reg)); 10991 if (reg2) 10992 emit_insn (gen_rtx_SET (VOIDmode, 10993 adjust_address (slot, word_mode, UNITS_PER_WORD), 10994 reg2)); 10995 emit_insn (seq); 10996 if (reg2) 10997 emit_insn (gen_rtx_SET (VOIDmode, 10998 reg2, 10999 adjust_address (slot, word_mode, UNITS_PER_WORD))); 11000 emit_insn (gen_rtx_SET (VOIDmode, reg, slot)); 11001 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 11002 } 11003 11004 /* Output the assembler code for a thunk function. THUNK_DECL is the 11005 declaration for the thunk function itself, FUNCTION is the decl for 11006 the target function. DELTA is an immediate constant offset to be 11007 added to THIS. If VCALL_OFFSET is nonzero, the word at address 11008 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */ 11009 11010 static void 11011 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 11012 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 11013 tree function) 11014 { 11015 rtx this_rtx, insn, funexp; 11016 unsigned int int_arg_first; 11017 11018 reload_completed = 1; 11019 epilogue_completed = 1; 11020 11021 emit_note (NOTE_INSN_PROLOGUE_END); 11022 11023 if (TARGET_FLAT) 11024 { 11025 sparc_leaf_function_p = 1; 11026 11027 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST; 11028 } 11029 else if (flag_delayed_branch) 11030 { 11031 /* We will emit a regular sibcall below, so we need to instruct 11032 output_sibcall that we are in a leaf function. */ 11033 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1; 11034 11035 /* This will cause final.c to invoke leaf_renumber_regs so we 11036 must behave as if we were in a not-yet-leafified function. 
*/ 11037 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST; 11038 } 11039 else 11040 { 11041 /* We will emit the sibcall manually below, so we will need to 11042 manually spill non-leaf registers. */ 11043 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0; 11044 11045 /* We really are in a leaf function. */ 11046 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST; 11047 } 11048 11049 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function 11050 returns a structure, the structure return pointer is there instead. */ 11051 if (TARGET_ARCH64 11052 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 11053 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1); 11054 else 11055 this_rtx = gen_rtx_REG (Pmode, int_arg_first); 11056 11057 /* Add DELTA. When possible use a plain add, otherwise load it into 11058 a register first. */ 11059 if (delta) 11060 { 11061 rtx delta_rtx = GEN_INT (delta); 11062 11063 if (! SPARC_SIMM13_P (delta)) 11064 { 11065 rtx scratch = gen_rtx_REG (Pmode, 1); 11066 emit_move_insn (scratch, delta_rtx); 11067 delta_rtx = scratch; 11068 } 11069 11070 /* THIS_RTX += DELTA. */ 11071 emit_insn (gen_add2_insn (this_rtx, delta_rtx)); 11072 } 11073 11074 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */ 11075 if (vcall_offset) 11076 { 11077 rtx vcall_offset_rtx = GEN_INT (vcall_offset); 11078 rtx scratch = gen_rtx_REG (Pmode, 1); 11079 11080 gcc_assert (vcall_offset < 0); 11081 11082 /* SCRATCH = *THIS_RTX. */ 11083 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx)); 11084 11085 /* Prepare for adding VCALL_OFFSET. The difficulty is that we 11086 may not have any available scratch register at this point. */ 11087 if (SPARC_SIMM13_P (vcall_offset)) 11088 ; 11089 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */ 11090 else if (! fixed_regs[5] 11091 /* The below sequence is made up of at least 2 insns, 11092 while the default method may need only one. */ 11093 && vcall_offset < -8192) 11094 { 11095 rtx scratch2 = gen_rtx_REG (Pmode, 5); 11096 emit_move_insn (scratch2, vcall_offset_rtx); 11097 vcall_offset_rtx = scratch2; 11098 } 11099 else 11100 { 11101 rtx increment = GEN_INT (-4096); 11102 11103 /* VCALL_OFFSET is a negative number whose typical range can be 11104 estimated as -32768..0 in 32-bit mode. In almost all cases 11105 it is therefore cheaper to emit multiple add insns than 11106 spilling and loading the constant into a register (at least 11107 6 insns). */ 11108 while (! SPARC_SIMM13_P (vcall_offset)) 11109 { 11110 emit_insn (gen_add2_insn (scratch, increment)); 11111 vcall_offset += 4096; 11112 } 11113 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */ 11114 } 11115 11116 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */ 11117 emit_move_insn (scratch, gen_rtx_MEM (Pmode, 11118 gen_rtx_PLUS (Pmode, 11119 scratch, 11120 vcall_offset_rtx))); 11121 11122 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */ 11123 emit_insn (gen_add2_insn (this_rtx, scratch)); 11124 } 11125 11126 /* Generate a tail call to the target function. */ 11127 if (! TREE_USED (function)) 11128 { 11129 assemble_external (function); 11130 TREE_USED (function) = 1; 11131 } 11132 funexp = XEXP (DECL_RTL (function), 0); 11133 11134 if (flag_delayed_branch) 11135 { 11136 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 11137 insn = emit_call_insn (gen_sibcall (funexp)); 11138 SIBLING_CALL_P (insn) = 1; 11139 } 11140 else 11141 { 11142 /* The hoops we have to jump through in order to generate a sibcall 11143 without using delay slots... 
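In outline: materialize the target address into the scratch register %g1 (via the GOT when generating PIC code, with a sethi/lo_sum pair on 32-bit, or with the code-model-specific 64-bit sequence), spilling and restoring %o7 around any helper that clobbers it, then finish with an indirect jump.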
*/ 11144 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1); 11145 11146 if (flag_pic) 11147 { 11148 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */ 11149 start_sequence (); 11150 load_got_register (); /* clobbers %o7 */ 11151 scratch = sparc_legitimize_pic_address (funexp, scratch); 11152 seq = get_insns (); 11153 end_sequence (); 11154 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx); 11155 } 11156 else if (TARGET_ARCH32) 11157 { 11158 emit_insn (gen_rtx_SET (VOIDmode, 11159 scratch, 11160 gen_rtx_HIGH (SImode, funexp))); 11161 emit_insn (gen_rtx_SET (VOIDmode, 11162 scratch, 11163 gen_rtx_LO_SUM (SImode, scratch, funexp))); 11164 } 11165 else /* TARGET_ARCH64 */ 11166 { 11167 switch (sparc_cmodel) 11168 { 11169 case CM_MEDLOW: 11170 case CM_MEDMID: 11171 /* The destination can serve as a temporary. */ 11172 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch); 11173 break; 11174 11175 case CM_MEDANY: 11176 case CM_EMBMEDANY: 11177 /* The destination cannot serve as a temporary. */ 11178 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */ 11179 start_sequence (); 11180 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg); 11181 seq = get_insns (); 11182 end_sequence (); 11183 emit_and_preserve (seq, spill_reg, 0); 11184 break; 11185 11186 default: 11187 gcc_unreachable (); 11188 } 11189 } 11190 11191 emit_jump_insn (gen_indirect_jump (scratch)); 11192 } 11193 11194 emit_barrier (); 11195 11196 /* Run just enough of rest_of_compilation to get the insns emitted. 11197 There's not really enough bulk here to make other passes such as 11198 instruction scheduling worth while. Note that use_thunk calls 11199 assemble_start_function and assemble_end_function. */ 11200 insn = get_insns (); 11201 shorten_branches (insn); 11202 final_start_function (insn, file, 1); 11203 final (insn, file, 1); 11204 final_end_function (); 11205 11206 reload_completed = 0; 11207 epilogue_completed = 0; 11208 } 11209 11210 /* Return true if sparc_output_mi_thunk would be able to output the 11211 assembler code for the thunk function specified by the arguments 11212 it is passed, and false otherwise. */ 11213 static bool 11214 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, 11215 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 11216 HOST_WIDE_INT vcall_offset, 11217 const_tree function ATTRIBUTE_UNUSED) 11218 { 11219 /* Bound the loop used in the default method above. */ 11220 return (vcall_offset >= -32768 || ! fixed_regs[5]); 11221 } 11222 11223 /* How to allocate a 'struct machine_function'. */ 11224 11225 static struct machine_function * 11226 sparc_init_machine_status (void) 11227 { 11228 return ggc_alloc_cleared_machine_function (); 11229 } 11230 11231 /* Locate some local-dynamic symbol still in use by this function 11232 so that we can print its name in local-dynamic base patterns. 

static const char *
get_some_local_dynamic_name (void)
{
  rtx insn;

  if (cfun->machine->some_ld_name)
    return cfun->machine->some_ld_name;

  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
      return cfun->machine->some_ld_name;

  gcc_unreachable ();
}

static int
get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;

  if (x
      && GET_CODE (x) == SYMBOL_REF
      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
    {
      cfun->machine->some_ld_name = XSTR (x, 0);
      return 1;
    }

  return 0;
}

/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void
sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.word\t%r_tls_dtpoff32(", file);
      break;
    case 8:
      fputs ("\t.xword\t%r_tls_dtpoff64(", file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  fputs (")", file);
}

/* Do whatever processing is required at the end of a file.  */

static void
sparc_file_end (void)
{
  /* If we need to emit the special GOT helper function, do so now.  */
  if (got_helper_rtx)
    {
      const char *name = XSTR (got_helper_rtx, 0);
      const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
#ifdef DWARF2_UNWIND_INFO
      bool do_cfi;
#endif

      if (USE_HIDDEN_LINKONCE)
        {
          tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                                  get_identifier (name),
                                  build_function_type_list (void_type_node,
                                                            NULL_TREE));
          DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                           NULL_TREE, void_type_node);
          TREE_PUBLIC (decl) = 1;
          TREE_STATIC (decl) = 1;
          make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
          DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
          DECL_VISIBILITY_SPECIFIED (decl) = 1;
          resolve_unique_section (decl, 0, flag_function_sections);
          allocate_struct_function (decl, true);
          cfun->is_thunk = 1;
          current_function_decl = decl;
          init_varasm_status ();
          assemble_start_function (decl, name);
        }
      else
        {
          const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
          switch_to_section (text_section);
          if (align > 0)
            ASM_OUTPUT_ALIGN (asm_out_file, align);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

#ifdef DWARF2_UNWIND_INFO
      do_cfi = dwarf2out_do_cfi_asm ();
      if (do_cfi)
        fprintf (asm_out_file, "\t.cfi_startproc\n");
#endif
      if (flag_delayed_branch)
        fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
                 reg_name, reg_name);
      else
        fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
                 reg_name, reg_name);
#ifdef DWARF2_UNWIND_INFO
      if (do_cfi)
        fprintf (asm_out_file, "\t.cfi_endproc\n");
#endif
    }

  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();

#ifdef TARGET_SOLARIS
  solaris_file_end ();
#endif
}

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  */
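/* The "g" returned below is the Itanium C++ ABI mangling code for
   __float128, so the 128-bit long double mangles like that type.  */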

static const char *
sparc_mangle_type (const_tree type)
{
  if (!TARGET_64BIT
      && TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif

/* Expand a membar instruction for various use cases.  Both the LOAD_STORE
   and BEFORE_AFTER arguments are of the form X_Y.  They are two-bit masks
   where bit 0 indicates that X is true, and bit 1 indicates Y is true.  */

void
sparc_emit_membar_for_model (enum memmodel model,
                             int load_store, int before_after)
{
  /* Bits for the MEMBAR mmask field.  */
  const int LoadLoad = 1;
  const int StoreLoad = 2;
  const int LoadStore = 4;
  const int StoreStore = 8;

  int mm = 0, implied = 0;

  switch (sparc_memory_model)
    {
    case SMM_SC:
      /* Sequential Consistency.  All memory transactions are immediately
         visible in sequential execution order.  No barriers needed.  */
      implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
      break;

    case SMM_TSO:
      /* Total Store Ordering: all memory transactions with store semantics
         are followed by an implied StoreStore.  */
      implied |= StoreStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
         operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 1)
        implied |= StoreLoad;
      /* FALLTHRU */

    case SMM_PSO:
      /* Partial Store Ordering: all memory transactions with load semantics
         are followed by an implied LoadLoad | LoadStore.  */
      implied |= LoadLoad | LoadStore;

      /* If we're not looking for a raw barrier (before+after), then atomic
         operations get the benefit of being both load and store.  */
      if (load_store == 3 && before_after == 2)
        implied |= StoreLoad | StoreStore;
      /* FALLTHRU */

    case SMM_RMO:
      /* Relaxed Memory Ordering: no implicit bits.  */
      break;

    default:
      gcc_unreachable ();
    }

  if (before_after & 1)
    {
      if (model == MEMMODEL_RELEASE
          || model == MEMMODEL_ACQ_REL
          || model == MEMMODEL_SEQ_CST)
        {
          if (load_store & 1)
            mm |= LoadLoad | StoreLoad;
          if (load_store & 2)
            mm |= LoadStore | StoreStore;
        }
    }
  if (before_after & 2)
    {
      if (model == MEMMODEL_ACQUIRE
          || model == MEMMODEL_ACQ_REL
          || model == MEMMODEL_SEQ_CST)
        {
          if (load_store & 1)
            mm |= LoadLoad | LoadStore;
          if (load_store & 2)
            mm |= StoreLoad | StoreStore;
        }
    }

  /* Remove the bits implied by the system memory model.  */
  mm &= ~implied;

  /* For raw barriers (before+after), always emit a barrier.
     This will become a compile-time barrier if needed.  */
  if (mm || before_after == 3)
    emit_insn (gen_membar (GEN_INT (mm)));
}
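/* A hand-checked example of the computation above: for a SEQ_CST atomic
   operation, i.e. LOAD_STORE == 3 and BEFORE_AFTER == 1, MM collects all
   four bits.  Under TSO the implied set is StoreStore, plus StoreLoad from
   the atomic special case, plus LoadLoad | LoadStore from the PSO
   fallthrough, so MM is cleared and no membar is emitted; under RMO nothing
   is implied and a membar with all four mmask bits is emitted.  */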

/* Expand code to perform an 8 or 16-bit compare and swap by doing 32-bit
   compare and swap on the word containing the byte or half-word.  */

static void
sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
                                  rtx oldval, rtx newval)
{
  rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
  rtx addr = gen_reg_rtx (Pmode);
  rtx off = gen_reg_rtx (SImode);
  rtx oldv = gen_reg_rtx (SImode);
  rtx newv = gen_reg_rtx (SImode);
  rtx oldvalue = gen_reg_rtx (SImode);
  rtx newvalue = gen_reg_rtx (SImode);
  rtx res = gen_reg_rtx (SImode);
  rtx resv = gen_reg_rtx (SImode);
  rtx memsi, val, mask, end_label, loop_label, cc;

  emit_insn (gen_rtx_SET (VOIDmode, addr,
                          gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));

  if (Pmode != SImode)
    addr1 = gen_lowpart (SImode, addr1);
  emit_insn (gen_rtx_SET (VOIDmode, off,
                          gen_rtx_AND (SImode, addr1, GEN_INT (3))));

  memsi = gen_rtx_MEM (SImode, addr);
  set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
  MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);

  val = copy_to_reg (memsi);

  emit_insn (gen_rtx_SET (VOIDmode, off,
                          gen_rtx_XOR (SImode, off,
                                       GEN_INT (GET_MODE (mem) == QImode
                                                ? 3 : 2))));

  emit_insn (gen_rtx_SET (VOIDmode, off,
                          gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));

  if (GET_MODE (mem) == QImode)
    mask = force_reg (SImode, GEN_INT (0xff));
  else
    mask = force_reg (SImode, GEN_INT (0xffff));

  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_ASHIFT (SImode, mask, off)));

  emit_insn (gen_rtx_SET (VOIDmode, val,
                          gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
                                       val)));

  oldval = gen_lowpart (SImode, oldval);
  emit_insn (gen_rtx_SET (VOIDmode, oldv,
                          gen_rtx_ASHIFT (SImode, oldval, off)));

  newval = gen_lowpart_common (SImode, newval);
  emit_insn (gen_rtx_SET (VOIDmode, newv,
                          gen_rtx_ASHIFT (SImode, newval, off)));

  emit_insn (gen_rtx_SET (VOIDmode, oldv,
                          gen_rtx_AND (SImode, oldv, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, newv,
                          gen_rtx_AND (SImode, newv, mask)));

  end_label = gen_label_rtx ();
  loop_label = gen_label_rtx ();
  emit_label (loop_label);

  emit_insn (gen_rtx_SET (VOIDmode, oldvalue,
                          gen_rtx_IOR (SImode, oldv, val)));

  emit_insn (gen_rtx_SET (VOIDmode, newvalue,
                          gen_rtx_IOR (SImode, newv, val)));

  emit_move_insn (bool_result, const1_rtx);

  emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));

  emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);

  emit_insn (gen_rtx_SET (VOIDmode, resv,
                          gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
                                       res)));

  emit_move_insn (bool_result, const0_rtx);

  cc = gen_compare_reg_1 (NE, resv, val);
  emit_insn (gen_rtx_SET (VOIDmode, val, resv));

  /* Use cbranchcc4 to separate the compare and branch!  */
  emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
                                  cc, const0_rtx, loop_label));

  emit_label (end_label);

  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_AND (SImode, res, mask)));

  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_LSHIFTRT (SImode, res, off)));

  emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}

/* Expand code to perform a compare-and-swap.  */
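/* The operand layout below is assumed to follow the standard
   atomic_compare_and_swap optab: operand 0 is the boolean success flag,
   1 the loaded value, 2 the memory location, 3 the expected value, 4 the
   new value, and 6 the success memory model; operands 5 (is_weak) and 7
   (the failure model) are not consulted by this implementation.  */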

void
sparc_expand_compare_and_swap (rtx operands[])
{
  rtx bval, retval, mem, oldval, newval;
  enum machine_mode mode;
  enum memmodel model;

  bval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  model = (enum memmodel) INTVAL (operands[6]);
  mode = GET_MODE (mem);

  sparc_emit_membar_for_model (model, 3, 1);

  if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode == QImode || mode == HImode)
    sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
  else
    {
      rtx (*gen) (rtx, rtx, rtx, rtx);
      rtx x;

      if (mode == SImode)
        gen = gen_atomic_compare_and_swapsi_1;
      else
        gen = gen_atomic_compare_and_swapdi_1;
      emit_insn (gen (retval, mem, oldval, newval));

      x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
      if (x != bval)
        convert_move (bval, x, 1);
    }

  sparc_emit_membar_for_model (model, 3, 2);
}

void
sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
{
  rtx t_1, t_2, t_3;

  sel = gen_lowpart (DImode, sel);
  switch (vmode)
    {
    case V2SImode:
      /* inp = xxxxxxxAxxxxxxxB */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....xxxxxxxAxxx. */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
                                 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
                                 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......B */
      /* t_1 = ...A.... */
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      /* sel = ...A...B */
      sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
      /* sel = AAAABBBB * 4 */
      t_1 = force_reg (SImode, GEN_INT (0x01230123));
      /* sel = { A*4, A*4+1, A*4+2, ... } */
      break;

    case V4HImode:
      /* inp = xxxAxxxBxxxCxxxD */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..xxxAxxxBxxxCxx */
      /* t_2 = ....xxxAxxxBxxxC */
      /* t_3 = ......xxxAxxxBxx */
      sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
                                 GEN_INT (0x07),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
                                 GEN_INT (0x0700),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
                                 GEN_INT (0x070000),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
                                 GEN_INT (0x07000000),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .......D */
      /* t_1 = .....C.. */
      /* t_2 = ...B.... */
      /* t_3 = .A...... */
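      /* The four isolated indices are merged into a single .A.B.C.D word;
         multiplying by 0x22 then doubles each index into adjacent nibbles
         (each halfword spans two bytes), and the 0x01010101 added by the
         bmask insn below turns each pair into consecutive byte indices
         { 2A, 2A+1, 2B, 2B+1, ... }.  */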
      sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
      t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
      sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D */
      sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
      /* sel = AABBCCDD * 2 */
      t_1 = force_reg (SImode, GEN_INT (0x01010101));
      /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
      break;

    case V8QImode:
      /* input = xAxBxCxDxExFxGxH */
      sel = expand_simple_binop (DImode, AND, sel,
                                 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
                                          | 0x0f0f0f0f),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .A.B.C.D.E.F.G.H */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ..A.B.C.D.E.F.G. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = .AABBCCDDEEFFGGH */
      sel = expand_simple_binop (DImode, AND, sel,
                                 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
                                          | 0xff00ff),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..AB..CD..EF..GH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ....AB..CD..EF.. */
      sel = expand_simple_binop (DImode, IOR, sel, t_1,
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ..ABABCDCDEFEFGH */
      sel = expand_simple_binop (DImode, AND, sel,
                                 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* sel = ....ABCD....EFGH */
      t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* t_1 = ........ABCD.... */
      sel = gen_lowpart (SImode, sel);
      t_1 = gen_lowpart (SImode, t_1);
      break;

    default:
      gcc_unreachable ();
    }

  /* Always perform the final addition/merge within the bmask insn.  */
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
}

/* Implement TARGET_FRAME_POINTER_REQUIRED.  */

static bool
sparc_frame_pointer_required (void)
{
  /* If the stack pointer is dynamically modified in the function, it cannot
     serve as the frame pointer.  */
  if (cfun->calls_alloca)
    return true;

  /* If the function receives nonlocal gotos, it needs to save the frame
     pointer in the nonlocal_goto_save_area object.  */
  if (cfun->has_nonlocal_label)
    return true;

  /* In flat mode, that's it.  */
  if (TARGET_FLAT)
    return false;

  /* Otherwise, the frame pointer is required if the function isn't leaf.  */
  return !(crtl->is_leaf && only_leaf_regs_used ());
}

/* The way this is structured, we can't eliminate SFP in favor of SP
   if the frame pointer is required: we want to use the SFP->HFP elimination
   in that case.  But the test in update_eliminables doesn't know we are
   assuming below that we only do the former elimination.  */

static bool
sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
}

/* Return the hard frame pointer directly to bypass the stack bias.  */
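/* On 64-bit SPARC the register value is offset from the actual frame
   address by STACK_BIAS (2047); returning the raw register avoids
   materializing the bias, which works out because __builtin_longjmp
   restores the same raw value.  */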

static rtx
sparc_builtin_setjmp_frame_value (void)
{
  return hard_frame_pointer_rtx;
}

/* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
   they won't be allocated.  */

static void
sparc_conditional_register_usage (void)
{
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g5, then honor it.  */
  if (TARGET_ARCH32 && fixed_regs[5])
    fixed_regs[5] = 1;
  else if (TARGET_ARCH64 && fixed_regs[5] == 2)
    fixed_regs[5] = 0;
  if (! TARGET_V9)
    {
      int regno;
      for (regno = SPARC_FIRST_V9_FP_REG;
           regno <= SPARC_LAST_V9_FP_REG;
           regno++)
        fixed_regs[regno] = 1;
      /* %fcc0 is used by v8 and v9.  */
      for (regno = SPARC_FIRST_V9_FCC_REG + 1;
           regno <= SPARC_LAST_V9_FCC_REG;
           regno++)
        fixed_regs[regno] = 1;
    }
  if (! TARGET_FPU)
    {
      int regno;
      for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
        fixed_regs[regno] = 1;
    }
  /* If the user has passed -f{fixed,call-{used,saved}}-g2, then honor it.
     Likewise with g3 and g4.  */
  if (fixed_regs[2] == 2)
    fixed_regs[2] = ! TARGET_APP_REGS;
  if (fixed_regs[3] == 2)
    fixed_regs[3] = ! TARGET_APP_REGS;
  if (TARGET_ARCH32 && fixed_regs[4] == 2)
    fixed_regs[4] = ! TARGET_APP_REGS;
  else if (TARGET_CM_EMBMEDANY)
    fixed_regs[4] = 1;
  else if (fixed_regs[4] == 2)
    fixed_regs[4] = 0;
  if (TARGET_FLAT)
    {
      int regno;
      /* Disable leaf functions.  */
      memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
        leaf_reg_remap [regno] = regno;
    }
  if (TARGET_VIS)
    global_regs[SPARC_GSR_REG] = 1;
}

/* Implement TARGET_PREFERRED_RELOAD_CLASS:

   - We can't load constants into FP registers.
   - We can't load FP constants into integer registers when soft-float,
     because there is no soft-float pattern with a r/F constraint.
   - We can't load FP constants into integer registers for TFmode unless
     it is 0.0L, because there is no movtf pattern with a r/F constraint.
   - Try and reload integer constants (symbolic or otherwise) back into
     registers directly, rather than having them dumped to memory.  */

static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
  enum machine_mode mode = GET_MODE (x);
  if (CONSTANT_P (x))
    {
      if (FP_REG_CLASS_P (rclass)
          || rclass == GENERAL_OR_FP_REGS
          || rclass == GENERAL_OR_EXTRA_FP_REGS
          || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
          || (mode == TFmode && ! const_zero_operand (x, mode)))
        return NO_REGS;

      if (GET_MODE_CLASS (mode) == MODE_INT)
        return GENERAL_REGS;

      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
        {
          if (! FP_REG_CLASS_P (rclass)
              || !(const_zero_operand (x, mode)
                   || const_all_ones_operand (x, mode)))
            return NO_REGS;
        }
    }

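  /* With VIS3 on 32-bit, moves between integer and FP registers only work
     through the lower FP registers (see sparc_secondary_reload below), so
     steer a value living in an integer register away from the EXTRA_FP_REGS
     classes.  */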
  if (TARGET_VIS3
      && ! TARGET_ARCH64
      && (rclass == EXTRA_FP_REGS
          || rclass == GENERAL_OR_EXTRA_FP_REGS))
    {
      int regno = true_regnum (x);

      if (SPARC_INT_REG_P (regno))
        return (rclass == EXTRA_FP_REGS
                ? FP_REGS : GENERAL_OR_FP_REGS);
    }

  return rclass;
}

/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */

const char *
output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      if (which_alternative == 1)
        {
          output_asm_insn ("or\t%L1, %H1, %H1", operands);
          sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
          output_asm_insn (mulstr, operands);
          return "srlx\t%L0, 32, %H0";
        }
      else
        {
          output_asm_insn ("sllx\t%H1, 32, %3", operands);
          output_asm_insn ("or\t%L1, %3, %3", operands);
          sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
          output_asm_insn (mulstr, operands);
          output_asm_insn ("srlx\t%3, 32, %H0", operands);
          return "mov\t%3, %L0";
        }
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      if (which_alternative == 1)
        {
          output_asm_insn ("or\t%L1, %H1, %H1", operands);
          sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
          output_asm_insn (mulstr, operands);
          return "srlx\t%L0, 32, %H0";
        }
      else
        {
          output_asm_insn ("sllx\t%H1, 32, %3", operands);
          output_asm_insn ("or\t%L1, %3, %3", operands);
          sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
          output_asm_insn (mulstr, operands);
          output_asm_insn ("srlx\t%3, 32, %H0", operands);
          return "mov\t%3, %L0";
        }
    }
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn.  MODE
   and INNER_MODE are the modes describing TARGET.  */
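/* The bmask values used below index bytes of the 16-byte concatenation of
   the two (identical) bshuffle sources; t1 holds ELT in its low 32 bits,
   so 0x45674567 replicates the low word (bytes 4-7) for V2SI, 0x67676767
   the low halfword for V4HI, and 0x77777777 the low byte for V8QI.  */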

static void
vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
                      enum machine_mode inner_mode)
{
  rtx t1, final_insn;
  int bmask;

  t1 = gen_reg_rtx (mode);

  elt = convert_modes (SImode, inner_mode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  switch (mode)
    {
    case V2SImode:
      final_insn = gen_bshufflev2si_vis (target, t1, t1);
      bmask = 0x45674567;
      break;
    case V4HImode:
      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
      bmask = 0x67676767;
      break;
    case V8QImode:
      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
      bmask = 0x77777777;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
                              force_reg (SImode, GEN_INT (bmask))));
  emit_insn (final_insn);
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn.  */

static void
vector_init_fpmerge (rtx target, rtx elt)
{
  rtx t1, t2, t2_low, t3, t3_low;

  t1 = gen_reg_rtx (V4QImode);
  elt = convert_modes (SImode, QImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  t2 = gen_reg_rtx (V8QImode);
  t2_low = gen_lowpart (V4QImode, t2);
  emit_insn (gen_fpmerge_vis (t2, t1, t1));

  t3 = gen_reg_rtx (V8QImode);
  t3_low = gen_lowpart (V4QImode, t3);
  emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));

  emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn.  */

static void
vector_init_faligndata (rtx target, rtx elt)
{
  rtx t1 = gen_reg_rtx (V4HImode);
  int i;

  elt = convert_modes (SImode, HImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
                                  force_reg (SImode, GEN_INT (6)),
                                  const0_rtx));

  for (i = 0; i < 4; i++)
    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
}

/* Emit code to initialize TARGET to values for individual fields VALS.  */
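/* The strategy, in decreasing order of preference: an all-constant vector
   becomes a CONST_VECTOR move; a single-element vector reduces to a scalar
   move; a vector of two word-sized elements is built with two word moves;
   an 8-byte vector of identical elements uses one of the VIS helpers above;
   anything else is assembled in a stack temporary.  */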

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const enum machine_mode mode = GET_MODE (target);
  const enum machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same;
  rtx mem;

  all_same = true;
  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
        n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;
    }

  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
        {
          emit_move_insn (gen_lowpart (SImode, target),
                          gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
          return;
        }
      else if (GET_MODE_SIZE (inner_mode) == 8)
        {
          emit_move_insn (gen_lowpart (DImode, target),
                          gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
          return;
        }
    }
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
           && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
                      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
                      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
        {
          vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
          return;
        }
      if (mode == V8QImode)
        {
          vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
          return;
        }
      if (mode == V4HImode)
        {
          vector_init_faligndata (target, XVECEXP (vals, 0, 0));
          return;
        }
    }

  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
                                       i * GET_MODE_SIZE (inner_mode)),
                    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
                        enum machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
          || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
              && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

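  /* Absolute symbolic addresses in these code models require a multi-insn
     sequence with a scratch register; punt to the reload_in/reload_out
     patterns from the machine description, which take an explicit scratch
     operand.  */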
  if (((TARGET_CM_MEDANY
        && symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
           && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
        sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
        sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
         to move 8-byte values in 4-byte pieces.  This only works via
         FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
         move between EXTRA_FP_REGS and GENERAL_REGS, we will need
         an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
          || ((general_or_i64_p (rclass)
               || rclass == GENERAL_OR_FP_REGS)
              && SPARC_FP_REG_P (regno)))
        {
          sri->extra_cost = 2;
          return FP_REGS;
        }
    }

  return NO_REGS;
}

/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */

bool
sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  enum machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      if (reg_overlap_mentioned_p (dst, cmp))
        dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
        rc = reverse_condition_maybe_unordered (rc);
      else
        rc = reverse_condition (rc);
    }

  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (VOIDmode, dst,
                          gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}

/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */
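/* The expansion below is a three-insn VIS2 sequence: a vector comparison
   producing an element mask, a transfer of that mask into the GSR via the
   CMASK operation, and a BSHUFFLE selecting each byte of the result from
   OPERANDS[1] or OPERANDS[2] under GSR control.  */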

void
sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  fcmp = gen_rtx_UNSPEC (Pmode,
                         gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
                         fcode);

  cmask = gen_rtx_UNSPEC (DImode,
                          gen_rtvec (2, mask, gsr),
                          ccode);

  bshuf = gen_rtx_UNSPEC (mode,
                          gen_rtvec (3, operands[1], operands[2], gsr),
                          UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
  emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));

  emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
}

/* On sparc, any mode which naturally allocates into the float
   registers should return 4 here.  */

unsigned int
sparc_regmode_natural_size (enum machine_mode mode)
{
  int size = UNITS_PER_WORD;

  if (TARGET_ARCH64)
    {
      enum mode_class mclass = GET_MODE_CLASS (mode);

      if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
        size = 4;
    }

  return size;
}

/* Return TRUE if it is a good idea to tie two pseudo registers
   when one has mode MODE1 and one has mode MODE2.
   If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
   for any hard reg, then this must be FALSE for correct output.

   For V9 we have to deal with the fact that only the lower 32 floating
   point registers are 32-bit addressable.  */

bool
sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
{
  enum mode_class mclass1, mclass2;
  unsigned short size1, size2;

  if (mode1 == mode2)
    return true;

  mclass1 = GET_MODE_CLASS (mode1);
  mclass2 = GET_MODE_CLASS (mode2);
  if (mclass1 != mclass2)
    return false;

  if (! TARGET_V9)
    return true;

  /* Classes are the same and we are V9 so we have to deal with upper
     vs. lower floating point registers.  If one of the modes is a
     4-byte mode, and the other is not, we have to mark them as not
     tieable because only the lower 32 floating point registers are
     addressable 32 bits at a time.

     We can't just test explicitly for SFmode, otherwise we won't
     cover the vector mode cases properly.  */

  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
    return true;

  size1 = GET_MODE_SIZE (mode1);
  size2 = GET_MODE_SIZE (mode2);
  if ((size1 > 4 && size2 == 4)
      || (size2 > 4 && size1 == 4))
    return false;

  return true;
}

#include "gt-sparc.h"