/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2015 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "function.h"
#include "except.h"
#include "hashtab.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "hash-table.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "df.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed rather
     than variable (see the worked example after this structure).  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
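/* A worked example of the int_mul_bit_factor formula above, with
   illustrative (made-up) numbers: on a core with int_mul =
   COSTS_N_INSNS (4) and int_mul_bit_factor = 2, a multiply by
   rs1 = 0x12345 has highest_set_bit (0x12345) = 16, so the modeled
   cost is COSTS_N_INSNS (4) + ((16 - 3) / 2) = COSTS_N_INSNS (4) + 6.
   With int_mul_bit_factor = 0, the same multiply would simply cost
   COSTS_N_INSNS (4).  */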
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};
static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};
static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;
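/* Note that COSTS_N_INSNS (N) expands to N * 4, i.e. these tables are
   expressed in quarter-instruction cost units.  For example, the
   ultrasparc_costs entry of COSTS_N_INSNS (37) for idiv tells the
   middle-end that an integer divide is roughly as expensive as 37
   single-cycle instructions when candidate sequences are compared.  */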
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   anything branches to a point between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
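/* For example, leaf_reg_remap[24] == 8: when a function receives the leaf
   function treatment, no register window is allocated, so the incoming
   argument register %i0 (hard reg 24) is rewritten to the caller's
   outgoing register %o0 (hard reg 8).  Entries of -1 mark registers that
   must not appear in code subjected to this remapping.  */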
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size		cfun->machine->frame_size
#define sparc_apparent_frame_size	cfun->machine->apparent_frame_size
#define sparc_frame_base_reg		cfun->machine->frame_base_reg
#define sparc_frame_base_offset		cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs		cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p		cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p	cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p	cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static void scan_record_type (const_tree, int *, int *, int *);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *, HOST_WIDE_INT);
static void sparc_asm_function_epilogue (FILE *, HOST_WIDE_INT);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (machine_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static machine_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */
/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING SPARC_RELAXED_ORDERING

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
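/* The ldstub instruction atomically loads a byte and stores all ones
   (0xff) into it, so the "set" value observed by __atomic_test_and_set
   on SPARC is 0xff rather than 1; the hook above lets generic code
   compare against the right value.  */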
#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* We use a machine specific pass to enable workarounds for errata.
   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  The pass is inserted in the pass pipeline
   at the end of sparc_option_override.  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn))
	if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
	  insn = seq->insn (1);

      /* Look for a single-word load into an odd-numbered FP register.  */
      if (sparc_fix_at697f
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	  && MEM_P (SET_SRC (set))
	  && REG_P (SET_DEST (set))
	  && REGNO (SET_DEST (set)) > 31
	  && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
			ld [address], %fx+1
			FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			ld [address], %fx+1
			FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		    ld [address], %fx+1
		    fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}
      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && mem_ref (SET_SRC (set)) != NULL_RTX
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      /* The only errata we handle are those of the AT697F and UT699.  */
      return sparc_fix_at697f != 0 || sparc_fix_ut699 != 0;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}
/* Helpers for TARGET_DEBUG_OPTIONS.  */

static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
    { "leon", MASK_ISA, MASK_V8|MASK_LEON|MASK_FPU },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3|MASK_FPU },
    { "leon3v7", MASK_ISA, MASK_LEON3|MASK_FPU },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
  };
  const struct cpu_table *cpu;
  unsigned int i;
  int fpu;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (! TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use a 128-bit long double.  */
  if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }
  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32 bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  fpu = target_flags & MASK_FPU; /* save current -mfpu status */

  /* Set the default CPU.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      fprintf (stderr, "sparc_cpu: %s\n",
	       cpu_table[(int) sparc_cpu].name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   );

  /* If -mfpu or -mno-fpu was explicitly used, don't override with
     the processor default.  */
  if (target_flags_explicit & MASK_FPU)
    target_flags = (target_flags & ~MASK_FPU) | fpu;

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is
     disabled.  */
  if (! TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available.
     -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && ! TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32 bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9, makes no sense in 64 bit mode.  */
  if (! TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32 bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;
  /* Supply a default value for align_functions.  */
  if (align_functions == 0
      && (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4))
    align_functions = 32;

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;

  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    }

  if (sparc_memory_model == SMM_DEFAULT)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;
      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 2
			  : (sparc_cpu == PROCESSOR_ULTRASPARC3
			     ? 8 : 3)),
			 global_options.x_param_values,
			 global_options_set.x_param_values);
  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
			 ((sparc_cpu == PROCESSOR_ULTRASPARC
			   || sparc_cpu == PROCESSOR_ULTRASPARC3
			   || sparc_cpu == PROCESSOR_NIAGARA
			   || sparc_cpu == PROCESSOR_NIAGARA2
			   || sparc_cpu == PROCESSOR_NIAGARA3
			   || sparc_cpu == PROCESSOR_NIAGARA4)
			  ? 64 : 32),
			 global_options.x_param_values,
			 global_options_set.x_param_values);

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* We register a machine specific pass to work around errata, if any.
     The pass must be scheduled as late as possible so that we have the
     (essentially) final form of the insn stream to work on.
     Registering the pass must be done at start up.  It's convenient to
     do it here.  */
  opt_pass *errata_pass = make_pass_work_around_errata (g);
  struct register_pass_info insert_pass_work_around_errata =
    {
      errata_pass,		/* pass */
      "dbr",			/* reference_pass_name */
      1,			/* ref_pass_instance_number */
      PASS_POS_INSERT_AFTER	/* pos_op */
    };
  register_pass (&insert_pass_work_around_errata);
}

/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  return (code == EQ || code == NE || code == GE || code == LT
	  || code == LE || code == GT);
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return SPARC_SIMM13_P (i);
    }

  return 0;
}

/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) == CONST_DOUBLE)
    {
      REAL_VALUE_TYPE r;
      long i;

      REAL_VALUE_FROM_CONST_DOUBLE (r, op);
      REAL_VALUE_TO_TARGET_SINGLE (r, i);
      return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
    }

  return 0;
}
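/* To make the three predicates above concrete (bit patterns are the IEEE
   single images of the constants, chosen for illustration): 1.5f is
   0x3fc00000, whose low 10 bits are clear, so it can be built with a
   single sethi and satisfies fp_sethi_p.  An image that fits the signed
   13-bit immediate range [-4096, 4095], e.g. 0.0f (0x00000000), satisfies
   fp_mov_p.  Anything else, e.g. 0x3fc00001, needs the sethi+or
   (high/losum) pair and matches fp_high_losum_p.  */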
*/ 1732 1733 static bool 1734 can_use_mov_pic_label_ref (rtx label) 1735 { 1736 /* VxWorks does not impose a fixed gap between segments; the run-time 1737 gap can be different from the object-file gap. We therefore can't 1738 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we 1739 are absolutely sure that X is in the same segment as the GOT. 1740 Unfortunately, the flexibility of linker scripts means that we 1741 can't be sure of that in general, so assume that GOT-relative 1742 accesses are never valid on VxWorks. */ 1743 if (TARGET_VXWORKS_RTP) 1744 return false; 1745 1746 /* Similarly, if the label is non-local, it might end up being placed 1747 in a different section than the current one; the mov_pic_label_ref 1748 patterns require the label and the code to be in the same section. */ 1749 if (LABEL_REF_NONLOCAL_P (label)) 1750 return false; 1751 1752 /* Finally, if we are reordering basic blocks and partitioning them into 1753 hot and cold sections, this might happen for any label. */ 1754 if (flag_reorder_blocks_and_partition) 1755 return false; 1756 1757 return true; 1758 } 1759 1760 /* Expand a move instruction. Return true if all work is done. */ 1761 1762 bool 1763 sparc_expand_move (machine_mode mode, rtx *operands) 1764 { 1765 /* Handle sets of MEM first. */ 1766 if (GET_CODE (operands[0]) == MEM) 1767 { 1768 /* 0 is a register (or a pair of registers) on SPARC. */ 1769 if (register_or_zero_operand (operands[1], mode)) 1770 return false; 1771 1772 if (!reload_in_progress) 1773 { 1774 operands[0] = validize_mem (operands[0]); 1775 operands[1] = force_reg (mode, operands[1]); 1776 } 1777 } 1778 1779 /* Fixup TLS cases. */ 1780 if (TARGET_HAVE_TLS 1781 && CONSTANT_P (operands[1]) 1782 && sparc_tls_referenced_p (operands[1])) 1783 { 1784 operands[1] = sparc_legitimize_tls_address (operands[1]); 1785 return false; 1786 } 1787 1788 /* Fixup PIC cases. */ 1789 if (flag_pic && CONSTANT_P (operands[1])) 1790 { 1791 if (pic_address_needs_scratch (operands[1])) 1792 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX); 1793 1794 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */ 1795 if (GET_CODE (operands[1]) == LABEL_REF 1796 && can_use_mov_pic_label_ref (operands[1])) 1797 { 1798 if (mode == SImode) 1799 { 1800 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1])); 1801 return true; 1802 } 1803 1804 if (mode == DImode) 1805 { 1806 gcc_assert (TARGET_ARCH64); 1807 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1])); 1808 return true; 1809 } 1810 } 1811 1812 if (symbolic_operand (operands[1], mode)) 1813 { 1814 operands[1] 1815 = sparc_legitimize_pic_address (operands[1], 1816 reload_in_progress 1817 ? operands[0] : NULL_RTX); 1818 return false; 1819 } 1820 } 1821 1822 /* If we are trying to toss an integer constant into FP registers, 1823 or loading a FP or vector constant, force it into memory. */ 1824 if (CONSTANT_P (operands[1]) 1825 && REG_P (operands[0]) 1826 && (SPARC_FP_REG_P (REGNO (operands[0])) 1827 || SCALAR_FLOAT_MODE_P (mode) 1828 || VECTOR_MODE_P (mode))) 1829 { 1830 /* emit_group_store will send such bogosity to us when it is 1831 not storing directly into memory. So fix this up to avoid 1832 crashes in output_constant_pool. */ 1833 if (operands[1] == const0_rtx) 1834 operands[1] = CONST0_RTX (mode); 1835 1836 /* We can clear or set to all-ones FP registers if TARGET_VIS, and 1837 always other regs.
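(VIS provides the fzero and fone instructions to do this for FP registers, hence the TARGET_VIS test below.)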
*/ 1838 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG) 1839 && (const_zero_operand (operands[1], mode) 1840 || const_all_ones_operand (operands[1], mode))) 1841 return false; 1842 1843 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG 1844 /* We are able to build any SF constant in integer registers 1845 with at most 2 instructions. */ 1846 && (mode == SFmode 1847 /* And any DF constant in integer registers. */ 1848 || (mode == DFmode 1849 && ! can_create_pseudo_p ()))) 1850 return false; 1851 1852 operands[1] = force_const_mem (mode, operands[1]); 1853 if (!reload_in_progress) 1854 operands[1] = validize_mem (operands[1]); 1855 return false; 1856 } 1857 1858 /* Accept non-constants and valid constants unmodified. */ 1859 if (!CONSTANT_P (operands[1]) 1860 || GET_CODE (operands[1]) == HIGH 1861 || input_operand (operands[1], mode)) 1862 return false; 1863 1864 switch (mode) 1865 { 1866 case QImode: 1867 /* All QImode constants require only one insn, so proceed. */ 1868 break; 1869 1870 case HImode: 1871 case SImode: 1872 sparc_emit_set_const32 (operands[0], operands[1]); 1873 return true; 1874 1875 case DImode: 1876 /* input_operand should have filtered out 32-bit mode. */ 1877 sparc_emit_set_const64 (operands[0], operands[1]); 1878 return true; 1879 1880 case TImode: 1881 { 1882 rtx high, low; 1883 /* TImode isn't available in 32-bit mode. */ 1884 split_double (operands[1], &high, &low); 1885 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode), 1886 high)); 1887 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode), 1888 low)); 1889 } 1890 return true; 1891 1892 default: 1893 gcc_unreachable (); 1894 } 1895 1896 return false; 1897 } 1898 1899 /* Load OP1, a 32-bit constant, into OP0, a register. 1900 We know it can't be done in one insn when we get 1901 here, the move expander guarantees this. */ 1902 1903 static void 1904 sparc_emit_set_const32 (rtx op0, rtx op1) 1905 { 1906 machine_mode mode = GET_MODE (op0); 1907 rtx temp = op0; 1908 1909 if (can_create_pseudo_p ()) 1910 temp = gen_reg_rtx (mode); 1911 1912 if (GET_CODE (op1) == CONST_INT) 1913 { 1914 gcc_assert (!small_int_operand (op1, mode) 1915 && !const_high_operand (op1, mode)); 1916 1917 /* Emit them as real moves instead of a HIGH/LO_SUM, 1918 this way CSE can see everything and reuse intermediate 1919 values if it wants. */ 1920 emit_insn (gen_rtx_SET (VOIDmode, temp, 1921 GEN_INT (INTVAL (op1) 1922 & ~(HOST_WIDE_INT)0x3ff))); 1923 1924 emit_insn (gen_rtx_SET (VOIDmode, 1925 op0, 1926 gen_rtx_IOR (mode, temp, 1927 GEN_INT (INTVAL (op1) & 0x3ff)))); 1928 } 1929 else 1930 { 1931 /* A symbol, emit in the traditional way. */ 1932 emit_insn (gen_rtx_SET (VOIDmode, temp, 1933 gen_rtx_HIGH (mode, op1))); 1934 emit_insn (gen_rtx_SET (VOIDmode, 1935 op0, gen_rtx_LO_SUM (mode, temp, op1))); 1936 } 1937 } 1938 1939 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register. 1940 If TEMP is nonzero, we are forbidden to use any other scratch 1941 registers. Otherwise, we are allowed to generate them as needed. 1942 1943 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY 1944 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */ 1945 1946 void 1947 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp) 1948 { 1949 rtx temp1, temp2, temp3, temp4, temp5; 1950 rtx ti_temp = 0; 1951 1952 if (temp && GET_MODE (temp) == TImode) 1953 { 1954 ti_temp = temp; 1955 temp = gen_rtx_REG (DImode, REGNO (temp)); 1956 } 1957 1958 /* SPARC-V9 code-model support. 
*/ 1959 switch (sparc_cmodel) 1960 { 1961 case CM_MEDLOW: 1962 /* The range spanned by all instructions in the object is less 1963 than 2^31 bytes (2GB) and the distance from any instruction 1964 to the location of the label _GLOBAL_OFFSET_TABLE_ is less 1965 than 2^31 bytes (2GB). 1966 1967 The executable must be in the low 4TB of the virtual address 1968 space. 1969 1970 sethi %hi(symbol), %temp1 1971 or %temp1, %lo(symbol), %reg */ 1972 if (temp) 1973 temp1 = temp; /* op0 is allowed. */ 1974 else 1975 temp1 = gen_reg_rtx (DImode); 1976 1977 emit_insn (gen_rtx_SET (VOIDmode, temp1, gen_rtx_HIGH (DImode, op1))); 1978 emit_insn (gen_rtx_SET (VOIDmode, op0, gen_rtx_LO_SUM (DImode, temp1, op1))); 1979 break; 1980 1981 case CM_MEDMID: 1982 /* The range spanned by all instructions in the object is less 1983 than 2^31 bytes (2GB) and the distance from any instruction 1984 to the location of the label _GLOBAL_OFFSET_TABLE_ is less 1985 than 2^31 bytes (2GB). 1986 1987 The executable must be in the low 16TB of the virtual address 1988 space. 1989 1990 sethi %h44(symbol), %temp1 1991 or %temp1, %m44(symbol), %temp2 1992 sllx %temp2, 12, %temp3 1993 or %temp3, %l44(symbol), %reg */ 1994 if (temp) 1995 { 1996 temp1 = op0; 1997 temp2 = op0; 1998 temp3 = temp; /* op0 is allowed. */ 1999 } 2000 else 2001 { 2002 temp1 = gen_reg_rtx (DImode); 2003 temp2 = gen_reg_rtx (DImode); 2004 temp3 = gen_reg_rtx (DImode); 2005 } 2006 2007 emit_insn (gen_seth44 (temp1, op1)); 2008 emit_insn (gen_setm44 (temp2, temp1, op1)); 2009 emit_insn (gen_rtx_SET (VOIDmode, temp3, 2010 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12)))); 2011 emit_insn (gen_setl44 (op0, temp3, op1)); 2012 break; 2013 2014 case CM_MEDANY: 2015 /* The range spanned by all instructions in the object is less 2016 than 2^31 bytes (2GB) and the distance from any instruction 2017 to the location of the label _GLOBAL_OFFSET_TABLE_ is less 2018 than 2^31 bytes (2GB). 2019 2020 The executable can be placed anywhere in the virtual address 2021 space. 2022 2023 sethi %hh(symbol), %temp1 2024 sethi %lm(symbol), %temp2 2025 or %temp1, %hm(symbol), %temp3 2026 sllx %temp3, 32, %temp4 2027 or %temp4, %temp2, %temp5 2028 or %temp5, %lo(symbol), %reg */ 2029 if (temp) 2030 { 2031 /* It is possible that one of the registers we got for operands[2] 2032 might coincide with that of operands[0] (which is why we made 2033 it TImode). Pick the other one to use as our scratch. */ 2034 if (rtx_equal_p (temp, op0)) 2035 { 2036 gcc_assert (ti_temp); 2037 temp = gen_rtx_REG (DImode, REGNO (temp) + 1); 2038 } 2039 temp1 = op0; 2040 temp2 = temp; /* op0 is _not_ allowed, see above. */ 2041 temp3 = op0; 2042 temp4 = op0; 2043 temp5 = op0; 2044 } 2045 else 2046 { 2047 temp1 = gen_reg_rtx (DImode); 2048 temp2 = gen_reg_rtx (DImode); 2049 temp3 = gen_reg_rtx (DImode); 2050 temp4 = gen_reg_rtx (DImode); 2051 temp5 = gen_reg_rtx (DImode); 2052 } 2053 2054 emit_insn (gen_sethh (temp1, op1)); 2055 emit_insn (gen_setlm (temp2, op1)); 2056 emit_insn (gen_sethm (temp3, temp1, op1)); 2057 emit_insn (gen_rtx_SET (VOIDmode, temp4, 2058 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); 2059 emit_insn (gen_rtx_SET (VOIDmode, temp5, 2060 gen_rtx_PLUS (DImode, temp4, temp2))); 2061 emit_insn (gen_setlo (op0, temp5, op1)); 2062 break; 2063 2064 case CM_EMBMEDANY: 2065 /* Old old old backwards compatibility kruft here. 2066 Essentially it is MEDLOW with a fixed 64-bit 2067 virtual base added to all data segment addresses. 
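(The base is kept in EMBMEDANY_BASE_REG and is added explicitly by the brsum pattern below.)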
2068 Text-segment stuff is computed like MEDANY; we can't 2069 reuse the code above because the relocation knobs 2070 look different. 2071 2072 Data segment: sethi %hi(symbol), %temp1 2073 add %temp1, EMBMEDANY_BASE_REG, %temp2 2074 or %temp2, %lo(symbol), %reg */ 2075 if (data_segment_operand (op1, GET_MODE (op1))) 2076 { 2077 if (temp) 2078 { 2079 temp1 = temp; /* op0 is allowed. */ 2080 temp2 = op0; 2081 } 2082 else 2083 { 2084 temp1 = gen_reg_rtx (DImode); 2085 temp2 = gen_reg_rtx (DImode); 2086 } 2087 2088 emit_insn (gen_embmedany_sethi (temp1, op1)); 2089 emit_insn (gen_embmedany_brsum (temp2, temp1)); 2090 emit_insn (gen_embmedany_losum (op0, temp2, op1)); 2091 } 2092 2093 /* Text segment: sethi %uhi(symbol), %temp1 2094 sethi %hi(symbol), %temp2 2095 or %temp1, %ulo(symbol), %temp3 2096 sllx %temp3, 32, %temp4 2097 or %temp4, %temp2, %temp5 2098 or %temp5, %lo(symbol), %reg */ 2099 else 2100 { 2101 if (temp) 2102 { 2103 /* It is possible that one of the registers we got for operands[2] 2104 might coincide with that of operands[0] (which is why we made 2105 it TImode). Pick the other one to use as our scratch. */ 2106 if (rtx_equal_p (temp, op0)) 2107 { 2108 gcc_assert (ti_temp); 2109 temp = gen_rtx_REG (DImode, REGNO (temp) + 1); 2110 } 2111 temp1 = op0; 2112 temp2 = temp; /* op0 is _not_ allowed, see above. */ 2113 temp3 = op0; 2114 temp4 = op0; 2115 temp5 = op0; 2116 } 2117 else 2118 { 2119 temp1 = gen_reg_rtx (DImode); 2120 temp2 = gen_reg_rtx (DImode); 2121 temp3 = gen_reg_rtx (DImode); 2122 temp4 = gen_reg_rtx (DImode); 2123 temp5 = gen_reg_rtx (DImode); 2124 } 2125 2126 emit_insn (gen_embmedany_textuhi (temp1, op1)); 2127 emit_insn (gen_embmedany_texthi (temp2, op1)); 2128 emit_insn (gen_embmedany_textulo (temp3, temp1, op1)); 2129 emit_insn (gen_rtx_SET (VOIDmode, temp4, 2130 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); 2131 emit_insn (gen_rtx_SET (VOIDmode, temp5, 2132 gen_rtx_PLUS (DImode, temp4, temp2))); 2133 emit_insn (gen_embmedany_textlo (op0, temp5, op1)); 2134 } 2135 break; 2136 2137 default: 2138 gcc_unreachable (); 2139 } 2140 } 2141 2142 #if HOST_BITS_PER_WIDE_INT == 32 2143 static void 2144 sparc_emit_set_const64 (rtx op0 ATTRIBUTE_UNUSED, rtx op1 ATTRIBUTE_UNUSED) 2145 { 2146 gcc_unreachable (); 2147 } 2148 #else 2149 /* These avoid problems when cross compiling. If we do not 2150 go through all this hair then the optimizer will see 2151 invalid REG_EQUAL notes or in some cases none at all. */ 2152 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT); 2153 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT); 2154 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT); 2155 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT); 2156 2157 /* The optimizer is not to assume anything about exactly 2158 which bits are set for a HIGH; they are unspecified. 2159 Unfortunately this leads to many missed optimizations 2160 during CSE. We mask out the non-HIGH bits and match 2161 a plain movdi, to alleviate this problem.
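As an illustrative example, gen_safe_HIGH64 (reg, 0x12345678) yields (set reg (const_int 0x12345400)): the low 10 bits are cleared up front, so the optimizer sees an ordinary constant rather than an underspecified HIGH.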
*/ 2162 static rtx 2163 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val) 2164 { 2165 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff)); 2166 } 2167 2168 static rtx 2169 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val) 2170 { 2171 return gen_rtx_SET (VOIDmode, dest, GEN_INT (val)); 2172 } 2173 2174 static rtx 2175 gen_safe_OR64 (rtx src, HOST_WIDE_INT val) 2176 { 2177 return gen_rtx_IOR (DImode, src, GEN_INT (val)); 2178 } 2179 2180 static rtx 2181 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val) 2182 { 2183 return gen_rtx_XOR (DImode, src, GEN_INT (val)); 2184 } 2185 2186 /* Worker routines for 64-bit constant formation on arch64. 2187 One of the key things to be doing in these emissions is 2188 to create as many temp REGs as possible. This makes it 2189 possible for half-built constants to be used later when 2190 such values are similar to something required later on. 2191 Without doing this, the optimizer cannot see such 2192 opportunities. */ 2193 2194 static void sparc_emit_set_const64_quick1 (rtx, rtx, 2195 unsigned HOST_WIDE_INT, int); 2196 2197 static void 2198 sparc_emit_set_const64_quick1 (rtx op0, rtx temp, 2199 unsigned HOST_WIDE_INT low_bits, int is_neg) 2200 { 2201 unsigned HOST_WIDE_INT high_bits; 2202 2203 if (is_neg) 2204 high_bits = (~low_bits) & 0xffffffff; 2205 else 2206 high_bits = low_bits; 2207 2208 emit_insn (gen_safe_HIGH64 (temp, high_bits)); 2209 if (!is_neg) 2210 { 2211 emit_insn (gen_rtx_SET (VOIDmode, op0, 2212 gen_safe_OR64 (temp, (high_bits & 0x3ff)))); 2213 } 2214 else 2215 { 2216 /* If we are XOR'ing with -1, then we should emit a one's complement 2217 instead. This way the combiner will notice logical operations 2218 such as ANDN later on and substitute. */ 2219 if ((low_bits & 0x3ff) == 0x3ff) 2220 { 2221 emit_insn (gen_rtx_SET (VOIDmode, op0, 2222 gen_rtx_NOT (DImode, temp))); 2223 } 2224 else 2225 { 2226 emit_insn (gen_rtx_SET (VOIDmode, op0, 2227 gen_safe_XOR64 (temp, 2228 (-(HOST_WIDE_INT)0x400 2229 | (low_bits & 0x3ff))))); 2230 } 2231 } 2232 } 2233 2234 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT, 2235 unsigned HOST_WIDE_INT, int); 2236 2237 static void 2238 sparc_emit_set_const64_quick2 (rtx op0, rtx temp, 2239 unsigned HOST_WIDE_INT high_bits, 2240 unsigned HOST_WIDE_INT low_immediate, 2241 int shift_count) 2242 { 2243 rtx temp2 = op0; 2244 2245 if ((high_bits & 0xfffffc00) != 0) 2246 { 2247 emit_insn (gen_safe_HIGH64 (temp, high_bits)); 2248 if ((high_bits & ~0xfffffc00) != 0) 2249 emit_insn (gen_rtx_SET (VOIDmode, op0, 2250 gen_safe_OR64 (temp, (high_bits & 0x3ff)))); 2251 else 2252 temp2 = temp; 2253 } 2254 else 2255 { 2256 emit_insn (gen_safe_SET64 (temp, high_bits)); 2257 temp2 = temp; 2258 } 2259 2260 /* Now shift it up into place. */ 2261 emit_insn (gen_rtx_SET (VOIDmode, op0, 2262 gen_rtx_ASHIFT (DImode, temp2, 2263 GEN_INT (shift_count)))); 2264 2265 /* If there is a low immediate part piece, finish up by 2266 putting that in as well. */ 2267 if (low_immediate != 0) 2268 emit_insn (gen_rtx_SET (VOIDmode, op0, 2269 gen_safe_OR64 (op0, low_immediate))); 2270 } 2271 2272 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT, 2273 unsigned HOST_WIDE_INT); 2274 2275 /* Full 64-bit constant decomposition. Even though this is the 2276 'worst' case, we still optimize a few things away. 
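In the pseudo-register case below this is at worst six insns: sethi+or for the high word, an sllx by 32, sethi+or for the low word, and a final add.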
*/ 2277 static void 2278 sparc_emit_set_const64_longway (rtx op0, rtx temp, 2279 unsigned HOST_WIDE_INT high_bits, 2280 unsigned HOST_WIDE_INT low_bits) 2281 { 2282 rtx sub_temp = op0; 2283 2284 if (can_create_pseudo_p ()) 2285 sub_temp = gen_reg_rtx (DImode); 2286 2287 if ((high_bits & 0xfffffc00) != 0) 2288 { 2289 emit_insn (gen_safe_HIGH64 (temp, high_bits)); 2290 if ((high_bits & ~0xfffffc00) != 0) 2291 emit_insn (gen_rtx_SET (VOIDmode, 2292 sub_temp, 2293 gen_safe_OR64 (temp, (high_bits & 0x3ff)))); 2294 else 2295 sub_temp = temp; 2296 } 2297 else 2298 { 2299 emit_insn (gen_safe_SET64 (temp, high_bits)); 2300 sub_temp = temp; 2301 } 2302 2303 if (can_create_pseudo_p ()) 2304 { 2305 rtx temp2 = gen_reg_rtx (DImode); 2306 rtx temp3 = gen_reg_rtx (DImode); 2307 rtx temp4 = gen_reg_rtx (DImode); 2308 2309 emit_insn (gen_rtx_SET (VOIDmode, temp4, 2310 gen_rtx_ASHIFT (DImode, sub_temp, 2311 GEN_INT (32)))); 2312 2313 emit_insn (gen_safe_HIGH64 (temp2, low_bits)); 2314 if ((low_bits & ~0xfffffc00) != 0) 2315 { 2316 emit_insn (gen_rtx_SET (VOIDmode, temp3, 2317 gen_safe_OR64 (temp2, (low_bits & 0x3ff)))); 2318 emit_insn (gen_rtx_SET (VOIDmode, op0, 2319 gen_rtx_PLUS (DImode, temp4, temp3))); 2320 } 2321 else 2322 { 2323 emit_insn (gen_rtx_SET (VOIDmode, op0, 2324 gen_rtx_PLUS (DImode, temp4, temp2))); 2325 } 2326 } 2327 else 2328 { 2329 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff); 2330 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff); 2331 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff); 2332 int to_shift = 12; 2333 2334 /* We are in the middle of reload, so this is really 2335 painful. However we do still make an attempt to 2336 avoid emitting truly stupid code. */ 2337 if (low1 != const0_rtx) 2338 { 2339 emit_insn (gen_rtx_SET (VOIDmode, op0, 2340 gen_rtx_ASHIFT (DImode, sub_temp, 2341 GEN_INT (to_shift)))); 2342 emit_insn (gen_rtx_SET (VOIDmode, op0, 2343 gen_rtx_IOR (DImode, op0, low1))); 2344 sub_temp = op0; 2345 to_shift = 12; 2346 } 2347 else 2348 { 2349 to_shift += 12; 2350 } 2351 if (low2 != const0_rtx) 2352 { 2353 emit_insn (gen_rtx_SET (VOIDmode, op0, 2354 gen_rtx_ASHIFT (DImode, sub_temp, 2355 GEN_INT (to_shift)))); 2356 emit_insn (gen_rtx_SET (VOIDmode, op0, 2357 gen_rtx_IOR (DImode, op0, low2))); 2358 sub_temp = op0; 2359 to_shift = 8; 2360 } 2361 else 2362 { 2363 to_shift += 8; 2364 } 2365 emit_insn (gen_rtx_SET (VOIDmode, op0, 2366 gen_rtx_ASHIFT (DImode, sub_temp, 2367 GEN_INT (to_shift)))); 2368 if (low3 != const0_rtx) 2369 emit_insn (gen_rtx_SET (VOIDmode, op0, 2370 gen_rtx_IOR (DImode, op0, low3))); 2371 /* phew... */ 2372 } 2373 } 2374 2375 /* Analyze a 64-bit constant for certain properties. 
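Namely, the positions of the lowest and highest set bits, and whether all bits in between are set; they are returned through HBSP, LBSP and ABBASP respectively.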
*/ 2376 static void analyze_64bit_constant (unsigned HOST_WIDE_INT, 2377 unsigned HOST_WIDE_INT, 2378 int *, int *, int *); 2379 2380 static void 2381 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits, 2382 unsigned HOST_WIDE_INT low_bits, 2383 int *hbsp, int *lbsp, int *abbasp) 2384 { 2385 int lowest_bit_set, highest_bit_set, all_bits_between_are_set; 2386 int i; 2387 2388 lowest_bit_set = highest_bit_set = -1; 2389 i = 0; 2390 do 2391 { 2392 if ((lowest_bit_set == -1) 2393 && ((low_bits >> i) & 1)) 2394 lowest_bit_set = i; 2395 if ((highest_bit_set == -1) 2396 && ((high_bits >> (32 - i - 1)) & 1)) 2397 highest_bit_set = (64 - i - 1); 2398 } 2399 while (++i < 32 2400 && ((highest_bit_set == -1) 2401 || (lowest_bit_set == -1))); 2402 if (i == 32) 2403 { 2404 i = 0; 2405 do 2406 { 2407 if ((lowest_bit_set == -1) 2408 && ((high_bits >> i) & 1)) 2409 lowest_bit_set = i + 32; 2410 if ((highest_bit_set == -1) 2411 && ((low_bits >> (32 - i - 1)) & 1)) 2412 highest_bit_set = 32 - i - 1; 2413 } 2414 while (++i < 32 2415 && ((highest_bit_set == -1) 2416 || (lowest_bit_set == -1))); 2417 } 2418 /* If there are no bits set this should have gone out 2419 as one instruction! */ 2420 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1); 2421 all_bits_between_are_set = 1; 2422 for (i = lowest_bit_set; i <= highest_bit_set; i++) 2423 { 2424 if (i < 32) 2425 { 2426 if ((low_bits & (1 << i)) != 0) 2427 continue; 2428 } 2429 else 2430 { 2431 if ((high_bits & (1 << (i - 32))) != 0) 2432 continue; 2433 } 2434 all_bits_between_are_set = 0; 2435 break; 2436 } 2437 *hbsp = highest_bit_set; 2438 *lbsp = lowest_bit_set; 2439 *abbasp = all_bits_between_are_set; 2440 } 2441 2442 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT); 2443 2444 static int 2445 const64_is_2insns (unsigned HOST_WIDE_INT high_bits, 2446 unsigned HOST_WIDE_INT low_bits) 2447 { 2448 int highest_bit_set, lowest_bit_set, all_bits_between_are_set; 2449 2450 if (high_bits == 0 2451 || high_bits == 0xffffffff) 2452 return 1; 2453 2454 analyze_64bit_constant (high_bits, low_bits, 2455 &highest_bit_set, &lowest_bit_set, 2456 &all_bits_between_are_set); 2457 2458 if ((highest_bit_set == 63 2459 || lowest_bit_set == 0) 2460 && all_bits_between_are_set != 0) 2461 return 1; 2462 2463 if ((highest_bit_set - lowest_bit_set) < 21) 2464 return 1; 2465 2466 return 0; 2467 } 2468 2469 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT, 2470 unsigned HOST_WIDE_INT, 2471 int, int); 2472 2473 static unsigned HOST_WIDE_INT 2474 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits, 2475 unsigned HOST_WIDE_INT low_bits, 2476 int lowest_bit_set, int shift) 2477 { 2478 HOST_WIDE_INT hi, lo; 2479 2480 if (lowest_bit_set < 32) 2481 { 2482 lo = (low_bits >> lowest_bit_set) << shift; 2483 hi = ((high_bits << (32 - lowest_bit_set)) << shift); 2484 } 2485 else 2486 { 2487 lo = 0; 2488 hi = ((high_bits >> (lowest_bit_set - 32)) << shift); 2489 } 2490 gcc_assert (! (hi & lo)); 2491 return (hi | lo); 2492 } 2493 2494 /* Here we are sure to be arch64 and this is an integer constant 2495 being loaded into a register. Emit the most efficient 2496 insn sequence possible. Detection of all the 1-insn cases 2497 has been done already. 
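As an illustrative example, 0xfffffffffffffff0 has highest_bit_set == 63, lowest_bit_set == 4 and all bits in between set, so it comes out as the 2-insn sequence mov -1, %reg; sllx %reg, 4, %reg.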
*/ 2498 static void 2499 sparc_emit_set_const64 (rtx op0, rtx op1) 2500 { 2501 unsigned HOST_WIDE_INT high_bits, low_bits; 2502 int lowest_bit_set, highest_bit_set; 2503 int all_bits_between_are_set; 2504 rtx temp = 0; 2505 2506 /* Sanity check that we know what we are working with. */ 2507 gcc_assert (TARGET_ARCH64 2508 && (GET_CODE (op0) == SUBREG 2509 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0))))); 2510 2511 if (! can_create_pseudo_p ()) 2512 temp = op0; 2513 2514 if (GET_CODE (op1) != CONST_INT) 2515 { 2516 sparc_emit_set_symbolic_const64 (op0, op1, temp); 2517 return; 2518 } 2519 2520 if (! temp) 2521 temp = gen_reg_rtx (DImode); 2522 2523 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff); 2524 low_bits = (INTVAL (op1) & 0xffffffff); 2525 2526 /* low_bits bits 0 --> 31 2527 high_bits bits 32 --> 63 */ 2528 2529 analyze_64bit_constant (high_bits, low_bits, 2530 &highest_bit_set, &lowest_bit_set, 2531 &all_bits_between_are_set); 2532 2533 /* First try for a 2-insn sequence. */ 2534 2535 /* These situations are preferred because the optimizer can 2536 * do more things with them: 2537 * 1) mov -1, %reg 2538 * sllx %reg, shift, %reg 2539 * 2) mov -1, %reg 2540 * srlx %reg, shift, %reg 2541 * 3) mov some_small_const, %reg 2542 * sllx %reg, shift, %reg 2543 */ 2544 if (((highest_bit_set == 63 2545 || lowest_bit_set == 0) 2546 && all_bits_between_are_set != 0) 2547 || ((highest_bit_set - lowest_bit_set) < 12)) 2548 { 2549 HOST_WIDE_INT the_const = -1; 2550 int shift = lowest_bit_set; 2551 2552 if ((highest_bit_set != 63 2553 && lowest_bit_set != 0) 2554 || all_bits_between_are_set == 0) 2555 { 2556 the_const = 2557 create_simple_focus_bits (high_bits, low_bits, 2558 lowest_bit_set, 0); 2559 } 2560 else if (lowest_bit_set == 0) 2561 shift = -(63 - highest_bit_set); 2562 2563 gcc_assert (SPARC_SIMM13_P (the_const)); 2564 gcc_assert (shift != 0); 2565 2566 emit_insn (gen_safe_SET64 (temp, the_const)); 2567 if (shift > 0) 2568 emit_insn (gen_rtx_SET (VOIDmode, 2569 op0, 2570 gen_rtx_ASHIFT (DImode, 2571 temp, 2572 GEN_INT (shift)))); 2573 else if (shift < 0) 2574 emit_insn (gen_rtx_SET (VOIDmode, 2575 op0, 2576 gen_rtx_LSHIFTRT (DImode, 2577 temp, 2578 GEN_INT (-shift)))); 2579 return; 2580 } 2581 2582 /* Now a range of 22 or less bits set somewhere. 2583 * 1) sethi %hi(focus_bits), %reg 2584 * sllx %reg, shift, %reg 2585 * 2) sethi %hi(focus_bits), %reg 2586 * srlx %reg, shift, %reg 2587 */ 2588 if ((highest_bit_set - lowest_bit_set) < 21) 2589 { 2590 unsigned HOST_WIDE_INT focus_bits = 2591 create_simple_focus_bits (high_bits, low_bits, 2592 lowest_bit_set, 10); 2593 2594 gcc_assert (SPARC_SETHI_P (focus_bits)); 2595 gcc_assert (lowest_bit_set != 10); 2596 2597 emit_insn (gen_safe_HIGH64 (temp, focus_bits)); 2598 2599 /* If lowest_bit_set == 10 then a sethi alone could have done it. 
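For example, 0xfffff00000 (bits 20 to 39 set) gives focus_bits == 0x3ffffc00; the sethi loads that value and the sllx by lowest_bit_set - 10 == 10 below shifts it back into place.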
*/ 2600 if (lowest_bit_set < 10) 2601 emit_insn (gen_rtx_SET (VOIDmode, 2602 op0, 2603 gen_rtx_LSHIFTRT (DImode, temp, 2604 GEN_INT (10 - lowest_bit_set)))); 2605 else if (lowest_bit_set > 10) 2606 emit_insn (gen_rtx_SET (VOIDmode, 2607 op0, 2608 gen_rtx_ASHIFT (DImode, temp, 2609 GEN_INT (lowest_bit_set - 10)))); 2610 return; 2611 } 2612 2613 /* 1) sethi %hi(low_bits), %reg 2614 * or %reg, %lo(low_bits), %reg 2615 * 2) sethi %hi(~low_bits), %reg 2616 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg 2617 */ 2618 if (high_bits == 0 2619 || high_bits == 0xffffffff) 2620 { 2621 sparc_emit_set_const64_quick1 (op0, temp, low_bits, 2622 (high_bits == 0xffffffff)); 2623 return; 2624 } 2625 2626 /* Now, try 3-insn sequences. */ 2627 2628 /* 1) sethi %hi(high_bits), %reg 2629 * or %reg, %lo(high_bits), %reg 2630 * sllx %reg, 32, %reg 2631 */ 2632 if (low_bits == 0) 2633 { 2634 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32); 2635 return; 2636 } 2637 2638 /* We may be able to do something quick 2639 when the constant is negated, so try that. */ 2640 if (const64_is_2insns ((~high_bits) & 0xffffffff, 2641 (~low_bits) & 0xfffffc00)) 2642 { 2643 /* NOTE: The trailing bits get XOR'd so we need the 2644 non-negated bits, not the negated ones. */ 2645 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff; 2646 2647 if ((((~high_bits) & 0xffffffff) == 0 2648 && ((~low_bits) & 0x80000000) == 0) 2649 || (((~high_bits) & 0xffffffff) == 0xffffffff 2650 && ((~low_bits) & 0x80000000) != 0)) 2651 { 2652 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff); 2653 2654 if ((SPARC_SETHI_P (fast_int) 2655 && (~high_bits & 0xffffffff) == 0) 2656 || SPARC_SIMM13_P (fast_int)) 2657 emit_insn (gen_safe_SET64 (temp, fast_int)); 2658 else 2659 sparc_emit_set_const64 (temp, GEN_INT (fast_int)); 2660 } 2661 else 2662 { 2663 rtx negated_const; 2664 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) | 2665 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32)); 2666 sparc_emit_set_const64 (temp, negated_const); 2667 } 2668 2669 /* If we are XOR'ing with -1, then we should emit a one's complement 2670 instead. This way the combiner will notice logical operations 2671 such as ANDN later on and substitute. */ 2672 if (trailing_bits == 0x3ff) 2673 { 2674 emit_insn (gen_rtx_SET (VOIDmode, op0, 2675 gen_rtx_NOT (DImode, temp))); 2676 } 2677 else 2678 { 2679 emit_insn (gen_rtx_SET (VOIDmode, 2680 op0, 2681 gen_safe_XOR64 (temp, 2682 (-0x400 | trailing_bits)))); 2683 } 2684 return; 2685 } 2686 2687 /* 1) sethi %hi(xxx), %reg 2688 * or %reg, %lo(xxx), %reg 2689 * sllx %reg, yyy, %reg 2690 * 2691 * ??? This is just a generalized version of the low_bits==0 2692 * thing above, FIXME... 2693 */ 2694 if ((highest_bit_set - lowest_bit_set) < 32) 2695 { 2696 unsigned HOST_WIDE_INT focus_bits = 2697 create_simple_focus_bits (high_bits, low_bits, 2698 lowest_bit_set, 0); 2699 2700 /* We can't get here in this state. */ 2701 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32); 2702 2703 /* So what we know is that the set bits straddle the 2704 middle of the 64-bit word. 
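quick2 below therefore builds the 32-bit window with sethi (plus an or if the low 10 bits are nonzero) and restores its position with a single sllx by lowest_bit_set.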
*/ 2705 sparc_emit_set_const64_quick2 (op0, temp, 2706 focus_bits, 0, 2707 lowest_bit_set); 2708 return; 2709 } 2710 2711 /* 1) sethi %hi(high_bits), %reg 2712 * or %reg, %lo(high_bits), %reg 2713 * sllx %reg, 32, %reg 2714 * or %reg, low_bits, %reg 2715 */ 2716 if (SPARC_SIMM13_P(low_bits) 2717 && ((int)low_bits > 0)) 2718 { 2719 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32); 2720 return; 2721 } 2722 2723 /* The easiest way when all else fails, is full decomposition. */ 2724 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits); 2725 } 2726 #endif /* HOST_BITS_PER_WIDE_INT == 32 */ 2727 2728 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, 2729 return the mode to be used for the comparison. For floating-point, 2730 CCFP[E]mode is used. CC_NOOVmode should be used when the first operand 2731 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special 2732 processing is needed. */ 2733 2734 machine_mode 2735 select_cc_mode (enum rtx_code op, rtx x, rtx y ATTRIBUTE_UNUSED) 2736 { 2737 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 2738 { 2739 switch (op) 2740 { 2741 case EQ: 2742 case NE: 2743 case UNORDERED: 2744 case ORDERED: 2745 case UNLT: 2746 case UNLE: 2747 case UNGT: 2748 case UNGE: 2749 case UNEQ: 2750 case LTGT: 2751 return CCFPmode; 2752 2753 case LT: 2754 case LE: 2755 case GT: 2756 case GE: 2757 return CCFPEmode; 2758 2759 default: 2760 gcc_unreachable (); 2761 } 2762 } 2763 else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS 2764 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT) 2765 { 2766 if (TARGET_ARCH64 && GET_MODE (x) == DImode) 2767 return CCX_NOOVmode; 2768 else 2769 return CC_NOOVmode; 2770 } 2771 else 2772 { 2773 if (TARGET_ARCH64 && GET_MODE (x) == DImode) 2774 return CCXmode; 2775 else 2776 return CCmode; 2777 } 2778 } 2779 2780 /* Emit the compare insn and return the CC reg for a CODE comparison 2781 with operands X and Y. */ 2782 2783 static rtx 2784 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y) 2785 { 2786 machine_mode mode; 2787 rtx cc_reg; 2788 2789 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC) 2790 return x; 2791 2792 mode = SELECT_CC_MODE (code, x, y); 2793 2794 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the 2795 fcc regs (cse can't tell they're really call clobbered regs and will 2796 remove a duplicate comparison even if there is an intervening function 2797 call - it will then try to reload the cc reg via an int reg which is why 2798 we need the movcc patterns). It is possible to provide the movcc 2799 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two 2800 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be 2801 to tell cse that CCFPE mode registers (even pseudos) are call 2802 clobbered. */ 2803 2804 /* ??? This is an experiment. Rather than making changes to cse which may 2805 or may not be easy/clean, we do our own cse. This is possible because 2806 we will generate hard registers. Cse knows they're call clobbered (it 2807 doesn't know the same thing about pseudos). If we guess wrong, no big 2808 deal, but if we win, great! */ 2809 2810 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 2811 #if 1 /* experiment */ 2812 { 2813 int reg; 2814 /* We cycle through the registers to ensure they're all exercised. */ 2815 static int next_fcc_reg = 0; 2816 /* Previous x,y for each fcc reg. */ 2817 static rtx prev_args[4][2]; 2818 2819 /* Scan prev_args for x,y. 
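If the same X and Y were compared last time in some fcc register, reuse that register; otherwise allocate the next one round-robin.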
*/ 2820 for (reg = 0; reg < 4; reg++) 2821 if (prev_args[reg][0] == x && prev_args[reg][1] == y) 2822 break; 2823 if (reg == 4) 2824 { 2825 reg = next_fcc_reg; 2826 prev_args[reg][0] = x; 2827 prev_args[reg][1] = y; 2828 next_fcc_reg = (next_fcc_reg + 1) & 3; 2829 } 2830 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG); 2831 } 2832 #else 2833 cc_reg = gen_reg_rtx (mode); 2834 #endif /* ! experiment */ 2835 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 2836 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG); 2837 else 2838 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG); 2839 2840 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this 2841 will only result in an unrecognizable insn so no point in asserting. */ 2842 emit_insn (gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y))); 2843 2844 return cc_reg; 2845 } 2846 2847 2848 /* Emit the compare insn and return the CC reg for the comparison in CMP. */ 2849 2850 rtx 2851 gen_compare_reg (rtx cmp) 2852 { 2853 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1)); 2854 } 2855 2856 /* This function is used for v9 only. 2857 DEST is the target of the Scc insn. 2858 CODE is the code for an Scc's comparison. 2859 X and Y are the values we compare. 2860 2861 This function is needed to turn 2862 2863 (set (reg:SI 110) 2864 (gt (reg:CCX 100 %icc) 2865 (const_int 0))) 2866 into 2867 (set (reg:SI 110) 2868 (gt:DI (reg:CCX 100 %icc) 2869 (const_int 0))) 2870 2871 IE: The instruction recognizer needs to see the mode of the comparison to 2872 find the right instruction. We could use "gt:DI" right in the 2873 define_expand, but leaving it out allows us to handle DI, SI, etc. */ 2874 2875 static int 2876 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y) 2877 { 2878 if (! TARGET_ARCH64 2879 && (GET_MODE (x) == DImode 2880 || GET_MODE (dest) == DImode)) 2881 return 0; 2882 2883 /* Try to use the movrCC insns. */ 2884 if (TARGET_ARCH64 2885 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT 2886 && y == const0_rtx 2887 && v9_regcmp_p (compare_code)) 2888 { 2889 rtx op0 = x; 2890 rtx temp; 2891 2892 /* Special case for op0 != 0. This can be done with one instruction if 2893 dest == x. */ 2894 2895 if (compare_code == NE 2896 && GET_MODE (dest) == DImode 2897 && rtx_equal_p (op0, dest)) 2898 { 2899 emit_insn (gen_rtx_SET (VOIDmode, dest, 2900 gen_rtx_IF_THEN_ELSE (DImode, 2901 gen_rtx_fmt_ee (compare_code, DImode, 2902 op0, const0_rtx), 2903 const1_rtx, 2904 dest))); 2905 return 1; 2906 } 2907 2908 if (reg_overlap_mentioned_p (dest, op0)) 2909 { 2910 /* Handle the case where dest == x. 2911 We "early clobber" the result. 
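That is, we copy X into a fresh register first, so that zeroing DEST below cannot clobber the value being compared.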
*/ 2912 op0 = gen_reg_rtx (GET_MODE (x)); 2913 emit_move_insn (op0, x); 2914 } 2915 2916 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx)); 2917 if (GET_MODE (op0) != DImode) 2918 { 2919 temp = gen_reg_rtx (DImode); 2920 convert_move (temp, op0, 0); 2921 } 2922 else 2923 temp = op0; 2924 emit_insn (gen_rtx_SET (VOIDmode, dest, 2925 gen_rtx_IF_THEN_ELSE (GET_MODE (dest), 2926 gen_rtx_fmt_ee (compare_code, DImode, 2927 temp, const0_rtx), 2928 const1_rtx, 2929 dest))); 2930 return 1; 2931 } 2932 else 2933 { 2934 x = gen_compare_reg_1 (compare_code, x, y); 2935 y = const0_rtx; 2936 2937 gcc_assert (GET_MODE (x) != CC_NOOVmode 2938 && GET_MODE (x) != CCX_NOOVmode); 2939 2940 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx)); 2941 emit_insn (gen_rtx_SET (VOIDmode, dest, 2942 gen_rtx_IF_THEN_ELSE (GET_MODE (dest), 2943 gen_rtx_fmt_ee (compare_code, 2944 GET_MODE (x), x, y), 2945 const1_rtx, dest))); 2946 return 1; 2947 } 2948 } 2949 2950 2951 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this 2952 without jumps using the addx/subx instructions. */ 2953 2954 bool 2955 emit_scc_insn (rtx operands[]) 2956 { 2957 rtx tem; 2958 rtx x; 2959 rtx y; 2960 enum rtx_code code; 2961 2962 /* The quad-word fp compare library routines all return nonzero to indicate 2963 true, which is different from the equivalent libgcc routines, so we must 2964 handle them specially here. */ 2965 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD) 2966 { 2967 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3], 2968 GET_CODE (operands[1])); 2969 operands[2] = XEXP (operands[1], 0); 2970 operands[3] = XEXP (operands[1], 1); 2971 } 2972 2973 code = GET_CODE (operands[1]); 2974 x = operands[2]; 2975 y = operands[3]; 2976 2977 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has 2978 more applications). The exception to this is "reg != 0" which can 2979 be done in one instruction on v9 (so we do it). */ 2980 if (code == EQ) 2981 { 2982 if (GET_MODE (x) == SImode) 2983 { 2984 rtx pat; 2985 if (TARGET_ARCH64) 2986 pat = gen_seqsidi_special (operands[0], x, y); 2987 else 2988 pat = gen_seqsisi_special (operands[0], x, y); 2989 emit_insn (pat); 2990 return true; 2991 } 2992 else if (GET_MODE (x) == DImode) 2993 { 2994 rtx pat = gen_seqdi_special (operands[0], x, y); 2995 emit_insn (pat); 2996 return true; 2997 } 2998 } 2999 3000 if (code == NE) 3001 { 3002 if (GET_MODE (x) == SImode) 3003 { 3004 rtx pat; 3005 if (TARGET_ARCH64) 3006 pat = gen_snesidi_special (operands[0], x, y); 3007 else 3008 pat = gen_snesisi_special (operands[0], x, y); 3009 emit_insn (pat); 3010 return true; 3011 } 3012 else if (GET_MODE (x) == DImode) 3013 { 3014 rtx pat; 3015 if (TARGET_VIS3) 3016 pat = gen_snedi_special_vis3 (operands[0], x, y); 3017 else 3018 pat = gen_snedi_special (operands[0], x, y); 3019 emit_insn (pat); 3020 return true; 3021 } 3022 } 3023 3024 if (TARGET_V9 3025 && TARGET_ARCH64 3026 && GET_MODE (x) == DImode 3027 && !(TARGET_VIS3 3028 && (code == GTU || code == LTU)) 3029 && gen_v9_scc (operands[0], code, x, y)) 3030 return true; 3031 3032 /* We can do LTU and GEU using the addx/subx instructions too. And 3033 for GTU/LEU, if both operands are registers swap them and fall 3034 back to the easy case. 
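This works because GTU (x, y) is LTU (y, x) and LEU (x, y) is GEU (y, x), which is exactly what swap_condition gives us.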
*/ 3035 if (code == GTU || code == LEU) 3036 { 3037 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 3038 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)) 3039 { 3040 tem = x; 3041 x = y; 3042 y = tem; 3043 code = swap_condition (code); 3044 } 3045 } 3046 3047 if (code == LTU 3048 || (!TARGET_VIS3 && code == GEU)) 3049 { 3050 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 3051 gen_rtx_fmt_ee (code, GET_MODE (operands[0]), 3052 gen_compare_reg_1 (code, x, y), 3053 const0_rtx))); 3054 return true; 3055 } 3056 3057 /* All the possibilities to use addx/subx based sequences have been 3058 exhausted; try for a 3 instruction sequence using v9 conditional 3059 moves. */ 3060 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y)) 3061 return true; 3062 3063 /* Nope, do branches. */ 3064 return false; 3065 } 3066 3067 /* Emit a conditional jump insn for the v9 architecture using comparison code 3068 CODE and jump target LABEL. 3069 This function exists to take advantage of the v9 brxx insns. */ 3070 3071 static void 3072 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label) 3073 { 3074 emit_jump_insn (gen_rtx_SET (VOIDmode, 3075 pc_rtx, 3076 gen_rtx_IF_THEN_ELSE (VOIDmode, 3077 gen_rtx_fmt_ee (code, GET_MODE (op0), 3078 op0, const0_rtx), 3079 gen_rtx_LABEL_REF (VOIDmode, label), 3080 pc_rtx))); 3081 } 3082 3083 /* Emit a conditional jump insn for the UA2011 architecture using 3084 comparison code CODE and jump target LABEL. This function exists 3085 to take advantage of the UA2011 Compare and Branch insns. */ 3086 3087 static void 3088 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label) 3089 { 3090 rtx if_then_else; 3091 3092 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode, 3093 gen_rtx_fmt_ee (code, GET_MODE (op0), 3094 op0, op1), 3095 gen_rtx_LABEL_REF (VOIDmode, label), 3096 pc_rtx); 3097 3098 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, if_then_else)); 3099 } 3100 3101 void 3102 emit_conditional_branch_insn (rtx operands[]) 3103 { 3104 /* The quad-word fp compare library routines all return nonzero to indicate 3105 true, which is different from the equivalent libgcc routines, so we must 3106 handle them specially here. */ 3107 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD) 3108 { 3109 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2], 3110 GET_CODE (operands[0])); 3111 operands[1] = XEXP (operands[0], 0); 3112 operands[2] = XEXP (operands[0], 1); 3113 } 3114 3115 /* If we can tell early on that the comparison is against a constant 3116 that won't fit in the 5-bit signed immediate field of a cbcond, 3117 use one of the other v9 conditional branch sequences.
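(The cbcond immediate is a signed 5-bit field, i.e. the range [-16, 15] checked by SPARC_SIMM5_P below.)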
*/ 3118 if (TARGET_CBCOND 3119 && GET_CODE (operands[1]) == REG 3120 && (GET_MODE (operands[1]) == SImode 3121 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode)) 3122 && (GET_CODE (operands[2]) != CONST_INT 3123 || SPARC_SIMM5_P (INTVAL (operands[2])))) 3124 { 3125 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]); 3126 return; 3127 } 3128 3129 if (TARGET_ARCH64 && operands[2] == const0_rtx 3130 && GET_CODE (operands[1]) == REG 3131 && GET_MODE (operands[1]) == DImode) 3132 { 3133 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]); 3134 return; 3135 } 3136 3137 operands[1] = gen_compare_reg (operands[0]); 3138 operands[2] = const0_rtx; 3139 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode, 3140 operands[1], operands[2]); 3141 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2], 3142 operands[3])); 3143 } 3144 3145 3146 /* Generate a DFmode part of a hard TFmode register. 3147 REG is the TFmode hard register, LOW is 1 for the 3148 low 64bit of the register and 0 otherwise. 3149 */ 3150 rtx 3151 gen_df_reg (rtx reg, int low) 3152 { 3153 int regno = REGNO (reg); 3154 3155 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0)) 3156 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2; 3157 return gen_rtx_REG (DFmode, regno); 3158 } 3159 3160 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value. 3161 Unlike normal calls, TFmode operands are passed by reference. It is 3162 assumed that no more than 3 operands are required. */ 3163 3164 static void 3165 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands) 3166 { 3167 rtx ret_slot = NULL, arg[3], func_sym; 3168 int i; 3169 3170 /* We only expect to be called for conversions, unary, and binary ops. */ 3171 gcc_assert (nargs == 2 || nargs == 3); 3172 3173 for (i = 0; i < nargs; ++i) 3174 { 3175 rtx this_arg = operands[i]; 3176 rtx this_slot; 3177 3178 /* TFmode arguments and return values are passed by reference. */ 3179 if (GET_MODE (this_arg) == TFmode) 3180 { 3181 int force_stack_temp; 3182 3183 force_stack_temp = 0; 3184 if (TARGET_BUGGY_QP_LIB && i == 0) 3185 force_stack_temp = 1; 3186 3187 if (GET_CODE (this_arg) == MEM 3188 && ! force_stack_temp) 3189 { 3190 tree expr = MEM_EXPR (this_arg); 3191 if (expr) 3192 mark_addressable (expr); 3193 this_arg = XEXP (this_arg, 0); 3194 } 3195 else if (CONSTANT_P (this_arg) 3196 && ! force_stack_temp) 3197 { 3198 this_slot = force_const_mem (TFmode, this_arg); 3199 this_arg = XEXP (this_slot, 0); 3200 } 3201 else 3202 { 3203 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode)); 3204 3205 /* Operand 0 is the return value. We'll copy it out later. 
*/ 3206 if (i > 0) 3207 emit_move_insn (this_slot, this_arg); 3208 else 3209 ret_slot = this_slot; 3210 3211 this_arg = XEXP (this_slot, 0); 3212 } 3213 } 3214 3215 arg[i] = this_arg; 3216 } 3217 3218 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name); 3219 3220 if (GET_MODE (operands[0]) == TFmode) 3221 { 3222 if (nargs == 2) 3223 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2, 3224 arg[0], GET_MODE (arg[0]), 3225 arg[1], GET_MODE (arg[1])); 3226 else 3227 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3, 3228 arg[0], GET_MODE (arg[0]), 3229 arg[1], GET_MODE (arg[1]), 3230 arg[2], GET_MODE (arg[2])); 3231 3232 if (ret_slot) 3233 emit_move_insn (operands[0], ret_slot); 3234 } 3235 else 3236 { 3237 rtx ret; 3238 3239 gcc_assert (nargs == 2); 3240 3241 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL, 3242 GET_MODE (operands[0]), 1, 3243 arg[1], GET_MODE (arg[1])); 3244 3245 if (ret != operands[0]) 3246 emit_move_insn (operands[0], ret); 3247 } 3248 } 3249 3250 /* Expand soft-float TFmode calls to sparc abi routines. */ 3251 3252 static void 3253 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands) 3254 { 3255 const char *func; 3256 3257 switch (code) 3258 { 3259 case PLUS: 3260 func = "_Qp_add"; 3261 break; 3262 case MINUS: 3263 func = "_Qp_sub"; 3264 break; 3265 case MULT: 3266 func = "_Qp_mul"; 3267 break; 3268 case DIV: 3269 func = "_Qp_div"; 3270 break; 3271 default: 3272 gcc_unreachable (); 3273 } 3274 3275 emit_soft_tfmode_libcall (func, 3, operands); 3276 } 3277 3278 static void 3279 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands) 3280 { 3281 const char *func; 3282 3283 gcc_assert (code == SQRT); 3284 func = "_Qp_sqrt"; 3285 3286 emit_soft_tfmode_libcall (func, 2, operands); 3287 } 3288 3289 static void 3290 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands) 3291 { 3292 const char *func; 3293 3294 switch (code) 3295 { 3296 case FLOAT_EXTEND: 3297 switch (GET_MODE (operands[1])) 3298 { 3299 case SFmode: 3300 func = "_Qp_stoq"; 3301 break; 3302 case DFmode: 3303 func = "_Qp_dtoq"; 3304 break; 3305 default: 3306 gcc_unreachable (); 3307 } 3308 break; 3309 3310 case FLOAT_TRUNCATE: 3311 switch (GET_MODE (operands[0])) 3312 { 3313 case SFmode: 3314 func = "_Qp_qtos"; 3315 break; 3316 case DFmode: 3317 func = "_Qp_qtod"; 3318 break; 3319 default: 3320 gcc_unreachable (); 3321 } 3322 break; 3323 3324 case FLOAT: 3325 switch (GET_MODE (operands[1])) 3326 { 3327 case SImode: 3328 func = "_Qp_itoq"; 3329 if (TARGET_ARCH64) 3330 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]); 3331 break; 3332 case DImode: 3333 func = "_Qp_xtoq"; 3334 break; 3335 default: 3336 gcc_unreachable (); 3337 } 3338 break; 3339 3340 case UNSIGNED_FLOAT: 3341 switch (GET_MODE (operands[1])) 3342 { 3343 case SImode: 3344 func = "_Qp_uitoq"; 3345 if (TARGET_ARCH64) 3346 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]); 3347 break; 3348 case DImode: 3349 func = "_Qp_uxtoq"; 3350 break; 3351 default: 3352 gcc_unreachable (); 3353 } 3354 break; 3355 3356 case FIX: 3357 switch (GET_MODE (operands[0])) 3358 { 3359 case SImode: 3360 func = "_Qp_qtoi"; 3361 break; 3362 case DImode: 3363 func = "_Qp_qtox"; 3364 break; 3365 default: 3366 gcc_unreachable (); 3367 } 3368 break; 3369 3370 case UNSIGNED_FIX: 3371 switch (GET_MODE (operands[0])) 3372 { 3373 case SImode: 3374 func = "_Qp_qtoui"; 3375 break; 3376 case DImode: 3377 func = "_Qp_qtoux"; 3378 break; 3379 default: 3380 gcc_unreachable (); 3381 } 3382 break; 3383 3384 default: 3385 gcc_unreachable (); 3386 } 3387 
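/* Every conversion funnels into a two-operand libcall: operand 0 is the result and operand 1 the (possibly sign- or zero-extended) source. */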
3388 emit_soft_tfmode_libcall (func, 2, operands); 3389 } 3390 3391 /* Expand a hard-float tfmode operation. All arguments must be in 3392 registers. */ 3393 3394 static void 3395 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands) 3396 { 3397 rtx op, dest; 3398 3399 if (GET_RTX_CLASS (code) == RTX_UNARY) 3400 { 3401 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]); 3402 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]); 3403 } 3404 else 3405 { 3406 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]); 3407 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); 3408 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]), 3409 operands[1], operands[2]); 3410 } 3411 3412 if (register_operand (operands[0], VOIDmode)) 3413 dest = operands[0]; 3414 else 3415 dest = gen_reg_rtx (GET_MODE (operands[0])); 3416 3417 emit_insn (gen_rtx_SET (VOIDmode, dest, op)); 3418 3419 if (dest != operands[0]) 3420 emit_move_insn (operands[0], dest); 3421 } 3422 3423 void 3424 emit_tfmode_binop (enum rtx_code code, rtx *operands) 3425 { 3426 if (TARGET_HARD_QUAD) 3427 emit_hard_tfmode_operation (code, operands); 3428 else 3429 emit_soft_tfmode_binop (code, operands); 3430 } 3431 3432 void 3433 emit_tfmode_unop (enum rtx_code code, rtx *operands) 3434 { 3435 if (TARGET_HARD_QUAD) 3436 emit_hard_tfmode_operation (code, operands); 3437 else 3438 emit_soft_tfmode_unop (code, operands); 3439 } 3440 3441 void 3442 emit_tfmode_cvt (enum rtx_code code, rtx *operands) 3443 { 3444 if (TARGET_HARD_QUAD) 3445 emit_hard_tfmode_operation (code, operands); 3446 else 3447 emit_soft_tfmode_cvt (code, operands); 3448 } 3449 3450 /* Return nonzero if a branch/jump/call instruction will be emitting 3451 nop into its delay slot. */ 3452 3453 int 3454 empty_delay_slot (rtx_insn *insn) 3455 { 3456 rtx seq; 3457 3458 /* If no previous instruction (should not happen), return true. */ 3459 if (PREV_INSN (insn) == NULL) 3460 return 1; 3461 3462 seq = NEXT_INSN (PREV_INSN (insn)); 3463 if (GET_CODE (PATTERN (seq)) == SEQUENCE) 3464 return 0; 3465 3466 return 1; 3467 } 3468 3469 /* Return nonzero if we should emit a nop after a cbcond instruction. 3470 The cbcond instruction does not have a delay slot, however there is 3471 a severe performance penalty if a control transfer appears right 3472 after a cbcond. Therefore we emit a nop when we detect this 3473 situation. */ 3474 3475 int 3476 emit_cbcond_nop (rtx insn) 3477 { 3478 rtx next = next_active_insn (insn); 3479 3480 if (!next) 3481 return 1; 3482 3483 if (NONJUMP_INSN_P (next) 3484 && GET_CODE (PATTERN (next)) == SEQUENCE) 3485 next = XVECEXP (PATTERN (next), 0, 0); 3486 else if (CALL_P (next) 3487 && GET_CODE (PATTERN (next)) == PARALLEL) 3488 { 3489 rtx delay = XVECEXP (PATTERN (next), 0, 1); 3490 3491 if (GET_CODE (delay) == RETURN) 3492 { 3493 /* It's a sibling call. Do not emit the nop if we're going 3494 to emit something other than the jump itself as the first 3495 instruction of the sibcall sequence. */ 3496 if (sparc_leaf_function_p || TARGET_FLAT) 3497 return 0; 3498 } 3499 } 3500 3501 if (NONJUMP_INSN_P (next)) 3502 return 0; 3503 3504 return 1; 3505 } 3506 3507 /* Return nonzero if TRIAL can go into the call delay slot. 
*/ 3508 3509 int 3510 eligible_for_call_delay (rtx_insn *trial) 3511 { 3512 rtx pat; 3513 3514 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE) 3515 return 0; 3516 3517 /* Binutils allows 3518 call __tls_get_addr, %tgd_call (foo) 3519 add %l7, %o0, %o0, %tgd_add (foo) 3520 while Sun as/ld does not. */ 3521 if (TARGET_GNU_TLS || !TARGET_TLS) 3522 return 1; 3523 3524 pat = PATTERN (trial); 3525 3526 /* We must reject tgd_add{32|64}, i.e. 3527 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD))) 3528 and tldm_add{32|64}, i.e. 3529 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM))) 3530 for Sun as/ld. */ 3531 if (GET_CODE (pat) == SET 3532 && GET_CODE (SET_SRC (pat)) == PLUS) 3533 { 3534 rtx unspec = XEXP (SET_SRC (pat), 1); 3535 3536 if (GET_CODE (unspec) == UNSPEC 3537 && (XINT (unspec, 1) == UNSPEC_TLSGD 3538 || XINT (unspec, 1) == UNSPEC_TLSLDM)) 3539 return 0; 3540 } 3541 3542 return 1; 3543 } 3544 3545 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore' 3546 instruction. RETURN_P is true if the v9 variant 'return' is to be 3547 considered in the test too. 3548 3549 TRIAL must be a SET whose destination is a REG appropriate for the 3550 'restore' instruction or, if RETURN_P is true, for the 'return' 3551 instruction. */ 3552 3553 static int 3554 eligible_for_restore_insn (rtx trial, bool return_p) 3555 { 3556 rtx pat = PATTERN (trial); 3557 rtx src = SET_SRC (pat); 3558 bool src_is_freg = false; 3559 rtx src_reg; 3560 3561 /* Since we now can do moves between float and integer registers when 3562 VIS3 is enabled, we have to catch this case. We can allow such 3563 moves when doing a 'return' however. */ 3564 src_reg = src; 3565 if (GET_CODE (src_reg) == SUBREG) 3566 src_reg = SUBREG_REG (src_reg); 3567 if (GET_CODE (src_reg) == REG 3568 && SPARC_FP_REG_P (REGNO (src_reg))) 3569 src_is_freg = true; 3570 3571 /* The 'restore src,%g0,dest' pattern for word mode and below. */ 3572 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT 3573 && arith_operand (src, GET_MODE (src)) 3574 && ! src_is_freg) 3575 { 3576 if (TARGET_ARCH64) 3577 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode); 3578 else 3579 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode); 3580 } 3581 3582 /* The 'restore src,%g0,dest' pattern for double-word mode. */ 3583 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT 3584 && arith_double_operand (src, GET_MODE (src)) 3585 && ! src_is_freg) 3586 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode); 3587 3588 /* The 'restore src,%g0,dest' pattern for float if no FPU. */ 3589 else if (! TARGET_FPU && register_operand (src, SFmode)) 3590 return 1; 3591 3592 /* The 'restore src,%g0,dest' pattern for double if no FPU. */ 3593 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode)) 3594 return 1; 3595 3596 /* If we have the 'return' instruction, anything that does not use 3597 local or output registers and can go into a delay slot wins. */ 3598 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1)) 3599 return 1; 3600 3601 /* The 'restore src1,src2,dest' pattern for SImode. */ 3602 else if (GET_CODE (src) == PLUS 3603 && register_operand (XEXP (src, 0), SImode) 3604 && arith_operand (XEXP (src, 1), SImode)) 3605 return 1; 3606 3607 /* The 'restore src1,src2,dest' pattern for DImode. 
*/ 3608 else if (GET_CODE (src) == PLUS 3609 && register_operand (XEXP (src, 0), DImode) 3610 && arith_double_operand (XEXP (src, 1), DImode)) 3611 return 1; 3612 3613 /* The 'restore src1,%lo(src2),dest' pattern. */ 3614 else if (GET_CODE (src) == LO_SUM 3615 && ! TARGET_CM_MEDMID 3616 && ((register_operand (XEXP (src, 0), SImode) 3617 && immediate_operand (XEXP (src, 1), SImode)) 3618 || (TARGET_ARCH64 3619 && register_operand (XEXP (src, 0), DImode) 3620 && immediate_operand (XEXP (src, 1), DImode)))) 3621 return 1; 3622 3623 /* The 'restore src,src,dest' pattern. */ 3624 else if (GET_CODE (src) == ASHIFT 3625 && (register_operand (XEXP (src, 0), SImode) 3626 || register_operand (XEXP (src, 0), DImode)) 3627 && XEXP (src, 1) == const1_rtx) 3628 return 1; 3629 3630 return 0; 3631 } 3632 3633 /* Return nonzero if TRIAL can go into the function return's delay slot. */ 3634 3635 int 3636 eligible_for_return_delay (rtx_insn *trial) 3637 { 3638 int regno; 3639 rtx pat; 3640 3641 /* If the function uses __builtin_eh_return, the eh_return machinery 3642 occupies the delay slot. */ 3643 if (crtl->calls_eh_return) 3644 return 0; 3645 3646 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE) 3647 return 0; 3648 3649 /* In the case of a leaf or flat function, anything can go into the slot. */ 3650 if (sparc_leaf_function_p || TARGET_FLAT) 3651 return 1; 3652 3653 if (!NONJUMP_INSN_P (trial)) 3654 return 0; 3655 3656 pat = PATTERN (trial); 3657 if (GET_CODE (pat) == PARALLEL) 3658 { 3659 int i; 3660 3661 if (! TARGET_V9) 3662 return 0; 3663 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) 3664 { 3665 rtx expr = XVECEXP (pat, 0, i); 3666 if (GET_CODE (expr) != SET) 3667 return 0; 3668 if (GET_CODE (SET_DEST (expr)) != REG) 3669 return 0; 3670 regno = REGNO (SET_DEST (expr)); 3671 if (regno >= 8 && regno < 24) 3672 return 0; 3673 } 3674 return !epilogue_renumber (&pat, 1); 3675 } 3676 3677 if (GET_CODE (pat) != SET) 3678 return 0; 3679 3680 if (GET_CODE (SET_DEST (pat)) != REG) 3681 return 0; 3682 3683 regno = REGNO (SET_DEST (pat)); 3684 3685 /* Otherwise, only operations which can be done in tandem with 3686 a `restore' or `return' insn can go into the delay slot. */ 3687 if (regno >= 8 && regno < 24) 3688 return 0; 3689 3690 /* If this instruction sets up floating point register and we have a return 3691 instruction, it can probably go in. But restore will not work 3692 with FP_REGS. */ 3693 if (! SPARC_INT_REG_P (regno)) 3694 return TARGET_V9 && !epilogue_renumber (&pat, 1); 3695 3696 return eligible_for_restore_insn (trial, true); 3697 } 3698 3699 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */ 3700 3701 int 3702 eligible_for_sibcall_delay (rtx_insn *trial) 3703 { 3704 rtx pat; 3705 3706 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE) 3707 return 0; 3708 3709 if (!NONJUMP_INSN_P (trial)) 3710 return 0; 3711 3712 pat = PATTERN (trial); 3713 3714 if (sparc_leaf_function_p || TARGET_FLAT) 3715 { 3716 /* If the tail call is done using the call instruction, 3717 we have to restore %o7 in the delay slot. */ 3718 if (LEAF_SIBCALL_SLOT_RESERVED_P) 3719 return 0; 3720 3721 /* %g1 is used to build the function address */ 3722 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat)) 3723 return 0; 3724 3725 return 1; 3726 } 3727 3728 if (GET_CODE (pat) != SET) 3729 return 0; 3730 3731 /* Otherwise, only operations which can be done in tandem with 3732 a `restore' insn can go into the delay slot. 
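A restore writes its destination in the caller's register window, so only global and %i registers (the latter becoming the caller's %o registers) qualify; %o and %l registers, i.e. regnos 8 to 23, are rejected below.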
*/
  if (GET_CODE (pat) != SET)
    return 0;

  if (GET_CODE (SET_DEST (pat)) != REG
      || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
      || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
    return 0;

  /* If it mentions %o7, it can't go in, because sibcall will clobber it
     in most cases.  */
  if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
    return 0;

  return eligible_for_restore_insn (trial, false);
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible if X contains the address of a symbol that is
   not constant (TLS) or not known at final link time (PIC).  */

static bool
sparc_cannot_force_const_mem (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_VECTOR:
      /* Accept all non-symbolic constants.  */
      return false;

    case LABEL_REF:
      /* Labels are OK iff we are non-PIC.  */
      return flag_pic != 0;

    case SYMBOL_REF:
      /* 'Naked' TLS symbol references are never OK,
         non-TLS symbols are OK iff we are non-PIC.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return true;
      else
        return flag_pic != 0;

    case CONST:
      return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
    case PLUS:
    case MINUS:
      return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
             || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
    case UNSPEC:
      return true;
    default:
      gcc_unreachable ();
    }
}

/* Global Offset Table support.  */
static GTY(()) rtx got_helper_rtx = NULL_RTX;
static GTY(()) rtx global_offset_table_rtx = NULL_RTX;

/* Return the SYMBOL_REF for the Global Offset Table.  */

static GTY(()) rtx sparc_got_symbol = NULL_RTX;

static rtx
sparc_got (void)
{
  if (!sparc_got_symbol)
    sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  return sparc_got_symbol;
}

/* Ensure that we are not using patterns that are not OK with PIC.  */

int
check_pic (int i)
{
  rtx op;

  switch (flag_pic)
    {
    case 1:
      op = recog_data.operand[i];
      gcc_assert (GET_CODE (op) != SYMBOL_REF
                  && (GET_CODE (op) != CONST
                      || (GET_CODE (XEXP (op, 0)) == MINUS
                          && XEXP (XEXP (op, 0), 0) == sparc_got ()
                          && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
    case 2:
    default:
      return 1;
    }
}

/* Return true if X is an address which needs a temporary register when
   reloaded while generating PIC code.  */

int
pic_address_needs_scratch (rtx x)
{
  /* An address which is a symbolic plus a non SMALL_INT needs a temp reg.  */
  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
    return 1;

  return 0;
}

/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
sparc_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
    case SYMBOL_REF:
      if (sparc_tls_referenced_p (x))
        return false;
      break;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode)
        return true;

      /* Floating point constants are generally not ok.
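         (SPARC has no instruction that loads an immediate into an FP
         register, so an arbitrary FP constant would have to go through
         memory.)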
         The only exception is 0.0 and all-ones in VIS.  */
      if (TARGET_VIS
          && SCALAR_FLOAT_MODE_P (mode)
          && (const_zero_operand (x, mode)
              || const_all_ones_operand (x, mode)))
        return true;

      return false;

    case CONST_VECTOR:
      /* Vector constants are generally not ok.
         The only exception is 0 or -1 in VIS.  */
      if (TARGET_VIS
          && (const_zero_operand (x, mode)
              || const_all_ones_operand (x, mode)))
        return true;

      return false;

    default:
      break;
    }

  return true;
}

/* Determine if a given RTX is a valid constant address.  */

bool
constant_address_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case LABEL_REF:
    case CONST_INT:
    case HIGH:
      return true;

    case CONST:
      if (flag_pic && pic_address_needs_scratch (x))
        return false;
      return sparc_legitimate_constant_p (Pmode, x);

    case SYMBOL_REF:
      return !flag_pic && sparc_legitimate_constant_p (Pmode, x);

    default:
      return false;
    }
}

/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

bool
legitimate_pic_operand_p (rtx x)
{
  if (pic_address_needs_scratch (x))
    return false;
  if (sparc_tls_referenced_p (x))
    return false;
  return true;
}

#define RTX_OK_FOR_OFFSET_P(X, MODE)                    \
  (CONST_INT_P (X)                                      \
   && INTVAL (X) >= -0x1000                             \
   && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))

#define RTX_OK_FOR_OLO10_P(X, MODE)                     \
  (CONST_INT_P (X)                                      \
   && INTVAL (X) >= -0x1000                             \
   && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))

/* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.

   On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
   ordinarily.  This changes a bit when generating PIC.  */

static bool
sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
{
  rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;

  if (REG_P (addr) || GET_CODE (addr) == SUBREG)
    rs1 = addr;
  else if (GET_CODE (addr) == PLUS)
    {
      rs1 = XEXP (addr, 0);
      rs2 = XEXP (addr, 1);

      /* Canonicalize.  REG comes first, if there are no regs,
         LO_SUM comes first.  */
      if (!REG_P (rs1)
          && GET_CODE (rs1) != SUBREG
          && (REG_P (rs2)
              || GET_CODE (rs2) == SUBREG
              || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
        {
          rs1 = XEXP (addr, 1);
          rs2 = XEXP (addr, 0);
        }

      if ((flag_pic == 1
           && rs1 == pic_offset_table_rtx
           && !REG_P (rs2)
           && GET_CODE (rs2) != SUBREG
           && GET_CODE (rs2) != LO_SUM
           && GET_CODE (rs2) != MEM
           && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
           && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
           && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
          || ((REG_P (rs1)
               || GET_CODE (rs1) == SUBREG)
              && RTX_OK_FOR_OFFSET_P (rs2, mode)))
        {
          imm1 = rs2;
          rs2 = NULL;
        }
      else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
               && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
        {
          /* We prohibit REG + REG for TFmode when there are no quad move insns
             and we consequently need to split.  We do this because REG+REG
             is not an offsettable address.
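             (An offsettable address is one to which reload can add a small
             constant, as needed when a multi-word access is split into
             word-sized pieces; REG+REG leaves no room for such an offset.)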
             If we get the situation in reload
             where source and destination of a movtf pattern are both MEMs with
             REG+REG address, then only one of them gets converted to an
             offsettable address.  */
          if (mode == TFmode
              && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
            return 0;

          /* Likewise for TImode, but in all cases.  */
          if (mode == TImode)
            return 0;

          /* We prohibit REG + REG on ARCH32 if not optimizing for
             DFmode/DImode because then mem_min_alignment is likely to be zero
             after reload and the forced split would lack a matching splitter
             pattern.  */
          if (TARGET_ARCH32 && !optimize
              && (mode == DFmode || mode == DImode))
            return 0;
        }
      else if (USE_AS_OFFSETABLE_LO10
               && GET_CODE (rs1) == LO_SUM
               && TARGET_ARCH64
               && ! TARGET_CM_MEDMID
               && RTX_OK_FOR_OLO10_P (rs2, mode))
        {
          rs2 = NULL;
          imm1 = XEXP (rs1, 1);
          rs1 = XEXP (rs1, 0);
          if (!CONSTANT_P (imm1)
              || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
            return 0;
        }
    }
  else if (GET_CODE (addr) == LO_SUM)
    {
      rs1 = XEXP (addr, 0);
      imm1 = XEXP (addr, 1);

      if (!CONSTANT_P (imm1)
          || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
        return 0;

      /* We can't allow TFmode in 32-bit mode, because an offset greater
         than the alignment (8) may cause the LO_SUM to overflow.  */
      if (mode == TFmode && TARGET_ARCH32)
        return 0;
    }
  else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
    return 1;
  else
    return 0;

  if (GET_CODE (rs1) == SUBREG)
    rs1 = SUBREG_REG (rs1);
  if (!REG_P (rs1))
    return 0;

  if (rs2)
    {
      if (GET_CODE (rs2) == SUBREG)
        rs2 = SUBREG_REG (rs2);
      if (!REG_P (rs2))
        return 0;
    }

  if (strict)
    {
      if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
          || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
        return 0;
    }
  else
    {
      if ((! SPARC_INT_REG_P (REGNO (rs1))
           && REGNO (rs1) != FRAME_POINTER_REGNUM
           && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
          || (rs2
              && (! SPARC_INT_REG_P (REGNO (rs2))
                  && REGNO (rs2) != FRAME_POINTER_REGNUM
                  && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
        return 0;
    }
  return 1;
}

/* Return the SYMBOL_REF for the tls_get_addr function.  */

static GTY(()) rtx sparc_tls_symbol = NULL_RTX;

static rtx
sparc_tls_get_addr (void)
{
  if (!sparc_tls_symbol)
    sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");

  return sparc_tls_symbol;
}

/* Return the Global Offset Table to be used in TLS mode.  */

static rtx
sparc_tls_got (void)
{
  /* In PIC mode, this is just the PIC offset table.  */
  if (flag_pic)
    {
      crtl->uses_pic_offset_table = 1;
      return pic_offset_table_rtx;
    }

  /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
     the GOT symbol with the 32-bit ABI, so we reload the GOT register.  */
  if (TARGET_SUN_TLS && TARGET_ARCH32)
    {
      load_got_register ();
      return global_offset_table_rtx;
    }

  /* In all other cases, we load a new pseudo with the GOT symbol.  */
  return copy_to_reg (sparc_got ());
}

/* Return true if X contains a thread-local symbol.
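   For instance, both a bare thread-local SYMBOL_REF and the wrapped form
   (const (plus (symbol_ref) (const_int))) are detected; the code below
   unwraps the latter before looking at the TLS model.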
*/

static bool
sparc_tls_referenced_p (rtx x)
{
  if (!TARGET_HAVE_TLS)
    return false;

  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
    x = XEXP (XEXP (x, 0), 0);

  if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
    return true;

  /* That's all we handle in sparc_legitimize_tls_address for now.  */
  return false;
}

/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  */

static rtx
sparc_legitimize_tls_address (rtx addr)
{
  rtx temp1, temp2, temp3, ret, o0, got;
  rtx_insn *insn;

  gcc_assert (can_create_pseudo_p ());

  if (GET_CODE (addr) == SYMBOL_REF)
    switch (SYMBOL_REF_TLS_MODEL (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
        start_sequence ();
        temp1 = gen_reg_rtx (SImode);
        temp2 = gen_reg_rtx (SImode);
        ret = gen_reg_rtx (Pmode);
        o0 = gen_rtx_REG (Pmode, 8);
        got = sparc_tls_got ();
        emit_insn (gen_tgd_hi22 (temp1, addr));
        emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
        if (TARGET_ARCH32)
          {
            emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
            insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
                                                   addr, const1_rtx));
          }
        else
          {
            emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
            insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
                                                   addr, const1_rtx));
          }
        use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
        insn = get_insns ();
        end_sequence ();
        emit_libcall_block (insn, ret, o0, addr);
        break;

      case TLS_MODEL_LOCAL_DYNAMIC:
        start_sequence ();
        temp1 = gen_reg_rtx (SImode);
        temp2 = gen_reg_rtx (SImode);
        temp3 = gen_reg_rtx (Pmode);
        ret = gen_reg_rtx (Pmode);
        o0 = gen_rtx_REG (Pmode, 8);
        got = sparc_tls_got ();
        emit_insn (gen_tldm_hi22 (temp1));
        emit_insn (gen_tldm_lo10 (temp2, temp1));
        if (TARGET_ARCH32)
          {
            emit_insn (gen_tldm_add32 (o0, got, temp2));
            insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
                                                    const1_rtx));
          }
        else
          {
            emit_insn (gen_tldm_add64 (o0, got, temp2));
            insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
                                                    const1_rtx));
          }
        use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
        insn = get_insns ();
        end_sequence ();
        emit_libcall_block (insn, temp3, o0,
                            gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                            UNSPEC_TLSLD_BASE));
        temp1 = gen_reg_rtx (SImode);
        temp2 = gen_reg_rtx (SImode);
        emit_insn (gen_tldo_hix22 (temp1, addr));
        emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
        if (TARGET_ARCH32)
          emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
        else
          emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
        break;

      case TLS_MODEL_INITIAL_EXEC:
        temp1 = gen_reg_rtx (SImode);
        temp2 = gen_reg_rtx (SImode);
        temp3 = gen_reg_rtx (Pmode);
        got = sparc_tls_got ();
        emit_insn (gen_tie_hi22 (temp1, addr));
        emit_insn (gen_tie_lo10 (temp2, temp1, addr));
        if (TARGET_ARCH32)
          emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
        else
          emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
        if (TARGET_SUN_TLS)
          {
            ret = gen_reg_rtx (Pmode);
            if (TARGET_ARCH32)
              emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
                                        temp3, addr));
            else
              emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
                                        temp3, addr));
          }
        else
          ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
        break;

      case TLS_MODEL_LOCAL_EXEC:
        temp1 = gen_reg_rtx (Pmode);
        temp2 = gen_reg_rtx (Pmode);
        if (TARGET_ARCH32)
          {
            emit_insn (gen_tle_hix22_sp32 (temp1, addr));
            emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
          }
        else
          {
            emit_insn (gen_tle_hix22_sp64 (temp1, addr));
            emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
          }
        ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
        break;

      default:
        gcc_unreachable ();
      }

  else if (GET_CODE (addr) == CONST)
    {
      rtx base, offset;

      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);

      base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
      offset = XEXP (XEXP (addr, 0), 1);

      base = force_operand (base, NULL_RTX);
      if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
        offset = force_reg (Pmode, offset);
      ret = gen_rtx_PLUS (Pmode, base, offset);
    }

  else
    gcc_unreachable ();  /* for now ... */

  return ret;
}

/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  */

static rtx
sparc_legitimize_pic_address (rtx orig, rtx reg)
{
  bool gotdata_op = false;

  if (GET_CODE (orig) == SYMBOL_REF
      /* See the comment in sparc_expand_move.  */
      || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
    {
      rtx pic_ref, address;
      rtx_insn *insn;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      if (flag_pic == 2)
        {
          /* If not during reload, allocate another temp reg here for loading
             in the address, so that these instructions can be optimized
             properly.  */
          rtx temp_reg = (! can_create_pseudo_p ()
                          ? reg : gen_reg_rtx (Pmode));

          /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
             won't get confused into thinking that these two instructions
             are loading in the true address of the symbol.  If in the
             future a PIC rtx exists, that should be used instead.  */
          if (TARGET_ARCH64)
            {
              emit_insn (gen_movdi_high_pic (temp_reg, orig));
              emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
            }
          else
            {
              emit_insn (gen_movsi_high_pic (temp_reg, orig));
              emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
            }
          address = temp_reg;
          gotdata_op = true;
        }
      else
        address = orig;

      crtl->uses_pic_offset_table = 1;
      if (gotdata_op)
        {
          if (TARGET_ARCH64)
            insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
                                                        pic_offset_table_rtx,
                                                        address, orig));
          else
            insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
                                                        pic_offset_table_rtx,
                                                        address, orig));
        }
      else
        {
          pic_ref
            = gen_const_mem (Pmode,
                             gen_rtx_PLUS (Pmode,
                                           pic_offset_table_rtx, address));
          insn = emit_move_insn (reg, pic_ref);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.
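         The note records that the value loaded into REG is equivalent to
         ORIG, so passes that track equivalences (e.g. loop and CSE) can
         reuse or rematerialize it.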
*/
      set_unique_reg_note (insn, REG_EQUAL, orig);
      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
        return orig;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
      base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
      offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
                                             base == reg ? NULL_RTX : reg);

      if (GET_CODE (offset) == CONST_INT)
        {
          if (SMALL_INT (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
          else if (can_create_pseudo_p ())
            offset = force_reg (Pmode, offset);
          else
            /* If we reach here, then something is seriously wrong.  */
            gcc_unreachable ();
        }
      return gen_rtx_PLUS (Pmode, base, offset);
    }
  else if (GET_CODE (orig) == LABEL_REF)
    /* ??? We ought to be checking that the register is live instead, in case
       it is eliminated.  */
    crtl->uses_pic_offset_table = 1;

  return orig;
}

/* Try machine-dependent ways of modifying an illegitimate address X
   to be legitimate.  If we find one, return the new, valid address.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE is the mode of the operand pointed to by X.

   On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG.  */

static rtx
sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
                          machine_mode mode)
{
  rtx orig_x = x;

  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
                      force_operand (XEXP (x, 0), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
                      force_operand (XEXP (x, 1), NULL_RTX));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
    x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
                      XEXP (x, 1));
  if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
                      force_operand (XEXP (x, 1), NULL_RTX));

  if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
    return x;

  if (sparc_tls_referenced_p (x))
    x = sparc_legitimize_tls_address (x);
  else if (flag_pic)
    x = sparc_legitimize_pic_address (x, NULL_RTX);
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
                      copy_to_mode_reg (Pmode, XEXP (x, 1)));
  else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
    x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
                      copy_to_mode_reg (Pmode, XEXP (x, 0)));
  else if (GET_CODE (x) == SYMBOL_REF
           || GET_CODE (x) == CONST
           || GET_CODE (x) == LABEL_REF)
    x = copy_to_suggested_reg (x, NULL_RTX, Pmode);

  return x;
}

/* Delegitimize an address that was legitimized by the above function.
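   For example, a PIC reference legitimized into
   (lo_sum (reg) (unspec [(symbol_ref "foo")] UNSPEC_MOVE_PIC)) is mapped
   back to the bare (symbol_ref "foo"), which is the form expected e.g. by
   the debug info machinery.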
*/

static rtx
sparc_delegitimize_address (rtx x)
{
  x = delegitimize_mem_from_attrs (x);

  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
    switch (XINT (XEXP (x, 1), 1))
      {
      case UNSPEC_MOVE_PIC:
      case UNSPEC_TLSLE:
        x = XVECEXP (XEXP (x, 1), 0, 0);
        gcc_assert (GET_CODE (x) == SYMBOL_REF);
        break;
      default:
        break;
      }

  /* This is generated by mov{si,di}_pic_label_ref in PIC mode.  */
  if (GET_CODE (x) == MINUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
      && GET_CODE (XEXP (x, 1)) == LO_SUM
      && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
      && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
    {
      x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
      gcc_assert (GET_CODE (x) == LABEL_REF);
    }

  return x;
}

/* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.

   For SPARC, we wish to handle addresses by splitting them into
   HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
   This cuts the number of extra insns by one.

   Do nothing when generating PIC code and the address is a symbolic
   operand or requires a scratch register.  */

rtx
sparc_legitimize_reload_address (rtx x, machine_mode mode,
                                 int opnum, int type,
                                 int ind_levels ATTRIBUTE_UNUSED, int *win)
{
  /* Decompose SImode constants into HIGH+LO_SUM.  */
  if (CONSTANT_P (x)
      && (mode != TFmode || TARGET_ARCH64)
      && GET_MODE (x) == SImode
      && GET_CODE (x) != LO_SUM
      && GET_CODE (x) != HIGH
      && sparc_cmodel <= CM_MEDLOW
      && !(flag_pic
           && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
    {
      x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  /* We have to recognize what we have already generated above.  */
  if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
    {
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
                   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
                   opnum, (enum reload_type)type);
      *win = 1;
      return x;
    }

  *win = 0;
  return x;
}

/* Return true if ADDR (a legitimate address expression)
   has an effect that depends on the machine mode it is used for.

   In PIC mode,

     (mem:HI [%l7+a])

   is not equivalent to

     (mem:QI [%l7+a]) (mem:QI [%l7+a+1])

   because [%l7+a+1] is interpreted as the address of (a+1).
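   That is, bumping the address by 1 changes which GOT-relative entity is
   referenced instead of selecting the next byte of the same object, so the
   meaning of the address depends on the mode of the access.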
*/

static bool
sparc_mode_dependent_address_p (const_rtx addr,
                                addr_space_t as ATTRIBUTE_UNUSED)
{
  if (flag_pic && GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      if (op0 == pic_offset_table_rtx
          && symbolic_operand (op1, VOIDmode))
        return true;
    }

  return false;
}

#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

static void
get_pc_thunk_name (char name[32], unsigned int regno)
{
  const char *reg_name = reg_names[regno];

  /* Skip the leading '%' as that cannot be used in a
     symbol name.  */
  reg_name += 1;

  if (USE_HIDDEN_LINKONCE)
    sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
  else
    ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
}

/* Wrapper around the load_pcrel_sym{si,di} patterns.  */

static rtx
gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
{
  int orig_flag_pic = flag_pic;
  rtx insn;

  /* The load_pcrel_sym{si,di} patterns require absolute addressing.  */
  flag_pic = 0;
  if (TARGET_ARCH64)
    insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
  else
    insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
  flag_pic = orig_flag_pic;

  return insn;
}

/* Emit code to load the GOT register.  */

void
load_got_register (void)
{
  /* In PIC mode, this will retrieve pic_offset_table_rtx.  */
  if (!global_offset_table_rtx)
    global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);

  if (TARGET_VXWORKS_RTP)
    emit_insn (gen_vxworks_load_got ());
  else
    {
      /* The GOT symbol is subject to a PC-relative relocation so we need a
         helper function to add the PC value and thus get the final value.  */
      if (!got_helper_rtx)
        {
          char name[32];
          get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
          got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
        }

      emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
                                     got_helper_rtx,
                                     GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.
     ??? In the case where we don't obey regdecls, this is not sufficient
     since we may not fall out the bottom.  */
  emit_use (global_offset_table_rtx);
}

/* Emit a call instruction with the pattern given by PAT.  ADDR is the
   address of the call target.  */

void
sparc_emit_call_insn (rtx pat, rtx addr)
{
  rtx_insn *insn;

  insn = emit_call_insn (pat);

  /* The PIC register is live on entry to VxWorks PIC PLT entries.  */
  if (TARGET_VXWORKS_RTP
      && flag_pic
      && GET_CODE (addr) == SYMBOL_REF
      && (SYMBOL_REF_DECL (addr)
          ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
          : !SYMBOL_REF_LOCAL_P (addr)))
    {
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
      crtl->uses_pic_offset_table = 1;
    }
}

/* Return 1 if RTX is a MEM which is known to be aligned to at
   least a DESIRED byte boundary.  */

int
mem_min_alignment (rtx mem, int desired)
{
  rtx addr, base, offset;

  /* If it's not a MEM we can't accept it.
*/
  if (GET_CODE (mem) != MEM)
    return 0;

  /* Obviously...  */
  if (!TARGET_UNALIGNED_DOUBLES
      && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned) desired)
    return 1;

  /* ??? The rest of the function predates MEM_ALIGN so
     there is probably a bit of redundancy.  */
  addr = XEXP (mem, 0);
  base = offset = NULL_RTX;
  if (GET_CODE (addr) == PLUS)
    {
      if (GET_CODE (XEXP (addr, 0)) == REG)
        {
          base = XEXP (addr, 0);

          /* What we are saying here is that if the base
             REG is aligned properly, the compiler will make
             sure any REG based index upon it will be so
             as well.  */
          if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
            offset = XEXP (addr, 1);
          else
            offset = const0_rtx;
        }
    }
  else if (GET_CODE (addr) == REG)
    {
      base = addr;
      offset = const0_rtx;
    }

  if (base != NULL_RTX)
    {
      int regno = REGNO (base);

      if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
        {
          /* Check if the compiler has recorded some information
             about the alignment of the base REG.  If reload has
             completed, we already matched with proper alignments.
             If not running global_alloc, reload might give us
             an unaligned pointer to the local stack though.  */
          if (((cfun != 0
                && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
               || (optimize && reload_completed))
              && (INTVAL (offset) & (desired - 1)) == 0)
            return 1;
        }
      else
        {
          if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
            return 1;
        }
    }
  else if (! TARGET_UNALIGNED_DOUBLES
           || CONSTANT_P (addr)
           || GET_CODE (addr) == LO_SUM)
    {
      /* Anything else we know is properly aligned unless
         TARGET_UNALIGNED_DOUBLES is true, in which case we can only assume
         that an access is aligned if it is to a constant address, or the
         address involves a LO_SUM.  */
      return 1;
    }

  /* An obviously unaligned address.  */
  return 0;
}


/* Vectors to keep interesting information about registers where it can easily
   be got.  We used to use the actual mode value as the bit number, but there
   are more than 32 modes now.  Instead we use two tables: one indexed by
   hard register number, and one indexed by mode.  */

/* The purpose of sparc_mode_class is to shrink the range of modes so that
   they all fit (as bit numbers) in a 32-bit word (again).  Each real mode is
   mapped into one sparc_mode_class mode.  */

enum sparc_mode_class {
  H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
  SF_MODE, DF_MODE, TF_MODE, OF_MODE,
  CC_MODE, CCFP_MODE
};

/* Modes for single-word and smaller quantities.  */
#define S_MODES \
  ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-word and smaller quantities.  */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-word and smaller quantities.  */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))

/* Modes for 8-word and smaller quantities.  */
#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))

/* Modes for single-float quantities.  */
#define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))

/* Modes for double-float and smaller quantities.
*/
#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and smaller quantities.  */
#define TF_MODES (DF_MODES | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and smaller quantities.  */
#define OF_MODES (TF_MODES | (1 << (int) OF_MODE))

/* Modes for double-float only quantities.  */
#define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))

/* Modes for quad-float and double-float only quantities.  */
#define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))

/* Modes for quad-float pairs and double-float only quantities.  */
#define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))

/* Modes for condition codes.  */
#define CC_MODES (1 << (int) CC_MODE)
#define CCFP_MODES (1 << (int) CCFP_MODE)

/* Value is 1 if register/mode pair is acceptable on sparc.

   The funny mixture of D and T modes is because integer operations
   do not specially operate on tetra quantities, so non-quad-aligned
   registers can hold quadword quantities (except %o4 and %i4 because
   they cross fixed registers).

   ??? Note that, despite the settings, non-double-aligned parameter
   registers can hold double-word quantities in 32-bit mode.  */

/* This points to either the 32 bit or the 64 bit version.  */
const int *hard_regno_mode_classes;

static const int hard_32bit_mode_classes[] = {
  S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
  T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,

  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};

static const int hard_64bit_mode_classes[] = {
  D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
  O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,

  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
  OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,

  /* FP regs f32 to f63.  Only the even numbered registers actually exist,
     and none can hold SFmode/SImode values.  */
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
  OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,

  /* %fcc[0123] */
  CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,

  /* %icc, %sfp, %gsr */
  CC_MODES, 0, D_MODES
};

int sparc_mode_class [NUM_MACHINE_MODES];

enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];

static void
sparc_init_modes (void)
{
  int i;

  for (i = 0; i < NUM_MACHINE_MODES; i++)
    {
      machine_mode m = (machine_mode) i;
      unsigned int size = GET_MODE_SIZE (m);

      switch (GET_MODE_CLASS (m))
        {
        case MODE_INT:
        case MODE_PARTIAL_INT:
        case MODE_COMPLEX_INT:
          if (size < 4)
            sparc_mode_class[i] = 1 << (int) H_MODE;
          else if (size == 4)
            sparc_mode_class[i] = 1 << (int) S_MODE;
          else if (size == 8)
            sparc_mode_class[i] = 1 << (int) D_MODE;
          else if (size == 16)
            sparc_mode_class[i] = 1 << (int) T_MODE;
          else if (size == 32)
            sparc_mode_class[i] = 1 << (int) O_MODE;
          else
            sparc_mode_class[i] = 0;
          break;
        case MODE_VECTOR_INT:
          if (size == 4)
            sparc_mode_class[i] = 1 << (int) SF_MODE;
          else if (size == 8)
            sparc_mode_class[i] = 1 << (int) DF_MODE;
          else
            sparc_mode_class[i] = 0;
          break;
        case MODE_FLOAT:
        case MODE_COMPLEX_FLOAT:
          if (size == 4)
            sparc_mode_class[i] = 1 << (int) SF_MODE;
          else if (size == 8)
            sparc_mode_class[i] = 1 << (int) DF_MODE;
          else if (size == 16)
            sparc_mode_class[i] = 1 << (int) TF_MODE;
          else if (size == 32)
            sparc_mode_class[i] = 1 << (int) OF_MODE;
          else
            sparc_mode_class[i] = 0;
          break;
        case MODE_CC:
          if (m == CCFPmode || m == CCFPEmode)
            sparc_mode_class[i] = 1 << (int) CCFP_MODE;
          else
            sparc_mode_class[i] = 1 << (int) CC_MODE;
          break;
        default:
          sparc_mode_class[i] = 0;
          break;
        }
    }

  if (TARGET_ARCH64)
    hard_regno_mode_classes = hard_64bit_mode_classes;
  else
    hard_regno_mode_classes = hard_32bit_mode_classes;

  /* Initialize the array used by REGNO_REG_CLASS.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      if (i < 16 && TARGET_V8PLUS)
        sparc_regno_reg_class[i] = I64_REGS;
      else if (i < 32 || i == FRAME_POINTER_REGNUM)
        sparc_regno_reg_class[i] = GENERAL_REGS;
      else if (i < 64)
        sparc_regno_reg_class[i] = FP_REGS;
      else if (i < 96)
        sparc_regno_reg_class[i] = EXTRA_FP_REGS;
      else if (i < 100)
        sparc_regno_reg_class[i] = FPCC_REGS;
      else
        sparc_regno_reg_class[i] = NO_REGS;
    }
}

/* Return whether REGNO, a global or FP register, must be saved/restored.  */

static inline bool
save_global_or_fp_reg_p (unsigned int regno,
                         int leaf_function ATTRIBUTE_UNUSED)
{
  return !call_used_regs[regno] && df_regs_ever_live_p (regno);
}

/* Return whether the return address register (%i7) is needed.  */

static inline bool
return_addr_reg_needed_p (int leaf_function)
{
  /* If it is live, for example because of __builtin_return_address (0).
*/
  if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
    return true;

  /* Otherwise, it is needed as save register if %o7 is clobbered.  */
  if (!leaf_function
      /* Loading the GOT register clobbers %o7.  */
      || crtl->uses_pic_offset_table
      || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
    return true;

  return false;
}

/* Return whether REGNO, a local or in register, must be saved/restored.  */

static bool
save_local_or_in_reg_p (unsigned int regno, int leaf_function)
{
  /* General case: call-saved registers live at some point.  */
  if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
    return true;

  /* Frame pointer register (%fp) if needed.  */
  if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
    return true;

  /* Return address register (%i7) if needed.  */
  if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
    return true;

  /* GOT register (%l7) if needed.  */
  if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
    return true;

  /* If the function accesses prior frames, the frame pointer and the return
     address of the previous frame must be saved on the stack.  */
  if (crtl->accesses_prior_frames
      && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
    return true;

  return false;
}

/* Compute the frame size required by the function.  This function is called
   during the reload pass and also by sparc_expand_prologue.  */

HOST_WIDE_INT
sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
{
  HOST_WIDE_INT frame_size, apparent_frame_size;
  int args_size, n_global_fp_regs = 0;
  bool save_local_in_regs_p = false;
  unsigned int i;

  /* If the function allocates dynamic stack space, the dynamic offset is
     computed early and contains REG_PARM_STACK_SPACE, so we need to cope.  */
  if (leaf_function && !cfun->calls_alloca)
    args_size = 0;
  else
    args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);

  /* Calculate space needed for global registers.  */
  if (TARGET_ARCH64)
    {
      for (i = 0; i < 8; i++)
        if (save_global_or_fp_reg_p (i, 0))
          n_global_fp_regs += 2;
    }
  else
    {
      for (i = 0; i < 8; i += 2)
        if (save_global_or_fp_reg_p (i, 0)
            || save_global_or_fp_reg_p (i + 1, 0))
          n_global_fp_regs += 2;
    }

  /* In the flat window model, find out which local and in registers need to
     be saved.  We don't reserve space in the current frame for them as they
     will be spilled into the register window save area of the caller's frame.
     However, as soon as we use this register window save area, we must create
     that of the current frame to make it the live one.  */
  if (TARGET_FLAT)
    for (i = 16; i < 32; i++)
      if (save_local_or_in_reg_p (i, leaf_function))
        {
          save_local_in_regs_p = true;
          break;
        }

  /* Calculate space needed for FP registers.  */
  for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
    if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
      n_global_fp_regs += 2;

  if (size == 0
      && n_global_fp_regs == 0
      && args_size == 0
      && !save_local_in_regs_p)
    frame_size = apparent_frame_size = 0;
  else
    {
      /* We subtract STARTING_FRAME_OFFSET, remember it's negative.
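         Subtracting a negative value actually enlarges SIZE; the
         '(x + 7) & -8' idiom below then rounds the result up to a multiple
         of 8 bytes (e.g. 13 -> 16).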
*/
      apparent_frame_size = (size - STARTING_FRAME_OFFSET + 7) & -8;
      apparent_frame_size += n_global_fp_regs * 4;

      /* We need to add the size of the outgoing argument area.  */
      frame_size = apparent_frame_size + ((args_size + 7) & -8);

      /* And that of the register window save area.  */
      frame_size += FIRST_PARM_OFFSET (cfun->decl);

      /* Finally, bump to the appropriate alignment.  */
      frame_size = SPARC_STACK_ALIGN (frame_size);
    }

  /* Set up values for use in prologue and epilogue.  */
  sparc_frame_size = frame_size;
  sparc_apparent_frame_size = apparent_frame_size;
  sparc_n_global_fp_regs = n_global_fp_regs;
  sparc_save_local_in_regs_p = save_local_in_regs_p;

  return frame_size;
}

/* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET.  */

int
sparc_initial_elimination_offset (int to)
{
  int offset;

  if (to == STACK_POINTER_REGNUM)
    offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
  else
    offset = 0;

  offset += SPARC_STACK_BIAS;
  return offset;
}

/* Output any necessary .register pseudo-ops.  */

void
sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
{
#ifdef HAVE_AS_REGISTER_PSEUDO_OP
  int i;

  if (TARGET_ARCH32)
    return;

  /* Check if %g[2367] were used without
     .register being printed for them already.  */
  for (i = 2; i < 8; i++)
    {
      if (df_regs_ever_live_p (i)
          && ! sparc_hard_reg_printed [i])
        {
          sparc_hard_reg_printed [i] = 1;
          /* %g7 is used as TLS base register, use #ignore
             for it instead of #scratch.  */
          fprintf (file, "\t.register\t%%g%d, #%s\n", i,
                   i == 7 ? "ignore" : "scratch");
        }
      if (i == 3) i = 5;
    }
#endif
}

#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

#if PROBE_INTERVAL > 4096
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   Note that we don't use the REG+REG addressing mode for the probes because
   of the stack bias in 64-bit mode.  And it doesn't really buy us anything
   so the advantages of having a single code path win here.  */

static void
sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  rtx g1 = gen_rtx_REG (Pmode, 1);

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (size <= PROBE_INTERVAL)
    {
      emit_move_insn (g1, GEN_INT (first));
      emit_insn (gen_rtx_SET (VOIDmode, g1,
                              gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (plus_constant (Pmode, g1, -size));
    }

  /* The run-time loop is made up of 10 insns in the generic case while the
     compile-time loop is made up of 4+2*(n-2) insns for n # of intervals.  */
  else if (size <= 5 * PROBE_INTERVAL)
    {
      HOST_WIDE_INT i;

      emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
      emit_insn (gen_rtx_SET (VOIDmode, g1,
                              gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
      emit_stack_probe (g1);

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
         it exceeds SIZE.  If only two probes are needed, this will not
         generate any code.  Then probe at FIRST + SIZE.
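         As a worked example (leaving the stack bias aside), with
         PROBE_INTERVAL == 4096, FIRST == 4096 and SIZE == 10000, this emits
         probes at SP - 8192, SP - 12288 and finally
         SP - 14096 == SP - (FIRST + SIZE).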
*/
      for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
        {
          emit_insn (gen_rtx_SET (VOIDmode, g1,
                                  plus_constant (Pmode, g1, -PROBE_INTERVAL)));
          emit_stack_probe (g1);
        }

      emit_stack_probe (plus_constant (Pmode, g1,
                                       (i - PROBE_INTERVAL) - size));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      rtx g4 = gen_rtx_REG (Pmode, 4);

      emit_move_insn (g1, GEN_INT (first));


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = size & -PROBE_INTERVAL;
      emit_move_insn (g4, GEN_INT (rounded_size));


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (VOIDmode, g1,
                              gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      emit_insn (gen_rtx_SET (VOIDmode, g4, gen_rtx_MINUS (Pmode, g1, g4)));


      /* Step 3: the loop

         while (TEST_ADDR != LAST_ADDR)
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      if (TARGET_ARCH64)
        emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
      else
        emit_insn (gen_probe_stack_rangesi (g1, g1, g4));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}

/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  static int labelno = 0;
  char loop_lab[32], end_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg1;
  xops[1] = reg2;
  output_asm_insn ("cmp\t%0, %1", xops);
  if (TARGET_ARCH64)
    fputs ("\tbe,pn\t%xcc,", asm_out_file);
  else
    fputs ("\tbe\t", asm_out_file);
  assemble_name_raw (asm_out_file, end_lab);
  fputc ('\n', asm_out_file);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn (" add\t%0, %1, %0", xops);

  /* Probe at TEST_ADDR and branch.
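     The store is placed in the delay slot of the ba, which is why its
     output template below starts with a space, the conventional marker for
     a delay-slot insn in this port's assembly output.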
*/
  if (TARGET_ARCH64)
    fputs ("\tba,pt\t%xcc,", asm_out_file);
  else
    fputs ("\tba\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);
  xops[1] = GEN_INT (SPARC_STACK_BIAS);
  output_asm_insn (" st\t%%g0, [%0+%1]", xops);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);

  return "";
}

/* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
   needed.  LOW is supposed to be double-word aligned for 32-bit registers.
   SAVE_P decides whether a register must be saved/restored.  ACTION_TRUE
   is the action to be performed if SAVE_P returns true and ACTION_FALSE
   the action to be performed if it returns false.  Return the new offset.  */

typedef bool (*sorr_pred_t) (unsigned int, int);
typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;

static int
emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
                           int offset, int leaf_function, sorr_pred_t save_p,
                           sorr_act_t action_true, sorr_act_t action_false)
{
  unsigned int i;
  rtx mem;
  rtx_insn *insn;

  if (TARGET_ARCH64 && high <= 32)
    {
      int fp_offset = -1;

      for (i = low; i < high; i++)
        {
          if (save_p (i, leaf_function))
            {
              mem = gen_frame_mem (DImode, plus_constant (Pmode,
                                                          base, offset));
              if (action_true == SORR_SAVE)
                {
                  insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
                  RTX_FRAME_RELATED_P (insn) = 1;
                }
              else  /* action_true == SORR_RESTORE */
                {
                  /* The frame pointer must be restored last since its old
                     value may be used as base address for the frame.  This
                     is problematic in 64-bit mode only because of the lack
                     of double-word load instruction.  */
                  if (i == HARD_FRAME_POINTER_REGNUM)
                    fp_offset = offset;
                  else
                    emit_move_insn (gen_rtx_REG (DImode, i), mem);
                }
              offset += 8;
            }
          else if (action_false == SORR_ADVANCE)
            offset += 8;
        }

      if (fp_offset >= 0)
        {
          mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
          emit_move_insn (hard_frame_pointer_rtx, mem);
        }
    }
  else
    {
      for (i = low; i < high; i += 2)
        {
          bool reg0 = save_p (i, leaf_function);
          bool reg1 = save_p (i + 1, leaf_function);
          machine_mode mode;
          int regno;

          if (reg0 && reg1)
            {
              mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
              regno = i;
            }
          else if (reg0)
            {
              mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
              regno = i;
            }
          else if (reg1)
            {
              mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
              regno = i + 1;
              offset += 4;
            }
          else
            {
              if (action_false == SORR_ADVANCE)
                offset += 8;
              continue;
            }

          mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
          if (action_true == SORR_SAVE)
            {
              insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
              RTX_FRAME_RELATED_P (insn) = 1;
              if (mode == DImode)
                {
                  rtx set1, set2;
                  mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
                                                              offset));
                  set1 = gen_rtx_SET (VOIDmode, mem,
                                      gen_rtx_REG (SImode, regno));
                  RTX_FRAME_RELATED_P (set1) = 1;
                  mem
                    = gen_frame_mem (SImode, plus_constant (Pmode, base,
                                                            offset + 4));
                  set2 = gen_rtx_SET (VOIDmode, mem,
                                      gen_rtx_REG (SImode, regno + 1));
                  RTX_FRAME_RELATED_P (set2) = 1;
                  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                                gen_rtx_PARALLEL (VOIDmode,
                                                  gen_rtvec (2, set1, set2)));
                }
            }
          else  /* action_true == SORR_RESTORE */
            emit_move_insn (gen_rtx_REG (mode, regno), mem);

          /* Always preserve double-word alignment.  */
          offset = (offset + 8) & -8;
        }
    }

  return offset;
}

/* Emit code to adjust BASE to OFFSET.  Return the new base.  */

static rtx
emit_adjust_base_to_offset (rtx base, int offset)
{
  /* ??? This might be optimized a little as %g1 might already have a
     value close enough that a single add insn will do.  */
  /* ??? Although, all of this is probably only a temporary fix because
     if %g1 can hold a function result, then sparc_expand_epilogue will
     lose (the result will be clobbered).  */
  rtx new_base = gen_rtx_REG (Pmode, 1);
  emit_move_insn (new_base, GEN_INT (offset));
  emit_insn (gen_rtx_SET (VOIDmode,
                          new_base, gen_rtx_PLUS (Pmode, base, new_base)));
  return new_base;
}

/* Emit code to save/restore call-saved global and FP registers.  */

static void
emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
{
  if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
    {
      base = emit_adjust_base_to_offset (base, offset);
      offset = 0;
    }

  offset
    = emit_save_or_restore_regs (0, 8, base, offset, 0,
                                 save_global_or_fp_reg_p, action, SORR_NONE);
  emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
                             save_global_or_fp_reg_p, action, SORR_NONE);
}

/* Emit code to save/restore call-saved local and in registers.  */

static void
emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
{
  if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
    {
      base = emit_adjust_base_to_offset (base, offset);
      offset = 0;
    }

  emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
                             save_local_or_in_reg_p, action, SORR_ADVANCE);
}

/* Emit a window_save insn.  */

static rtx_insn *
emit_window_save (rtx increment)
{
  rtx_insn *insn = emit_insn (gen_window_save (increment));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* The incoming return address (%o7) is saved in %i7.  */
  add_reg_note (insn, REG_CFA_REGISTER,
                gen_rtx_SET (VOIDmode,
                             gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
                             gen_rtx_REG (Pmode,
                                          INCOMING_RETURN_ADDR_REGNUM)));

  /* The window save event.
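     This note tells the DWARF CFI machinery that a register window has been
     saved, so the unwinder knows to fetch the caller's registers from the
     register window save area.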
*/
  add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);

  /* The CFA is %fp, the hard frame pointer.  */
  add_reg_note (insn, REG_CFA_DEF_CFA,
                plus_constant (Pmode, hard_frame_pointer_rtx,
                               INCOMING_FRAME_SP_OFFSET));

  return insn;
}

/* Generate an increment for the stack pointer.  */

static rtx
gen_stack_pointer_inc (rtx increment)
{
  return gen_rtx_SET (VOIDmode,
                      stack_pointer_rtx,
                      gen_rtx_PLUS (Pmode,
                                    stack_pointer_rtx,
                                    increment));
}

/* Expand the function prologue.  The prologue is responsible for reserving
   storage for the frame, saving the call-saved registers and loading the
   GOT register if needed.  */

void
sparc_expand_prologue (void)
{
  HOST_WIDE_INT size;
  rtx_insn *insn;

  /* Compute a snapshot of crtl->uses_only_leaf_regs.  Relying
     on the final value of the flag means deferring the prologue/epilogue
     expansion until just before the second scheduling pass, which is too
     late to emit multiple epilogues or return insns.

     Of course we are making the assumption that the value of the flag
     will not change between now and its final value.  Of the three parts
     of the formula, only the last one can reasonably vary.  Let's take a
     closer look, after assuming that the first two ones are set to true
     (otherwise the last value is effectively silenced).

     If only_leaf_regs_used returns false, the global predicate will also
     be false so the actual frame size calculated below will be positive.
     As a consequence, the save_register_window insn will be emitted in
     the instruction stream; now this insn explicitly references %fp
     which is not a leaf register so only_leaf_regs_used will always
     return false subsequently.

     If only_leaf_regs_used returns true, we hope that the subsequent
     optimization passes won't cause non-leaf registers to pop up.  For
     example, the regrename pass has special provisions to not rename to
     non-leaf registers in a leaf function.  */
  sparc_leaf_function_p
    = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();

  size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);

  if (flag_stack_usage_info)
    current_function_static_stack_size = size;

  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    {
      if (crtl->is_leaf && !cfun->calls_alloca)
        {
          if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
            sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
                                          size - STACK_CHECK_PROTECT);
        }
      else if (size > 0)
        sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
    }

  if (size == 0)
    ;  /* do nothing.  */
  else if (sparc_leaf_function_p)
    {
      rtx size_int_rtx = GEN_INT (-size);

      if (size <= 4096)
        insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
      else if (size <= 8192)
        {
          insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
          RTX_FRAME_RELATED_P (insn) = 1;

          /* %sp is still the CFA register.
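             Therefore the second adjustment below can simply be flagged as
             frame-related as well, unlike in the register window case below
             where the CFA moves to %fp after the window save.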
*/ 5548 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 5549 } 5550 else 5551 { 5552 rtx size_rtx = gen_rtx_REG (Pmode, 1); 5553 emit_move_insn (size_rtx, size_int_rtx); 5554 insn = emit_insn (gen_stack_pointer_inc (size_rtx)); 5555 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 5556 gen_stack_pointer_inc (size_int_rtx)); 5557 } 5558 5559 RTX_FRAME_RELATED_P (insn) = 1; 5560 } 5561 else 5562 { 5563 rtx size_int_rtx = GEN_INT (-size); 5564 5565 if (size <= 4096) 5566 emit_window_save (size_int_rtx); 5567 else if (size <= 8192) 5568 { 5569 emit_window_save (GEN_INT (-4096)); 5570 5571 /* %sp is not the CFA register anymore. */ 5572 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 5573 5574 /* Make sure no %fp-based store is issued until after the frame is 5575 established. The offset between the frame pointer and the stack 5576 pointer is calculated relative to the value of the stack pointer 5577 at the end of the function prologue, and moving instructions that 5578 access the stack via the frame pointer between the instructions 5579 that decrement the stack pointer could result in accessing the 5580 register window save area, which is volatile. */ 5581 emit_insn (gen_frame_blockage ()); 5582 } 5583 else 5584 { 5585 rtx size_rtx = gen_rtx_REG (Pmode, 1); 5586 emit_move_insn (size_rtx, size_int_rtx); 5587 emit_window_save (size_rtx); 5588 } 5589 } 5590 5591 if (sparc_leaf_function_p) 5592 { 5593 sparc_frame_base_reg = stack_pointer_rtx; 5594 sparc_frame_base_offset = size + SPARC_STACK_BIAS; 5595 } 5596 else 5597 { 5598 sparc_frame_base_reg = hard_frame_pointer_rtx; 5599 sparc_frame_base_offset = SPARC_STACK_BIAS; 5600 } 5601 5602 if (sparc_n_global_fp_regs > 0) 5603 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 5604 sparc_frame_base_offset 5605 - sparc_apparent_frame_size, 5606 SORR_SAVE); 5607 5608 /* Load the GOT register if needed. */ 5609 if (crtl->uses_pic_offset_table) 5610 load_got_register (); 5611 5612 /* Advertise that the data calculated just above are now valid. */ 5613 sparc_prologue_data_valid_p = true; 5614 } 5615 5616 /* Expand the function prologue. The prologue is responsible for reserving 5617 storage for the frame, saving the call-saved registers and loading the 5618 GOT register if needed. */ 5619 5620 void 5621 sparc_flat_expand_prologue (void) 5622 { 5623 HOST_WIDE_INT size; 5624 rtx_insn *insn; 5625 5626 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf; 5627 5628 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p); 5629 5630 if (flag_stack_usage_info) 5631 current_function_static_stack_size = size; 5632 5633 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) 5634 { 5635 if (crtl->is_leaf && !cfun->calls_alloca) 5636 { 5637 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT) 5638 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, 5639 size - STACK_CHECK_PROTECT); 5640 } 5641 else if (size > 0) 5642 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size); 5643 } 5644 5645 if (sparc_save_local_in_regs_p) 5646 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS, 5647 SORR_SAVE); 5648 5649 if (size == 0) 5650 ; /* do nothing. */ 5651 else 5652 { 5653 rtx size_int_rtx, size_rtx; 5654 5655 size_rtx = size_int_rtx = GEN_INT (-size); 5656 5657 /* We establish the frame (i.e. 
decrement the stack pointer) first, even 5658 if we use a frame pointer, because we cannot clobber any call-saved 5659 registers, including the frame pointer, if we haven't created a new 5660 register save area, for the sake of compatibility with the ABI. */ 5661 if (size <= 4096) 5662 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx)); 5663 else if (size <= 8192 && !frame_pointer_needed) 5664 { 5665 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096))); 5666 RTX_FRAME_RELATED_P (insn) = 1; 5667 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 5668 } 5669 else 5670 { 5671 size_rtx = gen_rtx_REG (Pmode, 1); 5672 emit_move_insn (size_rtx, size_int_rtx); 5673 insn = emit_insn (gen_stack_pointer_inc (size_rtx)); 5674 add_reg_note (insn, REG_CFA_ADJUST_CFA, 5675 gen_stack_pointer_inc (size_int_rtx)); 5676 } 5677 RTX_FRAME_RELATED_P (insn) = 1; 5678 5679 /* Ensure nothing is scheduled until after the frame is established. */ 5680 emit_insn (gen_blockage ()); 5681 5682 if (frame_pointer_needed) 5683 { 5684 insn = emit_insn (gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, 5685 gen_rtx_MINUS (Pmode, 5686 stack_pointer_rtx, 5687 size_rtx))); 5688 RTX_FRAME_RELATED_P (insn) = 1; 5689 5690 add_reg_note (insn, REG_CFA_ADJUST_CFA, 5691 gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx, 5692 plus_constant (Pmode, stack_pointer_rtx, 5693 size))); 5694 } 5695 5696 if (return_addr_reg_needed_p (sparc_leaf_function_p)) 5697 { 5698 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM); 5699 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); 5700 5701 insn = emit_move_insn (i7, o7); 5702 RTX_FRAME_RELATED_P (insn) = 1; 5703 5704 add_reg_note (insn, REG_CFA_REGISTER, 5705 gen_rtx_SET (VOIDmode, i7, o7)); 5706 5707 /* Prevent this instruction from ever being considered dead, 5708 even if this function has no epilogue. */ 5709 emit_use (i7); 5710 } 5711 } 5712 5713 if (frame_pointer_needed) 5714 { 5715 sparc_frame_base_reg = hard_frame_pointer_rtx; 5716 sparc_frame_base_offset = SPARC_STACK_BIAS; 5717 } 5718 else 5719 { 5720 sparc_frame_base_reg = stack_pointer_rtx; 5721 sparc_frame_base_offset = size + SPARC_STACK_BIAS; 5722 } 5723 5724 if (sparc_n_global_fp_regs > 0) 5725 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 5726 sparc_frame_base_offset 5727 - sparc_apparent_frame_size, 5728 SORR_SAVE); 5729 5730 /* Load the GOT register if needed. */ 5731 if (crtl->uses_pic_offset_table) 5732 load_got_register (); 5733 5734 /* Advertise that the data calculated just above are now valid. */ 5735 sparc_prologue_data_valid_p = true; 5736 } 5737 5738 /* This function generates the assembly code for function entry, which boils 5739 down to emitting the necessary .register directives. */ 5740 5741 static void 5742 sparc_asm_function_prologue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 5743 { 5744 /* Check that the assumption we made in sparc_expand_prologue is valid. */ 5745 if (!TARGET_FLAT) 5746 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs); 5747 5748 sparc_output_scratch_registers (file); 5749 } 5750 5751 /* Expand the function epilogue, either normal or part of a sibcall. 5752 We emit all the instructions except the return or the call. 
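For a leaf function, this boils down to rewinding the stack pointer, e.g. for a hypothetical 104-byte frame:

	add	%sp, 104, %sp

For a regular function, no explicit adjustment is needed here at all: the 'restore' emitted along with the return instruction pops the register window and the frame in one go.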
*/ 5753 5754 void 5755 sparc_expand_epilogue (bool for_eh) 5756 { 5757 HOST_WIDE_INT size = sparc_frame_size; 5758 5759 if (cfun->calls_alloca) 5760 emit_insn (gen_frame_blockage ()); 5761 5762 if (sparc_n_global_fp_regs > 0) 5763 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 5764 sparc_frame_base_offset 5765 - sparc_apparent_frame_size, 5766 SORR_RESTORE); 5767 5768 if (size == 0 || for_eh) 5769 ; /* do nothing. */ 5770 else if (sparc_leaf_function_p) 5771 { 5772 if (size <= 4096) 5773 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 5774 else if (size <= 8192) 5775 { 5776 emit_insn (gen_stack_pointer_inc (GEN_INT (4096))); 5777 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096))); 5778 } 5779 else 5780 { 5781 rtx reg = gen_rtx_REG (Pmode, 1); 5782 emit_move_insn (reg, GEN_INT (size)); 5783 emit_insn (gen_stack_pointer_inc (reg)); 5784 } 5785 } 5786 } 5787 5788 /* Expand the function epilogue, either normal or part of a sibcall. 5789 We emit all the instructions except the return or the call. */ 5790 5791 void 5792 sparc_flat_expand_epilogue (bool for_eh) 5793 { 5794 HOST_WIDE_INT size = sparc_frame_size; 5795 5796 if (sparc_n_global_fp_regs > 0) 5797 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 5798 sparc_frame_base_offset 5799 - sparc_apparent_frame_size, 5800 SORR_RESTORE); 5801 5802 /* If we have a frame pointer, we'll need both to restore it before the 5803 frame is destroyed and use its current value in destroying the frame. 5804 Since we don't have an atomic way to do that in the flat window model, 5805 we save the current value into a temporary register (%g1). */ 5806 if (frame_pointer_needed && !for_eh) 5807 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx); 5808 5809 if (return_addr_reg_needed_p (sparc_leaf_function_p)) 5810 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM), 5811 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM)); 5812 5813 if (sparc_save_local_in_regs_p) 5814 emit_save_or_restore_local_in_regs (sparc_frame_base_reg, 5815 sparc_frame_base_offset, 5816 SORR_RESTORE); 5817 5818 if (size == 0 || for_eh) 5819 ; /* do nothing. */ 5820 else if (frame_pointer_needed) 5821 { 5822 /* Make sure the frame is destroyed after everything else is done. */ 5823 emit_insn (gen_blockage ()); 5824 5825 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1)); 5826 } 5827 else 5828 { 5829 /* Likewise. */ 5830 emit_insn (gen_blockage ()); 5831 5832 if (size <= 4096) 5833 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 5834 else if (size <= 8192) 5835 { 5836 emit_insn (gen_stack_pointer_inc (GEN_INT (4096))); 5837 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096))); 5838 } 5839 else 5840 { 5841 rtx reg = gen_rtx_REG (Pmode, 1); 5842 emit_move_insn (reg, GEN_INT (size)); 5843 emit_insn (gen_stack_pointer_inc (reg)); 5844 } 5845 } 5846 } 5847 5848 /* Return true if it is appropriate to emit `return' instructions in the 5849 body of a function. */ 5850 5851 bool 5852 sparc_can_use_return_insn_p (void) 5853 { 5854 return sparc_prologue_data_valid_p 5855 && sparc_n_global_fp_regs == 0 5856 && TARGET_FLAT 5857 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p) 5858 : (sparc_frame_size == 0 || !sparc_leaf_function_p); 5859 } 5860 5861 /* This function generates the assembly code for function exit. 
*/ 5862 5863 static void 5864 sparc_asm_function_epilogue (FILE *file, HOST_WIDE_INT size ATTRIBUTE_UNUSED) 5865 { 5866 /* If the last two instructions of a function are "call foo; dslot;" 5867 the return address might point to the first instruction in the next 5868 function and we have to output a dummy nop for the sake of sane 5869 backtraces in such cases. This is pointless for sibling calls since 5870 the return address is explicitly adjusted. */ 5871 5872 rtx insn, last_real_insn; 5873 5874 insn = get_last_insn (); 5875 5876 last_real_insn = prev_real_insn (insn); 5877 if (last_real_insn 5878 && NONJUMP_INSN_P (last_real_insn) 5879 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE) 5880 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0); 5881 5882 if (last_real_insn 5883 && CALL_P (last_real_insn) 5884 && !SIBLING_CALL_P (last_real_insn)) 5885 fputs("\tnop\n", file); 5886 5887 sparc_output_deferred_case_vectors (); 5888 } 5889 5890 /* Output a 'restore' instruction. */ 5891 5892 static void 5893 output_restore (rtx pat) 5894 { 5895 rtx operands[3]; 5896 5897 if (! pat) 5898 { 5899 fputs ("\t restore\n", asm_out_file); 5900 return; 5901 } 5902 5903 gcc_assert (GET_CODE (pat) == SET); 5904 5905 operands[0] = SET_DEST (pat); 5906 pat = SET_SRC (pat); 5907 5908 switch (GET_CODE (pat)) 5909 { 5910 case PLUS: 5911 operands[1] = XEXP (pat, 0); 5912 operands[2] = XEXP (pat, 1); 5913 output_asm_insn (" restore %r1, %2, %Y0", operands); 5914 break; 5915 case LO_SUM: 5916 operands[1] = XEXP (pat, 0); 5917 operands[2] = XEXP (pat, 1); 5918 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands); 5919 break; 5920 case ASHIFT: 5921 operands[1] = XEXP (pat, 0); 5922 gcc_assert (XEXP (pat, 1) == const1_rtx); 5923 output_asm_insn (" restore %r1, %r1, %Y0", operands); 5924 break; 5925 default: 5926 operands[1] = pat; 5927 output_asm_insn (" restore %%g0, %1, %Y0", operands); 5928 break; 5929 } 5930 } 5931 5932 /* Output a return. */ 5933 5934 const char * 5935 output_return (rtx_insn *insn) 5936 { 5937 if (crtl->calls_eh_return) 5938 { 5939 /* If the function uses __builtin_eh_return, the eh_return 5940 machinery occupies the delay slot. */ 5941 gcc_assert (!final_sequence); 5942 5943 if (flag_delayed_branch) 5944 { 5945 if (!TARGET_FLAT && TARGET_V9) 5946 fputs ("\treturn\t%i7+8\n", asm_out_file); 5947 else 5948 { 5949 if (!TARGET_FLAT) 5950 fputs ("\trestore\n", asm_out_file); 5951 5952 fputs ("\tjmp\t%o7+8\n", asm_out_file); 5953 } 5954 5955 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file); 5956 } 5957 else 5958 { 5959 if (!TARGET_FLAT) 5960 fputs ("\trestore\n", asm_out_file); 5961 5962 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file); 5963 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file); 5964 } 5965 } 5966 else if (sparc_leaf_function_p || TARGET_FLAT) 5967 { 5968 /* This is a leaf or flat function so we don't have to bother restoring 5969 the register window, which frees us from dealing with the convoluted 5970 semantics of restore/return. We simply output the jump to the 5971 return address and the insn in the delay slot (if any). */ 5972 5973 return "jmp\t%%o7+%)%#"; 5974 } 5975 else 5976 { 5977 /* This is a regular function so we have to restore the register window. 5978 We may have a pending insn for the delay slot, which will be either 5979 combined with the 'restore' instruction or put in the delay slot of 5980 the 'return' instruction. 
*/ 5981 5982 if (final_sequence) 5983 { 5984 rtx delay, pat; 5985 5986 delay = NEXT_INSN (insn); 5987 gcc_assert (delay); 5988 5989 pat = PATTERN (delay); 5990 5991 if (TARGET_V9 && ! epilogue_renumber (&pat, 1)) 5992 { 5993 epilogue_renumber (&pat, 0); 5994 return "return\t%%i7+%)%#"; 5995 } 5996 else 5997 { 5998 output_asm_insn ("jmp\t%%i7+%)", NULL); 5999 output_restore (pat); 6000 PATTERN (delay) = gen_blockage (); 6001 INSN_CODE (delay) = -1; 6002 } 6003 } 6004 else 6005 { 6006 /* The delay slot is empty. */ 6007 if (TARGET_V9) 6008 return "return\t%%i7+%)\n\t nop"; 6009 else if (flag_delayed_branch) 6010 return "jmp\t%%i7+%)\n\t restore"; 6011 else 6012 return "restore\n\tjmp\t%%o7+%)\n\t nop"; 6013 } 6014 } 6015 6016 return ""; 6017 } 6018 6019 /* Output a sibling call. */ 6020 6021 const char * 6022 output_sibcall (rtx_insn *insn, rtx call_operand) 6023 { 6024 rtx operands[1]; 6025 6026 gcc_assert (flag_delayed_branch); 6027 6028 operands[0] = call_operand; 6029 6030 if (sparc_leaf_function_p || TARGET_FLAT) 6031 { 6032 /* This is a leaf or flat function so we don't have to bother restoring 6033 the register window. We simply output the jump to the function and 6034 the insn in the delay slot (if any). */ 6035 6036 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence)); 6037 6038 if (final_sequence) 6039 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#", 6040 operands); 6041 else 6042 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize 6043 it into branch if possible. */ 6044 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7", 6045 operands); 6046 } 6047 else 6048 { 6049 /* This is a regular function so we have to restore the register window. 6050 We may have a pending insn for the delay slot, which will be combined 6051 with the 'restore' instruction. */ 6052 6053 output_asm_insn ("call\t%a0, 0", operands); 6054 6055 if (final_sequence) 6056 { 6057 rtx_insn *delay = NEXT_INSN (insn); 6058 gcc_assert (delay); 6059 6060 output_restore (PATTERN (delay)); 6061 6062 PATTERN (delay) = gen_blockage (); 6063 INSN_CODE (delay) = -1; 6064 } 6065 else 6066 output_restore (NULL_RTX); 6067 } 6068 6069 return ""; 6070 } 6071 6072 /* Functions for handling argument passing. 6073 6074 For 32-bit, the first 6 args are normally in registers and the rest are 6075 pushed. Any arg that starts within the first 6 words is at least 6076 partially passed in a register unless its data type forbids. 6077 6078 For 64-bit, the argument registers are laid out as an array of 16 elements 6079 and arguments are added sequentially. The first 6 int args and up to the 6080 first 16 fp args (depending on size) are passed in regs. 
6081 6082 Slot Stack Integral Float Float in structure Double Long Double 6083 ---- ----- -------- ----- ------------------ ------ ----------- 6084 15 [SP+248] %f31 %f30,%f31 %d30 6085 14 [SP+240] %f29 %f28,%f29 %d28 %q28 6086 13 [SP+232] %f27 %f26,%f27 %d26 6087 12 [SP+224] %f25 %f24,%f25 %d24 %q24 6088 11 [SP+216] %f23 %f22,%f23 %d22 6089 10 [SP+208] %f21 %f20,%f21 %d20 %q20 6090 9 [SP+200] %f19 %f18,%f19 %d18 6091 8 [SP+192] %f17 %f16,%f17 %d16 %q16 6092 7 [SP+184] %f15 %f14,%f15 %d14 6093 6 [SP+176] %f13 %f12,%f13 %d12 %q12 6094 5 [SP+168] %o5 %f11 %f10,%f11 %d10 6095 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8 6096 3 [SP+152] %o3 %f7 %f6,%f7 %d6 6097 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4 6098 1 [SP+136] %o1 %f3 %f2,%f3 %d2 6099 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0 6100 6101 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise. 6102 6103 Integral arguments are always passed as 64-bit quantities appropriately 6104 extended. 6105 6106 Passing of floating point values is handled as follows. 6107 If a prototype is in scope: 6108 If the value is in a named argument (i.e. not a stdarg function or a 6109 value not part of the `...') then the value is passed in the appropriate 6110 fp reg. 6111 If the value is part of the `...' and is passed in one of the first 6 6112 slots then the value is passed in the appropriate int reg. 6113 If the value is part of the `...' and is not passed in one of the first 6 6114 slots then the value is passed in memory. 6115 If a prototype is not in scope: 6116 If the value is one of the first 6 arguments the value is passed in the 6117 appropriate integer reg and the appropriate fp reg. 6118 If the value is not one of the first 6 arguments the value is passed in 6119 the appropriate fp reg and in memory. 6120 6121 6122 Summary of the calling conventions implemented by GCC on the SPARC: 6123 6124 32-bit ABI: 6125 size argument return value 6126 6127 small integer <4 int. reg. int. reg. 6128 word 4 int. reg. int. reg. 6129 double word 8 int. reg. int. reg. 6130 6131 _Complex small integer <8 int. reg. int. reg. 6132 _Complex word 8 int. reg. int. reg. 6133 _Complex double word 16 memory int. reg. 6134 6135 vector integer <=8 int. reg. FP reg. 6136 vector integer >8 memory memory 6137 6138 float 4 int. reg. FP reg. 6139 double 8 int. reg. FP reg. 6140 long double 16 memory memory 6141 6142 _Complex float 8 memory FP reg. 6143 _Complex double 16 memory FP reg. 6144 _Complex long double 32 memory FP reg. 6145 6146 vector float any memory memory 6147 6148 aggregate any memory memory 6149 6150 6151 6152 64-bit ABI: 6153 size argument return value 6154 6155 small integer <8 int. reg. int. reg. 6156 word 8 int. reg. int. reg. 6157 double word 16 int. reg. int. reg. 6158 6159 _Complex small integer <16 int. reg. int. reg. 6160 _Complex word 16 int. reg. int. reg. 6161 _Complex double word 32 memory int. reg. 6162 6163 vector integer <=16 FP reg. FP reg. 6164 vector integer 16<s<=32 memory FP reg. 6165 vector integer >32 memory memory 6166 6167 float 4 FP reg. FP reg. 6168 double 8 FP reg. FP reg. 6169 long double 16 FP reg. FP reg. 6170 6171 _Complex float 8 FP reg. FP reg. 6172 _Complex double 16 FP reg. FP reg. 6173 _Complex long double 32 memory FP reg. 6174 6175 vector float <=16 FP reg. FP reg. 6176 vector float 16<s<=32 memory FP reg. 6177 vector float >32 memory memory 6178 6179 aggregate <=16 reg. reg. 6180 aggregate 16<s<=32 memory reg. 
6181 aggregate >32 memory memory 6182 6183 6184 6185 Note #1: complex floating-point types follow the extended SPARC ABIs as 6186 implemented by the Sun compiler. 6187 6188 Note #2: integral vector types follow the scalar floating-point types 6189 conventions to match what is implemented by the Sun VIS SDK. 6190 6191 Note #3: floating-point vector types follow the aggregate types 6192 conventions. */ 6193 6194 6195 /* Maximum number of int regs for args. */ 6196 #define SPARC_INT_ARG_MAX 6 6197 /* Maximum number of fp regs for args. */ 6198 #define SPARC_FP_ARG_MAX 16 6199 6200 #define ROUND_ADVANCE(SIZE) (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD) 6201 6202 /* Handle the INIT_CUMULATIVE_ARGS macro. 6203 Initialize a variable CUM of type CUMULATIVE_ARGS 6204 for a call to a function whose data type is FNTYPE. 6205 For a library call, FNTYPE is 0. */ 6206 6207 void 6208 init_cumulative_args (struct sparc_args *cum, tree fntype, 6209 rtx libname ATTRIBUTE_UNUSED, 6210 tree fndecl ATTRIBUTE_UNUSED) 6211 { 6212 cum->words = 0; 6213 cum->prototype_p = fntype && prototype_p (fntype); 6214 cum->libcall_p = fntype == 0; 6215 } 6216 6217 /* Handle promotion of pointer and integer arguments. */ 6218 6219 static machine_mode 6220 sparc_promote_function_mode (const_tree type, 6221 machine_mode mode, 6222 int *punsignedp, 6223 const_tree fntype ATTRIBUTE_UNUSED, 6224 int for_return ATTRIBUTE_UNUSED) 6225 { 6226 if (type != NULL_TREE && POINTER_TYPE_P (type)) 6227 { 6228 *punsignedp = POINTERS_EXTEND_UNSIGNED; 6229 return Pmode; 6230 } 6231 6232 /* Integral arguments are passed as full words, as per the ABI. */ 6233 if (GET_MODE_CLASS (mode) == MODE_INT 6234 && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 6235 return word_mode; 6236 6237 return mode; 6238 } 6239 6240 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */ 6241 6242 static bool 6243 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED) 6244 { 6245 return TARGET_ARCH64 ? true : false; 6246 } 6247 6248 /* Scan the record type TYPE and return the following predicates: 6249 - INTREGS_P: the record contains at least one field or sub-field 6250 that is eligible for promotion in integer registers. 6251 - FP_REGS_P: the record contains at least one field or sub-field 6252 that is eligible for promotion in floating-point registers. 6253 - PACKED_P: the record contains at least one field that is packed. 6254 6255 Sub-fields are not taken into account for the PACKED_P predicate. */ 6256 6257 static void 6258 scan_record_type (const_tree type, int *intregs_p, int *fpregs_p, 6259 int *packed_p) 6260 { 6261 tree field; 6262 6263 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6264 { 6265 if (TREE_CODE (field) == FIELD_DECL) 6266 { 6267 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) 6268 scan_record_type (TREE_TYPE (field), intregs_p, fpregs_p, 0); 6269 else if ((FLOAT_TYPE_P (TREE_TYPE (field)) 6270 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) 6271 && TARGET_FPU) 6272 *fpregs_p = 1; 6273 else 6274 *intregs_p = 1; 6275 6276 if (packed_p && DECL_PACKED (field)) 6277 *packed_p = 1; 6278 } 6279 } 6280 } 6281 6282 /* Compute the slot number to pass an argument in. 6283 Return the slot number or -1 if passing on the stack. 6284 6285 CUM is a variable of type CUMULATIVE_ARGS which gives info about 6286 the preceding args and about the function being called. 6287 MODE is the argument's machine mode. 6288 TYPE is the data type of the argument (as a tree). 
6289 This is null for libcalls where that information may 6290 not be available. 6291 NAMED is nonzero if this argument is a named parameter 6292 (otherwise it is an extra parameter matching an ellipsis). 6293 INCOMING_P is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. 6294 *PREGNO records the register number to use if scalar type. 6295 *PPADDING records the amount of padding needed in words. */ 6296 6297 static int 6298 function_arg_slotno (const struct sparc_args *cum, machine_mode mode, 6299 const_tree type, bool named, bool incoming_p, 6300 int *pregno, int *ppadding) 6301 { 6302 int regbase = (incoming_p 6303 ? SPARC_INCOMING_INT_ARG_FIRST 6304 : SPARC_OUTGOING_INT_ARG_FIRST); 6305 int slotno = cum->words; 6306 enum mode_class mclass; 6307 int regno; 6308 6309 *ppadding = 0; 6310 6311 if (type && TREE_ADDRESSABLE (type)) 6312 return -1; 6313 6314 if (TARGET_ARCH32 6315 && mode == BLKmode 6316 && type 6317 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0) 6318 return -1; 6319 6320 /* For SPARC64, objects requiring 16-byte alignment get it. */ 6321 if (TARGET_ARCH64 6322 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128 6323 && (slotno & 1) != 0) 6324 slotno++, *ppadding = 1; 6325 6326 mclass = GET_MODE_CLASS (mode); 6327 if (type && TREE_CODE (type) == VECTOR_TYPE) 6328 { 6329 /* Vector types deserve special treatment because they are 6330 polymorphic wrt their mode, depending upon whether VIS 6331 instructions are enabled. */ 6332 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) 6333 { 6334 /* The SPARC port defines no floating-point vector modes. */ 6335 gcc_assert (mode == BLKmode); 6336 } 6337 else 6338 { 6339 /* Integral vector types should either have a vector 6340 mode or an integral mode, because we are guaranteed 6341 by pass_by_reference that their size is not greater 6342 than 16 bytes and TImode is 16-byte wide. */ 6343 gcc_assert (mode != BLKmode); 6344 6345 /* Vector integers are handled like floats according to 6346 the Sun VIS SDK. */ 6347 mclass = MODE_FLOAT; 6348 } 6349 } 6350 6351 switch (mclass) 6352 { 6353 case MODE_FLOAT: 6354 case MODE_COMPLEX_FLOAT: 6355 case MODE_VECTOR_INT: 6356 if (TARGET_ARCH64 && TARGET_FPU && named) 6357 { 6358 if (slotno >= SPARC_FP_ARG_MAX) 6359 return -1; 6360 regno = SPARC_FP_ARG_FIRST + slotno * 2; 6361 /* Arguments filling only one single FP register are 6362 right-justified in the outer double FP register. */ 6363 if (GET_MODE_SIZE (mode) <= 4) 6364 regno++; 6365 break; 6366 } 6367 /* fallthrough */ 6368 6369 case MODE_INT: 6370 case MODE_COMPLEX_INT: 6371 if (slotno >= SPARC_INT_ARG_MAX) 6372 return -1; 6373 regno = regbase + slotno; 6374 break; 6375 6376 case MODE_RANDOM: 6377 if (mode == VOIDmode) 6378 /* MODE is VOIDmode when generating the actual call. */ 6379 return -1; 6380 6381 gcc_assert (mode == BLKmode); 6382 6383 if (TARGET_ARCH32 6384 || !type 6385 || (TREE_CODE (type) != VECTOR_TYPE 6386 && TREE_CODE (type) != RECORD_TYPE)) 6387 { 6388 if (slotno >= SPARC_INT_ARG_MAX) 6389 return -1; 6390 regno = regbase + slotno; 6391 } 6392 else /* TARGET_ARCH64 && type */ 6393 { 6394 int intregs_p = 0, fpregs_p = 0, packed_p = 0; 6395 6396 /* First see what kinds of registers we would need. */ 6397 if (TREE_CODE (type) == VECTOR_TYPE) 6398 fpregs_p = 1; 6399 else 6400 scan_record_type (type, &intregs_p, &fpregs_p, &packed_p); 6401 6402 /* The ABI obviously doesn't specify how packed structures 6403 are passed. These are defined to be passed in int regs 6404 if possible, otherwise memory. 
*/ 6405 if (packed_p || !named) 6406 fpregs_p = 0, intregs_p = 1; 6407 6408 /* If all arg slots are filled, then must pass on stack. */ 6409 if (fpregs_p && slotno >= SPARC_FP_ARG_MAX) 6410 return -1; 6411 6412 /* If there are only int args and all int arg slots are filled, 6413 then must pass on stack. */ 6414 if (!fpregs_p && intregs_p && slotno >= SPARC_INT_ARG_MAX) 6415 return -1; 6416 6417 /* Note that even if all int arg slots are filled, fp members may 6418 still be passed in regs if such regs are available. 6419 *PREGNO isn't set because there may be more than one, it's up 6420 to the caller to compute them. */ 6421 return slotno; 6422 } 6423 break; 6424 6425 default : 6426 gcc_unreachable (); 6427 } 6428 6429 *pregno = regno; 6430 return slotno; 6431 } 6432 6433 /* Handle recursive register counting for structure field layout. */ 6434 6435 struct function_arg_record_value_parms 6436 { 6437 rtx ret; /* return expression being built. */ 6438 int slotno; /* slot number of the argument. */ 6439 int named; /* whether the argument is named. */ 6440 int regbase; /* regno of the base register. */ 6441 int stack; /* 1 if part of the argument is on the stack. */ 6442 int intoffset; /* offset of the first pending integer field. */ 6443 unsigned int nregs; /* number of words passed in registers. */ 6444 }; 6445 6446 static void function_arg_record_value_3 6447 (HOST_WIDE_INT, struct function_arg_record_value_parms *); 6448 static void function_arg_record_value_2 6449 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool); 6450 static void function_arg_record_value_1 6451 (const_tree, HOST_WIDE_INT, struct function_arg_record_value_parms *, bool); 6452 static rtx function_arg_record_value (const_tree, machine_mode, int, int, int); 6453 static rtx function_arg_union_value (int, machine_mode, int, int); 6454 6455 /* A subroutine of function_arg_record_value. Traverse the structure 6456 recursively and determine how many registers will be required. */ 6457 6458 static void 6459 function_arg_record_value_1 (const_tree type, HOST_WIDE_INT startbitpos, 6460 struct function_arg_record_value_parms *parms, 6461 bool packed_p) 6462 { 6463 tree field; 6464 6465 /* We need to compute how many registers are needed so we can 6466 allocate the PARALLEL but before we can do that we need to know 6467 whether there are any packed fields. The ABI obviously doesn't 6468 specify how structures are passed in this case, so they are 6469 defined to be passed in int regs if possible, otherwise memory, 6470 regardless of whether there are fp values present. */ 6471 6472 if (! packed_p) 6473 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 6474 { 6475 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field)) 6476 { 6477 packed_p = true; 6478 break; 6479 } 6480 } 6481 6482 /* Compute how many registers we need. */ 6483 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6484 { 6485 if (TREE_CODE (field) == FIELD_DECL) 6486 { 6487 HOST_WIDE_INT bitpos = startbitpos; 6488 6489 if (DECL_SIZE (field) != 0) 6490 { 6491 if (integer_zerop (DECL_SIZE (field))) 6492 continue; 6493 6494 if (tree_fits_uhwi_p (bit_position (field))) 6495 bitpos += int_bit_position (field); 6496 } 6497 6498 /* ??? FIXME: else assume zero offset. 
*/ 6499 6500 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) 6501 function_arg_record_value_1 (TREE_TYPE (field), 6502 bitpos, 6503 parms, 6504 packed_p); 6505 else if ((FLOAT_TYPE_P (TREE_TYPE (field)) 6506 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) 6507 && TARGET_FPU 6508 && parms->named 6509 && ! packed_p) 6510 { 6511 if (parms->intoffset != -1) 6512 { 6513 unsigned int startbit, endbit; 6514 int intslots, this_slotno; 6515 6516 startbit = parms->intoffset & -BITS_PER_WORD; 6517 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD; 6518 6519 intslots = (endbit - startbit) / BITS_PER_WORD; 6520 this_slotno = parms->slotno + parms->intoffset 6521 / BITS_PER_WORD; 6522 6523 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno) 6524 { 6525 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno); 6526 /* We need to pass this field on the stack. */ 6527 parms->stack = 1; 6528 } 6529 6530 parms->nregs += intslots; 6531 parms->intoffset = -1; 6532 } 6533 6534 /* There's no need to check this_slotno < SPARC_FP_ARG_MAX. 6535 If it wasn't true we wouldn't be here. */ 6536 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE 6537 && DECL_MODE (field) == BLKmode) 6538 parms->nregs += TYPE_VECTOR_SUBPARTS (TREE_TYPE (field)); 6539 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) 6540 parms->nregs += 2; 6541 else 6542 parms->nregs += 1; 6543 } 6544 else 6545 { 6546 if (parms->intoffset == -1) 6547 parms->intoffset = bitpos; 6548 } 6549 } 6550 } 6551 } 6552 6553 /* A subroutine of function_arg_record_value. Assign the bits of the 6554 structure between parms->intoffset and bitpos to integer registers. */ 6555 6556 static void 6557 function_arg_record_value_3 (HOST_WIDE_INT bitpos, 6558 struct function_arg_record_value_parms *parms) 6559 { 6560 machine_mode mode; 6561 unsigned int regno; 6562 unsigned int startbit, endbit; 6563 int this_slotno, intslots, intoffset; 6564 rtx reg; 6565 6566 if (parms->intoffset == -1) 6567 return; 6568 6569 intoffset = parms->intoffset; 6570 parms->intoffset = -1; 6571 6572 startbit = intoffset & -BITS_PER_WORD; 6573 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD; 6574 intslots = (endbit - startbit) / BITS_PER_WORD; 6575 this_slotno = parms->slotno + intoffset / BITS_PER_WORD; 6576 6577 intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno); 6578 if (intslots <= 0) 6579 return; 6580 6581 /* If this is the trailing part of a word, only load that much into 6582 the register. Otherwise load the whole register. Note that in 6583 the latter case we may pick up unwanted bits. It's not a problem 6584 at the moment but we may wish to revisit this. */ 6585 6586 if (intoffset % BITS_PER_WORD != 0) 6587 mode = smallest_mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD, 6588 MODE_INT); 6589 else 6590 mode = word_mode; 6591 6592 intoffset /= BITS_PER_UNIT; 6593 do 6594 { 6595 regno = parms->regbase + this_slotno; 6596 reg = gen_rtx_REG (mode, regno); 6597 XVECEXP (parms->ret, 0, parms->stack + parms->nregs) 6598 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset)); 6599 6600 this_slotno += 1; 6601 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1; 6602 mode = word_mode; 6603 parms->nregs += 1; 6604 intslots -= 1; 6605 } 6606 while (intslots > 0); 6607 } 6608 6609 /* A subroutine of function_arg_record_value. Traverse the structure 6610 recursively and assign bits to floating point registers. Track which 6611 bits in between need integer registers; invoke function_arg_record_value_3 6612 to make that happen.
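For example, for a hypothetical

	struct s { float f; int i; float g; };

passed in slot 0 as a named outgoing argument, F is assigned to %f0, G to %f2, and the bits of I in between are flushed by function_arg_record_value_3 into %o0 as an SImode piece.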
*/ 6613 6614 static void 6615 function_arg_record_value_2 (const_tree type, HOST_WIDE_INT startbitpos, 6616 struct function_arg_record_value_parms *parms, 6617 bool packed_p) 6618 { 6619 tree field; 6620 6621 if (! packed_p) 6622 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6623 { 6624 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field)) 6625 { 6626 packed_p = true; 6627 break; 6628 } 6629 } 6630 6631 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6632 { 6633 if (TREE_CODE (field) == FIELD_DECL) 6634 { 6635 HOST_WIDE_INT bitpos = startbitpos; 6636 6637 if (DECL_SIZE (field) != 0) 6638 { 6639 if (integer_zerop (DECL_SIZE (field))) 6640 continue; 6641 6642 if (tree_fits_uhwi_p (bit_position (field))) 6643 bitpos += int_bit_position (field); 6644 } 6645 6646 /* ??? FIXME: else assume zero offset. */ 6647 6648 if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE) 6649 function_arg_record_value_2 (TREE_TYPE (field), 6650 bitpos, 6651 parms, 6652 packed_p); 6653 else if ((FLOAT_TYPE_P (TREE_TYPE (field)) 6654 || TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) 6655 && TARGET_FPU 6656 && parms->named 6657 && ! packed_p) 6658 { 6659 int this_slotno = parms->slotno + bitpos / BITS_PER_WORD; 6660 int regno, nregs, pos; 6661 machine_mode mode = DECL_MODE (field); 6662 rtx reg; 6663 6664 function_arg_record_value_3 (bitpos, parms); 6665 6666 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE 6667 && mode == BLKmode) 6668 { 6669 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); 6670 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field)); 6671 } 6672 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) 6673 { 6674 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); 6675 nregs = 2; 6676 } 6677 else 6678 nregs = 1; 6679 6680 regno = SPARC_FP_ARG_FIRST + this_slotno * 2; 6681 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0) 6682 regno++; 6683 reg = gen_rtx_REG (mode, regno); 6684 pos = bitpos / BITS_PER_UNIT; 6685 XVECEXP (parms->ret, 0, parms->stack + parms->nregs) 6686 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos)); 6687 parms->nregs += 1; 6688 while (--nregs > 0) 6689 { 6690 regno += GET_MODE_SIZE (mode) / 4; 6691 reg = gen_rtx_REG (mode, regno); 6692 pos += GET_MODE_SIZE (mode); 6693 XVECEXP (parms->ret, 0, parms->stack + parms->nregs) 6694 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos)); 6695 parms->nregs += 1; 6696 } 6697 } 6698 else 6699 { 6700 if (parms->intoffset == -1) 6701 parms->intoffset = bitpos; 6702 } 6703 } 6704 } 6705 } 6706 6707 /* Used by function_arg and sparc_function_value_1 to implement the complex 6708 conventions of the 64-bit ABI for passing and returning structures. 6709 Return an expression valid as a return value for the FUNCTION_ARG 6710 and TARGET_FUNCTION_VALUE. 6711 6712 TYPE is the data type of the argument (as a tree). 6713 This is null for libcalls where that information may 6714 not be available. 6715 MODE is the argument's machine mode. 6716 SLOTNO is the index number of the argument's slot in the parameter array. 6717 NAMED is nonzero if this argument is a named parameter 6718 (otherwise it is an extra parameter matching an ellipsis). 6719 REGBASE is the regno of the base register for the parameter array. 
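As an illustration, a hypothetical

	struct s { double d; long l; };

passed in slot 0 as a named outgoing argument yields roughly

	(parallel [(expr_list (reg:DF %d0) (const_int 0))
		   (expr_list (reg:DI %o1) (const_int 8))])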
*/ 6720 6721 static rtx 6722 function_arg_record_value (const_tree type, machine_mode mode, 6723 int slotno, int named, int regbase) 6724 { 6725 HOST_WIDE_INT typesize = int_size_in_bytes (type); 6726 struct function_arg_record_value_parms parms; 6727 unsigned int nregs; 6728 6729 parms.ret = NULL_RTX; 6730 parms.slotno = slotno; 6731 parms.named = named; 6732 parms.regbase = regbase; 6733 parms.stack = 0; 6734 6735 /* Compute how many registers we need. */ 6736 parms.nregs = 0; 6737 parms.intoffset = 0; 6738 function_arg_record_value_1 (type, 0, &parms, false); 6739 6740 /* Take into account pending integer fields. */ 6741 if (parms.intoffset != -1) 6742 { 6743 unsigned int startbit, endbit; 6744 int intslots, this_slotno; 6745 6746 startbit = parms.intoffset & -BITS_PER_WORD; 6747 endbit = (typesize*BITS_PER_UNIT + BITS_PER_WORD - 1) & -BITS_PER_WORD; 6748 intslots = (endbit - startbit) / BITS_PER_WORD; 6749 this_slotno = slotno + parms.intoffset / BITS_PER_WORD; 6750 6751 if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno) 6752 { 6753 intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno); 6754 /* We need to pass this field on the stack. */ 6755 parms.stack = 1; 6756 } 6757 6758 parms.nregs += intslots; 6759 } 6760 nregs = parms.nregs; 6761 6762 /* Allocate the vector and handle some annoying special cases. */ 6763 if (nregs == 0) 6764 { 6765 /* ??? Empty structure has no value? Duh? */ 6766 if (typesize <= 0) 6767 { 6768 /* Though there's nothing really to store, return a word register 6769 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL 6770 leads to breakage due to the fact that there are zero bytes to 6771 load. */ 6772 return gen_rtx_REG (mode, regbase); 6773 } 6774 else 6775 { 6776 /* ??? C++ has structures with no fields, and yet a size. Give up 6777 for now and pass everything back in integer registers. */ 6778 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 6779 } 6780 if (nregs + slotno > SPARC_INT_ARG_MAX) 6781 nregs = SPARC_INT_ARG_MAX - slotno; 6782 } 6783 gcc_assert (nregs != 0); 6784 6785 parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs)); 6786 6787 /* If at least one field must be passed on the stack, generate 6788 (parallel [(expr_list (nil) ...) ...]) so that all fields will 6789 also be passed on the stack. We can't do much better because the 6790 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case 6791 of structures for which the fields passed exclusively in registers 6792 are not at the beginning of the structure. */ 6793 if (parms.stack) 6794 XVECEXP (parms.ret, 0, 0) 6795 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); 6796 6797 /* Fill in the entries. */ 6798 parms.nregs = 0; 6799 parms.intoffset = 0; 6800 function_arg_record_value_2 (type, 0, &parms, false); 6801 function_arg_record_value_3 (typesize * BITS_PER_UNIT, &parms); 6802 6803 gcc_assert (parms.nregs == nregs); 6804 6805 return parms.ret; 6806 } 6807 6808 /* Used by function_arg and sparc_function_value_1 to implement the conventions 6809 of the 64-bit ABI for passing and returning unions. 6810 Return an expression valid as a return value for the FUNCTION_ARG 6811 and TARGET_FUNCTION_VALUE. 6812 6813 SIZE is the size in bytes of the union. 6814 MODE is the argument's machine mode. 6815 REGNO is the hard register the union will be passed in. 
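Unlike records, unions are simply smeared left-justified over as many integer registers as they have words; e.g. a hypothetical 16-byte union starting in slot 0 becomes roughly

	(parallel [(expr_list (reg:DI %o0) (const_int 0))
		   (expr_list (reg:DI %o1) (const_int 8))])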
*/ 6817 static rtx 6818 function_arg_union_value (int size, machine_mode mode, int slotno, 6819 int regno) 6820 { 6821 int nwords = ROUND_ADVANCE (size), i; 6822 rtx regs; 6823 6824 /* See comment in previous function for empty structures. */ 6825 if (nwords == 0) 6826 return gen_rtx_REG (mode, regno); 6827 6828 if (slotno == SPARC_INT_ARG_MAX - 1) 6829 nwords = 1; 6830 6831 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords)); 6832 6833 for (i = 0; i < nwords; i++) 6834 { 6835 /* Unions are passed left-justified. */ 6836 XVECEXP (regs, 0, i) 6837 = gen_rtx_EXPR_LIST (VOIDmode, 6838 gen_rtx_REG (word_mode, regno), 6839 GEN_INT (UNITS_PER_WORD * i)); 6840 regno++; 6841 } 6842 6843 return regs; 6844 } 6845 6846 /* Used by function_arg and sparc_function_value_1 to implement the conventions 6847 for passing and returning BLKmode vectors. 6848 Return an expression valid as a return value for the FUNCTION_ARG 6849 and TARGET_FUNCTION_VALUE. 6850 6851 SIZE is the size in bytes of the vector. 6852 REGNO is the FP hard register the vector will be passed in. */ 6853 6854 static rtx 6855 function_arg_vector_value (int size, int regno) 6856 { 6857 const int nregs = MAX (1, size / 8); 6858 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs)); 6859 6860 if (size < 8) 6861 XVECEXP (regs, 0, 0) 6862 = gen_rtx_EXPR_LIST (VOIDmode, 6863 gen_rtx_REG (SImode, regno), 6864 const0_rtx); 6865 else 6866 for (int i = 0; i < nregs; i++) 6867 XVECEXP (regs, 0, i) 6868 = gen_rtx_EXPR_LIST (VOIDmode, 6869 gen_rtx_REG (DImode, regno + 2*i), 6870 GEN_INT (i*8)); 6871 6872 return regs; 6873 } 6874 6875 /* Determine where to put an argument to a function. 6876 Value is zero to push the argument on the stack, 6877 or a hard register in which to store the argument. 6878 6879 CUM is a variable of type CUMULATIVE_ARGS which gives info about 6880 the preceding args and about the function being called. 6881 MODE is the argument's machine mode. 6882 TYPE is the data type of the argument (as a tree). 6883 This is null for libcalls where that information may 6884 not be available. 6885 NAMED is true if this argument is a named parameter 6886 (otherwise it is an extra parameter matching an ellipsis). 6887 INCOMING_P is false for TARGET_FUNCTION_ARG, true for 6888 TARGET_FUNCTION_INCOMING_ARG. */ 6889 6890 static rtx 6891 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode, 6892 const_tree type, bool named, bool incoming_p) 6893 { 6894 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 6895 6896 int regbase = (incoming_p 6897 ? SPARC_INCOMING_INT_ARG_FIRST 6898 : SPARC_OUTGOING_INT_ARG_FIRST); 6899 int slotno, regno, padding; 6900 enum mode_class mclass = GET_MODE_CLASS (mode); 6901 6902 slotno = function_arg_slotno (cum, mode, type, named, incoming_p, 6903 &regno, &padding); 6904 if (slotno == -1) 6905 return 0; 6906 6907 /* Vector types deserve special treatment because they are polymorphic wrt 6908 their mode, depending upon whether VIS instructions are enabled. */ 6909 if (type && TREE_CODE (type) == VECTOR_TYPE) 6910 { 6911 HOST_WIDE_INT size = int_size_in_bytes (type); 6912 gcc_assert ((TARGET_ARCH32 && size <= 8) 6913 || (TARGET_ARCH64 && size <= 16)); 6914 6915 if (mode == BLKmode) 6916 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno); 6917 6918 mclass = MODE_FLOAT; 6919 } 6920 6921 if (TARGET_ARCH32) 6922 return gen_rtx_REG (mode, regno); 6923 6924 /* Structures up to 16 bytes in size are passed in arg slots on the stack 6925 and are promoted to registers if possible.
*/ 6926 if (type && TREE_CODE (type) == RECORD_TYPE) 6927 { 6928 HOST_WIDE_INT size = int_size_in_bytes (type); 6929 gcc_assert (size <= 16); 6930 6931 return function_arg_record_value (type, mode, slotno, named, regbase); 6932 } 6933 6934 /* Unions up to 16 bytes in size are passed in integer registers. */ 6935 else if (type && TREE_CODE (type) == UNION_TYPE) 6936 { 6937 HOST_WIDE_INT size = int_size_in_bytes (type); 6938 gcc_assert (size <= 16); 6939 6940 return function_arg_union_value (size, mode, slotno, regno); 6941 } 6942 6943 /* v9 fp args in reg slots beyond the int reg slots get passed in regs 6944 but also have the slot allocated for them. 6945 If no prototype is in scope fp values in register slots get passed 6946 in two places, either fp regs and int regs or fp regs and memory. */ 6947 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) 6948 && SPARC_FP_REG_P (regno)) 6949 { 6950 rtx reg = gen_rtx_REG (mode, regno); 6951 if (cum->prototype_p || cum->libcall_p) 6952 { 6953 /* "* 2" because fp reg numbers are recorded in 4 byte 6954 quantities. */ 6955 #if 0 6956 /* ??? This will cause the value to be passed in the fp reg and 6957 in the stack. When a prototype exists we want to pass the 6958 value in the reg but reserve space on the stack. That's an 6959 optimization, and is deferred [for a bit]. */ 6960 if ((regno - SPARC_FP_ARG_FIRST) >= SPARC_INT_ARG_MAX * 2) 6961 return gen_rtx_PARALLEL (mode, 6962 gen_rtvec (2, 6963 gen_rtx_EXPR_LIST (VOIDmode, 6964 NULL_RTX, const0_rtx), 6965 gen_rtx_EXPR_LIST (VOIDmode, 6966 reg, const0_rtx))); 6967 else 6968 #else 6969 /* ??? It seems that passing back a register even when past 6970 the area declared by REG_PARM_STACK_SPACE will allocate 6971 space appropriately, and will not copy the data onto the 6972 stack, exactly as we desire. 6973 6974 This is due to locate_and_pad_parm being called in 6975 expand_call whenever reg_parm_stack_space > 0, which 6976 while beneficial to our example here, would seem to be 6977 in error from what had been intended. Ho hum... -- r~ */ 6978 #endif 6979 return reg; 6980 } 6981 else 6982 { 6983 rtx v0, v1; 6984 6985 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2) 6986 { 6987 int intreg; 6988 6989 /* On incoming, we don't need to know that the value 6990 is passed in %f0 and %i0, and it confuses other parts 6991 causing needless spillage even on the simplest cases. */ 6992 if (incoming_p) 6993 return reg; 6994 6995 intreg = (SPARC_OUTGOING_INT_ARG_FIRST 6996 + (regno - SPARC_FP_ARG_FIRST) / 2); 6997 6998 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); 6999 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg), 7000 const0_rtx); 7001 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); 7002 } 7003 else 7004 { 7005 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); 7006 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); 7007 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); 7008 } 7009 } 7010 } 7011 7012 /* All other aggregate types are passed in an integer register in a mode 7013 corresponding to the size of the type. */ 7014 else if (type && AGGREGATE_TYPE_P (type)) 7015 { 7016 HOST_WIDE_INT size = int_size_in_bytes (type); 7017 gcc_assert (size <= 16); 7018 7019 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); 7020 } 7021 7022 return gen_rtx_REG (mode, regno); 7023 } 7024 7025 /* Handle the TARGET_FUNCTION_ARG target hook. 
*/ 7027 static rtx 7028 sparc_function_arg (cumulative_args_t cum, machine_mode mode, 7029 const_tree type, bool named) 7030 { 7031 return sparc_function_arg_1 (cum, mode, type, named, false); 7032 } 7033 7034 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */ 7035 7036 static rtx 7037 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode, 7038 const_tree type, bool named) 7039 { 7040 return sparc_function_arg_1 (cum, mode, type, named, true); 7041 } 7042 7043 /* For sparc64, objects requiring 16 byte alignment are passed that way. */ 7044 7045 static unsigned int 7046 sparc_function_arg_boundary (machine_mode mode, const_tree type) 7047 { 7048 return ((TARGET_ARCH64 7049 && (GET_MODE_ALIGNMENT (mode) == 128 7050 || (type && TYPE_ALIGN (type) == 128))) 7051 ? 128 7052 : PARM_BOUNDARY); 7053 } 7054 7055 /* For an arg passed partly in registers and partly in memory, 7056 this is the number of bytes of registers used. 7057 For args passed entirely in registers or entirely in memory, zero. 7058 7059 Any arg that starts in the first 6 regs but won't entirely fit in them 7060 needs partial registers on v8. On v9, structures with integer 7061 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp 7062 values that begin in the last fp reg [where "last fp reg" varies with the 7063 mode] will be split between that reg and memory. */ 7064 7065 static int 7066 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode, 7067 tree type, bool named) 7068 { 7069 int slotno, regno, padding; 7070 7071 /* We pass false for incoming_p here, it doesn't matter. */ 7072 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named, 7073 false, &regno, &padding); 7074 7075 if (slotno == -1) 7076 return 0; 7077 7078 if (TARGET_ARCH32) 7079 { 7080 if ((slotno + (mode == BLKmode 7081 ? ROUND_ADVANCE (int_size_in_bytes (type)) 7082 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))) 7083 > SPARC_INT_ARG_MAX) 7084 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD; 7085 } 7086 else 7087 { 7088 /* We are guaranteed by pass_by_reference that the size of the 7089 argument is not greater than 16 bytes, so we only need to return 7090 one word if the argument is partially passed in registers. */ 7091 7092 if (type && AGGREGATE_TYPE_P (type)) 7093 { 7094 int size = int_size_in_bytes (type); 7095 7096 if (size > UNITS_PER_WORD 7097 && slotno == SPARC_INT_ARG_MAX - 1) 7098 return UNITS_PER_WORD; 7099 } 7100 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT 7101 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT 7102 && ! (TARGET_FPU && named))) 7103 { 7104 /* The complex types are passed as packed types. */ 7105 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD 7106 && slotno == SPARC_INT_ARG_MAX - 1) 7107 return UNITS_PER_WORD; 7108 } 7109 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 7110 { 7111 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD) 7112 > SPARC_FP_ARG_MAX) 7113 return UNITS_PER_WORD; 7114 } 7115 } 7116 7117 return 0; 7118 } 7119 7120 /* Handle the TARGET_PASS_BY_REFERENCE target hook. 7121 Specify whether to pass the argument by reference. */ 7122 7123 static bool 7124 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, 7125 machine_mode mode, const_tree type, 7126 bool named ATTRIBUTE_UNUSED) 7127 { 7128 if (TARGET_ARCH32) 7129 /* Original SPARC 32-bit ABI says that structures and unions, 7130 and quad-precision floats are passed by reference. For Pascal, 7131 also pass arrays by reference. All other base types are passed 7132 in registers.
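Hence a 'long double' argument, for example, is passed by reference while a 'double' still travels in registers.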
7133 7134 Extended ABI (as implemented by the Sun compiler) says that all 7135 complex floats are passed by reference. Pass complex integers 7136 in registers up to 8 bytes. More generally, enforce the 2-word 7137 cap for passing arguments in registers. 7138 7139 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7140 integers are passed like floats of the same size, that is in 7141 registers up to 8 bytes. Pass all vector floats by reference 7142 like structures and unions. */ 7143 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))) 7144 || mode == SCmode 7145 /* Catch CDImode, TFmode, DCmode and TCmode. */ 7146 || GET_MODE_SIZE (mode) > 8 7147 || (type 7148 && TREE_CODE (type) == VECTOR_TYPE 7149 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); 7150 else 7151 /* Original SPARC 64-bit ABI says that structures and unions 7152 smaller than 16 bytes are passed in registers, as well as 7153 all other base types. 7154 7155 Extended ABI (as implemented by the Sun compiler) says that 7156 complex floats are passed in registers up to 16 bytes. Pass 7157 all complex integers in registers up to 16 bytes. More generally, 7158 enforce the 2-word cap for passing arguments in registers. 7159 7160 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7161 integers are passed like floats of the same size, that is in 7162 registers (up to 16 bytes). Pass all vector floats like structures 7163 and unions. */ 7164 return ((type 7165 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE) 7166 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16) 7167 /* Catch CTImode and TCmode. */ 7168 || GET_MODE_SIZE (mode) > 16); 7169 } 7170 7171 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook. 7172 Update the data in CUM to advance over an argument 7173 of mode MODE and data type TYPE. 7174 TYPE is null for libcalls where that information may not be available. */ 7175 7176 static void 7177 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, 7178 const_tree type, bool named) 7179 { 7180 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 7181 int regno, padding; 7182 7183 /* We pass false for incoming_p here, it doesn't matter. */ 7184 function_arg_slotno (cum, mode, type, named, false, &regno, &padding); 7185 7186 /* If the argument requires leading padding, add it. */ 7187 cum->words += padding; 7188 7189 if (TARGET_ARCH32) 7190 { 7191 cum->words += (mode != BLKmode 7192 ? ROUND_ADVANCE (GET_MODE_SIZE (mode)) 7193 : ROUND_ADVANCE (int_size_in_bytes (type))); 7194 } 7195 else 7196 { 7197 if (type && AGGREGATE_TYPE_P (type)) 7198 { 7199 int size = int_size_in_bytes (type); 7200 7201 if (size <= 8) 7202 ++cum->words; 7203 else if (size <= 16) 7204 cum->words += 2; 7205 else /* passed by reference */ 7206 ++cum->words; 7207 } 7208 else 7209 { 7210 cum->words += (mode != BLKmode 7211 ? ROUND_ADVANCE (GET_MODE_SIZE (mode)) 7212 : ROUND_ADVANCE (int_size_in_bytes (type))); 7213 } 7214 } 7215 } 7216 7217 /* Handle the FUNCTION_ARG_PADDING macro. 7218 For the 64-bit ABI structs are always stored left shifted in their 7219 argument slot. */ 7220 7221 enum direction 7222 function_arg_padding (machine_mode mode, const_tree type) 7223 { 7224 if (TARGET_ARCH64 && type != 0 && AGGREGATE_TYPE_P (type)) 7225 return upward; 7226 7227 /* Fall back to the default. */ 7228 return DEFAULT_FUNCTION_ARG_PADDING (mode, type); 7229 } 7230 7231 /* Handle the TARGET_RETURN_IN_MEMORY target hook. 7232 Specify whether to return the return value in memory.
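For instance, aggregates are normally returned in memory under the 32-bit ABI, whereas aggregates of up to 32 bytes come back in registers under the 64-bit ABI.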
*/ 7234 static bool 7235 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 7236 { 7237 if (TARGET_ARCH32) 7238 /* Original SPARC 32-bit ABI says that structures and unions, 7239 and quad-precision floats are returned in memory. All other 7240 base types are returned in registers. 7241 7242 Extended ABI (as implemented by the Sun compiler) says that 7243 all complex floats are returned in registers (8 FP registers 7244 at most for '_Complex long double'). Return all complex integers 7245 in registers (4 at most for '_Complex long long'). 7246 7247 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7248 integers are returned like floats of the same size, that is in 7249 registers up to 8 bytes and in memory otherwise. Return all 7250 vector floats in memory like structures and unions; note that 7251 they always have BLKmode like the latter. */ 7252 return (TYPE_MODE (type) == BLKmode 7253 || TYPE_MODE (type) == TFmode 7254 || (TREE_CODE (type) == VECTOR_TYPE 7255 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); 7256 else 7257 /* Original SPARC 64-bit ABI says that structures and unions 7258 smaller than 32 bytes are returned in registers, as well as 7259 all other base types. 7260 7261 Extended ABI (as implemented by the Sun compiler) says that all 7262 complex floats are returned in registers (8 FP registers at most 7263 for '_Complex long double'). Return all complex integers in 7264 registers (4 at most for '_Complex TItype'). 7265 7266 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7267 integers are returned like floats of the same size, that is in 7268 registers. Return all vector floats like structures and unions; 7269 note that they always have BLKmode like the latter. */ 7270 return (TYPE_MODE (type) == BLKmode 7271 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32); 7272 } 7273 7274 /* Handle the TARGET_STRUCT_VALUE target hook. 7275 Return where to find the structure return value address. */ 7276 7277 static rtx 7278 sparc_struct_value_rtx (tree fndecl, int incoming) 7279 { 7280 if (TARGET_ARCH64) 7281 return 0; 7282 else 7283 { 7284 rtx mem; 7285 7286 if (incoming) 7287 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx, 7288 STRUCT_VALUE_OFFSET)); 7289 else 7290 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx, 7291 STRUCT_VALUE_OFFSET)); 7292 7293 /* Only follow the SPARC ABI for fixed-size structure returns. 7294 Variable size structure returns are handled per the normal 7295 procedures in GCC. This is enabled by -mstd-struct-return. */ 7296 if (incoming == 2 7297 && sparc_std_struct_return 7298 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl)) 7299 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST) 7300 { 7301 /* We must check and adjust the return address, as it is 7302 optional as to whether the return object is really 7303 provided.
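The convention, roughly, is that the caller announces the expected size with an 'unimp' word placed right after the delay slot:

	call	f
	 nop
	unimp	SIZE

and a callee that really provides the object returns to %o7+12 instead of %o7+8, skipping that word. The code below implements the callee-side check: fetch the word, compare its low 12 bits with the actual size, and only keep the pre-adjusted return address on a match.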
*/ 7304 rtx ret_reg = gen_rtx_REG (Pmode, 31); 7305 rtx scratch = gen_reg_rtx (SImode); 7306 rtx_code_label *endlab = gen_label_rtx (); 7307 7308 /* Calculate the return object size */ 7309 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl)); 7310 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff); 7311 /* Construct a temporary return value */ 7312 rtx temp_val 7313 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0); 7314 7315 /* Implement SPARC 32-bit psABI callee return struct checking: 7316 7317 Fetch the instruction where we will return to and see if 7318 it's an unimp instruction (the most significant 10 bits 7319 will be zero). */ 7320 emit_move_insn (scratch, gen_rtx_MEM (SImode, 7321 plus_constant (Pmode, 7322 ret_reg, 8))); 7323 /* Assume the size is valid and pre-adjust */ 7324 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4))); 7325 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 7326 0, endlab); 7327 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4))); 7328 /* Write the address of the memory pointed to by temp_val into 7329 the memory pointed to by mem */ 7330 emit_move_insn (mem, XEXP (temp_val, 0)); 7331 emit_label (endlab); 7332 } 7333 7334 return mem; 7335 } 7336 } 7337 7338 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook. 7339 For v9, function return values are subject to the same rules as arguments, 7340 except that up to 32 bytes may be returned in registers. */ 7341 7342 static rtx 7343 sparc_function_value_1 (const_tree type, machine_mode mode, 7344 bool outgoing) 7345 { 7346 /* Beware that the two values are swapped here wrt function_arg. */ 7347 int regbase = (outgoing 7348 ? SPARC_INCOMING_INT_ARG_FIRST 7349 : SPARC_OUTGOING_INT_ARG_FIRST); 7350 enum mode_class mclass = GET_MODE_CLASS (mode); 7351 int regno; 7352 7353 /* Vector types deserve special treatment because they are polymorphic wrt 7354 their mode, depending upon whether VIS instructions are enabled. */ 7355 if (type && TREE_CODE (type) == VECTOR_TYPE) 7356 { 7357 HOST_WIDE_INT size = int_size_in_bytes (type); 7358 gcc_assert ((TARGET_ARCH32 && size <= 8) 7359 || (TARGET_ARCH64 && size <= 32)); 7360 7361 if (mode == BLKmode) 7362 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST); 7363 7364 mclass = MODE_FLOAT; 7365 } 7366 7367 if (TARGET_ARCH64 && type) 7368 { 7369 /* Structures up to 32 bytes in size are returned in registers. */ 7370 if (TREE_CODE (type) == RECORD_TYPE) 7371 { 7372 HOST_WIDE_INT size = int_size_in_bytes (type); 7373 gcc_assert (size <= 32); 7374 7375 return function_arg_record_value (type, mode, 0, 1, regbase); 7376 } 7377 7378 /* Unions up to 32 bytes in size are returned in integer registers. */ 7379 else if (TREE_CODE (type) == UNION_TYPE) 7380 { 7381 HOST_WIDE_INT size = int_size_in_bytes (type); 7382 gcc_assert (size <= 32); 7383 7384 return function_arg_union_value (size, mode, 0, regbase); 7385 } 7386 7387 /* Objects that require it are returned in FP registers. */ 7388 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) 7389 ; 7390 7391 /* All other aggregate types are returned in an integer register in a 7392 mode corresponding to the size of the type. */ 7393 else if (AGGREGATE_TYPE_P (type)) 7394 { 7395 /* All other aggregate types are passed in an integer register 7396 in a mode corresponding to the size of the type. */ 7397 HOST_WIDE_INT size = int_size_in_bytes (type); 7398 gcc_assert (size <= 32); 7399 7400 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); 7401 7402 /* ??? 
We probably should have made the same ABI change in 7403 3.4.0 as the one we made for unions. The latter was 7404 required by the SCD though, while the former is not 7405 specified, so we favored compatibility and efficiency. 7406 7407 Now we're stuck for aggregates larger than 16 bytes, 7408 because OImode vanished in the meantime. Let's not 7409 try to be unduly clever, and simply follow the ABI 7410 for unions in that case. */ 7411 if (mode == BLKmode) 7412 return function_arg_union_value (size, mode, 0, regbase); 7413 else 7414 mclass = MODE_INT; 7415 } 7416 7417 /* We should only have pointer and integer types at this point. This 7418 must match sparc_promote_function_mode. */ 7419 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 7420 mode = word_mode; 7421 } 7422 7423 /* We should only have pointer and integer types at this point, except with 7424 -freg-struct-return. This must match sparc_promote_function_mode. */ 7425 else if (TARGET_ARCH32 7426 && !(type && AGGREGATE_TYPE_P (type)) 7427 && mclass == MODE_INT 7428 && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 7429 mode = word_mode; 7430 7431 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU) 7432 regno = SPARC_FP_ARG_FIRST; 7433 else 7434 regno = regbase; 7435 7436 return gen_rtx_REG (mode, regno); 7437 } 7438 7439 /* Handle TARGET_FUNCTION_VALUE. 7440 On the SPARC, the value is found in the first "output" register, but the 7441 called function leaves it in the first "input" register. */ 7442 7443 static rtx 7444 sparc_function_value (const_tree valtype, 7445 const_tree fn_decl_or_type ATTRIBUTE_UNUSED, 7446 bool outgoing) 7447 { 7448 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing); 7449 } 7450 7451 /* Handle TARGET_LIBCALL_VALUE. */ 7452 7453 static rtx 7454 sparc_libcall_value (machine_mode mode, 7455 const_rtx fun ATTRIBUTE_UNUSED) 7456 { 7457 return sparc_function_value_1 (NULL_TREE, mode, false); 7458 } 7459 7460 /* Handle FUNCTION_VALUE_REGNO_P. 7461 On the SPARC, the first "output" reg is used for integer values, and the 7462 first floating point register is used for floating point values. */ 7463 7464 static bool 7465 sparc_function_value_regno_p (const unsigned int regno) 7466 { 7467 return (regno == 8 || (TARGET_FPU && regno == 32)); 7468 } 7469 7470 /* Do what is necessary for `va_start'. We look at the current function 7471 to determine if stdarg or varargs is used and return the address of 7472 the first unnamed parameter. */ 7473 7474 static rtx 7475 sparc_builtin_saveregs (void) 7476 { 7477 int first_reg = crtl->args.info.words; 7478 rtx address; 7479 int regno; 7480 7481 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++) 7482 emit_move_insn (gen_rtx_MEM (word_mode, 7483 gen_rtx_PLUS (Pmode, 7484 frame_pointer_rtx, 7485 GEN_INT (FIRST_PARM_OFFSET (0) 7486 + (UNITS_PER_WORD 7487 * regno)))), 7488 gen_rtx_REG (word_mode, 7489 SPARC_INCOMING_INT_ARG_FIRST + regno)); 7490 7491 address = gen_rtx_PLUS (Pmode, 7492 frame_pointer_rtx, 7493 GEN_INT (FIRST_PARM_OFFSET (0) 7494 + UNITS_PER_WORD * first_reg)); 7495 7496 return address; 7497 } 7498 7499 /* Implement `va_start' for stdarg. */ 7500 7501 static void 7502 sparc_va_start (tree valist, rtx nextarg) 7503 { 7504 nextarg = expand_builtin_saveregs (); 7505 std_expand_builtin_va_start (valist, nextarg); 7506 } 7507 7508 /* Implement `va_arg' for stdarg. 
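In outline, the lowered form is roughly (a sketch, ignoring the
   big-endian padding, over-alignment and pass-by-reference cases
   handled below):

     addr = valist;
     if (align)
       addr = (addr + align - 1) & -align;
     valist = addr + rsize;
     result = *(type *) addr;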
*/ 7509 7510 static tree 7511 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, 7512 gimple_seq *post_p) 7513 { 7514 HOST_WIDE_INT size, rsize, align; 7515 tree addr, incr; 7516 bool indirect; 7517 tree ptrtype = build_pointer_type (type); 7518 7519 if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) 7520 { 7521 indirect = true; 7522 size = rsize = UNITS_PER_WORD; 7523 align = 0; 7524 } 7525 else 7526 { 7527 indirect = false; 7528 size = int_size_in_bytes (type); 7529 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; 7530 align = 0; 7531 7532 if (TARGET_ARCH64) 7533 { 7534 /* For SPARC64, objects requiring 16-byte alignment get it. */ 7535 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD) 7536 align = 2 * UNITS_PER_WORD; 7537 7538 /* SPARC-V9 ABI states that structures up to 16 bytes in size 7539 are left-justified in their slots. */ 7540 if (AGGREGATE_TYPE_P (type)) 7541 { 7542 if (size == 0) 7543 size = rsize = UNITS_PER_WORD; 7544 else 7545 size = rsize; 7546 } 7547 } 7548 } 7549 7550 incr = valist; 7551 if (align) 7552 { 7553 incr = fold_build_pointer_plus_hwi (incr, align - 1); 7554 incr = fold_convert (sizetype, incr); 7555 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr, 7556 size_int (-align)); 7557 incr = fold_convert (ptr_type_node, incr); 7558 } 7559 7560 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue); 7561 addr = incr; 7562 7563 if (BYTES_BIG_ENDIAN && size < rsize) 7564 addr = fold_build_pointer_plus_hwi (incr, rsize - size); 7565 7566 if (indirect) 7567 { 7568 addr = fold_convert (build_pointer_type (ptrtype), addr); 7569 addr = build_va_arg_indirect_ref (addr); 7570 } 7571 7572 /* If the address isn't aligned properly for the type, we need a temporary. 7573 FIXME: This is inefficient, usually we can do this in registers. */ 7574 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD) 7575 { 7576 tree tmp = create_tmp_var (type, "va_arg_tmp"); 7577 tree dest_addr = build_fold_addr_expr (tmp); 7578 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), 7579 3, dest_addr, addr, size_int (rsize)); 7580 TREE_ADDRESSABLE (tmp) = 1; 7581 gimplify_and_add (copy, pre_p); 7582 addr = dest_addr; 7583 } 7584 7585 else 7586 addr = fold_convert (ptrtype, addr); 7587 7588 incr = fold_build_pointer_plus_hwi (incr, rsize); 7589 gimplify_assign (valist, incr, post_p); 7590 7591 return build_va_arg_indirect_ref (addr); 7592 } 7593 7594 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook. 7595 Specify whether the vector mode is supported by the hardware. */ 7596 7597 static bool 7598 sparc_vector_mode_supported_p (machine_mode mode) 7599 { 7600 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false; 7601 } 7602 7603 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */ 7604 7605 static machine_mode 7606 sparc_preferred_simd_mode (machine_mode mode) 7607 { 7608 if (TARGET_VIS) 7609 switch (mode) 7610 { 7611 case SImode: 7612 return V2SImode; 7613 case HImode: 7614 return V4HImode; 7615 case QImode: 7616 return V8QImode; 7617 7618 default:; 7619 } 7620 7621 return word_mode; 7622 } 7623 7624 /* Return the string to output an unconditional branch to LABEL, which is 7625 the operand number of the label. 7626 7627 DEST is the destination insn (i.e. the label), INSN is the source. 
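For example, a nearby target is emitted as "b label" (or
   "ba,pt %xcc, label" on V9), while the cbcond path synthesizes an
   always-taken compare-and-branch "cwbe %g0, %g0, label".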
*/ 7628 7629 const char * 7630 output_ubranch (rtx dest, rtx_insn *insn) 7631 { 7632 static char string[64]; 7633 bool v9_form = false; 7634 int delta; 7635 char *p; 7636 7637 /* Even if we are trying to use cbcond for this, evaluate 7638 whether we can use V9 branches as our backup plan. */ 7639 7640 delta = 5000000; 7641 if (INSN_ADDRESSES_SET_P ()) 7642 delta = (INSN_ADDRESSES (INSN_UID (dest)) 7643 - INSN_ADDRESSES (INSN_UID (insn))); 7644 7645 /* Leave some instructions for "slop". */ 7646 if (TARGET_V9 && delta >= -260000 && delta < 260000) 7647 v9_form = true; 7648 7649 if (TARGET_CBCOND) 7650 { 7651 bool emit_nop = emit_cbcond_nop (insn); 7652 bool far = false; 7653 const char *rval; 7654 7655 if (delta < -500 || delta > 500) 7656 far = true; 7657 7658 if (far) 7659 { 7660 if (v9_form) 7661 rval = "ba,a,pt\t%%xcc, %l0"; 7662 else 7663 rval = "b,a\t%l0"; 7664 } 7665 else 7666 { 7667 if (emit_nop) 7668 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop"; 7669 else 7670 rval = "cwbe\t%%g0, %%g0, %l0"; 7671 } 7672 return rval; 7673 } 7674 7675 if (v9_form) 7676 strcpy (string, "ba%*,pt\t%%xcc, "); 7677 else 7678 strcpy (string, "b%*\t"); 7679 7680 p = strchr (string, '\0'); 7681 *p++ = '%'; 7682 *p++ = 'l'; 7683 *p++ = '0'; 7684 *p++ = '%'; 7685 *p++ = '('; 7686 *p = '\0'; 7687 7688 return string; 7689 } 7690 7691 /* Return the string to output a conditional branch to LABEL, which is 7692 the operand number of the label. OP is the conditional expression. 7693 XEXP (OP, 0) is assumed to be a condition code register (integer or 7694 floating point) and its mode specifies what kind of comparison we made. 7695 7696 DEST is the destination insn (i.e. the label), INSN is the source. 7697 7698 REVERSED is nonzero if we should reverse the sense of the comparison. 7699 7700 ANNUL is nonzero if we should generate an annulling branch. */ 7701 7702 const char * 7703 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul, 7704 rtx_insn *insn) 7705 { 7706 static char string[64]; 7707 enum rtx_code code = GET_CODE (op); 7708 rtx cc_reg = XEXP (op, 0); 7709 machine_mode mode = GET_MODE (cc_reg); 7710 const char *labelno, *branch; 7711 int spaces = 8, far; 7712 char *p; 7713 7714 /* v9 branches are limited to +-1MB. If it is too far away, 7715 change 7716 7717 bne,pt %xcc, .LC30 7718 7719 to 7720 7721 be,pn %xcc, .+12 7722 nop 7723 ba .LC30 7724 7725 and 7726 7727 fbne,a,pn %fcc2, .LC29 7728 7729 to 7730 7731 fbe,pt %fcc2, .+16 7732 nop 7733 ba .LC29 */ 7734 7735 far = TARGET_V9 && (get_attr_length (insn) >= 3); 7736 if (reversed ^ far) 7737 { 7738 /* Reversal of FP compares takes care -- an ordered compare 7739 becomes an unordered compare and vice versa. */ 7740 if (mode == CCFPmode || mode == CCFPEmode) 7741 code = reverse_condition_maybe_unordered (code); 7742 else 7743 code = reverse_condition (code); 7744 } 7745 7746 /* Start by writing the branch condition. 
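The mnemonic is built from "fb" plus the condition name for FP
     compares (fbne, fbug, ...) and from "b" plus the name for integer
     compares (bne, bgeu, ...), as the tables below enumerate.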
*/ 7747 if (mode == CCFPmode || mode == CCFPEmode) 7748 { 7749 switch (code) 7750 { 7751 case NE: 7752 branch = "fbne"; 7753 break; 7754 case EQ: 7755 branch = "fbe"; 7756 break; 7757 case GE: 7758 branch = "fbge"; 7759 break; 7760 case GT: 7761 branch = "fbg"; 7762 break; 7763 case LE: 7764 branch = "fble"; 7765 break; 7766 case LT: 7767 branch = "fbl"; 7768 break; 7769 case UNORDERED: 7770 branch = "fbu"; 7771 break; 7772 case ORDERED: 7773 branch = "fbo"; 7774 break; 7775 case UNGT: 7776 branch = "fbug"; 7777 break; 7778 case UNLT: 7779 branch = "fbul"; 7780 break; 7781 case UNEQ: 7782 branch = "fbue"; 7783 break; 7784 case UNGE: 7785 branch = "fbuge"; 7786 break; 7787 case UNLE: 7788 branch = "fbule"; 7789 break; 7790 case LTGT: 7791 branch = "fblg"; 7792 break; 7793 7794 default: 7795 gcc_unreachable (); 7796 } 7797 7798 /* ??? !v9: FP branches cannot be preceded by another floating point 7799 insn. Because there is currently no concept of pre-delay slots, 7800 we can fix this only by always emitting a nop before a floating 7801 point branch. */ 7802 7803 string[0] = '\0'; 7804 if (! TARGET_V9) 7805 strcpy (string, "nop\n\t"); 7806 strcat (string, branch); 7807 } 7808 else 7809 { 7810 switch (code) 7811 { 7812 case NE: 7813 branch = "bne"; 7814 break; 7815 case EQ: 7816 branch = "be"; 7817 break; 7818 case GE: 7819 if (mode == CC_NOOVmode || mode == CCX_NOOVmode) 7820 branch = "bpos"; 7821 else 7822 branch = "bge"; 7823 break; 7824 case GT: 7825 branch = "bg"; 7826 break; 7827 case LE: 7828 branch = "ble"; 7829 break; 7830 case LT: 7831 if (mode == CC_NOOVmode || mode == CCX_NOOVmode) 7832 branch = "bneg"; 7833 else 7834 branch = "bl"; 7835 break; 7836 case GEU: 7837 branch = "bgeu"; 7838 break; 7839 case GTU: 7840 branch = "bgu"; 7841 break; 7842 case LEU: 7843 branch = "bleu"; 7844 break; 7845 case LTU: 7846 branch = "blu"; 7847 break; 7848 7849 default: 7850 gcc_unreachable (); 7851 } 7852 strcpy (string, branch); 7853 } 7854 spaces -= strlen (branch); 7855 p = strchr (string, '\0'); 7856 7857 /* Now add the annulling, the label, and a possible noop. */ 7858 if (annul && ! far) 7859 { 7860 strcpy (p, ",a"); 7861 p += 2; 7862 spaces -= 2; 7863 } 7864 7865 if (TARGET_V9) 7866 { 7867 rtx note; 7868 int v8 = 0; 7869 7870 if (! far && insn && INSN_ADDRESSES_SET_P ()) 7871 { 7872 int delta = (INSN_ADDRESSES (INSN_UID (dest)) 7873 - INSN_ADDRESSES (INSN_UID (insn))); 7874 /* Leave some instructions for "slop". */ 7875 if (delta < -260000 || delta >= 260000) 7876 v8 = 1; 7877 } 7878 7879 if (mode == CCFPmode || mode == CCFPEmode) 7880 { 7881 static char v9_fcc_labelno[] = "%%fccX, "; 7882 /* Set the char indicating the number of the fcc reg to use. */ 7883 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0'; 7884 labelno = v9_fcc_labelno; 7885 if (v8) 7886 { 7887 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG); 7888 labelno = ""; 7889 } 7890 } 7891 else if (mode == CCXmode || mode == CCX_NOOVmode) 7892 { 7893 labelno = "%%xcc, "; 7894 gcc_assert (! v8); 7895 } 7896 else 7897 { 7898 labelno = "%%icc, "; 7899 if (v8) 7900 labelno = ""; 7901 } 7902 7903 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) 7904 { 7905 strcpy (p, 7906 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far) 7907 ? 
",pt" : ",pn"); 7908 p += 3; 7909 spaces -= 3; 7910 } 7911 } 7912 else 7913 labelno = ""; 7914 7915 if (spaces > 0) 7916 *p++ = '\t'; 7917 else 7918 *p++ = ' '; 7919 strcpy (p, labelno); 7920 p = strchr (p, '\0'); 7921 if (far) 7922 { 7923 strcpy (p, ".+12\n\t nop\n\tb\t"); 7924 /* Skip the next insn if requested or 7925 if we know that it will be a nop. */ 7926 if (annul || ! final_sequence) 7927 p[3] = '6'; 7928 p += 14; 7929 } 7930 *p++ = '%'; 7931 *p++ = 'l'; 7932 *p++ = label + '0'; 7933 *p++ = '%'; 7934 *p++ = '#'; 7935 *p = '\0'; 7936 7937 return string; 7938 } 7939 7940 /* Emit a library call comparison between floating point X and Y. 7941 COMPARISON is the operator to compare with (EQ, NE, GT, etc). 7942 Return the new operator to be used in the comparison sequence. 7943 7944 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode 7945 values as arguments instead of the TFmode registers themselves, 7946 that's why we cannot call emit_float_lib_cmp. */ 7947 7948 rtx 7949 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison) 7950 { 7951 const char *qpfunc; 7952 rtx slot0, slot1, result, tem, tem2, libfunc; 7953 machine_mode mode; 7954 enum rtx_code new_comparison; 7955 7956 switch (comparison) 7957 { 7958 case EQ: 7959 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq"); 7960 break; 7961 7962 case NE: 7963 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne"); 7964 break; 7965 7966 case GT: 7967 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt"); 7968 break; 7969 7970 case GE: 7971 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge"); 7972 break; 7973 7974 case LT: 7975 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt"); 7976 break; 7977 7978 case LE: 7979 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle"); 7980 break; 7981 7982 case ORDERED: 7983 case UNORDERED: 7984 case UNGT: 7985 case UNLT: 7986 case UNEQ: 7987 case UNGE: 7988 case UNLE: 7989 case LTGT: 7990 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp"); 7991 break; 7992 7993 default: 7994 gcc_unreachable (); 7995 } 7996 7997 if (TARGET_ARCH64) 7998 { 7999 if (MEM_P (x)) 8000 { 8001 tree expr = MEM_EXPR (x); 8002 if (expr) 8003 mark_addressable (expr); 8004 slot0 = x; 8005 } 8006 else 8007 { 8008 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode)); 8009 emit_move_insn (slot0, x); 8010 } 8011 8012 if (MEM_P (y)) 8013 { 8014 tree expr = MEM_EXPR (y); 8015 if (expr) 8016 mark_addressable (expr); 8017 slot1 = y; 8018 } 8019 else 8020 { 8021 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode)); 8022 emit_move_insn (slot1, y); 8023 } 8024 8025 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc); 8026 emit_library_call (libfunc, LCT_NORMAL, 8027 DImode, 2, 8028 XEXP (slot0, 0), Pmode, 8029 XEXP (slot1, 0), Pmode); 8030 mode = DImode; 8031 } 8032 else 8033 { 8034 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc); 8035 emit_library_call (libfunc, LCT_NORMAL, 8036 SImode, 2, 8037 x, TFmode, y, TFmode); 8038 mode = SImode; 8039 } 8040 8041 8042 /* Immediately move the result of the libcall into a pseudo 8043 register so reload doesn't clobber the value if it needs 8044 the return register for a spill reg. */ 8045 result = gen_reg_rtx (mode); 8046 emit_move_insn (result, hard_libcall_value (mode, libfunc)); 8047 8048 switch (comparison) 8049 { 8050 default: 8051 return gen_rtx_NE (VOIDmode, result, const0_rtx); 8052 case ORDERED: 8053 case UNORDERED: 8054 new_comparison = (comparison == UNORDERED ? 
EQ : NE); 8055 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3)); 8056 case UNGT: 8057 case UNGE: 8058 new_comparison = (comparison == UNGT ? GT : NE); 8059 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx); 8060 case UNLE: 8061 return gen_rtx_NE (VOIDmode, result, const2_rtx); 8062 case UNLT: 8063 tem = gen_reg_rtx (mode); 8064 if (TARGET_ARCH32) 8065 emit_insn (gen_andsi3 (tem, result, const1_rtx)); 8066 else 8067 emit_insn (gen_anddi3 (tem, result, const1_rtx)); 8068 return gen_rtx_NE (VOIDmode, tem, const0_rtx); 8069 case UNEQ: 8070 case LTGT: 8071 tem = gen_reg_rtx (mode); 8072 if (TARGET_ARCH32) 8073 emit_insn (gen_addsi3 (tem, result, const1_rtx)); 8074 else 8075 emit_insn (gen_adddi3 (tem, result, const1_rtx)); 8076 tem2 = gen_reg_rtx (mode); 8077 if (TARGET_ARCH32) 8078 emit_insn (gen_andsi3 (tem2, tem, const2_rtx)); 8079 else 8080 emit_insn (gen_anddi3 (tem2, tem, const2_rtx)); 8081 new_comparison = (comparison == UNEQ ? EQ : NE); 8082 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx); 8083 } 8084 8085 gcc_unreachable (); 8086 } 8087 8088 /* Generate an unsigned DImode to FP conversion. This is the same code 8089 optabs would emit if we didn't have TFmode patterns. */ 8090 8091 void 8092 sparc_emit_floatunsdi (rtx *operands, machine_mode mode) 8093 { 8094 rtx i0, i1, f0, in, out; 8095 8096 out = operands[0]; 8097 in = force_reg (DImode, operands[1]); 8098 rtx_code_label *neglab = gen_label_rtx (); 8099 rtx_code_label *donelab = gen_label_rtx (); 8100 i0 = gen_reg_rtx (DImode); 8101 i1 = gen_reg_rtx (DImode); 8102 f0 = gen_reg_rtx (mode); 8103 8104 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); 8105 8106 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in))); 8107 emit_jump_insn (gen_jump (donelab)); 8108 emit_barrier (); 8109 8110 emit_label (neglab); 8111 8112 emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); 8113 emit_insn (gen_anddi3 (i1, in, const1_rtx)); 8114 emit_insn (gen_iordi3 (i0, i0, i1)); 8115 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0))); 8116 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); 8117 8118 emit_label (donelab); 8119 } 8120 8121 /* Generate an FP to unsigned DImode conversion. This is the same code 8122 optabs would emit if we didn't have TFmode patterns. 
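In outline (a sketch of the expansion below):

     if (x < 2^63)
       result = (DImode) x;
     else
       result = (DImode) (x - 2^63) ^ ((DImode) 1 << 63);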
*/ 8123 8124 void 8125 sparc_emit_fixunsdi (rtx *operands, machine_mode mode) 8126 { 8127 rtx i0, i1, f0, in, out, limit; 8128 8129 out = operands[0]; 8130 in = force_reg (mode, operands[1]); 8131 rtx_code_label *neglab = gen_label_rtx (); 8132 rtx_code_label *donelab = gen_label_rtx (); 8133 i0 = gen_reg_rtx (DImode); 8134 i1 = gen_reg_rtx (DImode); 8135 limit = gen_reg_rtx (mode); 8136 f0 = gen_reg_rtx (mode); 8137 8138 emit_move_insn (limit, 8139 CONST_DOUBLE_FROM_REAL_VALUE ( 8140 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode)); 8141 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab); 8142 8143 emit_insn (gen_rtx_SET (VOIDmode, 8144 out, 8145 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in)))); 8146 emit_jump_insn (gen_jump (donelab)); 8147 emit_barrier (); 8148 8149 emit_label (neglab); 8150 8151 emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_MINUS (mode, in, limit))); 8152 emit_insn (gen_rtx_SET (VOIDmode, 8153 i0, 8154 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0)))); 8155 emit_insn (gen_movdi (i1, const1_rtx)); 8156 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63))); 8157 emit_insn (gen_xordi3 (out, i0, i1)); 8158 8159 emit_label (donelab); 8160 } 8161 8162 /* Return the string to output a compare and branch instruction to DEST. 8163 DEST is the destination insn (i.e. the label), INSN is the source, 8164 and OP is the conditional expression. */ 8165 8166 const char * 8167 output_cbcond (rtx op, rtx dest, rtx_insn *insn) 8168 { 8169 machine_mode mode = GET_MODE (XEXP (op, 0)); 8170 enum rtx_code code = GET_CODE (op); 8171 const char *cond_str, *tmpl; 8172 int far, emit_nop, len; 8173 static char string[64]; 8174 char size_char; 8175 8176 /* Compare and Branch is limited to +-2KB. If it is too far away, 8177 change 8178 8179 cxbne X, Y, .LC30 8180 8181 to 8182 8183 cxbe X, Y, .+16 8184 nop 8185 ba,pt xcc, .LC30 8186 nop */ 8187 8188 len = get_attr_length (insn); 8189 8190 far = len == 4; 8191 emit_nop = len == 2; 8192 8193 if (far) 8194 code = reverse_condition (code); 8195 8196 size_char = ((mode == SImode) ? 'w' : 'x'); 8197 8198 switch (code) 8199 { 8200 case NE: 8201 cond_str = "ne"; 8202 break; 8203 8204 case EQ: 8205 cond_str = "e"; 8206 break; 8207 8208 case GE: 8209 if (mode == CC_NOOVmode || mode == CCX_NOOVmode) 8210 cond_str = "pos"; 8211 else 8212 cond_str = "ge"; 8213 break; 8214 8215 case GT: 8216 cond_str = "g"; 8217 break; 8218 8219 case LE: 8220 cond_str = "le"; 8221 break; 8222 8223 case LT: 8224 if (mode == CC_NOOVmode || mode == CCX_NOOVmode) 8225 cond_str = "neg"; 8226 else 8227 cond_str = "l"; 8228 break; 8229 8230 case GEU: 8231 cond_str = "cc"; 8232 break; 8233 8234 case GTU: 8235 cond_str = "gu"; 8236 break; 8237 8238 case LEU: 8239 cond_str = "leu"; 8240 break; 8241 8242 case LTU: 8243 cond_str = "cs"; 8244 break; 8245 8246 default: 8247 gcc_unreachable (); 8248 } 8249 8250 if (far) 8251 { 8252 int veryfar = 1, delta; 8253 8254 if (INSN_ADDRESSES_SET_P ()) 8255 { 8256 delta = (INSN_ADDRESSES (INSN_UID (dest)) 8257 - INSN_ADDRESSES (INSN_UID (insn))); 8258 /* Leave some instructions for "slop". 
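Insn addresses are only estimates at this point, so we only
	     trust a window well inside the architectural +-1MB range of
	     "ba,pt" rather than the full range.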
*/
8259 if (delta >= -260000 && delta < 260000)
8260 veryfar = 0;
8261 }
8262
8263 if (veryfar)
8264 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8265 else
8266 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8267 }
8268 else
8269 {
8270 if (emit_nop)
8271 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8272 else
8273 tmpl = "c%cb%s\t%%1, %%2, %%3";
8274 }
8275
8276 snprintf (string, sizeof (string), tmpl, size_char, cond_str);
8277
8278 return string;
8279 }
8280
8281 /* Return the string to output a conditional branch to LABEL, testing
8282 register REG. LABEL is the operand number of the label; REG is the
8283 operand number of the reg. OP is the conditional expression. The mode
8284 of REG says what kind of comparison we made.
8285
8286 DEST is the destination insn (i.e. the label), INSN is the source.
8287
8288 REVERSED is nonzero if we should reverse the sense of the comparison.
8289
8290 ANNUL is nonzero if we should generate an annulling branch. */
8291
8292 const char *
8293 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8294 int annul, rtx_insn *insn)
8295 {
8296 static char string[64];
8297 enum rtx_code code = GET_CODE (op);
8298 machine_mode mode = GET_MODE (XEXP (op, 0));
8299 rtx note;
8300 int far;
8301 char *p;
8302
8303 /* Branches on registers are limited to +-128KB. If the target is
8304 too far away, change
8305
8306 brnz,pt %g1, .LC30
8307
8308 to
8309
8310 brz,pn %g1, .+12
8311 nop
8312 ba,pt %xcc, .LC30
8313
8314 and
8315
8316 brgez,a,pn %o1, .LC29
8317
8318 to
8319
8320 brlz,pt %o1, .+16
8321 nop
8322 ba,pt %xcc, .LC29 */
8323
8324 far = get_attr_length (insn) >= 3;
8325
8326 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8327 if (reversed ^ far)
8328 code = reverse_condition (code);
8329
8330 /* Only 64-bit versions of these instructions exist. */
8331 gcc_assert (mode == DImode);
8332
8333 /* Start by writing the branch condition. */
8334
8335 switch (code)
8336 {
8337 case NE:
8338 strcpy (string, "brnz");
8339 break;
8340
8341 case EQ:
8342 strcpy (string, "brz");
8343 break;
8344
8345 case GE:
8346 strcpy (string, "brgez");
8347 break;
8348
8349 case LT:
8350 strcpy (string, "brlz");
8351 break;
8352
8353 case LE:
8354 strcpy (string, "brlez");
8355 break;
8356
8357 case GT:
8358 strcpy (string, "brgz");
8359 break;
8360
8361 default:
8362 gcc_unreachable ();
8363 }
8364
8365 p = strchr (string, '\0');
8366
8367 /* Now add the annulling, reg, label, and nop. */
8368 if (annul && ! far)
8369 {
8370 strcpy (p, ",a");
8371 p += 2;
8372 }
8373
8374 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8375 {
8376 strcpy (p,
8377 ((XINT (note, 0) >= REG_BR_PROB_BASE / 2) ^ far)
8378 ? ",pt" : ",pn");
8379 p += 3;
8380 }
8381
8382 *p = p < string + 8 ? '\t' : ' ';
8383 p++;
8384 *p++ = '%';
8385 *p++ = '0' + reg;
8386 *p++ = ',';
8387 *p++ = ' ';
8388 if (far)
8389 {
8390 int veryfar = 1, delta;
8391
8392 if (INSN_ADDRESSES_SET_P ())
8393 {
8394 delta = (INSN_ADDRESSES (INSN_UID (dest))
8395 - INSN_ADDRESSES (INSN_UID (insn)));
8396 /* Leave some instructions for "slop". */
8397 if (delta >= -260000 && delta < 260000)
8398 veryfar = 0;
8399 }
8400
8401 strcpy (p, ".+12\n\t nop\n\t");
8402 /* Skip the next insn if requested or
8403 if we know that it will be a nop. */
8404 if (annul || !
final_sequence)
8405 p[3] = '6';
8406 p += 12;
8407 if (veryfar)
8408 {
8409 strcpy (p, "b\t");
8410 p += 2;
8411 }
8412 else
8413 {
8414 strcpy (p, "ba,pt\t%%xcc, ");
8415 p += 13;
8416 }
8417 }
8418 *p++ = '%';
8419 *p++ = 'l';
8420 *p++ = '0' + label;
8421 *p++ = '%';
8422 *p++ = '#';
8423 *p = '\0';
8424
8425 return string;
8426 }
8427
8428 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8429 Such instructions cannot be used in the delay slot of a return insn on V9.
8430 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8431 */
8432
8433 static int
8434 epilogue_renumber (register rtx *where, int test)
8435 {
8436 register const char *fmt;
8437 register int i;
8438 register enum rtx_code code;
8439
8440 if (*where == 0)
8441 return 0;
8442
8443 code = GET_CODE (*where);
8444
8445 switch (code)
8446 {
8447 case REG:
8448 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8449 return 1;
8450 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8451 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO (*where))); /* FALLTHRU */
8452 case SCRATCH:
8453 case CC0:
8454 case PC:
8455 case CONST_INT:
8456 case CONST_DOUBLE:
8457 return 0;
8458
8459 /* Do not replace the frame pointer with the stack pointer because
8460 it can cause the delayed instruction to load below the stack.
8461 This occurs when instructions like:
8462
8463 (set (reg/i:SI 24 %i0)
8464 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8465 (const_int -20 [0xffffffec])) 0))
8466
8467 are in the return delay slot. */
8468 case PLUS:
8469 if (GET_CODE (XEXP (*where, 0)) == REG
8470 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8471 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8472 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8473 return 1;
8474 break;
8475
8476 case MEM:
8477 if (SPARC_STACK_BIAS
8478 && GET_CODE (XEXP (*where, 0)) == REG
8479 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8480 return 1;
8481 break;
8482
8483 default:
8484 break;
8485 }
8486
8487 fmt = GET_RTX_FORMAT (code);
8488
8489 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8490 {
8491 if (fmt[i] == 'E')
8492 {
8493 register int j;
8494 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8495 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8496 return 1;
8497 }
8498 else if (fmt[i] == 'e'
8499 && epilogue_renumber (&(XEXP (*where, i)), test))
8500 return 1;
8501 }
8502 return 0;
8503 }
8504
8505 /* Leaf functions and non-leaf functions have different needs. */
8506
8507 static const int
8508 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8509
8510 static const int
8511 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8512
8513 static const int *const reg_alloc_orders[] = {
8514 reg_leaf_alloc_order,
8515 reg_nonleaf_alloc_order};
8516
8517 void
8518 order_regs_for_local_alloc (void)
8519 {
8520 static int last_order_nonleaf = 1;
8521
8522 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8523 {
8524 last_order_nonleaf = !last_order_nonleaf;
8525 memcpy ((char *) reg_alloc_order,
8526 (const char *) reg_alloc_orders[last_order_nonleaf],
8527 FIRST_PSEUDO_REGISTER * sizeof (int));
8528 }
8529 }
8530
8531 /* Return 1 if REG and MEM are legitimate enough to allow the various
8532 mem<-->reg splits to be run. */
8533
8534 int
8535 sparc_splitdi_legitimate (rtx reg, rtx mem)
8536 {
8537 /* Punt if we are here by mistake. */
8538 gcc_assert (reload_completed);
8539
8540 /* We must have an offsettable memory reference. */
8541 if (!
offsettable_memref_p (mem)) 8542 return 0; 8543 8544 /* If we have legitimate args for ldd/std, we do not want 8545 the split to happen. */ 8546 if ((REGNO (reg) % 2) == 0 8547 && mem_min_alignment (mem, 8)) 8548 return 0; 8549 8550 /* Success. */ 8551 return 1; 8552 } 8553 8554 /* Like sparc_splitdi_legitimate but for REG <--> REG moves. */ 8555 8556 int 8557 sparc_split_regreg_legitimate (rtx reg1, rtx reg2) 8558 { 8559 int regno1, regno2; 8560 8561 if (GET_CODE (reg1) == SUBREG) 8562 reg1 = SUBREG_REG (reg1); 8563 if (GET_CODE (reg1) != REG) 8564 return 0; 8565 regno1 = REGNO (reg1); 8566 8567 if (GET_CODE (reg2) == SUBREG) 8568 reg2 = SUBREG_REG (reg2); 8569 if (GET_CODE (reg2) != REG) 8570 return 0; 8571 regno2 = REGNO (reg2); 8572 8573 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2)) 8574 return 1; 8575 8576 if (TARGET_VIS3) 8577 { 8578 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2)) 8579 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2))) 8580 return 1; 8581 } 8582 8583 return 0; 8584 } 8585 8586 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1. 8587 This makes them candidates for using ldd and std insns. 8588 8589 Note reg1 and reg2 *must* be hard registers. */ 8590 8591 int 8592 registers_ok_for_ldd_peep (rtx reg1, rtx reg2) 8593 { 8594 /* We might have been passed a SUBREG. */ 8595 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG) 8596 return 0; 8597 8598 if (REGNO (reg1) % 2 != 0) 8599 return 0; 8600 8601 /* Integer ldd is deprecated in SPARC V9 */ 8602 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1))) 8603 return 0; 8604 8605 return (REGNO (reg1) == REGNO (reg2) - 1); 8606 } 8607 8608 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in 8609 an ldd or std insn. 8610 8611 This can only happen when addr1 and addr2, the addresses in mem1 8612 and mem2, are consecutive memory locations (addr1 + 4 == addr2). 8613 addr1 must also be aligned on a 64-bit boundary. 8614 8615 Also iff dependent_reg_rtx is not null it should not be used to 8616 compute the address for mem1, i.e. we cannot optimize a sequence 8617 like: 8618 ld [%o0], %o0 8619 ld [%o0 + 4], %o1 8620 to 8621 ldd [%o0], %o0 8622 nor: 8623 ld [%g3 + 4], %g3 8624 ld [%g3], %g2 8625 to 8626 ldd [%g3], %g2 8627 8628 But, note that the transformation from: 8629 ld [%g2 + 4], %g3 8630 ld [%g2], %g2 8631 to 8632 ldd [%g2], %g2 8633 is perfectly fine. Thus, the peephole2 patterns always pass us 8634 the destination register of the first load, never the second one. 8635 8636 For stores we don't have a similar problem, so dependent_reg_rtx is 8637 NULL_RTX. */ 8638 8639 int 8640 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx) 8641 { 8642 rtx addr1, addr2; 8643 unsigned int reg1; 8644 HOST_WIDE_INT offset1; 8645 8646 /* The mems cannot be volatile. */ 8647 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) 8648 return 0; 8649 8650 /* MEM1 should be aligned on a 64-bit boundary. */ 8651 if (MEM_ALIGN (mem1) < 64) 8652 return 0; 8653 8654 addr1 = XEXP (mem1, 0); 8655 addr2 = XEXP (mem2, 0); 8656 8657 /* Extract a register number and offset (if used) from the first addr. */ 8658 if (GET_CODE (addr1) == PLUS) 8659 { 8660 /* If not a REG, return zero. */ 8661 if (GET_CODE (XEXP (addr1, 0)) != REG) 8662 return 0; 8663 else 8664 { 8665 reg1 = REGNO (XEXP (addr1, 0)); 8666 /* The offset must be constant! 
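With a REG+REG address there is no compile-time offset, so the
	     addr1 + 4 == addr2 check below would be impossible.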
*/
8667 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8668 return 0;
8669 offset1 = INTVAL (XEXP (addr1, 1));
8670 }
8671 }
8672 else if (GET_CODE (addr1) != REG)
8673 return 0;
8674 else
8675 {
8676 reg1 = REGNO (addr1);
8677 /* This was a simple (mem (reg)) expression. Offset is 0. */
8678 offset1 = 0;
8679 }
8680
8681 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
8682 if (GET_CODE (addr2) != PLUS)
8683 return 0;
8684
8685 if (GET_CODE (XEXP (addr2, 0)) != REG
8686 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8687 return 0;
8688
8689 if (reg1 != REGNO (XEXP (addr2, 0)))
8690 return 0;
8691
8692 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8693 return 0;
8694
8695 /* The first offset must be evenly divisible by 8 to ensure the
8696 address is 64-bit aligned. */
8697 if (offset1 % 8 != 0)
8698 return 0;
8699
8700 /* The offset for the second addr must be 4 more than the first addr. */
8701 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8702 return 0;
8703
8704 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8705 instructions. */
8706 return 1;
8707 }
8708
8709 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8710
8711 rtx
8712 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8713 {
8714 rtx x = widen_memory_access (mem1, mode, 0);
8715 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8716 return x;
8717 }
8718
8719 /* Return 1 if reg is a pseudo, or is the first register in
8720 a hard register pair. This makes it suitable for use in
8721 ldd and std insns. */
8722
8723 int
8724 register_ok_for_ldd (rtx reg)
8725 {
8726 /* We might have been passed a SUBREG. */
8727 if (!REG_P (reg))
8728 return 0;
8729
8730 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8731 return (REGNO (reg) % 2 == 0);
8732
8733 return 1;
8734 }
8735
8736 /* Return 1 if OP, a MEM, has an address which is known to be
8737 aligned to an 8-byte boundary. */
8738
8739 int
8740 memory_ok_for_ldd (rtx op)
8741 {
8742 /* In 64-bit mode, we assume that the address is word-aligned. */
8743 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8744 return 0;
8745
8746 if (! can_create_pseudo_p ()
8747 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8748 return 0;
8749
8750 return 1;
8751 }
8752
8753 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8754
8755 static bool
8756 sparc_print_operand_punct_valid_p (unsigned char code)
8757 {
8758 if (code == '#'
8759 || code == '*'
8760 || code == '('
8761 || code == ')'
8762 || code == '_'
8763 || code == '&')
8764 return true;
8765
8766 return false;
8767 }
8768
8769 /* Implement TARGET_PRINT_OPERAND.
8770 Print operand X (an rtx) in assembler syntax to file FILE.
8771 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8772 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8773
8774 static void
8775 sparc_print_operand (FILE *file, rtx x, int code)
8776 {
8777 switch (code)
8778 {
8779 case '#':
8780 /* Output an insn in a delay slot. */
8781 if (final_sequence)
8782 sparc_indent_opcode = 1;
8783 else
8784 fputs ("\n\t nop", file);
8785 return;
8786 case '*':
8787 /* Output an annul flag if there's nothing for the delay slot and we
8788 are optimizing. This is always used with '(' below.
8789 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
8790 this is a dbx bug. So, we only do this when optimizing.
8791 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
8792 Always emit a nop in case the next instruction is a branch. */ 8793 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9)) 8794 fputs (",a", file); 8795 return; 8796 case '(': 8797 /* Output a 'nop' if there's nothing for the delay slot and we are 8798 not optimizing. This is always used with '*' above. */ 8799 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9)) 8800 fputs ("\n\t nop", file); 8801 else if (final_sequence) 8802 sparc_indent_opcode = 1; 8803 return; 8804 case ')': 8805 /* Output the right displacement from the saved PC on function return. 8806 The caller may have placed an "unimp" insn immediately after the call 8807 so we have to account for it. This insn is used in the 32-bit ABI 8808 when calling a function that returns a non zero-sized structure. The 8809 64-bit ABI doesn't have it. Be careful to have this test be the same 8810 as that for the call. The exception is when sparc_std_struct_return 8811 is enabled, the psABI is followed exactly and the adjustment is made 8812 by the code in sparc_struct_value_rtx. The call emitted is the same 8813 when sparc_std_struct_return is enabled. */ 8814 if (!TARGET_ARCH64 8815 && cfun->returns_struct 8816 && !sparc_std_struct_return 8817 && DECL_SIZE (DECL_RESULT (current_function_decl)) 8818 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl))) 8819 == INTEGER_CST 8820 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))) 8821 fputs ("12", file); 8822 else 8823 fputc ('8', file); 8824 return; 8825 case '_': 8826 /* Output the Embedded Medium/Anywhere code model base register. */ 8827 fputs (EMBMEDANY_BASE_REG, file); 8828 return; 8829 case '&': 8830 /* Print some local dynamic TLS name. */ 8831 if (const char *name = get_some_local_dynamic_name ()) 8832 assemble_name (file, name); 8833 else 8834 output_operand_lossage ("'%%&' used without any " 8835 "local dynamic TLS references"); 8836 return; 8837 8838 case 'Y': 8839 /* Adjust the operand to take into account a RESTORE operation. */ 8840 if (GET_CODE (x) == CONST_INT) 8841 break; 8842 else if (GET_CODE (x) != REG) 8843 output_operand_lossage ("invalid %%Y operand"); 8844 else if (REGNO (x) < 8) 8845 fputs (reg_names[REGNO (x)], file); 8846 else if (REGNO (x) >= 24 && REGNO (x) < 32) 8847 fputs (reg_names[REGNO (x)-16], file); 8848 else 8849 output_operand_lossage ("invalid %%Y operand"); 8850 return; 8851 case 'L': 8852 /* Print out the low order register name of a register pair. */ 8853 if (WORDS_BIG_ENDIAN) 8854 fputs (reg_names[REGNO (x)+1], file); 8855 else 8856 fputs (reg_names[REGNO (x)], file); 8857 return; 8858 case 'H': 8859 /* Print out the high order register name of a register pair. */ 8860 if (WORDS_BIG_ENDIAN) 8861 fputs (reg_names[REGNO (x)], file); 8862 else 8863 fputs (reg_names[REGNO (x)+1], file); 8864 return; 8865 case 'R': 8866 /* Print out the second register name of a register pair or quad. 8867 I.e., R (%o0) => %o1. */ 8868 fputs (reg_names[REGNO (x)+1], file); 8869 return; 8870 case 'S': 8871 /* Print out the third register name of a register quad. 8872 I.e., S (%o0) => %o2. */ 8873 fputs (reg_names[REGNO (x)+2], file); 8874 return; 8875 case 'T': 8876 /* Print out the fourth register name of a register quad. 8877 I.e., T (%o0) => %o3. */ 8878 fputs (reg_names[REGNO (x)+3], file); 8879 return; 8880 case 'x': 8881 /* Print a condition code register. */ 8882 if (REGNO (x) == SPARC_ICC_REG) 8883 { 8884 /* We don't handle CC[X]_NOOVmode because they're not supposed 8885 to occur here. 
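Anything else reaching this printer is a bug, hence the
	     gcc_unreachable below.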
*/
8886 if (GET_MODE (x) == CCmode)
8887 fputs ("%icc", file);
8888 else if (GET_MODE (x) == CCXmode)
8889 fputs ("%xcc", file);
8890 else
8891 gcc_unreachable ();
8892 }
8893 else
8894 /* %fccN register */
8895 fputs (reg_names[REGNO (x)], file);
8896 return;
8897 case 'm':
8898 /* Print the operand's address only. */
8899 output_address (XEXP (x, 0));
8900 return;
8901 case 'r':
8902 /* In this case we need a register. Use %g0 if the
8903 operand is const0_rtx. */
8904 if (x == const0_rtx
8905 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
8906 {
8907 fputs ("%g0", file);
8908 return;
8909 }
8910 else
8911 break;
8912
8913 case 'A':
8914 switch (GET_CODE (x))
8915 {
8916 case IOR: fputs ("or", file); break;
8917 case AND: fputs ("and", file); break;
8918 case XOR: fputs ("xor", file); break;
8919 default: output_operand_lossage ("invalid %%A operand");
8920 }
8921 return;
8922
8923 case 'B':
8924 switch (GET_CODE (x))
8925 {
8926 case IOR: fputs ("orn", file); break;
8927 case AND: fputs ("andn", file); break;
8928 case XOR: fputs ("xnor", file); break;
8929 default: output_operand_lossage ("invalid %%B operand");
8930 }
8931 return;
8932
8933 /* This is used by the conditional move instructions. */
8934 case 'C':
8935 {
8936 enum rtx_code rc = GET_CODE (x);
8937
8938 switch (rc)
8939 {
8940 case NE: fputs ("ne", file); break;
8941 case EQ: fputs ("e", file); break;
8942 case GE: fputs ("ge", file); break;
8943 case GT: fputs ("g", file); break;
8944 case LE: fputs ("le", file); break;
8945 case LT: fputs ("l", file); break;
8946 case GEU: fputs ("geu", file); break;
8947 case GTU: fputs ("gu", file); break;
8948 case LEU: fputs ("leu", file); break;
8949 case LTU: fputs ("lu", file); break;
8950 case LTGT: fputs ("lg", file); break;
8951 case UNORDERED: fputs ("u", file); break;
8952 case ORDERED: fputs ("o", file); break;
8953 case UNLT: fputs ("ul", file); break;
8954 case UNLE: fputs ("ule", file); break;
8955 case UNGT: fputs ("ug", file); break;
8956 case UNGE: fputs ("uge", file); break;
8957 case UNEQ: fputs ("ue", file); break;
8958 default: output_operand_lossage ("invalid %%C operand");
8959 }
8960 return;
8961 }
8962
8963 /* This is used by the movr instruction pattern. */
8964 case 'D':
8965 {
8966 enum rtx_code rc = GET_CODE (x);
8967 switch (rc)
8968 {
8969 case NE: fputs ("ne", file); break;
8970 case EQ: fputs ("e", file); break;
8971 case GE: fputs ("gez", file); break;
8972 case LT: fputs ("lz", file); break;
8973 case LE: fputs ("lez", file); break;
8974 case GT: fputs ("gz", file); break;
8975 default: output_operand_lossage ("invalid %%D operand");
8976 }
8977 return;
8978 }
8979
8980 case 'b':
8981 {
8982 /* Print a sign-extended character. */
8983 int i = trunc_int_for_mode (INTVAL (x), QImode);
8984 fprintf (file, "%d", i);
8985 return;
8986 }
8987
8988 case 'f':
8989 /* Operand must be a MEM; write its address. */
8990 if (GET_CODE (x) != MEM)
8991 output_operand_lossage ("invalid %%f operand");
8992 output_address (XEXP (x, 0));
8993 return;
8994
8995 case 's':
8996 {
8997 /* Print a sign-extended 32-bit value. */
8998 HOST_WIDE_INT i;
8999 if (GET_CODE (x) == CONST_INT)
9000 i = INTVAL (x);
9001 else if (GET_CODE (x) == CONST_DOUBLE)
9002 i = CONST_DOUBLE_LOW (x);
9003 else
9004 {
9005 output_operand_lossage ("invalid %%s operand");
9006 return;
9007 }
9008 i = trunc_int_for_mode (i, SImode);
9009 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9010 return;
9011 }
9012
9013 case 0:
9014 /* Do nothing special.
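The operand falls through to the generic printing code below:
	 registers by name, MEMs in [address] form, HIGH as %hi(...),
	 LO_SUM as reg+%lo(...), and so on.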
*/ 9015 break; 9016 9017 default: 9018 /* Undocumented flag. */ 9019 output_operand_lossage ("invalid operand output code"); 9020 } 9021 9022 if (GET_CODE (x) == REG) 9023 fputs (reg_names[REGNO (x)], file); 9024 else if (GET_CODE (x) == MEM) 9025 { 9026 fputc ('[', file); 9027 /* Poor Sun assembler doesn't understand absolute addressing. */ 9028 if (CONSTANT_P (XEXP (x, 0))) 9029 fputs ("%g0+", file); 9030 output_address (XEXP (x, 0)); 9031 fputc (']', file); 9032 } 9033 else if (GET_CODE (x) == HIGH) 9034 { 9035 fputs ("%hi(", file); 9036 output_addr_const (file, XEXP (x, 0)); 9037 fputc (')', file); 9038 } 9039 else if (GET_CODE (x) == LO_SUM) 9040 { 9041 sparc_print_operand (file, XEXP (x, 0), 0); 9042 if (TARGET_CM_MEDMID) 9043 fputs ("+%l44(", file); 9044 else 9045 fputs ("+%lo(", file); 9046 output_addr_const (file, XEXP (x, 1)); 9047 fputc (')', file); 9048 } 9049 else if (GET_CODE (x) == CONST_DOUBLE 9050 && (GET_MODE (x) == VOIDmode 9051 || GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)) 9052 { 9053 if (CONST_DOUBLE_HIGH (x) == 0) 9054 fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x)); 9055 else if (CONST_DOUBLE_HIGH (x) == -1 9056 && CONST_DOUBLE_LOW (x) < 0) 9057 fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x)); 9058 else 9059 output_operand_lossage ("long long constant not a valid immediate operand"); 9060 } 9061 else if (GET_CODE (x) == CONST_DOUBLE) 9062 output_operand_lossage ("floating point constant not a valid immediate operand"); 9063 else { output_addr_const (file, x); } 9064 } 9065 9066 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */ 9067 9068 static void 9069 sparc_print_operand_address (FILE *file, rtx x) 9070 { 9071 register rtx base, index = 0; 9072 int offset = 0; 9073 register rtx addr = x; 9074 9075 if (REG_P (addr)) 9076 fputs (reg_names[REGNO (addr)], file); 9077 else if (GET_CODE (addr) == PLUS) 9078 { 9079 if (CONST_INT_P (XEXP (addr, 0))) 9080 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1); 9081 else if (CONST_INT_P (XEXP (addr, 1))) 9082 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0); 9083 else 9084 base = XEXP (addr, 0), index = XEXP (addr, 1); 9085 if (GET_CODE (base) == LO_SUM) 9086 { 9087 gcc_assert (USE_AS_OFFSETABLE_LO10 9088 && TARGET_ARCH64 9089 && ! 
TARGET_CM_MEDMID); 9090 output_operand (XEXP (base, 0), 0); 9091 fputs ("+%lo(", file); 9092 output_address (XEXP (base, 1)); 9093 fprintf (file, ")+%d", offset); 9094 } 9095 else 9096 { 9097 fputs (reg_names[REGNO (base)], file); 9098 if (index == 0) 9099 fprintf (file, "%+d", offset); 9100 else if (REG_P (index)) 9101 fprintf (file, "+%s", reg_names[REGNO (index)]); 9102 else if (GET_CODE (index) == SYMBOL_REF 9103 || GET_CODE (index) == LABEL_REF 9104 || GET_CODE (index) == CONST) 9105 fputc ('+', file), output_addr_const (file, index); 9106 else gcc_unreachable (); 9107 } 9108 } 9109 else if (GET_CODE (addr) == MINUS 9110 && GET_CODE (XEXP (addr, 1)) == LABEL_REF) 9111 { 9112 output_addr_const (file, XEXP (addr, 0)); 9113 fputs ("-(", file); 9114 output_addr_const (file, XEXP (addr, 1)); 9115 fputs ("-.)", file); 9116 } 9117 else if (GET_CODE (addr) == LO_SUM) 9118 { 9119 output_operand (XEXP (addr, 0), 0); 9120 if (TARGET_CM_MEDMID) 9121 fputs ("+%l44(", file); 9122 else 9123 fputs ("+%lo(", file); 9124 output_address (XEXP (addr, 1)); 9125 fputc (')', file); 9126 } 9127 else if (flag_pic 9128 && GET_CODE (addr) == CONST 9129 && GET_CODE (XEXP (addr, 0)) == MINUS 9130 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST 9131 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS 9132 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx) 9133 { 9134 addr = XEXP (addr, 0); 9135 output_addr_const (file, XEXP (addr, 0)); 9136 /* Group the args of the second CONST in parenthesis. */ 9137 fputs ("-(", file); 9138 /* Skip past the second CONST--it does nothing for us. */ 9139 output_addr_const (file, XEXP (XEXP (addr, 1), 0)); 9140 /* Close the parenthesis. */ 9141 fputc (')', file); 9142 } 9143 else 9144 { 9145 output_addr_const (file, addr); 9146 } 9147 } 9148 9149 /* Target hook for assembling integer objects. The sparc version has 9150 special handling for aligned DI-mode objects. */ 9151 9152 static bool 9153 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p) 9154 { 9155 /* ??? We only output .xword's for symbols and only then in environments 9156 where the assembler can handle them. */ 9157 if (aligned_p && size == 8 9158 && (GET_CODE (x) != CONST_INT && GET_CODE (x) != CONST_DOUBLE)) 9159 { 9160 if (TARGET_V9) 9161 { 9162 assemble_integer_with_op ("\t.xword\t", x); 9163 return true; 9164 } 9165 else 9166 { 9167 assemble_aligned_integer (4, const0_rtx); 9168 assemble_aligned_integer (4, x); 9169 return true; 9170 } 9171 } 9172 return default_assemble_integer (x, size, aligned_p); 9173 } 9174 9175 /* Return the value of a code used in the .proc pseudo-op that says 9176 what kind of result this function returns. For non-C types, we pick 9177 the closest C type. 
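The result is a small base code in the low bits with 2-bit
   qualifiers (pointer, function, array) stacked above it starting at
   bit 6. E.g. "int" encodes as 4, "int *" as (1 << 6) | 4, and
   "int (*)()" as (2 << 8) | (1 << 6) | 4 (illustrative values derived
   from the encoding below).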
*/ 9178 9179 #ifndef SHORT_TYPE_SIZE 9180 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2) 9181 #endif 9182 9183 #ifndef INT_TYPE_SIZE 9184 #define INT_TYPE_SIZE BITS_PER_WORD 9185 #endif 9186 9187 #ifndef LONG_TYPE_SIZE 9188 #define LONG_TYPE_SIZE BITS_PER_WORD 9189 #endif 9190 9191 #ifndef LONG_LONG_TYPE_SIZE 9192 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2) 9193 #endif 9194 9195 #ifndef FLOAT_TYPE_SIZE 9196 #define FLOAT_TYPE_SIZE BITS_PER_WORD 9197 #endif 9198 9199 #ifndef DOUBLE_TYPE_SIZE 9200 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) 9201 #endif 9202 9203 #ifndef LONG_DOUBLE_TYPE_SIZE 9204 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) 9205 #endif 9206 9207 unsigned long 9208 sparc_type_code (register tree type) 9209 { 9210 register unsigned long qualifiers = 0; 9211 register unsigned shift; 9212 9213 /* Only the first 30 bits of the qualifier are valid. We must refrain from 9214 setting more, since some assemblers will give an error for this. Also, 9215 we must be careful to avoid shifts of 32 bits or more to avoid getting 9216 unpredictable results. */ 9217 9218 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type)) 9219 { 9220 switch (TREE_CODE (type)) 9221 { 9222 case ERROR_MARK: 9223 return qualifiers; 9224 9225 case ARRAY_TYPE: 9226 qualifiers |= (3 << shift); 9227 break; 9228 9229 case FUNCTION_TYPE: 9230 case METHOD_TYPE: 9231 qualifiers |= (2 << shift); 9232 break; 9233 9234 case POINTER_TYPE: 9235 case REFERENCE_TYPE: 9236 case OFFSET_TYPE: 9237 qualifiers |= (1 << shift); 9238 break; 9239 9240 case RECORD_TYPE: 9241 return (qualifiers | 8); 9242 9243 case UNION_TYPE: 9244 case QUAL_UNION_TYPE: 9245 return (qualifiers | 9); 9246 9247 case ENUMERAL_TYPE: 9248 return (qualifiers | 10); 9249 9250 case VOID_TYPE: 9251 return (qualifiers | 16); 9252 9253 case INTEGER_TYPE: 9254 /* If this is a range type, consider it to be the underlying 9255 type. */ 9256 if (TREE_TYPE (type) != 0) 9257 break; 9258 9259 /* Carefully distinguish all the standard types of C, 9260 without messing up if the language is not C. We do this by 9261 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to 9262 look at both the names and the above fields, but that's redundant. 9263 Any type whose size is between two C types will be considered 9264 to be the wider of the two types. Also, we do not have a 9265 special code to use for "long long", so anything wider than 9266 long is treated the same. Note that we can't distinguish 9267 between "int" and "long" in this code if they are the same 9268 size, but that's fine, since neither can the assembler. */ 9269 9270 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE) 9271 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2)); 9272 9273 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE) 9274 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3)); 9275 9276 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE) 9277 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4)); 9278 9279 else 9280 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5)); 9281 9282 case REAL_TYPE: 9283 /* If this is a range type, consider it to be the underlying 9284 type. */ 9285 if (TREE_TYPE (type) != 0) 9286 break; 9287 9288 /* Carefully distinguish all the standard types of C, 9289 without messing up if the language is not C. */ 9290 9291 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE) 9292 return (qualifiers | 6); 9293 9294 else 9295 return (qualifiers | 7); 9296 9297 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */ 9298 /* ??? 
We need to distinguish between double and float complex types, 9299 but I don't know how yet because I can't reach this code from 9300 existing front-ends. */ 9301 return (qualifiers | 7); /* Who knows? */ 9302 9303 case VECTOR_TYPE: 9304 case BOOLEAN_TYPE: /* Boolean truth value type. */ 9305 case LANG_TYPE: 9306 case NULLPTR_TYPE: 9307 return qualifiers; 9308 9309 default: 9310 gcc_unreachable (); /* Not a type! */ 9311 } 9312 } 9313 9314 return qualifiers; 9315 } 9316 9317 /* Nested function support. */ 9318 9319 /* Emit RTL insns to initialize the variable parts of a trampoline. 9320 FNADDR is an RTX for the address of the function's pure code. 9321 CXT is an RTX for the static chain value for the function. 9322 9323 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi 9324 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes 9325 (to store insns). This is a bit excessive. Perhaps a different 9326 mechanism would be better here. 9327 9328 Emit enough FLUSH insns to synchronize the data and instruction caches. */ 9329 9330 static void 9331 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt) 9332 { 9333 /* SPARC 32-bit trampoline: 9334 9335 sethi %hi(fn), %g1 9336 sethi %hi(static), %g2 9337 jmp %g1+%lo(fn) 9338 or %g2, %lo(static), %g2 9339 9340 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii 9341 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii 9342 */ 9343 9344 emit_move_insn 9345 (adjust_address (m_tramp, SImode, 0), 9346 expand_binop (SImode, ior_optab, 9347 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1), 9348 GEN_INT (trunc_int_for_mode (0x03000000, SImode)), 9349 NULL_RTX, 1, OPTAB_DIRECT)); 9350 9351 emit_move_insn 9352 (adjust_address (m_tramp, SImode, 4), 9353 expand_binop (SImode, ior_optab, 9354 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1), 9355 GEN_INT (trunc_int_for_mode (0x05000000, SImode)), 9356 NULL_RTX, 1, OPTAB_DIRECT)); 9357 9358 emit_move_insn 9359 (adjust_address (m_tramp, SImode, 8), 9360 expand_binop (SImode, ior_optab, 9361 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX), 9362 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)), 9363 NULL_RTX, 1, OPTAB_DIRECT)); 9364 9365 emit_move_insn 9366 (adjust_address (m_tramp, SImode, 12), 9367 expand_binop (SImode, ior_optab, 9368 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX), 9369 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)), 9370 NULL_RTX, 1, OPTAB_DIRECT)); 9371 9372 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is 9373 aligned on a 16 byte boundary so one flush clears it all. */ 9374 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0)))); 9375 if (sparc_cpu != PROCESSOR_ULTRASPARC 9376 && sparc_cpu != PROCESSOR_ULTRASPARC3 9377 && sparc_cpu != PROCESSOR_NIAGARA 9378 && sparc_cpu != PROCESSOR_NIAGARA2 9379 && sparc_cpu != PROCESSOR_NIAGARA3 9380 && sparc_cpu != PROCESSOR_NIAGARA4) 9381 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8)))); 9382 9383 /* Call __enable_execute_stack after writing onto the stack to make sure 9384 the stack address is accessible. */ 9385 #ifdef HAVE_ENABLE_EXECUTE_STACK 9386 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 9387 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode); 9388 #endif 9389 9390 } 9391 9392 /* The 64-bit version is simpler because it makes more sense to load the 9393 values as "immediate" data out of the trampoline. 
It's also easier since
9394 we can read the PC without clobbering a register. */
9395
9396 static void
9397 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9398 {
9399 /* SPARC 64-bit trampoline:
9400
9401 rd %pc, %g1
9402 ldx [%g1+24], %g5
9403 jmp %g5
9404 ldx [%g1+16], %g5
9405 +16 bytes data
9406 */
9407
9408 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9409 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9410 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9411 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9412 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9413 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9414 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9415 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9416 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9417 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9418 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9419
9420 if (sparc_cpu != PROCESSOR_ULTRASPARC
9421 && sparc_cpu != PROCESSOR_ULTRASPARC3
9422 && sparc_cpu != PROCESSOR_NIAGARA
9423 && sparc_cpu != PROCESSOR_NIAGARA2
9424 && sparc_cpu != PROCESSOR_NIAGARA3
9425 && sparc_cpu != PROCESSOR_NIAGARA4)
9426 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9427
9428 /* Call __enable_execute_stack after writing onto the stack to make sure
9429 the stack address is accessible. */
9430 #ifdef HAVE_ENABLE_EXECUTE_STACK
9431 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9432 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
9433 #endif
9434 }
9435
9436 /* Worker for TARGET_TRAMPOLINE_INIT. */
9437
9438 static void
9439 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9440 {
9441 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9442 cxt = force_reg (Pmode, cxt);
9443 if (TARGET_ARCH64)
9444 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9445 else
9446 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9447 }
9448
9449 /* Adjust the cost of a scheduling dependency. Return the new cost of
9450 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9451
9452 static int
9453 supersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
9454 {
9455 enum attr_type insn_type;
9456
9457 if (! recog_memoized (insn))
9458 return 0;
9459
9460 insn_type = get_attr_type (insn);
9461
9462 if (REG_NOTE_KIND (link) == 0)
9463 {
9464 /* Data dependency; DEP_INSN writes a register that INSN reads some
9465 cycles later. */
9466
9467 /* If a load, then the dependence must be on the memory address;
9468 add an extra "cycle". Note that the cost could be two cycles
9469 if the reg was written late in an instruction group; we cannot tell
9470 here. */
9471 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9472 return cost + 3;
9473
9474 /* Get the delay only if the address of the store is the dependence. */
9475 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9476 {
9477 rtx pat = PATTERN (insn);
9478 rtx dep_pat = PATTERN (dep_insn);
9479
9480 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9481 return cost; /* This should not happen! */
9482
9483 /* The dependency between the two instructions was on the data that
9484 is being stored. Assume that this implies that the address of the
9485 store is not dependent. */
9486 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9487 return cost;
9488
9489 return cost + 3; /* An approximation.
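The true stall depends on how late the
		 address value arrives within the producer's instruction
		 group, which we cannot determine here.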
	     */
	}

      /* A shift instruction cannot receive its data from an instruction
	 in the same cycle; add a one cycle penalty.  */
      if (insn_type == TYPE_SHIFT)
	return cost + 3;  /* Split before cascade into shift.  */
    }
  else
    {
      /* Anti- or output- dependency; DEP_INSN reads/writes a register that
	 INSN writes some cycles later.  */

      /* These are only significant for the fpu unit; writing a fp reg before
	 the fpu has finished with it stalls the processor.  */

      /* Reusing an integer register causes no problems.  */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
    }

  return cost;
}

static int
hypersparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
{
  enum attr_type insn_type, dep_type;
  rtx pat = PATTERN (insn);
  rtx dep_pat = PATTERN (dep_insn);

  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
    return cost;

  insn_type = get_attr_type (insn);
  dep_type = get_attr_type (dep_insn);

  switch (REG_NOTE_KIND (link))
    {
    case 0:
      /* Data dependency; DEP_INSN writes a register that INSN reads some
	 cycles later.  */

      switch (insn_type)
	{
	case TYPE_STORE:
	case TYPE_FPSTORE:
	  /* Get the delay iff the address of the store is the dependence.  */
	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
	    return cost;

	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
	    return cost;
	  return cost + 3;

	case TYPE_LOAD:
	case TYPE_SLOAD:
	case TYPE_FPLOAD:
	  /* If a load, then the dependence must be on the memory address.
	     If the addresses aren't equal, then it might be a false
	     dependency.  */
	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
	    {
	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
		  || GET_CODE (SET_DEST (dep_pat)) != MEM
		  || GET_CODE (SET_SRC (pat)) != MEM
		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
				    XEXP (SET_SRC (pat), 0)))
		return cost + 2;

	      return cost + 8;
	    }
	  break;

	case TYPE_BRANCH:
	  /* Compare to branch latency is 0.  There is no benefit from
	     separating compare and branch.  */
	  if (dep_type == TYPE_COMPARE)
	    return 0;
	  /* Floating point compare to branch latency is less than
	     compare to conditional move.  */
	  if (dep_type == TYPE_FPCMP)
	    return cost - 1;
	  break;
	default:
	  break;
	}
      break;

    case REG_DEP_ANTI:
      /* Anti-dependencies only penalize the fpu unit.
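	 Integer results may be overwritten as soon as they have been
	 read, so IALU and shift insns get no extra cost.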
	 */
      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
	return 0;
      break;

    default:
      break;
    }

  return cost;
}

static int
sparc_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
{
  switch (sparc_cpu)
    {
    case PROCESSOR_SUPERSPARC:
      cost = supersparc_adjust_cost (insn, link, dep, cost);
      break;
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      cost = hypersparc_adjust_cost (insn, link, dep, cost);
      break;
    default:
      break;
    }
  return cost;
}

static void
sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
		  int sched_verbose ATTRIBUTE_UNUSED,
		  int max_ready ATTRIBUTE_UNUSED)
{}

static int
sparc_use_sched_lookahead (void)
{
  if (sparc_cpu == PROCESSOR_NIAGARA
      || sparc_cpu == PROCESSOR_NIAGARA2
      || sparc_cpu == PROCESSOR_NIAGARA3)
    return 0;
  if (sparc_cpu == PROCESSOR_NIAGARA4)
    return 2;
  if (sparc_cpu == PROCESSOR_ULTRASPARC
      || sparc_cpu == PROCESSOR_ULTRASPARC3)
    return 4;
  /* Test membership in the SuperSPARC/HyperSPARC/SPARClite86x set with
     a single bitmask.  */
  if ((1 << sparc_cpu) &
      ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
       (1 << PROCESSOR_SPARCLITE86X)))
    return 3;
  return 0;
}

static int
sparc_issue_rate (void)
{
  switch (sparc_cpu)
    {
    case PROCESSOR_NIAGARA:
    case PROCESSOR_NIAGARA2:
    case PROCESSOR_NIAGARA3:
    default:
      return 1;
    case PROCESSOR_NIAGARA4:
    case PROCESSOR_V9:
      /* Assume V9 processors are capable of at least dual-issue.  */
      return 2;
    case PROCESSOR_SUPERSPARC:
      return 3;
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      return 2;
    case PROCESSOR_ULTRASPARC:
    case PROCESSOR_ULTRASPARC3:
      return 4;
    }
}

static int
set_extends (rtx_insn *insn)
{
  register rtx pat = PATTERN (insn);

  switch (GET_CODE (SET_SRC (pat)))
    {
      /* Load and some shift instructions zero extend.  */
    case MEM:
    case ZERO_EXTEND:
      /* sethi clears the high bits.  */
    case HIGH:
      /* LO_SUM is used with sethi.  sethi cleared the high
	 bits and the values used with lo_sum are positive.  */
    case LO_SUM:
      /* Store flag stores 0 or 1.  */
    case LT: case LTU:
    case GT: case GTU:
    case LE: case LEU:
    case GE: case GEU:
    case EQ:
    case NE:
      return 1;
    case AND:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	if (GET_CODE (op0) != REG)
	  return 0;
	if (sparc_check_64 (op0, insn) == 1)
	  return 1;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case IOR:
    case XOR:
      {
	rtx op0 = XEXP (SET_SRC (pat), 0);
	rtx op1 = XEXP (SET_SRC (pat), 1);
	if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
	  return 0;
	if (GET_CODE (op1) == CONST_INT)
	  return INTVAL (op1) >= 0;
	return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
      }
    case LSHIFTRT:
      return GET_MODE (SET_SRC (pat)) == SImode;
      /* Positive integers leave the high bits zero.  */
    case CONST_DOUBLE:
      return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
    case CONST_INT:
      return !
(INTVAL (SET_SRC (pat)) & 0x80000000); 9711 case ASHIFTRT: 9712 case SIGN_EXTEND: 9713 return - (GET_MODE (SET_SRC (pat)) == SImode); 9714 case REG: 9715 return sparc_check_64 (SET_SRC (pat), insn); 9716 default: 9717 return 0; 9718 } 9719 } 9720 9721 /* We _ought_ to have only one kind per function, but... */ 9722 static GTY(()) rtx sparc_addr_diff_list; 9723 static GTY(()) rtx sparc_addr_list; 9724 9725 void 9726 sparc_defer_case_vector (rtx lab, rtx vec, int diff) 9727 { 9728 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec); 9729 if (diff) 9730 sparc_addr_diff_list 9731 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list); 9732 else 9733 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list); 9734 } 9735 9736 static void 9737 sparc_output_addr_vec (rtx vec) 9738 { 9739 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); 9740 int idx, vlen = XVECLEN (body, 0); 9741 9742 #ifdef ASM_OUTPUT_ADDR_VEC_START 9743 ASM_OUTPUT_ADDR_VEC_START (asm_out_file); 9744 #endif 9745 9746 #ifdef ASM_OUTPUT_CASE_LABEL 9747 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), 9748 NEXT_INSN (lab)); 9749 #else 9750 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); 9751 #endif 9752 9753 for (idx = 0; idx < vlen; idx++) 9754 { 9755 ASM_OUTPUT_ADDR_VEC_ELT 9756 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0))); 9757 } 9758 9759 #ifdef ASM_OUTPUT_ADDR_VEC_END 9760 ASM_OUTPUT_ADDR_VEC_END (asm_out_file); 9761 #endif 9762 } 9763 9764 static void 9765 sparc_output_addr_diff_vec (rtx vec) 9766 { 9767 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); 9768 rtx base = XEXP (XEXP (body, 0), 0); 9769 int idx, vlen = XVECLEN (body, 1); 9770 9771 #ifdef ASM_OUTPUT_ADDR_VEC_START 9772 ASM_OUTPUT_ADDR_VEC_START (asm_out_file); 9773 #endif 9774 9775 #ifdef ASM_OUTPUT_CASE_LABEL 9776 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), 9777 NEXT_INSN (lab)); 9778 #else 9779 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); 9780 #endif 9781 9782 for (idx = 0; idx < vlen; idx++) 9783 { 9784 ASM_OUTPUT_ADDR_DIFF_ELT 9785 (asm_out_file, 9786 body, 9787 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)), 9788 CODE_LABEL_NUMBER (base)); 9789 } 9790 9791 #ifdef ASM_OUTPUT_ADDR_VEC_END 9792 ASM_OUTPUT_ADDR_VEC_END (asm_out_file); 9793 #endif 9794 } 9795 9796 static void 9797 sparc_output_deferred_case_vectors (void) 9798 { 9799 rtx t; 9800 int align; 9801 9802 if (sparc_addr_list == NULL_RTX 9803 && sparc_addr_diff_list == NULL_RTX) 9804 return; 9805 9806 /* Align to cache line in the function's code section. */ 9807 switch_to_section (current_function_section ()); 9808 9809 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); 9810 if (align > 0) 9811 ASM_OUTPUT_ALIGN (asm_out_file, align); 9812 9813 for (t = sparc_addr_list; t ; t = XEXP (t, 1)) 9814 sparc_output_addr_vec (XEXP (t, 0)); 9815 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1)) 9816 sparc_output_addr_diff_vec (XEXP (t, 0)); 9817 9818 sparc_addr_list = sparc_addr_diff_list = NULL_RTX; 9819 } 9820 9821 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are 9822 unknown. Return 1 if the high bits are zero, -1 if the register is 9823 sign extended. */ 9824 int 9825 sparc_check_64 (rtx x, rtx_insn *insn) 9826 { 9827 /* If a register is set only once it is safe to ignore insns this 9828 code does not know how to handle. The loop will either recognize 9829 the single set and return the correct value or fail to recognize 9830 it and return 0. 
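   For a register with more than one set we must instead give up at the
   first insn we cannot analyze, since an unrecognized set could change
   the upper 32 bits behind our back.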
*/ 9831 int set_once = 0; 9832 rtx y = x; 9833 9834 gcc_assert (GET_CODE (x) == REG); 9835 9836 if (GET_MODE (x) == DImode) 9837 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN); 9838 9839 if (flag_expensive_optimizations 9840 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1) 9841 set_once = 1; 9842 9843 if (insn == 0) 9844 { 9845 if (set_once) 9846 insn = get_last_insn_anywhere (); 9847 else 9848 return 0; 9849 } 9850 9851 while ((insn = PREV_INSN (insn))) 9852 { 9853 switch (GET_CODE (insn)) 9854 { 9855 case JUMP_INSN: 9856 case NOTE: 9857 break; 9858 case CODE_LABEL: 9859 case CALL_INSN: 9860 default: 9861 if (! set_once) 9862 return 0; 9863 break; 9864 case INSN: 9865 { 9866 rtx pat = PATTERN (insn); 9867 if (GET_CODE (pat) != SET) 9868 return 0; 9869 if (rtx_equal_p (x, SET_DEST (pat))) 9870 return set_extends (insn); 9871 if (y && rtx_equal_p (y, SET_DEST (pat))) 9872 return set_extends (insn); 9873 if (reg_overlap_mentioned_p (SET_DEST (pat), y)) 9874 return 0; 9875 } 9876 } 9877 } 9878 return 0; 9879 } 9880 9881 /* Output a wide shift instruction in V8+ mode. INSN is the instruction, 9882 OPERANDS are its operands and OPCODE is the mnemonic to be used. */ 9883 9884 const char * 9885 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode) 9886 { 9887 static char asm_code[60]; 9888 9889 /* The scratch register is only required when the destination 9890 register is not a 64-bit global or out register. */ 9891 if (which_alternative != 2) 9892 operands[3] = operands[0]; 9893 9894 /* We can only shift by constants <= 63. */ 9895 if (GET_CODE (operands[2]) == CONST_INT) 9896 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); 9897 9898 if (GET_CODE (operands[1]) == CONST_INT) 9899 { 9900 output_asm_insn ("mov\t%1, %3", operands); 9901 } 9902 else 9903 { 9904 output_asm_insn ("sllx\t%H1, 32, %3", operands); 9905 if (sparc_check_64 (operands[1], insn) <= 0) 9906 output_asm_insn ("srl\t%L1, 0, %L1", operands); 9907 output_asm_insn ("or\t%L1, %3, %3", operands); 9908 } 9909 9910 strcpy (asm_code, opcode); 9911 9912 if (which_alternative != 2) 9913 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0"); 9914 else 9915 return 9916 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"); 9917 } 9918 9919 /* Output rtl to increment the profiler label LABELNO 9920 for profiling a function entry. */ 9921 9922 void 9923 sparc_profile_hook (int labelno) 9924 { 9925 char buf[32]; 9926 rtx lab, fun; 9927 9928 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION); 9929 if (NO_PROFILE_COUNTERS) 9930 { 9931 emit_library_call (fun, LCT_NORMAL, VOIDmode, 0); 9932 } 9933 else 9934 { 9935 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 9936 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); 9937 emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode); 9938 } 9939 } 9940 9941 #ifdef TARGET_SOLARIS 9942 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. 
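   Sun as expects #-style section attributes rather than the usual ELF
   flag string; e.g. for a hypothetical writable section "my_sec" the
   code below would emit something like

	.section	"my_sec",#alloc,#write

   plus ",#progbits" or ",#nobits" on assemblers that support them.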
*/ 9943 9944 static void 9945 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags, 9946 tree decl ATTRIBUTE_UNUSED) 9947 { 9948 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) 9949 { 9950 solaris_elf_asm_comdat_section (name, flags, decl); 9951 return; 9952 } 9953 9954 fprintf (asm_out_file, "\t.section\t\"%s\"", name); 9955 9956 if (!(flags & SECTION_DEBUG)) 9957 fputs (",#alloc", asm_out_file); 9958 if (flags & SECTION_WRITE) 9959 fputs (",#write", asm_out_file); 9960 if (flags & SECTION_TLS) 9961 fputs (",#tls", asm_out_file); 9962 if (flags & SECTION_CODE) 9963 fputs (",#execinstr", asm_out_file); 9964 9965 /* Sun as only supports #nobits/#progbits since Solaris 10. */ 9966 if (HAVE_AS_SPARC_NOBITS) 9967 { 9968 if (flags & SECTION_BSS) 9969 fputs (",#nobits", asm_out_file); 9970 else 9971 fputs (",#progbits", asm_out_file); 9972 } 9973 9974 fputc ('\n', asm_out_file); 9975 } 9976 #endif /* TARGET_SOLARIS */ 9977 9978 /* We do not allow indirect calls to be optimized into sibling calls. 9979 9980 We cannot use sibling calls when delayed branches are disabled 9981 because they will likely require the call delay slot to be filled. 9982 9983 Also, on SPARC 32-bit we cannot emit a sibling call when the 9984 current function returns a structure. This is because the "unimp 9985 after call" convention would cause the callee to return to the 9986 wrong place. The generic code already disallows cases where the 9987 function being called returns a structure. 9988 9989 It may seem strange how this last case could occur. Usually there 9990 is code after the call which jumps to epilogue code which dumps the 9991 return value into the struct return area. That ought to invalidate 9992 the sibling call right? Well, in the C++ case we can end up passing 9993 the pointer to the struct return area to a constructor (which returns 9994 void) and then nothing else happens. Such a sibling call would look 9995 valid without the added check here. 9996 9997 VxWorks PIC PLT entries require the global pointer to be initialized 9998 on entry. We therefore can't emit sibling calls to them. */ 9999 static bool 10000 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 10001 { 10002 return (decl 10003 && flag_delayed_branch 10004 && (TARGET_ARCH64 || ! cfun->returns_struct) 10005 && !(TARGET_VXWORKS_RTP 10006 && flag_pic 10007 && !targetm.binds_local_p (decl))); 10008 } 10009 10010 /* libfunc renaming. */ 10011 10012 static void 10013 sparc_init_libfuncs (void) 10014 { 10015 if (TARGET_ARCH32) 10016 { 10017 /* Use the subroutines that Sun's library provides for integer 10018 multiply and divide. The `*' prevents an underscore from 10019 being prepended by the compiler. .umul is a little faster 10020 than .mul. */ 10021 set_optab_libfunc (smul_optab, SImode, "*.umul"); 10022 set_optab_libfunc (sdiv_optab, SImode, "*.div"); 10023 set_optab_libfunc (udiv_optab, SImode, "*.udiv"); 10024 set_optab_libfunc (smod_optab, SImode, "*.rem"); 10025 set_optab_libfunc (umod_optab, SImode, "*.urem"); 10026 10027 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */ 10028 set_optab_libfunc (add_optab, TFmode, "_Q_add"); 10029 set_optab_libfunc (sub_optab, TFmode, "_Q_sub"); 10030 set_optab_libfunc (neg_optab, TFmode, "_Q_neg"); 10031 set_optab_libfunc (smul_optab, TFmode, "_Q_mul"); 10032 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div"); 10033 10034 /* We can define the TFmode sqrt optab only if TARGET_FPU. 
This 10035 is because with soft-float, the SFmode and DFmode sqrt 10036 instructions will be absent, and the compiler will notice and 10037 try to use the TFmode sqrt instruction for calls to the 10038 builtin function sqrt, but this fails. */ 10039 if (TARGET_FPU) 10040 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt"); 10041 10042 set_optab_libfunc (eq_optab, TFmode, "_Q_feq"); 10043 set_optab_libfunc (ne_optab, TFmode, "_Q_fne"); 10044 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt"); 10045 set_optab_libfunc (ge_optab, TFmode, "_Q_fge"); 10046 set_optab_libfunc (lt_optab, TFmode, "_Q_flt"); 10047 set_optab_libfunc (le_optab, TFmode, "_Q_fle"); 10048 10049 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq"); 10050 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq"); 10051 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos"); 10052 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod"); 10053 10054 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi"); 10055 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou"); 10056 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq"); 10057 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq"); 10058 10059 if (DITF_CONVERSION_LIBFUNCS) 10060 { 10061 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll"); 10062 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull"); 10063 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq"); 10064 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq"); 10065 } 10066 10067 if (SUN_CONVERSION_LIBFUNCS) 10068 { 10069 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll"); 10070 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull"); 10071 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll"); 10072 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull"); 10073 } 10074 } 10075 if (TARGET_ARCH64) 10076 { 10077 /* In the SPARC 64bit ABI, SImode multiply and divide functions 10078 do not exist in the library. Make sure the compiler does not 10079 emit calls to them by accident. (It should always use the 10080 hardware instructions.) */ 10081 set_optab_libfunc (smul_optab, SImode, 0); 10082 set_optab_libfunc (sdiv_optab, SImode, 0); 10083 set_optab_libfunc (udiv_optab, SImode, 0); 10084 set_optab_libfunc (smod_optab, SImode, 0); 10085 set_optab_libfunc (umod_optab, SImode, 0); 10086 10087 if (SUN_INTEGER_MULTIPLY_64) 10088 { 10089 set_optab_libfunc (smul_optab, DImode, "__mul64"); 10090 set_optab_libfunc (sdiv_optab, DImode, "__div64"); 10091 set_optab_libfunc (udiv_optab, DImode, "__udiv64"); 10092 set_optab_libfunc (smod_optab, DImode, "__rem64"); 10093 set_optab_libfunc (umod_optab, DImode, "__urem64"); 10094 } 10095 10096 if (SUN_CONVERSION_LIBFUNCS) 10097 { 10098 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol"); 10099 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul"); 10100 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol"); 10101 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul"); 10102 } 10103 } 10104 } 10105 10106 /* SPARC builtins. */ 10107 enum sparc_builtins 10108 { 10109 /* FPU builtins. */ 10110 SPARC_BUILTIN_LDFSR, 10111 SPARC_BUILTIN_STFSR, 10112 10113 /* VIS 1.0 builtins. 
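     Each enumerator identifies one __builtin_vis_* function registered
     by sparc_vis_init_builtins below.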
*/ 10114 SPARC_BUILTIN_FPACK16, 10115 SPARC_BUILTIN_FPACK32, 10116 SPARC_BUILTIN_FPACKFIX, 10117 SPARC_BUILTIN_FEXPAND, 10118 SPARC_BUILTIN_FPMERGE, 10119 SPARC_BUILTIN_FMUL8X16, 10120 SPARC_BUILTIN_FMUL8X16AU, 10121 SPARC_BUILTIN_FMUL8X16AL, 10122 SPARC_BUILTIN_FMUL8SUX16, 10123 SPARC_BUILTIN_FMUL8ULX16, 10124 SPARC_BUILTIN_FMULD8SUX16, 10125 SPARC_BUILTIN_FMULD8ULX16, 10126 SPARC_BUILTIN_FALIGNDATAV4HI, 10127 SPARC_BUILTIN_FALIGNDATAV8QI, 10128 SPARC_BUILTIN_FALIGNDATAV2SI, 10129 SPARC_BUILTIN_FALIGNDATADI, 10130 SPARC_BUILTIN_WRGSR, 10131 SPARC_BUILTIN_RDGSR, 10132 SPARC_BUILTIN_ALIGNADDR, 10133 SPARC_BUILTIN_ALIGNADDRL, 10134 SPARC_BUILTIN_PDIST, 10135 SPARC_BUILTIN_EDGE8, 10136 SPARC_BUILTIN_EDGE8L, 10137 SPARC_BUILTIN_EDGE16, 10138 SPARC_BUILTIN_EDGE16L, 10139 SPARC_BUILTIN_EDGE32, 10140 SPARC_BUILTIN_EDGE32L, 10141 SPARC_BUILTIN_FCMPLE16, 10142 SPARC_BUILTIN_FCMPLE32, 10143 SPARC_BUILTIN_FCMPNE16, 10144 SPARC_BUILTIN_FCMPNE32, 10145 SPARC_BUILTIN_FCMPGT16, 10146 SPARC_BUILTIN_FCMPGT32, 10147 SPARC_BUILTIN_FCMPEQ16, 10148 SPARC_BUILTIN_FCMPEQ32, 10149 SPARC_BUILTIN_FPADD16, 10150 SPARC_BUILTIN_FPADD16S, 10151 SPARC_BUILTIN_FPADD32, 10152 SPARC_BUILTIN_FPADD32S, 10153 SPARC_BUILTIN_FPSUB16, 10154 SPARC_BUILTIN_FPSUB16S, 10155 SPARC_BUILTIN_FPSUB32, 10156 SPARC_BUILTIN_FPSUB32S, 10157 SPARC_BUILTIN_ARRAY8, 10158 SPARC_BUILTIN_ARRAY16, 10159 SPARC_BUILTIN_ARRAY32, 10160 10161 /* VIS 2.0 builtins. */ 10162 SPARC_BUILTIN_EDGE8N, 10163 SPARC_BUILTIN_EDGE8LN, 10164 SPARC_BUILTIN_EDGE16N, 10165 SPARC_BUILTIN_EDGE16LN, 10166 SPARC_BUILTIN_EDGE32N, 10167 SPARC_BUILTIN_EDGE32LN, 10168 SPARC_BUILTIN_BMASK, 10169 SPARC_BUILTIN_BSHUFFLEV4HI, 10170 SPARC_BUILTIN_BSHUFFLEV8QI, 10171 SPARC_BUILTIN_BSHUFFLEV2SI, 10172 SPARC_BUILTIN_BSHUFFLEDI, 10173 10174 /* VIS 3.0 builtins. */ 10175 SPARC_BUILTIN_CMASK8, 10176 SPARC_BUILTIN_CMASK16, 10177 SPARC_BUILTIN_CMASK32, 10178 SPARC_BUILTIN_FCHKSM16, 10179 SPARC_BUILTIN_FSLL16, 10180 SPARC_BUILTIN_FSLAS16, 10181 SPARC_BUILTIN_FSRL16, 10182 SPARC_BUILTIN_FSRA16, 10183 SPARC_BUILTIN_FSLL32, 10184 SPARC_BUILTIN_FSLAS32, 10185 SPARC_BUILTIN_FSRL32, 10186 SPARC_BUILTIN_FSRA32, 10187 SPARC_BUILTIN_PDISTN, 10188 SPARC_BUILTIN_FMEAN16, 10189 SPARC_BUILTIN_FPADD64, 10190 SPARC_BUILTIN_FPSUB64, 10191 SPARC_BUILTIN_FPADDS16, 10192 SPARC_BUILTIN_FPADDS16S, 10193 SPARC_BUILTIN_FPSUBS16, 10194 SPARC_BUILTIN_FPSUBS16S, 10195 SPARC_BUILTIN_FPADDS32, 10196 SPARC_BUILTIN_FPADDS32S, 10197 SPARC_BUILTIN_FPSUBS32, 10198 SPARC_BUILTIN_FPSUBS32S, 10199 SPARC_BUILTIN_FUCMPLE8, 10200 SPARC_BUILTIN_FUCMPNE8, 10201 SPARC_BUILTIN_FUCMPGT8, 10202 SPARC_BUILTIN_FUCMPEQ8, 10203 SPARC_BUILTIN_FHADDS, 10204 SPARC_BUILTIN_FHADDD, 10205 SPARC_BUILTIN_FHSUBS, 10206 SPARC_BUILTIN_FHSUBD, 10207 SPARC_BUILTIN_FNHADDS, 10208 SPARC_BUILTIN_FNHADDD, 10209 SPARC_BUILTIN_UMULXHI, 10210 SPARC_BUILTIN_XMULX, 10211 SPARC_BUILTIN_XMULXHI, 10212 10213 SPARC_BUILTIN_MAX 10214 }; 10215 10216 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX]; 10217 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX]; 10218 10219 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the 10220 function decl or NULL_TREE if the builtin was not added. 
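   ICODE is recorded in sparc_builtins_icode so that sparc_expand_builtin
   can later map CODE back to the insn pattern to emit.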
*/ 10221 10222 static tree 10223 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code, 10224 tree type) 10225 { 10226 tree t 10227 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE); 10228 10229 if (t) 10230 { 10231 sparc_builtins[code] = t; 10232 sparc_builtins_icode[code] = icode; 10233 } 10234 10235 return t; 10236 } 10237 10238 /* Likewise, but also marks the function as "const". */ 10239 10240 static tree 10241 def_builtin_const (const char *name, enum insn_code icode, 10242 enum sparc_builtins code, tree type) 10243 { 10244 tree t = def_builtin (name, icode, code, type); 10245 10246 if (t) 10247 TREE_READONLY (t) = 1; 10248 10249 return t; 10250 } 10251 10252 /* Implement the TARGET_INIT_BUILTINS target hook. 10253 Create builtin functions for special SPARC instructions. */ 10254 10255 static void 10256 sparc_init_builtins (void) 10257 { 10258 if (TARGET_FPU) 10259 sparc_fpu_init_builtins (); 10260 10261 if (TARGET_VIS) 10262 sparc_vis_init_builtins (); 10263 } 10264 10265 /* Create builtin functions for FPU instructions. */ 10266 10267 static void 10268 sparc_fpu_init_builtins (void) 10269 { 10270 tree ftype 10271 = build_function_type_list (void_type_node, 10272 build_pointer_type (unsigned_type_node), 0); 10273 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr, 10274 SPARC_BUILTIN_LDFSR, ftype); 10275 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr, 10276 SPARC_BUILTIN_STFSR, ftype); 10277 } 10278 10279 /* Create builtin functions for VIS instructions. */ 10280 10281 static void 10282 sparc_vis_init_builtins (void) 10283 { 10284 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4); 10285 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8); 10286 tree v4hi = build_vector_type (intHI_type_node, 4); 10287 tree v2hi = build_vector_type (intHI_type_node, 2); 10288 tree v2si = build_vector_type (intSI_type_node, 2); 10289 tree v1si = build_vector_type (intSI_type_node, 1); 10290 10291 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0); 10292 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0); 10293 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0); 10294 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0); 10295 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0); 10296 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0); 10297 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0); 10298 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0); 10299 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0); 10300 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0); 10301 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0); 10302 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0); 10303 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0); 10304 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0); 10305 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node, 10306 v8qi, v8qi, 10307 intDI_type_node, 0); 10308 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node, 10309 v8qi, v8qi, 0); 10310 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node, 10311 v8qi, v8qi, 0); 10312 tree di_ftype_di_di = build_function_type_list (intDI_type_node, 10313 intDI_type_node, 10314 intDI_type_node, 0); 10315 tree 
si_ftype_si_si = build_function_type_list (intSI_type_node, 10316 intSI_type_node, 10317 intSI_type_node, 0); 10318 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node, 10319 ptr_type_node, 10320 intSI_type_node, 0); 10321 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node, 10322 ptr_type_node, 10323 intDI_type_node, 0); 10324 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node, 10325 ptr_type_node, 10326 ptr_type_node, 0); 10327 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node, 10328 ptr_type_node, 10329 ptr_type_node, 0); 10330 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node, 10331 v4hi, v4hi, 0); 10332 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node, 10333 v2si, v2si, 0); 10334 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node, 10335 v4hi, v4hi, 0); 10336 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node, 10337 v2si, v2si, 0); 10338 tree void_ftype_di = build_function_type_list (void_type_node, 10339 intDI_type_node, 0); 10340 tree di_ftype_void = build_function_type_list (intDI_type_node, 10341 void_type_node, 0); 10342 tree void_ftype_si = build_function_type_list (void_type_node, 10343 intSI_type_node, 0); 10344 tree sf_ftype_sf_sf = build_function_type_list (float_type_node, 10345 float_type_node, 10346 float_type_node, 0); 10347 tree df_ftype_df_df = build_function_type_list (double_type_node, 10348 double_type_node, 10349 double_type_node, 0); 10350 10351 /* Packing and expanding vectors. */ 10352 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, 10353 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi); 10354 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis, 10355 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi); 10356 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis, 10357 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si); 10358 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, 10359 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi); 10360 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis, 10361 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi); 10362 10363 /* Multiplications. */ 10364 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis, 10365 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi); 10366 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis, 10367 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi); 10368 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis, 10369 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi); 10370 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis, 10371 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi); 10372 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis, 10373 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi); 10374 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis, 10375 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi); 10376 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis, 10377 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi); 10378 10379 /* Data aligning. 
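     The faligndata insns extract an 8-byte result from the concatenation
     of their two operands, starting at the byte offset a preceding
     alignaddr left in GSR.align.  An illustrative sketch, with purely
     hypothetical variables src, hi and lo:

	void *p = __builtin_vis_alignaddr (src, 0);
	v8qi  r = __builtin_vis_faligndatav8qi (hi, lo);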
*/ 10380 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis, 10381 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi); 10382 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis, 10383 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi); 10384 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis, 10385 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si); 10386 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis, 10387 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di); 10388 10389 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis, 10390 SPARC_BUILTIN_WRGSR, void_ftype_di); 10391 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis, 10392 SPARC_BUILTIN_RDGSR, di_ftype_void); 10393 10394 if (TARGET_ARCH64) 10395 { 10396 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis, 10397 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di); 10398 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis, 10399 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di); 10400 } 10401 else 10402 { 10403 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis, 10404 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si); 10405 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis, 10406 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si); 10407 } 10408 10409 /* Pixel distance. */ 10410 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis, 10411 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di); 10412 10413 /* Edge handling. */ 10414 if (TARGET_ARCH64) 10415 { 10416 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis, 10417 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr); 10418 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis, 10419 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr); 10420 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis, 10421 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr); 10422 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis, 10423 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr); 10424 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis, 10425 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr); 10426 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis, 10427 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr); 10428 } 10429 else 10430 { 10431 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis, 10432 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr); 10433 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis, 10434 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr); 10435 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis, 10436 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr); 10437 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis, 10438 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr); 10439 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis, 10440 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr); 10441 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis, 10442 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr); 10443 } 10444 10445 /* Pixel compare. 
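     Each pair of elements is compared and the per-element truth values
     are returned as a bitmask in an integer register, which is why the
     result types are scalar si/di rather than vectors.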
*/ 10446 if (TARGET_ARCH64) 10447 { 10448 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis, 10449 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi); 10450 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis, 10451 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si); 10452 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis, 10453 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi); 10454 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis, 10455 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si); 10456 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis, 10457 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi); 10458 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis, 10459 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si); 10460 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis, 10461 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi); 10462 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis, 10463 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si); 10464 } 10465 else 10466 { 10467 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis, 10468 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi); 10469 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis, 10470 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si); 10471 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis, 10472 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi); 10473 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis, 10474 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si); 10475 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis, 10476 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi); 10477 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis, 10478 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si); 10479 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis, 10480 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi); 10481 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis, 10482 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si); 10483 } 10484 10485 /* Addition and subtraction. */ 10486 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3, 10487 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi); 10488 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3, 10489 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi); 10490 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3, 10491 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si); 10492 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3, 10493 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si); 10494 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3, 10495 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi); 10496 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3, 10497 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi); 10498 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3, 10499 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si); 10500 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3, 10501 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si); 10502 10503 /* Three-dimensional array addressing. 
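     The array insns turn packed (X, Y, Z) fixed-point coordinates into
     a blocked byte address; the 16 and 32 variants also scale the
     result for 2- and 4-byte elements.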
*/ 10504 if (TARGET_ARCH64) 10505 { 10506 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis, 10507 SPARC_BUILTIN_ARRAY8, di_ftype_di_di); 10508 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis, 10509 SPARC_BUILTIN_ARRAY16, di_ftype_di_di); 10510 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis, 10511 SPARC_BUILTIN_ARRAY32, di_ftype_di_di); 10512 } 10513 else 10514 { 10515 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis, 10516 SPARC_BUILTIN_ARRAY8, si_ftype_si_si); 10517 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis, 10518 SPARC_BUILTIN_ARRAY16, si_ftype_si_si); 10519 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis, 10520 SPARC_BUILTIN_ARRAY32, si_ftype_si_si); 10521 } 10522 10523 if (TARGET_VIS2) 10524 { 10525 /* Edge handling. */ 10526 if (TARGET_ARCH64) 10527 { 10528 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis, 10529 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr); 10530 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis, 10531 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr); 10532 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis, 10533 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr); 10534 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis, 10535 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr); 10536 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis, 10537 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr); 10538 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis, 10539 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr); 10540 } 10541 else 10542 { 10543 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis, 10544 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr); 10545 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis, 10546 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr); 10547 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis, 10548 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr); 10549 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis, 10550 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr); 10551 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis, 10552 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr); 10553 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis, 10554 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr); 10555 } 10556 10557 /* Byte mask and shuffle. 
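	 bmask also deposits its result in the GSR.mask field; a
	 subsequent bshuffle then selects bytes from the concatenation
	 of its two operands as directed by that mask.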
*/ 10558 if (TARGET_ARCH64) 10559 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis, 10560 SPARC_BUILTIN_BMASK, di_ftype_di_di); 10561 else 10562 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis, 10563 SPARC_BUILTIN_BMASK, si_ftype_si_si); 10564 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis, 10565 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi); 10566 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis, 10567 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi); 10568 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis, 10569 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si); 10570 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis, 10571 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di); 10572 } 10573 10574 if (TARGET_VIS3) 10575 { 10576 if (TARGET_ARCH64) 10577 { 10578 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis, 10579 SPARC_BUILTIN_CMASK8, void_ftype_di); 10580 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis, 10581 SPARC_BUILTIN_CMASK16, void_ftype_di); 10582 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis, 10583 SPARC_BUILTIN_CMASK32, void_ftype_di); 10584 } 10585 else 10586 { 10587 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis, 10588 SPARC_BUILTIN_CMASK8, void_ftype_si); 10589 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis, 10590 SPARC_BUILTIN_CMASK16, void_ftype_si); 10591 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis, 10592 SPARC_BUILTIN_CMASK32, void_ftype_si); 10593 } 10594 10595 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis, 10596 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi); 10597 10598 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3, 10599 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi); 10600 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3, 10601 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi); 10602 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3, 10603 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi); 10604 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3, 10605 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi); 10606 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3, 10607 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si); 10608 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3, 10609 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si); 10610 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3, 10611 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si); 10612 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3, 10613 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si); 10614 10615 if (TARGET_ARCH64) 10616 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis, 10617 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi); 10618 else 10619 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis, 10620 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi); 10621 10622 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis, 10623 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi); 10624 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis, 10625 SPARC_BUILTIN_FPADD64, di_ftype_di_di); 10626 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis, 10627 SPARC_BUILTIN_FPSUB64, di_ftype_di_di); 10628 10629 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3, 10630 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi); 10631 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3, 10632 
			 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
      def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
			 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
			 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
      def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
			 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
			 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
      def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
			 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
      def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
			 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);

      if (TARGET_ARCH64)
	{
	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
			     SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
			     SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
			     SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
			     SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
	}
      else
	{
	  def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
			     SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
			     SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
			     SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
	  def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
			     SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
	}

      def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
			 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
			 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
      def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
			 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
			 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
      def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
			 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
			 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);

      def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
			 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
			 SPARC_BUILTIN_XMULX, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
			 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
    }
}

/* Implement TARGET_BUILTIN_DECL hook.  */

static tree
sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= SPARC_BUILTIN_MAX)
    return error_mark_node;

  return sparc_builtins[code];
}

/* Implement TARGET_EXPAND_BUILTIN hook.
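   Expand the call's arguments, coerce each to the mode required by the
   insn's operand predicate, and emit the pattern recorded in
   sparc_builtins_icode; the result, if any, is delivered in TARGET
   whenever its mode and predicate allow.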
*/ 10703 10704 static rtx 10705 sparc_expand_builtin (tree exp, rtx target, 10706 rtx subtarget ATTRIBUTE_UNUSED, 10707 machine_mode tmode ATTRIBUTE_UNUSED, 10708 int ignore ATTRIBUTE_UNUSED) 10709 { 10710 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 10711 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl); 10712 enum insn_code icode = sparc_builtins_icode[code]; 10713 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; 10714 call_expr_arg_iterator iter; 10715 int arg_count = 0; 10716 rtx pat, op[4]; 10717 tree arg; 10718 10719 if (nonvoid) 10720 { 10721 machine_mode tmode = insn_data[icode].operand[0].mode; 10722 if (!target 10723 || GET_MODE (target) != tmode 10724 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 10725 op[0] = gen_reg_rtx (tmode); 10726 else 10727 op[0] = target; 10728 } 10729 10730 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) 10731 { 10732 const struct insn_operand_data *insn_op; 10733 int idx; 10734 10735 if (arg == error_mark_node) 10736 return NULL_RTX; 10737 10738 arg_count++; 10739 idx = arg_count - !nonvoid; 10740 insn_op = &insn_data[icode].operand[idx]; 10741 op[arg_count] = expand_normal (arg); 10742 10743 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR) 10744 { 10745 if (!address_operand (op[arg_count], SImode)) 10746 { 10747 op[arg_count] = convert_memory_address (Pmode, op[arg_count]); 10748 op[arg_count] = copy_addr_to_reg (op[arg_count]); 10749 } 10750 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]); 10751 } 10752 10753 else if (insn_op->mode == V1DImode 10754 && GET_MODE (op[arg_count]) == DImode) 10755 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]); 10756 10757 else if (insn_op->mode == V1SImode 10758 && GET_MODE (op[arg_count]) == SImode) 10759 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]); 10760 10761 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count], 10762 insn_op->mode)) 10763 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]); 10764 } 10765 10766 switch (arg_count) 10767 { 10768 case 0: 10769 pat = GEN_FCN (icode) (op[0]); 10770 break; 10771 case 1: 10772 if (nonvoid) 10773 pat = GEN_FCN (icode) (op[0], op[1]); 10774 else 10775 pat = GEN_FCN (icode) (op[1]); 10776 break; 10777 case 2: 10778 pat = GEN_FCN (icode) (op[0], op[1], op[2]); 10779 break; 10780 case 3: 10781 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); 10782 break; 10783 default: 10784 gcc_unreachable (); 10785 } 10786 10787 if (!pat) 10788 return NULL_RTX; 10789 10790 emit_insn (pat); 10791 10792 return (nonvoid ? op[0] : const0_rtx); 10793 } 10794 10795 /* Return the upper 16 bits of the 8x16 multiplication. */ 10796 10797 static int 10798 sparc_vis_mul8x16 (int e8, int e16) 10799 { 10800 return (e8 * e16 + 128) / 256; 10801 } 10802 10803 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put 10804 the result into the array N_ELTS, whose elements are of INNER_TYPE. 
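   For example, with the rounding used by sparc_vis_mul8x16 above,
   e8 = 4 and e16 = 64 give (4*64 + 128) / 256 = 1.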
*/ 10805 10806 static void 10807 sparc_handle_vis_mul8x16 (tree *n_elts, enum sparc_builtins fncode, 10808 tree inner_type, tree cst0, tree cst1) 10809 { 10810 unsigned i, num = VECTOR_CST_NELTS (cst0); 10811 int scale; 10812 10813 switch (fncode) 10814 { 10815 case SPARC_BUILTIN_FMUL8X16: 10816 for (i = 0; i < num; ++i) 10817 { 10818 int val 10819 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 10820 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i))); 10821 n_elts[i] = build_int_cst (inner_type, val); 10822 } 10823 break; 10824 10825 case SPARC_BUILTIN_FMUL8X16AU: 10826 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0)); 10827 10828 for (i = 0; i < num; ++i) 10829 { 10830 int val 10831 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 10832 scale); 10833 n_elts[i] = build_int_cst (inner_type, val); 10834 } 10835 break; 10836 10837 case SPARC_BUILTIN_FMUL8X16AL: 10838 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1)); 10839 10840 for (i = 0; i < num; ++i) 10841 { 10842 int val 10843 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 10844 scale); 10845 n_elts[i] = build_int_cst (inner_type, val); 10846 } 10847 break; 10848 10849 default: 10850 gcc_unreachable (); 10851 } 10852 } 10853 10854 /* Implement TARGET_FOLD_BUILTIN hook. 10855 10856 Fold builtin functions for SPARC intrinsics. If IGNORE is true the 10857 result of the function call is ignored. NULL_TREE is returned if the 10858 function could not be folded. */ 10859 10860 static tree 10861 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, 10862 tree *args, bool ignore) 10863 { 10864 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl); 10865 tree rtype = TREE_TYPE (TREE_TYPE (fndecl)); 10866 tree arg0, arg1, arg2; 10867 10868 if (ignore) 10869 switch (code) 10870 { 10871 case SPARC_BUILTIN_LDFSR: 10872 case SPARC_BUILTIN_STFSR: 10873 case SPARC_BUILTIN_ALIGNADDR: 10874 case SPARC_BUILTIN_WRGSR: 10875 case SPARC_BUILTIN_BMASK: 10876 case SPARC_BUILTIN_CMASK8: 10877 case SPARC_BUILTIN_CMASK16: 10878 case SPARC_BUILTIN_CMASK32: 10879 break; 10880 10881 default: 10882 return build_zero_cst (rtype); 10883 } 10884 10885 switch (code) 10886 { 10887 case SPARC_BUILTIN_FEXPAND: 10888 arg0 = args[0]; 10889 STRIP_NOPS (arg0); 10890 10891 if (TREE_CODE (arg0) == VECTOR_CST) 10892 { 10893 tree inner_type = TREE_TYPE (rtype); 10894 tree *n_elts; 10895 unsigned i; 10896 10897 n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0)); 10898 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 10899 n_elts[i] = build_int_cst (inner_type, 10900 TREE_INT_CST_LOW 10901 (VECTOR_CST_ELT (arg0, i)) << 4); 10902 return build_vector (rtype, n_elts); 10903 } 10904 break; 10905 10906 case SPARC_BUILTIN_FMUL8X16: 10907 case SPARC_BUILTIN_FMUL8X16AU: 10908 case SPARC_BUILTIN_FMUL8X16AL: 10909 arg0 = args[0]; 10910 arg1 = args[1]; 10911 STRIP_NOPS (arg0); 10912 STRIP_NOPS (arg1); 10913 10914 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) 10915 { 10916 tree inner_type = TREE_TYPE (rtype); 10917 tree *n_elts = XALLOCAVEC (tree, VECTOR_CST_NELTS (arg0)); 10918 sparc_handle_vis_mul8x16 (n_elts, code, inner_type, arg0, arg1); 10919 return build_vector (rtype, n_elts); 10920 } 10921 break; 10922 10923 case SPARC_BUILTIN_FPMERGE: 10924 arg0 = args[0]; 10925 arg1 = args[1]; 10926 STRIP_NOPS (arg0); 10927 STRIP_NOPS (arg1); 10928 10929 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) 10930 { 10931 tree *n_elts = XALLOCAVEC (tree, 2 * VECTOR_CST_NELTS (arg0)); 
10932 unsigned i; 10933 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 10934 { 10935 n_elts[2*i] = VECTOR_CST_ELT (arg0, i); 10936 n_elts[2*i+1] = VECTOR_CST_ELT (arg1, i); 10937 } 10938 10939 return build_vector (rtype, n_elts); 10940 } 10941 break; 10942 10943 case SPARC_BUILTIN_PDIST: 10944 case SPARC_BUILTIN_PDISTN: 10945 arg0 = args[0]; 10946 arg1 = args[1]; 10947 STRIP_NOPS (arg0); 10948 STRIP_NOPS (arg1); 10949 if (code == SPARC_BUILTIN_PDIST) 10950 { 10951 arg2 = args[2]; 10952 STRIP_NOPS (arg2); 10953 } 10954 else 10955 arg2 = integer_zero_node; 10956 10957 if (TREE_CODE (arg0) == VECTOR_CST 10958 && TREE_CODE (arg1) == VECTOR_CST 10959 && TREE_CODE (arg2) == INTEGER_CST) 10960 { 10961 bool overflow = false; 10962 widest_int result = wi::to_widest (arg2); 10963 widest_int tmp; 10964 unsigned i; 10965 10966 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 10967 { 10968 tree e0 = VECTOR_CST_ELT (arg0, i); 10969 tree e1 = VECTOR_CST_ELT (arg1, i); 10970 10971 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf; 10972 10973 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf); 10974 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf); 10975 if (wi::neg_p (tmp)) 10976 tmp = wi::neg (tmp, &neg2_ovf); 10977 else 10978 neg2_ovf = false; 10979 result = wi::add (result, tmp, SIGNED, &add2_ovf); 10980 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf; 10981 } 10982 10983 gcc_assert (!overflow); 10984 10985 return wide_int_to_tree (rtype, result); 10986 } 10987 10988 default: 10989 break; 10990 } 10991 10992 return NULL_TREE; 10993 } 10994 10995 /* ??? This duplicates information provided to the compiler by the 10996 ??? scheduler description. Some day, teach genautomata to output 10997 ??? the latencies and then CSE will just use that. */ 10998 10999 static bool 11000 sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, 11001 int *total, bool speed ATTRIBUTE_UNUSED) 11002 { 11003 machine_mode mode = GET_MODE (x); 11004 bool float_mode_p = FLOAT_MODE_P (mode); 11005 11006 switch (code) 11007 { 11008 case CONST_INT: 11009 if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000) 11010 { 11011 *total = 0; 11012 return true; 11013 } 11014 /* FALLTHRU */ 11015 11016 case HIGH: 11017 *total = 2; 11018 return true; 11019 11020 case CONST: 11021 case LABEL_REF: 11022 case SYMBOL_REF: 11023 *total = 4; 11024 return true; 11025 11026 case CONST_DOUBLE: 11027 if (GET_MODE (x) == VOIDmode 11028 && ((CONST_DOUBLE_HIGH (x) == 0 11029 && CONST_DOUBLE_LOW (x) < 0x1000) 11030 || (CONST_DOUBLE_HIGH (x) == -1 11031 && CONST_DOUBLE_LOW (x) < 0 11032 && CONST_DOUBLE_LOW (x) >= -0x1000))) 11033 *total = 0; 11034 else 11035 *total = 8; 11036 return true; 11037 11038 case MEM: 11039 /* If outer-code was a sign or zero extension, a cost 11040 of COSTS_N_INSNS (1) was already added in. This is 11041 why we are subtracting it back out. 
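     The net cost of a zero- or sign-extending load thus comes out to
     exactly int_zload or int_sload.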
*/ 11042 if (outer_code == ZERO_EXTEND) 11043 { 11044 *total = sparc_costs->int_zload - COSTS_N_INSNS (1); 11045 } 11046 else if (outer_code == SIGN_EXTEND) 11047 { 11048 *total = sparc_costs->int_sload - COSTS_N_INSNS (1); 11049 } 11050 else if (float_mode_p) 11051 { 11052 *total = sparc_costs->float_load; 11053 } 11054 else 11055 { 11056 *total = sparc_costs->int_load; 11057 } 11058 11059 return true; 11060 11061 case PLUS: 11062 case MINUS: 11063 if (float_mode_p) 11064 *total = sparc_costs->float_plusminus; 11065 else 11066 *total = COSTS_N_INSNS (1); 11067 return false; 11068 11069 case FMA: 11070 { 11071 rtx sub; 11072 11073 gcc_assert (float_mode_p); 11074 *total = sparc_costs->float_mul; 11075 11076 sub = XEXP (x, 0); 11077 if (GET_CODE (sub) == NEG) 11078 sub = XEXP (sub, 0); 11079 *total += rtx_cost (sub, FMA, 0, speed); 11080 11081 sub = XEXP (x, 2); 11082 if (GET_CODE (sub) == NEG) 11083 sub = XEXP (sub, 0); 11084 *total += rtx_cost (sub, FMA, 2, speed); 11085 return true; 11086 } 11087 11088 case MULT: 11089 if (float_mode_p) 11090 *total = sparc_costs->float_mul; 11091 else if (TARGET_ARCH32 && !TARGET_HARD_MUL) 11092 *total = COSTS_N_INSNS (25); 11093 else 11094 { 11095 int bit_cost; 11096 11097 bit_cost = 0; 11098 if (sparc_costs->int_mul_bit_factor) 11099 { 11100 int nbits; 11101 11102 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 11103 { 11104 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 11105 for (nbits = 0; value != 0; value &= value - 1) 11106 nbits++; 11107 } 11108 else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE 11109 && GET_MODE (XEXP (x, 1)) == VOIDmode) 11110 { 11111 rtx x1 = XEXP (x, 1); 11112 unsigned HOST_WIDE_INT value1 = CONST_DOUBLE_LOW (x1); 11113 unsigned HOST_WIDE_INT value2 = CONST_DOUBLE_HIGH (x1); 11114 11115 for (nbits = 0; value1 != 0; value1 &= value1 - 1) 11116 nbits++; 11117 for (; value2 != 0; value2 &= value2 - 1) 11118 nbits++; 11119 } 11120 else 11121 nbits = 7; 11122 11123 if (nbits < 3) 11124 nbits = 3; 11125 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor; 11126 bit_cost = COSTS_N_INSNS (bit_cost); 11127 } 11128 11129 if (mode == DImode || !TARGET_HARD_MUL) 11130 *total = sparc_costs->int_mulX + bit_cost; 11131 else 11132 *total = sparc_costs->int_mul + bit_cost; 11133 } 11134 return false; 11135 11136 case ASHIFT: 11137 case ASHIFTRT: 11138 case LSHIFTRT: 11139 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty; 11140 return false; 11141 11142 case DIV: 11143 case UDIV: 11144 case MOD: 11145 case UMOD: 11146 if (float_mode_p) 11147 { 11148 if (mode == DFmode) 11149 *total = sparc_costs->float_div_df; 11150 else 11151 *total = sparc_costs->float_div_sf; 11152 } 11153 else 11154 { 11155 if (mode == DImode) 11156 *total = sparc_costs->int_divX; 11157 else 11158 *total = sparc_costs->int_div; 11159 } 11160 return false; 11161 11162 case NEG: 11163 if (! 
float_mode_p) 11164 { 11165 *total = COSTS_N_INSNS (1); 11166 return false; 11167 } 11168 /* FALLTHRU */ 11169 11170 case ABS: 11171 case FLOAT: 11172 case UNSIGNED_FLOAT: 11173 case FIX: 11174 case UNSIGNED_FIX: 11175 case FLOAT_EXTEND: 11176 case FLOAT_TRUNCATE: 11177 *total = sparc_costs->float_move; 11178 return false; 11179 11180 case SQRT: 11181 if (mode == DFmode) 11182 *total = sparc_costs->float_sqrt_df; 11183 else 11184 *total = sparc_costs->float_sqrt_sf; 11185 return false; 11186 11187 case COMPARE: 11188 if (float_mode_p) 11189 *total = sparc_costs->float_cmp; 11190 else 11191 *total = COSTS_N_INSNS (1); 11192 return false; 11193 11194 case IF_THEN_ELSE: 11195 if (float_mode_p) 11196 *total = sparc_costs->float_cmove; 11197 else 11198 *total = sparc_costs->int_cmove; 11199 return false; 11200 11201 case IOR: 11202 /* Handle the NAND vector patterns. */ 11203 if (sparc_vector_mode_supported_p (GET_MODE (x)) 11204 && GET_CODE (XEXP (x, 0)) == NOT 11205 && GET_CODE (XEXP (x, 1)) == NOT) 11206 { 11207 *total = COSTS_N_INSNS (1); 11208 return true; 11209 } 11210 else 11211 return false; 11212 11213 default: 11214 return false; 11215 } 11216 } 11217 11218 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */ 11219 11220 static inline bool 11221 general_or_i64_p (reg_class_t rclass) 11222 { 11223 return (rclass == GENERAL_REGS || rclass == I64_REGS); 11224 } 11225 11226 /* Implement TARGET_REGISTER_MOVE_COST. */ 11227 11228 static int 11229 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, 11230 reg_class_t from, reg_class_t to) 11231 { 11232 bool need_memory = false; 11233 11234 if (from == FPCC_REGS || to == FPCC_REGS) 11235 need_memory = true; 11236 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to)) 11237 || (general_or_i64_p (from) && FP_REG_CLASS_P (to))) 11238 { 11239 if (TARGET_VIS3) 11240 { 11241 int size = GET_MODE_SIZE (mode); 11242 if (size == 8 || size == 4) 11243 { 11244 if (! TARGET_ARCH32 || size == 4) 11245 return 4; 11246 else 11247 return 6; 11248 } 11249 } 11250 need_memory = true; 11251 } 11252 11253 if (need_memory) 11254 { 11255 if (sparc_cpu == PROCESSOR_ULTRASPARC 11256 || sparc_cpu == PROCESSOR_ULTRASPARC3 11257 || sparc_cpu == PROCESSOR_NIAGARA 11258 || sparc_cpu == PROCESSOR_NIAGARA2 11259 || sparc_cpu == PROCESSOR_NIAGARA3 11260 || sparc_cpu == PROCESSOR_NIAGARA4) 11261 return 12; 11262 11263 return 6; 11264 } 11265 11266 return 2; 11267 } 11268 11269 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2. 11270 This is achieved by means of a manual dynamic stack space allocation in 11271 the current frame. We make the assumption that SEQ doesn't contain any 11272 function calls, with the possible exception of calls to the GOT helper. */ 11273 11274 static void 11275 emit_and_preserve (rtx seq, rtx reg, rtx reg2) 11276 { 11277 /* We must preserve the lowest 16 words for the register save area. */ 11278 HOST_WIDE_INT offset = 16*UNITS_PER_WORD; 11279 /* We really need only 2 words of fresh stack space. 
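     The allocation is rounded up by SPARC_STACK_ALIGN so that the stack
     stays properly aligned.  */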
*/ 11280 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD); 11281 11282 rtx slot 11283 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx, 11284 SPARC_STACK_BIAS + offset)); 11285 11286 emit_insn (gen_stack_pointer_inc (GEN_INT (-size))); 11287 emit_insn (gen_rtx_SET (VOIDmode, slot, reg)); 11288 if (reg2) 11289 emit_insn (gen_rtx_SET (VOIDmode, 11290 adjust_address (slot, word_mode, UNITS_PER_WORD), 11291 reg2)); 11292 emit_insn (seq); 11293 if (reg2) 11294 emit_insn (gen_rtx_SET (VOIDmode, 11295 reg2, 11296 adjust_address (slot, word_mode, UNITS_PER_WORD))); 11297 emit_insn (gen_rtx_SET (VOIDmode, reg, slot)); 11298 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 11299 } 11300 11301 /* Output the assembler code for a thunk function. THUNK_DECL is the 11302 declaration for the thunk function itself, FUNCTION is the decl for 11303 the target function. DELTA is an immediate constant offset to be 11304 added to THIS. If VCALL_OFFSET is nonzero, the word at address 11305 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */ 11306 11307 static void 11308 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 11309 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 11310 tree function) 11311 { 11312 rtx this_rtx, funexp; 11313 rtx_insn *insn; 11314 unsigned int int_arg_first; 11315 11316 reload_completed = 1; 11317 epilogue_completed = 1; 11318 11319 emit_note (NOTE_INSN_PROLOGUE_END); 11320 11321 if (TARGET_FLAT) 11322 { 11323 sparc_leaf_function_p = 1; 11324 11325 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST; 11326 } 11327 else if (flag_delayed_branch) 11328 { 11329 /* We will emit a regular sibcall below, so we need to instruct 11330 output_sibcall that we are in a leaf function. */ 11331 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1; 11332 11333 /* This will cause final.c to invoke leaf_renumber_regs so we 11334 must behave as if we were in a not-yet-leafified function. */ 11335 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST; 11336 } 11337 else 11338 { 11339 /* We will emit the sibcall manually below, so we will need to 11340 manually spill non-leaf registers. */ 11341 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0; 11342 11343 /* We really are in a leaf function. */ 11344 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST; 11345 } 11346 11347 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function 11348 returns a structure, the structure return pointer is there instead. */ 11349 if (TARGET_ARCH64 11350 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 11351 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1); 11352 else 11353 this_rtx = gen_rtx_REG (Pmode, int_arg_first); 11354 11355 /* Add DELTA. When possible use a plain add, otherwise load it into 11356 a register first. */ 11357 if (delta) 11358 { 11359 rtx delta_rtx = GEN_INT (delta); 11360 11361 if (! SPARC_SIMM13_P (delta)) 11362 { 11363 rtx scratch = gen_rtx_REG (Pmode, 1); 11364 emit_move_insn (scratch, delta_rtx); 11365 delta_rtx = scratch; 11366 } 11367 11368 /* THIS_RTX += DELTA. */ 11369 emit_insn (gen_add2_insn (this_rtx, delta_rtx)); 11370 } 11371 11372 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */ 11373 if (vcall_offset) 11374 { 11375 rtx vcall_offset_rtx = GEN_INT (vcall_offset); 11376 rtx scratch = gen_rtx_REG (Pmode, 1); 11377 11378 gcc_assert (vcall_offset < 0); 11379 11380 /* SCRATCH = *THIS_RTX. */ 11381 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx)); 11382 11383 /* Prepare for adding VCALL_OFFSET. 
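   VCALL_OFFSET is negative by construction, but need not fit in a signed
   13-bit immediate.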
The difficulty is that we 11384 may not have any available scratch register at this point. */ 11385 if (SPARC_SIMM13_P (vcall_offset)) 11386 ; 11387 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */ 11388 else if (! fixed_regs[5] 11389 /* The below sequence is made up of at least 2 insns, 11390 while the default method may need only one. */ 11391 && vcall_offset < -8192) 11392 { 11393 rtx scratch2 = gen_rtx_REG (Pmode, 5); 11394 emit_move_insn (scratch2, vcall_offset_rtx); 11395 vcall_offset_rtx = scratch2; 11396 } 11397 else 11398 { 11399 rtx increment = GEN_INT (-4096); 11400 11401 /* VCALL_OFFSET is a negative number whose typical range can be 11402 estimated as -32768..0 in 32-bit mode. In almost all cases 11403 it is therefore cheaper to emit multiple add insns than 11404 spilling and loading the constant into a register (at least 11405 6 insns). */ 11406 while (! SPARC_SIMM13_P (vcall_offset)) 11407 { 11408 emit_insn (gen_add2_insn (scratch, increment)); 11409 vcall_offset += 4096; 11410 } 11411 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */ 11412 } 11413 11414 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */ 11415 emit_move_insn (scratch, gen_rtx_MEM (Pmode, 11416 gen_rtx_PLUS (Pmode, 11417 scratch, 11418 vcall_offset_rtx))); 11419 11420 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */ 11421 emit_insn (gen_add2_insn (this_rtx, scratch)); 11422 } 11423 11424 /* Generate a tail call to the target function. */ 11425 if (! TREE_USED (function)) 11426 { 11427 assemble_external (function); 11428 TREE_USED (function) = 1; 11429 } 11430 funexp = XEXP (DECL_RTL (function), 0); 11431 11432 if (flag_delayed_branch) 11433 { 11434 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 11435 insn = emit_call_insn (gen_sibcall (funexp)); 11436 SIBLING_CALL_P (insn) = 1; 11437 } 11438 else 11439 { 11440 /* The hoops we have to jump through in order to generate a sibcall 11441 without using delay slots... */ 11442 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1); 11443 11444 if (flag_pic) 11445 { 11446 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */ 11447 start_sequence (); 11448 load_got_register (); /* clobbers %o7 */ 11449 scratch = sparc_legitimize_pic_address (funexp, scratch); 11450 seq = get_insns (); 11451 end_sequence (); 11452 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx); 11453 } 11454 else if (TARGET_ARCH32) 11455 { 11456 emit_insn (gen_rtx_SET (VOIDmode, 11457 scratch, 11458 gen_rtx_HIGH (SImode, funexp))); 11459 emit_insn (gen_rtx_SET (VOIDmode, 11460 scratch, 11461 gen_rtx_LO_SUM (SImode, scratch, funexp))); 11462 } 11463 else /* TARGET_ARCH64 */ 11464 { 11465 switch (sparc_cmodel) 11466 { 11467 case CM_MEDLOW: 11468 case CM_MEDMID: 11469 /* The destination can serve as a temporary. */ 11470 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch); 11471 break; 11472 11473 case CM_MEDANY: 11474 case CM_EMBMEDANY: 11475 /* The destination cannot serve as a temporary. */ 11476 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */ 11477 start_sequence (); 11478 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg); 11479 seq = get_insns (); 11480 end_sequence (); 11481 emit_and_preserve (seq, spill_reg, 0); 11482 break; 11483 11484 default: 11485 gcc_unreachable (); 11486 } 11487 } 11488 11489 emit_jump_insn (gen_indirect_jump (scratch)); 11490 } 11491 11492 emit_barrier (); 11493 11494 /* Run just enough of rest_of_compilation to get the insns emitted. 
11495 There's not really enough bulk here to make other passes such as
11496 instruction scheduling worthwhile. Note that use_thunk calls
11497 assemble_start_function and assemble_end_function. */
11498 insn = get_insns ();
11499 shorten_branches (insn);
11500 final_start_function (insn, file, 1);
11501 final (insn, file, 1);
11502 final_end_function ();
11503
11504 reload_completed = 0;
11505 epilogue_completed = 0;
11506 }
11507
11508 /* Return true if sparc_output_mi_thunk would be able to output the
11509 assembler code for the thunk function specified by the arguments
11510 it is passed, and false otherwise. */
11511 static bool
11512 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
11513 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
11514 HOST_WIDE_INT vcall_offset,
11515 const_tree function ATTRIBUTE_UNUSED)
11516 {
11517 /* Bound the loop used in the default method above. */
11518 return (vcall_offset >= -32768 || ! fixed_regs[5]);
11519 }
11520
11521 /* How to allocate a 'struct machine_function'. */
11522
11523 static struct machine_function *
11524 sparc_init_machine_status (void)
11525 {
11526 return ggc_cleared_alloc<machine_function> ();
11527 }
11528
11529 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11530 We need to emit DTP-relative relocations. */
11531
11532 static void
11533 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
11534 {
11535 switch (size)
11536 {
11537 case 4:
11538 fputs ("\t.word\t%r_tls_dtpoff32(", file);
11539 break;
11540 case 8:
11541 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
11542 break;
11543 default:
11544 gcc_unreachable ();
11545 }
11546 output_addr_const (file, x);
11547 fputs (")", file);
11548 }
11549
11550 /* Do whatever processing is required at the end of a file. */
11551
11552 static void
11553 sparc_file_end (void)
11554 {
11555 /* If we need to emit the special GOT helper function, do so now.
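   The helper merely adds the GOT displacement in %o7 into the GOT
   register and returns via %o7+8, as the sequences printed below show;
   with USE_HIDDEN_LINKONCE it is wrapped in a hidden comdat function so
   that all objects share a single copy.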
*/
11556 if (got_helper_rtx)
11557 {
11558 const char *name = XSTR (got_helper_rtx, 0);
11559 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
11560 #ifdef DWARF2_UNWIND_INFO
11561 bool do_cfi;
11562 #endif
11563
11564 if (USE_HIDDEN_LINKONCE)
11565 {
11566 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
11567 get_identifier (name),
11568 build_function_type_list (void_type_node,
11569 NULL_TREE));
11570 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
11571 NULL_TREE, void_type_node);
11572 TREE_PUBLIC (decl) = 1;
11573 TREE_STATIC (decl) = 1;
11574 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
11575 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
11576 DECL_VISIBILITY_SPECIFIED (decl) = 1;
11577 resolve_unique_section (decl, 0, flag_function_sections);
11578 allocate_struct_function (decl, true);
11579 cfun->is_thunk = 1;
11580 current_function_decl = decl;
11581 init_varasm_status ();
11582 assemble_start_function (decl, name);
11583 }
11584 else
11585 {
11586 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
11587 switch_to_section (text_section);
11588 if (align > 0)
11589 ASM_OUTPUT_ALIGN (asm_out_file, align);
11590 ASM_OUTPUT_LABEL (asm_out_file, name);
11591 }
11592
11593 #ifdef DWARF2_UNWIND_INFO
11594 do_cfi = dwarf2out_do_cfi_asm ();
11595 if (do_cfi)
11596 fprintf (asm_out_file, "\t.cfi_startproc\n");
11597 #endif
11598 if (flag_delayed_branch)
11599 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
11600 reg_name, reg_name);
11601 else
11602 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
11603 reg_name, reg_name);
11604 #ifdef DWARF2_UNWIND_INFO
11605 if (do_cfi)
11606 fprintf (asm_out_file, "\t.cfi_endproc\n");
11607 #endif
11608 }
11609
11610 if (NEED_INDICATE_EXEC_STACK)
11611 file_end_indicate_exec_stack ();
11612
11613 #ifdef TARGET_SOLARIS
11614 solaris_file_end ();
11615 #endif
11616 }
11617
11618 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11619 /* Implement TARGET_MANGLE_TYPE. */
11620
11621 static const char *
11622 sparc_mangle_type (const_tree type)
11623 {
11624 if (!TARGET_64BIT
11625 && TYPE_MAIN_VARIANT (type) == long_double_type_node
11626 && TARGET_LONG_DOUBLE_128)
11627 return "g";
11628
11629 /* For all other types, use normal C++ mangling. */
11630 return NULL;
11631 }
11632 #endif
11633
11634 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
11635 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks
11636 where bit 0 indicates that X is true, and bit 1 indicates Y is true. */
11637
11638 void
11639 sparc_emit_membar_for_model (enum memmodel model,
11640 int load_store, int before_after)
11641 {
11642 /* Bits for the MEMBAR mmask field. */
11643 const int LoadLoad = 1;
11644 const int StoreLoad = 2;
11645 const int LoadStore = 4;
11646 const int StoreStore = 8;
11647
11648 int mm = 0, implied = 0;
11649
11650 switch (sparc_memory_model)
11651 {
11652 case SMM_SC:
11653 /* Sequential Consistency. All memory transactions are immediately
11654 visible in sequential execution order. No barriers needed. */
11655 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
11656 break;
11657
11658 case SMM_TSO:
11659 /* Total Store Ordering: all memory transactions with store semantics
11660 are followed by an implied StoreStore. */
11661 implied |= StoreStore;
11662
11663 /* If we're not looking for a raw barrier (before+after), then atomic
11664 operations get the benefit of being both load and store.
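   For example, under TSO the only bit that can survive the masking
   below is StoreLoad: the barrier after a seq_cst store asks for
   StoreLoad | StoreStore, of which StoreStore (like LoadLoad and
   LoadStore) is already implied by the model.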
*/
11665 if (load_store == 3 && before_after == 1)
11666 implied |= StoreLoad;
11667 /* FALLTHRU */
11668
11669 case SMM_PSO:
11670 /* Partial Store Ordering: all memory transactions with load semantics
11671 are followed by an implied LoadLoad | LoadStore. */
11672 implied |= LoadLoad | LoadStore;
11673
11674 /* If we're not looking for a raw barrier (before+after), then atomic
11675 operations get the benefit of being both load and store. */
11676 if (load_store == 3 && before_after == 2)
11677 implied |= StoreLoad | StoreStore;
11678 /* FALLTHRU */
11679
11680 case SMM_RMO:
11681 /* Relaxed Memory Ordering: no implicit bits. */
11682 break;
11683
11684 default:
11685 gcc_unreachable ();
11686 }
11687
11688 if (before_after & 1)
11689 {
11690 if (is_mm_release (model) || is_mm_acq_rel (model)
11691 || is_mm_seq_cst (model))
11692 {
11693 if (load_store & 1)
11694 mm |= LoadLoad | StoreLoad;
11695 if (load_store & 2)
11696 mm |= LoadStore | StoreStore;
11697 }
11698 }
11699 if (before_after & 2)
11700 {
11701 if (is_mm_acquire (model) || is_mm_acq_rel (model)
11702 || is_mm_seq_cst (model))
11703 {
11704 if (load_store & 1)
11705 mm |= LoadLoad | LoadStore;
11706 if (load_store & 2)
11707 mm |= StoreLoad | StoreStore;
11708 }
11709 }
11710
11711 /* Remove the bits implied by the system memory model. */
11712 mm &= ~implied;
11713
11714 /* For raw barriers (before+after), always emit a barrier.
11715 This will become a compile-time barrier if needed. */
11716 if (mm || before_after == 3)
11717 emit_insn (gen_membar (GEN_INT (mm)));
11718 }
11719
11720 /* Expand code to perform an 8-bit or 16-bit compare-and-swap by doing
11721 a 32-bit compare-and-swap on the word containing the byte or half-word. */
11722
11723 static void
11724 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
11725 rtx oldval, rtx newval)
11726 {
11727 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
11728 rtx addr = gen_reg_rtx (Pmode);
11729 rtx off = gen_reg_rtx (SImode);
11730 rtx oldv = gen_reg_rtx (SImode);
11731 rtx newv = gen_reg_rtx (SImode);
11732 rtx oldvalue = gen_reg_rtx (SImode);
11733 rtx newvalue = gen_reg_rtx (SImode);
11734 rtx res = gen_reg_rtx (SImode);
11735 rtx resv = gen_reg_rtx (SImode);
11736 rtx memsi, val, mask, cc;
11737
11738 emit_insn (gen_rtx_SET (VOIDmode, addr,
11739 gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
11740
11741 if (Pmode != SImode)
11742 addr1 = gen_lowpart (SImode, addr1);
11743 emit_insn (gen_rtx_SET (VOIDmode, off,
11744 gen_rtx_AND (SImode, addr1, GEN_INT (3))));
11745
11746 memsi = gen_rtx_MEM (SImode, addr);
11747 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
11748 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
11749
11750 val = copy_to_reg (memsi);
11751
11752 emit_insn (gen_rtx_SET (VOIDmode, off,
11753 gen_rtx_XOR (SImode, off,
11754 GEN_INT (GET_MODE (mem) == QImode
11755 ?
3 : 2)))); 11756 11757 emit_insn (gen_rtx_SET (VOIDmode, off, 11758 gen_rtx_ASHIFT (SImode, off, GEN_INT (3)))); 11759 11760 if (GET_MODE (mem) == QImode) 11761 mask = force_reg (SImode, GEN_INT (0xff)); 11762 else 11763 mask = force_reg (SImode, GEN_INT (0xffff)); 11764 11765 emit_insn (gen_rtx_SET (VOIDmode, mask, 11766 gen_rtx_ASHIFT (SImode, mask, off))); 11767 11768 emit_insn (gen_rtx_SET (VOIDmode, val, 11769 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), 11770 val))); 11771 11772 oldval = gen_lowpart (SImode, oldval); 11773 emit_insn (gen_rtx_SET (VOIDmode, oldv, 11774 gen_rtx_ASHIFT (SImode, oldval, off))); 11775 11776 newval = gen_lowpart_common (SImode, newval); 11777 emit_insn (gen_rtx_SET (VOIDmode, newv, 11778 gen_rtx_ASHIFT (SImode, newval, off))); 11779 11780 emit_insn (gen_rtx_SET (VOIDmode, oldv, 11781 gen_rtx_AND (SImode, oldv, mask))); 11782 11783 emit_insn (gen_rtx_SET (VOIDmode, newv, 11784 gen_rtx_AND (SImode, newv, mask))); 11785 11786 rtx_code_label *end_label = gen_label_rtx (); 11787 rtx_code_label *loop_label = gen_label_rtx (); 11788 emit_label (loop_label); 11789 11790 emit_insn (gen_rtx_SET (VOIDmode, oldvalue, 11791 gen_rtx_IOR (SImode, oldv, val))); 11792 11793 emit_insn (gen_rtx_SET (VOIDmode, newvalue, 11794 gen_rtx_IOR (SImode, newv, val))); 11795 11796 emit_move_insn (bool_result, const1_rtx); 11797 11798 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue)); 11799 11800 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label); 11801 11802 emit_insn (gen_rtx_SET (VOIDmode, resv, 11803 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), 11804 res))); 11805 11806 emit_move_insn (bool_result, const0_rtx); 11807 11808 cc = gen_compare_reg_1 (NE, resv, val); 11809 emit_insn (gen_rtx_SET (VOIDmode, val, resv)); 11810 11811 /* Use cbranchcc4 to separate the compare and branch! */ 11812 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx), 11813 cc, const0_rtx, loop_label)); 11814 11815 emit_label (end_label); 11816 11817 emit_insn (gen_rtx_SET (VOIDmode, res, 11818 gen_rtx_AND (SImode, res, mask))); 11819 11820 emit_insn (gen_rtx_SET (VOIDmode, res, 11821 gen_rtx_LSHIFTRT (SImode, res, off))); 11822 11823 emit_move_insn (result, gen_lowpart (GET_MODE (result), res)); 11824 } 11825 11826 /* Expand code to perform a compare-and-swap. 
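   OPERANDS[0] receives the boolean success indication, OPERANDS[1] the
   value loaded from memory, OPERANDS[2] is the memory operand,
   OPERANDS[3] and OPERANDS[4] are the expected and replacement values,
   and OPERANDS[6] the memory model, which is honored by emitting the
   appropriate membars before and after the operation.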
*/ 11827 11828 void 11829 sparc_expand_compare_and_swap (rtx operands[]) 11830 { 11831 rtx bval, retval, mem, oldval, newval; 11832 machine_mode mode; 11833 enum memmodel model; 11834 11835 bval = operands[0]; 11836 retval = operands[1]; 11837 mem = operands[2]; 11838 oldval = operands[3]; 11839 newval = operands[4]; 11840 model = (enum memmodel) INTVAL (operands[6]); 11841 mode = GET_MODE (mem); 11842 11843 sparc_emit_membar_for_model (model, 3, 1); 11844 11845 if (reg_overlap_mentioned_p (retval, oldval)) 11846 oldval = copy_to_reg (oldval); 11847 11848 if (mode == QImode || mode == HImode) 11849 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval); 11850 else 11851 { 11852 rtx (*gen) (rtx, rtx, rtx, rtx); 11853 rtx x; 11854 11855 if (mode == SImode) 11856 gen = gen_atomic_compare_and_swapsi_1; 11857 else 11858 gen = gen_atomic_compare_and_swapdi_1; 11859 emit_insn (gen (retval, mem, oldval, newval)); 11860 11861 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1); 11862 if (x != bval) 11863 convert_move (bval, x, 1); 11864 } 11865 11866 sparc_emit_membar_for_model (model, 3, 2); 11867 } 11868 11869 void 11870 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel) 11871 { 11872 rtx t_1, t_2, t_3; 11873 11874 sel = gen_lowpart (DImode, sel); 11875 switch (vmode) 11876 { 11877 case V2SImode: 11878 /* inp = xxxxxxxAxxxxxxxB */ 11879 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16), 11880 NULL_RTX, 1, OPTAB_DIRECT); 11881 /* t_1 = ....xxxxxxxAxxx. */ 11882 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel), 11883 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT); 11884 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1), 11885 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT); 11886 /* sel = .......B */ 11887 /* t_1 = ...A.... */ 11888 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT); 11889 /* sel = ...A...B */ 11890 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1); 11891 /* sel = AAAABBBB * 4 */ 11892 t_1 = force_reg (SImode, GEN_INT (0x01230123)); 11893 /* sel = { A*4, A*4+1, A*4+2, ... } */ 11894 break; 11895 11896 case V4HImode: 11897 /* inp = xxxAxxxBxxxCxxxD */ 11898 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8), 11899 NULL_RTX, 1, OPTAB_DIRECT); 11900 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16), 11901 NULL_RTX, 1, OPTAB_DIRECT); 11902 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24), 11903 NULL_RTX, 1, OPTAB_DIRECT); 11904 /* t_1 = ..xxxAxxxBxxxCxx */ 11905 /* t_2 = ....xxxAxxxBxxxC */ 11906 /* t_3 = ......xxxAxxxBxx */ 11907 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel), 11908 GEN_INT (0x07), 11909 NULL_RTX, 1, OPTAB_DIRECT); 11910 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1), 11911 GEN_INT (0x0700), 11912 NULL_RTX, 1, OPTAB_DIRECT); 11913 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2), 11914 GEN_INT (0x070000), 11915 NULL_RTX, 1, OPTAB_DIRECT); 11916 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3), 11917 GEN_INT (0x07000000), 11918 NULL_RTX, 1, OPTAB_DIRECT); 11919 /* sel = .......D */ 11920 /* t_1 = .....C.. */ 11921 /* t_2 = ...B.... */ 11922 /* t_3 = .A...... 
*/ 11923 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT); 11924 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT); 11925 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT); 11926 /* sel = .A.B.C.D */ 11927 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1); 11928 /* sel = AABBCCDD * 2 */ 11929 t_1 = force_reg (SImode, GEN_INT (0x01010101)); 11930 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */ 11931 break; 11932 11933 case V8QImode: 11934 /* input = xAxBxCxDxExFxGxH */ 11935 sel = expand_simple_binop (DImode, AND, sel, 11936 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32 11937 | 0x0f0f0f0f), 11938 NULL_RTX, 1, OPTAB_DIRECT); 11939 /* sel = .A.B.C.D.E.F.G.H */ 11940 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4), 11941 NULL_RTX, 1, OPTAB_DIRECT); 11942 /* t_1 = ..A.B.C.D.E.F.G. */ 11943 sel = expand_simple_binop (DImode, IOR, sel, t_1, 11944 NULL_RTX, 1, OPTAB_DIRECT); 11945 /* sel = .AABBCCDDEEFFGGH */ 11946 sel = expand_simple_binop (DImode, AND, sel, 11947 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32 11948 | 0xff00ff), 11949 NULL_RTX, 1, OPTAB_DIRECT); 11950 /* sel = ..AB..CD..EF..GH */ 11951 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8), 11952 NULL_RTX, 1, OPTAB_DIRECT); 11953 /* t_1 = ....AB..CD..EF.. */ 11954 sel = expand_simple_binop (DImode, IOR, sel, t_1, 11955 NULL_RTX, 1, OPTAB_DIRECT); 11956 /* sel = ..ABABCDCDEFEFGH */ 11957 sel = expand_simple_binop (DImode, AND, sel, 11958 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff), 11959 NULL_RTX, 1, OPTAB_DIRECT); 11960 /* sel = ....ABCD....EFGH */ 11961 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16), 11962 NULL_RTX, 1, OPTAB_DIRECT); 11963 /* t_1 = ........ABCD.... */ 11964 sel = gen_lowpart (SImode, sel); 11965 t_1 = gen_lowpart (SImode, t_1); 11966 break; 11967 11968 default: 11969 gcc_unreachable (); 11970 } 11971 11972 /* Always perform the final addition/merge within the bmask insn. */ 11973 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1)); 11974 } 11975 11976 /* Implement TARGET_FRAME_POINTER_REQUIRED. */ 11977 11978 static bool 11979 sparc_frame_pointer_required (void) 11980 { 11981 /* If the stack pointer is dynamically modified in the function, it cannot 11982 serve as the frame pointer. */ 11983 if (cfun->calls_alloca) 11984 return true; 11985 11986 /* If the function receives nonlocal gotos, it needs to save the frame 11987 pointer in the nonlocal_goto_save_area object. */ 11988 if (cfun->has_nonlocal_label) 11989 return true; 11990 11991 /* In flat mode, that's it. */ 11992 if (TARGET_FLAT) 11993 return false; 11994 11995 /* Otherwise, the frame pointer is required if the function isn't leaf, but 11996 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */ 11997 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ()); 11998 } 11999 12000 /* The way this is structured, we can't eliminate SFP in favor of SP 12001 if the frame pointer is required: we want to use the SFP->HFP elimination 12002 in that case. But the test in update_eliminables doesn't know we are 12003 assuming below that we only do the former elimination. */ 12004 12005 static bool 12006 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) 12007 { 12008 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required (); 12009 } 12010 12011 /* Return the hard frame pointer directly to bypass the stack bias. 
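   On 64-bit SPARC %fp, like %sp, is biased by SPARC_STACK_BIAS (2047)
   relative to the actual frame address, so handing the register itself
   to the builtin setjmp machinery avoids folding the bias into the
   saved value.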
*/
12012
12013 static rtx
12014 sparc_builtin_setjmp_frame_value (void)
12015 {
12016 return hard_frame_pointer_rtx;
12017 }
12018
12019 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12020 they won't be allocated. */
12021
12022 static void
12023 sparc_conditional_register_usage (void)
12024 {
12025 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12026 {
12027 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12028 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12029 }
12030 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
12031 then honor it. */
12032 if (TARGET_ARCH32 && fixed_regs[5])
12033 fixed_regs[5] = 1;
12034 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12035 fixed_regs[5] = 0;
12036 if (! TARGET_V9)
12037 {
12038 int regno;
12039 for (regno = SPARC_FIRST_V9_FP_REG;
12040 regno <= SPARC_LAST_V9_FP_REG;
12041 regno++)
12042 fixed_regs[regno] = 1;
12043 /* %fcc0 is used by v8 and v9. */
12044 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12045 regno <= SPARC_LAST_V9_FCC_REG;
12046 regno++)
12047 fixed_regs[regno] = 1;
12048 }
12049 if (! TARGET_FPU)
12050 {
12051 int regno;
12052 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12053 fixed_regs[regno] = 1;
12054 }
12055 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
12056 then honor it. Likewise with g3 and g4. */
12057 if (fixed_regs[2] == 2)
12058 fixed_regs[2] = ! TARGET_APP_REGS;
12059 if (fixed_regs[3] == 2)
12060 fixed_regs[3] = ! TARGET_APP_REGS;
12061 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12062 fixed_regs[4] = ! TARGET_APP_REGS;
12063 else if (TARGET_CM_EMBMEDANY)
12064 fixed_regs[4] = 1;
12065 else if (fixed_regs[4] == 2)
12066 fixed_regs[4] = 0;
12067 if (TARGET_FLAT)
12068 {
12069 int regno;
12070 /* Disable leaf functions. */
12071 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12072 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12073 leaf_reg_remap [regno] = regno;
12074 }
12075 if (TARGET_VIS)
12076 global_regs[SPARC_GSR_REG] = 1;
12077 }
12078
12079 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12080
12081 - We can't load constants into FP registers.
12082 - We can't load FP constants into integer registers when soft-float,
12083 because there is no soft-float pattern with a r/F constraint.
12084 - We can't load FP constants into integer registers for TFmode unless
12085 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12086 - Try and reload integer constants (symbolic or otherwise) back into
12087 registers directly, rather than having them dumped to memory. */
12088
12089 static reg_class_t
12090 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12091 {
12092 machine_mode mode = GET_MODE (x);
12093 if (CONSTANT_P (x))
12094 {
12095 if (FP_REG_CLASS_P (rclass)
12096 || rclass == GENERAL_OR_FP_REGS
12097 || rclass == GENERAL_OR_EXTRA_FP_REGS
12098 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12099 || (mode == TFmode && ! const_zero_operand (x, mode)))
12100 return NO_REGS;
12101
12102 if (GET_MODE_CLASS (mode) == MODE_INT)
12103 return GENERAL_REGS;
12104
12105 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12106 {
12107 if (! FP_REG_CLASS_P (rclass)
12108 || !(const_zero_operand (x, mode)
12109 || const_all_ones_operand (x, mode)))
12110 return NO_REGS;
12111 }
12112 }
12113
12114 if (TARGET_VIS3
12115 && !
TARGET_ARCH64 12116 && (rclass == EXTRA_FP_REGS 12117 || rclass == GENERAL_OR_EXTRA_FP_REGS)) 12118 { 12119 int regno = true_regnum (x); 12120 12121 if (SPARC_INT_REG_P (regno)) 12122 return (rclass == EXTRA_FP_REGS 12123 ? FP_REGS : GENERAL_OR_FP_REGS); 12124 } 12125 12126 return rclass; 12127 } 12128 12129 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction, 12130 OPERANDS are its operands and OPCODE is the mnemonic to be used. */ 12131 12132 const char * 12133 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode) 12134 { 12135 char mulstr[32]; 12136 12137 gcc_assert (! TARGET_ARCH64); 12138 12139 if (sparc_check_64 (operands[1], insn) <= 0) 12140 output_asm_insn ("srl\t%L1, 0, %L1", operands); 12141 if (which_alternative == 1) 12142 output_asm_insn ("sllx\t%H1, 32, %H1", operands); 12143 if (GET_CODE (operands[2]) == CONST_INT) 12144 { 12145 if (which_alternative == 1) 12146 { 12147 output_asm_insn ("or\t%L1, %H1, %H1", operands); 12148 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode); 12149 output_asm_insn (mulstr, operands); 12150 return "srlx\t%L0, 32, %H0"; 12151 } 12152 else 12153 { 12154 output_asm_insn ("sllx\t%H1, 32, %3", operands); 12155 output_asm_insn ("or\t%L1, %3, %3", operands); 12156 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode); 12157 output_asm_insn (mulstr, operands); 12158 output_asm_insn ("srlx\t%3, 32, %H0", operands); 12159 return "mov\t%3, %L0"; 12160 } 12161 } 12162 else if (rtx_equal_p (operands[1], operands[2])) 12163 { 12164 if (which_alternative == 1) 12165 { 12166 output_asm_insn ("or\t%L1, %H1, %H1", operands); 12167 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode); 12168 output_asm_insn (mulstr, operands); 12169 return "srlx\t%L0, 32, %H0"; 12170 } 12171 else 12172 { 12173 output_asm_insn ("sllx\t%H1, 32, %3", operands); 12174 output_asm_insn ("or\t%L1, %3, %3", operands); 12175 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode); 12176 output_asm_insn (mulstr, operands); 12177 output_asm_insn ("srlx\t%3, 32, %H0", operands); 12178 return "mov\t%3, %L0"; 12179 } 12180 } 12181 if (sparc_check_64 (operands[2], insn) <= 0) 12182 output_asm_insn ("srl\t%L2, 0, %L2", operands); 12183 if (which_alternative == 1) 12184 { 12185 output_asm_insn ("or\t%L1, %H1, %H1", operands); 12186 output_asm_insn ("sllx\t%H2, 32, %L1", operands); 12187 output_asm_insn ("or\t%L2, %L1, %L1", operands); 12188 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode); 12189 output_asm_insn (mulstr, operands); 12190 return "srlx\t%L0, 32, %H0"; 12191 } 12192 else 12193 { 12194 output_asm_insn ("sllx\t%H1, 32, %3", operands); 12195 output_asm_insn ("sllx\t%H2, 32, %4", operands); 12196 output_asm_insn ("or\t%L1, %3, %3", operands); 12197 output_asm_insn ("or\t%L2, %4, %4", operands); 12198 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode); 12199 output_asm_insn (mulstr, operands); 12200 output_asm_insn ("srlx\t%3, 32, %H0", operands); 12201 return "mov\t%3, %L0"; 12202 } 12203 } 12204 12205 /* Subroutine of sparc_expand_vector_init. Emit code to initialize 12206 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE 12207 and INNER_MODE are the modes describing TARGET. 
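   ELT is first widened to SImode and placed in the low 32 bits of
   temporary T1; given the big-endian byte numbering, the BMASK constants
   below (0x45674567, 0x67676767, 0x77777777) then direct BSHUFFLE to
   copy that low word, half-word or byte into every field of TARGET.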
*/ 12208 12209 static void 12210 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode, 12211 machine_mode inner_mode) 12212 { 12213 rtx t1, final_insn, sel; 12214 int bmask; 12215 12216 t1 = gen_reg_rtx (mode); 12217 12218 elt = convert_modes (SImode, inner_mode, elt, true); 12219 emit_move_insn (gen_lowpart(SImode, t1), elt); 12220 12221 switch (mode) 12222 { 12223 case V2SImode: 12224 final_insn = gen_bshufflev2si_vis (target, t1, t1); 12225 bmask = 0x45674567; 12226 break; 12227 case V4HImode: 12228 final_insn = gen_bshufflev4hi_vis (target, t1, t1); 12229 bmask = 0x67676767; 12230 break; 12231 case V8QImode: 12232 final_insn = gen_bshufflev8qi_vis (target, t1, t1); 12233 bmask = 0x77777777; 12234 break; 12235 default: 12236 gcc_unreachable (); 12237 } 12238 12239 sel = force_reg (SImode, GEN_INT (bmask)); 12240 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx)); 12241 emit_insn (final_insn); 12242 } 12243 12244 /* Subroutine of sparc_expand_vector_init. Emit code to initialize 12245 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */ 12246 12247 static void 12248 vector_init_fpmerge (rtx target, rtx elt) 12249 { 12250 rtx t1, t2, t2_low, t3, t3_low; 12251 12252 t1 = gen_reg_rtx (V4QImode); 12253 elt = convert_modes (SImode, QImode, elt, true); 12254 emit_move_insn (gen_lowpart (SImode, t1), elt); 12255 12256 t2 = gen_reg_rtx (V8QImode); 12257 t2_low = gen_lowpart (V4QImode, t2); 12258 emit_insn (gen_fpmerge_vis (t2, t1, t1)); 12259 12260 t3 = gen_reg_rtx (V8QImode); 12261 t3_low = gen_lowpart (V4QImode, t3); 12262 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low)); 12263 12264 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low)); 12265 } 12266 12267 /* Subroutine of sparc_expand_vector_init. Emit code to initialize 12268 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */ 12269 12270 static void 12271 vector_init_faligndata (rtx target, rtx elt) 12272 { 12273 rtx t1 = gen_reg_rtx (V4HImode); 12274 int i; 12275 12276 elt = convert_modes (SImode, HImode, elt, true); 12277 emit_move_insn (gen_lowpart (SImode, t1), elt); 12278 12279 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode), 12280 force_reg (SImode, GEN_INT (6)), 12281 const0_rtx)); 12282 12283 for (i = 0; i < 4; i++) 12284 emit_insn (gen_faligndatav4hi_vis (target, t1, target)); 12285 } 12286 12287 /* Emit code to initialize TARGET to values for individual fields VALS. 
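   The strategy, in decreasing order of preference: a single CONST_VECTOR
   move when every element is constant; plain scalar moves when the
   vector has a single element or two word-sized elements; the VIS2
   BSHUFFLE, VIS FPMERGE and VIS FALIGNDATA idioms for 8-byte vectors
   whose elements are all identical; failing that, a spill through a
   stack temporary.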
*/ 12288 12289 void 12290 sparc_expand_vector_init (rtx target, rtx vals) 12291 { 12292 const machine_mode mode = GET_MODE (target); 12293 const machine_mode inner_mode = GET_MODE_INNER (mode); 12294 const int n_elts = GET_MODE_NUNITS (mode); 12295 int i, n_var = 0; 12296 bool all_same = true; 12297 rtx mem; 12298 12299 for (i = 0; i < n_elts; i++) 12300 { 12301 rtx x = XVECEXP (vals, 0, i); 12302 if (!(CONST_INT_P (x) 12303 || GET_CODE (x) == CONST_DOUBLE 12304 || GET_CODE (x) == CONST_FIXED)) 12305 n_var++; 12306 12307 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 12308 all_same = false; 12309 } 12310 12311 if (n_var == 0) 12312 { 12313 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); 12314 return; 12315 } 12316 12317 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode)) 12318 { 12319 if (GET_MODE_SIZE (inner_mode) == 4) 12320 { 12321 emit_move_insn (gen_lowpart (SImode, target), 12322 gen_lowpart (SImode, XVECEXP (vals, 0, 0))); 12323 return; 12324 } 12325 else if (GET_MODE_SIZE (inner_mode) == 8) 12326 { 12327 emit_move_insn (gen_lowpart (DImode, target), 12328 gen_lowpart (DImode, XVECEXP (vals, 0, 0))); 12329 return; 12330 } 12331 } 12332 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode) 12333 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode)) 12334 { 12335 emit_move_insn (gen_highpart (word_mode, target), 12336 gen_lowpart (word_mode, XVECEXP (vals, 0, 0))); 12337 emit_move_insn (gen_lowpart (word_mode, target), 12338 gen_lowpart (word_mode, XVECEXP (vals, 0, 1))); 12339 return; 12340 } 12341 12342 if (all_same && GET_MODE_SIZE (mode) == 8) 12343 { 12344 if (TARGET_VIS2) 12345 { 12346 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode); 12347 return; 12348 } 12349 if (mode == V8QImode) 12350 { 12351 vector_init_fpmerge (target, XVECEXP (vals, 0, 0)); 12352 return; 12353 } 12354 if (mode == V4HImode) 12355 { 12356 vector_init_faligndata (target, XVECEXP (vals, 0, 0)); 12357 return; 12358 } 12359 } 12360 12361 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 12362 for (i = 0; i < n_elts; i++) 12363 emit_move_insn (adjust_address_nv (mem, inner_mode, 12364 i * GET_MODE_SIZE (inner_mode)), 12365 XVECEXP (vals, 0, i)); 12366 emit_move_insn (target, mem); 12367 } 12368 12369 /* Implement TARGET_SECONDARY_RELOAD. */ 12370 12371 static reg_class_t 12372 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, 12373 machine_mode mode, secondary_reload_info *sri) 12374 { 12375 enum reg_class rclass = (enum reg_class) rclass_i; 12376 12377 sri->icode = CODE_FOR_nothing; 12378 sri->extra_cost = 0; 12379 12380 /* We need a temporary when loading/storing a HImode/QImode value 12381 between memory and the FPU registers. This can happen when combine puts 12382 a paradoxical subreg in a float/fix conversion insn. */ 12383 if (FP_REG_CLASS_P (rclass) 12384 && (mode == HImode || mode == QImode) 12385 && (GET_CODE (x) == MEM 12386 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 12387 && true_regnum (x) == -1))) 12388 return GENERAL_REGS; 12389 12390 /* On 32-bit we need a temporary when loading/storing a DFmode value 12391 between unaligned memory and the upper FPU registers. */ 12392 if (TARGET_ARCH32 12393 && rclass == EXTRA_FP_REGS 12394 && mode == DFmode 12395 && GET_CODE (x) == MEM 12396 && ! mem_min_alignment (x, 8)) 12397 return FP_REGS; 12398 12399 if (((TARGET_CM_MEDANY 12400 && symbolic_operand (x, mode)) 12401 || (TARGET_CM_EMBMEDANY 12402 && text_segment_operand (x, mode))) 12403 && ! 
flag_pic) 12404 { 12405 if (in_p) 12406 sri->icode = direct_optab_handler (reload_in_optab, mode); 12407 else 12408 sri->icode = direct_optab_handler (reload_out_optab, mode); 12409 return NO_REGS; 12410 } 12411 12412 if (TARGET_VIS3 && TARGET_ARCH32) 12413 { 12414 int regno = true_regnum (x); 12415 12416 /* When using VIS3 fp<-->int register moves, on 32-bit we have 12417 to move 8-byte values in 4-byte pieces. This only works via 12418 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to 12419 move between EXTRA_FP_REGS and GENERAL_REGS, we will need 12420 an FP_REGS intermediate move. */ 12421 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno)) 12422 || ((general_or_i64_p (rclass) 12423 || rclass == GENERAL_OR_FP_REGS) 12424 && SPARC_FP_REG_P (regno))) 12425 { 12426 sri->extra_cost = 2; 12427 return FP_REGS; 12428 } 12429 } 12430 12431 return NO_REGS; 12432 } 12433 12434 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into 12435 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */ 12436 12437 bool 12438 sparc_expand_conditional_move (machine_mode mode, rtx *operands) 12439 { 12440 enum rtx_code rc = GET_CODE (operands[1]); 12441 machine_mode cmp_mode; 12442 rtx cc_reg, dst, cmp; 12443 12444 cmp = operands[1]; 12445 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64) 12446 return false; 12447 12448 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD) 12449 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc); 12450 12451 cmp_mode = GET_MODE (XEXP (cmp, 0)); 12452 rc = GET_CODE (cmp); 12453 12454 dst = operands[0]; 12455 if (! rtx_equal_p (operands[2], dst) 12456 && ! rtx_equal_p (operands[3], dst)) 12457 { 12458 if (reg_overlap_mentioned_p (dst, cmp)) 12459 dst = gen_reg_rtx (mode); 12460 12461 emit_move_insn (dst, operands[3]); 12462 } 12463 else if (operands[2] == dst) 12464 { 12465 operands[2] = operands[3]; 12466 12467 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT) 12468 rc = reverse_condition_maybe_unordered (rc); 12469 else 12470 rc = reverse_condition (rc); 12471 } 12472 12473 if (XEXP (cmp, 1) == const0_rtx 12474 && GET_CODE (XEXP (cmp, 0)) == REG 12475 && cmp_mode == DImode 12476 && v9_regcmp_p (rc)) 12477 cc_reg = XEXP (cmp, 0); 12478 else 12479 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1)); 12480 12481 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx); 12482 12483 emit_insn (gen_rtx_SET (VOIDmode, dst, 12484 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst))); 12485 12486 if (dst != operands[0]) 12487 emit_move_insn (operands[0], dst); 12488 12489 return true; 12490 } 12491 12492 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2] 12493 into OPERANDS[0] in MODE, depending on the outcome of the comparison of 12494 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition. 12495 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine 12496 code to be used for the condition mask. 
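   Three insns are emitted: the FCODE unspec computes the element-wise
   comparison into MASK, the CCODE unspec moves MASK into the bmask field
   of %gsr, and a BSHUFFLE finally selects each element of the result
   from OPERANDS[1] or OPERANDS[2] under control of %gsr.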
*/
12497
12498 void
12499 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
12500 {
12501 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
12502 enum rtx_code code = GET_CODE (operands[3]);
12503
12504 mask = gen_reg_rtx (Pmode);
12505 cop0 = operands[4];
12506 cop1 = operands[5];
12507 if (code == LT || code == GE)
12508 {
12509 rtx t;
12510
12511 code = swap_condition (code);
12512 t = cop0; cop0 = cop1; cop1 = t;
12513 }
12514
12515 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
12516
12517 fcmp = gen_rtx_UNSPEC (Pmode,
12518 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
12519 fcode);
12520
12521 cmask = gen_rtx_UNSPEC (DImode,
12522 gen_rtvec (2, mask, gsr),
12523 ccode);
12524
12525 bshuf = gen_rtx_UNSPEC (mode,
12526 gen_rtvec (3, operands[1], operands[2], gsr),
12527 UNSPEC_BSHUFFLE);
12528
12529 emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
12530 emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
12531
12532 emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
12533 }
12534
12535 /* On sparc, any mode which naturally allocates into the float
12536 registers should return 4 here. */
12537
12538 unsigned int
12539 sparc_regmode_natural_size (machine_mode mode)
12540 {
12541 int size = UNITS_PER_WORD;
12542
12543 if (TARGET_ARCH64)
12544 {
12545 enum mode_class mclass = GET_MODE_CLASS (mode);
12546
12547 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
12548 size = 4;
12549 }
12550
12551 return size;
12552 }
12553
12554 /* Return TRUE if it is a good idea to tie two pseudo registers
12555 when one has mode MODE1 and one has mode MODE2.
12556 If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2
12557 for any hard reg, then this must be FALSE for correct output.
12558
12559 For V9 we have to deal with the fact that only the lower 32 floating
12560 point registers are 32-bit addressable. */
12561
12562 bool
12563 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
12564 {
12565 enum mode_class mclass1, mclass2;
12566 unsigned short size1, size2;
12567
12568 if (mode1 == mode2)
12569 return true;
12570
12571 mclass1 = GET_MODE_CLASS (mode1);
12572 mclass2 = GET_MODE_CLASS (mode2);
12573 if (mclass1 != mclass2)
12574 return false;
12575
12576 if (! TARGET_V9)
12577 return true;
12578
12579 /* Classes are the same and we are V9 so we have to deal with upper
12580 vs. lower floating point registers. If one of the modes is a
12581 4-byte mode, and the other is not, we have to mark them as not
12582 tieable because only the lower 32 floating point registers are
12583 addressable 32 bits at a time.
12584
12585 We can't just test explicitly for SFmode, otherwise we won't
12586 cover the vector mode cases properly. */
12587
12588 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
12589 return true;
12590
12591 size1 = GET_MODE_SIZE (mode1);
12592 size2 = GET_MODE_SIZE (mode2);
12593 if ((size1 > 4 && size2 == 4)
12594 || (size2 > 4 && size1 == 4))
12595 return false;
12596
12597 return true;
12598 }
12599
12600 /* Implement TARGET_CSTORE_MODE. */
12601
12602 static machine_mode
12603 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
12604 {
12605 return (TARGET_ARCH64 ? DImode : SImode);
12606 }
12607
12608 /* Return the compound expression made of T1 and T2. */
12609
12610 static inline tree
12611 compound_expr (tree t1, tree t2)
12612 {
12613 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
12614 }
12615
12616 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.
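   *HOLD saves %fsr and clears its accrued-exception field (bits 5..9)
   and trap-enable field (bits 23..27); *CLEAR reloads the masked value
   to discard exceptions raised in between; *UPDATE saves the new %fsr,
   restores the original one and re-raises the newly accrued exceptions
   via __atomic_feraiseexcept.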
*/
12617
12618 static void
12619 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
12620 {
12621 if (!TARGET_FPU)
12622 return;
12623
12624 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
12625 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
12626
12627 /* We generate the equivalent of feholdexcept (&fenv_var):
12628
12629 unsigned int fenv_var;
12630 __builtin_store_fsr (&fenv_var);
12631
12632 unsigned int tmp1_var;
12633 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
12634
12635 __builtin_load_fsr (&tmp1_var); */
12636
12637 tree fenv_var = create_tmp_var (unsigned_type_node);
12638 mark_addressable (fenv_var);
12639 tree fenv_addr = build_fold_addr_expr (fenv_var);
12640 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
12641 tree hold_stfsr = build_call_expr (stfsr, 1, fenv_addr);
12642
12643 tree tmp1_var = create_tmp_var (unsigned_type_node);
12644 mark_addressable (tmp1_var);
12645 tree masked_fenv_var
12646 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
12647 build_int_cst (unsigned_type_node,
12648 ~(accrued_exception_mask | trap_enable_mask)));
12649 tree hold_mask
12650 = build2 (MODIFY_EXPR, void_type_node, tmp1_var, masked_fenv_var);
12651
12652 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
12653 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
12654 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
12655
12656 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
12657
12658 /* We reload the value of tmp1_var to clear the exceptions:
12659
12660 __builtin_load_fsr (&tmp1_var); */
12661
12662 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
12663
12664 /* We generate the equivalent of feupdateenv (&fenv_var):
12665
12666 unsigned int tmp2_var;
12667 __builtin_store_fsr (&tmp2_var);
12668
12669 __builtin_load_fsr (&fenv_var);
12670
12671 if (SPARC_LOW_FE_EXCEPT_VALUES)
12672 tmp2_var >>= 5;
12673 __atomic_feraiseexcept ((int) tmp2_var); */
12674
12675 tree tmp2_var = create_tmp_var (unsigned_type_node);
12676 mark_addressable (tmp2_var);
12677 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
12678 tree update_stfsr = build_call_expr (stfsr, 1, tmp2_addr);
12679
12680 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
12681
12682 tree atomic_feraiseexcept
12683 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
12684 tree update_call
12685 = build_call_expr (atomic_feraiseexcept, 1,
12686 fold_convert (integer_type_node, tmp2_var));
12687
12688 if (SPARC_LOW_FE_EXCEPT_VALUES)
12689 {
12690 tree shifted_tmp2_var
12691 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
12692 build_int_cst (unsigned_type_node, 5));
12693 tree update_shift
12694 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
12695 update_call = compound_expr (update_shift, update_call);
12696 }
12697
12698 *update
12699 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
12700 }
12701
12702 #include "gt-sparc.h"
12703