1 /* Subroutines for insn-output.c for SPARC. 2 Copyright (C) 1987-2019 Free Software Foundation, Inc. 3 Contributed by Michael Tiemann (tiemann@cygnus.com) 4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, 5 at Cygnus Support. 6 7 This file is part of GCC. 8 9 GCC is free software; you can redistribute it and/or modify 10 it under the terms of the GNU General Public License as published by 11 the Free Software Foundation; either version 3, or (at your option) 12 any later version. 13 14 GCC is distributed in the hope that it will be useful, 15 but WITHOUT ANY WARRANTY; without even the implied warranty of 16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 GNU General Public License for more details. 18 19 You should have received a copy of the GNU General Public License 20 along with GCC; see the file COPYING3. If not see 21 <http://www.gnu.org/licenses/>. */ 22 23 #define IN_TARGET_CODE 1 24 25 #include "config.h" 26 #include "system.h" 27 #include "coretypes.h" 28 #include "backend.h" 29 #include "target.h" 30 #include "rtl.h" 31 #include "tree.h" 32 #include "memmodel.h" 33 #include "gimple.h" 34 #include "df.h" 35 #include "tm_p.h" 36 #include "stringpool.h" 37 #include "attribs.h" 38 #include "expmed.h" 39 #include "optabs.h" 40 #include "regs.h" 41 #include "emit-rtl.h" 42 #include "recog.h" 43 #include "diagnostic-core.h" 44 #include "alias.h" 45 #include "fold-const.h" 46 #include "stor-layout.h" 47 #include "calls.h" 48 #include "varasm.h" 49 #include "output.h" 50 #include "insn-attr.h" 51 #include "explow.h" 52 #include "expr.h" 53 #include "debug.h" 54 #include "cfgrtl.h" 55 #include "common/common-target.h" 56 #include "gimplify.h" 57 #include "langhooks.h" 58 #include "reload.h" 59 #include "params.h" 60 #include "tree-pass.h" 61 #include "context.h" 62 #include "builtins.h" 63 #include "tree-vector-builder.h" 64 65 /* This file should be included last. */ 66 #include "target-def.h" 67 68 /* Processor costs */ 69 70 struct processor_costs { 71 /* Integer load */ 72 const int int_load; 73 74 /* Integer signed load */ 75 const int int_sload; 76 77 /* Integer zeroed load */ 78 const int int_zload; 79 80 /* Float load */ 81 const int float_load; 82 83 /* fmov, fneg, fabs */ 84 const int float_move; 85 86 /* fadd, fsub */ 87 const int float_plusminus; 88 89 /* fcmp */ 90 const int float_cmp; 91 92 /* fmov, fmovr */ 93 const int float_cmove; 94 95 /* fmul */ 96 const int float_mul; 97 98 /* fdivs */ 99 const int float_div_sf; 100 101 /* fdivd */ 102 const int float_div_df; 103 104 /* fsqrts */ 105 const int float_sqrt_sf; 106 107 /* fsqrtd */ 108 const int float_sqrt_df; 109 110 /* umul/smul */ 111 const int int_mul; 112 113 /* mulX */ 114 const int int_mulX; 115 116 /* integer multiply cost for each bit set past the most 117 significant 3, so the formula for multiply cost becomes: 118 119 if (rs1 < 0) 120 highest_bit = highest_clear_bit(rs1); 121 else 122 highest_bit = highest_set_bit(rs1); 123 if (highest_bit < 3) 124 highest_bit = 3; 125 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor); 126 127 A value of zero indicates that the multiply costs is fixed, 128 and not variable. */ 129 const int int_mul_bit_factor; 130 131 /* udiv/sdiv */ 132 const int int_div; 133 134 /* divX */ 135 const int int_divX; 136 137 /* movcc, movr */ 138 const int int_cmove; 139 140 /* penalty for shifts, due to scheduling rules etc. */ 141 const int shift_penalty; 142 143 /* cost of a (predictable) branch. 
*/ 144 const int branch_cost; 145 }; 146 147 static const 148 struct processor_costs cypress_costs = { 149 COSTS_N_INSNS (2), /* int load */ 150 COSTS_N_INSNS (2), /* int signed load */ 151 COSTS_N_INSNS (2), /* int zeroed load */ 152 COSTS_N_INSNS (2), /* float load */ 153 COSTS_N_INSNS (5), /* fmov, fneg, fabs */ 154 COSTS_N_INSNS (5), /* fadd, fsub */ 155 COSTS_N_INSNS (1), /* fcmp */ 156 COSTS_N_INSNS (1), /* fmov, fmovr */ 157 COSTS_N_INSNS (7), /* fmul */ 158 COSTS_N_INSNS (37), /* fdivs */ 159 COSTS_N_INSNS (37), /* fdivd */ 160 COSTS_N_INSNS (63), /* fsqrts */ 161 COSTS_N_INSNS (63), /* fsqrtd */ 162 COSTS_N_INSNS (1), /* imul */ 163 COSTS_N_INSNS (1), /* imulX */ 164 0, /* imul bit factor */ 165 COSTS_N_INSNS (1), /* idiv */ 166 COSTS_N_INSNS (1), /* idivX */ 167 COSTS_N_INSNS (1), /* movcc/movr */ 168 0, /* shift penalty */ 169 3 /* branch cost */ 170 }; 171 172 static const 173 struct processor_costs supersparc_costs = { 174 COSTS_N_INSNS (1), /* int load */ 175 COSTS_N_INSNS (1), /* int signed load */ 176 COSTS_N_INSNS (1), /* int zeroed load */ 177 COSTS_N_INSNS (0), /* float load */ 178 COSTS_N_INSNS (3), /* fmov, fneg, fabs */ 179 COSTS_N_INSNS (3), /* fadd, fsub */ 180 COSTS_N_INSNS (3), /* fcmp */ 181 COSTS_N_INSNS (1), /* fmov, fmovr */ 182 COSTS_N_INSNS (3), /* fmul */ 183 COSTS_N_INSNS (6), /* fdivs */ 184 COSTS_N_INSNS (9), /* fdivd */ 185 COSTS_N_INSNS (12), /* fsqrts */ 186 COSTS_N_INSNS (12), /* fsqrtd */ 187 COSTS_N_INSNS (4), /* imul */ 188 COSTS_N_INSNS (4), /* imulX */ 189 0, /* imul bit factor */ 190 COSTS_N_INSNS (4), /* idiv */ 191 COSTS_N_INSNS (4), /* idivX */ 192 COSTS_N_INSNS (1), /* movcc/movr */ 193 1, /* shift penalty */ 194 3 /* branch cost */ 195 }; 196 197 static const 198 struct processor_costs hypersparc_costs = { 199 COSTS_N_INSNS (1), /* int load */ 200 COSTS_N_INSNS (1), /* int signed load */ 201 COSTS_N_INSNS (1), /* int zeroed load */ 202 COSTS_N_INSNS (1), /* float load */ 203 COSTS_N_INSNS (1), /* fmov, fneg, fabs */ 204 COSTS_N_INSNS (1), /* fadd, fsub */ 205 COSTS_N_INSNS (1), /* fcmp */ 206 COSTS_N_INSNS (1), /* fmov, fmovr */ 207 COSTS_N_INSNS (1), /* fmul */ 208 COSTS_N_INSNS (8), /* fdivs */ 209 COSTS_N_INSNS (12), /* fdivd */ 210 COSTS_N_INSNS (17), /* fsqrts */ 211 COSTS_N_INSNS (17), /* fsqrtd */ 212 COSTS_N_INSNS (17), /* imul */ 213 COSTS_N_INSNS (17), /* imulX */ 214 0, /* imul bit factor */ 215 COSTS_N_INSNS (17), /* idiv */ 216 COSTS_N_INSNS (17), /* idivX */ 217 COSTS_N_INSNS (1), /* movcc/movr */ 218 0, /* shift penalty */ 219 3 /* branch cost */ 220 }; 221 222 static const 223 struct processor_costs leon_costs = { 224 COSTS_N_INSNS (1), /* int load */ 225 COSTS_N_INSNS (1), /* int signed load */ 226 COSTS_N_INSNS (1), /* int zeroed load */ 227 COSTS_N_INSNS (1), /* float load */ 228 COSTS_N_INSNS (1), /* fmov, fneg, fabs */ 229 COSTS_N_INSNS (1), /* fadd, fsub */ 230 COSTS_N_INSNS (1), /* fcmp */ 231 COSTS_N_INSNS (1), /* fmov, fmovr */ 232 COSTS_N_INSNS (1), /* fmul */ 233 COSTS_N_INSNS (15), /* fdivs */ 234 COSTS_N_INSNS (15), /* fdivd */ 235 COSTS_N_INSNS (23), /* fsqrts */ 236 COSTS_N_INSNS (23), /* fsqrtd */ 237 COSTS_N_INSNS (5), /* imul */ 238 COSTS_N_INSNS (5), /* imulX */ 239 0, /* imul bit factor */ 240 COSTS_N_INSNS (5), /* idiv */ 241 COSTS_N_INSNS (5), /* idivX */ 242 COSTS_N_INSNS (1), /* movcc/movr */ 243 0, /* shift penalty */ 244 3 /* branch cost */ 245 }; 246 247 static const 248 struct processor_costs leon3_costs = { 249 COSTS_N_INSNS (1), /* int load */ 250 COSTS_N_INSNS (1), /* int signed load */ 251 
COSTS_N_INSNS (1), /* int zeroed load */ 252 COSTS_N_INSNS (1), /* float load */ 253 COSTS_N_INSNS (1), /* fmov, fneg, fabs */ 254 COSTS_N_INSNS (1), /* fadd, fsub */ 255 COSTS_N_INSNS (1), /* fcmp */ 256 COSTS_N_INSNS (1), /* fmov, fmovr */ 257 COSTS_N_INSNS (1), /* fmul */ 258 COSTS_N_INSNS (14), /* fdivs */ 259 COSTS_N_INSNS (15), /* fdivd */ 260 COSTS_N_INSNS (22), /* fsqrts */ 261 COSTS_N_INSNS (23), /* fsqrtd */ 262 COSTS_N_INSNS (5), /* imul */ 263 COSTS_N_INSNS (5), /* imulX */ 264 0, /* imul bit factor */ 265 COSTS_N_INSNS (35), /* idiv */ 266 COSTS_N_INSNS (35), /* idivX */ 267 COSTS_N_INSNS (1), /* movcc/movr */ 268 0, /* shift penalty */ 269 3 /* branch cost */ 270 }; 271 272 static const 273 struct processor_costs sparclet_costs = { 274 COSTS_N_INSNS (3), /* int load */ 275 COSTS_N_INSNS (3), /* int signed load */ 276 COSTS_N_INSNS (1), /* int zeroed load */ 277 COSTS_N_INSNS (1), /* float load */ 278 COSTS_N_INSNS (1), /* fmov, fneg, fabs */ 279 COSTS_N_INSNS (1), /* fadd, fsub */ 280 COSTS_N_INSNS (1), /* fcmp */ 281 COSTS_N_INSNS (1), /* fmov, fmovr */ 282 COSTS_N_INSNS (1), /* fmul */ 283 COSTS_N_INSNS (1), /* fdivs */ 284 COSTS_N_INSNS (1), /* fdivd */ 285 COSTS_N_INSNS (1), /* fsqrts */ 286 COSTS_N_INSNS (1), /* fsqrtd */ 287 COSTS_N_INSNS (5), /* imul */ 288 COSTS_N_INSNS (5), /* imulX */ 289 0, /* imul bit factor */ 290 COSTS_N_INSNS (5), /* idiv */ 291 COSTS_N_INSNS (5), /* idivX */ 292 COSTS_N_INSNS (1), /* movcc/movr */ 293 0, /* shift penalty */ 294 3 /* branch cost */ 295 }; 296 297 static const 298 struct processor_costs ultrasparc_costs = { 299 COSTS_N_INSNS (2), /* int load */ 300 COSTS_N_INSNS (3), /* int signed load */ 301 COSTS_N_INSNS (2), /* int zeroed load */ 302 COSTS_N_INSNS (2), /* float load */ 303 COSTS_N_INSNS (1), /* fmov, fneg, fabs */ 304 COSTS_N_INSNS (4), /* fadd, fsub */ 305 COSTS_N_INSNS (1), /* fcmp */ 306 COSTS_N_INSNS (2), /* fmov, fmovr */ 307 COSTS_N_INSNS (4), /* fmul */ 308 COSTS_N_INSNS (13), /* fdivs */ 309 COSTS_N_INSNS (23), /* fdivd */ 310 COSTS_N_INSNS (13), /* fsqrts */ 311 COSTS_N_INSNS (23), /* fsqrtd */ 312 COSTS_N_INSNS (4), /* imul */ 313 COSTS_N_INSNS (4), /* imulX */ 314 2, /* imul bit factor */ 315 COSTS_N_INSNS (37), /* idiv */ 316 COSTS_N_INSNS (68), /* idivX */ 317 COSTS_N_INSNS (2), /* movcc/movr */ 318 2, /* shift penalty */ 319 2 /* branch cost */ 320 }; 321 322 static const 323 struct processor_costs ultrasparc3_costs = { 324 COSTS_N_INSNS (2), /* int load */ 325 COSTS_N_INSNS (3), /* int signed load */ 326 COSTS_N_INSNS (3), /* int zeroed load */ 327 COSTS_N_INSNS (2), /* float load */ 328 COSTS_N_INSNS (3), /* fmov, fneg, fabs */ 329 COSTS_N_INSNS (4), /* fadd, fsub */ 330 COSTS_N_INSNS (5), /* fcmp */ 331 COSTS_N_INSNS (3), /* fmov, fmovr */ 332 COSTS_N_INSNS (4), /* fmul */ 333 COSTS_N_INSNS (17), /* fdivs */ 334 COSTS_N_INSNS (20), /* fdivd */ 335 COSTS_N_INSNS (20), /* fsqrts */ 336 COSTS_N_INSNS (29), /* fsqrtd */ 337 COSTS_N_INSNS (6), /* imul */ 338 COSTS_N_INSNS (6), /* imulX */ 339 0, /* imul bit factor */ 340 COSTS_N_INSNS (40), /* idiv */ 341 COSTS_N_INSNS (71), /* idivX */ 342 COSTS_N_INSNS (2), /* movcc/movr */ 343 0, /* shift penalty */ 344 2 /* branch cost */ 345 }; 346 347 static const 348 struct processor_costs niagara_costs = { 349 COSTS_N_INSNS (3), /* int load */ 350 COSTS_N_INSNS (3), /* int signed load */ 351 COSTS_N_INSNS (3), /* int zeroed load */ 352 COSTS_N_INSNS (9), /* float load */ 353 COSTS_N_INSNS (8), /* fmov, fneg, fabs */ 354 COSTS_N_INSNS (8), /* fadd, fsub */ 355 COSTS_N_INSNS 
(26), /* fcmp */ 356 COSTS_N_INSNS (8), /* fmov, fmovr */ 357 COSTS_N_INSNS (29), /* fmul */ 358 COSTS_N_INSNS (54), /* fdivs */ 359 COSTS_N_INSNS (83), /* fdivd */ 360 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */ 361 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */ 362 COSTS_N_INSNS (11), /* imul */ 363 COSTS_N_INSNS (11), /* imulX */ 364 0, /* imul bit factor */ 365 COSTS_N_INSNS (72), /* idiv */ 366 COSTS_N_INSNS (72), /* idivX */ 367 COSTS_N_INSNS (1), /* movcc/movr */ 368 0, /* shift penalty */ 369 4 /* branch cost */ 370 }; 371 372 static const 373 struct processor_costs niagara2_costs = { 374 COSTS_N_INSNS (3), /* int load */ 375 COSTS_N_INSNS (3), /* int signed load */ 376 COSTS_N_INSNS (3), /* int zeroed load */ 377 COSTS_N_INSNS (3), /* float load */ 378 COSTS_N_INSNS (6), /* fmov, fneg, fabs */ 379 COSTS_N_INSNS (6), /* fadd, fsub */ 380 COSTS_N_INSNS (6), /* fcmp */ 381 COSTS_N_INSNS (6), /* fmov, fmovr */ 382 COSTS_N_INSNS (6), /* fmul */ 383 COSTS_N_INSNS (19), /* fdivs */ 384 COSTS_N_INSNS (33), /* fdivd */ 385 COSTS_N_INSNS (19), /* fsqrts */ 386 COSTS_N_INSNS (33), /* fsqrtd */ 387 COSTS_N_INSNS (5), /* imul */ 388 COSTS_N_INSNS (5), /* imulX */ 389 0, /* imul bit factor */ 390 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */ 391 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */ 392 COSTS_N_INSNS (1), /* movcc/movr */ 393 0, /* shift penalty */ 394 5 /* branch cost */ 395 }; 396 397 static const 398 struct processor_costs niagara3_costs = { 399 COSTS_N_INSNS (3), /* int load */ 400 COSTS_N_INSNS (3), /* int signed load */ 401 COSTS_N_INSNS (3), /* int zeroed load */ 402 COSTS_N_INSNS (3), /* float load */ 403 COSTS_N_INSNS (9), /* fmov, fneg, fabs */ 404 COSTS_N_INSNS (9), /* fadd, fsub */ 405 COSTS_N_INSNS (9), /* fcmp */ 406 COSTS_N_INSNS (9), /* fmov, fmovr */ 407 COSTS_N_INSNS (9), /* fmul */ 408 COSTS_N_INSNS (23), /* fdivs */ 409 COSTS_N_INSNS (37), /* fdivd */ 410 COSTS_N_INSNS (23), /* fsqrts */ 411 COSTS_N_INSNS (37), /* fsqrtd */ 412 COSTS_N_INSNS (9), /* imul */ 413 COSTS_N_INSNS (9), /* imulX */ 414 0, /* imul bit factor */ 415 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */ 416 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */ 417 COSTS_N_INSNS (1), /* movcc/movr */ 418 0, /* shift penalty */ 419 5 /* branch cost */ 420 }; 421 422 static const 423 struct processor_costs niagara4_costs = { 424 COSTS_N_INSNS (5), /* int load */ 425 COSTS_N_INSNS (5), /* int signed load */ 426 COSTS_N_INSNS (5), /* int zeroed load */ 427 COSTS_N_INSNS (5), /* float load */ 428 COSTS_N_INSNS (11), /* fmov, fneg, fabs */ 429 COSTS_N_INSNS (11), /* fadd, fsub */ 430 COSTS_N_INSNS (11), /* fcmp */ 431 COSTS_N_INSNS (11), /* fmov, fmovr */ 432 COSTS_N_INSNS (11), /* fmul */ 433 COSTS_N_INSNS (24), /* fdivs */ 434 COSTS_N_INSNS (37), /* fdivd */ 435 COSTS_N_INSNS (24), /* fsqrts */ 436 COSTS_N_INSNS (37), /* fsqrtd */ 437 COSTS_N_INSNS (12), /* imul */ 438 COSTS_N_INSNS (12), /* imulX */ 439 0, /* imul bit factor */ 440 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */ 441 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */ 442 COSTS_N_INSNS (1), /* movcc/movr */ 443 0, /* shift penalty */ 444 2 /* branch cost */ 445 }; 446 447 static const 448 struct processor_costs niagara7_costs = { 449 COSTS_N_INSNS (5), /* int load */ 450 COSTS_N_INSNS (5), /* int signed load */ 451 COSTS_N_INSNS (5), /* int zeroed load */ 452 COSTS_N_INSNS (5), /* float load */ 453 COSTS_N_INSNS 
(11), /* fmov, fneg, fabs */ 454 COSTS_N_INSNS (11), /* fadd, fsub */ 455 COSTS_N_INSNS (11), /* fcmp */ 456 COSTS_N_INSNS (11), /* fmov, fmovr */ 457 COSTS_N_INSNS (11), /* fmul */ 458 COSTS_N_INSNS (24), /* fdivs */ 459 COSTS_N_INSNS (37), /* fdivd */ 460 COSTS_N_INSNS (24), /* fsqrts */ 461 COSTS_N_INSNS (37), /* fsqrtd */ 462 COSTS_N_INSNS (12), /* imul */ 463 COSTS_N_INSNS (12), /* imulX */ 464 0, /* imul bit factor */ 465 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */ 466 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */ 467 COSTS_N_INSNS (1), /* movcc/movr */ 468 0, /* shift penalty */ 469 1 /* branch cost */ 470 }; 471 472 static const 473 struct processor_costs m8_costs = { 474 COSTS_N_INSNS (3), /* int load */ 475 COSTS_N_INSNS (3), /* int signed load */ 476 COSTS_N_INSNS (3), /* int zeroed load */ 477 COSTS_N_INSNS (3), /* float load */ 478 COSTS_N_INSNS (9), /* fmov, fneg, fabs */ 479 COSTS_N_INSNS (9), /* fadd, fsub */ 480 COSTS_N_INSNS (9), /* fcmp */ 481 COSTS_N_INSNS (9), /* fmov, fmovr */ 482 COSTS_N_INSNS (9), /* fmul */ 483 COSTS_N_INSNS (26), /* fdivs */ 484 COSTS_N_INSNS (30), /* fdivd */ 485 COSTS_N_INSNS (33), /* fsqrts */ 486 COSTS_N_INSNS (41), /* fsqrtd */ 487 COSTS_N_INSNS (12), /* imul */ 488 COSTS_N_INSNS (10), /* imulX */ 489 0, /* imul bit factor */ 490 COSTS_N_INSNS (57), /* udiv/sdiv */ 491 COSTS_N_INSNS (30), /* udivx/sdivx */ 492 COSTS_N_INSNS (1), /* movcc/movr */ 493 0, /* shift penalty */ 494 1 /* branch cost */ 495 }; 496 497 static const struct processor_costs *sparc_costs = &cypress_costs; 498 499 #ifdef HAVE_AS_RELAX_OPTION 500 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use 501 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized. 502 With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if 503 somebody does not branch between the sethi and jmp. */ 504 #define LEAF_SIBCALL_SLOT_RESERVED_P 1 505 #else 506 #define LEAF_SIBCALL_SLOT_RESERVED_P \ 507 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic) 508 #endif 509 510 /* Vector to say how input registers are mapped to output registers. 511 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to 512 eliminate it. You must use -fomit-frame-pointer to get that. */ 513 char leaf_reg_remap[] = 514 { 0, 1, 2, 3, 4, 5, 6, 7, 515 -1, -1, -1, -1, -1, -1, 14, -1, 516 -1, -1, -1, -1, -1, -1, -1, -1, 517 8, 9, 10, 11, 12, 13, -1, 15, 518 519 32, 33, 34, 35, 36, 37, 38, 39, 520 40, 41, 42, 43, 44, 45, 46, 47, 521 48, 49, 50, 51, 52, 53, 54, 55, 522 56, 57, 58, 59, 60, 61, 62, 63, 523 64, 65, 66, 67, 68, 69, 70, 71, 524 72, 73, 74, 75, 76, 77, 78, 79, 525 80, 81, 82, 83, 84, 85, 86, 87, 526 88, 89, 90, 91, 92, 93, 94, 95, 527 96, 97, 98, 99, 100, 101, 102}; 528 529 /* Vector, indexed by hard register number, which contains 1 530 for a register that is allowable in a candidate for leaf 531 function treatment. */ 532 char sparc_leaf_regs[] = 533 { 1, 1, 1, 1, 1, 1, 1, 1, 534 0, 0, 0, 0, 0, 0, 1, 0, 535 0, 0, 0, 0, 0, 0, 0, 0, 536 1, 1, 1, 1, 1, 1, 0, 1, 537 1, 1, 1, 1, 1, 1, 1, 1, 538 1, 1, 1, 1, 1, 1, 1, 1, 539 1, 1, 1, 1, 1, 1, 1, 1, 540 1, 1, 1, 1, 1, 1, 1, 1, 541 1, 1, 1, 1, 1, 1, 1, 1, 542 1, 1, 1, 1, 1, 1, 1, 1, 543 1, 1, 1, 1, 1, 1, 1, 1, 544 1, 1, 1, 1, 1, 1, 1, 1, 545 1, 1, 1, 1, 1, 1, 1}; 546 547 struct GTY(()) machine_function 548 { 549 /* Size of the frame of the function. 
*/ 550 HOST_WIDE_INT frame_size; 551 552 /* Size of the frame of the function minus the register window save area 553 and the outgoing argument area. */ 554 HOST_WIDE_INT apparent_frame_size; 555 556 /* Register we pretend the frame pointer is allocated to. Normally, this 557 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We 558 record "offset" separately as it may be too big for (reg + disp). */ 559 rtx frame_base_reg; 560 HOST_WIDE_INT frame_base_offset; 561 562 /* Number of global or FP registers to be saved (as 4-byte quantities). */ 563 int n_global_fp_regs; 564 565 /* True if the current function is leaf and uses only leaf regs, 566 so that the SPARC leaf function optimization can be applied. 567 Private version of crtl->uses_only_leaf_regs, see 568 sparc_expand_prologue for the rationale. */ 569 int leaf_function_p; 570 571 /* True if the prologue saves local or in registers. */ 572 bool save_local_in_regs_p; 573 574 /* True if the data calculated by sparc_expand_prologue are valid. */ 575 bool prologue_data_valid_p; 576 }; 577 578 #define sparc_frame_size cfun->machine->frame_size 579 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size 580 #define sparc_frame_base_reg cfun->machine->frame_base_reg 581 #define sparc_frame_base_offset cfun->machine->frame_base_offset 582 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs 583 #define sparc_leaf_function_p cfun->machine->leaf_function_p 584 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p 585 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p 586 587 /* 1 if the next opcode is to be specially indented. */ 588 int sparc_indent_opcode = 0; 589 590 static void sparc_option_override (void); 591 static void sparc_init_modes (void); 592 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode, 593 const_tree, bool, bool, int *, int *); 594 595 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int); 596 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int); 597 598 static void sparc_emit_set_const32 (rtx, rtx); 599 static void sparc_emit_set_const64 (rtx, rtx); 600 static void sparc_output_addr_vec (rtx); 601 static void sparc_output_addr_diff_vec (rtx); 602 static void sparc_output_deferred_case_vectors (void); 603 static bool sparc_legitimate_address_p (machine_mode, rtx, bool); 604 static bool sparc_legitimate_constant_p (machine_mode, rtx); 605 static rtx sparc_builtin_saveregs (void); 606 static int epilogue_renumber (rtx *, int); 607 static bool sparc_assemble_integer (rtx, unsigned int, int); 608 static int set_extends (rtx_insn *); 609 static void sparc_asm_function_prologue (FILE *); 610 static void sparc_asm_function_epilogue (FILE *); 611 #ifdef TARGET_SOLARIS 612 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int, 613 tree) ATTRIBUTE_UNUSED; 614 #endif 615 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int); 616 static int sparc_issue_rate (void); 617 static void sparc_sched_init (FILE *, int, int); 618 static int sparc_use_sched_lookahead (void); 619 620 static void emit_soft_tfmode_libcall (const char *, int, rtx *); 621 static void emit_soft_tfmode_binop (enum rtx_code, rtx *); 622 static void emit_soft_tfmode_unop (enum rtx_code, rtx *); 623 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *); 624 static void emit_hard_tfmode_operation (enum rtx_code, rtx *); 625 626 static bool sparc_function_ok_for_sibcall (tree, tree); 627 static void 
sparc_init_libfuncs (void); 628 static void sparc_init_builtins (void); 629 static void sparc_fpu_init_builtins (void); 630 static void sparc_vis_init_builtins (void); 631 static tree sparc_builtin_decl (unsigned, bool); 632 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int); 633 static tree sparc_fold_builtin (tree, int, tree *, bool); 634 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, 635 HOST_WIDE_INT, tree); 636 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT, 637 HOST_WIDE_INT, const_tree); 638 static struct machine_function * sparc_init_machine_status (void); 639 static bool sparc_cannot_force_const_mem (machine_mode, rtx); 640 static rtx sparc_tls_get_addr (void); 641 static rtx sparc_tls_got (void); 642 static int sparc_register_move_cost (machine_mode, 643 reg_class_t, reg_class_t); 644 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool); 645 static machine_mode sparc_promote_function_mode (const_tree, machine_mode, 646 int *, const_tree, int); 647 static bool sparc_strict_argument_naming (cumulative_args_t); 648 static void sparc_va_start (tree, rtx); 649 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *); 650 static bool sparc_vector_mode_supported_p (machine_mode); 651 static bool sparc_tls_referenced_p (rtx); 652 static rtx sparc_legitimize_tls_address (rtx); 653 static rtx sparc_legitimize_pic_address (rtx, rtx); 654 static rtx sparc_legitimize_address (rtx, rtx, machine_mode); 655 static rtx sparc_delegitimize_address (rtx); 656 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t); 657 static bool sparc_pass_by_reference (cumulative_args_t, 658 machine_mode, const_tree, bool); 659 static void sparc_function_arg_advance (cumulative_args_t, 660 machine_mode, const_tree, bool); 661 static rtx sparc_function_arg_1 (cumulative_args_t, 662 machine_mode, const_tree, bool, bool); 663 static rtx sparc_function_arg (cumulative_args_t, 664 machine_mode, const_tree, bool); 665 static rtx sparc_function_incoming_arg (cumulative_args_t, 666 machine_mode, const_tree, bool); 667 static pad_direction sparc_function_arg_padding (machine_mode, const_tree); 668 static unsigned int sparc_function_arg_boundary (machine_mode, 669 const_tree); 670 static int sparc_arg_partial_bytes (cumulative_args_t, 671 machine_mode, tree, bool); 672 static bool sparc_return_in_memory (const_tree, const_tree); 673 static rtx sparc_struct_value_rtx (tree, int); 674 static rtx sparc_function_value (const_tree, const_tree, bool); 675 static rtx sparc_libcall_value (machine_mode, const_rtx); 676 static bool sparc_function_value_regno_p (const unsigned int); 677 static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void); 678 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 679 static void sparc_file_end (void); 680 static bool sparc_frame_pointer_required (void); 681 static bool sparc_can_eliminate (const int, const int); 682 static rtx sparc_builtin_setjmp_frame_value (void); 683 static void sparc_conditional_register_usage (void); 684 static bool sparc_use_pseudo_pic_reg (void); 685 static void sparc_init_pic_reg (void); 686 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING 687 static const char *sparc_mangle_type (const_tree); 688 #endif 689 static void sparc_trampoline_init (rtx, tree, rtx); 690 static machine_mode sparc_preferred_simd_mode (scalar_mode); 691 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass); 692 static bool sparc_lra_p (void); 693 static 
bool sparc_print_operand_punct_valid_p (unsigned char); 694 static void sparc_print_operand (FILE *, rtx, int); 695 static void sparc_print_operand_address (FILE *, machine_mode, rtx); 696 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t, 697 machine_mode, 698 secondary_reload_info *); 699 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t, 700 reg_class_t); 701 static machine_mode sparc_secondary_memory_needed_mode (machine_mode); 702 static scalar_int_mode sparc_cstore_mode (enum insn_code icode); 703 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *); 704 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *); 705 static unsigned int sparc_min_arithmetic_precision (void); 706 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode); 707 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode); 708 static bool sparc_modes_tieable_p (machine_mode, machine_mode); 709 static bool sparc_can_change_mode_class (machine_mode, machine_mode, 710 reg_class_t); 711 static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT); 712 static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, 713 const vec_perm_indices &); 714 static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *); 715 716 #ifdef SUBTARGET_ATTRIBUTE_TABLE 717 /* Table of valid machine attributes. */ 718 static const struct attribute_spec sparc_attribute_table[] = 719 { 720 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, 721 do_diagnostic, handler, exclude } */ 722 SUBTARGET_ATTRIBUTE_TABLE, 723 { NULL, 0, 0, false, false, false, false, NULL, NULL } 724 }; 725 #endif 726 727 char sparc_hard_reg_printed[8]; 728 729 /* Initialize the GCC target structure. */ 730 731 /* The default is to use .half rather than .short for aligned HI objects. */ 732 #undef TARGET_ASM_ALIGNED_HI_OP 733 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" 734 735 #undef TARGET_ASM_UNALIGNED_HI_OP 736 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t" 737 #undef TARGET_ASM_UNALIGNED_SI_OP 738 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t" 739 #undef TARGET_ASM_UNALIGNED_DI_OP 740 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t" 741 742 /* The target hook has to handle DI-mode values. 
*/ 743 #undef TARGET_ASM_INTEGER 744 #define TARGET_ASM_INTEGER sparc_assemble_integer 745 746 #undef TARGET_ASM_FUNCTION_PROLOGUE 747 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue 748 #undef TARGET_ASM_FUNCTION_EPILOGUE 749 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue 750 751 #undef TARGET_SCHED_ADJUST_COST 752 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost 753 #undef TARGET_SCHED_ISSUE_RATE 754 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate 755 #undef TARGET_SCHED_INIT 756 #define TARGET_SCHED_INIT sparc_sched_init 757 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 758 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead 759 760 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 761 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall 762 763 #undef TARGET_INIT_LIBFUNCS 764 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs 765 766 #undef TARGET_LEGITIMIZE_ADDRESS 767 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address 768 #undef TARGET_DELEGITIMIZE_ADDRESS 769 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address 770 #undef TARGET_MODE_DEPENDENT_ADDRESS_P 771 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p 772 773 #undef TARGET_INIT_BUILTINS 774 #define TARGET_INIT_BUILTINS sparc_init_builtins 775 #undef TARGET_BUILTIN_DECL 776 #define TARGET_BUILTIN_DECL sparc_builtin_decl 777 #undef TARGET_EXPAND_BUILTIN 778 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin 779 #undef TARGET_FOLD_BUILTIN 780 #define TARGET_FOLD_BUILTIN sparc_fold_builtin 781 782 #if TARGET_TLS 783 #undef TARGET_HAVE_TLS 784 #define TARGET_HAVE_TLS true 785 #endif 786 787 #undef TARGET_CANNOT_FORCE_CONST_MEM 788 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem 789 790 #undef TARGET_ASM_OUTPUT_MI_THUNK 791 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk 792 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 793 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk 794 795 #undef TARGET_RTX_COSTS 796 #define TARGET_RTX_COSTS sparc_rtx_costs 797 #undef TARGET_ADDRESS_COST 798 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 799 #undef TARGET_REGISTER_MOVE_COST 800 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost 801 802 #undef TARGET_PROMOTE_FUNCTION_MODE 803 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode 804 #undef TARGET_STRICT_ARGUMENT_NAMING 805 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming 806 807 #undef TARGET_MUST_PASS_IN_STACK 808 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size 809 #undef TARGET_PASS_BY_REFERENCE 810 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference 811 #undef TARGET_ARG_PARTIAL_BYTES 812 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes 813 #undef TARGET_FUNCTION_ARG_ADVANCE 814 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance 815 #undef TARGET_FUNCTION_ARG 816 #define TARGET_FUNCTION_ARG sparc_function_arg 817 #undef TARGET_FUNCTION_INCOMING_ARG 818 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg 819 #undef TARGET_FUNCTION_ARG_PADDING 820 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding 821 #undef TARGET_FUNCTION_ARG_BOUNDARY 822 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary 823 824 #undef TARGET_RETURN_IN_MEMORY 825 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory 826 #undef TARGET_STRUCT_VALUE_RTX 827 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx 828 #undef TARGET_FUNCTION_VALUE 829 
#define TARGET_FUNCTION_VALUE sparc_function_value 830 #undef TARGET_LIBCALL_VALUE 831 #define TARGET_LIBCALL_VALUE sparc_libcall_value 832 #undef TARGET_FUNCTION_VALUE_REGNO_P 833 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p 834 835 #undef TARGET_EXPAND_BUILTIN_SAVEREGS 836 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs 837 838 #undef TARGET_ASAN_SHADOW_OFFSET 839 #define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset 840 841 #undef TARGET_EXPAND_BUILTIN_VA_START 842 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start 843 #undef TARGET_GIMPLIFY_VA_ARG_EXPR 844 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg 845 846 #undef TARGET_VECTOR_MODE_SUPPORTED_P 847 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p 848 849 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE 850 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode 851 852 #ifdef SUBTARGET_INSERT_ATTRIBUTES 853 #undef TARGET_INSERT_ATTRIBUTES 854 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES 855 #endif 856 857 #ifdef SUBTARGET_ATTRIBUTE_TABLE 858 #undef TARGET_ATTRIBUTE_TABLE 859 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table 860 #endif 861 862 #undef TARGET_OPTION_OVERRIDE 863 #define TARGET_OPTION_OVERRIDE sparc_option_override 864 865 #ifdef TARGET_THREAD_SSP_OFFSET 866 #undef TARGET_STACK_PROTECT_GUARD 867 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null 868 #endif 869 870 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL) 871 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL 872 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel 873 #endif 874 875 #undef TARGET_ASM_FILE_END 876 #define TARGET_ASM_FILE_END sparc_file_end 877 878 #undef TARGET_FRAME_POINTER_REQUIRED 879 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required 880 881 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE 882 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value 883 884 #undef TARGET_CAN_ELIMINATE 885 #define TARGET_CAN_ELIMINATE sparc_can_eliminate 886 887 #undef TARGET_PREFERRED_RELOAD_CLASS 888 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class 889 890 #undef TARGET_SECONDARY_RELOAD 891 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload 892 #undef TARGET_SECONDARY_MEMORY_NEEDED 893 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed 894 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE 895 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode 896 897 #undef TARGET_CONDITIONAL_REGISTER_USAGE 898 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage 899 900 #undef TARGET_INIT_PIC_REG 901 #define TARGET_INIT_PIC_REG sparc_init_pic_reg 902 903 #undef TARGET_USE_PSEUDO_PIC_REG 904 #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg 905 906 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING 907 #undef TARGET_MANGLE_TYPE 908 #define TARGET_MANGLE_TYPE sparc_mangle_type 909 #endif 910 911 #undef TARGET_LRA_P 912 #define TARGET_LRA_P sparc_lra_p 913 914 #undef TARGET_LEGITIMATE_ADDRESS_P 915 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p 916 917 #undef TARGET_LEGITIMATE_CONSTANT_P 918 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p 919 920 #undef TARGET_TRAMPOLINE_INIT 921 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init 922 923 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P 924 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p 925 #undef TARGET_PRINT_OPERAND 926 #define TARGET_PRINT_OPERAND 
sparc_print_operand 927 #undef TARGET_PRINT_OPERAND_ADDRESS 928 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address 929 930 /* The value stored by LDSTUB. */ 931 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 932 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff 933 934 #undef TARGET_CSTORE_MODE 935 #define TARGET_CSTORE_MODE sparc_cstore_mode 936 937 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV 938 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv 939 940 #undef TARGET_FIXED_CONDITION_CODE_REGS 941 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs 942 943 #undef TARGET_MIN_ARITHMETIC_PRECISION 944 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision 945 946 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS 947 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1 948 949 #undef TARGET_HARD_REGNO_NREGS 950 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs 951 #undef TARGET_HARD_REGNO_MODE_OK 952 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok 953 954 #undef TARGET_MODES_TIEABLE_P 955 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p 956 957 #undef TARGET_CAN_CHANGE_MODE_CLASS 958 #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class 959 960 #undef TARGET_CONSTANT_ALIGNMENT 961 #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment 962 963 #undef TARGET_VECTORIZE_VEC_PERM_CONST 964 #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const 965 966 #undef TARGET_CAN_FOLLOW_JUMP 967 #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump 968 969 struct gcc_target targetm = TARGET_INITIALIZER; 970 971 /* Return the memory reference contained in X if any, zero otherwise. */ 972 973 static rtx 974 mem_ref (rtx x) 975 { 976 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND) 977 x = XEXP (x, 0); 978 979 if (MEM_P (x)) 980 return x; 981 982 return NULL_RTX; 983 } 984 985 /* True if any of INSN's source register(s) is REG. */ 986 987 static bool 988 insn_uses_reg_p (rtx_insn *insn, unsigned int reg) 989 { 990 extract_insn (insn); 991 return ((REG_P (recog_data.operand[1]) 992 && REGNO (recog_data.operand[1]) == reg) 993 || (recog_data.n_operands == 3 994 && REG_P (recog_data.operand[2]) 995 && REGNO (recog_data.operand[2]) == reg)); 996 } 997 998 /* True if INSN is a floating-point division or square-root. */ 999 1000 static bool 1001 div_sqrt_insn_p (rtx_insn *insn) 1002 { 1003 if (GET_CODE (PATTERN (insn)) != SET) 1004 return false; 1005 1006 switch (get_attr_type (insn)) 1007 { 1008 case TYPE_FPDIVS: 1009 case TYPE_FPSQRTS: 1010 case TYPE_FPDIVD: 1011 case TYPE_FPSQRTD: 1012 return true; 1013 default: 1014 return false; 1015 } 1016 } 1017 1018 /* True if INSN is a floating-point instruction. */ 1019 1020 static bool 1021 fpop_insn_p (rtx_insn *insn) 1022 { 1023 if (GET_CODE (PATTERN (insn)) != SET) 1024 return false; 1025 1026 switch (get_attr_type (insn)) 1027 { 1028 case TYPE_FPMOVE: 1029 case TYPE_FPCMOVE: 1030 case TYPE_FP: 1031 case TYPE_FPCMP: 1032 case TYPE_FPMUL: 1033 case TYPE_FPDIVS: 1034 case TYPE_FPSQRTS: 1035 case TYPE_FPDIVD: 1036 case TYPE_FPSQRTD: 1037 return true; 1038 default: 1039 return false; 1040 } 1041 } 1042 1043 /* True if INSN is an atomic instruction. 
*/ 1044 1045 static bool 1046 atomic_insn_for_leon3_p (rtx_insn *insn) 1047 { 1048 switch (INSN_CODE (insn)) 1049 { 1050 case CODE_FOR_swapsi: 1051 case CODE_FOR_ldstub: 1052 case CODE_FOR_atomic_compare_and_swap_leon3_1: 1053 return true; 1054 default: 1055 return false; 1056 } 1057 } 1058 1059 /* We use a machine specific pass to enable workarounds for errata. 1060 1061 We need to have the (essentially) final form of the insn stream in order 1062 to properly detect the various hazards. Therefore, this machine specific 1063 pass runs as late as possible. */ 1064 1065 /* True if INSN is a md pattern or asm statement. */ 1066 #define USEFUL_INSN_P(INSN) \ 1067 (NONDEBUG_INSN_P (INSN) \ 1068 && GET_CODE (PATTERN (INSN)) != USE \ 1069 && GET_CODE (PATTERN (INSN)) != CLOBBER) 1070 1071 static unsigned int 1072 sparc_do_work_around_errata (void) 1073 { 1074 rtx_insn *insn, *next; 1075 1076 /* Force all instructions to be split into their final form. */ 1077 split_all_insns_noflow (); 1078 1079 /* Now look for specific patterns in the insn stream. */ 1080 for (insn = get_insns (); insn; insn = next) 1081 { 1082 bool insert_nop = false; 1083 rtx set; 1084 rtx_insn *jump; 1085 rtx_sequence *seq; 1086 1087 /* Look into the instruction in a delay slot. */ 1088 if (NONJUMP_INSN_P (insn) 1089 && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))) 1090 { 1091 jump = seq->insn (0); 1092 insn = seq->insn (1); 1093 } 1094 else if (JUMP_P (insn)) 1095 jump = insn; 1096 else 1097 jump = NULL; 1098 1099 /* Place a NOP at the branch target of an integer branch if it is a 1100 floating-point operation or a floating-point branch. */ 1101 if (sparc_fix_gr712rc 1102 && jump 1103 && jump_to_label_p (jump) 1104 && get_attr_branch_type (jump) == BRANCH_TYPE_ICC) 1105 { 1106 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump)); 1107 if (target 1108 && (fpop_insn_p (target) 1109 || (JUMP_P (target) 1110 && get_attr_branch_type (target) == BRANCH_TYPE_FCC))) 1111 emit_insn_before (gen_nop (), target); 1112 } 1113 1114 /* Insert a NOP between load instruction and atomic instruction. Insert 1115 a NOP at branch target if there is a load in delay slot and an atomic 1116 instruction at branch target. */ 1117 if (sparc_fix_ut700 1118 && NONJUMP_INSN_P (insn) 1119 && (set = single_set (insn)) != NULL_RTX 1120 && mem_ref (SET_SRC (set)) 1121 && REG_P (SET_DEST (set))) 1122 { 1123 if (jump && jump_to_label_p (jump)) 1124 { 1125 rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump)); 1126 if (target && atomic_insn_for_leon3_p (target)) 1127 emit_insn_before (gen_nop (), target); 1128 } 1129 1130 next = next_active_insn (insn); 1131 if (!next) 1132 break; 1133 1134 if (atomic_insn_for_leon3_p (next)) 1135 insert_nop = true; 1136 } 1137 1138 /* Look for a sequence that starts with a fdiv or fsqrt instruction and 1139 ends with another fdiv or fsqrt instruction with no dependencies on 1140 the former, along with an appropriate pattern in between. */ 1141 if (sparc_fix_lost_divsqrt 1142 && NONJUMP_INSN_P (insn) 1143 && div_sqrt_insn_p (insn)) 1144 { 1145 int i; 1146 int fp_found = 0; 1147 rtx_insn *after; 1148 1149 const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn))); 1150 1151 next = next_active_insn (insn); 1152 if (!next) 1153 break; 1154 1155 for (after = next, i = 0; i < 4; i++) 1156 { 1157 /* Count floating-point operations. */ 1158 if (i != 3 && fpop_insn_p (after)) 1159 { 1160 /* If the insn uses the destination register of 1161 the div/sqrt, then it cannot be problematic. 
*/ 1162 if (insn_uses_reg_p (after, dest_reg)) 1163 break; 1164 fp_found++; 1165 } 1166 1167 /* Count floating-point loads. */ 1168 if (i != 3 1169 && (set = single_set (after)) != NULL_RTX 1170 && REG_P (SET_DEST (set)) 1171 && REGNO (SET_DEST (set)) > 31) 1172 { 1173 /* If the insn uses the destination register of 1174 the div/sqrt, then it cannot be problematic. */ 1175 if (REGNO (SET_DEST (set)) == dest_reg) 1176 break; 1177 fp_found++; 1178 } 1179 1180 /* Check if this is a problematic sequence. */ 1181 if (i > 1 1182 && fp_found >= 2 1183 && div_sqrt_insn_p (after)) 1184 { 1185 /* If this is the short version of the problematic 1186 sequence we add two NOPs in a row to also prevent 1187 the long version. */ 1188 if (i == 2) 1189 emit_insn_before (gen_nop (), next); 1190 insert_nop = true; 1191 break; 1192 } 1193 1194 /* No need to scan past a second div/sqrt. */ 1195 if (div_sqrt_insn_p (after)) 1196 break; 1197 1198 /* Insert NOP before branch. */ 1199 if (i < 3 1200 && (!NONJUMP_INSN_P (after) 1201 || GET_CODE (PATTERN (after)) == SEQUENCE)) 1202 { 1203 insert_nop = true; 1204 break; 1205 } 1206 1207 after = next_active_insn (after); 1208 if (!after) 1209 break; 1210 } 1211 } 1212 1213 /* Look for either of these two sequences: 1214 1215 Sequence A: 1216 1. store of word size or less (e.g. st / stb / sth / stf) 1217 2. any single instruction that is not a load or store 1218 3. any store instruction (e.g. st / stb / sth / stf / std / stdf) 1219 1220 Sequence B: 1221 1. store of double word size (e.g. std / stdf) 1222 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */ 1223 if (sparc_fix_b2bst 1224 && NONJUMP_INSN_P (insn) 1225 && (set = single_set (insn)) != NULL_RTX 1226 && MEM_P (SET_DEST (set))) 1227 { 1228 /* Sequence B begins with a double-word store. */ 1229 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8; 1230 rtx_insn *after; 1231 int i; 1232 1233 next = next_active_insn (insn); 1234 if (!next) 1235 break; 1236 1237 for (after = next, i = 0; i < 2; i++) 1238 { 1239 /* Skip empty assembly statements. */ 1240 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE) 1241 || (USEFUL_INSN_P (after) 1242 && (asm_noperands (PATTERN (after))>=0) 1243 && !strcmp (decode_asm_operands (PATTERN (after), 1244 NULL, NULL, NULL, 1245 NULL, NULL), ""))) 1246 after = next_active_insn (after); 1247 if (!after) 1248 break; 1249 1250 /* If the insn is a branch, then it cannot be problematic. */ 1251 if (!NONJUMP_INSN_P (after) 1252 || GET_CODE (PATTERN (after)) == SEQUENCE) 1253 break; 1254 1255 /* Sequence B is only two instructions long. */ 1256 if (seq_b) 1257 { 1258 /* Add NOP if followed by a store. */ 1259 if ((set = single_set (after)) != NULL_RTX 1260 && MEM_P (SET_DEST (set))) 1261 insert_nop = true; 1262 1263 /* Otherwise it is ok. */ 1264 break; 1265 } 1266 1267 /* If the second instruction is a load or a store, 1268 then the sequence cannot be problematic. */ 1269 if (i == 0) 1270 { 1271 if ((set = single_set (after)) != NULL_RTX 1272 && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set)))) 1273 break; 1274 1275 after = next_active_insn (after); 1276 if (!after) 1277 break; 1278 } 1279 1280 /* Add NOP if third instruction is a store. */ 1281 if (i == 1 1282 && (set = single_set (after)) != NULL_RTX 1283 && MEM_P (SET_DEST (set))) 1284 insert_nop = true; 1285 } 1286 } 1287 1288 /* Look for a single-word load into an odd-numbered FP register. 
*/ 1289 else if (sparc_fix_at697f 1290 && NONJUMP_INSN_P (insn) 1291 && (set = single_set (insn)) != NULL_RTX 1292 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4 1293 && mem_ref (SET_SRC (set)) 1294 && REG_P (SET_DEST (set)) 1295 && REGNO (SET_DEST (set)) > 31 1296 && REGNO (SET_DEST (set)) % 2 != 0) 1297 { 1298 /* The wrong dependency is on the enclosing double register. */ 1299 const unsigned int x = REGNO (SET_DEST (set)) - 1; 1300 unsigned int src1, src2, dest; 1301 int code; 1302 1303 next = next_active_insn (insn); 1304 if (!next) 1305 break; 1306 /* If the insn is a branch, then it cannot be problematic. */ 1307 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE) 1308 continue; 1309 1310 extract_insn (next); 1311 code = INSN_CODE (next); 1312 1313 switch (code) 1314 { 1315 case CODE_FOR_adddf3: 1316 case CODE_FOR_subdf3: 1317 case CODE_FOR_muldf3: 1318 case CODE_FOR_divdf3: 1319 dest = REGNO (recog_data.operand[0]); 1320 src1 = REGNO (recog_data.operand[1]); 1321 src2 = REGNO (recog_data.operand[2]); 1322 if (src1 != src2) 1323 { 1324 /* Case [1-4]: 1325 ld [address], %fx+1 1326 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */ 1327 if ((src1 == x || src2 == x) 1328 && (dest == src1 || dest == src2)) 1329 insert_nop = true; 1330 } 1331 else 1332 { 1333 /* Case 5: 1334 ld [address], %fx+1 1335 FPOPd %fx, %fx, %fx */ 1336 if (src1 == x 1337 && dest == src1 1338 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3)) 1339 insert_nop = true; 1340 } 1341 break; 1342 1343 case CODE_FOR_sqrtdf2: 1344 dest = REGNO (recog_data.operand[0]); 1345 src1 = REGNO (recog_data.operand[1]); 1346 /* Case 6: 1347 ld [address], %fx+1 1348 fsqrtd %fx, %fx */ 1349 if (src1 == x && dest == src1) 1350 insert_nop = true; 1351 break; 1352 1353 default: 1354 break; 1355 } 1356 } 1357 1358 /* Look for a single-word load into an integer register. */ 1359 else if (sparc_fix_ut699 1360 && NONJUMP_INSN_P (insn) 1361 && (set = single_set (insn)) != NULL_RTX 1362 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4 1363 && (mem_ref (SET_SRC (set)) != NULL_RTX 1364 || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op) 1365 && REG_P (SET_DEST (set)) 1366 && REGNO (SET_DEST (set)) < 32) 1367 { 1368 /* There is no problem if the second memory access has a data 1369 dependency on the first single-cycle load. */ 1370 rtx x = SET_DEST (set); 1371 1372 next = next_active_insn (insn); 1373 if (!next) 1374 break; 1375 /* If the insn is a branch, then it cannot be problematic. */ 1376 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE) 1377 continue; 1378 1379 /* Look for a second memory access to/from an integer register. */ 1380 if ((set = single_set (next)) != NULL_RTX) 1381 { 1382 rtx src = SET_SRC (set); 1383 rtx dest = SET_DEST (set); 1384 rtx mem; 1385 1386 /* LDD is affected. */ 1387 if ((mem = mem_ref (src)) != NULL_RTX 1388 && REG_P (dest) 1389 && REGNO (dest) < 32 1390 && !reg_mentioned_p (x, XEXP (mem, 0))) 1391 insert_nop = true; 1392 1393 /* STD is *not* affected. */ 1394 else if (MEM_P (dest) 1395 && GET_MODE_SIZE (GET_MODE (dest)) <= 4 1396 && (src == CONST0_RTX (GET_MODE (dest)) 1397 || (REG_P (src) 1398 && REGNO (src) < 32 1399 && REGNO (src) != REGNO (x))) 1400 && !reg_mentioned_p (x, XEXP (dest, 0))) 1401 insert_nop = true; 1402 1403 /* GOT accesses uses LD. */ 1404 else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op 1405 && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1))) 1406 insert_nop = true; 1407 } 1408 } 1409 1410 /* Look for a single-word load/operation into an FP register. 
*/ 1411 else if (sparc_fix_ut699 1412 && NONJUMP_INSN_P (insn) 1413 && (set = single_set (insn)) != NULL_RTX 1414 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4 1415 && REG_P (SET_DEST (set)) 1416 && REGNO (SET_DEST (set)) > 31) 1417 { 1418 /* Number of instructions in the problematic window. */ 1419 const int n_insns = 4; 1420 /* The problematic combination is with the sibling FP register. */ 1421 const unsigned int x = REGNO (SET_DEST (set)); 1422 const unsigned int y = x ^ 1; 1423 rtx_insn *after; 1424 int i; 1425 1426 next = next_active_insn (insn); 1427 if (!next) 1428 break; 1429 /* If the insn is a branch, then it cannot be problematic. */ 1430 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE) 1431 continue; 1432 1433 /* Look for a second load/operation into the sibling FP register. */ 1434 if (!((set = single_set (next)) != NULL_RTX 1435 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4 1436 && REG_P (SET_DEST (set)) 1437 && REGNO (SET_DEST (set)) == y)) 1438 continue; 1439 1440 /* Look for a (possible) store from the FP register in the next N 1441 instructions, but bail out if it is again modified or if there 1442 is a store from the sibling FP register before this store. */ 1443 for (after = next, i = 0; i < n_insns; i++) 1444 { 1445 bool branch_p; 1446 1447 after = next_active_insn (after); 1448 if (!after) 1449 break; 1450 1451 /* This is a branch with an empty delay slot. */ 1452 if (!NONJUMP_INSN_P (after)) 1453 { 1454 if (++i == n_insns) 1455 break; 1456 branch_p = true; 1457 after = NULL; 1458 } 1459 /* This is a branch with a filled delay slot. */ 1460 else if (rtx_sequence *seq = 1461 dyn_cast <rtx_sequence *> (PATTERN (after))) 1462 { 1463 if (++i == n_insns) 1464 break; 1465 branch_p = true; 1466 after = seq->insn (1); 1467 } 1468 /* This is a regular instruction. */ 1469 else 1470 branch_p = false; 1471 1472 if (after && (set = single_set (after)) != NULL_RTX) 1473 { 1474 const rtx src = SET_SRC (set); 1475 const rtx dest = SET_DEST (set); 1476 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest)); 1477 1478 /* If the FP register is again modified before the store, 1479 then the store isn't affected. */ 1480 if (REG_P (dest) 1481 && (REGNO (dest) == x 1482 || (REGNO (dest) == y && size == 8))) 1483 break; 1484 1485 if (MEM_P (dest) && REG_P (src)) 1486 { 1487 /* If there is a store from the sibling FP register 1488 before the store, then the store is not affected. */ 1489 if (REGNO (src) == y || (REGNO (src) == x && size == 8)) 1490 break; 1491 1492 /* Otherwise, the store is affected. */ 1493 if (REGNO (src) == x && size == 4) 1494 { 1495 insert_nop = true; 1496 break; 1497 } 1498 } 1499 } 1500 1501 /* If we have a branch in the first M instructions, then we 1502 cannot see the (M+2)th instruction so we play safe. 
*/ 1503 if (branch_p && i <= (n_insns - 2)) 1504 { 1505 insert_nop = true; 1506 break; 1507 } 1508 } 1509 } 1510 1511 else 1512 next = NEXT_INSN (insn); 1513 1514 if (insert_nop) 1515 emit_insn_before (gen_nop (), next); 1516 } 1517 1518 return 0; 1519 } 1520 1521 namespace { 1522 1523 const pass_data pass_data_work_around_errata = 1524 { 1525 RTL_PASS, /* type */ 1526 "errata", /* name */ 1527 OPTGROUP_NONE, /* optinfo_flags */ 1528 TV_MACH_DEP, /* tv_id */ 1529 0, /* properties_required */ 1530 0, /* properties_provided */ 1531 0, /* properties_destroyed */ 1532 0, /* todo_flags_start */ 1533 0, /* todo_flags_finish */ 1534 }; 1535 1536 class pass_work_around_errata : public rtl_opt_pass 1537 { 1538 public: 1539 pass_work_around_errata(gcc::context *ctxt) 1540 : rtl_opt_pass(pass_data_work_around_errata, ctxt) 1541 {} 1542 1543 /* opt_pass methods: */ 1544 virtual bool gate (function *) 1545 { 1546 return sparc_fix_at697f 1547 || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc 1548 || sparc_fix_b2bst || sparc_fix_lost_divsqrt; 1549 } 1550 1551 virtual unsigned int execute (function *) 1552 { 1553 return sparc_do_work_around_errata (); 1554 } 1555 1556 }; // class pass_work_around_errata 1557 1558 } // anon namespace 1559 1560 rtl_opt_pass * 1561 make_pass_work_around_errata (gcc::context *ctxt) 1562 { 1563 return new pass_work_around_errata (ctxt); 1564 } 1565 1566 /* Helpers for TARGET_DEBUG_OPTIONS. */ 1567 static void 1568 dump_target_flag_bits (const int flags) 1569 { 1570 if (flags & MASK_64BIT) 1571 fprintf (stderr, "64BIT "); 1572 if (flags & MASK_APP_REGS) 1573 fprintf (stderr, "APP_REGS "); 1574 if (flags & MASK_FASTER_STRUCTS) 1575 fprintf (stderr, "FASTER_STRUCTS "); 1576 if (flags & MASK_FLAT) 1577 fprintf (stderr, "FLAT "); 1578 if (flags & MASK_FMAF) 1579 fprintf (stderr, "FMAF "); 1580 if (flags & MASK_FSMULD) 1581 fprintf (stderr, "FSMULD "); 1582 if (flags & MASK_FPU) 1583 fprintf (stderr, "FPU "); 1584 if (flags & MASK_HARD_QUAD) 1585 fprintf (stderr, "HARD_QUAD "); 1586 if (flags & MASK_POPC) 1587 fprintf (stderr, "POPC "); 1588 if (flags & MASK_PTR64) 1589 fprintf (stderr, "PTR64 "); 1590 if (flags & MASK_STACK_BIAS) 1591 fprintf (stderr, "STACK_BIAS "); 1592 if (flags & MASK_UNALIGNED_DOUBLES) 1593 fprintf (stderr, "UNALIGNED_DOUBLES "); 1594 if (flags & MASK_V8PLUS) 1595 fprintf (stderr, "V8PLUS "); 1596 if (flags & MASK_VIS) 1597 fprintf (stderr, "VIS "); 1598 if (flags & MASK_VIS2) 1599 fprintf (stderr, "VIS2 "); 1600 if (flags & MASK_VIS3) 1601 fprintf (stderr, "VIS3 "); 1602 if (flags & MASK_VIS4) 1603 fprintf (stderr, "VIS4 "); 1604 if (flags & MASK_VIS4B) 1605 fprintf (stderr, "VIS4B "); 1606 if (flags & MASK_CBCOND) 1607 fprintf (stderr, "CBCOND "); 1608 if (flags & MASK_DEPRECATED_V8_INSNS) 1609 fprintf (stderr, "DEPRECATED_V8_INSNS "); 1610 if (flags & MASK_SPARCLET) 1611 fprintf (stderr, "SPARCLET "); 1612 if (flags & MASK_SPARCLITE) 1613 fprintf (stderr, "SPARCLITE "); 1614 if (flags & MASK_V8) 1615 fprintf (stderr, "V8 "); 1616 if (flags & MASK_V9) 1617 fprintf (stderr, "V9 "); 1618 } 1619 1620 static void 1621 dump_target_flags (const char *prefix, const int flags) 1622 { 1623 fprintf (stderr, "%s: (%08x) [ ", prefix, flags); 1624 dump_target_flag_bits (flags); 1625 fprintf(stderr, "]\n"); 1626 } 1627 1628 /* Validate and override various options, and do some machine dependent 1629 initialization. */ 1630 1631 static void 1632 sparc_option_override (void) 1633 { 1634 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. 
*/ 1635 static struct cpu_default { 1636 const int cpu; 1637 const enum sparc_processor_type processor; 1638 } const cpu_default[] = { 1639 /* There must be one entry here for each TARGET_CPU value. */ 1640 { TARGET_CPU_sparc, PROCESSOR_CYPRESS }, 1641 { TARGET_CPU_v8, PROCESSOR_V8 }, 1642 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC }, 1643 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC }, 1644 { TARGET_CPU_leon, PROCESSOR_LEON }, 1645 { TARGET_CPU_leon3, PROCESSOR_LEON3 }, 1646 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 }, 1647 { TARGET_CPU_sparclite, PROCESSOR_F930 }, 1648 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X }, 1649 { TARGET_CPU_sparclet, PROCESSOR_TSC701 }, 1650 { TARGET_CPU_v9, PROCESSOR_V9 }, 1651 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC }, 1652 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 }, 1653 { TARGET_CPU_niagara, PROCESSOR_NIAGARA }, 1654 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 }, 1655 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 }, 1656 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 }, 1657 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 }, 1658 { TARGET_CPU_m8, PROCESSOR_M8 }, 1659 { -1, PROCESSOR_V7 } 1660 }; 1661 const struct cpu_default *def; 1662 /* Table of values for -m{cpu,tune}=. This must match the order of 1663 the enum processor_type in sparc-opts.h. */ 1664 static struct cpu_table { 1665 const char *const name; 1666 const int disable; 1667 const int enable; 1668 } const cpu_table[] = { 1669 { "v7", MASK_ISA, 0 }, 1670 { "cypress", MASK_ISA, 0 }, 1671 { "v8", MASK_ISA, MASK_V8 }, 1672 /* TI TMS390Z55 supersparc */ 1673 { "supersparc", MASK_ISA, MASK_V8 }, 1674 { "hypersparc", MASK_ISA, MASK_V8 }, 1675 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON }, 1676 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 }, 1677 { "leon3v7", MASK_ISA, MASK_LEON3 }, 1678 { "sparclite", MASK_ISA, MASK_SPARCLITE }, 1679 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */ 1680 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE }, 1681 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */ 1682 { "f934", MASK_ISA, MASK_SPARCLITE }, 1683 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE }, 1684 { "sparclet", MASK_ISA, MASK_SPARCLET }, 1685 /* TEMIC sparclet */ 1686 { "tsc701", MASK_ISA, MASK_SPARCLET }, 1687 { "v9", MASK_ISA, MASK_V9 }, 1688 /* UltraSPARC I, II, IIi */ 1689 { "ultrasparc", MASK_ISA, 1690 /* Although insns using %y are deprecated, it is a clear win. */ 1691 MASK_V9|MASK_DEPRECATED_V8_INSNS }, 1692 /* UltraSPARC III */ 1693 /* ??? Check if %y issue still holds true. 
*/ 1694 { "ultrasparc3", MASK_ISA, 1695 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 }, 1696 /* UltraSPARC T1 */ 1697 { "niagara", MASK_ISA, 1698 MASK_V9|MASK_DEPRECATED_V8_INSNS }, 1699 /* UltraSPARC T2 */ 1700 { "niagara2", MASK_ISA, 1701 MASK_V9|MASK_POPC|MASK_VIS2 }, 1702 /* UltraSPARC T3 */ 1703 { "niagara3", MASK_ISA, 1704 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF }, 1705 /* UltraSPARC T4 */ 1706 { "niagara4", MASK_ISA, 1707 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND }, 1708 /* UltraSPARC M7 */ 1709 { "niagara7", MASK_ISA, 1710 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }, 1711 /* UltraSPARC M8 */ 1712 { "m8", MASK_ISA, 1713 MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC } 1714 }; 1715 const struct cpu_table *cpu; 1716 unsigned int i; 1717 1718 if (sparc_debug_string != NULL) 1719 { 1720 const char *q; 1721 char *p; 1722 1723 p = ASTRDUP (sparc_debug_string); 1724 while ((q = strtok (p, ",")) != NULL) 1725 { 1726 bool invert; 1727 int mask; 1728 1729 p = NULL; 1730 if (*q == '!') 1731 { 1732 invert = true; 1733 q++; 1734 } 1735 else 1736 invert = false; 1737 1738 if (! strcmp (q, "all")) 1739 mask = MASK_DEBUG_ALL; 1740 else if (! strcmp (q, "options")) 1741 mask = MASK_DEBUG_OPTIONS; 1742 else 1743 error ("unknown %<-mdebug-%s%> switch", q); 1744 1745 if (invert) 1746 sparc_debug &= ~mask; 1747 else 1748 sparc_debug |= mask; 1749 } 1750 } 1751 1752 /* Enable the FsMULd instruction by default if not explicitly specified by 1753 the user. It may be later disabled by the CPU (explicitly or not). */ 1754 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD)) 1755 target_flags |= MASK_FSMULD; 1756 1757 if (TARGET_DEBUG_OPTIONS) 1758 { 1759 dump_target_flags("Initial target_flags", target_flags); 1760 dump_target_flags("target_flags_explicit", target_flags_explicit); 1761 } 1762 1763 #ifdef SUBTARGET_OVERRIDE_OPTIONS 1764 SUBTARGET_OVERRIDE_OPTIONS; 1765 #endif 1766 1767 #ifndef SPARC_BI_ARCH 1768 /* Check for unsupported architecture size. */ 1769 if (!TARGET_64BIT != DEFAULT_ARCH32_P) 1770 error ("%s is not supported by this configuration", 1771 DEFAULT_ARCH32_P ? "-m64" : "-m32"); 1772 #endif 1773 1774 /* We force all 64bit archs to use 128 bit long double */ 1775 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128) 1776 { 1777 error ("%<-mlong-double-64%> not allowed with %<-m64%>"); 1778 target_flags |= MASK_LONG_DOUBLE_128; 1779 } 1780 1781 /* Check that -fcall-saved-REG wasn't specified for out registers. */ 1782 for (i = 8; i < 16; i++) 1783 if (!call_used_regs [i]) 1784 { 1785 error ("%<-fcall-saved-REG%> is not supported for out registers"); 1786 call_used_regs [i] = 1; 1787 } 1788 1789 /* Set the default CPU if no -mcpu option was specified. */ 1790 if (!global_options_set.x_sparc_cpu_and_features) 1791 { 1792 for (def = &cpu_default[0]; def->cpu != -1; ++def) 1793 if (def->cpu == TARGET_CPU_DEFAULT) 1794 break; 1795 gcc_assert (def->cpu != -1); 1796 sparc_cpu_and_features = def->processor; 1797 } 1798 1799 /* Set the default CPU if no -mtune option was specified. 
*/ 1800 if (!global_options_set.x_sparc_cpu) 1801 sparc_cpu = sparc_cpu_and_features; 1802 1803 cpu = &cpu_table[(int) sparc_cpu_and_features]; 1804 1805 if (TARGET_DEBUG_OPTIONS) 1806 { 1807 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name); 1808 dump_target_flags ("cpu->disable", cpu->disable); 1809 dump_target_flags ("cpu->enable", cpu->enable); 1810 } 1811 1812 target_flags &= ~cpu->disable; 1813 target_flags |= (cpu->enable 1814 #ifndef HAVE_AS_FMAF_HPC_VIS3 1815 & ~(MASK_FMAF | MASK_VIS3) 1816 #endif 1817 #ifndef HAVE_AS_SPARC4 1818 & ~MASK_CBCOND 1819 #endif 1820 #ifndef HAVE_AS_SPARC5_VIS4 1821 & ~(MASK_VIS4 | MASK_SUBXC) 1822 #endif 1823 #ifndef HAVE_AS_SPARC6 1824 & ~(MASK_VIS4B) 1825 #endif 1826 #ifndef HAVE_AS_LEON 1827 & ~(MASK_LEON | MASK_LEON3) 1828 #endif 1829 & ~(target_flags_explicit & MASK_FEATURES) 1830 ); 1831 1832 /* FsMULd is a V8 instruction. */ 1833 if (!TARGET_V8 && !TARGET_V9) 1834 target_flags &= ~MASK_FSMULD; 1835 1836 /* -mvis2 implies -mvis. */ 1837 if (TARGET_VIS2) 1838 target_flags |= MASK_VIS; 1839 1840 /* -mvis3 implies -mvis2 and -mvis. */ 1841 if (TARGET_VIS3) 1842 target_flags |= MASK_VIS2 | MASK_VIS; 1843 1844 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */ 1845 if (TARGET_VIS4) 1846 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS; 1847 1848 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */ 1849 if (TARGET_VIS4B) 1850 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS; 1851 1852 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if 1853 FPU is disabled. */ 1854 if (!TARGET_FPU) 1855 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4 1856 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD); 1857 1858 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions 1859 are available; -m64 also implies v9. */ 1860 if (TARGET_VIS || TARGET_ARCH64) 1861 { 1862 target_flags |= MASK_V9; 1863 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE); 1864 } 1865 1866 /* -mvis also implies -mv8plus on 32-bit. */ 1867 if (TARGET_VIS && !TARGET_ARCH64) 1868 target_flags |= MASK_V8PLUS; 1869 1870 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */ 1871 if (TARGET_V9 && TARGET_ARCH32) 1872 target_flags |= MASK_DEPRECATED_V8_INSNS; 1873 1874 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */ 1875 if (!TARGET_V9 || TARGET_ARCH64) 1876 target_flags &= ~MASK_V8PLUS; 1877 1878 /* Don't use stack biasing in 32-bit mode. */ 1879 if (TARGET_ARCH32) 1880 target_flags &= ~MASK_STACK_BIAS; 1881 1882 /* Use LRA instead of reload, unless otherwise instructed. */ 1883 if (!(target_flags_explicit & MASK_LRA)) 1884 target_flags |= MASK_LRA; 1885 1886 /* Enable applicable errata workarounds for LEON3FT. */ 1887 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc) 1888 { 1889 sparc_fix_b2bst = 1; 1890 sparc_fix_lost_divsqrt = 1; 1891 } 1892 1893 /* Disable FsMULd for the UT699 since it doesn't work correctly. */ 1894 if (sparc_fix_ut699) 1895 target_flags &= ~MASK_FSMULD; 1896 1897 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128 1898 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) 1899 target_flags |= MASK_LONG_DOUBLE_128; 1900 #endif 1901 1902 if (TARGET_DEBUG_OPTIONS) 1903 dump_target_flags ("Final target_flags", target_flags); 1904 1905 /* Set the code model if no -mcmodel option was specified. 
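In 32-bit mode the only supported model is the flat CM_32 one, so an explicit -mcmodel= is rejected there; 64-bit compilations fall back to SPARC_DEFAULT_CMODEL.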
*/ 1906 if (global_options_set.x_sparc_code_model) 1907 { 1908 if (TARGET_ARCH32) 1909 error ("%<-mcmodel=%> is not supported in 32-bit mode"); 1910 } 1911 else 1912 { 1913 if (TARGET_ARCH32) 1914 sparc_code_model = CM_32; 1915 else 1916 sparc_code_model = SPARC_DEFAULT_CMODEL; 1917 } 1918 1919 /* Set the memory model if no -mmemory-model option was specified. */ 1920 if (!global_options_set.x_sparc_memory_model) 1921 { 1922 /* Choose the memory model for the operating system. */ 1923 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL; 1924 if (os_default != SMM_DEFAULT) 1925 sparc_memory_model = os_default; 1926 /* Choose the most relaxed model for the processor. */ 1927 else if (TARGET_V9) 1928 sparc_memory_model = SMM_RMO; 1929 else if (TARGET_LEON3) 1930 sparc_memory_model = SMM_TSO; 1931 else if (TARGET_LEON) 1932 sparc_memory_model = SMM_SC; 1933 else if (TARGET_V8) 1934 sparc_memory_model = SMM_PSO; 1935 else 1936 sparc_memory_model = SMM_SC; 1937 } 1938 1939 /* Supply a default value for align_functions. */ 1940 if (flag_align_functions && !str_align_functions) 1941 { 1942 if (sparc_cpu == PROCESSOR_ULTRASPARC 1943 || sparc_cpu == PROCESSOR_ULTRASPARC3 1944 || sparc_cpu == PROCESSOR_NIAGARA 1945 || sparc_cpu == PROCESSOR_NIAGARA2 1946 || sparc_cpu == PROCESSOR_NIAGARA3 1947 || sparc_cpu == PROCESSOR_NIAGARA4) 1948 str_align_functions = "32"; 1949 else if (sparc_cpu == PROCESSOR_NIAGARA7 1950 || sparc_cpu == PROCESSOR_M8) 1951 str_align_functions = "64"; 1952 } 1953 1954 /* Validate PCC_STRUCT_RETURN. */ 1955 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN) 1956 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1); 1957 1958 /* Only use .uaxword when compiling for a 64-bit target. */ 1959 if (!TARGET_ARCH64) 1960 targetm.asm_out.unaligned_op.di = NULL; 1961 1962 /* Set the processor costs. */ 1963 switch (sparc_cpu) 1964 { 1965 case PROCESSOR_V7: 1966 case PROCESSOR_CYPRESS: 1967 sparc_costs = &cypress_costs; 1968 break; 1969 case PROCESSOR_V8: 1970 case PROCESSOR_SPARCLITE: 1971 case PROCESSOR_SUPERSPARC: 1972 sparc_costs = &supersparc_costs; 1973 break; 1974 case PROCESSOR_F930: 1975 case PROCESSOR_F934: 1976 case PROCESSOR_HYPERSPARC: 1977 case PROCESSOR_SPARCLITE86X: 1978 sparc_costs = &hypersparc_costs; 1979 break; 1980 case PROCESSOR_LEON: 1981 sparc_costs = &leon_costs; 1982 break; 1983 case PROCESSOR_LEON3: 1984 case PROCESSOR_LEON3V7: 1985 sparc_costs = &leon3_costs; 1986 break; 1987 case PROCESSOR_SPARCLET: 1988 case PROCESSOR_TSC701: 1989 sparc_costs = &sparclet_costs; 1990 break; 1991 case PROCESSOR_V9: 1992 case PROCESSOR_ULTRASPARC: 1993 sparc_costs = &ultrasparc_costs; 1994 break; 1995 case PROCESSOR_ULTRASPARC3: 1996 sparc_costs = &ultrasparc3_costs; 1997 break; 1998 case PROCESSOR_NIAGARA: 1999 sparc_costs = &niagara_costs; 2000 break; 2001 case PROCESSOR_NIAGARA2: 2002 sparc_costs = &niagara2_costs; 2003 break; 2004 case PROCESSOR_NIAGARA3: 2005 sparc_costs = &niagara3_costs; 2006 break; 2007 case PROCESSOR_NIAGARA4: 2008 sparc_costs = &niagara4_costs; 2009 break; 2010 case PROCESSOR_NIAGARA7: 2011 sparc_costs = &niagara7_costs; 2012 break; 2013 case PROCESSOR_M8: 2014 sparc_costs = &m8_costs; 2015 break; 2016 case PROCESSOR_NATIVE: 2017 gcc_unreachable (); 2018 }; 2019 2020 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that 2021 can run at the same time. More important, it is the threshold 2022 defining when additional prefetches will be dropped by the 2023 hardware. 
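For example, with -mcpu=niagara7 the call below picks 32, and an explicit --param simultaneous-prefetches= on the command line still takes precedence, since maybe_set_param_value leaves user-specified values alone.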
2024 2025 The UltraSPARC-III features a documented prefetch queue with a 2026 size of 8. Additional prefetches issued in the cpu are 2027 dropped. 2028 2029 Niagara processors are different. In these processors prefetches 2030 are handled much like regular loads. The L1 miss buffer is 32 2031 entries, but prefetches start getting affected when 30 entries 2032 become occupied. That occupation could be a mix of regular loads 2033 and prefetches though. And that buffer is shared by all threads. 2034 Once the threshold is reached, if the core is running a single 2035 thread the prefetch will retry. If more than one thread is 2036 running, the prefetch will be dropped. 2037 2038 All this makes it very difficult to determine how many 2039 prefetches can be issued simultaneously, even in a 2040 single-threaded program. Experimental results show that setting 2041 this parameter to 32 works well when the number of threads is not 2042 high. */ 2043 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 2044 ((sparc_cpu == PROCESSOR_ULTRASPARC 2045 || sparc_cpu == PROCESSOR_NIAGARA 2046 || sparc_cpu == PROCESSOR_NIAGARA2 2047 || sparc_cpu == PROCESSOR_NIAGARA3 2048 || sparc_cpu == PROCESSOR_NIAGARA4) 2049 ? 2 2050 : (sparc_cpu == PROCESSOR_ULTRASPARC3 2051 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7 2052 || sparc_cpu == PROCESSOR_M8) 2053 ? 32 : 3))), 2054 global_options.x_param_values, 2055 global_options_set.x_param_values); 2056 2057 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in 2058 bytes. 2059 2060 The Oracle SPARC Architecture (previously the UltraSPARC 2061 Architecture) specification states that when a PREFETCH[A] 2062 instruction is executed an implementation-specific amount of data 2063 is prefetched, and that it is at least 64 bytes long (aligned to 2064 at least 64 bytes). 2065 2066 However, this is not correct. The M7 (and implementations prior 2067 to that) does not guarantee a 64B prefetch into a cache if the 2068 line size is smaller. A single cache line is all that is ever 2069 prefetched. So for the M7, where the L1D$ has 32B lines and the 2070 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the 2071 L2 and L3, but only 32B are brought into the L1D$. (Assuming it 2072 is a read_n prefetch, which is the only type which allocates to 2073 the L1.) */ 2074 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 2075 (sparc_cpu == PROCESSOR_M8 2076 ? 64 : 32), 2077 global_options.x_param_values, 2078 global_options_set.x_param_values); 2079 2080 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use 2081 Harvard level-1 caches) in kilobytes. Both UltraSPARC and 2082 Niagara processors feature an L1D$ of 16KB. */ 2083 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 2084 ((sparc_cpu == PROCESSOR_ULTRASPARC 2085 || sparc_cpu == PROCESSOR_ULTRASPARC3 2086 || sparc_cpu == PROCESSOR_NIAGARA 2087 || sparc_cpu == PROCESSOR_NIAGARA2 2088 || sparc_cpu == PROCESSOR_NIAGARA3 2089 || sparc_cpu == PROCESSOR_NIAGARA4 2090 || sparc_cpu == PROCESSOR_NIAGARA7 2091 || sparc_cpu == PROCESSOR_M8) 2092 ? 16 : 64), 2093 global_options.x_param_values, 2094 global_options_set.x_param_values); 2095 2096 2097 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note 2098 that 512 is the default in params.def. */ 2099 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 2100 ((sparc_cpu == PROCESSOR_NIAGARA4 2101 || sparc_cpu == PROCESSOR_M8) 2102 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7 2103 ?
256 : 512)), 2104 global_options.x_param_values, 2105 global_options_set.x_param_values); 2106 2107 2108 /* Disable save slot sharing for call-clobbered registers by default. 2109 The IRA sharing algorithm works on single registers only and this 2110 pessimizes for double floating-point registers. */ 2111 if (!global_options_set.x_flag_ira_share_save_slots) 2112 flag_ira_share_save_slots = 0; 2113 2114 /* Only enable REE by default in 64-bit mode where it helps to eliminate 2115 redundant 32-to-64-bit extensions. */ 2116 if (!global_options_set.x_flag_ree && TARGET_ARCH32) 2117 flag_ree = 0; 2118 2119 /* Do various machine dependent initializations. */ 2120 sparc_init_modes (); 2121 2122 /* Set up function hooks. */ 2123 init_machine_status = sparc_init_machine_status; 2124 } 2125 2126 /* Miscellaneous utilities. */ 2127 2128 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move 2129 or branch on register contents instructions. */ 2130 2131 int 2132 v9_regcmp_p (enum rtx_code code) 2133 { 2134 return (code == EQ || code == NE || code == GE || code == LT 2135 || code == LE || code == GT); 2136 } 2137 2138 /* Nonzero if OP is a floating point constant which can 2139 be loaded into an integer register using a single 2140 sethi instruction. */ 2141 2142 int 2143 fp_sethi_p (rtx op) 2144 { 2145 if (GET_CODE (op) == CONST_DOUBLE) 2146 { 2147 long i; 2148 2149 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); 2150 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i); 2151 } 2152 2153 return 0; 2154 } 2155 2156 /* Nonzero if OP is a floating point constant which can 2157 be loaded into an integer register using a single 2158 mov instruction. */ 2159 2160 int 2161 fp_mov_p (rtx op) 2162 { 2163 if (GET_CODE (op) == CONST_DOUBLE) 2164 { 2165 long i; 2166 2167 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); 2168 return SPARC_SIMM13_P (i); 2169 } 2170 2171 return 0; 2172 } 2173 2174 /* Nonzero if OP is a floating point constant which can 2175 be loaded into an integer register using a high/losum 2176 instruction sequence. */ 2177 2178 int 2179 fp_high_losum_p (rtx op) 2180 { 2181 /* The constraints calling this should only be in 2182 SFmode move insns, so any constant which cannot 2183 be moved using a single insn will do. */ 2184 if (GET_CODE (op) == CONST_DOUBLE) 2185 { 2186 long i; 2187 2188 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); 2189 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i); 2190 } 2191 2192 return 0; 2193 } 2194 2195 /* Return true if the address of LABEL can be loaded by means of the 2196 mov{si,di}_pic_label_ref patterns in PIC mode. */ 2197 2198 static bool 2199 can_use_mov_pic_label_ref (rtx label) 2200 { 2201 /* VxWorks does not impose a fixed gap between segments; the run-time 2202 gap can be different from the object-file gap. We therefore can't 2203 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we 2204 are absolutely sure that X is in the same segment as the GOT. 2205 Unfortunately, the flexibility of linker scripts means that we 2206 can't be sure of that in general, so assume that GOT-relative 2207 accesses are never valid on VxWorks. */ 2208 if (TARGET_VXWORKS_RTP) 2209 return false; 2210 2211 /* Similarly, if the label is non-local, it might end up being placed 2212 in a different section than the current one; now mov_pic_label_ref 2213 requires the label and the code to be in the same section. 
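A non-local label is, for instance, one belonging to an enclosing function that is referenced from a nested function, so it may well end up in a different section than the code referencing it.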
*/ 2214 if (LABEL_REF_NONLOCAL_P (label)) 2215 return false; 2216 2217 /* Finally, if we are reordering basic blocks and partition into hot 2218 and cold sections, this might happen for any label. */ 2219 if (flag_reorder_blocks_and_partition) 2220 return false; 2221 2222 return true; 2223 } 2224 2225 /* Expand a move instruction. Return true if all work is done. */ 2226 2227 bool 2228 sparc_expand_move (machine_mode mode, rtx *operands) 2229 { 2230 /* Handle sets of MEM first. */ 2231 if (GET_CODE (operands[0]) == MEM) 2232 { 2233 /* 0 is a register (or a pair of registers) on SPARC. */ 2234 if (register_or_zero_operand (operands[1], mode)) 2235 return false; 2236 2237 if (!reload_in_progress) 2238 { 2239 operands[0] = validize_mem (operands[0]); 2240 operands[1] = force_reg (mode, operands[1]); 2241 } 2242 } 2243 2244 /* Fix up TLS cases. */ 2245 if (TARGET_HAVE_TLS 2246 && CONSTANT_P (operands[1]) 2247 && sparc_tls_referenced_p (operands [1])) 2248 { 2249 operands[1] = sparc_legitimize_tls_address (operands[1]); 2250 return false; 2251 } 2252 2253 /* Fix up PIC cases. */ 2254 if (flag_pic && CONSTANT_P (operands[1])) 2255 { 2256 if (pic_address_needs_scratch (operands[1])) 2257 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX); 2258 2259 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */ 2260 if ((GET_CODE (operands[1]) == LABEL_REF 2261 && can_use_mov_pic_label_ref (operands[1])) 2262 || (GET_CODE (operands[1]) == CONST 2263 && GET_CODE (XEXP (operands[1], 0)) == PLUS 2264 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF 2265 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT 2266 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0)))) 2267 { 2268 if (mode == SImode) 2269 { 2270 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1])); 2271 return true; 2272 } 2273 2274 if (mode == DImode) 2275 { 2276 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1])); 2277 return true; 2278 } 2279 } 2280 2281 if (symbolic_operand (operands[1], mode)) 2282 { 2283 operands[1] 2284 = sparc_legitimize_pic_address (operands[1], 2285 reload_in_progress 2286 ? operands[0] : NULL_RTX); 2287 return false; 2288 } 2289 } 2290 2291 /* If we are trying to toss an integer constant into FP registers, 2292 or loading a FP or vector constant, force it into memory. */ 2293 if (CONSTANT_P (operands[1]) 2294 && REG_P (operands[0]) 2295 && (SPARC_FP_REG_P (REGNO (operands[0])) 2296 || SCALAR_FLOAT_MODE_P (mode) 2297 || VECTOR_MODE_P (mode))) 2298 { 2299 /* emit_group_store will send such bogosity to us when it is 2300 not storing directly into memory. So fix this up to avoid 2301 crashes in output_constant_pool. */ 2302 if (operands [1] == const0_rtx) 2303 operands[1] = CONST0_RTX (mode); 2304 2305 /* We can clear or set to all-ones FP registers if TARGET_VIS, and 2306 always other regs. */ 2307 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG) 2308 && (const_zero_operand (operands[1], mode) 2309 || const_all_ones_operand (operands[1], mode))) 2310 return false; 2311 2312 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG 2313 /* We are able to build any SF constant in integer registers 2314 with at most 2 instructions. */ 2315 && (mode == SFmode 2316 /* And any DF constant in integer registers if needed. 
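Here "if needed" means when new pseudos can no longer be created, i.e. during and after reload.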
*/ 2317 || (mode == DFmode && !can_create_pseudo_p ()))) 2318 return false; 2319 2320 operands[1] = force_const_mem (mode, operands[1]); 2321 if (!reload_in_progress) 2322 operands[1] = validize_mem (operands[1]); 2323 return false; 2324 } 2325 2326 /* Accept non-constants and valid constants unmodified. */ 2327 if (!CONSTANT_P (operands[1]) 2328 || GET_CODE (operands[1]) == HIGH 2329 || input_operand (operands[1], mode)) 2330 return false; 2331 2332 switch (mode) 2333 { 2334 case E_QImode: 2335 /* All QImode constants require only one insn, so proceed. */ 2336 break; 2337 2338 case E_HImode: 2339 case E_SImode: 2340 sparc_emit_set_const32 (operands[0], operands[1]); 2341 return true; 2342 2343 case E_DImode: 2344 /* input_operand should have filtered out 32-bit mode. */ 2345 sparc_emit_set_const64 (operands[0], operands[1]); 2346 return true; 2347 2348 case E_TImode: 2349 { 2350 rtx high, low; 2351 /* TImode isn't available in 32-bit mode. */ 2352 split_double (operands[1], &high, &low); 2353 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode), 2354 high)); 2355 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode), 2356 low)); 2357 } 2358 return true; 2359 2360 default: 2361 gcc_unreachable (); 2362 } 2363 2364 return false; 2365 } 2366 2367 /* Load OP1, a 32-bit constant, into OP0, a register. 2368 We know it can't be done in one insn when we get 2369 here, the move expander guarantees this. */ 2370 2371 static void 2372 sparc_emit_set_const32 (rtx op0, rtx op1) 2373 { 2374 machine_mode mode = GET_MODE (op0); 2375 rtx temp = op0; 2376 2377 if (can_create_pseudo_p ()) 2378 temp = gen_reg_rtx (mode); 2379 2380 if (GET_CODE (op1) == CONST_INT) 2381 { 2382 gcc_assert (!small_int_operand (op1, mode) 2383 && !const_high_operand (op1, mode)); 2384 2385 /* Emit them as real moves instead of a HIGH/LO_SUM, 2386 this way CSE can see everything and reuse intermediate 2387 values if it wants. */ 2388 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1) 2389 & ~(HOST_WIDE_INT) 0x3ff))); 2390 2391 emit_insn (gen_rtx_SET (op0, 2392 gen_rtx_IOR (mode, temp, 2393 GEN_INT (INTVAL (op1) & 0x3ff)))); 2394 } 2395 else 2396 { 2397 /* A symbol, emit in the traditional way. */ 2398 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1))); 2399 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1))); 2400 } 2401 } 2402 2403 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register. 2404 If TEMP is nonzero, we are forbidden to use any other scratch 2405 registers. Otherwise, we are allowed to generate them as needed. 2406 2407 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY 2408 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */ 2409 2410 void 2411 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp) 2412 { 2413 rtx cst, temp1, temp2, temp3, temp4, temp5; 2414 rtx ti_temp = 0; 2415 2416 /* Deal with too large offsets. 
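For instance, a symbol plus an offset that does not fit in 32 bits cannot be expressed with the usual relocations, so the offset is materialized separately below and added to the symbolic part.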
*/ 2417 if (GET_CODE (op1) == CONST 2418 && GET_CODE (XEXP (op1, 0)) == PLUS 2419 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1)) 2420 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst)) 2421 { 2422 gcc_assert (!temp); 2423 temp1 = gen_reg_rtx (DImode); 2424 temp2 = gen_reg_rtx (DImode); 2425 sparc_emit_set_const64 (temp2, cst); 2426 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0), 2427 NULL_RTX); 2428 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2))); 2429 return; 2430 } 2431 2432 if (temp && GET_MODE (temp) == TImode) 2433 { 2434 ti_temp = temp; 2435 temp = gen_rtx_REG (DImode, REGNO (temp)); 2436 } 2437 2438 /* SPARC-V9 code model support. */ 2439 switch (sparc_code_model) 2440 { 2441 case CM_MEDLOW: 2442 /* The range spanned by all instructions in the object is less 2443 than 2^31 bytes (2GB) and the distance from any instruction 2444 to the location of the label _GLOBAL_OFFSET_TABLE_ is less 2445 than 2^31 bytes (2GB). 2446 2447 The executable must be in the low 4TB of the virtual address 2448 space. 2449 2450 sethi %hi(symbol), %temp1 2451 or %temp1, %lo(symbol), %reg */ 2452 if (temp) 2453 temp1 = temp; /* op0 is allowed. */ 2454 else 2455 temp1 = gen_reg_rtx (DImode); 2456 2457 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1))); 2458 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1))); 2459 break; 2460 2461 case CM_MEDMID: 2462 /* The range spanned by all instructions in the object is less 2463 than 2^31 bytes (2GB) and the distance from any instruction 2464 to the location of the label _GLOBAL_OFFSET_TABLE_ is less 2465 than 2^31 bytes (2GB). 2466 2467 The executable must be in the low 16TB of the virtual address 2468 space. 2469 2470 sethi %h44(symbol), %temp1 2471 or %temp1, %m44(symbol), %temp2 2472 sllx %temp2, 12, %temp3 2473 or %temp3, %l44(symbol), %reg */ 2474 if (temp) 2475 { 2476 temp1 = op0; 2477 temp2 = op0; 2478 temp3 = temp; /* op0 is allowed. */ 2479 } 2480 else 2481 { 2482 temp1 = gen_reg_rtx (DImode); 2483 temp2 = gen_reg_rtx (DImode); 2484 temp3 = gen_reg_rtx (DImode); 2485 } 2486 2487 emit_insn (gen_seth44 (temp1, op1)); 2488 emit_insn (gen_setm44 (temp2, temp1, op1)); 2489 emit_insn (gen_rtx_SET (temp3, 2490 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12)))); 2491 emit_insn (gen_setl44 (op0, temp3, op1)); 2492 break; 2493 2494 case CM_MEDANY: 2495 /* The range spanned by all instructions in the object is less 2496 than 2^31 bytes (2GB) and the distance from any instruction 2497 to the location of the label _GLOBAL_OFFSET_TABLE_ is less 2498 than 2^31 bytes (2GB). 2499 2500 The executable can be placed anywhere in the virtual address 2501 space. 2502 2503 sethi %hh(symbol), %temp1 2504 sethi %lm(symbol), %temp2 2505 or %temp1, %hm(symbol), %temp3 2506 sllx %temp3, 32, %temp4 2507 or %temp4, %temp2, %temp5 2508 or %temp5, %lo(symbol), %reg */ 2509 if (temp) 2510 { 2511 /* It is possible that one of the registers we got for operands[2] 2512 might coincide with that of operands[0] (which is why we made 2513 it TImode). Pick the other one to use as our scratch. */ 2514 if (rtx_equal_p (temp, op0)) 2515 { 2516 gcc_assert (ti_temp); 2517 temp = gen_rtx_REG (DImode, REGNO (temp) + 1); 2518 } 2519 temp1 = op0; 2520 temp2 = temp; /* op0 is _not_ allowed, see above. 
*/ 2521 temp3 = op0; 2522 temp4 = op0; 2523 temp5 = op0; 2524 } 2525 else 2526 { 2527 temp1 = gen_reg_rtx (DImode); 2528 temp2 = gen_reg_rtx (DImode); 2529 temp3 = gen_reg_rtx (DImode); 2530 temp4 = gen_reg_rtx (DImode); 2531 temp5 = gen_reg_rtx (DImode); 2532 } 2533 2534 emit_insn (gen_sethh (temp1, op1)); 2535 emit_insn (gen_setlm (temp2, op1)); 2536 emit_insn (gen_sethm (temp3, temp1, op1)); 2537 emit_insn (gen_rtx_SET (temp4, 2538 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); 2539 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2))); 2540 emit_insn (gen_setlo (op0, temp5, op1)); 2541 break; 2542 2543 case CM_EMBMEDANY: 2544 /* Old old old backwards compatibility kruft here. 2545 Essentially it is MEDLOW with a fixed 64-bit 2546 virtual base added to all data segment addresses. 2547 Text-segment stuff is computed like MEDANY, we can't 2548 reuse the code above because the relocation knobs 2549 look different. 2550 2551 Data segment: sethi %hi(symbol), %temp1 2552 add %temp1, EMBMEDANY_BASE_REG, %temp2 2553 or %temp2, %lo(symbol), %reg */ 2554 if (data_segment_operand (op1, GET_MODE (op1))) 2555 { 2556 if (temp) 2557 { 2558 temp1 = temp; /* op0 is allowed. */ 2559 temp2 = op0; 2560 } 2561 else 2562 { 2563 temp1 = gen_reg_rtx (DImode); 2564 temp2 = gen_reg_rtx (DImode); 2565 } 2566 2567 emit_insn (gen_embmedany_sethi (temp1, op1)); 2568 emit_insn (gen_embmedany_brsum (temp2, temp1)); 2569 emit_insn (gen_embmedany_losum (op0, temp2, op1)); 2570 } 2571 2572 /* Text segment: sethi %uhi(symbol), %temp1 2573 sethi %hi(symbol), %temp2 2574 or %temp1, %ulo(symbol), %temp3 2575 sllx %temp3, 32, %temp4 2576 or %temp4, %temp2, %temp5 2577 or %temp5, %lo(symbol), %reg */ 2578 else 2579 { 2580 if (temp) 2581 { 2582 /* It is possible that one of the registers we got for operands[2] 2583 might coincide with that of operands[0] (which is why we made 2584 it TImode). Pick the other one to use as our scratch. */ 2585 if (rtx_equal_p (temp, op0)) 2586 { 2587 gcc_assert (ti_temp); 2588 temp = gen_rtx_REG (DImode, REGNO (temp) + 1); 2589 } 2590 temp1 = op0; 2591 temp2 = temp; /* op0 is _not_ allowed, see above. */ 2592 temp3 = op0; 2593 temp4 = op0; 2594 temp5 = op0; 2595 } 2596 else 2597 { 2598 temp1 = gen_reg_rtx (DImode); 2599 temp2 = gen_reg_rtx (DImode); 2600 temp3 = gen_reg_rtx (DImode); 2601 temp4 = gen_reg_rtx (DImode); 2602 temp5 = gen_reg_rtx (DImode); 2603 } 2604 2605 emit_insn (gen_embmedany_textuhi (temp1, op1)); 2606 emit_insn (gen_embmedany_texthi (temp2, op1)); 2607 emit_insn (gen_embmedany_textulo (temp3, temp1, op1)); 2608 emit_insn (gen_rtx_SET (temp4, 2609 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); 2610 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2))); 2611 emit_insn (gen_embmedany_textlo (op0, temp5, op1)); 2612 } 2613 break; 2614 2615 default: 2616 gcc_unreachable (); 2617 } 2618 } 2619 2620 /* These avoid problems when cross compiling. If we do not 2621 go through all this hair then the optimizer will see 2622 invalid REG_EQUAL notes or in some cases none at all. */ 2623 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT); 2624 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT); 2625 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT); 2626 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT); 2627 2628 /* The optimizer is not to assume anything about exactly 2629 which bits are set for a HIGH, they are unspecified. 2630 Unfortunately this leads to many missed optimizations 2631 during CSE. 
We mask out the non-HIGH bits and match 2632 a plain movdi, to alleviate this problem. */ 2633 static rtx 2634 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val) 2635 { 2636 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff)); 2637 } 2638 2639 static rtx 2640 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val) 2641 { 2642 return gen_rtx_SET (dest, GEN_INT (val)); 2643 } 2644 2645 static rtx 2646 gen_safe_OR64 (rtx src, HOST_WIDE_INT val) 2647 { 2648 return gen_rtx_IOR (DImode, src, GEN_INT (val)); 2649 } 2650 2651 static rtx 2652 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val) 2653 { 2654 return gen_rtx_XOR (DImode, src, GEN_INT (val)); 2655 } 2656 2657 /* Worker routines for 64-bit constant formation on arch64. 2658 One of the key things to do in these emissions is 2659 to create as many temp REGs as possible. This makes it 2660 possible for half-built constants to be used later when 2661 such values are similar to something required later on. 2662 Without doing this, the optimizer cannot see such 2663 opportunities. */ 2664 2665 static void sparc_emit_set_const64_quick1 (rtx, rtx, 2666 unsigned HOST_WIDE_INT, int); 2667 2668 static void 2669 sparc_emit_set_const64_quick1 (rtx op0, rtx temp, 2670 unsigned HOST_WIDE_INT low_bits, int is_neg) 2671 { 2672 unsigned HOST_WIDE_INT high_bits; 2673 2674 if (is_neg) 2675 high_bits = (~low_bits) & 0xffffffff; 2676 else 2677 high_bits = low_bits; 2678 2679 emit_insn (gen_safe_HIGH64 (temp, high_bits)); 2680 if (!is_neg) 2681 { 2682 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff)))); 2683 } 2684 else 2685 { 2686 /* If we are XOR'ing with -1, then we should emit a one's complement 2687 instead. This way the combiner will notice logical operations 2688 such as ANDN later on and substitute. */ 2689 if ((low_bits & 0x3ff) == 0x3ff) 2690 { 2691 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp))); 2692 } 2693 else 2694 { 2695 emit_insn (gen_rtx_SET (op0, 2696 gen_safe_XOR64 (temp, 2697 (-(HOST_WIDE_INT)0x400 2698 | (low_bits & 0x3ff))))); 2699 } 2700 } 2701 } 2702 2703 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT, 2704 unsigned HOST_WIDE_INT, int); 2705 2706 static void 2707 sparc_emit_set_const64_quick2 (rtx op0, rtx temp, 2708 unsigned HOST_WIDE_INT high_bits, 2709 unsigned HOST_WIDE_INT low_immediate, 2710 int shift_count) 2711 { 2712 rtx temp2 = op0; 2713 2714 if ((high_bits & 0xfffffc00) != 0) 2715 { 2716 emit_insn (gen_safe_HIGH64 (temp, high_bits)); 2717 if ((high_bits & ~0xfffffc00) != 0) 2718 emit_insn (gen_rtx_SET (op0, 2719 gen_safe_OR64 (temp, (high_bits & 0x3ff)))); 2720 else 2721 temp2 = temp; 2722 } 2723 else 2724 { 2725 emit_insn (gen_safe_SET64 (temp, high_bits)); 2726 temp2 = temp; 2727 } 2728 2729 /* Now shift it up into place. */ 2730 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2, 2731 GEN_INT (shift_count)))); 2732 2733 /* If there is a low immediate piece, finish up by 2734 putting that in as well. */ 2735 if (low_immediate != 0) 2736 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate))); 2737 } 2738 2739 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT, 2740 unsigned HOST_WIDE_INT); 2741 2742 /* Full 64-bit constant decomposition. Even though this is the 2743 'worst' case, we still optimize a few things away.
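Roughly, when new pseudos are still available the emitted sequence looks like

       sethi   %hi(high_bits), %temp
       or      %temp, %lo(high_bits), %temp
       sllx    %temp, 32, %temp4
       sethi   %hi(low_bits), %temp2
       or      %temp2, %lo(low_bits), %temp3
       add     %temp4, %temp3, %reg

whereas during reload the low 32 bits are merged into the destination register in 12/12/8-bit slices with shifts and ors. The register names above are only illustrative.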
*/ 2744 static void 2745 sparc_emit_set_const64_longway (rtx op0, rtx temp, 2746 unsigned HOST_WIDE_INT high_bits, 2747 unsigned HOST_WIDE_INT low_bits) 2748 { 2749 rtx sub_temp = op0; 2750 2751 if (can_create_pseudo_p ()) 2752 sub_temp = gen_reg_rtx (DImode); 2753 2754 if ((high_bits & 0xfffffc00) != 0) 2755 { 2756 emit_insn (gen_safe_HIGH64 (temp, high_bits)); 2757 if ((high_bits & ~0xfffffc00) != 0) 2758 emit_insn (gen_rtx_SET (sub_temp, 2759 gen_safe_OR64 (temp, (high_bits & 0x3ff)))); 2760 else 2761 sub_temp = temp; 2762 } 2763 else 2764 { 2765 emit_insn (gen_safe_SET64 (temp, high_bits)); 2766 sub_temp = temp; 2767 } 2768 2769 if (can_create_pseudo_p ()) 2770 { 2771 rtx temp2 = gen_reg_rtx (DImode); 2772 rtx temp3 = gen_reg_rtx (DImode); 2773 rtx temp4 = gen_reg_rtx (DImode); 2774 2775 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp, 2776 GEN_INT (32)))); 2777 2778 emit_insn (gen_safe_HIGH64 (temp2, low_bits)); 2779 if ((low_bits & ~0xfffffc00) != 0) 2780 { 2781 emit_insn (gen_rtx_SET (temp3, 2782 gen_safe_OR64 (temp2, (low_bits & 0x3ff)))); 2783 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3))); 2784 } 2785 else 2786 { 2787 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2))); 2788 } 2789 } 2790 else 2791 { 2792 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff); 2793 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff); 2794 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff); 2795 int to_shift = 12; 2796 2797 /* We are in the middle of reload, so this is really 2798 painful. However we do still make an attempt to 2799 avoid emitting truly stupid code. */ 2800 if (low1 != const0_rtx) 2801 { 2802 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp, 2803 GEN_INT (to_shift)))); 2804 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1))); 2805 sub_temp = op0; 2806 to_shift = 12; 2807 } 2808 else 2809 { 2810 to_shift += 12; 2811 } 2812 if (low2 != const0_rtx) 2813 { 2814 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp, 2815 GEN_INT (to_shift)))); 2816 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2))); 2817 sub_temp = op0; 2818 to_shift = 8; 2819 } 2820 else 2821 { 2822 to_shift += 8; 2823 } 2824 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp, 2825 GEN_INT (to_shift)))); 2826 if (low3 != const0_rtx) 2827 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3))); 2828 /* phew... */ 2829 } 2830 } 2831 2832 /* Analyze a 64-bit constant for certain properties. 
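It reports the positions of the lowest and highest set bits and whether every bit in between is set. As a purely illustrative example, 0x0ff00000 would yield lowest_bit_set = 20, highest_bit_set = 27 and all_bits_between_are_set = 1, while 0x0f0f0000 would yield 16, 27 and 0.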
*/ 2833 static void analyze_64bit_constant (unsigned HOST_WIDE_INT, 2834 unsigned HOST_WIDE_INT, 2835 int *, int *, int *); 2836 2837 static void 2838 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits, 2839 unsigned HOST_WIDE_INT low_bits, 2840 int *hbsp, int *lbsp, int *abbasp) 2841 { 2842 int lowest_bit_set, highest_bit_set, all_bits_between_are_set; 2843 int i; 2844 2845 lowest_bit_set = highest_bit_set = -1; 2846 i = 0; 2847 do 2848 { 2849 if ((lowest_bit_set == -1) 2850 && ((low_bits >> i) & 1)) 2851 lowest_bit_set = i; 2852 if ((highest_bit_set == -1) 2853 && ((high_bits >> (32 - i - 1)) & 1)) 2854 highest_bit_set = (64 - i - 1); 2855 } 2856 while (++i < 32 2857 && ((highest_bit_set == -1) 2858 || (lowest_bit_set == -1))); 2859 if (i == 32) 2860 { 2861 i = 0; 2862 do 2863 { 2864 if ((lowest_bit_set == -1) 2865 && ((high_bits >> i) & 1)) 2866 lowest_bit_set = i + 32; 2867 if ((highest_bit_set == -1) 2868 && ((low_bits >> (32 - i - 1)) & 1)) 2869 highest_bit_set = 32 - i - 1; 2870 } 2871 while (++i < 32 2872 && ((highest_bit_set == -1) 2873 || (lowest_bit_set == -1))); 2874 } 2875 /* If there are no bits set this should have gone out 2876 as one instruction! */ 2877 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1); 2878 all_bits_between_are_set = 1; 2879 for (i = lowest_bit_set; i <= highest_bit_set; i++) 2880 { 2881 if (i < 32) 2882 { 2883 if ((low_bits & (1 << i)) != 0) 2884 continue; 2885 } 2886 else 2887 { 2888 if ((high_bits & (1 << (i - 32))) != 0) 2889 continue; 2890 } 2891 all_bits_between_are_set = 0; 2892 break; 2893 } 2894 *hbsp = highest_bit_set; 2895 *lbsp = lowest_bit_set; 2896 *abbasp = all_bits_between_are_set; 2897 } 2898 2899 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT); 2900 2901 static int 2902 const64_is_2insns (unsigned HOST_WIDE_INT high_bits, 2903 unsigned HOST_WIDE_INT low_bits) 2904 { 2905 int highest_bit_set, lowest_bit_set, all_bits_between_are_set; 2906 2907 if (high_bits == 0 2908 || high_bits == 0xffffffff) 2909 return 1; 2910 2911 analyze_64bit_constant (high_bits, low_bits, 2912 &highest_bit_set, &lowest_bit_set, 2913 &all_bits_between_are_set); 2914 2915 if ((highest_bit_set == 63 2916 || lowest_bit_set == 0) 2917 && all_bits_between_are_set != 0) 2918 return 1; 2919 2920 if ((highest_bit_set - lowest_bit_set) < 21) 2921 return 1; 2922 2923 return 0; 2924 } 2925 2926 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT, 2927 unsigned HOST_WIDE_INT, 2928 int, int); 2929 2930 static unsigned HOST_WIDE_INT 2931 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits, 2932 unsigned HOST_WIDE_INT low_bits, 2933 int lowest_bit_set, int shift) 2934 { 2935 HOST_WIDE_INT hi, lo; 2936 2937 if (lowest_bit_set < 32) 2938 { 2939 lo = (low_bits >> lowest_bit_set) << shift; 2940 hi = ((high_bits << (32 - lowest_bit_set)) << shift); 2941 } 2942 else 2943 { 2944 lo = 0; 2945 hi = ((high_bits >> (lowest_bit_set - 32)) << shift); 2946 } 2947 gcc_assert (! (hi & lo)); 2948 return (hi | lo); 2949 } 2950 2951 /* Here we are sure to be arch64 and this is an integer constant 2952 being loaded into a register. Emit the most efficient 2953 insn sequence possible. Detection of all the 1-insn cases 2954 has been done already. 
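The constant is split into high_bits (bits 32-63) and low_bits (bits 0-31); the helpers above are then used to choose between the 2-insn, 3-insn and full decompositions.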
*/ 2955 static void 2956 sparc_emit_set_const64 (rtx op0, rtx op1) 2957 { 2958 unsigned HOST_WIDE_INT high_bits, low_bits; 2959 int lowest_bit_set, highest_bit_set; 2960 int all_bits_between_are_set; 2961 rtx temp = 0; 2962 2963 /* Sanity check that we know what we are working with. */ 2964 gcc_assert (TARGET_ARCH64 2965 && (GET_CODE (op0) == SUBREG 2966 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0))))); 2967 2968 if (! can_create_pseudo_p ()) 2969 temp = op0; 2970 2971 if (GET_CODE (op1) != CONST_INT) 2972 { 2973 sparc_emit_set_symbolic_const64 (op0, op1, temp); 2974 return; 2975 } 2976 2977 if (! temp) 2978 temp = gen_reg_rtx (DImode); 2979 2980 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff); 2981 low_bits = (INTVAL (op1) & 0xffffffff); 2982 2983 /* low_bits bits 0 --> 31 2984 high_bits bits 32 --> 63 */ 2985 2986 analyze_64bit_constant (high_bits, low_bits, 2987 &highest_bit_set, &lowest_bit_set, 2988 &all_bits_between_are_set); 2989 2990 /* First try for a 2-insn sequence. */ 2991 2992 /* These situations are preferred because the optimizer can 2993 * do more things with them: 2994 * 1) mov -1, %reg 2995 * sllx %reg, shift, %reg 2996 * 2) mov -1, %reg 2997 * srlx %reg, shift, %reg 2998 * 3) mov some_small_const, %reg 2999 * sllx %reg, shift, %reg 3000 */ 3001 if (((highest_bit_set == 63 3002 || lowest_bit_set == 0) 3003 && all_bits_between_are_set != 0) 3004 || ((highest_bit_set - lowest_bit_set) < 12)) 3005 { 3006 HOST_WIDE_INT the_const = -1; 3007 int shift = lowest_bit_set; 3008 3009 if ((highest_bit_set != 63 3010 && lowest_bit_set != 0) 3011 || all_bits_between_are_set == 0) 3012 { 3013 the_const = 3014 create_simple_focus_bits (high_bits, low_bits, 3015 lowest_bit_set, 0); 3016 } 3017 else if (lowest_bit_set == 0) 3018 shift = -(63 - highest_bit_set); 3019 3020 gcc_assert (SPARC_SIMM13_P (the_const)); 3021 gcc_assert (shift != 0); 3022 3023 emit_insn (gen_safe_SET64 (temp, the_const)); 3024 if (shift > 0) 3025 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp, 3026 GEN_INT (shift)))); 3027 else if (shift < 0) 3028 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp, 3029 GEN_INT (-shift)))); 3030 return; 3031 } 3032 3033 /* Now a range of 22 or less bits set somewhere. 3034 * 1) sethi %hi(focus_bits), %reg 3035 * sllx %reg, shift, %reg 3036 * 2) sethi %hi(focus_bits), %reg 3037 * srlx %reg, shift, %reg 3038 */ 3039 if ((highest_bit_set - lowest_bit_set) < 21) 3040 { 3041 unsigned HOST_WIDE_INT focus_bits = 3042 create_simple_focus_bits (high_bits, low_bits, 3043 lowest_bit_set, 10); 3044 3045 gcc_assert (SPARC_SETHI_P (focus_bits)); 3046 gcc_assert (lowest_bit_set != 10); 3047 3048 emit_insn (gen_safe_HIGH64 (temp, focus_bits)); 3049 3050 /* If lowest_bit_set == 10 then a sethi alone could have done it. */ 3051 if (lowest_bit_set < 10) 3052 emit_insn (gen_rtx_SET (op0, 3053 gen_rtx_LSHIFTRT (DImode, temp, 3054 GEN_INT (10 - lowest_bit_set)))); 3055 else if (lowest_bit_set > 10) 3056 emit_insn (gen_rtx_SET (op0, 3057 gen_rtx_ASHIFT (DImode, temp, 3058 GEN_INT (lowest_bit_set - 10)))); 3059 return; 3060 } 3061 3062 /* 1) sethi %hi(low_bits), %reg 3063 * or %reg, %lo(low_bits), %reg 3064 * 2) sethi %hi(~low_bits), %reg 3065 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg 3066 */ 3067 if (high_bits == 0 3068 || high_bits == 0xffffffff) 3069 { 3070 sparc_emit_set_const64_quick1 (op0, temp, low_bits, 3071 (high_bits == 0xffffffff)); 3072 return; 3073 } 3074 3075 /* Now, try 3-insn sequences. 
*/ 3076 3077 /* 1) sethi %hi(high_bits), %reg 3078 * or %reg, %lo(high_bits), %reg 3079 * sllx %reg, 32, %reg 3080 */ 3081 if (low_bits == 0) 3082 { 3083 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32); 3084 return; 3085 } 3086 3087 /* We may be able to do something quick 3088 when the constant is negated, so try that. */ 3089 if (const64_is_2insns ((~high_bits) & 0xffffffff, 3090 (~low_bits) & 0xfffffc00)) 3091 { 3092 /* NOTE: The trailing bits get XOR'd so we need the 3093 non-negated bits, not the negated ones. */ 3094 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff; 3095 3096 if ((((~high_bits) & 0xffffffff) == 0 3097 && ((~low_bits) & 0x80000000) == 0) 3098 || (((~high_bits) & 0xffffffff) == 0xffffffff 3099 && ((~low_bits) & 0x80000000) != 0)) 3100 { 3101 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff); 3102 3103 if ((SPARC_SETHI_P (fast_int) 3104 && (~high_bits & 0xffffffff) == 0) 3105 || SPARC_SIMM13_P (fast_int)) 3106 emit_insn (gen_safe_SET64 (temp, fast_int)); 3107 else 3108 sparc_emit_set_const64 (temp, GEN_INT (fast_int)); 3109 } 3110 else 3111 { 3112 rtx negated_const; 3113 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) | 3114 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32)); 3115 sparc_emit_set_const64 (temp, negated_const); 3116 } 3117 3118 /* If we are XOR'ing with -1, then we should emit a one's complement 3119 instead. This way the combiner will notice logical operations 3120 such as ANDN later on and substitute. */ 3121 if (trailing_bits == 0x3ff) 3122 { 3123 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp))); 3124 } 3125 else 3126 { 3127 emit_insn (gen_rtx_SET (op0, 3128 gen_safe_XOR64 (temp, 3129 (-0x400 | trailing_bits)))); 3130 } 3131 return; 3132 } 3133 3134 /* 1) sethi %hi(xxx), %reg 3135 * or %reg, %lo(xxx), %reg 3136 * sllx %reg, yyy, %reg 3137 * 3138 * ??? This is just a generalized version of the low_bits==0 3139 * thing above, FIXME... 3140 */ 3141 if ((highest_bit_set - lowest_bit_set) < 32) 3142 { 3143 unsigned HOST_WIDE_INT focus_bits = 3144 create_simple_focus_bits (high_bits, low_bits, 3145 lowest_bit_set, 0); 3146 3147 /* We can't get here in this state. */ 3148 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32); 3149 3150 /* So what we know is that the set bits straddle the 3151 middle of the 64-bit word. */ 3152 sparc_emit_set_const64_quick2 (op0, temp, 3153 focus_bits, 0, 3154 lowest_bit_set); 3155 return; 3156 } 3157 3158 /* 1) sethi %hi(high_bits), %reg 3159 * or %reg, %lo(high_bits), %reg 3160 * sllx %reg, 32, %reg 3161 * or %reg, low_bits, %reg 3162 */ 3163 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0)) 3164 { 3165 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32); 3166 return; 3167 } 3168 3169 /* The easiest way when all else fails, is full decomposition. */ 3170 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits); 3171 } 3172 3173 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */ 3174 3175 static bool 3176 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) 3177 { 3178 *p1 = SPARC_ICC_REG; 3179 *p2 = SPARC_FCC_REG; 3180 return true; 3181 } 3182 3183 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */ 3184 3185 static unsigned int 3186 sparc_min_arithmetic_precision (void) 3187 { 3188 return 32; 3189 } 3190 3191 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, 3192 return the mode to be used for the comparison. For floating-point, 3193 CCFP[E]mode is used. 
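(The switch below selects CCFPEmode for LT, LE, GT and GE, the comparisons that must signal on unordered operands, and CCFPmode for the rest.)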
CCNZmode should be used when the first operand 3194 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special 3195 processing is needed. */ 3196 3197 machine_mode 3198 select_cc_mode (enum rtx_code op, rtx x, rtx y) 3199 { 3200 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 3201 { 3202 switch (op) 3203 { 3204 case EQ: 3205 case NE: 3206 case UNORDERED: 3207 case ORDERED: 3208 case UNLT: 3209 case UNLE: 3210 case UNGT: 3211 case UNGE: 3212 case UNEQ: 3213 case LTGT: 3214 return CCFPmode; 3215 3216 case LT: 3217 case LE: 3218 case GT: 3219 case GE: 3220 return CCFPEmode; 3221 3222 default: 3223 gcc_unreachable (); 3224 } 3225 } 3226 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS 3227 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT) 3228 && y == const0_rtx) 3229 { 3230 if (TARGET_ARCH64 && GET_MODE (x) == DImode) 3231 return CCXNZmode; 3232 else 3233 return CCNZmode; 3234 } 3235 else 3236 { 3237 /* This is for the cmp<mode>_sne pattern. */ 3238 if (GET_CODE (x) == NOT && y == constm1_rtx) 3239 { 3240 if (TARGET_ARCH64 && GET_MODE (x) == DImode) 3241 return CCXCmode; 3242 else 3243 return CCCmode; 3244 } 3245 3246 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */ 3247 if (!TARGET_ARCH64 && GET_MODE (x) == DImode) 3248 { 3249 if (GET_CODE (y) == UNSPEC 3250 && (XINT (y, 1) == UNSPEC_ADDV 3251 || XINT (y, 1) == UNSPEC_SUBV 3252 || XINT (y, 1) == UNSPEC_NEGV)) 3253 return CCVmode; 3254 else 3255 return CCCmode; 3256 } 3257 3258 if (TARGET_ARCH64 && GET_MODE (x) == DImode) 3259 return CCXmode; 3260 else 3261 return CCmode; 3262 } 3263 } 3264 3265 /* Emit the compare insn and return the CC reg for a CODE comparison 3266 with operands X and Y. */ 3267 3268 static rtx 3269 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y) 3270 { 3271 machine_mode mode; 3272 rtx cc_reg; 3273 3274 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC) 3275 return x; 3276 3277 mode = SELECT_CC_MODE (code, x, y); 3278 3279 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the 3280 fcc regs (cse can't tell they're really call clobbered regs and will 3281 remove a duplicate comparison even if there is an intervening function 3282 call - it will then try to reload the cc reg via an int reg which is why 3283 we need the movcc patterns). It is possible to provide the movcc 3284 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two 3285 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be 3286 to tell cse that CCFPE mode registers (even pseudos) are call 3287 clobbered. */ 3288 3289 /* ??? This is an experiment. Rather than making changes to cse which may 3290 or may not be easy/clean, we do our own cse. This is possible because 3291 we will generate hard registers. Cse knows they're call clobbered (it 3292 doesn't know the same thing about pseudos). If we guess wrong, no big 3293 deal, but if we win, great! */ 3294 3295 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 3296 #if 1 /* experiment */ 3297 { 3298 int reg; 3299 /* We cycle through the registers to ensure they're all exercised. */ 3300 static int next_fcc_reg = 0; 3301 /* Previous x,y for each fcc reg. */ 3302 static rtx prev_args[4][2]; 3303 3304 /* Scan prev_args for x,y. 
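The four entries remember the operands last assigned to each %fcc register, so a repeated comparison reuses the same register and the home-grown CSE described above gets a chance to work.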
*/ 3305 for (reg = 0; reg < 4; reg++) 3306 if (prev_args[reg][0] == x && prev_args[reg][1] == y) 3307 break; 3308 if (reg == 4) 3309 { 3310 reg = next_fcc_reg; 3311 prev_args[reg][0] = x; 3312 prev_args[reg][1] = y; 3313 next_fcc_reg = (next_fcc_reg + 1) & 3; 3314 } 3315 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG); 3316 } 3317 #else 3318 cc_reg = gen_reg_rtx (mode); 3319 #endif /* ! experiment */ 3320 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 3321 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG); 3322 else 3323 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG); 3324 3325 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this 3326 will only result in an unrecognizable insn so no point in asserting. */ 3327 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y))); 3328 3329 return cc_reg; 3330 } 3331 3332 3333 /* Emit the compare insn and return the CC reg for the comparison in CMP. */ 3334 3335 rtx 3336 gen_compare_reg (rtx cmp) 3337 { 3338 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1)); 3339 } 3340 3341 /* This function is used for v9 only. 3342 DEST is the target of the Scc insn. 3343 CODE is the code for an Scc's comparison. 3344 X and Y are the values we compare. 3345 3346 This function is needed to turn 3347 3348 (set (reg:SI 110) 3349 (gt (reg:CCX 100 %icc) 3350 (const_int 0))) 3351 into 3352 (set (reg:SI 110) 3353 (gt:DI (reg:CCX 100 %icc) 3354 (const_int 0))) 3355 3356 IE: The instruction recognizer needs to see the mode of the comparison to 3357 find the right instruction. We could use "gt:DI" right in the 3358 define_expand, but leaving it out allows us to handle DI, SI, etc. */ 3359 3360 static int 3361 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y) 3362 { 3363 if (! TARGET_ARCH64 3364 && (GET_MODE (x) == DImode 3365 || GET_MODE (dest) == DImode)) 3366 return 0; 3367 3368 /* Try to use the movrCC insns. */ 3369 if (TARGET_ARCH64 3370 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT 3371 && y == const0_rtx 3372 && v9_regcmp_p (compare_code)) 3373 { 3374 rtx op0 = x; 3375 rtx temp; 3376 3377 /* Special case for op0 != 0. This can be done with one instruction if 3378 dest == x. */ 3379 3380 if (compare_code == NE 3381 && GET_MODE (dest) == DImode 3382 && rtx_equal_p (op0, dest)) 3383 { 3384 emit_insn (gen_rtx_SET (dest, 3385 gen_rtx_IF_THEN_ELSE (DImode, 3386 gen_rtx_fmt_ee (compare_code, DImode, 3387 op0, const0_rtx), 3388 const1_rtx, 3389 dest))); 3390 return 1; 3391 } 3392 3393 if (reg_overlap_mentioned_p (dest, op0)) 3394 { 3395 /* Handle the case where dest == x. 3396 We "early clobber" the result. */ 3397 op0 = gen_reg_rtx (GET_MODE (x)); 3398 emit_move_insn (op0, x); 3399 } 3400 3401 emit_insn (gen_rtx_SET (dest, const0_rtx)); 3402 if (GET_MODE (op0) != DImode) 3403 { 3404 temp = gen_reg_rtx (DImode); 3405 convert_move (temp, op0, 0); 3406 } 3407 else 3408 temp = op0; 3409 emit_insn (gen_rtx_SET (dest, 3410 gen_rtx_IF_THEN_ELSE (GET_MODE (dest), 3411 gen_rtx_fmt_ee (compare_code, DImode, 3412 temp, const0_rtx), 3413 const1_rtx, 3414 dest))); 3415 return 1; 3416 } 3417 else 3418 { 3419 x = gen_compare_reg_1 (compare_code, x, y); 3420 y = const0_rtx; 3421 3422 emit_insn (gen_rtx_SET (dest, const0_rtx)); 3423 emit_insn (gen_rtx_SET (dest, 3424 gen_rtx_IF_THEN_ELSE (GET_MODE (dest), 3425 gen_rtx_fmt_ee (compare_code, 3426 GET_MODE (x), x, y), 3427 const1_rtx, dest))); 3428 return 1; 3429 } 3430 } 3431 3432 3433 /* Emit an scc insn. 
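An scc insn stores the boolean result of a comparison into a register, e.g. dest = (x < y) ? 1 : 0.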
For seq, sne, sgeu, and sltu, we can do this 3434 without jumps using the addx/subx instructions. */ 3435 3436 bool 3437 emit_scc_insn (rtx operands[]) 3438 { 3439 rtx tem, x, y; 3440 enum rtx_code code; 3441 machine_mode mode; 3442 3443 /* The quad-word fp compare library routines all return nonzero to indicate 3444 true, which is different from the equivalent libgcc routines, so we must 3445 handle them specially here. */ 3446 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD) 3447 { 3448 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3], 3449 GET_CODE (operands[1])); 3450 operands[2] = XEXP (operands[1], 0); 3451 operands[3] = XEXP (operands[1], 1); 3452 } 3453 3454 code = GET_CODE (operands[1]); 3455 x = operands[2]; 3456 y = operands[3]; 3457 mode = GET_MODE (x); 3458 3459 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has 3460 more applications). The exception to this is "reg != 0" which can 3461 be done in one instruction on v9 (so we do it). */ 3462 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode)) 3463 { 3464 if (y != const0_rtx) 3465 x = force_reg (mode, gen_rtx_XOR (mode, x, y)); 3466 3467 rtx pat = gen_rtx_SET (operands[0], 3468 gen_rtx_fmt_ee (code, GET_MODE (operands[0]), 3469 x, const0_rtx)); 3470 3471 /* If we can use addx/subx or addxc, add a clobber for CC. */ 3472 if (mode == SImode || (code == NE && TARGET_VIS3)) 3473 { 3474 rtx clobber 3475 = gen_rtx_CLOBBER (VOIDmode, 3476 gen_rtx_REG (mode == SImode ? CCmode : CCXmode, 3477 SPARC_ICC_REG)); 3478 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber)); 3479 } 3480 3481 emit_insn (pat); 3482 return true; 3483 } 3484 3485 /* We can do LTU in DImode using the addxc instruction with VIS3. */ 3486 if (TARGET_ARCH64 3487 && mode == DImode 3488 && !((code == LTU || code == GTU) && TARGET_VIS3) 3489 && gen_v9_scc (operands[0], code, x, y)) 3490 return true; 3491 3492 /* We can do LTU and GEU using the addx/subx instructions too. And 3493 for GTU/LEU, if both operands are registers, swap them and fall 3494 back to the easy case. */ 3495 if (code == GTU || code == LEU) 3496 { 3497 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 3498 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)) 3499 { 3500 tem = x; 3501 x = y; 3502 y = tem; 3503 code = swap_condition (code); 3504 } 3505 } 3506 3507 if (code == LTU || code == GEU) 3508 { 3509 emit_insn (gen_rtx_SET (operands[0], 3510 gen_rtx_fmt_ee (code, GET_MODE (operands[0]), 3511 gen_compare_reg_1 (code, x, y), 3512 const0_rtx))); 3513 return true; 3514 } 3515 3516 /* All the possibilities to use addx/subx based sequences have been 3517 exhausted, so try for a 3-instruction sequence using v9 conditional 3518 moves. */ 3519 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y)) 3520 return true; 3521 3522 /* Nope, do branches. */ 3523 return false; 3524 } 3525 3526 /* Emit a conditional jump insn for the v9 architecture using comparison code 3527 CODE and jump target LABEL. 3528 This function exists to take advantage of the v9 brxx insns. */ 3529 3530 static void 3531 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label) 3532 { 3533 emit_jump_insn (gen_rtx_SET (pc_rtx, 3534 gen_rtx_IF_THEN_ELSE (VOIDmode, 3535 gen_rtx_fmt_ee (code, GET_MODE (op0), 3536 op0, const0_rtx), 3537 gen_rtx_LABEL_REF (VOIDmode, label), 3538 pc_rtx))); 3539 } 3540 3541 /* Emit a conditional jump insn for the UA2011 architecture using 3542 comparison code CODE and jump target LABEL.
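The cbcond instruction fuses the comparison and the branch into a single insn; its immediate operand is limited to a 5-bit signed value, which is why the caller below checks SPARC_SIMM5_P before using it.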
This function exists 3543 to take advantage of the UA2011 Compare and Branch insns. */ 3544 3545 static void 3546 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label) 3547 { 3548 rtx if_then_else; 3549 3550 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode, 3551 gen_rtx_fmt_ee(code, GET_MODE(op0), 3552 op0, op1), 3553 gen_rtx_LABEL_REF (VOIDmode, label), 3554 pc_rtx); 3555 3556 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else)); 3557 } 3558 3559 void 3560 emit_conditional_branch_insn (rtx operands[]) 3561 { 3562 /* The quad-word fp compare library routines all return nonzero to indicate 3563 true, which is different from the equivalent libgcc routines, so we must 3564 handle them specially here. */ 3565 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD) 3566 { 3567 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2], 3568 GET_CODE (operands[0])); 3569 operands[1] = XEXP (operands[0], 0); 3570 operands[2] = XEXP (operands[0], 1); 3571 } 3572 3573 /* If we can tell early on that the comparison is against a constant 3574 that won't fit in the 5-bit signed immediate field of a cbcond, 3575 use one of the other v9 conditional branch sequences. */ 3576 if (TARGET_CBCOND 3577 && GET_CODE (operands[1]) == REG 3578 && (GET_MODE (operands[1]) == SImode 3579 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode)) 3580 && (GET_CODE (operands[2]) != CONST_INT 3581 || SPARC_SIMM5_P (INTVAL (operands[2])))) 3582 { 3583 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]); 3584 return; 3585 } 3586 3587 if (TARGET_ARCH64 && operands[2] == const0_rtx 3588 && GET_CODE (operands[1]) == REG 3589 && GET_MODE (operands[1]) == DImode) 3590 { 3591 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]); 3592 return; 3593 } 3594 3595 operands[1] = gen_compare_reg (operands[0]); 3596 operands[2] = const0_rtx; 3597 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode, 3598 operands[1], operands[2]); 3599 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2], 3600 operands[3])); 3601 } 3602 3603 3604 /* Generate a DFmode part of a hard TFmode register. 3605 REG is the TFmode hard register, LOW is 1 for the 3606 low 64bit of the register and 0 otherwise. 3607 */ 3608 rtx 3609 gen_df_reg (rtx reg, int low) 3610 { 3611 int regno = REGNO (reg); 3612 3613 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0)) 3614 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2; 3615 return gen_rtx_REG (DFmode, regno); 3616 } 3617 3618 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value. 3619 Unlike normal calls, TFmode operands are passed by reference. It is 3620 assumed that no more than 3 operands are required. */ 3621 3622 static void 3623 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands) 3624 { 3625 rtx ret_slot = NULL, arg[3], func_sym; 3626 int i; 3627 3628 /* We only expect to be called for conversions, unary, and binary ops. */ 3629 gcc_assert (nargs == 2 || nargs == 3); 3630 3631 for (i = 0; i < nargs; ++i) 3632 { 3633 rtx this_arg = operands[i]; 3634 rtx this_slot; 3635 3636 /* TFmode arguments and return values are passed by reference. */ 3637 if (GET_MODE (this_arg) == TFmode) 3638 { 3639 int force_stack_temp; 3640 3641 force_stack_temp = 0; 3642 if (TARGET_BUGGY_QP_LIB && i == 0) 3643 force_stack_temp = 1; 3644 3645 if (GET_CODE (this_arg) == MEM 3646 && ! 
force_stack_temp) 3647 { 3648 tree expr = MEM_EXPR (this_arg); 3649 if (expr) 3650 mark_addressable (expr); 3651 this_arg = XEXP (this_arg, 0); 3652 } 3653 else if (CONSTANT_P (this_arg) 3654 && ! force_stack_temp) 3655 { 3656 this_slot = force_const_mem (TFmode, this_arg); 3657 this_arg = XEXP (this_slot, 0); 3658 } 3659 else 3660 { 3661 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode)); 3662 3663 /* Operand 0 is the return value. We'll copy it out later. */ 3664 if (i > 0) 3665 emit_move_insn (this_slot, this_arg); 3666 else 3667 ret_slot = this_slot; 3668 3669 this_arg = XEXP (this_slot, 0); 3670 } 3671 } 3672 3673 arg[i] = this_arg; 3674 } 3675 3676 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name); 3677 3678 if (GET_MODE (operands[0]) == TFmode) 3679 { 3680 if (nargs == 2) 3681 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3682 arg[0], GET_MODE (arg[0]), 3683 arg[1], GET_MODE (arg[1])); 3684 else 3685 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3686 arg[0], GET_MODE (arg[0]), 3687 arg[1], GET_MODE (arg[1]), 3688 arg[2], GET_MODE (arg[2])); 3689 3690 if (ret_slot) 3691 emit_move_insn (operands[0], ret_slot); 3692 } 3693 else 3694 { 3695 rtx ret; 3696 3697 gcc_assert (nargs == 2); 3698 3699 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL, 3700 GET_MODE (operands[0]), 3701 arg[1], GET_MODE (arg[1])); 3702 3703 if (ret != operands[0]) 3704 emit_move_insn (operands[0], ret); 3705 } 3706 } 3707 3708 /* Expand soft-float TFmode calls to sparc abi routines. */ 3709 3710 static void 3711 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands) 3712 { 3713 const char *func; 3714 3715 switch (code) 3716 { 3717 case PLUS: 3718 func = "_Qp_add"; 3719 break; 3720 case MINUS: 3721 func = "_Qp_sub"; 3722 break; 3723 case MULT: 3724 func = "_Qp_mul"; 3725 break; 3726 case DIV: 3727 func = "_Qp_div"; 3728 break; 3729 default: 3730 gcc_unreachable (); 3731 } 3732 3733 emit_soft_tfmode_libcall (func, 3, operands); 3734 } 3735 3736 static void 3737 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands) 3738 { 3739 const char *func; 3740 3741 gcc_assert (code == SQRT); 3742 func = "_Qp_sqrt"; 3743 3744 emit_soft_tfmode_libcall (func, 2, operands); 3745 } 3746 3747 static void 3748 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands) 3749 { 3750 const char *func; 3751 3752 switch (code) 3753 { 3754 case FLOAT_EXTEND: 3755 switch (GET_MODE (operands[1])) 3756 { 3757 case E_SFmode: 3758 func = "_Qp_stoq"; 3759 break; 3760 case E_DFmode: 3761 func = "_Qp_dtoq"; 3762 break; 3763 default: 3764 gcc_unreachable (); 3765 } 3766 break; 3767 3768 case FLOAT_TRUNCATE: 3769 switch (GET_MODE (operands[0])) 3770 { 3771 case E_SFmode: 3772 func = "_Qp_qtos"; 3773 break; 3774 case E_DFmode: 3775 func = "_Qp_qtod"; 3776 break; 3777 default: 3778 gcc_unreachable (); 3779 } 3780 break; 3781 3782 case FLOAT: 3783 switch (GET_MODE (operands[1])) 3784 { 3785 case E_SImode: 3786 func = "_Qp_itoq"; 3787 if (TARGET_ARCH64) 3788 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]); 3789 break; 3790 case E_DImode: 3791 func = "_Qp_xtoq"; 3792 break; 3793 default: 3794 gcc_unreachable (); 3795 } 3796 break; 3797 3798 case UNSIGNED_FLOAT: 3799 switch (GET_MODE (operands[1])) 3800 { 3801 case E_SImode: 3802 func = "_Qp_uitoq"; 3803 if (TARGET_ARCH64) 3804 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]); 3805 break; 3806 case E_DImode: 3807 func = "_Qp_uxtoq"; 3808 break; 3809 default: 3810 gcc_unreachable (); 3811 } 3812 break; 3813 3814 case FIX: 3815 switch (GET_MODE 
(operands[0])) 3816 { 3817 case E_SImode: 3818 func = "_Qp_qtoi"; 3819 break; 3820 case E_DImode: 3821 func = "_Qp_qtox"; 3822 break; 3823 default: 3824 gcc_unreachable (); 3825 } 3826 break; 3827 3828 case UNSIGNED_FIX: 3829 switch (GET_MODE (operands[0])) 3830 { 3831 case E_SImode: 3832 func = "_Qp_qtoui"; 3833 break; 3834 case E_DImode: 3835 func = "_Qp_qtoux"; 3836 break; 3837 default: 3838 gcc_unreachable (); 3839 } 3840 break; 3841 3842 default: 3843 gcc_unreachable (); 3844 } 3845 3846 emit_soft_tfmode_libcall (func, 2, operands); 3847 } 3848 3849 /* Expand a hard-float tfmode operation. All arguments must be in 3850 registers. */ 3851 3852 static void 3853 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands) 3854 { 3855 rtx op, dest; 3856 3857 if (GET_RTX_CLASS (code) == RTX_UNARY) 3858 { 3859 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]); 3860 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]); 3861 } 3862 else 3863 { 3864 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]); 3865 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); 3866 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]), 3867 operands[1], operands[2]); 3868 } 3869 3870 if (register_operand (operands[0], VOIDmode)) 3871 dest = operands[0]; 3872 else 3873 dest = gen_reg_rtx (GET_MODE (operands[0])); 3874 3875 emit_insn (gen_rtx_SET (dest, op)); 3876 3877 if (dest != operands[0]) 3878 emit_move_insn (operands[0], dest); 3879 } 3880 3881 void 3882 emit_tfmode_binop (enum rtx_code code, rtx *operands) 3883 { 3884 if (TARGET_HARD_QUAD) 3885 emit_hard_tfmode_operation (code, operands); 3886 else 3887 emit_soft_tfmode_binop (code, operands); 3888 } 3889 3890 void 3891 emit_tfmode_unop (enum rtx_code code, rtx *operands) 3892 { 3893 if (TARGET_HARD_QUAD) 3894 emit_hard_tfmode_operation (code, operands); 3895 else 3896 emit_soft_tfmode_unop (code, operands); 3897 } 3898 3899 void 3900 emit_tfmode_cvt (enum rtx_code code, rtx *operands) 3901 { 3902 if (TARGET_HARD_QUAD) 3903 emit_hard_tfmode_operation (code, operands); 3904 else 3905 emit_soft_tfmode_cvt (code, operands); 3906 } 3907 3908 /* Return nonzero if a branch/jump/call instruction will be emitting 3909 nop into its delay slot. */ 3910 3911 int 3912 empty_delay_slot (rtx_insn *insn) 3913 { 3914 rtx seq; 3915 3916 /* If no previous instruction (should not happen), return true. */ 3917 if (PREV_INSN (insn) == NULL) 3918 return 1; 3919 3920 seq = NEXT_INSN (PREV_INSN (insn)); 3921 if (GET_CODE (PATTERN (seq)) == SEQUENCE) 3922 return 0; 3923 3924 return 1; 3925 } 3926 3927 /* Return nonzero if we should emit a nop after a cbcond instruction. 3928 The cbcond instruction does not have a delay slot, however there is 3929 a severe performance penalty if a control transfer appears right 3930 after a cbcond. Therefore we emit a nop when we detect this 3931 situation. */ 3932 3933 int 3934 emit_cbcond_nop (rtx_insn *insn) 3935 { 3936 rtx next = next_active_insn (insn); 3937 3938 if (!next) 3939 return 1; 3940 3941 if (NONJUMP_INSN_P (next) 3942 && GET_CODE (PATTERN (next)) == SEQUENCE) 3943 next = XVECEXP (PATTERN (next), 0, 0); 3944 else if (CALL_P (next) 3945 && GET_CODE (PATTERN (next)) == PARALLEL) 3946 { 3947 rtx delay = XVECEXP (PATTERN (next), 0, 1); 3948 3949 if (GET_CODE (delay) == RETURN) 3950 { 3951 /* It's a sibling call. Do not emit the nop if we're going 3952 to emit something other than the jump itself as the first 3953 instruction of the sibcall sequence. 
*/ 3954 if (sparc_leaf_function_p || TARGET_FLAT) 3955 return 0; 3956 } 3957 } 3958 3959 if (NONJUMP_INSN_P (next)) 3960 return 0; 3961 3962 return 1; 3963 } 3964 3965 /* Return nonzero if TRIAL can go into the call delay slot. */ 3966 3967 int 3968 eligible_for_call_delay (rtx_insn *trial) 3969 { 3970 rtx pat; 3971 3972 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE) 3973 return 0; 3974 3975 /* The only problematic cases are TLS sequences with Sun as/ld. */ 3976 if ((TARGET_GNU_TLS && HAVE_GNU_LD) || !TARGET_TLS) 3977 return 1; 3978 3979 pat = PATTERN (trial); 3980 3981 /* We must reject tgd_add{32|64}, i.e. 3982 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD))) 3983 and tldm_add{32|64}, i.e. 3984 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM))) 3985 for Sun as/ld. */ 3986 if (GET_CODE (pat) == SET 3987 && GET_CODE (SET_SRC (pat)) == PLUS) 3988 { 3989 rtx unspec = XEXP (SET_SRC (pat), 1); 3990 3991 if (GET_CODE (unspec) == UNSPEC 3992 && (XINT (unspec, 1) == UNSPEC_TLSGD 3993 || XINT (unspec, 1) == UNSPEC_TLSLDM)) 3994 return 0; 3995 } 3996 3997 return 1; 3998 } 3999 4000 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore' 4001 instruction. RETURN_P is true if the v9 variant 'return' is to be 4002 considered in the test too. 4003 4004 TRIAL must be a SET whose destination is a REG appropriate for the 4005 'restore' instruction or, if RETURN_P is true, for the 'return' 4006 instruction. */ 4007 4008 static int 4009 eligible_for_restore_insn (rtx trial, bool return_p) 4010 { 4011 rtx pat = PATTERN (trial); 4012 rtx src = SET_SRC (pat); 4013 bool src_is_freg = false; 4014 rtx src_reg; 4015 4016 /* Since we now can do moves between float and integer registers when 4017 VIS3 is enabled, we have to catch this case. We can allow such 4018 moves when doing a 'return' however. */ 4019 src_reg = src; 4020 if (GET_CODE (src_reg) == SUBREG) 4021 src_reg = SUBREG_REG (src_reg); 4022 if (GET_CODE (src_reg) == REG 4023 && SPARC_FP_REG_P (REGNO (src_reg))) 4024 src_is_freg = true; 4025 4026 /* The 'restore src,%g0,dest' pattern for word mode and below. */ 4027 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT 4028 && arith_operand (src, GET_MODE (src)) 4029 && ! src_is_freg) 4030 { 4031 if (TARGET_ARCH64) 4032 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode); 4033 else 4034 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode); 4035 } 4036 4037 /* The 'restore src,%g0,dest' pattern for double-word mode. */ 4038 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT 4039 && arith_double_operand (src, GET_MODE (src)) 4040 && ! src_is_freg) 4041 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode); 4042 4043 /* The 'restore src,%g0,dest' pattern for float if no FPU. */ 4044 else if (! TARGET_FPU && register_operand (src, SFmode)) 4045 return 1; 4046 4047 /* The 'restore src,%g0,dest' pattern for double if no FPU. */ 4048 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode)) 4049 return 1; 4050 4051 /* If we have the 'return' instruction, anything that does not use 4052 local or output registers and can go into a delay slot wins. */ 4053 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1)) 4054 return 1; 4055 4056 /* The 'restore src1,src2,dest' pattern for SImode. 
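   As a rough illustration, the SET source accepted here has the shape
     (plus (reg:SI rs1) (reg_or_simm13:SI rs2))
   which maps directly onto the three-operand 'restore rs1, rs2, dest' form.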
*/ 4057 else if (GET_CODE (src) == PLUS 4058 && register_operand (XEXP (src, 0), SImode) 4059 && arith_operand (XEXP (src, 1), SImode)) 4060 return 1; 4061 4062 /* The 'restore src1,src2,dest' pattern for DImode. */ 4063 else if (GET_CODE (src) == PLUS 4064 && register_operand (XEXP (src, 0), DImode) 4065 && arith_double_operand (XEXP (src, 1), DImode)) 4066 return 1; 4067 4068 /* The 'restore src1,%lo(src2),dest' pattern. */ 4069 else if (GET_CODE (src) == LO_SUM 4070 && ! TARGET_CM_MEDMID 4071 && ((register_operand (XEXP (src, 0), SImode) 4072 && immediate_operand (XEXP (src, 1), SImode)) 4073 || (TARGET_ARCH64 4074 && register_operand (XEXP (src, 0), DImode) 4075 && immediate_operand (XEXP (src, 1), DImode)))) 4076 return 1; 4077 4078 /* The 'restore src,src,dest' pattern. */ 4079 else if (GET_CODE (src) == ASHIFT 4080 && (register_operand (XEXP (src, 0), SImode) 4081 || register_operand (XEXP (src, 0), DImode)) 4082 && XEXP (src, 1) == const1_rtx) 4083 return 1; 4084 4085 return 0; 4086 } 4087 4088 /* Return nonzero if TRIAL can go into the function return's delay slot. */ 4089 4090 int 4091 eligible_for_return_delay (rtx_insn *trial) 4092 { 4093 int regno; 4094 rtx pat; 4095 4096 /* If the function uses __builtin_eh_return, the eh_return machinery 4097 occupies the delay slot. */ 4098 if (crtl->calls_eh_return) 4099 return 0; 4100 4101 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE) 4102 return 0; 4103 4104 /* In the case of a leaf or flat function, anything can go into the slot. */ 4105 if (sparc_leaf_function_p || TARGET_FLAT) 4106 return 1; 4107 4108 if (!NONJUMP_INSN_P (trial)) 4109 return 0; 4110 4111 pat = PATTERN (trial); 4112 if (GET_CODE (pat) == PARALLEL) 4113 { 4114 int i; 4115 4116 if (! TARGET_V9) 4117 return 0; 4118 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) 4119 { 4120 rtx expr = XVECEXP (pat, 0, i); 4121 if (GET_CODE (expr) != SET) 4122 return 0; 4123 if (GET_CODE (SET_DEST (expr)) != REG) 4124 return 0; 4125 regno = REGNO (SET_DEST (expr)); 4126 if (regno >= 8 && regno < 24) 4127 return 0; 4128 } 4129 return !epilogue_renumber (&pat, 1); 4130 } 4131 4132 if (GET_CODE (pat) != SET) 4133 return 0; 4134 4135 if (GET_CODE (SET_DEST (pat)) != REG) 4136 return 0; 4137 4138 regno = REGNO (SET_DEST (pat)); 4139 4140 /* Otherwise, only operations which can be done in tandem with 4141 a `restore' or `return' insn can go into the delay slot. */ 4142 if (regno >= 8 && regno < 24) 4143 return 0; 4144 4145 /* If this instruction sets up floating point register and we have a return 4146 instruction, it can probably go in. But restore will not work 4147 with FP_REGS. */ 4148 if (! SPARC_INT_REG_P (regno)) 4149 return TARGET_V9 && !epilogue_renumber (&pat, 1); 4150 4151 return eligible_for_restore_insn (trial, true); 4152 } 4153 4154 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */ 4155 4156 int 4157 eligible_for_sibcall_delay (rtx_insn *trial) 4158 { 4159 rtx pat; 4160 4161 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE) 4162 return 0; 4163 4164 if (!NONJUMP_INSN_P (trial)) 4165 return 0; 4166 4167 pat = PATTERN (trial); 4168 4169 if (sparc_leaf_function_p || TARGET_FLAT) 4170 { 4171 /* If the tail call is done using the call instruction, 4172 we have to restore %o7 in the delay slot. 
*/ 4173 if (LEAF_SIBCALL_SLOT_RESERVED_P) 4174 return 0; 4175 4176 /* %g1 is used to build the function address */ 4177 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat)) 4178 return 0; 4179 4180 return 1; 4181 } 4182 4183 if (GET_CODE (pat) != SET) 4184 return 0; 4185 4186 /* Otherwise, only operations which can be done in tandem with 4187 a `restore' insn can go into the delay slot. */ 4188 if (GET_CODE (SET_DEST (pat)) != REG 4189 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24) 4190 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat)))) 4191 return 0; 4192 4193 /* If it mentions %o7, it can't go in, because sibcall will clobber it 4194 in most cases. */ 4195 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat)) 4196 return 0; 4197 4198 return eligible_for_restore_insn (trial, false); 4199 } 4200 4201 /* Determine if it's legal to put X into the constant pool. This 4202 is not possible if X contains the address of a symbol that is 4203 not constant (TLS) or not known at final link time (PIC). */ 4204 4205 static bool 4206 sparc_cannot_force_const_mem (machine_mode mode, rtx x) 4207 { 4208 switch (GET_CODE (x)) 4209 { 4210 case CONST_INT: 4211 case CONST_WIDE_INT: 4212 case CONST_DOUBLE: 4213 case CONST_VECTOR: 4214 /* Accept all non-symbolic constants. */ 4215 return false; 4216 4217 case LABEL_REF: 4218 /* Labels are OK iff we are non-PIC. */ 4219 return flag_pic != 0; 4220 4221 case SYMBOL_REF: 4222 /* 'Naked' TLS symbol references are never OK, 4223 non-TLS symbols are OK iff we are non-PIC. */ 4224 if (SYMBOL_REF_TLS_MODEL (x)) 4225 return true; 4226 else 4227 return flag_pic != 0; 4228 4229 case CONST: 4230 return sparc_cannot_force_const_mem (mode, XEXP (x, 0)); 4231 case PLUS: 4232 case MINUS: 4233 return sparc_cannot_force_const_mem (mode, XEXP (x, 0)) 4234 || sparc_cannot_force_const_mem (mode, XEXP (x, 1)); 4235 case UNSPEC: 4236 return true; 4237 default: 4238 gcc_unreachable (); 4239 } 4240 } 4241 4242 /* Global Offset Table support. */ 4243 static GTY(()) rtx got_symbol_rtx = NULL_RTX; 4244 static GTY(()) rtx got_register_rtx = NULL_RTX; 4245 static GTY(()) rtx got_helper_rtx = NULL_RTX; 4246 4247 static GTY(()) bool got_helper_needed = false; 4248 4249 /* Return the SYMBOL_REF for the Global Offset Table. */ 4250 4251 static rtx 4252 sparc_got (void) 4253 { 4254 if (!got_symbol_rtx) 4255 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); 4256 4257 return got_symbol_rtx; 4258 } 4259 4260 /* Wrapper around the load_pcrel_sym{si,di} patterns. */ 4261 4262 static rtx 4263 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2) 4264 { 4265 int orig_flag_pic = flag_pic; 4266 rtx insn; 4267 4268 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */ 4269 flag_pic = 0; 4270 if (TARGET_ARCH64) 4271 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0))); 4272 else 4273 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0))); 4274 flag_pic = orig_flag_pic; 4275 4276 return insn; 4277 } 4278 4279 /* Output the load_pcrel_sym{si,di} patterns. 
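   With delayed branches enabled, the sequence emitted below looks roughly
   like this (illustrative operands, assuming the usual GOT helper):

     sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %reg
     call   __sparc_get_pc_thunk.reg
      add   %reg, %lo(_GLOBAL_OFFSET_TABLE_+4), %reg

   so that, together with the helper, %reg ends up holding the address of
   the GOT.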
*/ 4280 4281 const char * 4282 output_load_pcrel_sym (rtx *operands) 4283 { 4284 if (flag_delayed_branch) 4285 { 4286 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands); 4287 output_asm_insn ("call\t%a2", operands); 4288 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands); 4289 } 4290 else 4291 { 4292 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands); 4293 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands); 4294 output_asm_insn ("call\t%a2", operands); 4295 output_asm_insn (" nop", NULL); 4296 } 4297 4298 if (operands[2] == got_helper_rtx) 4299 got_helper_needed = true; 4300 4301 return ""; 4302 } 4303 4304 #ifdef HAVE_GAS_HIDDEN 4305 # define USE_HIDDEN_LINKONCE 1 4306 #else 4307 # define USE_HIDDEN_LINKONCE 0 4308 #endif 4309 4310 /* Emit code to load the GOT register. */ 4311 4312 void 4313 load_got_register (void) 4314 { 4315 rtx insn; 4316 4317 if (TARGET_VXWORKS_RTP) 4318 { 4319 if (!got_register_rtx) 4320 got_register_rtx = pic_offset_table_rtx; 4321 4322 insn = gen_vxworks_load_got (); 4323 } 4324 else 4325 { 4326 if (!got_register_rtx) 4327 got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM); 4328 4329 /* The GOT symbol is subject to a PC-relative relocation so we need a 4330 helper function to add the PC value and thus get the final value. */ 4331 if (!got_helper_rtx) 4332 { 4333 char name[32]; 4334 4335 /* Skip the leading '%' as that cannot be used in a symbol name. */ 4336 if (USE_HIDDEN_LINKONCE) 4337 sprintf (name, "__sparc_get_pc_thunk.%s", 4338 reg_names[REGNO (got_register_rtx)] + 1); 4339 else 4340 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", 4341 REGNO (got_register_rtx)); 4342 4343 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 4344 } 4345 4346 insn 4347 = gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx); 4348 } 4349 4350 emit_insn (insn); 4351 } 4352 4353 /* Ensure that we are not using patterns that are not OK with PIC. */ 4354 4355 int 4356 check_pic (int i) 4357 { 4358 rtx op; 4359 4360 switch (flag_pic) 4361 { 4362 case 1: 4363 op = recog_data.operand[i]; 4364 gcc_assert (GET_CODE (op) != SYMBOL_REF 4365 && (GET_CODE (op) != CONST 4366 || (GET_CODE (XEXP (op, 0)) == MINUS 4367 && XEXP (XEXP (op, 0), 0) == sparc_got () 4368 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST))); 4369 /* fallthrough */ 4370 case 2: 4371 default: 4372 return 1; 4373 } 4374 } 4375 4376 /* Return true if X is an address which needs a temporary register when 4377 reloaded while generating PIC code. */ 4378 4379 int 4380 pic_address_needs_scratch (rtx x) 4381 { 4382 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */ 4383 if (GET_CODE (x) == CONST 4384 && GET_CODE (XEXP (x, 0)) == PLUS 4385 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF 4386 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT 4387 && !SMALL_INT (XEXP (XEXP (x, 0), 1))) 4388 return 1; 4389 4390 return 0; 4391 } 4392 4393 /* Determine if a given RTX is a valid constant. We already know this 4394 satisfies CONSTANT_P. */ 4395 4396 static bool 4397 sparc_legitimate_constant_p (machine_mode mode, rtx x) 4398 { 4399 switch (GET_CODE (x)) 4400 { 4401 case CONST: 4402 case SYMBOL_REF: 4403 if (sparc_tls_referenced_p (x)) 4404 return false; 4405 break; 4406 4407 case CONST_DOUBLE: 4408 /* Floating point constants are generally not ok. 4409 The only exception is 0.0 and all-ones in VIS. 
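   (Both of these can be synthesized directly in an FP register with the
   VIS fzero/fone family of instructions, so no constant pool entry is
   needed for them.)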
*/ 4410 if (TARGET_VIS 4411 && SCALAR_FLOAT_MODE_P (mode) 4412 && (const_zero_operand (x, mode) 4413 || const_all_ones_operand (x, mode))) 4414 return true; 4415 4416 return false; 4417 4418 case CONST_VECTOR: 4419 /* Vector constants are generally not ok. 4420 The only exception is 0 or -1 in VIS. */ 4421 if (TARGET_VIS 4422 && (const_zero_operand (x, mode) 4423 || const_all_ones_operand (x, mode))) 4424 return true; 4425 4426 return false; 4427 4428 default: 4429 break; 4430 } 4431 4432 return true; 4433 } 4434 4435 /* Determine if a given RTX is a valid constant address. */ 4436 4437 bool 4438 constant_address_p (rtx x) 4439 { 4440 switch (GET_CODE (x)) 4441 { 4442 case LABEL_REF: 4443 case CONST_INT: 4444 case HIGH: 4445 return true; 4446 4447 case CONST: 4448 if (flag_pic && pic_address_needs_scratch (x)) 4449 return false; 4450 return sparc_legitimate_constant_p (Pmode, x); 4451 4452 case SYMBOL_REF: 4453 return !flag_pic && sparc_legitimate_constant_p (Pmode, x); 4454 4455 default: 4456 return false; 4457 } 4458 } 4459 4460 /* Nonzero if the constant value X is a legitimate general operand 4461 when generating PIC code. It is given that flag_pic is on and 4462 that X satisfies CONSTANT_P. */ 4463 4464 bool 4465 legitimate_pic_operand_p (rtx x) 4466 { 4467 if (pic_address_needs_scratch (x)) 4468 return false; 4469 if (sparc_tls_referenced_p (x)) 4470 return false; 4471 return true; 4472 } 4473 4474 /* Return true if X is a representation of the PIC register. */ 4475 4476 static bool 4477 sparc_pic_register_p (rtx x) 4478 { 4479 if (!REG_P (x) || !pic_offset_table_rtx) 4480 return false; 4481 4482 if (x == pic_offset_table_rtx) 4483 return true; 4484 4485 if (!HARD_REGISTER_P (pic_offset_table_rtx) 4486 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress) 4487 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) 4488 return true; 4489 4490 return false; 4491 } 4492 4493 #define RTX_OK_FOR_OFFSET_P(X, MODE) \ 4494 (CONST_INT_P (X) \ 4495 && INTVAL (X) >= -0x1000 \ 4496 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE))) 4497 4498 #define RTX_OK_FOR_OLO10_P(X, MODE) \ 4499 (CONST_INT_P (X) \ 4500 && INTVAL (X) >= -0x1000 \ 4501 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE))) 4502 4503 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook. 4504 4505 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT 4506 ordinarily. This changes a bit when generating PIC. */ 4507 4508 static bool 4509 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict) 4510 { 4511 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL; 4512 4513 if (REG_P (addr) || GET_CODE (addr) == SUBREG) 4514 rs1 = addr; 4515 else if (GET_CODE (addr) == PLUS) 4516 { 4517 rs1 = XEXP (addr, 0); 4518 rs2 = XEXP (addr, 1); 4519 4520 /* Canonicalize. REG comes first, if there are no regs, 4521 LO_SUM comes first. */ 4522 if (!REG_P (rs1) 4523 && GET_CODE (rs1) != SUBREG 4524 && (REG_P (rs2) 4525 || GET_CODE (rs2) == SUBREG 4526 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM))) 4527 { 4528 rs1 = XEXP (addr, 1); 4529 rs2 = XEXP (addr, 0); 4530 } 4531 4532 if ((flag_pic == 1 4533 && sparc_pic_register_p (rs1) 4534 && !REG_P (rs2) 4535 && GET_CODE (rs2) != SUBREG 4536 && GET_CODE (rs2) != LO_SUM 4537 && GET_CODE (rs2) != MEM 4538 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2)) 4539 && (! 
symbolic_operand (rs2, VOIDmode) || mode == Pmode) 4540 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2))) 4541 || ((REG_P (rs1) 4542 || GET_CODE (rs1) == SUBREG) 4543 && RTX_OK_FOR_OFFSET_P (rs2, mode))) 4544 { 4545 imm1 = rs2; 4546 rs2 = NULL; 4547 } 4548 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG) 4549 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG)) 4550 { 4551 /* We prohibit REG + REG for TFmode when there are no quad move insns 4552 and we consequently need to split. We do this because REG+REG 4553 is not an offsettable address. If we get the situation in reload 4554 where source and destination of a movtf pattern are both MEMs with 4555 REG+REG address, then only one of them gets converted to an 4556 offsettable address. */ 4557 if (mode == TFmode 4558 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD)) 4559 return 0; 4560 4561 /* Likewise for TImode, but in all cases. */ 4562 if (mode == TImode) 4563 return 0; 4564 4565 /* We prohibit REG + REG on ARCH32 if not optimizing for 4566 DFmode/DImode because then mem_min_alignment is likely to be zero 4567 after reload and the forced split would lack a matching splitter 4568 pattern. */ 4569 if (TARGET_ARCH32 && !optimize 4570 && (mode == DFmode || mode == DImode)) 4571 return 0; 4572 } 4573 else if (USE_AS_OFFSETABLE_LO10 4574 && GET_CODE (rs1) == LO_SUM 4575 && TARGET_ARCH64 4576 && ! TARGET_CM_MEDMID 4577 && RTX_OK_FOR_OLO10_P (rs2, mode)) 4578 { 4579 rs2 = NULL; 4580 imm1 = XEXP (rs1, 1); 4581 rs1 = XEXP (rs1, 0); 4582 if (!CONSTANT_P (imm1) 4583 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1))) 4584 return 0; 4585 } 4586 } 4587 else if (GET_CODE (addr) == LO_SUM) 4588 { 4589 rs1 = XEXP (addr, 0); 4590 imm1 = XEXP (addr, 1); 4591 4592 if (!CONSTANT_P (imm1) 4593 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1))) 4594 return 0; 4595 4596 /* We can't allow TFmode in 32-bit mode, because an offset greater 4597 than the alignment (8) may cause the LO_SUM to overflow. */ 4598 if (mode == TFmode && TARGET_ARCH32) 4599 return 0; 4600 4601 /* During reload, accept the HIGH+LO_SUM construct generated by 4602 sparc_legitimize_reload_address. */ 4603 if (reload_in_progress 4604 && GET_CODE (rs1) == HIGH 4605 && XEXP (rs1, 0) == imm1) 4606 return 1; 4607 } 4608 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr)) 4609 return 1; 4610 else 4611 return 0; 4612 4613 if (GET_CODE (rs1) == SUBREG) 4614 rs1 = SUBREG_REG (rs1); 4615 if (!REG_P (rs1)) 4616 return 0; 4617 4618 if (rs2) 4619 { 4620 if (GET_CODE (rs2) == SUBREG) 4621 rs2 = SUBREG_REG (rs2); 4622 if (!REG_P (rs2)) 4623 return 0; 4624 } 4625 4626 if (strict) 4627 { 4628 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1)) 4629 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2)))) 4630 return 0; 4631 } 4632 else 4633 { 4634 if ((! SPARC_INT_REG_P (REGNO (rs1)) 4635 && REGNO (rs1) != FRAME_POINTER_REGNUM 4636 && REGNO (rs1) < FIRST_PSEUDO_REGISTER) 4637 || (rs2 4638 && (! SPARC_INT_REG_P (REGNO (rs2)) 4639 && REGNO (rs2) != FRAME_POINTER_REGNUM 4640 && REGNO (rs2) < FIRST_PSEUDO_REGISTER))) 4641 return 0; 4642 } 4643 return 1; 4644 } 4645 4646 /* Return the SYMBOL_REF for the tls_get_addr function. */ 4647 4648 static GTY(()) rtx sparc_tls_symbol = NULL_RTX; 4649 4650 static rtx 4651 sparc_tls_get_addr (void) 4652 { 4653 if (!sparc_tls_symbol) 4654 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr"); 4655 4656 return sparc_tls_symbol; 4657 } 4658 4659 /* Return the Global Offset Table to be used in TLS mode. 
*/ 4660 4661 static rtx 4662 sparc_tls_got (void) 4663 { 4664 /* In PIC mode, this is just the PIC offset table. */ 4665 if (flag_pic) 4666 { 4667 crtl->uses_pic_offset_table = 1; 4668 return pic_offset_table_rtx; 4669 } 4670 4671 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for 4672 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */ 4673 if (TARGET_SUN_TLS && TARGET_ARCH32) 4674 { 4675 load_got_register (); 4676 return got_register_rtx; 4677 } 4678 4679 /* In all other cases, we load a new pseudo with the GOT symbol. */ 4680 return copy_to_reg (sparc_got ()); 4681 } 4682 4683 /* Return true if X contains a thread-local symbol. */ 4684 4685 static bool 4686 sparc_tls_referenced_p (rtx x) 4687 { 4688 if (!TARGET_HAVE_TLS) 4689 return false; 4690 4691 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) 4692 x = XEXP (XEXP (x, 0), 0); 4693 4694 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) 4695 return true; 4696 4697 /* That's all we handle in sparc_legitimize_tls_address for now. */ 4698 return false; 4699 } 4700 4701 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute 4702 this (thread-local) address. */ 4703 4704 static rtx 4705 sparc_legitimize_tls_address (rtx addr) 4706 { 4707 rtx temp1, temp2, temp3, ret, o0, got; 4708 rtx_insn *insn; 4709 4710 gcc_assert (can_create_pseudo_p ()); 4711 4712 if (GET_CODE (addr) == SYMBOL_REF) 4713 /* Although the various sethi/or sequences generate SImode values, many of 4714 them can be transformed by the linker when relaxing and, if relaxing to 4715 local-exec, will become a sethi/xor pair, which is signed and therefore 4716 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these 4717 values be spilled onto the stack in 64-bit mode. 
*/ 4718 switch (SYMBOL_REF_TLS_MODEL (addr)) 4719 { 4720 case TLS_MODEL_GLOBAL_DYNAMIC: 4721 start_sequence (); 4722 temp1 = gen_reg_rtx (Pmode); 4723 temp2 = gen_reg_rtx (Pmode); 4724 ret = gen_reg_rtx (Pmode); 4725 o0 = gen_rtx_REG (Pmode, 8); 4726 got = sparc_tls_got (); 4727 if (TARGET_ARCH32) 4728 { 4729 emit_insn (gen_tgd_hi22si (temp1, addr)); 4730 emit_insn (gen_tgd_lo10si (temp2, temp1, addr)); 4731 emit_insn (gen_tgd_addsi (o0, got, temp2, addr)); 4732 insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (), 4733 addr, const1_rtx)); 4734 } 4735 else 4736 { 4737 emit_insn (gen_tgd_hi22di (temp1, addr)); 4738 emit_insn (gen_tgd_lo10di (temp2, temp1, addr)); 4739 emit_insn (gen_tgd_adddi (o0, got, temp2, addr)); 4740 insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (), 4741 addr, const1_rtx)); 4742 } 4743 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0); 4744 RTL_CONST_CALL_P (insn) = 1; 4745 insn = get_insns (); 4746 end_sequence (); 4747 emit_libcall_block (insn, ret, o0, addr); 4748 break; 4749 4750 case TLS_MODEL_LOCAL_DYNAMIC: 4751 start_sequence (); 4752 temp1 = gen_reg_rtx (Pmode); 4753 temp2 = gen_reg_rtx (Pmode); 4754 temp3 = gen_reg_rtx (Pmode); 4755 ret = gen_reg_rtx (Pmode); 4756 o0 = gen_rtx_REG (Pmode, 8); 4757 got = sparc_tls_got (); 4758 if (TARGET_ARCH32) 4759 { 4760 emit_insn (gen_tldm_hi22si (temp1)); 4761 emit_insn (gen_tldm_lo10si (temp2, temp1)); 4762 emit_insn (gen_tldm_addsi (o0, got, temp2)); 4763 insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (), 4764 const1_rtx)); 4765 } 4766 else 4767 { 4768 emit_insn (gen_tldm_hi22di (temp1)); 4769 emit_insn (gen_tldm_lo10di (temp2, temp1)); 4770 emit_insn (gen_tldm_adddi (o0, got, temp2)); 4771 insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (), 4772 const1_rtx)); 4773 } 4774 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0); 4775 RTL_CONST_CALL_P (insn) = 1; 4776 insn = get_insns (); 4777 end_sequence (); 4778 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to 4779 share the LD_BASE result with other LD model accesses. 
*/ 4780 emit_libcall_block (insn, temp3, o0, 4781 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), 4782 UNSPEC_TLSLD_BASE)); 4783 temp1 = gen_reg_rtx (Pmode); 4784 temp2 = gen_reg_rtx (Pmode); 4785 if (TARGET_ARCH32) 4786 { 4787 emit_insn (gen_tldo_hix22si (temp1, addr)); 4788 emit_insn (gen_tldo_lox10si (temp2, temp1, addr)); 4789 emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr)); 4790 } 4791 else 4792 { 4793 emit_insn (gen_tldo_hix22di (temp1, addr)); 4794 emit_insn (gen_tldo_lox10di (temp2, temp1, addr)); 4795 emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr)); 4796 } 4797 break; 4798 4799 case TLS_MODEL_INITIAL_EXEC: 4800 temp1 = gen_reg_rtx (Pmode); 4801 temp2 = gen_reg_rtx (Pmode); 4802 temp3 = gen_reg_rtx (Pmode); 4803 got = sparc_tls_got (); 4804 if (TARGET_ARCH32) 4805 { 4806 emit_insn (gen_tie_hi22si (temp1, addr)); 4807 emit_insn (gen_tie_lo10si (temp2, temp1, addr)); 4808 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr)); 4809 } 4810 else 4811 { 4812 emit_insn (gen_tie_hi22di (temp1, addr)); 4813 emit_insn (gen_tie_lo10di (temp2, temp1, addr)); 4814 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr)); 4815 } 4816 if (TARGET_SUN_TLS) 4817 { 4818 ret = gen_reg_rtx (Pmode); 4819 if (TARGET_ARCH32) 4820 emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7), 4821 temp3, addr)); 4822 else 4823 emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7), 4824 temp3, addr)); 4825 } 4826 else 4827 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3); 4828 break; 4829 4830 case TLS_MODEL_LOCAL_EXEC: 4831 temp1 = gen_reg_rtx (Pmode); 4832 temp2 = gen_reg_rtx (Pmode); 4833 if (TARGET_ARCH32) 4834 { 4835 emit_insn (gen_tle_hix22si (temp1, addr)); 4836 emit_insn (gen_tle_lox10si (temp2, temp1, addr)); 4837 } 4838 else 4839 { 4840 emit_insn (gen_tle_hix22di (temp1, addr)); 4841 emit_insn (gen_tle_lox10di (temp2, temp1, addr)); 4842 } 4843 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2); 4844 break; 4845 4846 default: 4847 gcc_unreachable (); 4848 } 4849 4850 else if (GET_CODE (addr) == CONST) 4851 { 4852 rtx base, offset; 4853 4854 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS); 4855 4856 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0)); 4857 offset = XEXP (XEXP (addr, 0), 1); 4858 4859 base = force_operand (base, NULL_RTX); 4860 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset))) 4861 offset = force_reg (Pmode, offset); 4862 ret = gen_rtx_PLUS (Pmode, base, offset); 4863 } 4864 4865 else 4866 gcc_unreachable (); /* for now ... */ 4867 4868 return ret; 4869 } 4870 4871 /* Legitimize PIC addresses. If the address is already position-independent, 4872 we return ORIG. Newly generated position-independent addresses go into a 4873 reg. This is REG if nonzero, otherwise we allocate register(s) as 4874 necessary. */ 4875 4876 static rtx 4877 sparc_legitimize_pic_address (rtx orig, rtx reg) 4878 { 4879 if (GET_CODE (orig) == SYMBOL_REF 4880 /* See the comment in sparc_expand_move. */ 4881 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig))) 4882 { 4883 bool gotdata_op = false; 4884 rtx pic_ref, address; 4885 rtx_insn *insn; 4886 4887 if (!reg) 4888 { 4889 gcc_assert (can_create_pseudo_p ()); 4890 reg = gen_reg_rtx (Pmode); 4891 } 4892 4893 if (flag_pic == 2) 4894 { 4895 /* If not during reload, allocate another temp reg here for loading 4896 in the address, so that these instructions can be optimized 4897 properly. */ 4898 rtx temp_reg = can_create_pseudo_p () ? 
gen_reg_rtx (Pmode) : reg; 4899 4900 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse 4901 won't get confused into thinking that these two instructions 4902 are loading in the true address of the symbol. If in the 4903 future a PIC rtx exists, that should be used instead. */ 4904 if (TARGET_ARCH64) 4905 { 4906 emit_insn (gen_movdi_high_pic (temp_reg, orig)); 4907 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig)); 4908 } 4909 else 4910 { 4911 emit_insn (gen_movsi_high_pic (temp_reg, orig)); 4912 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig)); 4913 } 4914 4915 address = temp_reg; 4916 gotdata_op = true; 4917 } 4918 else 4919 address = orig; 4920 4921 crtl->uses_pic_offset_table = 1; 4922 if (gotdata_op) 4923 { 4924 if (TARGET_ARCH64) 4925 insn = emit_insn (gen_movdi_pic_gotdata_op (reg, 4926 pic_offset_table_rtx, 4927 address, orig)); 4928 else 4929 insn = emit_insn (gen_movsi_pic_gotdata_op (reg, 4930 pic_offset_table_rtx, 4931 address, orig)); 4932 } 4933 else 4934 { 4935 pic_ref 4936 = gen_const_mem (Pmode, 4937 gen_rtx_PLUS (Pmode, 4938 pic_offset_table_rtx, address)); 4939 insn = emit_move_insn (reg, pic_ref); 4940 } 4941 4942 /* Put a REG_EQUAL note on this insn, so that it can be optimized 4943 by loop. */ 4944 set_unique_reg_note (insn, REG_EQUAL, orig); 4945 return reg; 4946 } 4947 else if (GET_CODE (orig) == CONST) 4948 { 4949 rtx base, offset; 4950 4951 if (GET_CODE (XEXP (orig, 0)) == PLUS 4952 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0))) 4953 return orig; 4954 4955 if (!reg) 4956 { 4957 gcc_assert (can_create_pseudo_p ()); 4958 reg = gen_reg_rtx (Pmode); 4959 } 4960 4961 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); 4962 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg); 4963 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), 4964 base == reg ? NULL_RTX : reg); 4965 4966 if (GET_CODE (offset) == CONST_INT) 4967 { 4968 if (SMALL_INT (offset)) 4969 return plus_constant (Pmode, base, INTVAL (offset)); 4970 else if (can_create_pseudo_p ()) 4971 offset = force_reg (Pmode, offset); 4972 else 4973 /* If we reach here, then something is seriously wrong. */ 4974 gcc_unreachable (); 4975 } 4976 return gen_rtx_PLUS (Pmode, base, offset); 4977 } 4978 else if (GET_CODE (orig) == LABEL_REF) 4979 /* ??? We ought to be checking that the register is live instead, in case 4980 it is eliminated. */ 4981 crtl->uses_pic_offset_table = 1; 4982 4983 return orig; 4984 } 4985 4986 /* Try machine-dependent ways of modifying an illegitimate address X 4987 to be legitimate. If we find one, return the new, valid address. 4988 4989 OLDX is the address as it was before break_out_memory_refs was called. 4990 In some cases it is useful to look at this to decide what needs to be done. 4991 4992 MODE is the mode of the operand pointed to by X. 4993 4994 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. 
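   For example (illustrative), an address such as
     (plus (reg) (const_int 0x2000))
   whose displacement does not fit in the 13-bit signed immediate field is
   rewritten by copying the constant into a register, yielding a REG+REG
   address that is valid on SPARC.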
*/ 4995 4996 static rtx 4997 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 4998 machine_mode mode) 4999 { 5000 rtx orig_x = x; 5001 5002 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT) 5003 x = gen_rtx_PLUS (Pmode, XEXP (x, 1), 5004 force_operand (XEXP (x, 0), NULL_RTX)); 5005 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT) 5006 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), 5007 force_operand (XEXP (x, 1), NULL_RTX)); 5008 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS) 5009 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX), 5010 XEXP (x, 1)); 5011 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS) 5012 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), 5013 force_operand (XEXP (x, 1), NULL_RTX)); 5014 5015 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE)) 5016 return x; 5017 5018 if (sparc_tls_referenced_p (x)) 5019 x = sparc_legitimize_tls_address (x); 5020 else if (flag_pic) 5021 x = sparc_legitimize_pic_address (x, NULL_RTX); 5022 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1))) 5023 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), 5024 copy_to_mode_reg (Pmode, XEXP (x, 1))); 5025 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0))) 5026 x = gen_rtx_PLUS (Pmode, XEXP (x, 1), 5027 copy_to_mode_reg (Pmode, XEXP (x, 0))); 5028 else if (GET_CODE (x) == SYMBOL_REF 5029 || GET_CODE (x) == CONST 5030 || GET_CODE (x) == LABEL_REF) 5031 x = copy_to_suggested_reg (x, NULL_RTX, Pmode); 5032 5033 return x; 5034 } 5035 5036 /* Delegitimize an address that was legitimized by the above function. */ 5037 5038 static rtx 5039 sparc_delegitimize_address (rtx x) 5040 { 5041 x = delegitimize_mem_from_attrs (x); 5042 5043 if (GET_CODE (x) == LO_SUM) 5044 x = XEXP (x, 1); 5045 5046 if (GET_CODE (x) == UNSPEC) 5047 switch (XINT (x, 1)) 5048 { 5049 case UNSPEC_MOVE_PIC: 5050 case UNSPEC_TLSLE: 5051 x = XVECEXP (x, 0, 0); 5052 gcc_assert (GET_CODE (x) == SYMBOL_REF); 5053 break; 5054 case UNSPEC_MOVE_GOTDATA: 5055 x = XVECEXP (x, 0, 2); 5056 gcc_assert (GET_CODE (x) == SYMBOL_REF); 5057 break; 5058 default: 5059 break; 5060 } 5061 5062 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */ 5063 if (GET_CODE (x) == MINUS 5064 && (XEXP (x, 0) == got_register_rtx 5065 || sparc_pic_register_p (XEXP (x, 0)))) 5066 { 5067 rtx y = XEXP (x, 1); 5068 5069 if (GET_CODE (y) == LO_SUM) 5070 y = XEXP (y, 1); 5071 5072 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL) 5073 { 5074 x = XVECEXP (y, 0, 0); 5075 gcc_assert (GET_CODE (x) == LABEL_REF 5076 || (GET_CODE (x) == CONST 5077 && GET_CODE (XEXP (x, 0)) == PLUS 5078 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF 5079 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)); 5080 } 5081 } 5082 5083 return x; 5084 } 5085 5086 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to 5087 replace the input X, or the original X if no replacement is called for. 5088 The output parameter *WIN is 1 if the calling macro should goto WIN, 5089 0 if it should not. 5090 5091 For SPARC, we wish to handle addresses by splitting them into 5092 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference. 5093 This cuts the number of extra insns by one. 5094 5095 Do nothing when generating PIC code and the address is a symbolic 5096 operand or requires a scratch register. 
*/ 5097 5098 rtx 5099 sparc_legitimize_reload_address (rtx x, machine_mode mode, 5100 int opnum, int type, 5101 int ind_levels ATTRIBUTE_UNUSED, int *win) 5102 { 5103 /* Decompose SImode constants into HIGH+LO_SUM. */ 5104 if (CONSTANT_P (x) 5105 && (mode != TFmode || TARGET_ARCH64) 5106 && GET_MODE (x) == SImode 5107 && GET_CODE (x) != LO_SUM 5108 && GET_CODE (x) != HIGH 5109 && sparc_code_model <= CM_MEDLOW 5110 && !(flag_pic 5111 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x)))) 5112 { 5113 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x); 5114 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, 5115 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, 5116 opnum, (enum reload_type)type); 5117 *win = 1; 5118 return x; 5119 } 5120 5121 /* We have to recognize what we have already generated above. */ 5122 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH) 5123 { 5124 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, 5125 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, 5126 opnum, (enum reload_type)type); 5127 *win = 1; 5128 return x; 5129 } 5130 5131 *win = 0; 5132 return x; 5133 } 5134 5135 /* Return true if ADDR (a legitimate address expression) 5136 has an effect that depends on the machine mode it is used for. 5137 5138 In PIC mode, 5139 5140 (mem:HI [%l7+a]) 5141 5142 is not equivalent to 5143 5144 (mem:QI [%l7+a]) (mem:QI [%l7+a+1]) 5145 5146 because [%l7+a+1] is interpreted as the address of (a+1). */ 5147 5148 5149 static bool 5150 sparc_mode_dependent_address_p (const_rtx addr, 5151 addr_space_t as ATTRIBUTE_UNUSED) 5152 { 5153 if (GET_CODE (addr) == PLUS 5154 && sparc_pic_register_p (XEXP (addr, 0)) 5155 && symbolic_operand (XEXP (addr, 1), VOIDmode)) 5156 return true; 5157 5158 return false; 5159 } 5160 5161 /* Emit a call instruction with the pattern given by PAT. ADDR is the 5162 address of the call target. */ 5163 5164 void 5165 sparc_emit_call_insn (rtx pat, rtx addr) 5166 { 5167 rtx_insn *insn; 5168 5169 insn = emit_call_insn (pat); 5170 5171 /* The PIC register is live on entry to VxWorks PIC PLT entries. */ 5172 if (TARGET_VXWORKS_RTP 5173 && flag_pic 5174 && GET_CODE (addr) == SYMBOL_REF 5175 && (SYMBOL_REF_DECL (addr) 5176 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) 5177 : !SYMBOL_REF_LOCAL_P (addr))) 5178 { 5179 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); 5180 crtl->uses_pic_offset_table = 1; 5181 } 5182 } 5183 5184 /* Return 1 if RTX is a MEM which is known to be aligned to at 5185 least a DESIRED byte boundary. */ 5186 5187 int 5188 mem_min_alignment (rtx mem, int desired) 5189 { 5190 rtx addr, base, offset; 5191 5192 /* If it's not a MEM we can't accept it. */ 5193 if (GET_CODE (mem) != MEM) 5194 return 0; 5195 5196 /* Obviously... */ 5197 if (!TARGET_UNALIGNED_DOUBLES 5198 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired) 5199 return 1; 5200 5201 /* ??? The rest of the function predates MEM_ALIGN so 5202 there is probably a bit of redundancy. */ 5203 addr = XEXP (mem, 0); 5204 base = offset = NULL_RTX; 5205 if (GET_CODE (addr) == PLUS) 5206 { 5207 if (GET_CODE (XEXP (addr, 0)) == REG) 5208 { 5209 base = XEXP (addr, 0); 5210 5211 /* What we are saying here is that if the base 5212 REG is aligned properly, the compiler will make 5213 sure any REG based index upon it will be so 5214 as well. 
*/ 5215 if (GET_CODE (XEXP (addr, 1)) == CONST_INT) 5216 offset = XEXP (addr, 1); 5217 else 5218 offset = const0_rtx; 5219 } 5220 } 5221 else if (GET_CODE (addr) == REG) 5222 { 5223 base = addr; 5224 offset = const0_rtx; 5225 } 5226 5227 if (base != NULL_RTX) 5228 { 5229 int regno = REGNO (base); 5230 5231 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM) 5232 { 5233 /* Check if the compiler has recorded some information 5234 about the alignment of the base REG. If reload has 5235 completed, we already matched with proper alignments. 5236 If not running global_alloc, reload might give us 5237 unaligned pointer to local stack though. */ 5238 if (((cfun != 0 5239 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT) 5240 || (optimize && reload_completed)) 5241 && (INTVAL (offset) & (desired - 1)) == 0) 5242 return 1; 5243 } 5244 else 5245 { 5246 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0) 5247 return 1; 5248 } 5249 } 5250 else if (! TARGET_UNALIGNED_DOUBLES 5251 || CONSTANT_P (addr) 5252 || GET_CODE (addr) == LO_SUM) 5253 { 5254 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES 5255 is true, in which case we can only assume that an access is aligned if 5256 it is to a constant address, or the address involves a LO_SUM. */ 5257 return 1; 5258 } 5259 5260 /* An obviously unaligned address. */ 5261 return 0; 5262 } 5263 5264 5265 /* Vectors to keep interesting information about registers where it can easily 5266 be got. We used to use the actual mode value as the bit number, but there 5267 are more than 32 modes now. Instead we use two tables: one indexed by 5268 hard register number, and one indexed by mode. */ 5269 5270 /* The purpose of sparc_mode_class is to shrink the range of modes so that 5271 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is 5272 mapped into one sparc_mode_class mode. */ 5273 5274 enum sparc_mode_class { 5275 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE, 5276 SF_MODE, DF_MODE, TF_MODE, OF_MODE, 5277 CC_MODE, CCFP_MODE 5278 }; 5279 5280 /* Modes for single-word and smaller quantities. */ 5281 #define S_MODES \ 5282 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE)) 5283 5284 /* Modes for double-word and smaller quantities. */ 5285 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE)) 5286 5287 /* Modes for quad-word and smaller quantities. */ 5288 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE)) 5289 5290 /* Modes for 8-word and smaller quantities. */ 5291 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE)) 5292 5293 /* Modes for single-float quantities. */ 5294 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) 5295 5296 /* Modes for double-float and smaller quantities. */ 5297 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE)) 5298 5299 /* Modes for quad-float and smaller quantities. */ 5300 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE)) 5301 5302 /* Modes for quad-float pairs and smaller quantities. */ 5303 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE)) 5304 5305 /* Modes for double-float only quantities. */ 5306 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE)) 5307 5308 /* Modes for quad-float and double-float only quantities. */ 5309 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE)) 5310 5311 /* Modes for quad-float pairs and double-float only quantities. 
*/ 5312 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE)) 5313 5314 /* Modes for condition codes. */ 5315 #define CC_MODES (1 << (int) CC_MODE) 5316 #define CCFP_MODES (1 << (int) CCFP_MODE) 5317 5318 /* Value is 1 if register/mode pair is acceptable on sparc. 5319 5320 The funny mixture of D and T modes is because integer operations 5321 do not specially operate on tetra quantities, so non-quad-aligned 5322 registers can hold quadword quantities (except %o4 and %i4 because 5323 they cross fixed registers). 5324 5325 ??? Note that, despite the settings, non-double-aligned parameter 5326 registers can hold double-word quantities in 32-bit mode. */ 5327 5328 /* This points to either the 32-bit or the 64-bit version. */ 5329 static const int *hard_regno_mode_classes; 5330 5331 static const int hard_32bit_mode_classes[] = { 5332 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, 5333 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES, 5334 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, 5335 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES, 5336 5337 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5338 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5339 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5340 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES, 5341 5342 /* FP regs f32 to f63. Only the even numbered registers actually exist, 5343 and none can hold SFmode/SImode values. */ 5344 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5345 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5346 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5347 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5348 5349 /* %fcc[0123] */ 5350 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES, 5351 5352 /* %icc, %sfp, %gsr */ 5353 CC_MODES, 0, D_MODES 5354 }; 5355 5356 static const int hard_64bit_mode_classes[] = { 5357 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 5358 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 5359 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 5360 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 5361 5362 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5363 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5364 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5365 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES, 5366 5367 /* FP regs f32 to f63. Only the even numbered registers actually exist, 5368 and none can hold SFmode/SImode values. 
*/ 5369 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5370 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5371 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5372 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5373 5374 /* %fcc[0123] */ 5375 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES, 5376 5377 /* %icc, %sfp, %gsr */ 5378 CC_MODES, 0, D_MODES 5379 }; 5380 5381 static int sparc_mode_class [NUM_MACHINE_MODES]; 5382 5383 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER]; 5384 5385 static void 5386 sparc_init_modes (void) 5387 { 5388 int i; 5389 5390 for (i = 0; i < NUM_MACHINE_MODES; i++) 5391 { 5392 machine_mode m = (machine_mode) i; 5393 unsigned int size = GET_MODE_SIZE (m); 5394 5395 switch (GET_MODE_CLASS (m)) 5396 { 5397 case MODE_INT: 5398 case MODE_PARTIAL_INT: 5399 case MODE_COMPLEX_INT: 5400 if (size < 4) 5401 sparc_mode_class[i] = 1 << (int) H_MODE; 5402 else if (size == 4) 5403 sparc_mode_class[i] = 1 << (int) S_MODE; 5404 else if (size == 8) 5405 sparc_mode_class[i] = 1 << (int) D_MODE; 5406 else if (size == 16) 5407 sparc_mode_class[i] = 1 << (int) T_MODE; 5408 else if (size == 32) 5409 sparc_mode_class[i] = 1 << (int) O_MODE; 5410 else 5411 sparc_mode_class[i] = 0; 5412 break; 5413 case MODE_VECTOR_INT: 5414 if (size == 4) 5415 sparc_mode_class[i] = 1 << (int) SF_MODE; 5416 else if (size == 8) 5417 sparc_mode_class[i] = 1 << (int) DF_MODE; 5418 else 5419 sparc_mode_class[i] = 0; 5420 break; 5421 case MODE_FLOAT: 5422 case MODE_COMPLEX_FLOAT: 5423 if (size == 4) 5424 sparc_mode_class[i] = 1 << (int) SF_MODE; 5425 else if (size == 8) 5426 sparc_mode_class[i] = 1 << (int) DF_MODE; 5427 else if (size == 16) 5428 sparc_mode_class[i] = 1 << (int) TF_MODE; 5429 else if (size == 32) 5430 sparc_mode_class[i] = 1 << (int) OF_MODE; 5431 else 5432 sparc_mode_class[i] = 0; 5433 break; 5434 case MODE_CC: 5435 if (m == CCFPmode || m == CCFPEmode) 5436 sparc_mode_class[i] = 1 << (int) CCFP_MODE; 5437 else 5438 sparc_mode_class[i] = 1 << (int) CC_MODE; 5439 break; 5440 default: 5441 sparc_mode_class[i] = 0; 5442 break; 5443 } 5444 } 5445 5446 if (TARGET_ARCH64) 5447 hard_regno_mode_classes = hard_64bit_mode_classes; 5448 else 5449 hard_regno_mode_classes = hard_32bit_mode_classes; 5450 5451 /* Initialize the array used by REGNO_REG_CLASS. */ 5452 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 5453 { 5454 if (i < 16 && TARGET_V8PLUS) 5455 sparc_regno_reg_class[i] = I64_REGS; 5456 else if (i < 32 || i == FRAME_POINTER_REGNUM) 5457 sparc_regno_reg_class[i] = GENERAL_REGS; 5458 else if (i < 64) 5459 sparc_regno_reg_class[i] = FP_REGS; 5460 else if (i < 96) 5461 sparc_regno_reg_class[i] = EXTRA_FP_REGS; 5462 else if (i < 100) 5463 sparc_regno_reg_class[i] = FPCC_REGS; 5464 else 5465 sparc_regno_reg_class[i] = NO_REGS; 5466 } 5467 } 5468 5469 /* Return whether REGNO, a global or FP register, must be saved/restored. */ 5470 5471 static inline bool 5472 save_global_or_fp_reg_p (unsigned int regno, 5473 int leaf_function ATTRIBUTE_UNUSED) 5474 { 5475 return !call_used_regs[regno] && df_regs_ever_live_p (regno); 5476 } 5477 5478 /* Return whether the return address register (%i7) is needed. */ 5479 5480 static inline bool 5481 return_addr_reg_needed_p (int leaf_function) 5482 { 5483 /* If it is live, for example because of __builtin_return_address (0). 
*/ 5484 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM)) 5485 return true; 5486 5487 /* Otherwise, it is needed as save register if %o7 is clobbered. */ 5488 if (!leaf_function 5489 /* Loading the GOT register clobbers %o7. */ 5490 || crtl->uses_pic_offset_table 5491 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM)) 5492 return true; 5493 5494 return false; 5495 } 5496 5497 /* Return whether REGNO, a local or in register, must be saved/restored. */ 5498 5499 static bool 5500 save_local_or_in_reg_p (unsigned int regno, int leaf_function) 5501 { 5502 /* General case: call-saved registers live at some point. */ 5503 if (!call_used_regs[regno] && df_regs_ever_live_p (regno)) 5504 return true; 5505 5506 /* Frame pointer register (%fp) if needed. */ 5507 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) 5508 return true; 5509 5510 /* Return address register (%i7) if needed. */ 5511 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function)) 5512 return true; 5513 5514 /* GOT register (%l7) if needed. */ 5515 if (got_register_rtx && regno == REGNO (got_register_rtx)) 5516 return true; 5517 5518 /* If the function accesses prior frames, the frame pointer and the return 5519 address of the previous frame must be saved on the stack. */ 5520 if (crtl->accesses_prior_frames 5521 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM)) 5522 return true; 5523 5524 return false; 5525 } 5526 5527 /* Compute the frame size required by the function. This function is called 5528 during the reload pass and also by sparc_expand_prologue. */ 5529 5530 static HOST_WIDE_INT 5531 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function) 5532 { 5533 HOST_WIDE_INT frame_size, apparent_frame_size; 5534 int args_size, n_global_fp_regs = 0; 5535 bool save_local_in_regs_p = false; 5536 unsigned int i; 5537 5538 /* If the function allocates dynamic stack space, the dynamic offset is 5539 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */ 5540 if (leaf_function && !cfun->calls_alloca) 5541 args_size = 0; 5542 else 5543 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl); 5544 5545 /* Calculate space needed for global registers. */ 5546 if (TARGET_ARCH64) 5547 { 5548 for (i = 0; i < 8; i++) 5549 if (save_global_or_fp_reg_p (i, 0)) 5550 n_global_fp_regs += 2; 5551 } 5552 else 5553 { 5554 for (i = 0; i < 8; i += 2) 5555 if (save_global_or_fp_reg_p (i, 0) 5556 || save_global_or_fp_reg_p (i + 1, 0)) 5557 n_global_fp_regs += 2; 5558 } 5559 5560 /* In the flat window model, find out which local and in registers need to 5561 be saved. We don't reserve space in the current frame for them as they 5562 will be spilled into the register window save area of the caller's frame. 5563 However, as soon as we use this register window save area, we must create 5564 that of the current frame to make it the live one. */ 5565 if (TARGET_FLAT) 5566 for (i = 16; i < 32; i++) 5567 if (save_local_or_in_reg_p (i, leaf_function)) 5568 { 5569 save_local_in_regs_p = true; 5570 break; 5571 } 5572 5573 /* Calculate space needed for FP registers. */ 5574 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2) 5575 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0)) 5576 n_global_fp_regs += 2; 5577 5578 if (size == 0 5579 && n_global_fp_regs == 0 5580 && args_size == 0 5581 && !save_local_in_regs_p) 5582 frame_size = apparent_frame_size = 0; 5583 else 5584 { 5585 /* Start from the apparent frame size. 
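     That is, the local variable area rounded up to a multiple of 8 bytes,
     plus 4 bytes for every global/FP register word that needs saving.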
*/ 5586 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4; 5587 5588 /* We need to add the size of the outgoing argument area. */ 5589 frame_size = apparent_frame_size + ROUND_UP (args_size, 8); 5590 5591 /* And that of the register window save area. */ 5592 frame_size += FIRST_PARM_OFFSET (cfun->decl); 5593 5594 /* Finally, bump to the appropriate alignment. */ 5595 frame_size = SPARC_STACK_ALIGN (frame_size); 5596 } 5597 5598 /* Set up values for use in prologue and epilogue. */ 5599 sparc_frame_size = frame_size; 5600 sparc_apparent_frame_size = apparent_frame_size; 5601 sparc_n_global_fp_regs = n_global_fp_regs; 5602 sparc_save_local_in_regs_p = save_local_in_regs_p; 5603 5604 return frame_size; 5605 } 5606 5607 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */ 5608 5609 int 5610 sparc_initial_elimination_offset (int to) 5611 { 5612 int offset; 5613 5614 if (to == STACK_POINTER_REGNUM) 5615 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf); 5616 else 5617 offset = 0; 5618 5619 offset += SPARC_STACK_BIAS; 5620 return offset; 5621 } 5622 5623 /* Output any necessary .register pseudo-ops. */ 5624 5625 void 5626 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED) 5627 { 5628 int i; 5629 5630 if (TARGET_ARCH32) 5631 return; 5632 5633 /* Check if %g[2367] were used without 5634 .register being printed for them already. */ 5635 for (i = 2; i < 8; i++) 5636 { 5637 if (df_regs_ever_live_p (i) 5638 && ! sparc_hard_reg_printed [i]) 5639 { 5640 sparc_hard_reg_printed [i] = 1; 5641 /* %g7 is used as TLS base register, use #ignore 5642 for it instead of #scratch. */ 5643 fprintf (file, "\t.register\t%%g%d, #%s\n", i, 5644 i == 7 ? "ignore" : "scratch"); 5645 } 5646 if (i == 3) i = 5; 5647 } 5648 } 5649 5650 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) 5651 5652 #if PROBE_INTERVAL > 4096 5653 #error Cannot use indexed addressing mode for stack probing 5654 #endif 5655 5656 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, 5657 inclusive. These are offsets from the current stack pointer. 5658 5659 Note that we don't use the REG+REG addressing mode for the probes because 5660 of the stack bias in 64-bit mode. And it doesn't really buy us anything 5661 so the advantages of having a single code win here. */ 5662 5663 static void 5664 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) 5665 { 5666 rtx g1 = gen_rtx_REG (Pmode, 1); 5667 5668 /* See if we have a constant small number of probes to generate. If so, 5669 that's the easy case. */ 5670 if (size <= PROBE_INTERVAL) 5671 { 5672 emit_move_insn (g1, GEN_INT (first)); 5673 emit_insn (gen_rtx_SET (g1, 5674 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); 5675 emit_stack_probe (plus_constant (Pmode, g1, -size)); 5676 } 5677 5678 /* The run-time loop is made up of 9 insns in the generic case while the 5679 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */ 5680 else if (size <= 4 * PROBE_INTERVAL) 5681 { 5682 HOST_WIDE_INT i; 5683 5684 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL)); 5685 emit_insn (gen_rtx_SET (g1, 5686 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); 5687 emit_stack_probe (g1); 5688 5689 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until 5690 it exceeds SIZE. If only two probes are needed, this will not 5691 generate any code. Then probe at FIRST + SIZE. 
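     For example (illustrative values), with PROBE_INTERVAL == 4096,
     FIRST == 0 and SIZE == 10000, this emits probes at offsets 4096,
     8192 and 10000 below the stack pointer.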
*/ 5692 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) 5693 { 5694 emit_insn (gen_rtx_SET (g1, 5695 plus_constant (Pmode, g1, -PROBE_INTERVAL))); 5696 emit_stack_probe (g1); 5697 } 5698 5699 emit_stack_probe (plus_constant (Pmode, g1, 5700 (i - PROBE_INTERVAL) - size)); 5701 } 5702 5703 /* Otherwise, do the same as above, but in a loop. Note that we must be 5704 extra careful with variables wrapping around because we might be at 5705 the very top (or the very bottom) of the address space and we have 5706 to be able to handle this case properly; in particular, we use an 5707 equality test for the loop condition. */ 5708 else 5709 { 5710 HOST_WIDE_INT rounded_size; 5711 rtx g4 = gen_rtx_REG (Pmode, 4); 5712 5713 emit_move_insn (g1, GEN_INT (first)); 5714 5715 5716 /* Step 1: round SIZE to the previous multiple of the interval. */ 5717 5718 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); 5719 emit_move_insn (g4, GEN_INT (rounded_size)); 5720 5721 5722 /* Step 2: compute initial and final value of the loop counter. */ 5723 5724 /* TEST_ADDR = SP + FIRST. */ 5725 emit_insn (gen_rtx_SET (g1, 5726 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); 5727 5728 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ 5729 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4))); 5730 5731 5732 /* Step 3: the loop 5733 5734 while (TEST_ADDR != LAST_ADDR) 5735 { 5736 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL 5737 probe at TEST_ADDR 5738 } 5739 5740 probes at FIRST + N * PROBE_INTERVAL for values of N from 1 5741 until it is equal to ROUNDED_SIZE. */ 5742 5743 if (TARGET_ARCH64) 5744 emit_insn (gen_probe_stack_rangedi (g1, g1, g4)); 5745 else 5746 emit_insn (gen_probe_stack_rangesi (g1, g1, g4)); 5747 5748 5749 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time 5750 that SIZE is equal to ROUNDED_SIZE. */ 5751 5752 if (size != rounded_size) 5753 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size)); 5754 } 5755 5756 /* Make sure nothing is scheduled before we are done. */ 5757 emit_insn (gen_blockage ()); 5758 } 5759 5760 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are 5761 absolute addresses. */ 5762 5763 const char * 5764 output_probe_stack_range (rtx reg1, rtx reg2) 5765 { 5766 static int labelno = 0; 5767 char loop_lab[32]; 5768 rtx xops[2]; 5769 5770 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); 5771 5772 /* Loop. */ 5773 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); 5774 5775 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ 5776 xops[0] = reg1; 5777 xops[1] = GEN_INT (-PROBE_INTERVAL); 5778 output_asm_insn ("add\t%0, %1, %0", xops); 5779 5780 /* Test if TEST_ADDR == LAST_ADDR. */ 5781 xops[1] = reg2; 5782 output_asm_insn ("cmp\t%0, %1", xops); 5783 5784 /* Probe at TEST_ADDR and branch. */ 5785 if (TARGET_ARCH64) 5786 fputs ("\tbne,pt\t%xcc,", asm_out_file); 5787 else 5788 fputs ("\tbne\t", asm_out_file); 5789 assemble_name_raw (asm_out_file, loop_lab); 5790 fputc ('\n', asm_out_file); 5791 xops[1] = GEN_INT (SPARC_STACK_BIAS); 5792 output_asm_insn (" st\t%%g0, [%0+%1]", xops); 5793 5794 return ""; 5795 } 5796 5797 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as 5798 needed. LOW is supposed to be double-word aligned for 32-bit registers. 5799 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE 5800 is the action to be performed if SAVE_P returns true and ACTION_FALSE 5801 the action to be performed if it returns false. Return the new offset. 
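Note that passing SORR_ADVANCE as ACTION_FALSE makes the offset advance even for registers that are not saved, presumably so that each register keeps the slot the ABI reserves for it in the register window save area; this is how the flat model handles the %l and %i registers.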
*/ 5802 5803 typedef bool (*sorr_pred_t) (unsigned int, int); 5804 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t; 5805 5806 static int 5807 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base, 5808 int offset, int leaf_function, sorr_pred_t save_p, 5809 sorr_act_t action_true, sorr_act_t action_false) 5810 { 5811 unsigned int i; 5812 rtx mem; 5813 rtx_insn *insn; 5814 5815 if (TARGET_ARCH64 && high <= 32) 5816 { 5817 int fp_offset = -1; 5818 5819 for (i = low; i < high; i++) 5820 { 5821 if (save_p (i, leaf_function)) 5822 { 5823 mem = gen_frame_mem (DImode, plus_constant (Pmode, 5824 base, offset)); 5825 if (action_true == SORR_SAVE) 5826 { 5827 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i)); 5828 RTX_FRAME_RELATED_P (insn) = 1; 5829 } 5830 else /* action_true == SORR_RESTORE */ 5831 { 5832 /* The frame pointer must be restored last since its old 5833 value may be used as base address for the frame. This 5834 is problematic in 64-bit mode only because of the lack 5835 of double-word load instruction. */ 5836 if (i == HARD_FRAME_POINTER_REGNUM) 5837 fp_offset = offset; 5838 else 5839 emit_move_insn (gen_rtx_REG (DImode, i), mem); 5840 } 5841 offset += 8; 5842 } 5843 else if (action_false == SORR_ADVANCE) 5844 offset += 8; 5845 } 5846 5847 if (fp_offset >= 0) 5848 { 5849 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset)); 5850 emit_move_insn (hard_frame_pointer_rtx, mem); 5851 } 5852 } 5853 else 5854 { 5855 for (i = low; i < high; i += 2) 5856 { 5857 bool reg0 = save_p (i, leaf_function); 5858 bool reg1 = save_p (i + 1, leaf_function); 5859 machine_mode mode; 5860 int regno; 5861 5862 if (reg0 && reg1) 5863 { 5864 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode; 5865 regno = i; 5866 } 5867 else if (reg0) 5868 { 5869 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode; 5870 regno = i; 5871 } 5872 else if (reg1) 5873 { 5874 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode; 5875 regno = i + 1; 5876 offset += 4; 5877 } 5878 else 5879 { 5880 if (action_false == SORR_ADVANCE) 5881 offset += 8; 5882 continue; 5883 } 5884 5885 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset)); 5886 if (action_true == SORR_SAVE) 5887 { 5888 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno)); 5889 RTX_FRAME_RELATED_P (insn) = 1; 5890 if (mode == DImode) 5891 { 5892 rtx set1, set2; 5893 mem = gen_frame_mem (SImode, plus_constant (Pmode, base, 5894 offset)); 5895 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno)); 5896 RTX_FRAME_RELATED_P (set1) = 1; 5897 mem 5898 = gen_frame_mem (SImode, plus_constant (Pmode, base, 5899 offset + 4)); 5900 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1)); 5901 RTX_FRAME_RELATED_P (set2) = 1; 5902 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 5903 gen_rtx_PARALLEL (VOIDmode, 5904 gen_rtvec (2, set1, set2))); 5905 } 5906 } 5907 else /* action_true == SORR_RESTORE */ 5908 emit_move_insn (gen_rtx_REG (mode, regno), mem); 5909 5910 /* Bump and round down to double word 5911 in case we already bumped by 4. */ 5912 offset = ROUND_DOWN (offset + 8, 8); 5913 } 5914 } 5915 5916 return offset; 5917 } 5918 5919 /* Emit code to adjust BASE to OFFSET. Return the new base. */ 5920 5921 static rtx 5922 emit_adjust_base_to_offset (rtx base, int offset) 5923 { 5924 /* ??? This might be optimized a little as %g1 might already have a 5925 value close enough that a single add insn will do. */ 5926 /* ??? 
Although, all of this is probably only a temporary fix because 5927 if %g1 can hold a function result, then sparc_expand_epilogue will 5928 lose (the result will be clobbered). */ 5929 rtx new_base = gen_rtx_REG (Pmode, 1); 5930 emit_move_insn (new_base, GEN_INT (offset)); 5931 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base))); 5932 return new_base; 5933 } 5934 5935 /* Emit code to save/restore call-saved global and FP registers. */ 5936 5937 static void 5938 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action) 5939 { 5940 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095) 5941 { 5942 base = emit_adjust_base_to_offset (base, offset); 5943 offset = 0; 5944 } 5945 5946 offset 5947 = emit_save_or_restore_regs (0, 8, base, offset, 0, 5948 save_global_or_fp_reg_p, action, SORR_NONE); 5949 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0, 5950 save_global_or_fp_reg_p, action, SORR_NONE); 5951 } 5952 5953 /* Emit code to save/restore call-saved local and in registers. */ 5954 5955 static void 5956 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action) 5957 { 5958 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095) 5959 { 5960 base = emit_adjust_base_to_offset (base, offset); 5961 offset = 0; 5962 } 5963 5964 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p, 5965 save_local_or_in_reg_p, action, SORR_ADVANCE); 5966 } 5967 5968 /* Emit a window_save insn. */ 5969 5970 static rtx_insn * 5971 emit_window_save (rtx increment) 5972 { 5973 rtx_insn *insn = emit_insn (gen_window_save (increment)); 5974 RTX_FRAME_RELATED_P (insn) = 1; 5975 5976 /* The incoming return address (%o7) is saved in %i7. */ 5977 add_reg_note (insn, REG_CFA_REGISTER, 5978 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM), 5979 gen_rtx_REG (Pmode, 5980 INCOMING_RETURN_ADDR_REGNUM))); 5981 5982 /* The window save event. */ 5983 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx); 5984 5985 /* The CFA is %fp, the hard frame pointer. */ 5986 add_reg_note (insn, REG_CFA_DEF_CFA, 5987 plus_constant (Pmode, hard_frame_pointer_rtx, 5988 INCOMING_FRAME_SP_OFFSET)); 5989 5990 return insn; 5991 } 5992 5993 /* Generate an increment for the stack pointer. */ 5994 5995 static rtx 5996 gen_stack_pointer_inc (rtx increment) 5997 { 5998 return gen_rtx_SET (stack_pointer_rtx, 5999 gen_rtx_PLUS (Pmode, 6000 stack_pointer_rtx, 6001 increment)); 6002 } 6003 6004 /* Expand the function prologue. The prologue is responsible for reserving 6005 storage for the frame, saving the call-saved registers and loading the 6006 GOT register if needed. */ 6007 6008 void 6009 sparc_expand_prologue (void) 6010 { 6011 HOST_WIDE_INT size; 6012 rtx_insn *insn; 6013 6014 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying 6015 on the final value of the flag means deferring the prologue/epilogue 6016 expansion until just before the second scheduling pass, which is too 6017 late to emit multiple epilogues or return insns. 6018 6019 Of course we are making the assumption that the value of the flag 6020 will not change between now and its final value. Of the three parts 6021 of the formula, only the last one can reasonably vary. Let's take a 6022 closer look, after assuming that the first two ones are set to true 6023 (otherwise the last value is effectively silenced). 6024 6025 If only_leaf_regs_used returns false, the global predicate will also 6026 be false so the actual frame size calculated below will be positive. 
6027 As a consequence, the save_register_window insn will be emitted in 6028 the instruction stream; now this insn explicitly references %fp 6029 which is not a leaf register so only_leaf_regs_used will always 6030 return false subsequently. 6031 6032 If only_leaf_regs_used returns true, we hope that the subsequent 6033 optimization passes won't cause non-leaf registers to pop up. For 6034 example, the regrename pass has special provisions to not rename to 6035 non-leaf registers in a leaf function. */ 6036 sparc_leaf_function_p 6037 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used (); 6038 6039 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p); 6040 6041 if (flag_stack_usage_info) 6042 current_function_static_stack_size = size; 6043 6044 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK 6045 || flag_stack_clash_protection) 6046 { 6047 if (crtl->is_leaf && !cfun->calls_alloca) 6048 { 6049 if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) 6050 sparc_emit_probe_stack_range (get_stack_check_protect (), 6051 size - get_stack_check_protect ()); 6052 } 6053 else if (size > 0) 6054 sparc_emit_probe_stack_range (get_stack_check_protect (), size); 6055 } 6056 6057 if (size == 0) 6058 ; /* do nothing. */ 6059 else if (sparc_leaf_function_p) 6060 { 6061 rtx size_int_rtx = GEN_INT (-size); 6062 6063 if (size <= 4096) 6064 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx)); 6065 else if (size <= 8192) 6066 { 6067 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096))); 6068 RTX_FRAME_RELATED_P (insn) = 1; 6069 6070 /* %sp is still the CFA register. */ 6071 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 6072 } 6073 else 6074 { 6075 rtx size_rtx = gen_rtx_REG (Pmode, 1); 6076 emit_move_insn (size_rtx, size_int_rtx); 6077 insn = emit_insn (gen_stack_pointer_inc (size_rtx)); 6078 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 6079 gen_stack_pointer_inc (size_int_rtx)); 6080 } 6081 6082 RTX_FRAME_RELATED_P (insn) = 1; 6083 } 6084 else 6085 { 6086 rtx size_int_rtx = GEN_INT (-size); 6087 6088 if (size <= 4096) 6089 emit_window_save (size_int_rtx); 6090 else if (size <= 8192) 6091 { 6092 emit_window_save (GEN_INT (-4096)); 6093 6094 /* %sp is not the CFA register anymore. */ 6095 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 6096 6097 /* Make sure no %fp-based store is issued until after the frame is 6098 established. The offset between the frame pointer and the stack 6099 pointer is calculated relative to the value of the stack pointer 6100 at the end of the function prologue, and moving instructions that 6101 access the stack via the frame pointer between the instructions 6102 that decrement the stack pointer could result in accessing the 6103 register window save area, which is volatile. */ 6104 emit_insn (gen_frame_blockage ()); 6105 } 6106 else 6107 { 6108 rtx size_rtx = gen_rtx_REG (Pmode, 1); 6109 emit_move_insn (size_rtx, size_int_rtx); 6110 emit_window_save (size_rtx); 6111 } 6112 } 6113 6114 if (sparc_leaf_function_p) 6115 { 6116 sparc_frame_base_reg = stack_pointer_rtx; 6117 sparc_frame_base_offset = size + SPARC_STACK_BIAS; 6118 } 6119 else 6120 { 6121 sparc_frame_base_reg = hard_frame_pointer_rtx; 6122 sparc_frame_base_offset = SPARC_STACK_BIAS; 6123 } 6124 6125 if (sparc_n_global_fp_regs > 0) 6126 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 6127 sparc_frame_base_offset 6128 - sparc_apparent_frame_size, 6129 SORR_SAVE); 6130 6131 /* Advertise that the data calculated just above are now valid. 
*/ 6132 sparc_prologue_data_valid_p = true; 6133 } 6134 6135 /* Expand the function prologue. The prologue is responsible for reserving 6136 storage for the frame, saving the call-saved registers and loading the 6137 GOT register if needed. */ 6138 6139 void 6140 sparc_flat_expand_prologue (void) 6141 { 6142 HOST_WIDE_INT size; 6143 rtx_insn *insn; 6144 6145 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf; 6146 6147 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p); 6148 6149 if (flag_stack_usage_info) 6150 current_function_static_stack_size = size; 6151 6152 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK 6153 || flag_stack_clash_protection) 6154 { 6155 if (crtl->is_leaf && !cfun->calls_alloca) 6156 { 6157 if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) 6158 sparc_emit_probe_stack_range (get_stack_check_protect (), 6159 size - get_stack_check_protect ()); 6160 } 6161 else if (size > 0) 6162 sparc_emit_probe_stack_range (get_stack_check_protect (), size); 6163 } 6164 6165 if (sparc_save_local_in_regs_p) 6166 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS, 6167 SORR_SAVE); 6168 6169 if (size == 0) 6170 ; /* do nothing. */ 6171 else 6172 { 6173 rtx size_int_rtx, size_rtx; 6174 6175 size_rtx = size_int_rtx = GEN_INT (-size); 6176 6177 /* We establish the frame (i.e. decrement the stack pointer) first, even 6178 if we use a frame pointer, because we cannot clobber any call-saved 6179 registers, including the frame pointer, if we haven't created a new 6180 register save area, for the sake of compatibility with the ABI. */ 6181 if (size <= 4096) 6182 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx)); 6183 else if (size <= 8192 && !frame_pointer_needed) 6184 { 6185 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096))); 6186 RTX_FRAME_RELATED_P (insn) = 1; 6187 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 6188 } 6189 else 6190 { 6191 size_rtx = gen_rtx_REG (Pmode, 1); 6192 emit_move_insn (size_rtx, size_int_rtx); 6193 insn = emit_insn (gen_stack_pointer_inc (size_rtx)); 6194 add_reg_note (insn, REG_CFA_ADJUST_CFA, 6195 gen_stack_pointer_inc (size_int_rtx)); 6196 } 6197 RTX_FRAME_RELATED_P (insn) = 1; 6198 6199 /* Ensure nothing is scheduled until after the frame is established. */ 6200 emit_insn (gen_blockage ()); 6201 6202 if (frame_pointer_needed) 6203 { 6204 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx, 6205 gen_rtx_MINUS (Pmode, 6206 stack_pointer_rtx, 6207 size_rtx))); 6208 RTX_FRAME_RELATED_P (insn) = 1; 6209 6210 add_reg_note (insn, REG_CFA_ADJUST_CFA, 6211 gen_rtx_SET (hard_frame_pointer_rtx, 6212 plus_constant (Pmode, stack_pointer_rtx, 6213 size))); 6214 } 6215 6216 if (return_addr_reg_needed_p (sparc_leaf_function_p)) 6217 { 6218 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM); 6219 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); 6220 6221 insn = emit_move_insn (i7, o7); 6222 RTX_FRAME_RELATED_P (insn) = 1; 6223 6224 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7)); 6225 6226 /* Prevent this instruction from ever being considered dead, 6227 even if this function has no epilogue. 
*/ 6228 emit_use (i7); 6229 } 6230 } 6231 6232 if (frame_pointer_needed) 6233 { 6234 sparc_frame_base_reg = hard_frame_pointer_rtx; 6235 sparc_frame_base_offset = SPARC_STACK_BIAS; 6236 } 6237 else 6238 { 6239 sparc_frame_base_reg = stack_pointer_rtx; 6240 sparc_frame_base_offset = size + SPARC_STACK_BIAS; 6241 } 6242 6243 if (sparc_n_global_fp_regs > 0) 6244 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 6245 sparc_frame_base_offset 6246 - sparc_apparent_frame_size, 6247 SORR_SAVE); 6248 6249 /* Advertise that the data calculated just above are now valid. */ 6250 sparc_prologue_data_valid_p = true; 6251 } 6252 6253 /* This function generates the assembly code for function entry, which boils 6254 down to emitting the necessary .register directives. */ 6255 6256 static void 6257 sparc_asm_function_prologue (FILE *file) 6258 { 6259 /* Check that the assumption we made in sparc_expand_prologue is valid. */ 6260 if (!TARGET_FLAT) 6261 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs); 6262 6263 sparc_output_scratch_registers (file); 6264 } 6265 6266 /* Expand the function epilogue, either normal or part of a sibcall. 6267 We emit all the instructions except the return or the call. */ 6268 6269 void 6270 sparc_expand_epilogue (bool for_eh) 6271 { 6272 HOST_WIDE_INT size = sparc_frame_size; 6273 6274 if (cfun->calls_alloca) 6275 emit_insn (gen_frame_blockage ()); 6276 6277 if (sparc_n_global_fp_regs > 0) 6278 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 6279 sparc_frame_base_offset 6280 - sparc_apparent_frame_size, 6281 SORR_RESTORE); 6282 6283 if (size == 0 || for_eh) 6284 ; /* do nothing. */ 6285 else if (sparc_leaf_function_p) 6286 { 6287 if (size <= 4096) 6288 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 6289 else if (size <= 8192) 6290 { 6291 emit_insn (gen_stack_pointer_inc (GEN_INT (4096))); 6292 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096))); 6293 } 6294 else 6295 { 6296 rtx reg = gen_rtx_REG (Pmode, 1); 6297 emit_move_insn (reg, GEN_INT (size)); 6298 emit_insn (gen_stack_pointer_inc (reg)); 6299 } 6300 } 6301 } 6302 6303 /* Expand the function epilogue, either normal or part of a sibcall. 6304 We emit all the instructions except the return or the call. */ 6305 6306 void 6307 sparc_flat_expand_epilogue (bool for_eh) 6308 { 6309 HOST_WIDE_INT size = sparc_frame_size; 6310 6311 if (sparc_n_global_fp_regs > 0) 6312 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 6313 sparc_frame_base_offset 6314 - sparc_apparent_frame_size, 6315 SORR_RESTORE); 6316 6317 /* If we have a frame pointer, we'll need both to restore it before the 6318 frame is destroyed and use its current value in destroying the frame. 6319 Since we don't have an atomic way to do that in the flat window model, 6320 we save the current value into a temporary register (%g1). */ 6321 if (frame_pointer_needed && !for_eh) 6322 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx); 6323 6324 if (return_addr_reg_needed_p (sparc_leaf_function_p)) 6325 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM), 6326 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM)); 6327 6328 if (sparc_save_local_in_regs_p) 6329 emit_save_or_restore_local_in_regs (sparc_frame_base_reg, 6330 sparc_frame_base_offset, 6331 SORR_RESTORE); 6332 6333 if (size == 0 || for_eh) 6334 ; /* do nothing. */ 6335 else if (frame_pointer_needed) 6336 { 6337 /* Make sure the frame is destroyed after everything else is done. 
*/ 6338 emit_insn (gen_blockage ()); 6339 6340 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1)); 6341 } 6342 else 6343 { 6344 /* Likewise. */ 6345 emit_insn (gen_blockage ()); 6346 6347 if (size <= 4096) 6348 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 6349 else if (size <= 8192) 6350 { 6351 emit_insn (gen_stack_pointer_inc (GEN_INT (4096))); 6352 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096))); 6353 } 6354 else 6355 { 6356 rtx reg = gen_rtx_REG (Pmode, 1); 6357 emit_move_insn (reg, GEN_INT (size)); 6358 emit_insn (gen_stack_pointer_inc (reg)); 6359 } 6360 } 6361 } 6362 6363 /* Return true if it is appropriate to emit `return' instructions in the 6364 body of a function. */ 6365 6366 bool 6367 sparc_can_use_return_insn_p (void) 6368 { 6369 return sparc_prologue_data_valid_p 6370 && sparc_n_global_fp_regs == 0 6371 && TARGET_FLAT 6372 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p) 6373 : (sparc_frame_size == 0 || !sparc_leaf_function_p); 6374 } 6375 6376 /* This function generates the assembly code for function exit. */ 6377 6378 static void 6379 sparc_asm_function_epilogue (FILE *file) 6380 { 6381 /* If the last two instructions of a function are "call foo; dslot;" 6382 the return address might point to the first instruction in the next 6383 function and we have to output a dummy nop for the sake of sane 6384 backtraces in such cases. This is pointless for sibling calls since 6385 the return address is explicitly adjusted. */ 6386 6387 rtx_insn *insn = get_last_insn (); 6388 6389 rtx last_real_insn = prev_real_insn (insn); 6390 if (last_real_insn 6391 && NONJUMP_INSN_P (last_real_insn) 6392 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE) 6393 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0); 6394 6395 if (last_real_insn 6396 && CALL_P (last_real_insn) 6397 && !SIBLING_CALL_P (last_real_insn)) 6398 fputs("\tnop\n", file); 6399 6400 sparc_output_deferred_case_vectors (); 6401 } 6402 6403 /* Output a 'restore' instruction. */ 6404 6405 static void 6406 output_restore (rtx pat) 6407 { 6408 rtx operands[3]; 6409 6410 if (! pat) 6411 { 6412 fputs ("\t restore\n", asm_out_file); 6413 return; 6414 } 6415 6416 gcc_assert (GET_CODE (pat) == SET); 6417 6418 operands[0] = SET_DEST (pat); 6419 pat = SET_SRC (pat); 6420 6421 switch (GET_CODE (pat)) 6422 { 6423 case PLUS: 6424 operands[1] = XEXP (pat, 0); 6425 operands[2] = XEXP (pat, 1); 6426 output_asm_insn (" restore %r1, %2, %Y0", operands); 6427 break; 6428 case LO_SUM: 6429 operands[1] = XEXP (pat, 0); 6430 operands[2] = XEXP (pat, 1); 6431 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands); 6432 break; 6433 case ASHIFT: 6434 operands[1] = XEXP (pat, 0); 6435 gcc_assert (XEXP (pat, 1) == const1_rtx); 6436 output_asm_insn (" restore %r1, %r1, %Y0", operands); 6437 break; 6438 default: 6439 operands[1] = pat; 6440 output_asm_insn (" restore %%g0, %1, %Y0", operands); 6441 break; 6442 } 6443 } 6444 6445 /* Output a return. */ 6446 6447 const char * 6448 output_return (rtx_insn *insn) 6449 { 6450 if (crtl->calls_eh_return) 6451 { 6452 /* If the function uses __builtin_eh_return, the eh_return 6453 machinery occupies the delay slot. 
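The stack adjustment computed by that machinery lives in %g1, which is why every sequence below adds %g1 to %sp, either in the delay slot of the return/jmp or just before the jump when delayed branches are disabled.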
*/ 6454 gcc_assert (!final_sequence); 6455 6456 if (flag_delayed_branch) 6457 { 6458 if (!TARGET_FLAT && TARGET_V9) 6459 fputs ("\treturn\t%i7+8\n", asm_out_file); 6460 else 6461 { 6462 if (!TARGET_FLAT) 6463 fputs ("\trestore\n", asm_out_file); 6464 6465 fputs ("\tjmp\t%o7+8\n", asm_out_file); 6466 } 6467 6468 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file); 6469 } 6470 else 6471 { 6472 if (!TARGET_FLAT) 6473 fputs ("\trestore\n", asm_out_file); 6474 6475 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file); 6476 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file); 6477 } 6478 } 6479 else if (sparc_leaf_function_p || TARGET_FLAT) 6480 { 6481 /* This is a leaf or flat function so we don't have to bother restoring 6482 the register window, which frees us from dealing with the convoluted 6483 semantics of restore/return. We simply output the jump to the 6484 return address and the insn in the delay slot (if any). */ 6485 6486 return "jmp\t%%o7+%)%#"; 6487 } 6488 else 6489 { 6490 /* This is a regular function so we have to restore the register window. 6491 We may have a pending insn for the delay slot, which will be either 6492 combined with the 'restore' instruction or put in the delay slot of 6493 the 'return' instruction. */ 6494 6495 if (final_sequence) 6496 { 6497 rtx_insn *delay; 6498 rtx pat; 6499 6500 delay = NEXT_INSN (insn); 6501 gcc_assert (delay); 6502 6503 pat = PATTERN (delay); 6504 6505 if (TARGET_V9 && ! epilogue_renumber (&pat, 1)) 6506 { 6507 epilogue_renumber (&pat, 0); 6508 return "return\t%%i7+%)%#"; 6509 } 6510 else 6511 { 6512 output_asm_insn ("jmp\t%%i7+%)", NULL); 6513 6514 /* We're going to output the insn in the delay slot manually. 6515 Make sure to output its source location first. */ 6516 PATTERN (delay) = gen_blockage (); 6517 INSN_CODE (delay) = -1; 6518 final_scan_insn (delay, asm_out_file, optimize, 0, NULL); 6519 INSN_LOCATION (delay) = UNKNOWN_LOCATION; 6520 6521 output_restore (pat); 6522 } 6523 } 6524 else 6525 { 6526 /* The delay slot is empty. */ 6527 if (TARGET_V9) 6528 return "return\t%%i7+%)\n\t nop"; 6529 else if (flag_delayed_branch) 6530 return "jmp\t%%i7+%)\n\t restore"; 6531 else 6532 return "restore\n\tjmp\t%%o7+%)\n\t nop"; 6533 } 6534 } 6535 6536 return ""; 6537 } 6538 6539 /* Output a sibling call. */ 6540 6541 const char * 6542 output_sibcall (rtx_insn *insn, rtx call_operand) 6543 { 6544 rtx operands[1]; 6545 6546 gcc_assert (flag_delayed_branch); 6547 6548 operands[0] = call_operand; 6549 6550 if (sparc_leaf_function_p || TARGET_FLAT) 6551 { 6552 /* This is a leaf or flat function so we don't have to bother restoring 6553 the register window. We simply output the jump to the function and 6554 the insn in the delay slot (if any). */ 6555 6556 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence)); 6557 6558 if (final_sequence) 6559 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#", 6560 operands); 6561 else 6562 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize 6563 it into branch if possible. */ 6564 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7", 6565 operands); 6566 } 6567 else 6568 { 6569 /* This is a regular function so we have to restore the register window. 6570 We may have a pending insn for the delay slot, which will be combined 6571 with the 'restore' instruction. 
*/ 6572 6573 output_asm_insn ("call\t%a0, 0", operands); 6574 6575 if (final_sequence) 6576 { 6577 rtx_insn *delay; 6578 rtx pat; 6579 6580 delay = NEXT_INSN (insn); 6581 gcc_assert (delay); 6582 6583 pat = PATTERN (delay); 6584 6585 /* We're going to output the insn in the delay slot manually. 6586 Make sure to output its source location first. */ 6587 PATTERN (delay) = gen_blockage (); 6588 INSN_CODE (delay) = -1; 6589 final_scan_insn (delay, asm_out_file, optimize, 0, NULL); 6590 INSN_LOCATION (delay) = UNKNOWN_LOCATION; 6591 6592 output_restore (pat); 6593 } 6594 else 6595 output_restore (NULL_RTX); 6596 } 6597 6598 return ""; 6599 } 6600 6601 /* Functions for handling argument passing. 6602 6603 For 32-bit, the first 6 args are normally in registers and the rest are 6604 pushed. Any arg that starts within the first 6 words is at least 6605 partially passed in a register unless its data type forbids. 6606 6607 For 64-bit, the argument registers are laid out as an array of 16 elements 6608 and arguments are added sequentially. The first 6 int args and up to the 6609 first 16 fp args (depending on size) are passed in regs. 6610 6611 Slot Stack Integral Float Float in structure Double Long Double 6612 ---- ----- -------- ----- ------------------ ------ ----------- 6613 15 [SP+248] %f31 %f30,%f31 %d30 6614 14 [SP+240] %f29 %f28,%f29 %d28 %q28 6615 13 [SP+232] %f27 %f26,%f27 %d26 6616 12 [SP+224] %f25 %f24,%f25 %d24 %q24 6617 11 [SP+216] %f23 %f22,%f23 %d22 6618 10 [SP+208] %f21 %f20,%f21 %d20 %q20 6619 9 [SP+200] %f19 %f18,%f19 %d18 6620 8 [SP+192] %f17 %f16,%f17 %d16 %q16 6621 7 [SP+184] %f15 %f14,%f15 %d14 6622 6 [SP+176] %f13 %f12,%f13 %d12 %q12 6623 5 [SP+168] %o5 %f11 %f10,%f11 %d10 6624 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8 6625 3 [SP+152] %o3 %f7 %f6,%f7 %d6 6626 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4 6627 1 [SP+136] %o1 %f3 %f2,%f3 %d2 6628 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0 6629 6630 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise. 6631 6632 Integral arguments are always passed as 64-bit quantities appropriately 6633 extended. 6634 6635 Passing of floating point values is handled as follows. 6636 If a prototype is in scope: 6637 If the value is in a named argument (i.e. not a stdarg function or a 6638 value not part of the `...') then the value is passed in the appropriate 6639 fp reg. 6640 If the value is part of the `...' and is passed in one of the first 6 6641 slots then the value is passed in the appropriate int reg. 6642 If the value is part of the `...' and is not passed in one of the first 6 6643 slots then the value is passed in memory. 6644 If a prototype is not in scope: 6645 If the value is one of the first 6 arguments the value is passed in the 6646 appropriate integer reg and the appropriate fp reg. 6647 If the value is not one of the first 6 arguments the value is passed in 6648 the appropriate fp reg and in memory. 6649 6650 6651 Summary of the calling conventions implemented by GCC on the SPARC: 6652 6653 32-bit ABI: 6654 size argument return value 6655 6656 small integer <4 int. reg. int. reg. 6657 word 4 int. reg. int. reg. 6658 double word 8 int. reg. int. reg. 6659 6660 _Complex small integer <8 int. reg. int. reg. 6661 _Complex word 8 int. reg. int. reg. 6662 _Complex double word 16 memory int. reg. 6663 6664 vector integer <=8 int. reg. FP reg. 6665 vector integer >8 memory memory 6666 6667 float 4 int. reg. FP reg. 6668 double 8 int. reg. FP reg. 6669 long double 16 memory memory 6670 6671 _Complex float 8 memory FP reg. 
6672 _Complex double 16 memory FP reg. 6673 _Complex long double 32 memory FP reg. 6674 6675 vector float any memory memory 6676 6677 aggregate any memory memory 6678 6679 6680 6681 64-bit ABI: 6682 size argument return value 6683 6684 small integer <8 int. reg. int. reg. 6685 word 8 int. reg. int. reg. 6686 double word 16 int. reg. int. reg. 6687 6688 _Complex small integer <16 int. reg. int. reg. 6689 _Complex word 16 int. reg. int. reg. 6690 _Complex double word 32 memory int. reg. 6691 6692 vector integer <=16 FP reg. FP reg. 6693 vector integer 16<s<=32 memory FP reg. 6694 vector integer >32 memory memory 6695 6696 float 4 FP reg. FP reg. 6697 double 8 FP reg. FP reg. 6698 long double 16 FP reg. FP reg. 6699 6700 _Complex float 8 FP reg. FP reg. 6701 _Complex double 16 FP reg. FP reg. 6702 _Complex long double 32 memory FP reg. 6703 6704 vector float <=16 FP reg. FP reg. 6705 vector float 16<s<=32 memory FP reg. 6706 vector float >32 memory memory 6707 6708 aggregate <=16 reg. reg. 6709 aggregate 16<s<=32 memory reg. 6710 aggregate >32 memory memory 6711 6712 6713 6714 Note #1: complex floating-point types follow the extended SPARC ABIs as 6715 implemented by the Sun compiler. 6716 6717 Note #2: integer vector types follow the scalar floating-point types 6718 conventions to match what is implemented by the Sun VIS SDK. 6719 6720 Note #3: floating-point vector types follow the aggregate types 6721 conventions. */ 6722 6723 6724 /* Maximum number of int regs for args. */ 6725 #define SPARC_INT_ARG_MAX 6 6726 /* Maximum number of fp regs for args. */ 6727 #define SPARC_FP_ARG_MAX 16 6728 /* Number of words (partially) occupied for a given size in units. */ 6729 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD) 6730 6731 /* Handle the INIT_CUMULATIVE_ARGS macro. 6732 Initialize a variable CUM of type CUMULATIVE_ARGS 6733 for a call to a function whose data type is FNTYPE. 6734 For a library call, FNTYPE is 0. */ 6735 6736 void 6737 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree) 6738 { 6739 cum->words = 0; 6740 cum->prototype_p = fntype && prototype_p (fntype); 6741 cum->libcall_p = !fntype; 6742 } 6743 6744 /* Handle promotion of pointer and integer arguments. */ 6745 6746 static machine_mode 6747 sparc_promote_function_mode (const_tree type, machine_mode mode, 6748 int *punsignedp, const_tree, int) 6749 { 6750 if (type && POINTER_TYPE_P (type)) 6751 { 6752 *punsignedp = POINTERS_EXTEND_UNSIGNED; 6753 return Pmode; 6754 } 6755 6756 /* Integral arguments are passed as full words, as per the ABI. */ 6757 if (GET_MODE_CLASS (mode) == MODE_INT 6758 && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 6759 return word_mode; 6760 6761 return mode; 6762 } 6763 6764 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */ 6765 6766 static bool 6767 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED) 6768 { 6769 return TARGET_ARCH64 ? true : false; 6770 } 6771 6772 /* Handle the TARGET_PASS_BY_REFERENCE target hook. 6773 Specify whether to pass the argument by reference. */ 6774 6775 static bool 6776 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, 6777 machine_mode mode, const_tree type, 6778 bool named ATTRIBUTE_UNUSED) 6779 { 6780 if (TARGET_ARCH32) 6781 /* Original SPARC 32-bit ABI says that structures and unions, 6782 and quad-precision floats are passed by reference. 6783 All other base types are passed in registers. 6784 6785 Extended ABI (as implemented by the Sun compiler) says that all 6786 complex floats are passed by reference. 
Pass complex integers 6787 in registers up to 8 bytes. More generally, enforce the 2-word 6788 cap for passing arguments in registers. 6789 6790 Vector ABI (as implemented by the Sun VIS SDK) says that integer 6791 vectors are passed like floats of the same size, that is in 6792 registers up to 8 bytes. Pass all vector floats by reference 6793 like structure and unions. */ 6794 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))) 6795 || mode == SCmode 6796 /* Catch CDImode, TFmode, DCmode and TCmode. */ 6797 || GET_MODE_SIZE (mode) > 8 6798 || (type 6799 && VECTOR_TYPE_P (type) 6800 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); 6801 else 6802 /* Original SPARC 64-bit ABI says that structures and unions 6803 smaller than 16 bytes are passed in registers, as well as 6804 all other base types. 6805 6806 Extended ABI (as implemented by the Sun compiler) says that 6807 complex floats are passed in registers up to 16 bytes. Pass 6808 all complex integers in registers up to 16 bytes. More generally, 6809 enforce the 2-word cap for passing arguments in registers. 6810 6811 Vector ABI (as implemented by the Sun VIS SDK) says that integer 6812 vectors are passed like floats of the same size, that is in 6813 registers (up to 16 bytes). Pass all vector floats like structure 6814 and unions. */ 6815 return ((type 6816 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type)) 6817 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16) 6818 /* Catch CTImode and TCmode. */ 6819 || GET_MODE_SIZE (mode) > 16); 6820 } 6821 6822 /* Traverse the record TYPE recursively and call FUNC on its fields. 6823 NAMED is true if this is for a named parameter. DATA is passed 6824 to FUNC for each field. OFFSET is the starting position and 6825 PACKED is true if we are inside a packed record. */ 6826 6827 template <typename T, void Func (const_tree, int, bool, T*)> 6828 static void 6829 traverse_record_type (const_tree type, bool named, T *data, 6830 int offset = 0, bool packed = false) 6831 { 6832 /* The ABI obviously doesn't specify how packed structures are passed. 6833 These are passed in integer regs if possible, otherwise memory. */ 6834 if (!packed) 6835 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6836 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field)) 6837 { 6838 packed = true; 6839 break; 6840 } 6841 6842 /* Walk the real fields, but skip those with no size or a zero size. 6843 ??? Fields with variable offset are handled as having zero offset. */ 6844 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6845 if (TREE_CODE (field) == FIELD_DECL) 6846 { 6847 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field))) 6848 continue; 6849 6850 int bitpos = offset; 6851 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST) 6852 bitpos += int_bit_position (field); 6853 6854 tree field_type = TREE_TYPE (field); 6855 if (TREE_CODE (field_type) == RECORD_TYPE) 6856 traverse_record_type<T, Func> (field_type, named, data, bitpos, 6857 packed); 6858 else 6859 { 6860 const bool fp_type 6861 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type); 6862 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU, 6863 data); 6864 } 6865 } 6866 } 6867 6868 /* Handle recursive register classifying for structure layout. */ 6869 6870 typedef struct 6871 { 6872 bool fp_regs; /* true if field eligible to FP registers. */ 6873 bool fp_regs_in_first_word; /* true if such field in first word. 
*/ 6874 } classify_data_t; 6875 6876 /* A subroutine of function_arg_slotno. Classify the field. */ 6877 6878 inline void 6879 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data) 6880 { 6881 if (fp) 6882 { 6883 data->fp_regs = true; 6884 if (bitpos < BITS_PER_WORD) 6885 data->fp_regs_in_first_word = true; 6886 } 6887 } 6888 6889 /* Compute the slot number to pass an argument in. 6890 Return the slot number or -1 if passing on the stack. 6891 6892 CUM is a variable of type CUMULATIVE_ARGS which gives info about 6893 the preceding args and about the function being called. 6894 MODE is the argument's machine mode. 6895 TYPE is the data type of the argument (as a tree). 6896 This is null for libcalls where that information may 6897 not be available. 6898 NAMED is nonzero if this argument is a named parameter 6899 (otherwise it is an extra parameter matching an ellipsis). 6900 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. 6901 *PREGNO records the register number to use if scalar type. 6902 *PPADDING records the amount of padding needed in words. */ 6903 6904 static int 6905 function_arg_slotno (const struct sparc_args *cum, machine_mode mode, 6906 const_tree type, bool named, bool incoming, 6907 int *pregno, int *ppadding) 6908 { 6909 const int regbase 6910 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST; 6911 int slotno = cum->words, regno; 6912 enum mode_class mclass = GET_MODE_CLASS (mode); 6913 6914 /* Silence warnings in the callers. */ 6915 *pregno = -1; 6916 *ppadding = -1; 6917 6918 if (type && TREE_ADDRESSABLE (type)) 6919 return -1; 6920 6921 /* In 64-bit mode, objects requiring 16-byte alignment get it. */ 6922 if (TARGET_ARCH64 6923 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128 6924 && (slotno & 1) != 0) 6925 { 6926 slotno++; 6927 *ppadding = 1; 6928 } 6929 else 6930 *ppadding = 0; 6931 6932 /* Vector types deserve special treatment because they are polymorphic wrt 6933 their mode, depending upon whether VIS instructions are enabled. */ 6934 if (type && VECTOR_TYPE_P (type)) 6935 { 6936 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) 6937 { 6938 /* The SPARC port defines no floating-point vector modes. */ 6939 gcc_assert (mode == BLKmode); 6940 } 6941 else 6942 { 6943 /* Integer vector types should either have a vector 6944 mode or an integral mode, because we are guaranteed 6945 by pass_by_reference that their size is not greater 6946 than 16 bytes and TImode is 16-byte wide. */ 6947 gcc_assert (mode != BLKmode); 6948 6949 /* Integer vectors are handled like floats as per 6950 the Sun VIS SDK. */ 6951 mclass = MODE_FLOAT; 6952 } 6953 } 6954 6955 switch (mclass) 6956 { 6957 case MODE_FLOAT: 6958 case MODE_COMPLEX_FLOAT: 6959 case MODE_VECTOR_INT: 6960 if (TARGET_ARCH64 && TARGET_FPU && named) 6961 { 6962 /* If all arg slots are filled, then must pass on stack. */ 6963 if (slotno >= SPARC_FP_ARG_MAX) 6964 return -1; 6965 6966 regno = SPARC_FP_ARG_FIRST + slotno * 2; 6967 /* Arguments filling only one single FP register are 6968 right-justified in the outer double FP register. */ 6969 if (GET_MODE_SIZE (mode) <= 4) 6970 regno++; 6971 break; 6972 } 6973 /* fallthrough */ 6974 6975 case MODE_INT: 6976 case MODE_COMPLEX_INT: 6977 /* If all arg slots are filled, then must pass on stack. */ 6978 if (slotno >= SPARC_INT_ARG_MAX) 6979 return -1; 6980 6981 regno = regbase + slotno; 6982 break; 6983 6984 case MODE_RANDOM: 6985 /* MODE is VOIDmode when generating the actual call. 
*/ 6986 if (mode == VOIDmode) 6987 return -1; 6988 6989 if (TARGET_64BIT && TARGET_FPU && named 6990 && type 6991 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type))) 6992 { 6993 /* If all arg slots are filled, then must pass on stack. */ 6994 if (slotno >= SPARC_FP_ARG_MAX) 6995 return -1; 6996 6997 if (TREE_CODE (type) == RECORD_TYPE) 6998 { 6999 classify_data_t data = { false, false }; 7000 traverse_record_type<classify_data_t, classify_registers> 7001 (type, named, &data); 7002 7003 if (data.fp_regs) 7004 { 7005 /* If all FP slots are filled except for the last one and 7006 there is no FP field in the first word, then must pass 7007 on stack. */ 7008 if (slotno >= SPARC_FP_ARG_MAX - 1 7009 && !data.fp_regs_in_first_word) 7010 return -1; 7011 } 7012 else 7013 { 7014 /* If all int slots are filled, then must pass on stack. */ 7015 if (slotno >= SPARC_INT_ARG_MAX) 7016 return -1; 7017 } 7018 7019 /* PREGNO isn't set since both int and FP regs can be used. */ 7020 return slotno; 7021 } 7022 7023 regno = SPARC_FP_ARG_FIRST + slotno * 2; 7024 } 7025 else 7026 { 7027 /* If all arg slots are filled, then must pass on stack. */ 7028 if (slotno >= SPARC_INT_ARG_MAX) 7029 return -1; 7030 7031 regno = regbase + slotno; 7032 } 7033 break; 7034 7035 default : 7036 gcc_unreachable (); 7037 } 7038 7039 *pregno = regno; 7040 return slotno; 7041 } 7042 7043 /* Handle recursive register counting/assigning for structure layout. */ 7044 7045 typedef struct 7046 { 7047 int slotno; /* slot number of the argument. */ 7048 int regbase; /* regno of the base register. */ 7049 int intoffset; /* offset of the first pending integer field. */ 7050 int nregs; /* number of words passed in registers. */ 7051 bool stack; /* true if part of the argument is on the stack. */ 7052 rtx ret; /* return expression being built. */ 7053 } assign_data_t; 7054 7055 /* A subroutine of function_arg_record_value. Compute the number of integer 7056 registers to be assigned between PARMS->intoffset and BITPOS. Return 7057 true if at least one integer register is assigned or false otherwise. */ 7058 7059 static bool 7060 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs) 7061 { 7062 if (data->intoffset < 0) 7063 return false; 7064 7065 const int intoffset = data->intoffset; 7066 data->intoffset = -1; 7067 7068 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD; 7069 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD); 7070 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD); 7071 int nregs = (endbit - startbit) / BITS_PER_WORD; 7072 7073 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno) 7074 { 7075 nregs = SPARC_INT_ARG_MAX - this_slotno; 7076 7077 /* We need to pass this field (partly) on the stack. */ 7078 data->stack = 1; 7079 } 7080 7081 if (nregs <= 0) 7082 return false; 7083 7084 *pnregs = nregs; 7085 return true; 7086 } 7087 7088 /* A subroutine of function_arg_record_value. Compute the number and the mode 7089 of the FP registers to be assigned for FIELD. Return true if at least one 7090 FP register is assigned or false otherwise. */ 7091 7092 static bool 7093 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data, 7094 int *pnregs, machine_mode *pmode) 7095 { 7096 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD; 7097 machine_mode mode = DECL_MODE (field); 7098 int nregs, nslots; 7099 7100 /* Slots are counted as words while regs are counted as having the size of 7101 the (inner) mode. 
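For example, a field that is a vector of four floats takes two 8-byte slots but four SFmode registers, while a _Complex double field takes two slots and two DFmode registers.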
*/ 7102 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode) 7103 { 7104 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); 7105 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field)); 7106 } 7107 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) 7108 { 7109 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); 7110 nregs = 2; 7111 } 7112 else 7113 nregs = 1; 7114 7115 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode)); 7116 7117 if (nslots > SPARC_FP_ARG_MAX - this_slotno) 7118 { 7119 nslots = SPARC_FP_ARG_MAX - this_slotno; 7120 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode); 7121 7122 /* We need to pass this field (partly) on the stack. */ 7123 data->stack = 1; 7124 7125 if (nregs <= 0) 7126 return false; 7127 } 7128 7129 *pnregs = nregs; 7130 *pmode = mode; 7131 return true; 7132 } 7133 7134 /* A subroutine of function_arg_record_value. Count the number of registers 7135 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */ 7136 7137 inline void 7138 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data) 7139 { 7140 if (fp) 7141 { 7142 int nregs; 7143 machine_mode mode; 7144 7145 if (compute_int_layout (bitpos, data, &nregs)) 7146 data->nregs += nregs; 7147 7148 if (compute_fp_layout (field, bitpos, data, &nregs, &mode)) 7149 data->nregs += nregs; 7150 } 7151 else 7152 { 7153 if (data->intoffset < 0) 7154 data->intoffset = bitpos; 7155 } 7156 } 7157 7158 /* A subroutine of function_arg_record_value. Assign the bits of the 7159 structure between PARMS->intoffset and BITPOS to integer registers. */ 7160 7161 static void 7162 assign_int_registers (int bitpos, assign_data_t *data) 7163 { 7164 int intoffset = data->intoffset; 7165 machine_mode mode; 7166 int nregs; 7167 7168 if (!compute_int_layout (bitpos, data, &nregs)) 7169 return; 7170 7171 /* If this is the trailing part of a word, only load that much into 7172 the register. Otherwise load the whole register. Note that in 7173 the latter case we may pick up unwanted bits. It's not a problem 7174 at the moment but may wish to revisit. */ 7175 if (intoffset % BITS_PER_WORD != 0) 7176 mode = smallest_int_mode_for_size (BITS_PER_WORD 7177 - intoffset % BITS_PER_WORD); 7178 else 7179 mode = word_mode; 7180 7181 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD; 7182 unsigned int regno = data->regbase + this_slotno; 7183 intoffset /= BITS_PER_UNIT; 7184 7185 do 7186 { 7187 rtx reg = gen_rtx_REG (mode, regno); 7188 XVECEXP (data->ret, 0, data->stack + data->nregs) 7189 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset)); 7190 data->nregs += 1; 7191 mode = word_mode; 7192 regno += 1; 7193 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1; 7194 } 7195 while (--nregs > 0); 7196 } 7197 7198 /* A subroutine of function_arg_record_value. Assign FIELD at position 7199 BITPOS to FP registers. 
*/ 7200 7201 static void 7202 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data) 7203 { 7204 int nregs; 7205 machine_mode mode; 7206 7207 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode)) 7208 return; 7209 7210 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD; 7211 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2; 7212 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0) 7213 regno++; 7214 int pos = bitpos / BITS_PER_UNIT; 7215 7216 do 7217 { 7218 rtx reg = gen_rtx_REG (mode, regno); 7219 XVECEXP (data->ret, 0, data->stack + data->nregs) 7220 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos)); 7221 data->nregs += 1; 7222 regno += GET_MODE_SIZE (mode) / 4; 7223 pos += GET_MODE_SIZE (mode); 7224 } 7225 while (--nregs > 0); 7226 } 7227 7228 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of 7229 the structure between PARMS->intoffset and BITPOS to registers. */ 7230 7231 inline void 7232 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data) 7233 { 7234 if (fp) 7235 { 7236 assign_int_registers (bitpos, data); 7237 7238 assign_fp_registers (field, bitpos, data); 7239 } 7240 else 7241 { 7242 if (data->intoffset < 0) 7243 data->intoffset = bitpos; 7244 } 7245 } 7246 7247 /* Used by function_arg and function_value to implement the complex 7248 conventions of the 64-bit ABI for passing and returning structures. 7249 Return an expression valid as a return value for the FUNCTION_ARG 7250 and TARGET_FUNCTION_VALUE. 7251 7252 TYPE is the data type of the argument (as a tree). 7253 This is null for libcalls where that information may 7254 not be available. 7255 MODE is the argument's machine mode. 7256 SLOTNO is the index number of the argument's slot in the parameter array. 7257 NAMED is true if this argument is a named parameter 7258 (otherwise it is an extra parameter matching an ellipsis). 7259 REGBASE is the regno of the base register for the parameter array. */ 7260 7261 static rtx 7262 function_arg_record_value (const_tree type, machine_mode mode, 7263 int slotno, bool named, int regbase) 7264 { 7265 const int size = int_size_in_bytes (type); 7266 assign_data_t data; 7267 int nregs; 7268 7269 data.slotno = slotno; 7270 data.regbase = regbase; 7271 7272 /* Count how many registers we need. */ 7273 data.nregs = 0; 7274 data.intoffset = 0; 7275 data.stack = false; 7276 traverse_record_type<assign_data_t, count_registers> (type, named, &data); 7277 7278 /* Take into account pending integer fields. */ 7279 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs)) 7280 data.nregs += nregs; 7281 7282 /* Allocate the vector and handle some annoying special cases. */ 7283 nregs = data.nregs; 7284 7285 if (nregs == 0) 7286 { 7287 /* ??? Empty structure has no value? Duh? */ 7288 if (size <= 0) 7289 { 7290 /* Though there's nothing really to store, return a word register 7291 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL 7292 leads to breakage due to the fact that there are zero bytes to 7293 load. */ 7294 return gen_rtx_REG (mode, regbase); 7295 } 7296 7297 /* ??? C++ has structures with no fields, and yet a size. Give up 7298 for now and pass everything back in integer registers. 
*/ 7299 nregs = CEIL_NWORDS (size); 7300 if (nregs + slotno > SPARC_INT_ARG_MAX) 7301 nregs = SPARC_INT_ARG_MAX - slotno; 7302 } 7303 7304 gcc_assert (nregs > 0); 7305 7306 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs)); 7307 7308 /* If at least one field must be passed on the stack, generate 7309 (parallel [(expr_list (nil) ...) ...]) so that all fields will 7310 also be passed on the stack. We can't do much better because the 7311 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case 7312 of structures for which the fields passed exclusively in registers 7313 are not at the beginning of the structure. */ 7314 if (data.stack) 7315 XVECEXP (data.ret, 0, 0) 7316 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); 7317 7318 /* Assign the registers. */ 7319 data.nregs = 0; 7320 data.intoffset = 0; 7321 traverse_record_type<assign_data_t, assign_registers> (type, named, &data); 7322 7323 /* Assign pending integer fields. */ 7324 assign_int_registers (size * BITS_PER_UNIT, &data); 7325 7326 gcc_assert (data.nregs == nregs); 7327 7328 return data.ret; 7329 } 7330 7331 /* Used by function_arg and function_value to implement the conventions 7332 of the 64-bit ABI for passing and returning unions. 7333 Return an expression valid as a return value for the FUNCTION_ARG 7334 and TARGET_FUNCTION_VALUE. 7335 7336 SIZE is the size in bytes of the union. 7337 MODE is the argument's machine mode. 7338 SLOTNO is the index number of the argument's slot in the parameter array. 7339 REGNO is the hard register the union will be passed in. */ 7340 7341 static rtx 7342 function_arg_union_value (int size, machine_mode mode, int slotno, int regno) 7343 { 7344 unsigned int nwords; 7345 7346 /* See comment in function_arg_record_value for empty structures. */ 7347 if (size <= 0) 7348 return gen_rtx_REG (mode, regno); 7349 7350 if (slotno == SPARC_INT_ARG_MAX - 1) 7351 nwords = 1; 7352 else 7353 nwords = CEIL_NWORDS (size); 7354 7355 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords)); 7356 7357 /* Unions are passed left-justified. */ 7358 for (unsigned int i = 0; i < nwords; i++) 7359 XVECEXP (regs, 0, i) 7360 = gen_rtx_EXPR_LIST (VOIDmode, 7361 gen_rtx_REG (word_mode, regno + i), 7362 GEN_INT (UNITS_PER_WORD * i)); 7363 7364 return regs; 7365 } 7366 7367 /* Used by function_arg and function_value to implement the conventions 7368 of the 64-bit ABI for passing and returning BLKmode vectors. 7369 Return an expression valid as a return value for the FUNCTION_ARG 7370 and TARGET_FUNCTION_VALUE. 7371 7372 SIZE is the size in bytes of the vector. 7373 SLOTNO is the index number of the argument's slot in the parameter array. 7374 NAMED is true if this argument is a named parameter 7375 (otherwise it is an extra parameter matching an ellipsis). 7376 REGNO is the hard register the vector will be passed in. */ 7377 7378 static rtx 7379 function_arg_vector_value (int size, int slotno, bool named, int regno) 7380 { 7381 const int mult = (named ? 2 : 1); 7382 unsigned int nwords; 7383 7384 if (slotno == (named ? 
SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1) 7385 nwords = 1; 7386 else 7387 nwords = CEIL_NWORDS (size); 7388 7389 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords)); 7390 7391 if (size < UNITS_PER_WORD) 7392 XVECEXP (regs, 0, 0) 7393 = gen_rtx_EXPR_LIST (VOIDmode, 7394 gen_rtx_REG (SImode, regno), 7395 const0_rtx); 7396 else 7397 for (unsigned int i = 0; i < nwords; i++) 7398 XVECEXP (regs, 0, i) 7399 = gen_rtx_EXPR_LIST (VOIDmode, 7400 gen_rtx_REG (word_mode, regno + i * mult), 7401 GEN_INT (i * UNITS_PER_WORD)); 7402 7403 return regs; 7404 } 7405 7406 /* Determine where to put an argument to a function. 7407 Value is zero to push the argument on the stack, 7408 or a hard register in which to store the argument. 7409 7410 CUM is a variable of type CUMULATIVE_ARGS which gives info about 7411 the preceding args and about the function being called. 7412 MODE is the argument's machine mode. 7413 TYPE is the data type of the argument (as a tree). 7414 This is null for libcalls where that information may 7415 not be available. 7416 NAMED is true if this argument is a named parameter 7417 (otherwise it is an extra parameter matching an ellipsis). 7418 INCOMING_P is false for TARGET_FUNCTION_ARG, true for 7419 TARGET_FUNCTION_INCOMING_ARG. */ 7420 7421 static rtx 7422 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode, 7423 const_tree type, bool named, bool incoming) 7424 { 7425 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 7426 const int regbase 7427 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST; 7428 int slotno, regno, padding; 7429 enum mode_class mclass = GET_MODE_CLASS (mode); 7430 7431 slotno 7432 = function_arg_slotno (cum, mode, type, named, incoming, ®no, &padding); 7433 if (slotno == -1) 7434 return 0; 7435 7436 /* Integer vectors are handled like floats as per the Sun VIS SDK. */ 7437 if (type && VECTOR_INTEGER_TYPE_P (type)) 7438 mclass = MODE_FLOAT; 7439 7440 if (TARGET_ARCH32) 7441 return gen_rtx_REG (mode, regno); 7442 7443 /* Structures up to 16 bytes in size are passed in arg slots on the stack 7444 and are promoted to registers if possible. */ 7445 if (type && TREE_CODE (type) == RECORD_TYPE) 7446 { 7447 const int size = int_size_in_bytes (type); 7448 gcc_assert (size <= 16); 7449 7450 return function_arg_record_value (type, mode, slotno, named, regbase); 7451 } 7452 7453 /* Unions up to 16 bytes in size are passed in integer registers. */ 7454 else if (type && TREE_CODE (type) == UNION_TYPE) 7455 { 7456 const int size = int_size_in_bytes (type); 7457 gcc_assert (size <= 16); 7458 7459 return function_arg_union_value (size, mode, slotno, regno); 7460 } 7461 7462 /* Floating-point vectors up to 16 bytes are passed in registers. */ 7463 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode) 7464 { 7465 const int size = int_size_in_bytes (type); 7466 gcc_assert (size <= 16); 7467 7468 return function_arg_vector_value (size, slotno, named, regno); 7469 } 7470 7471 /* v9 fp args in reg slots beyond the int reg slots get passed in regs 7472 but also have the slot allocated for them. 7473 If no prototype is in scope fp values in register slots get passed 7474 in two places, either fp regs and int regs or fp regs and memory. 
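For instance, an unprototyped double in the (hypothetical) third argument slot is passed both in %d4 and in %o2 below, whereas one lying beyond the sixth slot is passed in an FP register and in memory.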
*/ 7475 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) 7476 && SPARC_FP_REG_P (regno)) 7477 { 7478 rtx reg = gen_rtx_REG (mode, regno); 7479 if (cum->prototype_p || cum->libcall_p) 7480 return reg; 7481 else 7482 { 7483 rtx v0, v1; 7484 7485 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2) 7486 { 7487 int intreg; 7488 7489 /* On incoming, we don't need to know that the value 7490 is passed in %f0 and %i0, and it confuses other parts 7491 causing needless spillage even on the simplest cases. */ 7492 if (incoming) 7493 return reg; 7494 7495 intreg = (SPARC_OUTGOING_INT_ARG_FIRST 7496 + (regno - SPARC_FP_ARG_FIRST) / 2); 7497 7498 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); 7499 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg), 7500 const0_rtx); 7501 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); 7502 } 7503 else 7504 { 7505 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); 7506 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); 7507 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); 7508 } 7509 } 7510 } 7511 7512 /* All other aggregate types are passed in an integer register in a mode 7513 corresponding to the size of the type. */ 7514 else if (type && AGGREGATE_TYPE_P (type)) 7515 { 7516 const int size = int_size_in_bytes (type); 7517 gcc_assert (size <= 16); 7518 7519 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk (); 7520 } 7521 7522 return gen_rtx_REG (mode, regno); 7523 } 7524 7525 /* Handle the TARGET_FUNCTION_ARG target hook. */ 7526 7527 static rtx 7528 sparc_function_arg (cumulative_args_t cum, machine_mode mode, 7529 const_tree type, bool named) 7530 { 7531 return sparc_function_arg_1 (cum, mode, type, named, false); 7532 } 7533 7534 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */ 7535 7536 static rtx 7537 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode, 7538 const_tree type, bool named) 7539 { 7540 return sparc_function_arg_1 (cum, mode, type, named, true); 7541 } 7542 7543 /* For sparc64, objects requiring 16 byte alignment are passed that way. */ 7544 7545 static unsigned int 7546 sparc_function_arg_boundary (machine_mode mode, const_tree type) 7547 { 7548 return ((TARGET_ARCH64 7549 && (GET_MODE_ALIGNMENT (mode) == 128 7550 || (type && TYPE_ALIGN (type) == 128))) 7551 ? 128 7552 : PARM_BOUNDARY); 7553 } 7554 7555 /* For an arg passed partly in registers and partly in memory, 7556 this is the number of bytes of registers used. 7557 For args passed entirely in registers or entirely in memory, zero. 7558 7559 Any arg that starts in the first 6 regs but won't entirely fit in them 7560 needs partial registers on v8. On v9, structures with integer 7561 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp 7562 values that begin in the last fp reg [where "last fp reg" varies with the 7563 mode] will be split between that reg and memory. */ 7564 7565 static int 7566 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode, 7567 tree type, bool named) 7568 { 7569 int slotno, regno, padding; 7570 7571 /* We pass false for incoming here, it doesn't matter. */ 7572 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named, 7573 false, ®no, &padding); 7574 7575 if (slotno == -1) 7576 return 0; 7577 7578 if (TARGET_ARCH32) 7579 { 7580 /* We are guaranteed by pass_by_reference that the size of the 7581 argument is not greater than 8 bytes, so we only need to return 7582 one word if the argument is partially passed in registers. 
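	 For example, a DImode argument assigned to the last register slot
	 has its first word in %o5 and its second word on the stack, so we
	 report exactly UNITS_PER_WORD bytes passed in registers.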
*/ 7583 const int size = GET_MODE_SIZE (mode); 7584 7585 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1) 7586 return UNITS_PER_WORD; 7587 } 7588 else 7589 { 7590 /* We are guaranteed by pass_by_reference that the size of the 7591 argument is not greater than 16 bytes, so we only need to return 7592 one word if the argument is partially passed in registers. */ 7593 if (type && AGGREGATE_TYPE_P (type)) 7594 { 7595 const int size = int_size_in_bytes (type); 7596 7597 if (size > UNITS_PER_WORD 7598 && (slotno == SPARC_INT_ARG_MAX - 1 7599 || slotno == SPARC_FP_ARG_MAX - 1)) 7600 return UNITS_PER_WORD; 7601 } 7602 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT 7603 || ((GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT 7604 || (type && VECTOR_TYPE_P (type))) 7605 && !(TARGET_FPU && named))) 7606 { 7607 const int size = (type && VECTOR_FLOAT_TYPE_P (type)) 7608 ? int_size_in_bytes (type) 7609 : GET_MODE_SIZE (mode); 7610 7611 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1) 7612 return UNITS_PER_WORD; 7613 } 7614 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT 7615 || (type && VECTOR_TYPE_P (type))) 7616 { 7617 const int size = (type && VECTOR_FLOAT_TYPE_P (type)) 7618 ? int_size_in_bytes (type) 7619 : GET_MODE_SIZE (mode); 7620 7621 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1) 7622 return UNITS_PER_WORD; 7623 } 7624 } 7625 7626 return 0; 7627 } 7628 7629 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook. 7630 Update the data in CUM to advance over an argument 7631 of mode MODE and data type TYPE. 7632 TYPE is null for libcalls where that information may not be available. */ 7633 7634 static void 7635 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, 7636 const_tree type, bool named) 7637 { 7638 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 7639 int regno, padding; 7640 7641 /* We pass false for incoming here, it doesn't matter. */ 7642 function_arg_slotno (cum, mode, type, named, false, ®no, &padding); 7643 7644 /* If argument requires leading padding, add it. */ 7645 cum->words += padding; 7646 7647 if (TARGET_ARCH32) 7648 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode)); 7649 else 7650 { 7651 /* For types that can have BLKmode, get the size from the type. */ 7652 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))) 7653 { 7654 const int size = int_size_in_bytes (type); 7655 7656 /* See comment in function_arg_record_value for empty structures. */ 7657 if (size <= 0) 7658 cum->words++; 7659 else 7660 cum->words += CEIL_NWORDS (size); 7661 } 7662 else 7663 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode)); 7664 } 7665 } 7666 7667 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs 7668 are always stored left shifted in their argument slot. */ 7669 7670 static pad_direction 7671 sparc_function_arg_padding (machine_mode mode, const_tree type) 7672 { 7673 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type)) 7674 return PAD_UPWARD; 7675 7676 /* Fall back to the default. */ 7677 return default_function_arg_padding (mode, type); 7678 } 7679 7680 /* Handle the TARGET_RETURN_IN_MEMORY target hook. 7681 Specify whether to return the return value in memory. */ 7682 7683 static bool 7684 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 7685 { 7686 if (TARGET_ARCH32) 7687 /* Original SPARC 32-bit ABI says that structures and unions, and 7688 quad-precision floats are returned in memory. 
But note that the 7689 first part is implemented through -fpcc-struct-return being the 7690 default, so here we only implement -freg-struct-return instead. 7691 All other base types are returned in registers. 7692 7693 Extended ABI (as implemented by the Sun compiler) says that 7694 all complex floats are returned in registers (8 FP registers 7695 at most for '_Complex long double'). Return all complex integers 7696 in registers (4 at most for '_Complex long long'). 7697 7698 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7699 integers are returned like floats of the same size, that is in 7700 registers up to 8 bytes and in memory otherwise. Return all 7701 vector floats in memory like structure and unions; note that 7702 they always have BLKmode like the latter. */ 7703 return (TYPE_MODE (type) == BLKmode 7704 || TYPE_MODE (type) == TFmode 7705 || (TREE_CODE (type) == VECTOR_TYPE 7706 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); 7707 else 7708 /* Original SPARC 64-bit ABI says that structures and unions 7709 smaller than 32 bytes are returned in registers, as well as 7710 all other base types. 7711 7712 Extended ABI (as implemented by the Sun compiler) says that all 7713 complex floats are returned in registers (8 FP registers at most 7714 for '_Complex long double'). Return all complex integers in 7715 registers (4 at most for '_Complex TItype'). 7716 7717 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7718 integers are returned like floats of the same size, that is in 7719 registers. Return all vector floats like structure and unions; 7720 note that they always have BLKmode like the latter. */ 7721 return (TYPE_MODE (type) == BLKmode 7722 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32); 7723 } 7724 7725 /* Handle the TARGET_STRUCT_VALUE target hook. 7726 Return where to find the structure return value address. */ 7727 7728 static rtx 7729 sparc_struct_value_rtx (tree fndecl, int incoming) 7730 { 7731 if (TARGET_ARCH64) 7732 return NULL_RTX; 7733 else 7734 { 7735 rtx mem; 7736 7737 if (incoming) 7738 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx, 7739 STRUCT_VALUE_OFFSET)); 7740 else 7741 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx, 7742 STRUCT_VALUE_OFFSET)); 7743 7744 /* Only follow the SPARC ABI for fixed-size structure returns. 7745 Variable size structure returns are handled per the normal 7746 procedures in GCC. This is enabled by -mstd-struct-return */ 7747 if (incoming == 2 7748 && sparc_std_struct_return 7749 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl)) 7750 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST) 7751 { 7752 /* We must check and adjust the return address, as it is optional 7753 as to whether the return object is really provided. */ 7754 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); 7755 rtx scratch = gen_reg_rtx (SImode); 7756 rtx_code_label *endlab = gen_label_rtx (); 7757 7758 /* Calculate the return object size. */ 7759 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl)); 7760 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff); 7761 /* Construct a temporary return value. */ 7762 rtx temp_val 7763 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0); 7764 7765 /* Implement SPARC 32-bit psABI callee return struct checking: 7766 7767 Fetch the instruction where we will return to and see if 7768 it's an unimp instruction (the most significant 10 bits 7769 will be zero). 
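	 The unimp left by the caller encodes the expected size of the
	 returned object in its immediate field, so comparing the fetched
	 word against the low 12 bits of our own size tells us whether the
	 caller really reserved a return object.  If it did, we keep the
	 pre-adjusted return address so that the epilogue skips the unimp;
	 otherwise we undo the adjustment and make the hidden pointer refer
	 to a local temporary instead.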
*/ 7770 emit_move_insn (scratch, gen_rtx_MEM (SImode, 7771 plus_constant (Pmode, 7772 ret_reg, 8))); 7773 /* Assume the size is valid and pre-adjust. */ 7774 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4))); 7775 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 7776 0, endlab); 7777 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4))); 7778 /* Write the address of the memory pointed to by temp_val into 7779 the memory pointed to by mem. */ 7780 emit_move_insn (mem, XEXP (temp_val, 0)); 7781 emit_label (endlab); 7782 } 7783 7784 return mem; 7785 } 7786 } 7787 7788 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook. 7789 For v9, function return values are subject to the same rules as arguments, 7790 except that up to 32 bytes may be returned in registers. */ 7791 7792 static rtx 7793 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing) 7794 { 7795 /* Beware that the two values are swapped here wrt function_arg. */ 7796 const int regbase 7797 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST; 7798 enum mode_class mclass = GET_MODE_CLASS (mode); 7799 int regno; 7800 7801 /* Integer vectors are handled like floats as per the Sun VIS SDK. 7802 Note that integer vectors larger than 16 bytes have BLKmode so 7803 they need to be handled like floating-point vectors below. */ 7804 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode) 7805 mclass = MODE_FLOAT; 7806 7807 if (TARGET_ARCH64 && type) 7808 { 7809 /* Structures up to 32 bytes in size are returned in registers. */ 7810 if (TREE_CODE (type) == RECORD_TYPE) 7811 { 7812 const int size = int_size_in_bytes (type); 7813 gcc_assert (size <= 32); 7814 7815 return function_arg_record_value (type, mode, 0, true, regbase); 7816 } 7817 7818 /* Unions up to 32 bytes in size are returned in integer registers. */ 7819 else if (TREE_CODE (type) == UNION_TYPE) 7820 { 7821 const int size = int_size_in_bytes (type); 7822 gcc_assert (size <= 32); 7823 7824 return function_arg_union_value (size, mode, 0, regbase); 7825 } 7826 7827 /* Vectors up to 32 bytes are returned in FP registers. */ 7828 else if (VECTOR_TYPE_P (type) && mode == BLKmode) 7829 { 7830 const int size = int_size_in_bytes (type); 7831 gcc_assert (size <= 32); 7832 7833 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST); 7834 } 7835 7836 /* Objects that require it are returned in FP registers. */ 7837 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) 7838 ; 7839 7840 /* All other aggregate types are returned in an integer register in a 7841 mode corresponding to the size of the type. */ 7842 else if (AGGREGATE_TYPE_P (type)) 7843 { 7844 /* All other aggregate types are passed in an integer register 7845 in a mode corresponding to the size of the type. */ 7846 const int size = int_size_in_bytes (type); 7847 gcc_assert (size <= 32); 7848 7849 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk (); 7850 7851 /* ??? We probably should have made the same ABI change in 7852 3.4.0 as the one we made for unions. The latter was 7853 required by the SCD though, while the former is not 7854 specified, so we favored compatibility and efficiency. 7855 7856 Now we're stuck for aggregates larger than 16 bytes, 7857 because OImode vanished in the meantime. Let's not 7858 try to be unduly clever, and simply follow the ABI 7859 for unions in that case. 
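	     Concretely, a 17-to-32-byte aggregate that is neither a record
	     nor a union (typically an array type) has no integer mode wide
	     enough, so it is returned left-justified in up to four
	     consecutive integer registers, just as a union of the same
	     size would be.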
*/ 7860 if (mode == BLKmode) 7861 return function_arg_union_value (size, mode, 0, regbase); 7862 else 7863 mclass = MODE_INT; 7864 } 7865 7866 /* We should only have pointer and integer types at this point. This 7867 must match sparc_promote_function_mode. */ 7868 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 7869 mode = word_mode; 7870 } 7871 7872 /* We should only have pointer and integer types at this point, except with 7873 -freg-struct-return. This must match sparc_promote_function_mode. */ 7874 else if (TARGET_ARCH32 7875 && !(type && AGGREGATE_TYPE_P (type)) 7876 && mclass == MODE_INT 7877 && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 7878 mode = word_mode; 7879 7880 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU) 7881 regno = SPARC_FP_ARG_FIRST; 7882 else 7883 regno = regbase; 7884 7885 return gen_rtx_REG (mode, regno); 7886 } 7887 7888 /* Handle TARGET_FUNCTION_VALUE. 7889 On the SPARC, the value is found in the first "output" register, but the 7890 called function leaves it in the first "input" register. */ 7891 7892 static rtx 7893 sparc_function_value (const_tree valtype, 7894 const_tree fn_decl_or_type ATTRIBUTE_UNUSED, 7895 bool outgoing) 7896 { 7897 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing); 7898 } 7899 7900 /* Handle TARGET_LIBCALL_VALUE. */ 7901 7902 static rtx 7903 sparc_libcall_value (machine_mode mode, 7904 const_rtx fun ATTRIBUTE_UNUSED) 7905 { 7906 return sparc_function_value_1 (NULL_TREE, mode, false); 7907 } 7908 7909 /* Handle FUNCTION_VALUE_REGNO_P. 7910 On the SPARC, the first "output" reg is used for integer values, and the 7911 first floating point register is used for floating point values. */ 7912 7913 static bool 7914 sparc_function_value_regno_p (const unsigned int regno) 7915 { 7916 return (regno == 8 || (TARGET_FPU && regno == 32)); 7917 } 7918 7919 /* Do what is necessary for `va_start'. We look at the current function 7920 to determine if stdarg or varargs is used and return the address of 7921 the first unnamed parameter. */ 7922 7923 static rtx 7924 sparc_builtin_saveregs (void) 7925 { 7926 int first_reg = crtl->args.info.words; 7927 rtx address; 7928 int regno; 7929 7930 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++) 7931 emit_move_insn (gen_rtx_MEM (word_mode, 7932 gen_rtx_PLUS (Pmode, 7933 frame_pointer_rtx, 7934 GEN_INT (FIRST_PARM_OFFSET (0) 7935 + (UNITS_PER_WORD 7936 * regno)))), 7937 gen_rtx_REG (word_mode, 7938 SPARC_INCOMING_INT_ARG_FIRST + regno)); 7939 7940 address = gen_rtx_PLUS (Pmode, 7941 frame_pointer_rtx, 7942 GEN_INT (FIRST_PARM_OFFSET (0) 7943 + UNITS_PER_WORD * first_reg)); 7944 7945 return address; 7946 } 7947 7948 /* Implement `va_start' for stdarg. */ 7949 7950 static void 7951 sparc_va_start (tree valist, rtx nextarg) 7952 { 7953 nextarg = expand_builtin_saveregs (); 7954 std_expand_builtin_va_start (valist, nextarg); 7955 } 7956 7957 /* Implement `va_arg' for stdarg. 
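   Every argument lives in the register save area or in the overflow area
   right above it, so the expansion merely aligns the va_list pointer first
   when a 64-bit target requires 16-byte alignment for the type, loads the
   value at the resulting address (indirectly for types passed by reference,
   and offset to the high end of the word for small scalars on this
   big-endian target), and then advances the pointer by the size rounded up
   to whole words.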
*/ 7958 7959 static tree 7960 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, 7961 gimple_seq *post_p) 7962 { 7963 HOST_WIDE_INT size, rsize, align; 7964 tree addr, incr; 7965 bool indirect; 7966 tree ptrtype = build_pointer_type (type); 7967 7968 if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) 7969 { 7970 indirect = true; 7971 size = rsize = UNITS_PER_WORD; 7972 align = 0; 7973 } 7974 else 7975 { 7976 indirect = false; 7977 size = int_size_in_bytes (type); 7978 rsize = ROUND_UP (size, UNITS_PER_WORD); 7979 align = 0; 7980 7981 if (TARGET_ARCH64) 7982 { 7983 /* For SPARC64, objects requiring 16-byte alignment get it. */ 7984 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD) 7985 align = 2 * UNITS_PER_WORD; 7986 7987 /* SPARC-V9 ABI states that structures up to 16 bytes in size 7988 are left-justified in their slots. */ 7989 if (AGGREGATE_TYPE_P (type)) 7990 { 7991 if (size == 0) 7992 size = rsize = UNITS_PER_WORD; 7993 else 7994 size = rsize; 7995 } 7996 } 7997 } 7998 7999 incr = valist; 8000 if (align) 8001 { 8002 incr = fold_build_pointer_plus_hwi (incr, align - 1); 8003 incr = fold_convert (sizetype, incr); 8004 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr, 8005 size_int (-align)); 8006 incr = fold_convert (ptr_type_node, incr); 8007 } 8008 8009 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue); 8010 addr = incr; 8011 8012 if (BYTES_BIG_ENDIAN && size < rsize) 8013 addr = fold_build_pointer_plus_hwi (incr, rsize - size); 8014 8015 if (indirect) 8016 { 8017 addr = fold_convert (build_pointer_type (ptrtype), addr); 8018 addr = build_va_arg_indirect_ref (addr); 8019 } 8020 8021 /* If the address isn't aligned properly for the type, we need a temporary. 8022 FIXME: This is inefficient, usually we can do this in registers. */ 8023 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD) 8024 { 8025 tree tmp = create_tmp_var (type, "va_arg_tmp"); 8026 tree dest_addr = build_fold_addr_expr (tmp); 8027 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), 8028 3, dest_addr, addr, size_int (rsize)); 8029 TREE_ADDRESSABLE (tmp) = 1; 8030 gimplify_and_add (copy, pre_p); 8031 addr = dest_addr; 8032 } 8033 8034 else 8035 addr = fold_convert (ptrtype, addr); 8036 8037 incr = fold_build_pointer_plus_hwi (incr, rsize); 8038 gimplify_assign (valist, incr, post_p); 8039 8040 return build_va_arg_indirect_ref (addr); 8041 } 8042 8043 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook. 8044 Specify whether the vector mode is supported by the hardware. */ 8045 8046 static bool 8047 sparc_vector_mode_supported_p (machine_mode mode) 8048 { 8049 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false; 8050 } 8051 8052 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */ 8053 8054 static machine_mode 8055 sparc_preferred_simd_mode (scalar_mode mode) 8056 { 8057 if (TARGET_VIS) 8058 switch (mode) 8059 { 8060 case E_SImode: 8061 return V2SImode; 8062 case E_HImode: 8063 return V4HImode; 8064 case E_QImode: 8065 return V8QImode; 8066 8067 default:; 8068 } 8069 8070 return word_mode; 8071 } 8072 8073 /* Implement TARGET_CAN_FOLLOW_JUMP. */ 8074 8075 static bool 8076 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee) 8077 { 8078 /* Do not fold unconditional jumps that have been created for crossing 8079 partition boundaries. 
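   Such a jump is the intended way of transferring control between the hot
   and cold partitions, so redirecting another branch straight to its target
   would create a new hot/cold crossing that is not marked as such.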
*/ 8080 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower)) 8081 return false; 8082 8083 return true; 8084 } 8085 8086 /* Return the string to output an unconditional branch to LABEL, which is 8087 the operand number of the label. 8088 8089 DEST is the destination insn (i.e. the label), INSN is the source. */ 8090 8091 const char * 8092 output_ubranch (rtx dest, rtx_insn *insn) 8093 { 8094 static char string[64]; 8095 bool v9_form = false; 8096 int delta; 8097 char *p; 8098 8099 /* Even if we are trying to use cbcond for this, evaluate 8100 whether we can use V9 branches as our backup plan. */ 8101 delta = 5000000; 8102 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ()) 8103 delta = (INSN_ADDRESSES (INSN_UID (dest)) 8104 - INSN_ADDRESSES (INSN_UID (insn))); 8105 8106 /* Leave some instructions for "slop". */ 8107 if (TARGET_V9 && delta >= -260000 && delta < 260000) 8108 v9_form = true; 8109 8110 if (TARGET_CBCOND) 8111 { 8112 bool emit_nop = emit_cbcond_nop (insn); 8113 bool far = false; 8114 const char *rval; 8115 8116 if (delta < -500 || delta > 500) 8117 far = true; 8118 8119 if (far) 8120 { 8121 if (v9_form) 8122 rval = "ba,a,pt\t%%xcc, %l0"; 8123 else 8124 rval = "b,a\t%l0"; 8125 } 8126 else 8127 { 8128 if (emit_nop) 8129 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop"; 8130 else 8131 rval = "cwbe\t%%g0, %%g0, %l0"; 8132 } 8133 return rval; 8134 } 8135 8136 if (v9_form) 8137 strcpy (string, "ba%*,pt\t%%xcc, "); 8138 else 8139 strcpy (string, "b%*\t"); 8140 8141 p = strchr (string, '\0'); 8142 *p++ = '%'; 8143 *p++ = 'l'; 8144 *p++ = '0'; 8145 *p++ = '%'; 8146 *p++ = '('; 8147 *p = '\0'; 8148 8149 return string; 8150 } 8151 8152 /* Return the string to output a conditional branch to LABEL, which is 8153 the operand number of the label. OP is the conditional expression. 8154 XEXP (OP, 0) is assumed to be a condition code register (integer or 8155 floating point) and its mode specifies what kind of comparison we made. 8156 8157 DEST is the destination insn (i.e. the label), INSN is the source. 8158 8159 REVERSED is nonzero if we should reverse the sense of the comparison. 8160 8161 ANNUL is nonzero if we should generate an annulling branch. */ 8162 8163 const char * 8164 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul, 8165 rtx_insn *insn) 8166 { 8167 static char string[64]; 8168 enum rtx_code code = GET_CODE (op); 8169 rtx cc_reg = XEXP (op, 0); 8170 machine_mode mode = GET_MODE (cc_reg); 8171 const char *labelno, *branch; 8172 int spaces = 8, far; 8173 char *p; 8174 8175 /* v9 branches are limited to +-1MB. If it is too far away, 8176 change 8177 8178 bne,pt %xcc, .LC30 8179 8180 to 8181 8182 be,pn %xcc, .+12 8183 nop 8184 ba .LC30 8185 8186 and 8187 8188 fbne,a,pn %fcc2, .LC29 8189 8190 to 8191 8192 fbe,pt %fcc2, .+16 8193 nop 8194 ba .LC29 */ 8195 8196 far = TARGET_V9 && (get_attr_length (insn) >= 3); 8197 if (reversed ^ far) 8198 { 8199 /* Reversal of FP compares takes care -- an ordered compare 8200 becomes an unordered compare and vice versa. */ 8201 if (mode == CCFPmode || mode == CCFPEmode) 8202 code = reverse_condition_maybe_unordered (code); 8203 else 8204 code = reverse_condition (code); 8205 } 8206 8207 /* Start by writing the branch condition. 
*/ 8208 if (mode == CCFPmode || mode == CCFPEmode) 8209 { 8210 switch (code) 8211 { 8212 case NE: 8213 branch = "fbne"; 8214 break; 8215 case EQ: 8216 branch = "fbe"; 8217 break; 8218 case GE: 8219 branch = "fbge"; 8220 break; 8221 case GT: 8222 branch = "fbg"; 8223 break; 8224 case LE: 8225 branch = "fble"; 8226 break; 8227 case LT: 8228 branch = "fbl"; 8229 break; 8230 case UNORDERED: 8231 branch = "fbu"; 8232 break; 8233 case ORDERED: 8234 branch = "fbo"; 8235 break; 8236 case UNGT: 8237 branch = "fbug"; 8238 break; 8239 case UNLT: 8240 branch = "fbul"; 8241 break; 8242 case UNEQ: 8243 branch = "fbue"; 8244 break; 8245 case UNGE: 8246 branch = "fbuge"; 8247 break; 8248 case UNLE: 8249 branch = "fbule"; 8250 break; 8251 case LTGT: 8252 branch = "fblg"; 8253 break; 8254 default: 8255 gcc_unreachable (); 8256 } 8257 8258 /* ??? !v9: FP branches cannot be preceded by another floating point 8259 insn. Because there is currently no concept of pre-delay slots, 8260 we can fix this only by always emitting a nop before a floating 8261 point branch. */ 8262 8263 string[0] = '\0'; 8264 if (! TARGET_V9) 8265 strcpy (string, "nop\n\t"); 8266 strcat (string, branch); 8267 } 8268 else 8269 { 8270 switch (code) 8271 { 8272 case NE: 8273 if (mode == CCVmode || mode == CCXVmode) 8274 branch = "bvs"; 8275 else 8276 branch = "bne"; 8277 break; 8278 case EQ: 8279 if (mode == CCVmode || mode == CCXVmode) 8280 branch = "bvc"; 8281 else 8282 branch = "be"; 8283 break; 8284 case GE: 8285 if (mode == CCNZmode || mode == CCXNZmode) 8286 branch = "bpos"; 8287 else 8288 branch = "bge"; 8289 break; 8290 case GT: 8291 branch = "bg"; 8292 break; 8293 case LE: 8294 branch = "ble"; 8295 break; 8296 case LT: 8297 if (mode == CCNZmode || mode == CCXNZmode) 8298 branch = "bneg"; 8299 else 8300 branch = "bl"; 8301 break; 8302 case GEU: 8303 branch = "bgeu"; 8304 break; 8305 case GTU: 8306 branch = "bgu"; 8307 break; 8308 case LEU: 8309 branch = "bleu"; 8310 break; 8311 case LTU: 8312 branch = "blu"; 8313 break; 8314 default: 8315 gcc_unreachable (); 8316 } 8317 strcpy (string, branch); 8318 } 8319 spaces -= strlen (branch); 8320 p = strchr (string, '\0'); 8321 8322 /* Now add the annulling, the label, and a possible noop. */ 8323 if (annul && ! far) 8324 { 8325 strcpy (p, ",a"); 8326 p += 2; 8327 spaces -= 2; 8328 } 8329 8330 if (TARGET_V9) 8331 { 8332 rtx note; 8333 int v8 = 0; 8334 8335 if (! far && insn && INSN_ADDRESSES_SET_P ()) 8336 { 8337 int delta = (INSN_ADDRESSES (INSN_UID (dest)) 8338 - INSN_ADDRESSES (INSN_UID (insn))); 8339 /* Leave some instructions for "slop". */ 8340 if (delta < -260000 || delta >= 260000) 8341 v8 = 1; 8342 } 8343 8344 switch (mode) 8345 { 8346 case E_CCmode: 8347 case E_CCNZmode: 8348 case E_CCCmode: 8349 case E_CCVmode: 8350 labelno = "%%icc, "; 8351 if (v8) 8352 labelno = ""; 8353 break; 8354 case E_CCXmode: 8355 case E_CCXNZmode: 8356 case E_CCXCmode: 8357 case E_CCXVmode: 8358 labelno = "%%xcc, "; 8359 gcc_assert (!v8); 8360 break; 8361 case E_CCFPmode: 8362 case E_CCFPEmode: 8363 { 8364 static char v9_fcc_labelno[] = "%%fccX, "; 8365 /* Set the char indicating the number of the fcc reg to use. 
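	       For example, with the comparison in %fcc2 the template
	       becomes "%%fcc2, ", which is emitted as "%fcc2, " in the
	       assembly output.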
*/ 8366 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0'; 8367 labelno = v9_fcc_labelno; 8368 if (v8) 8369 { 8370 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG); 8371 labelno = ""; 8372 } 8373 } 8374 break; 8375 default: 8376 gcc_unreachable (); 8377 } 8378 8379 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) 8380 { 8381 strcpy (p, 8382 ((profile_probability::from_reg_br_prob_note (XINT (note, 0)) 8383 >= profile_probability::even ()) ^ far) 8384 ? ",pt" : ",pn"); 8385 p += 3; 8386 spaces -= 3; 8387 } 8388 } 8389 else 8390 labelno = ""; 8391 8392 if (spaces > 0) 8393 *p++ = '\t'; 8394 else 8395 *p++ = ' '; 8396 strcpy (p, labelno); 8397 p = strchr (p, '\0'); 8398 if (far) 8399 { 8400 strcpy (p, ".+12\n\t nop\n\tb\t"); 8401 /* Skip the next insn if requested or 8402 if we know that it will be a nop. */ 8403 if (annul || ! final_sequence) 8404 p[3] = '6'; 8405 p += 14; 8406 } 8407 *p++ = '%'; 8408 *p++ = 'l'; 8409 *p++ = label + '0'; 8410 *p++ = '%'; 8411 *p++ = '#'; 8412 *p = '\0'; 8413 8414 return string; 8415 } 8416 8417 /* Emit a library call comparison between floating point X and Y. 8418 COMPARISON is the operator to compare with (EQ, NE, GT, etc). 8419 Return the new operator to be used in the comparison sequence. 8420 8421 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode 8422 values as arguments instead of the TFmode registers themselves, 8423 that's why we cannot call emit_float_lib_cmp. */ 8424 8425 rtx 8426 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison) 8427 { 8428 const char *qpfunc; 8429 rtx slot0, slot1, result, tem, tem2, libfunc; 8430 machine_mode mode; 8431 enum rtx_code new_comparison; 8432 8433 switch (comparison) 8434 { 8435 case EQ: 8436 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq"); 8437 break; 8438 8439 case NE: 8440 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne"); 8441 break; 8442 8443 case GT: 8444 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt"); 8445 break; 8446 8447 case GE: 8448 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge"); 8449 break; 8450 8451 case LT: 8452 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt"); 8453 break; 8454 8455 case LE: 8456 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle"); 8457 break; 8458 8459 case ORDERED: 8460 case UNORDERED: 8461 case UNGT: 8462 case UNLT: 8463 case UNEQ: 8464 case UNGE: 8465 case UNLE: 8466 case LTGT: 8467 qpfunc = (TARGET_ARCH64 ? 
"_Qp_cmp" : "_Q_cmp"); 8468 break; 8469 8470 default: 8471 gcc_unreachable (); 8472 } 8473 8474 if (TARGET_ARCH64) 8475 { 8476 if (MEM_P (x)) 8477 { 8478 tree expr = MEM_EXPR (x); 8479 if (expr) 8480 mark_addressable (expr); 8481 slot0 = x; 8482 } 8483 else 8484 { 8485 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode)); 8486 emit_move_insn (slot0, x); 8487 } 8488 8489 if (MEM_P (y)) 8490 { 8491 tree expr = MEM_EXPR (y); 8492 if (expr) 8493 mark_addressable (expr); 8494 slot1 = y; 8495 } 8496 else 8497 { 8498 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode)); 8499 emit_move_insn (slot1, y); 8500 } 8501 8502 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc); 8503 emit_library_call (libfunc, LCT_NORMAL, 8504 DImode, 8505 XEXP (slot0, 0), Pmode, 8506 XEXP (slot1, 0), Pmode); 8507 mode = DImode; 8508 } 8509 else 8510 { 8511 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc); 8512 emit_library_call (libfunc, LCT_NORMAL, 8513 SImode, 8514 x, TFmode, y, TFmode); 8515 mode = SImode; 8516 } 8517 8518 8519 /* Immediately move the result of the libcall into a pseudo 8520 register so reload doesn't clobber the value if it needs 8521 the return register for a spill reg. */ 8522 result = gen_reg_rtx (mode); 8523 emit_move_insn (result, hard_libcall_value (mode, libfunc)); 8524 8525 switch (comparison) 8526 { 8527 default: 8528 return gen_rtx_NE (VOIDmode, result, const0_rtx); 8529 case ORDERED: 8530 case UNORDERED: 8531 new_comparison = (comparison == UNORDERED ? EQ : NE); 8532 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3)); 8533 case UNGT: 8534 case UNGE: 8535 new_comparison = (comparison == UNGT ? GT : NE); 8536 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx); 8537 case UNLE: 8538 return gen_rtx_NE (VOIDmode, result, const2_rtx); 8539 case UNLT: 8540 tem = gen_reg_rtx (mode); 8541 if (TARGET_ARCH32) 8542 emit_insn (gen_andsi3 (tem, result, const1_rtx)); 8543 else 8544 emit_insn (gen_anddi3 (tem, result, const1_rtx)); 8545 return gen_rtx_NE (VOIDmode, tem, const0_rtx); 8546 case UNEQ: 8547 case LTGT: 8548 tem = gen_reg_rtx (mode); 8549 if (TARGET_ARCH32) 8550 emit_insn (gen_addsi3 (tem, result, const1_rtx)); 8551 else 8552 emit_insn (gen_adddi3 (tem, result, const1_rtx)); 8553 tem2 = gen_reg_rtx (mode); 8554 if (TARGET_ARCH32) 8555 emit_insn (gen_andsi3 (tem2, tem, const2_rtx)); 8556 else 8557 emit_insn (gen_anddi3 (tem2, tem, const2_rtx)); 8558 new_comparison = (comparison == UNEQ ? EQ : NE); 8559 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx); 8560 } 8561 8562 gcc_unreachable (); 8563 } 8564 8565 /* Generate an unsigned DImode to FP conversion. This is the same code 8566 optabs would emit if we didn't have TFmode patterns. 
*/ 8567 8568 void 8569 sparc_emit_floatunsdi (rtx *operands, machine_mode mode) 8570 { 8571 rtx i0, i1, f0, in, out; 8572 8573 out = operands[0]; 8574 in = force_reg (DImode, operands[1]); 8575 rtx_code_label *neglab = gen_label_rtx (); 8576 rtx_code_label *donelab = gen_label_rtx (); 8577 i0 = gen_reg_rtx (DImode); 8578 i1 = gen_reg_rtx (DImode); 8579 f0 = gen_reg_rtx (mode); 8580 8581 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); 8582 8583 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in))); 8584 emit_jump_insn (gen_jump (donelab)); 8585 emit_barrier (); 8586 8587 emit_label (neglab); 8588 8589 emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); 8590 emit_insn (gen_anddi3 (i1, in, const1_rtx)); 8591 emit_insn (gen_iordi3 (i0, i0, i1)); 8592 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0))); 8593 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); 8594 8595 emit_label (donelab); 8596 } 8597 8598 /* Generate an FP to unsigned DImode conversion. This is the same code 8599 optabs would emit if we didn't have TFmode patterns. */ 8600 8601 void 8602 sparc_emit_fixunsdi (rtx *operands, machine_mode mode) 8603 { 8604 rtx i0, i1, f0, in, out, limit; 8605 8606 out = operands[0]; 8607 in = force_reg (mode, operands[1]); 8608 rtx_code_label *neglab = gen_label_rtx (); 8609 rtx_code_label *donelab = gen_label_rtx (); 8610 i0 = gen_reg_rtx (DImode); 8611 i1 = gen_reg_rtx (DImode); 8612 limit = gen_reg_rtx (mode); 8613 f0 = gen_reg_rtx (mode); 8614 8615 emit_move_insn (limit, 8616 const_double_from_real_value ( 8617 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode)); 8618 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab); 8619 8620 emit_insn (gen_rtx_SET (out, 8621 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in)))); 8622 emit_jump_insn (gen_jump (donelab)); 8623 emit_barrier (); 8624 8625 emit_label (neglab); 8626 8627 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit))); 8628 emit_insn (gen_rtx_SET (i0, 8629 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0)))); 8630 emit_insn (gen_movdi (i1, const1_rtx)); 8631 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63))); 8632 emit_insn (gen_xordi3 (out, i0, i1)); 8633 8634 emit_label (donelab); 8635 } 8636 8637 /* Return the string to output a compare and branch instruction to DEST. 8638 DEST is the destination insn (i.e. the label), INSN is the source, 8639 and OP is the conditional expression. */ 8640 8641 const char * 8642 output_cbcond (rtx op, rtx dest, rtx_insn *insn) 8643 { 8644 machine_mode mode = GET_MODE (XEXP (op, 0)); 8645 enum rtx_code code = GET_CODE (op); 8646 const char *cond_str, *tmpl; 8647 int far, emit_nop, len; 8648 static char string[64]; 8649 char size_char; 8650 8651 /* Compare and Branch is limited to +-2KB. If it is too far away, 8652 change 8653 8654 cxbne X, Y, .LC30 8655 8656 to 8657 8658 cxbe X, Y, .+16 8659 nop 8660 ba,pt xcc, .LC30 8661 nop */ 8662 8663 len = get_attr_length (insn); 8664 8665 far = len == 4; 8666 emit_nop = len == 2; 8667 8668 if (far) 8669 code = reverse_condition (code); 8670 8671 size_char = ((mode == SImode) ? 
'w' : 'x'); 8672 8673 switch (code) 8674 { 8675 case NE: 8676 cond_str = "ne"; 8677 break; 8678 8679 case EQ: 8680 cond_str = "e"; 8681 break; 8682 8683 case GE: 8684 cond_str = "ge"; 8685 break; 8686 8687 case GT: 8688 cond_str = "g"; 8689 break; 8690 8691 case LE: 8692 cond_str = "le"; 8693 break; 8694 8695 case LT: 8696 cond_str = "l"; 8697 break; 8698 8699 case GEU: 8700 cond_str = "cc"; 8701 break; 8702 8703 case GTU: 8704 cond_str = "gu"; 8705 break; 8706 8707 case LEU: 8708 cond_str = "leu"; 8709 break; 8710 8711 case LTU: 8712 cond_str = "cs"; 8713 break; 8714 8715 default: 8716 gcc_unreachable (); 8717 } 8718 8719 if (far) 8720 { 8721 int veryfar = 1, delta; 8722 8723 if (INSN_ADDRESSES_SET_P ()) 8724 { 8725 delta = (INSN_ADDRESSES (INSN_UID (dest)) 8726 - INSN_ADDRESSES (INSN_UID (insn))); 8727 /* Leave some instructions for "slop". */ 8728 if (delta >= -260000 && delta < 260000) 8729 veryfar = 0; 8730 } 8731 8732 if (veryfar) 8733 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop"; 8734 else 8735 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop"; 8736 } 8737 else 8738 { 8739 if (emit_nop) 8740 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop"; 8741 else 8742 tmpl = "c%cb%s\t%%1, %%2, %%3"; 8743 } 8744 8745 snprintf (string, sizeof(string), tmpl, size_char, cond_str); 8746 8747 return string; 8748 } 8749 8750 /* Return the string to output a conditional branch to LABEL, testing 8751 register REG. LABEL is the operand number of the label; REG is the 8752 operand number of the reg. OP is the conditional expression. The mode 8753 of REG says what kind of comparison we made. 8754 8755 DEST is the destination insn (i.e. the label), INSN is the source. 8756 8757 REVERSED is nonzero if we should reverse the sense of the comparison. 8758 8759 ANNUL is nonzero if we should generate an annulling branch. */ 8760 8761 const char * 8762 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed, 8763 int annul, rtx_insn *insn) 8764 { 8765 static char string[64]; 8766 enum rtx_code code = GET_CODE (op); 8767 machine_mode mode = GET_MODE (XEXP (op, 0)); 8768 rtx note; 8769 int far; 8770 char *p; 8771 8772 /* branch on register are limited to +-128KB. If it is too far away, 8773 change 8774 8775 brnz,pt %g1, .LC30 8776 8777 to 8778 8779 brz,pn %g1, .+12 8780 nop 8781 ba,pt %xcc, .LC30 8782 8783 and 8784 8785 brgez,a,pn %o1, .LC29 8786 8787 to 8788 8789 brlz,pt %o1, .+16 8790 nop 8791 ba,pt %xcc, .LC29 */ 8792 8793 far = get_attr_length (insn) >= 3; 8794 8795 /* If not floating-point or if EQ or NE, we can just reverse the code. */ 8796 if (reversed ^ far) 8797 code = reverse_condition (code); 8798 8799 /* Only 64-bit versions of these instructions exist. */ 8800 gcc_assert (mode == DImode); 8801 8802 /* Start by writing the branch condition. */ 8803 8804 switch (code) 8805 { 8806 case NE: 8807 strcpy (string, "brnz"); 8808 break; 8809 8810 case EQ: 8811 strcpy (string, "brz"); 8812 break; 8813 8814 case GE: 8815 strcpy (string, "brgez"); 8816 break; 8817 8818 case LT: 8819 strcpy (string, "brlz"); 8820 break; 8821 8822 case LE: 8823 strcpy (string, "brlez"); 8824 break; 8825 8826 case GT: 8827 strcpy (string, "brgz"); 8828 break; 8829 8830 default: 8831 gcc_unreachable (); 8832 } 8833 8834 p = strchr (string, '\0'); 8835 8836 /* Now add the annulling, reg, label, and nop. */ 8837 if (annul && ! 
far) 8838 { 8839 strcpy (p, ",a"); 8840 p += 2; 8841 } 8842 8843 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) 8844 { 8845 strcpy (p, 8846 ((profile_probability::from_reg_br_prob_note (XINT (note, 0)) 8847 >= profile_probability::even ()) ^ far) 8848 ? ",pt" : ",pn"); 8849 p += 3; 8850 } 8851 8852 *p = p < string + 8 ? '\t' : ' '; 8853 p++; 8854 *p++ = '%'; 8855 *p++ = '0' + reg; 8856 *p++ = ','; 8857 *p++ = ' '; 8858 if (far) 8859 { 8860 int veryfar = 1, delta; 8861 8862 if (INSN_ADDRESSES_SET_P ()) 8863 { 8864 delta = (INSN_ADDRESSES (INSN_UID (dest)) 8865 - INSN_ADDRESSES (INSN_UID (insn))); 8866 /* Leave some instructions for "slop". */ 8867 if (delta >= -260000 && delta < 260000) 8868 veryfar = 0; 8869 } 8870 8871 strcpy (p, ".+12\n\t nop\n\t"); 8872 /* Skip the next insn if requested or 8873 if we know that it will be a nop. */ 8874 if (annul || ! final_sequence) 8875 p[3] = '6'; 8876 p += 12; 8877 if (veryfar) 8878 { 8879 strcpy (p, "b\t"); 8880 p += 2; 8881 } 8882 else 8883 { 8884 strcpy (p, "ba,pt\t%%xcc, "); 8885 p += 13; 8886 } 8887 } 8888 *p++ = '%'; 8889 *p++ = 'l'; 8890 *p++ = '0' + label; 8891 *p++ = '%'; 8892 *p++ = '#'; 8893 *p = '\0'; 8894 8895 return string; 8896 } 8897 8898 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7]. 8899 Such instructions cannot be used in the delay slot of return insn on v9. 8900 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts. 8901 */ 8902 8903 static int 8904 epilogue_renumber (register rtx *where, int test) 8905 { 8906 register const char *fmt; 8907 register int i; 8908 register enum rtx_code code; 8909 8910 if (*where == 0) 8911 return 0; 8912 8913 code = GET_CODE (*where); 8914 8915 switch (code) 8916 { 8917 case REG: 8918 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */ 8919 return 1; 8920 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32) 8921 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where))); 8922 /* fallthrough */ 8923 case SCRATCH: 8924 case CC0: 8925 case PC: 8926 case CONST_INT: 8927 case CONST_WIDE_INT: 8928 case CONST_DOUBLE: 8929 return 0; 8930 8931 /* Do not replace the frame pointer with the stack pointer because 8932 it can cause the delayed instruction to load below the stack. 8933 This occurs when instructions like: 8934 8935 (set (reg/i:SI 24 %i0) 8936 (mem/f:SI (plus:SI (reg/f:SI 30 %fp) 8937 (const_int -20 [0xffffffec])) 0)) 8938 8939 are in the return delayed slot. */ 8940 case PLUS: 8941 if (GET_CODE (XEXP (*where, 0)) == REG 8942 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM 8943 && (GET_CODE (XEXP (*where, 1)) != CONST_INT 8944 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS)) 8945 return 1; 8946 break; 8947 8948 case MEM: 8949 if (SPARC_STACK_BIAS 8950 && GET_CODE (XEXP (*where, 0)) == REG 8951 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM) 8952 return 1; 8953 break; 8954 8955 default: 8956 break; 8957 } 8958 8959 fmt = GET_RTX_FORMAT (code); 8960 8961 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 8962 { 8963 if (fmt[i] == 'E') 8964 { 8965 register int j; 8966 for (j = XVECLEN (*where, i) - 1; j >= 0; j--) 8967 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test)) 8968 return 1; 8969 } 8970 else if (fmt[i] == 'e' 8971 && epilogue_renumber (&(XEXP (*where, i)), test)) 8972 return 1; 8973 } 8974 return 0; 8975 } 8976 8977 /* Leaf functions and non-leaf functions have different needs. 
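   A leaf function does not get a register window of its own, so it
   allocates from the caller-visible registers first; everything else uses
   the standard order.  order_regs_for_local_alloc below picks the table by
   checking whether %o7 (hard register 15) has ever been live, which happens
   whenever the function makes calls.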
*/ 8978 8979 static const int 8980 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER; 8981 8982 static const int 8983 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER; 8984 8985 static const int *const reg_alloc_orders[] = { 8986 reg_leaf_alloc_order, 8987 reg_nonleaf_alloc_order}; 8988 8989 void 8990 order_regs_for_local_alloc (void) 8991 { 8992 static int last_order_nonleaf = 1; 8993 8994 if (df_regs_ever_live_p (15) != last_order_nonleaf) 8995 { 8996 last_order_nonleaf = !last_order_nonleaf; 8997 memcpy ((char *) reg_alloc_order, 8998 (const char *) reg_alloc_orders[last_order_nonleaf], 8999 FIRST_PSEUDO_REGISTER * sizeof (int)); 9000 } 9001 } 9002 9003 /* Return 1 if REG and MEM are legitimate enough to allow the various 9004 MEM<-->REG splits to be run. */ 9005 9006 int 9007 sparc_split_reg_mem_legitimate (rtx reg, rtx mem) 9008 { 9009 /* Punt if we are here by mistake. */ 9010 gcc_assert (reload_completed); 9011 9012 /* We must have an offsettable memory reference. */ 9013 if (!offsettable_memref_p (mem)) 9014 return 0; 9015 9016 /* If we have legitimate args for ldd/std, we do not want 9017 the split to happen. */ 9018 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8)) 9019 return 0; 9020 9021 /* Success. */ 9022 return 1; 9023 } 9024 9025 /* Split a REG <-- MEM move into a pair of moves in MODE. */ 9026 9027 void 9028 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode) 9029 { 9030 rtx high_part = gen_highpart (mode, dest); 9031 rtx low_part = gen_lowpart (mode, dest); 9032 rtx word0 = adjust_address (src, mode, 0); 9033 rtx word1 = adjust_address (src, mode, 4); 9034 9035 if (reg_overlap_mentioned_p (high_part, word1)) 9036 { 9037 emit_move_insn_1 (low_part, word1); 9038 emit_move_insn_1 (high_part, word0); 9039 } 9040 else 9041 { 9042 emit_move_insn_1 (high_part, word0); 9043 emit_move_insn_1 (low_part, word1); 9044 } 9045 } 9046 9047 /* Split a MEM <-- REG move into a pair of moves in MODE. */ 9048 9049 void 9050 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode) 9051 { 9052 rtx word0 = adjust_address (dest, mode, 0); 9053 rtx word1 = adjust_address (dest, mode, 4); 9054 rtx high_part = gen_highpart (mode, src); 9055 rtx low_part = gen_lowpart (mode, src); 9056 9057 emit_move_insn_1 (word0, high_part); 9058 emit_move_insn_1 (word1, low_part); 9059 } 9060 9061 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */ 9062 9063 int 9064 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2) 9065 { 9066 /* Punt if we are here by mistake. */ 9067 gcc_assert (reload_completed); 9068 9069 if (GET_CODE (reg1) == SUBREG) 9070 reg1 = SUBREG_REG (reg1); 9071 if (GET_CODE (reg1) != REG) 9072 return 0; 9073 const int regno1 = REGNO (reg1); 9074 9075 if (GET_CODE (reg2) == SUBREG) 9076 reg2 = SUBREG_REG (reg2); 9077 if (GET_CODE (reg2) != REG) 9078 return 0; 9079 const int regno2 = REGNO (reg2); 9080 9081 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2)) 9082 return 1; 9083 9084 if (TARGET_VIS3) 9085 { 9086 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2)) 9087 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2))) 9088 return 1; 9089 } 9090 9091 return 0; 9092 } 9093 9094 /* Split a REG <--> REG move into a pair of moves in MODE. 
*/ 9095 9096 void 9097 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode) 9098 { 9099 rtx dest1 = gen_highpart (mode, dest); 9100 rtx dest2 = gen_lowpart (mode, dest); 9101 rtx src1 = gen_highpart (mode, src); 9102 rtx src2 = gen_lowpart (mode, src); 9103 9104 /* Now emit using the real source and destination we found, swapping 9105 the order if we detect overlap. */ 9106 if (reg_overlap_mentioned_p (dest1, src2)) 9107 { 9108 emit_move_insn_1 (dest2, src2); 9109 emit_move_insn_1 (dest1, src1); 9110 } 9111 else 9112 { 9113 emit_move_insn_1 (dest1, src1); 9114 emit_move_insn_1 (dest2, src2); 9115 } 9116 } 9117 9118 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1. 9119 This makes them candidates for using ldd and std insns. 9120 9121 Note reg1 and reg2 *must* be hard registers. */ 9122 9123 int 9124 registers_ok_for_ldd_peep (rtx reg1, rtx reg2) 9125 { 9126 /* We might have been passed a SUBREG. */ 9127 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG) 9128 return 0; 9129 9130 if (REGNO (reg1) % 2 != 0) 9131 return 0; 9132 9133 /* Integer ldd is deprecated in SPARC V9 */ 9134 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1))) 9135 return 0; 9136 9137 return (REGNO (reg1) == REGNO (reg2) - 1); 9138 } 9139 9140 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in 9141 an ldd or std insn. 9142 9143 This can only happen when addr1 and addr2, the addresses in mem1 9144 and mem2, are consecutive memory locations (addr1 + 4 == addr2). 9145 addr1 must also be aligned on a 64-bit boundary. 9146 9147 Also iff dependent_reg_rtx is not null it should not be used to 9148 compute the address for mem1, i.e. we cannot optimize a sequence 9149 like: 9150 ld [%o0], %o0 9151 ld [%o0 + 4], %o1 9152 to 9153 ldd [%o0], %o0 9154 nor: 9155 ld [%g3 + 4], %g3 9156 ld [%g3], %g2 9157 to 9158 ldd [%g3], %g2 9159 9160 But, note that the transformation from: 9161 ld [%g2 + 4], %g3 9162 ld [%g2], %g2 9163 to 9164 ldd [%g2], %g2 9165 is perfectly fine. Thus, the peephole2 patterns always pass us 9166 the destination register of the first load, never the second one. 9167 9168 For stores we don't have a similar problem, so dependent_reg_rtx is 9169 NULL_RTX. */ 9170 9171 int 9172 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx) 9173 { 9174 rtx addr1, addr2; 9175 unsigned int reg1; 9176 HOST_WIDE_INT offset1; 9177 9178 /* The mems cannot be volatile. */ 9179 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) 9180 return 0; 9181 9182 /* MEM1 should be aligned on a 64-bit boundary. */ 9183 if (MEM_ALIGN (mem1) < 64) 9184 return 0; 9185 9186 addr1 = XEXP (mem1, 0); 9187 addr2 = XEXP (mem2, 0); 9188 9189 /* Extract a register number and offset (if used) from the first addr. */ 9190 if (GET_CODE (addr1) == PLUS) 9191 { 9192 /* If not a REG, return zero. */ 9193 if (GET_CODE (XEXP (addr1, 0)) != REG) 9194 return 0; 9195 else 9196 { 9197 reg1 = REGNO (XEXP (addr1, 0)); 9198 /* The offset must be constant! */ 9199 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT) 9200 return 0; 9201 offset1 = INTVAL (XEXP (addr1, 1)); 9202 } 9203 } 9204 else if (GET_CODE (addr1) != REG) 9205 return 0; 9206 else 9207 { 9208 reg1 = REGNO (addr1); 9209 /* This was a simple (mem (reg)) expression. Offset is 0. */ 9210 offset1 = 0; 9211 } 9212 9213 /* Make sure the second address is a (mem (plus (reg) (const_int). 
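     That is, it must use the same base register as the first address plus a
     constant offset; the checks below then insist that this offset be
     exactly offset1 + 4, so that the two words form one naturally aligned
     doubleword.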
*/ 9214 if (GET_CODE (addr2) != PLUS) 9215 return 0; 9216 9217 if (GET_CODE (XEXP (addr2, 0)) != REG 9218 || GET_CODE (XEXP (addr2, 1)) != CONST_INT) 9219 return 0; 9220 9221 if (reg1 != REGNO (XEXP (addr2, 0))) 9222 return 0; 9223 9224 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx)) 9225 return 0; 9226 9227 /* The first offset must be evenly divisible by 8 to ensure the 9228 address is 64-bit aligned. */ 9229 if (offset1 % 8 != 0) 9230 return 0; 9231 9232 /* The offset for the second addr must be 4 more than the first addr. */ 9233 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4) 9234 return 0; 9235 9236 /* All the tests passed. addr1 and addr2 are valid for ldd and std 9237 instructions. */ 9238 return 1; 9239 } 9240 9241 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */ 9242 9243 rtx 9244 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode) 9245 { 9246 rtx x = widen_memory_access (mem1, mode, 0); 9247 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2); 9248 return x; 9249 } 9250 9251 /* Return 1 if reg is a pseudo, or is the first register in 9252 a hard register pair. This makes it suitable for use in 9253 ldd and std insns. */ 9254 9255 int 9256 register_ok_for_ldd (rtx reg) 9257 { 9258 /* We might have been passed a SUBREG. */ 9259 if (!REG_P (reg)) 9260 return 0; 9261 9262 if (REGNO (reg) < FIRST_PSEUDO_REGISTER) 9263 return (REGNO (reg) % 2 == 0); 9264 9265 return 1; 9266 } 9267 9268 /* Return 1 if OP, a MEM, has an address which is known to be 9269 aligned to an 8-byte boundary. */ 9270 9271 int 9272 memory_ok_for_ldd (rtx op) 9273 { 9274 /* In 64-bit mode, we assume that the address is word-aligned. */ 9275 if (TARGET_ARCH32 && !mem_min_alignment (op, 8)) 9276 return 0; 9277 9278 if (! can_create_pseudo_p () 9279 && !strict_memory_address_p (Pmode, XEXP (op, 0))) 9280 return 0; 9281 9282 return 1; 9283 } 9284 9285 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ 9286 9287 static bool 9288 sparc_print_operand_punct_valid_p (unsigned char code) 9289 { 9290 if (code == '#' 9291 || code == '*' 9292 || code == '(' 9293 || code == ')' 9294 || code == '_' 9295 || code == '&') 9296 return true; 9297 9298 return false; 9299 } 9300 9301 /* Implement TARGET_PRINT_OPERAND. 9302 Print operand X (an rtx) in assembler syntax to file FILE. 9303 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. 9304 For `%' followed by punctuation, CODE is the punctuation and X is null. */ 9305 9306 static void 9307 sparc_print_operand (FILE *file, rtx x, int code) 9308 { 9309 const char *s; 9310 9311 switch (code) 9312 { 9313 case '#': 9314 /* Output an insn in a delay slot. */ 9315 if (final_sequence) 9316 sparc_indent_opcode = 1; 9317 else 9318 fputs ("\n\t nop", file); 9319 return; 9320 case '*': 9321 /* Output an annul flag if there's nothing for the delay slot and we 9322 are optimizing. This is always used with '(' below. 9323 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch; 9324 this is a dbx bug. So, we only do this when optimizing. 9325 On UltraSPARC, a branch in a delay slot causes a pipeline flush. 9326 Always emit a nop in case the next instruction is a branch. */ 9327 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9)) 9328 fputs (",a", file); 9329 return; 9330 case '(': 9331 /* Output a 'nop' if there's nothing for the delay slot and we are 9332 not optimizing. This is always used with '*' above. */ 9333 if (! final_sequence && ! 
(optimize && (int)sparc_cpu < PROCESSOR_V9)) 9334 fputs ("\n\t nop", file); 9335 else if (final_sequence) 9336 sparc_indent_opcode = 1; 9337 return; 9338 case ')': 9339 /* Output the right displacement from the saved PC on function return. 9340 The caller may have placed an "unimp" insn immediately after the call 9341 so we have to account for it. This insn is used in the 32-bit ABI 9342 when calling a function that returns a non zero-sized structure. The 9343 64-bit ABI doesn't have it. Be careful to have this test be the same 9344 as that for the call. The exception is when sparc_std_struct_return 9345 is enabled, the psABI is followed exactly and the adjustment is made 9346 by the code in sparc_struct_value_rtx. The call emitted is the same 9347 when sparc_std_struct_return is enabled. */ 9348 if (!TARGET_ARCH64 9349 && cfun->returns_struct 9350 && !sparc_std_struct_return 9351 && DECL_SIZE (DECL_RESULT (current_function_decl)) 9352 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl))) 9353 == INTEGER_CST 9354 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))) 9355 fputs ("12", file); 9356 else 9357 fputc ('8', file); 9358 return; 9359 case '_': 9360 /* Output the Embedded Medium/Anywhere code model base register. */ 9361 fputs (EMBMEDANY_BASE_REG, file); 9362 return; 9363 case '&': 9364 /* Print some local dynamic TLS name. */ 9365 if (const char *name = get_some_local_dynamic_name ()) 9366 assemble_name (file, name); 9367 else 9368 output_operand_lossage ("'%%&' used without any " 9369 "local dynamic TLS references"); 9370 return; 9371 9372 case 'Y': 9373 /* Adjust the operand to take into account a RESTORE operation. */ 9374 if (GET_CODE (x) == CONST_INT) 9375 break; 9376 else if (GET_CODE (x) != REG) 9377 output_operand_lossage ("invalid %%Y operand"); 9378 else if (REGNO (x) < 8) 9379 fputs (reg_names[REGNO (x)], file); 9380 else if (REGNO (x) >= 24 && REGNO (x) < 32) 9381 fputs (reg_names[REGNO (x)-16], file); 9382 else 9383 output_operand_lossage ("invalid %%Y operand"); 9384 return; 9385 case 'L': 9386 /* Print out the low order register name of a register pair. */ 9387 if (WORDS_BIG_ENDIAN) 9388 fputs (reg_names[REGNO (x)+1], file); 9389 else 9390 fputs (reg_names[REGNO (x)], file); 9391 return; 9392 case 'H': 9393 /* Print out the high order register name of a register pair. */ 9394 if (WORDS_BIG_ENDIAN) 9395 fputs (reg_names[REGNO (x)], file); 9396 else 9397 fputs (reg_names[REGNO (x)+1], file); 9398 return; 9399 case 'R': 9400 /* Print out the second register name of a register pair or quad. 9401 I.e., R (%o0) => %o1. */ 9402 fputs (reg_names[REGNO (x)+1], file); 9403 return; 9404 case 'S': 9405 /* Print out the third register name of a register quad. 9406 I.e., S (%o0) => %o2. */ 9407 fputs (reg_names[REGNO (x)+2], file); 9408 return; 9409 case 'T': 9410 /* Print out the fourth register name of a register quad. 9411 I.e., T (%o0) => %o3. */ 9412 fputs (reg_names[REGNO (x)+3], file); 9413 return; 9414 case 'x': 9415 /* Print a condition code register. 
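	 This is either the integer condition register, printed as %icc or
	 %xcc depending on the mode of the comparison, or one of the V9
	 floating-point condition registers %fcc0-%fcc3.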
*/ 9416 if (REGNO (x) == SPARC_ICC_REG) 9417 { 9418 switch (GET_MODE (x)) 9419 { 9420 case E_CCmode: 9421 case E_CCNZmode: 9422 case E_CCCmode: 9423 case E_CCVmode: 9424 s = "%icc"; 9425 break; 9426 case E_CCXmode: 9427 case E_CCXNZmode: 9428 case E_CCXCmode: 9429 case E_CCXVmode: 9430 s = "%xcc"; 9431 break; 9432 default: 9433 gcc_unreachable (); 9434 } 9435 fputs (s, file); 9436 } 9437 else 9438 /* %fccN register */ 9439 fputs (reg_names[REGNO (x)], file); 9440 return; 9441 case 'm': 9442 /* Print the operand's address only. */ 9443 output_address (GET_MODE (x), XEXP (x, 0)); 9444 return; 9445 case 'r': 9446 /* In this case we need a register. Use %g0 if the 9447 operand is const0_rtx. */ 9448 if (x == const0_rtx 9449 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x)))) 9450 { 9451 fputs ("%g0", file); 9452 return; 9453 } 9454 else 9455 break; 9456 9457 case 'A': 9458 switch (GET_CODE (x)) 9459 { 9460 case IOR: 9461 s = "or"; 9462 break; 9463 case AND: 9464 s = "and"; 9465 break; 9466 case XOR: 9467 s = "xor"; 9468 break; 9469 default: 9470 output_operand_lossage ("invalid %%A operand"); 9471 s = ""; 9472 break; 9473 } 9474 fputs (s, file); 9475 return; 9476 9477 case 'B': 9478 switch (GET_CODE (x)) 9479 { 9480 case IOR: 9481 s = "orn"; 9482 break; 9483 case AND: 9484 s = "andn"; 9485 break; 9486 case XOR: 9487 s = "xnor"; 9488 break; 9489 default: 9490 output_operand_lossage ("invalid %%B operand"); 9491 s = ""; 9492 break; 9493 } 9494 fputs (s, file); 9495 return; 9496 9497 /* This is used by the conditional move instructions. */ 9498 case 'C': 9499 { 9500 machine_mode mode = GET_MODE (XEXP (x, 0)); 9501 switch (GET_CODE (x)) 9502 { 9503 case NE: 9504 if (mode == CCVmode || mode == CCXVmode) 9505 s = "vs"; 9506 else 9507 s = "ne"; 9508 break; 9509 case EQ: 9510 if (mode == CCVmode || mode == CCXVmode) 9511 s = "vc"; 9512 else 9513 s = "e"; 9514 break; 9515 case GE: 9516 if (mode == CCNZmode || mode == CCXNZmode) 9517 s = "pos"; 9518 else 9519 s = "ge"; 9520 break; 9521 case GT: 9522 s = "g"; 9523 break; 9524 case LE: 9525 s = "le"; 9526 break; 9527 case LT: 9528 if (mode == CCNZmode || mode == CCXNZmode) 9529 s = "neg"; 9530 else 9531 s = "l"; 9532 break; 9533 case GEU: 9534 s = "geu"; 9535 break; 9536 case GTU: 9537 s = "gu"; 9538 break; 9539 case LEU: 9540 s = "leu"; 9541 break; 9542 case LTU: 9543 s = "lu"; 9544 break; 9545 case LTGT: 9546 s = "lg"; 9547 break; 9548 case UNORDERED: 9549 s = "u"; 9550 break; 9551 case ORDERED: 9552 s = "o"; 9553 break; 9554 case UNLT: 9555 s = "ul"; 9556 break; 9557 case UNLE: 9558 s = "ule"; 9559 break; 9560 case UNGT: 9561 s = "ug"; 9562 break; 9563 case UNGE: 9564 s = "uge" 9565 ; break; 9566 case UNEQ: 9567 s = "ue"; 9568 break; 9569 default: 9570 output_operand_lossage ("invalid %%C operand"); 9571 s = ""; 9572 break; 9573 } 9574 fputs (s, file); 9575 return; 9576 } 9577 9578 /* This are used by the movr instruction pattern. */ 9579 case 'D': 9580 { 9581 switch (GET_CODE (x)) 9582 { 9583 case NE: 9584 s = "ne"; 9585 break; 9586 case EQ: 9587 s = "e"; 9588 break; 9589 case GE: 9590 s = "gez"; 9591 break; 9592 case LT: 9593 s = "lz"; 9594 break; 9595 case LE: 9596 s = "lez"; 9597 break; 9598 case GT: 9599 s = "gz"; 9600 break; 9601 default: 9602 output_operand_lossage ("invalid %%D operand"); 9603 s = ""; 9604 break; 9605 } 9606 fputs (s, file); 9607 return; 9608 } 9609 9610 case 'b': 9611 { 9612 /* Print a sign-extended character. 
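	     That is, the low 8 bits of the constant interpreted as a signed
	     value, so (const_int 255) is printed as -1.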
*/ 9613 int i = trunc_int_for_mode (INTVAL (x), QImode); 9614 fprintf (file, "%d", i); 9615 return; 9616 } 9617 9618 case 'f': 9619 /* Operand must be a MEM; write its address. */ 9620 if (GET_CODE (x) != MEM) 9621 output_operand_lossage ("invalid %%f operand"); 9622 output_address (GET_MODE (x), XEXP (x, 0)); 9623 return; 9624 9625 case 's': 9626 { 9627 /* Print a sign-extended 32-bit value. */ 9628 HOST_WIDE_INT i; 9629 if (GET_CODE(x) == CONST_INT) 9630 i = INTVAL (x); 9631 else 9632 { 9633 output_operand_lossage ("invalid %%s operand"); 9634 return; 9635 } 9636 i = trunc_int_for_mode (i, SImode); 9637 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i); 9638 return; 9639 } 9640 9641 case 0: 9642 /* Do nothing special. */ 9643 break; 9644 9645 default: 9646 /* Undocumented flag. */ 9647 output_operand_lossage ("invalid operand output code"); 9648 } 9649 9650 if (GET_CODE (x) == REG) 9651 fputs (reg_names[REGNO (x)], file); 9652 else if (GET_CODE (x) == MEM) 9653 { 9654 fputc ('[', file); 9655 /* Poor Sun assembler doesn't understand absolute addressing. */ 9656 if (CONSTANT_P (XEXP (x, 0))) 9657 fputs ("%g0+", file); 9658 output_address (GET_MODE (x), XEXP (x, 0)); 9659 fputc (']', file); 9660 } 9661 else if (GET_CODE (x) == HIGH) 9662 { 9663 fputs ("%hi(", file); 9664 output_addr_const (file, XEXP (x, 0)); 9665 fputc (')', file); 9666 } 9667 else if (GET_CODE (x) == LO_SUM) 9668 { 9669 sparc_print_operand (file, XEXP (x, 0), 0); 9670 if (TARGET_CM_MEDMID) 9671 fputs ("+%l44(", file); 9672 else 9673 fputs ("+%lo(", file); 9674 output_addr_const (file, XEXP (x, 1)); 9675 fputc (')', file); 9676 } 9677 else if (GET_CODE (x) == CONST_DOUBLE) 9678 output_operand_lossage ("floating-point constant not a valid immediate operand"); 9679 else 9680 output_addr_const (file, x); 9681 } 9682 9683 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */ 9684 9685 static void 9686 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x) 9687 { 9688 register rtx base, index = 0; 9689 int offset = 0; 9690 register rtx addr = x; 9691 9692 if (REG_P (addr)) 9693 fputs (reg_names[REGNO (addr)], file); 9694 else if (GET_CODE (addr) == PLUS) 9695 { 9696 if (CONST_INT_P (XEXP (addr, 0))) 9697 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1); 9698 else if (CONST_INT_P (XEXP (addr, 1))) 9699 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0); 9700 else 9701 base = XEXP (addr, 0), index = XEXP (addr, 1); 9702 if (GET_CODE (base) == LO_SUM) 9703 { 9704 gcc_assert (USE_AS_OFFSETABLE_LO10 9705 && TARGET_ARCH64 9706 && ! 
TARGET_CM_MEDMID); 9707 output_operand (XEXP (base, 0), 0); 9708 fputs ("+%lo(", file); 9709 output_address (VOIDmode, XEXP (base, 1)); 9710 fprintf (file, ")+%d", offset); 9711 } 9712 else 9713 { 9714 fputs (reg_names[REGNO (base)], file); 9715 if (index == 0) 9716 fprintf (file, "%+d", offset); 9717 else if (REG_P (index)) 9718 fprintf (file, "+%s", reg_names[REGNO (index)]); 9719 else if (GET_CODE (index) == SYMBOL_REF 9720 || GET_CODE (index) == LABEL_REF 9721 || GET_CODE (index) == CONST) 9722 fputc ('+', file), output_addr_const (file, index); 9723 else gcc_unreachable (); 9724 } 9725 } 9726 else if (GET_CODE (addr) == MINUS 9727 && GET_CODE (XEXP (addr, 1)) == LABEL_REF) 9728 { 9729 output_addr_const (file, XEXP (addr, 0)); 9730 fputs ("-(", file); 9731 output_addr_const (file, XEXP (addr, 1)); 9732 fputs ("-.)", file); 9733 } 9734 else if (GET_CODE (addr) == LO_SUM) 9735 { 9736 output_operand (XEXP (addr, 0), 0); 9737 if (TARGET_CM_MEDMID) 9738 fputs ("+%l44(", file); 9739 else 9740 fputs ("+%lo(", file); 9741 output_address (VOIDmode, XEXP (addr, 1)); 9742 fputc (')', file); 9743 } 9744 else if (flag_pic 9745 && GET_CODE (addr) == CONST 9746 && GET_CODE (XEXP (addr, 0)) == MINUS 9747 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST 9748 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS 9749 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx) 9750 { 9751 addr = XEXP (addr, 0); 9752 output_addr_const (file, XEXP (addr, 0)); 9753 /* Group the args of the second CONST in parenthesis. */ 9754 fputs ("-(", file); 9755 /* Skip past the second CONST--it does nothing for us. */ 9756 output_addr_const (file, XEXP (XEXP (addr, 1), 0)); 9757 /* Close the parenthesis. */ 9758 fputc (')', file); 9759 } 9760 else 9761 { 9762 output_addr_const (file, addr); 9763 } 9764 } 9765 9766 /* Target hook for assembling integer objects. The sparc version has 9767 special handling for aligned DI-mode objects. */ 9768 9769 static bool 9770 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p) 9771 { 9772 /* ??? We only output .xword's for symbols and only then in environments 9773 where the assembler can handle them. */ 9774 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT) 9775 { 9776 if (TARGET_V9) 9777 { 9778 assemble_integer_with_op ("\t.xword\t", x); 9779 return true; 9780 } 9781 else 9782 { 9783 assemble_aligned_integer (4, const0_rtx); 9784 assemble_aligned_integer (4, x); 9785 return true; 9786 } 9787 } 9788 return default_assemble_integer (x, size, aligned_p); 9789 } 9790 9791 /* Return the value of a code used in the .proc pseudo-op that says 9792 what kind of result this function returns. For non-C types, we pick 9793 the closest C type. 
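   The encoding packs the base type code into the low bits and up to
   twelve levels of pointer/function/array qualifiers into 2-bit fields
   starting at bit 6.  For example, assuming a 32-bit int, a function
   returning "int *" yields (1 << 6) | 4 == 0x44: one pointer qualifier
   in the first field plus the code for a signed int-sized integer.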
*/ 9794 9795 #ifndef SHORT_TYPE_SIZE 9796 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2) 9797 #endif 9798 9799 #ifndef INT_TYPE_SIZE 9800 #define INT_TYPE_SIZE BITS_PER_WORD 9801 #endif 9802 9803 #ifndef LONG_TYPE_SIZE 9804 #define LONG_TYPE_SIZE BITS_PER_WORD 9805 #endif 9806 9807 #ifndef LONG_LONG_TYPE_SIZE 9808 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2) 9809 #endif 9810 9811 #ifndef FLOAT_TYPE_SIZE 9812 #define FLOAT_TYPE_SIZE BITS_PER_WORD 9813 #endif 9814 9815 #ifndef DOUBLE_TYPE_SIZE 9816 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) 9817 #endif 9818 9819 #ifndef LONG_DOUBLE_TYPE_SIZE 9820 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) 9821 #endif 9822 9823 unsigned long 9824 sparc_type_code (register tree type) 9825 { 9826 register unsigned long qualifiers = 0; 9827 register unsigned shift; 9828 9829 /* Only the first 30 bits of the qualifier are valid. We must refrain from 9830 setting more, since some assemblers will give an error for this. Also, 9831 we must be careful to avoid shifts of 32 bits or more to avoid getting 9832 unpredictable results. */ 9833 9834 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type)) 9835 { 9836 switch (TREE_CODE (type)) 9837 { 9838 case ERROR_MARK: 9839 return qualifiers; 9840 9841 case ARRAY_TYPE: 9842 qualifiers |= (3 << shift); 9843 break; 9844 9845 case FUNCTION_TYPE: 9846 case METHOD_TYPE: 9847 qualifiers |= (2 << shift); 9848 break; 9849 9850 case POINTER_TYPE: 9851 case REFERENCE_TYPE: 9852 case OFFSET_TYPE: 9853 qualifiers |= (1 << shift); 9854 break; 9855 9856 case RECORD_TYPE: 9857 return (qualifiers | 8); 9858 9859 case UNION_TYPE: 9860 case QUAL_UNION_TYPE: 9861 return (qualifiers | 9); 9862 9863 case ENUMERAL_TYPE: 9864 return (qualifiers | 10); 9865 9866 case VOID_TYPE: 9867 return (qualifiers | 16); 9868 9869 case INTEGER_TYPE: 9870 /* If this is a range type, consider it to be the underlying 9871 type. */ 9872 if (TREE_TYPE (type) != 0) 9873 break; 9874 9875 /* Carefully distinguish all the standard types of C, 9876 without messing up if the language is not C. We do this by 9877 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to 9878 look at both the names and the above fields, but that's redundant. 9879 Any type whose size is between two C types will be considered 9880 to be the wider of the two types. Also, we do not have a 9881 special code to use for "long long", so anything wider than 9882 long is treated the same. Note that we can't distinguish 9883 between "int" and "long" in this code if they are the same 9884 size, but that's fine, since neither can the assembler. */ 9885 9886 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE) 9887 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2)); 9888 9889 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE) 9890 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3)); 9891 9892 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE) 9893 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4)); 9894 9895 else 9896 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5)); 9897 9898 case REAL_TYPE: 9899 /* If this is a range type, consider it to be the underlying 9900 type. */ 9901 if (TREE_TYPE (type) != 0) 9902 break; 9903 9904 /* Carefully distinguish all the standard types of C, 9905 without messing up if the language is not C. */ 9906 9907 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE) 9908 return (qualifiers | 6); 9909 9910 else 9911 return (qualifiers | 7); 9912 9913 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */ 9914 /* ??? 
We need to distinguish between double and float complex types, 9915 but I don't know how yet because I can't reach this code from 9916 existing front-ends. */ 9917 return (qualifiers | 7); /* Who knows? */ 9918 9919 case VECTOR_TYPE: 9920 case BOOLEAN_TYPE: /* Boolean truth value type. */ 9921 case LANG_TYPE: 9922 case NULLPTR_TYPE: 9923 return qualifiers; 9924 9925 default: 9926 gcc_unreachable (); /* Not a type! */ 9927 } 9928 } 9929 9930 return qualifiers; 9931 } 9932 9933 /* Nested function support. */ 9934 9935 /* Emit RTL insns to initialize the variable parts of a trampoline. 9936 FNADDR is an RTX for the address of the function's pure code. 9937 CXT is an RTX for the static chain value for the function. 9938 9939 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi 9940 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes 9941 (to store insns). This is a bit excessive. Perhaps a different 9942 mechanism would be better here. 9943 9944 Emit enough FLUSH insns to synchronize the data and instruction caches. */ 9945 9946 static void 9947 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt) 9948 { 9949 /* SPARC 32-bit trampoline: 9950 9951 sethi %hi(fn), %g1 9952 sethi %hi(static), %g2 9953 jmp %g1+%lo(fn) 9954 or %g2, %lo(static), %g2 9955 9956 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii 9957 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii 9958 */ 9959 9960 emit_move_insn 9961 (adjust_address (m_tramp, SImode, 0), 9962 expand_binop (SImode, ior_optab, 9963 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1), 9964 GEN_INT (trunc_int_for_mode (0x03000000, SImode)), 9965 NULL_RTX, 1, OPTAB_DIRECT)); 9966 9967 emit_move_insn 9968 (adjust_address (m_tramp, SImode, 4), 9969 expand_binop (SImode, ior_optab, 9970 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1), 9971 GEN_INT (trunc_int_for_mode (0x05000000, SImode)), 9972 NULL_RTX, 1, OPTAB_DIRECT)); 9973 9974 emit_move_insn 9975 (adjust_address (m_tramp, SImode, 8), 9976 expand_binop (SImode, ior_optab, 9977 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX), 9978 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)), 9979 NULL_RTX, 1, OPTAB_DIRECT)); 9980 9981 emit_move_insn 9982 (adjust_address (m_tramp, SImode, 12), 9983 expand_binop (SImode, ior_optab, 9984 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX), 9985 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)), 9986 NULL_RTX, 1, OPTAB_DIRECT)); 9987 9988 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is 9989 aligned on a 16 byte boundary so one flush clears it all. */ 9990 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0)))); 9991 if (sparc_cpu != PROCESSOR_ULTRASPARC 9992 && sparc_cpu != PROCESSOR_ULTRASPARC3 9993 && sparc_cpu != PROCESSOR_NIAGARA 9994 && sparc_cpu != PROCESSOR_NIAGARA2 9995 && sparc_cpu != PROCESSOR_NIAGARA3 9996 && sparc_cpu != PROCESSOR_NIAGARA4 9997 && sparc_cpu != PROCESSOR_NIAGARA7 9998 && sparc_cpu != PROCESSOR_M8) 9999 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8)))); 10000 10001 /* Call __enable_execute_stack after writing onto the stack to make sure 10002 the stack address is accessible. 
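   The emitted call passes the address of the trampoline itself, i.e. it
   behaves roughly like the hypothetical C call

	__enable_execute_stack (&tramp[0]);

   which typically remaps the containing page as executable.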
*/ 10003 #ifdef HAVE_ENABLE_EXECUTE_STACK 10004 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 10005 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); 10006 #endif 10007 10008 } 10009 10010 /* The 64-bit version is simpler because it makes more sense to load the 10011 values as "immediate" data out of the trampoline. It's also easier since 10012 we can read the PC without clobbering a register. */ 10013 10014 static void 10015 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt) 10016 { 10017 /* SPARC 64-bit trampoline: 10018 10019 rd %pc, %g1 10020 ldx [%g1+24], %g5 10021 jmp %g5 10022 ldx [%g1+16], %g5 10023 +16 bytes data 10024 */ 10025 10026 emit_move_insn (adjust_address (m_tramp, SImode, 0), 10027 GEN_INT (trunc_int_for_mode (0x83414000, SImode))); 10028 emit_move_insn (adjust_address (m_tramp, SImode, 4), 10029 GEN_INT (trunc_int_for_mode (0xca586018, SImode))); 10030 emit_move_insn (adjust_address (m_tramp, SImode, 8), 10031 GEN_INT (trunc_int_for_mode (0x81c14000, SImode))); 10032 emit_move_insn (adjust_address (m_tramp, SImode, 12), 10033 GEN_INT (trunc_int_for_mode (0xca586010, SImode))); 10034 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt); 10035 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr); 10036 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0)))); 10037 10038 if (sparc_cpu != PROCESSOR_ULTRASPARC 10039 && sparc_cpu != PROCESSOR_ULTRASPARC3 10040 && sparc_cpu != PROCESSOR_NIAGARA 10041 && sparc_cpu != PROCESSOR_NIAGARA2 10042 && sparc_cpu != PROCESSOR_NIAGARA3 10043 && sparc_cpu != PROCESSOR_NIAGARA4 10044 && sparc_cpu != PROCESSOR_NIAGARA7 10045 && sparc_cpu != PROCESSOR_M8) 10046 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8)))); 10047 10048 /* Call __enable_execute_stack after writing onto the stack to make sure 10049 the stack address is accessible. */ 10050 #ifdef HAVE_ENABLE_EXECUTE_STACK 10051 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 10052 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); 10053 #endif 10054 } 10055 10056 /* Worker for TARGET_TRAMPOLINE_INIT. */ 10057 10058 static void 10059 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) 10060 { 10061 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0)); 10062 cxt = force_reg (Pmode, cxt); 10063 if (TARGET_ARCH64) 10064 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt); 10065 else 10066 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt); 10067 } 10068 10069 /* Adjust the cost of a scheduling dependency. Return the new cost of 10070 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 10071 10072 static int 10073 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, 10074 int cost) 10075 { 10076 enum attr_type insn_type; 10077 10078 if (recog_memoized (insn) < 0) 10079 return cost; 10080 10081 insn_type = get_attr_type (insn); 10082 10083 if (dep_type == 0) 10084 { 10085 /* Data dependency; DEP_INSN writes a register that INSN reads some 10086 cycles later. */ 10087 10088 /* if a load, then the dependence must be on the memory address; 10089 add an extra "cycle". Note that the cost could be two cycles 10090 if the reg was written late in an instruction group; we ca not tell 10091 here. */ 10092 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD) 10093 return cost + 3; 10094 10095 /* Get the delay only if the address of the store is the dependence. 
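   That is, for a store such as "st %o1, [%o2]" only a producer of the
   address register %o2 pays the extra latency; the check below spots
   the case where the producer feeds the stored data (its SET_DEST
   equals the SET_SRC of the store) and leaves the cost unchanged there.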
*/ 10096 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE) 10097 { 10098 rtx pat = PATTERN(insn); 10099 rtx dep_pat = PATTERN (dep_insn); 10100 10101 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 10102 return cost; /* This should not happen! */ 10103 10104 /* The dependency between the two instructions was on the data that 10105 is being stored. Assume that this implies that the address of the 10106 store is not dependent. */ 10107 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) 10108 return cost; 10109 10110 return cost + 3; /* An approximation. */ 10111 } 10112 10113 /* A shift instruction cannot receive its data from an instruction 10114 in the same cycle; add a one cycle penalty. */ 10115 if (insn_type == TYPE_SHIFT) 10116 return cost + 3; /* Split before cascade into shift. */ 10117 } 10118 else 10119 { 10120 /* Anti- or output- dependency; DEP_INSN reads/writes a register that 10121 INSN writes some cycles later. */ 10122 10123 /* These are only significant for the fpu unit; writing a fp reg before 10124 the fpu has finished with it stalls the processor. */ 10125 10126 /* Reusing an integer register causes no problems. */ 10127 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT) 10128 return 0; 10129 } 10130 10131 return cost; 10132 } 10133 10134 static int 10135 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn, 10136 int cost) 10137 { 10138 enum attr_type insn_type, dep_type; 10139 rtx pat = PATTERN(insn); 10140 rtx dep_pat = PATTERN (dep_insn); 10141 10142 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) 10143 return cost; 10144 10145 insn_type = get_attr_type (insn); 10146 dep_type = get_attr_type (dep_insn); 10147 10148 switch (dtype) 10149 { 10150 case 0: 10151 /* Data dependency; DEP_INSN writes a register that INSN reads some 10152 cycles later. */ 10153 10154 switch (insn_type) 10155 { 10156 case TYPE_STORE: 10157 case TYPE_FPSTORE: 10158 /* Get the delay iff the address of the store is the dependence. */ 10159 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 10160 return cost; 10161 10162 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) 10163 return cost; 10164 return cost + 3; 10165 10166 case TYPE_LOAD: 10167 case TYPE_SLOAD: 10168 case TYPE_FPLOAD: 10169 /* If a load, then the dependence must be on the memory address. If 10170 the addresses aren't equal, then it might be a false dependency */ 10171 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE) 10172 { 10173 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET 10174 || GET_CODE (SET_DEST (dep_pat)) != MEM 10175 || GET_CODE (SET_SRC (pat)) != MEM 10176 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0), 10177 XEXP (SET_SRC (pat), 0))) 10178 return cost + 2; 10179 10180 return cost + 8; 10181 } 10182 break; 10183 10184 case TYPE_BRANCH: 10185 /* Compare to branch latency is 0. There is no benefit from 10186 separating compare and branch. */ 10187 if (dep_type == TYPE_COMPARE) 10188 return 0; 10189 /* Floating point compare to branch latency is less than 10190 compare to conditional move. */ 10191 if (dep_type == TYPE_FPCMP) 10192 return cost - 1; 10193 break; 10194 default: 10195 break; 10196 } 10197 break; 10198 10199 case REG_DEP_ANTI: 10200 /* Anti-dependencies only penalize the fpu unit. 
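   Reusing an integer register causes no stall, so when the register is
   rewritten by an integer ALU or shift instruction the cost drops to 0
   below; floating-point instructions keep the default cost.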
*/ 10201 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT) 10202 return 0; 10203 break; 10204 10205 default: 10206 break; 10207 } 10208 10209 return cost; 10210 } 10211 10212 static int 10213 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost, 10214 unsigned int) 10215 { 10216 switch (sparc_cpu) 10217 { 10218 case PROCESSOR_SUPERSPARC: 10219 cost = supersparc_adjust_cost (insn, dep_type, dep, cost); 10220 break; 10221 case PROCESSOR_HYPERSPARC: 10222 case PROCESSOR_SPARCLITE86X: 10223 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost); 10224 break; 10225 default: 10226 break; 10227 } 10228 return cost; 10229 } 10230 10231 static void 10232 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED, 10233 int sched_verbose ATTRIBUTE_UNUSED, 10234 int max_ready ATTRIBUTE_UNUSED) 10235 {} 10236 10237 static int 10238 sparc_use_sched_lookahead (void) 10239 { 10240 switch (sparc_cpu) 10241 { 10242 case PROCESSOR_ULTRASPARC: 10243 case PROCESSOR_ULTRASPARC3: 10244 return 4; 10245 case PROCESSOR_SUPERSPARC: 10246 case PROCESSOR_HYPERSPARC: 10247 case PROCESSOR_SPARCLITE86X: 10248 return 3; 10249 case PROCESSOR_NIAGARA4: 10250 case PROCESSOR_NIAGARA7: 10251 case PROCESSOR_M8: 10252 return 2; 10253 case PROCESSOR_NIAGARA: 10254 case PROCESSOR_NIAGARA2: 10255 case PROCESSOR_NIAGARA3: 10256 default: 10257 return 0; 10258 } 10259 } 10260 10261 static int 10262 sparc_issue_rate (void) 10263 { 10264 switch (sparc_cpu) 10265 { 10266 case PROCESSOR_ULTRASPARC: 10267 case PROCESSOR_ULTRASPARC3: 10268 case PROCESSOR_M8: 10269 return 4; 10270 case PROCESSOR_SUPERSPARC: 10271 return 3; 10272 case PROCESSOR_HYPERSPARC: 10273 case PROCESSOR_SPARCLITE86X: 10274 case PROCESSOR_V9: 10275 /* Assume V9 processors are capable of at least dual-issue. */ 10276 case PROCESSOR_NIAGARA4: 10277 case PROCESSOR_NIAGARA7: 10278 return 2; 10279 case PROCESSOR_NIAGARA: 10280 case PROCESSOR_NIAGARA2: 10281 case PROCESSOR_NIAGARA3: 10282 default: 10283 return 1; 10284 } 10285 } 10286 10287 int 10288 sparc_branch_cost (bool speed_p, bool predictable_p) 10289 { 10290 if (!speed_p) 10291 return 2; 10292 10293 /* For pre-V9 processors we use a single value (usually 3) to take into 10294 account the potential annulling of the delay slot (which ends up being 10295 a bubble in the pipeline slot) plus a cycle to take into consideration 10296 the instruction cache effects. 10297 10298 On V9 and later processors, which have branch prediction facilities, 10299 we take into account whether the branch is (easily) predictable. */ 10300 const int cost = sparc_costs->branch_cost; 10301 10302 switch (sparc_cpu) 10303 { 10304 case PROCESSOR_V9: 10305 case PROCESSOR_ULTRASPARC: 10306 case PROCESSOR_ULTRASPARC3: 10307 case PROCESSOR_NIAGARA: 10308 case PROCESSOR_NIAGARA2: 10309 case PROCESSOR_NIAGARA3: 10310 case PROCESSOR_NIAGARA4: 10311 case PROCESSOR_NIAGARA7: 10312 case PROCESSOR_M8: 10313 return cost + (predictable_p ? 0 : 2); 10314 10315 default: 10316 return cost; 10317 } 10318 } 10319 10320 static int 10321 set_extends (rtx_insn *insn) 10322 { 10323 register rtx pat = PATTERN (insn); 10324 10325 switch (GET_CODE (SET_SRC (pat))) 10326 { 10327 /* Load and some shift instructions zero extend. */ 10328 case MEM: 10329 case ZERO_EXTEND: 10330 /* sethi clears the high bits */ 10331 case HIGH: 10332 /* LO_SUM is used with sethi. 
sethi cleared the high 10333 bits and the values used with lo_sum are positive */ 10334 case LO_SUM: 10335 /* Store flag stores 0 or 1 */ 10336 case LT: case LTU: 10337 case GT: case GTU: 10338 case LE: case LEU: 10339 case GE: case GEU: 10340 case EQ: 10341 case NE: 10342 return 1; 10343 case AND: 10344 { 10345 rtx op0 = XEXP (SET_SRC (pat), 0); 10346 rtx op1 = XEXP (SET_SRC (pat), 1); 10347 if (GET_CODE (op1) == CONST_INT) 10348 return INTVAL (op1) >= 0; 10349 if (GET_CODE (op0) != REG) 10350 return 0; 10351 if (sparc_check_64 (op0, insn) == 1) 10352 return 1; 10353 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1); 10354 } 10355 case IOR: 10356 case XOR: 10357 { 10358 rtx op0 = XEXP (SET_SRC (pat), 0); 10359 rtx op1 = XEXP (SET_SRC (pat), 1); 10360 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0) 10361 return 0; 10362 if (GET_CODE (op1) == CONST_INT) 10363 return INTVAL (op1) >= 0; 10364 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1); 10365 } 10366 case LSHIFTRT: 10367 return GET_MODE (SET_SRC (pat)) == SImode; 10368 /* Positive integers leave the high bits zero. */ 10369 case CONST_INT: 10370 return !(INTVAL (SET_SRC (pat)) & 0x80000000); 10371 case ASHIFTRT: 10372 case SIGN_EXTEND: 10373 return - (GET_MODE (SET_SRC (pat)) == SImode); 10374 case REG: 10375 return sparc_check_64 (SET_SRC (pat), insn); 10376 default: 10377 return 0; 10378 } 10379 } 10380 10381 /* We _ought_ to have only one kind per function, but... */ 10382 static GTY(()) rtx sparc_addr_diff_list; 10383 static GTY(()) rtx sparc_addr_list; 10384 10385 void 10386 sparc_defer_case_vector (rtx lab, rtx vec, int diff) 10387 { 10388 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec); 10389 if (diff) 10390 sparc_addr_diff_list 10391 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list); 10392 else 10393 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list); 10394 } 10395 10396 static void 10397 sparc_output_addr_vec (rtx vec) 10398 { 10399 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); 10400 int idx, vlen = XVECLEN (body, 0); 10401 10402 #ifdef ASM_OUTPUT_ADDR_VEC_START 10403 ASM_OUTPUT_ADDR_VEC_START (asm_out_file); 10404 #endif 10405 10406 #ifdef ASM_OUTPUT_CASE_LABEL 10407 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), 10408 NEXT_INSN (lab)); 10409 #else 10410 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); 10411 #endif 10412 10413 for (idx = 0; idx < vlen; idx++) 10414 { 10415 ASM_OUTPUT_ADDR_VEC_ELT 10416 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0))); 10417 } 10418 10419 #ifdef ASM_OUTPUT_ADDR_VEC_END 10420 ASM_OUTPUT_ADDR_VEC_END (asm_out_file); 10421 #endif 10422 } 10423 10424 static void 10425 sparc_output_addr_diff_vec (rtx vec) 10426 { 10427 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); 10428 rtx base = XEXP (XEXP (body, 0), 0); 10429 int idx, vlen = XVECLEN (body, 1); 10430 10431 #ifdef ASM_OUTPUT_ADDR_VEC_START 10432 ASM_OUTPUT_ADDR_VEC_START (asm_out_file); 10433 #endif 10434 10435 #ifdef ASM_OUTPUT_CASE_LABEL 10436 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), 10437 NEXT_INSN (lab)); 10438 #else 10439 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); 10440 #endif 10441 10442 for (idx = 0; idx < vlen; idx++) 10443 { 10444 ASM_OUTPUT_ADDR_DIFF_ELT 10445 (asm_out_file, 10446 body, 10447 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)), 10448 CODE_LABEL_NUMBER (base)); 10449 } 10450 10451 #ifdef ASM_OUTPUT_ADDR_VEC_END 10452 
ASM_OUTPUT_ADDR_VEC_END (asm_out_file); 10453 #endif 10454 } 10455 10456 static void 10457 sparc_output_deferred_case_vectors (void) 10458 { 10459 rtx t; 10460 int align; 10461 10462 if (sparc_addr_list == NULL_RTX 10463 && sparc_addr_diff_list == NULL_RTX) 10464 return; 10465 10466 /* Align to cache line in the function's code section. */ 10467 switch_to_section (current_function_section ()); 10468 10469 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); 10470 if (align > 0) 10471 ASM_OUTPUT_ALIGN (asm_out_file, align); 10472 10473 for (t = sparc_addr_list; t ; t = XEXP (t, 1)) 10474 sparc_output_addr_vec (XEXP (t, 0)); 10475 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1)) 10476 sparc_output_addr_diff_vec (XEXP (t, 0)); 10477 10478 sparc_addr_list = sparc_addr_diff_list = NULL_RTX; 10479 } 10480 10481 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are 10482 unknown. Return 1 if the high bits are zero, -1 if the register is 10483 sign extended. */ 10484 int 10485 sparc_check_64 (rtx x, rtx_insn *insn) 10486 { 10487 /* If a register is set only once it is safe to ignore insns this 10488 code does not know how to handle. The loop will either recognize 10489 the single set and return the correct value or fail to recognize 10490 it and return 0. */ 10491 int set_once = 0; 10492 rtx y = x; 10493 10494 gcc_assert (GET_CODE (x) == REG); 10495 10496 if (GET_MODE (x) == DImode) 10497 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN); 10498 10499 if (flag_expensive_optimizations 10500 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1) 10501 set_once = 1; 10502 10503 if (insn == 0) 10504 { 10505 if (set_once) 10506 insn = get_last_insn_anywhere (); 10507 else 10508 return 0; 10509 } 10510 10511 while ((insn = PREV_INSN (insn))) 10512 { 10513 switch (GET_CODE (insn)) 10514 { 10515 case JUMP_INSN: 10516 case NOTE: 10517 break; 10518 case CODE_LABEL: 10519 case CALL_INSN: 10520 default: 10521 if (! set_once) 10522 return 0; 10523 break; 10524 case INSN: 10525 { 10526 rtx pat = PATTERN (insn); 10527 if (GET_CODE (pat) != SET) 10528 return 0; 10529 if (rtx_equal_p (x, SET_DEST (pat))) 10530 return set_extends (insn); 10531 if (y && rtx_equal_p (y, SET_DEST (pat))) 10532 return set_extends (insn); 10533 if (reg_overlap_mentioned_p (SET_DEST (pat), y)) 10534 return 0; 10535 } 10536 } 10537 } 10538 return 0; 10539 } 10540 10541 /* Output a wide shift instruction in V8+ mode. INSN is the instruction, 10542 OPERANDS are its operands and OPCODE is the mnemonic to be used. */ 10543 10544 const char * 10545 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode) 10546 { 10547 static char asm_code[60]; 10548 10549 /* The scratch register is only required when the destination 10550 register is not a 64-bit global or out register. */ 10551 if (which_alternative != 2) 10552 operands[3] = operands[0]; 10553 10554 /* We can only shift by constants <= 63. 
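   Larger constant counts are therefore masked with 0x3f below.  For a
   register destination (any alternative but 2) the emitted sequence is
   roughly

	sllx	%H1, 32, %0
	srl	%L1, 0, %L1	! only if the low word may be sign-extended
	or	%L1, %0, %0
	<op>	%0, %2, %L0
	srlx	%L0, 32, %H0

   with OPCODE substituted for <op>.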
*/ 10555 if (GET_CODE (operands[2]) == CONST_INT) 10556 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); 10557 10558 if (GET_CODE (operands[1]) == CONST_INT) 10559 { 10560 output_asm_insn ("mov\t%1, %3", operands); 10561 } 10562 else 10563 { 10564 output_asm_insn ("sllx\t%H1, 32, %3", operands); 10565 if (sparc_check_64 (operands[1], insn) <= 0) 10566 output_asm_insn ("srl\t%L1, 0, %L1", operands); 10567 output_asm_insn ("or\t%L1, %3, %3", operands); 10568 } 10569 10570 strcpy (asm_code, opcode); 10571 10572 if (which_alternative != 2) 10573 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0"); 10574 else 10575 return 10576 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"); 10577 } 10578 10579 /* Output rtl to increment the profiler label LABELNO 10580 for profiling a function entry. */ 10581 10582 void 10583 sparc_profile_hook (int labelno) 10584 { 10585 char buf[32]; 10586 rtx lab, fun; 10587 10588 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION); 10589 if (NO_PROFILE_COUNTERS) 10590 { 10591 emit_library_call (fun, LCT_NORMAL, VOIDmode); 10592 } 10593 else 10594 { 10595 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 10596 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); 10597 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode); 10598 } 10599 } 10600 10601 #ifdef TARGET_SOLARIS 10602 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ 10603 10604 static void 10605 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags, 10606 tree decl ATTRIBUTE_UNUSED) 10607 { 10608 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) 10609 { 10610 solaris_elf_asm_comdat_section (name, flags, decl); 10611 return; 10612 } 10613 10614 fprintf (asm_out_file, "\t.section\t\"%s\"", name); 10615 10616 if (!(flags & SECTION_DEBUG)) 10617 fputs (",#alloc", asm_out_file); 10618 #if HAVE_GAS_SECTION_EXCLUDE 10619 if (flags & SECTION_EXCLUDE) 10620 fputs (",#exclude", asm_out_file); 10621 #endif 10622 if (flags & SECTION_WRITE) 10623 fputs (",#write", asm_out_file); 10624 if (flags & SECTION_TLS) 10625 fputs (",#tls", asm_out_file); 10626 if (flags & SECTION_CODE) 10627 fputs (",#execinstr", asm_out_file); 10628 10629 if (flags & SECTION_NOTYPE) 10630 ; 10631 else if (flags & SECTION_BSS) 10632 fputs (",#nobits", asm_out_file); 10633 else 10634 fputs (",#progbits", asm_out_file); 10635 10636 fputc ('\n', asm_out_file); 10637 } 10638 #endif /* TARGET_SOLARIS */ 10639 10640 /* We do not allow indirect calls to be optimized into sibling calls. 10641 10642 We cannot use sibling calls when delayed branches are disabled 10643 because they will likely require the call delay slot to be filled. 10644 10645 Also, on SPARC 32-bit we cannot emit a sibling call when the 10646 current function returns a structure. This is because the "unimp 10647 after call" convention would cause the callee to return to the 10648 wrong place. The generic code already disallows cases where the 10649 function being called returns a structure. 10650 10651 It may seem strange how this last case could occur. Usually there 10652 is code after the call which jumps to epilogue code which dumps the 10653 return value into the struct return area. That ought to invalidate 10654 the sibling call right? Well, in the C++ case we can end up passing 10655 the pointer to the struct return area to a constructor (which returns 10656 void) and then nothing else happens. Such a sibling call would look 10657 valid without the added check here. 
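   A hypothetical C++ illustration of that situation:

	struct S { S (); int x[4]; };
	S f (void) { return S (); }

   Here the constructor is invoked with f's struct-return pointer and f
   then simply returns, so the call would otherwise look like a valid
   tail-call candidate.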
10658 10659 VxWorks PIC PLT entries require the global pointer to be initialized 10660 on entry. We therefore can't emit sibling calls to them. */ 10661 static bool 10662 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 10663 { 10664 return (decl 10665 && flag_delayed_branch 10666 && (TARGET_ARCH64 || ! cfun->returns_struct) 10667 && !(TARGET_VXWORKS_RTP 10668 && flag_pic 10669 && !targetm.binds_local_p (decl))); 10670 } 10671 10672 /* libfunc renaming. */ 10673 10674 static void 10675 sparc_init_libfuncs (void) 10676 { 10677 if (TARGET_ARCH32) 10678 { 10679 /* Use the subroutines that Sun's library provides for integer 10680 multiply and divide. The `*' prevents an underscore from 10681 being prepended by the compiler. .umul is a little faster 10682 than .mul. */ 10683 set_optab_libfunc (smul_optab, SImode, "*.umul"); 10684 set_optab_libfunc (sdiv_optab, SImode, "*.div"); 10685 set_optab_libfunc (udiv_optab, SImode, "*.udiv"); 10686 set_optab_libfunc (smod_optab, SImode, "*.rem"); 10687 set_optab_libfunc (umod_optab, SImode, "*.urem"); 10688 10689 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */ 10690 set_optab_libfunc (add_optab, TFmode, "_Q_add"); 10691 set_optab_libfunc (sub_optab, TFmode, "_Q_sub"); 10692 set_optab_libfunc (neg_optab, TFmode, "_Q_neg"); 10693 set_optab_libfunc (smul_optab, TFmode, "_Q_mul"); 10694 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div"); 10695 10696 /* We can define the TFmode sqrt optab only if TARGET_FPU. This 10697 is because with soft-float, the SFmode and DFmode sqrt 10698 instructions will be absent, and the compiler will notice and 10699 try to use the TFmode sqrt instruction for calls to the 10700 builtin function sqrt, but this fails. */ 10701 if (TARGET_FPU) 10702 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt"); 10703 10704 set_optab_libfunc (eq_optab, TFmode, "_Q_feq"); 10705 set_optab_libfunc (ne_optab, TFmode, "_Q_fne"); 10706 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt"); 10707 set_optab_libfunc (ge_optab, TFmode, "_Q_fge"); 10708 set_optab_libfunc (lt_optab, TFmode, "_Q_flt"); 10709 set_optab_libfunc (le_optab, TFmode, "_Q_fle"); 10710 10711 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq"); 10712 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq"); 10713 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos"); 10714 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod"); 10715 10716 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi"); 10717 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou"); 10718 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq"); 10719 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq"); 10720 10721 if (DITF_CONVERSION_LIBFUNCS) 10722 { 10723 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll"); 10724 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull"); 10725 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq"); 10726 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq"); 10727 } 10728 10729 if (SUN_CONVERSION_LIBFUNCS) 10730 { 10731 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll"); 10732 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull"); 10733 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll"); 10734 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull"); 10735 } 10736 } 10737 if (TARGET_ARCH64) 10738 { 10739 /* In the SPARC 64bit ABI, SImode multiply and divide functions 10740 do not exist in the library. Make sure the compiler does not 10741 emit calls to them by accident. 
(It should always use the 10742 hardware instructions.) */ 10743 set_optab_libfunc (smul_optab, SImode, 0); 10744 set_optab_libfunc (sdiv_optab, SImode, 0); 10745 set_optab_libfunc (udiv_optab, SImode, 0); 10746 set_optab_libfunc (smod_optab, SImode, 0); 10747 set_optab_libfunc (umod_optab, SImode, 0); 10748 10749 if (SUN_INTEGER_MULTIPLY_64) 10750 { 10751 set_optab_libfunc (smul_optab, DImode, "__mul64"); 10752 set_optab_libfunc (sdiv_optab, DImode, "__div64"); 10753 set_optab_libfunc (udiv_optab, DImode, "__udiv64"); 10754 set_optab_libfunc (smod_optab, DImode, "__rem64"); 10755 set_optab_libfunc (umod_optab, DImode, "__urem64"); 10756 } 10757 10758 if (SUN_CONVERSION_LIBFUNCS) 10759 { 10760 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol"); 10761 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul"); 10762 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol"); 10763 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul"); 10764 } 10765 } 10766 } 10767 10768 /* SPARC builtins. */ 10769 enum sparc_builtins 10770 { 10771 /* FPU builtins. */ 10772 SPARC_BUILTIN_LDFSR, 10773 SPARC_BUILTIN_STFSR, 10774 10775 /* VIS 1.0 builtins. */ 10776 SPARC_BUILTIN_FPACK16, 10777 SPARC_BUILTIN_FPACK32, 10778 SPARC_BUILTIN_FPACKFIX, 10779 SPARC_BUILTIN_FEXPAND, 10780 SPARC_BUILTIN_FPMERGE, 10781 SPARC_BUILTIN_FMUL8X16, 10782 SPARC_BUILTIN_FMUL8X16AU, 10783 SPARC_BUILTIN_FMUL8X16AL, 10784 SPARC_BUILTIN_FMUL8SUX16, 10785 SPARC_BUILTIN_FMUL8ULX16, 10786 SPARC_BUILTIN_FMULD8SUX16, 10787 SPARC_BUILTIN_FMULD8ULX16, 10788 SPARC_BUILTIN_FALIGNDATAV4HI, 10789 SPARC_BUILTIN_FALIGNDATAV8QI, 10790 SPARC_BUILTIN_FALIGNDATAV2SI, 10791 SPARC_BUILTIN_FALIGNDATADI, 10792 SPARC_BUILTIN_WRGSR, 10793 SPARC_BUILTIN_RDGSR, 10794 SPARC_BUILTIN_ALIGNADDR, 10795 SPARC_BUILTIN_ALIGNADDRL, 10796 SPARC_BUILTIN_PDIST, 10797 SPARC_BUILTIN_EDGE8, 10798 SPARC_BUILTIN_EDGE8L, 10799 SPARC_BUILTIN_EDGE16, 10800 SPARC_BUILTIN_EDGE16L, 10801 SPARC_BUILTIN_EDGE32, 10802 SPARC_BUILTIN_EDGE32L, 10803 SPARC_BUILTIN_FCMPLE16, 10804 SPARC_BUILTIN_FCMPLE32, 10805 SPARC_BUILTIN_FCMPNE16, 10806 SPARC_BUILTIN_FCMPNE32, 10807 SPARC_BUILTIN_FCMPGT16, 10808 SPARC_BUILTIN_FCMPGT32, 10809 SPARC_BUILTIN_FCMPEQ16, 10810 SPARC_BUILTIN_FCMPEQ32, 10811 SPARC_BUILTIN_FPADD16, 10812 SPARC_BUILTIN_FPADD16S, 10813 SPARC_BUILTIN_FPADD32, 10814 SPARC_BUILTIN_FPADD32S, 10815 SPARC_BUILTIN_FPSUB16, 10816 SPARC_BUILTIN_FPSUB16S, 10817 SPARC_BUILTIN_FPSUB32, 10818 SPARC_BUILTIN_FPSUB32S, 10819 SPARC_BUILTIN_ARRAY8, 10820 SPARC_BUILTIN_ARRAY16, 10821 SPARC_BUILTIN_ARRAY32, 10822 10823 /* VIS 2.0 builtins. */ 10824 SPARC_BUILTIN_EDGE8N, 10825 SPARC_BUILTIN_EDGE8LN, 10826 SPARC_BUILTIN_EDGE16N, 10827 SPARC_BUILTIN_EDGE16LN, 10828 SPARC_BUILTIN_EDGE32N, 10829 SPARC_BUILTIN_EDGE32LN, 10830 SPARC_BUILTIN_BMASK, 10831 SPARC_BUILTIN_BSHUFFLEV4HI, 10832 SPARC_BUILTIN_BSHUFFLEV8QI, 10833 SPARC_BUILTIN_BSHUFFLEV2SI, 10834 SPARC_BUILTIN_BSHUFFLEDI, 10835 10836 /* VIS 3.0 builtins. 
*/ 10837 SPARC_BUILTIN_CMASK8, 10838 SPARC_BUILTIN_CMASK16, 10839 SPARC_BUILTIN_CMASK32, 10840 SPARC_BUILTIN_FCHKSM16, 10841 SPARC_BUILTIN_FSLL16, 10842 SPARC_BUILTIN_FSLAS16, 10843 SPARC_BUILTIN_FSRL16, 10844 SPARC_BUILTIN_FSRA16, 10845 SPARC_BUILTIN_FSLL32, 10846 SPARC_BUILTIN_FSLAS32, 10847 SPARC_BUILTIN_FSRL32, 10848 SPARC_BUILTIN_FSRA32, 10849 SPARC_BUILTIN_PDISTN, 10850 SPARC_BUILTIN_FMEAN16, 10851 SPARC_BUILTIN_FPADD64, 10852 SPARC_BUILTIN_FPSUB64, 10853 SPARC_BUILTIN_FPADDS16, 10854 SPARC_BUILTIN_FPADDS16S, 10855 SPARC_BUILTIN_FPSUBS16, 10856 SPARC_BUILTIN_FPSUBS16S, 10857 SPARC_BUILTIN_FPADDS32, 10858 SPARC_BUILTIN_FPADDS32S, 10859 SPARC_BUILTIN_FPSUBS32, 10860 SPARC_BUILTIN_FPSUBS32S, 10861 SPARC_BUILTIN_FUCMPLE8, 10862 SPARC_BUILTIN_FUCMPNE8, 10863 SPARC_BUILTIN_FUCMPGT8, 10864 SPARC_BUILTIN_FUCMPEQ8, 10865 SPARC_BUILTIN_FHADDS, 10866 SPARC_BUILTIN_FHADDD, 10867 SPARC_BUILTIN_FHSUBS, 10868 SPARC_BUILTIN_FHSUBD, 10869 SPARC_BUILTIN_FNHADDS, 10870 SPARC_BUILTIN_FNHADDD, 10871 SPARC_BUILTIN_UMULXHI, 10872 SPARC_BUILTIN_XMULX, 10873 SPARC_BUILTIN_XMULXHI, 10874 10875 /* VIS 4.0 builtins. */ 10876 SPARC_BUILTIN_FPADD8, 10877 SPARC_BUILTIN_FPADDS8, 10878 SPARC_BUILTIN_FPADDUS8, 10879 SPARC_BUILTIN_FPADDUS16, 10880 SPARC_BUILTIN_FPCMPLE8, 10881 SPARC_BUILTIN_FPCMPGT8, 10882 SPARC_BUILTIN_FPCMPULE16, 10883 SPARC_BUILTIN_FPCMPUGT16, 10884 SPARC_BUILTIN_FPCMPULE32, 10885 SPARC_BUILTIN_FPCMPUGT32, 10886 SPARC_BUILTIN_FPMAX8, 10887 SPARC_BUILTIN_FPMAX16, 10888 SPARC_BUILTIN_FPMAX32, 10889 SPARC_BUILTIN_FPMAXU8, 10890 SPARC_BUILTIN_FPMAXU16, 10891 SPARC_BUILTIN_FPMAXU32, 10892 SPARC_BUILTIN_FPMIN8, 10893 SPARC_BUILTIN_FPMIN16, 10894 SPARC_BUILTIN_FPMIN32, 10895 SPARC_BUILTIN_FPMINU8, 10896 SPARC_BUILTIN_FPMINU16, 10897 SPARC_BUILTIN_FPMINU32, 10898 SPARC_BUILTIN_FPSUB8, 10899 SPARC_BUILTIN_FPSUBS8, 10900 SPARC_BUILTIN_FPSUBUS8, 10901 SPARC_BUILTIN_FPSUBUS16, 10902 10903 /* VIS 4.0B builtins. */ 10904 10905 /* Note that all the DICTUNPACK* entries should be kept 10906 contiguous. */ 10907 SPARC_BUILTIN_FIRST_DICTUNPACK, 10908 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK, 10909 SPARC_BUILTIN_DICTUNPACK16, 10910 SPARC_BUILTIN_DICTUNPACK32, 10911 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32, 10912 10913 /* Note that all the FPCMP*SHL entries should be kept 10914 contiguous. 
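   As with the DICTUNPACK entries above, the FIRST/LAST markers bracket
   the group, presumably so that a simple range test can recognize these
   builtins.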
*/ 10915 SPARC_BUILTIN_FIRST_FPCMPSHL, 10916 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL, 10917 SPARC_BUILTIN_FPCMPGT8SHL, 10918 SPARC_BUILTIN_FPCMPEQ8SHL, 10919 SPARC_BUILTIN_FPCMPNE8SHL, 10920 SPARC_BUILTIN_FPCMPLE16SHL, 10921 SPARC_BUILTIN_FPCMPGT16SHL, 10922 SPARC_BUILTIN_FPCMPEQ16SHL, 10923 SPARC_BUILTIN_FPCMPNE16SHL, 10924 SPARC_BUILTIN_FPCMPLE32SHL, 10925 SPARC_BUILTIN_FPCMPGT32SHL, 10926 SPARC_BUILTIN_FPCMPEQ32SHL, 10927 SPARC_BUILTIN_FPCMPNE32SHL, 10928 SPARC_BUILTIN_FPCMPULE8SHL, 10929 SPARC_BUILTIN_FPCMPUGT8SHL, 10930 SPARC_BUILTIN_FPCMPULE16SHL, 10931 SPARC_BUILTIN_FPCMPUGT16SHL, 10932 SPARC_BUILTIN_FPCMPULE32SHL, 10933 SPARC_BUILTIN_FPCMPUGT32SHL, 10934 SPARC_BUILTIN_FPCMPDE8SHL, 10935 SPARC_BUILTIN_FPCMPDE16SHL, 10936 SPARC_BUILTIN_FPCMPDE32SHL, 10937 SPARC_BUILTIN_FPCMPUR8SHL, 10938 SPARC_BUILTIN_FPCMPUR16SHL, 10939 SPARC_BUILTIN_FPCMPUR32SHL, 10940 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL, 10941 10942 SPARC_BUILTIN_MAX 10943 }; 10944 10945 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX]; 10946 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX]; 10947 10948 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE. 10949 The instruction should require a constant operand of some sort. The 10950 function prints an error if OPVAL is not valid. */ 10951 10952 static int 10953 check_constant_argument (enum insn_code icode, int opnum, rtx opval) 10954 { 10955 if (GET_CODE (opval) != CONST_INT) 10956 { 10957 error ("%qs expects a constant argument", insn_data[icode].name); 10958 return false; 10959 } 10960 10961 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode)) 10962 { 10963 error ("constant argument out of range for %qs", insn_data[icode].name); 10964 return false; 10965 } 10966 return true; 10967 } 10968 10969 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the 10970 function decl or NULL_TREE if the builtin was not added. */ 10971 10972 static tree 10973 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code, 10974 tree type) 10975 { 10976 tree t 10977 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE); 10978 10979 if (t) 10980 { 10981 sparc_builtins[code] = t; 10982 sparc_builtins_icode[code] = icode; 10983 } 10984 10985 return t; 10986 } 10987 10988 /* Likewise, but also marks the function as "const". */ 10989 10990 static tree 10991 def_builtin_const (const char *name, enum insn_code icode, 10992 enum sparc_builtins code, tree type) 10993 { 10994 tree t = def_builtin (name, icode, code, type); 10995 10996 if (t) 10997 TREE_READONLY (t) = 1; 10998 10999 return t; 11000 } 11001 11002 /* Implement the TARGET_INIT_BUILTINS target hook. 11003 Create builtin functions for special SPARC instructions. */ 11004 11005 static void 11006 sparc_init_builtins (void) 11007 { 11008 if (TARGET_FPU) 11009 sparc_fpu_init_builtins (); 11010 11011 if (TARGET_VIS) 11012 sparc_vis_init_builtins (); 11013 } 11014 11015 /* Create builtin functions for FPU instructions. */ 11016 11017 static void 11018 sparc_fpu_init_builtins (void) 11019 { 11020 tree ftype 11021 = build_function_type_list (void_type_node, 11022 build_pointer_type (unsigned_type_node), 0); 11023 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr, 11024 SPARC_BUILTIN_LDFSR, ftype); 11025 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr, 11026 SPARC_BUILTIN_STFSR, ftype); 11027 } 11028 11029 /* Create builtin functions for VIS instructions. 
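   Once registered, user code compiled for a VIS target can call these
   directly; a minimal, hypothetical usage sketch:

	typedef short v4hi __attribute__ ((vector_size (8)));

	v4hi
	add16 (v4hi a, v4hi b)
	{
	  return __builtin_vis_fpadd16 (a, b);
	}

   The def_builtin_const variants additionally mark the builtin
   TREE_READONLY, so such calls can be CSEd like ordinary arithmetic.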
*/ 11030 11031 static void 11032 sparc_vis_init_builtins (void) 11033 { 11034 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4); 11035 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8); 11036 tree v4hi = build_vector_type (intHI_type_node, 4); 11037 tree v2hi = build_vector_type (intHI_type_node, 2); 11038 tree v2si = build_vector_type (intSI_type_node, 2); 11039 tree v1si = build_vector_type (intSI_type_node, 1); 11040 11041 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0); 11042 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0); 11043 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0); 11044 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0); 11045 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0); 11046 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0); 11047 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0); 11048 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0); 11049 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0); 11050 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0); 11051 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0); 11052 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0); 11053 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0); 11054 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0); 11055 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node, 11056 v8qi, v8qi, 11057 intDI_type_node, 0); 11058 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node, 11059 v8qi, v8qi, 0); 11060 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node, 11061 v8qi, v8qi, 0); 11062 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node, 11063 intSI_type_node, 0); 11064 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node, 11065 intSI_type_node, 0); 11066 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node, 11067 intDI_type_node, 0); 11068 tree di_ftype_di_di = build_function_type_list (intDI_type_node, 11069 intDI_type_node, 11070 intDI_type_node, 0); 11071 tree si_ftype_si_si = build_function_type_list (intSI_type_node, 11072 intSI_type_node, 11073 intSI_type_node, 0); 11074 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node, 11075 ptr_type_node, 11076 intSI_type_node, 0); 11077 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node, 11078 ptr_type_node, 11079 intDI_type_node, 0); 11080 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node, 11081 ptr_type_node, 11082 ptr_type_node, 0); 11083 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node, 11084 ptr_type_node, 11085 ptr_type_node, 0); 11086 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node, 11087 v4hi, v4hi, 0); 11088 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node, 11089 v2si, v2si, 0); 11090 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node, 11091 v4hi, v4hi, 0); 11092 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node, 11093 v2si, v2si, 0); 11094 tree void_ftype_di = build_function_type_list (void_type_node, 11095 intDI_type_node, 0); 11096 tree di_ftype_void = build_function_type_list (intDI_type_node, 11097 void_type_node, 0); 11098 tree void_ftype_si = build_function_type_list 
(void_type_node, 11099 intSI_type_node, 0); 11100 tree sf_ftype_sf_sf = build_function_type_list (float_type_node, 11101 float_type_node, 11102 float_type_node, 0); 11103 tree df_ftype_df_df = build_function_type_list (double_type_node, 11104 double_type_node, 11105 double_type_node, 0); 11106 11107 /* Packing and expanding vectors. */ 11108 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, 11109 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi); 11110 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis, 11111 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi); 11112 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis, 11113 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si); 11114 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, 11115 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi); 11116 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis, 11117 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi); 11118 11119 /* Multiplications. */ 11120 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis, 11121 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi); 11122 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis, 11123 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi); 11124 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis, 11125 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi); 11126 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis, 11127 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi); 11128 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis, 11129 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi); 11130 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis, 11131 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi); 11132 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis, 11133 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi); 11134 11135 /* Data aligning. */ 11136 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis, 11137 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi); 11138 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis, 11139 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi); 11140 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis, 11141 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si); 11142 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis, 11143 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di); 11144 11145 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis, 11146 SPARC_BUILTIN_WRGSR, void_ftype_di); 11147 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis, 11148 SPARC_BUILTIN_RDGSR, di_ftype_void); 11149 11150 if (TARGET_ARCH64) 11151 { 11152 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis, 11153 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di); 11154 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis, 11155 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di); 11156 } 11157 else 11158 { 11159 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis, 11160 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si); 11161 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis, 11162 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si); 11163 } 11164 11165 /* Pixel distance. */ 11166 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis, 11167 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di); 11168 11169 /* Edge handling. 
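   The edge builtins compute the mask used for partial stores at the
   boundaries of a memory region, so their result is a plain integer;
   the DImode variants are used in 64-bit mode and the SImode variants
   in 32-bit mode, as selected below.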
*/ 11170 if (TARGET_ARCH64) 11171 { 11172 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis, 11173 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr); 11174 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis, 11175 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr); 11176 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis, 11177 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr); 11178 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis, 11179 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr); 11180 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis, 11181 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr); 11182 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis, 11183 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr); 11184 } 11185 else 11186 { 11187 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis, 11188 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr); 11189 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis, 11190 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr); 11191 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis, 11192 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr); 11193 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis, 11194 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr); 11195 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis, 11196 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr); 11197 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis, 11198 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr); 11199 } 11200 11201 /* Pixel compare. */ 11202 if (TARGET_ARCH64) 11203 { 11204 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis, 11205 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi); 11206 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis, 11207 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si); 11208 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis, 11209 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi); 11210 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis, 11211 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si); 11212 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis, 11213 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi); 11214 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis, 11215 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si); 11216 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis, 11217 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi); 11218 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis, 11219 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si); 11220 } 11221 else 11222 { 11223 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis, 11224 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi); 11225 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis, 11226 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si); 11227 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis, 11228 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi); 11229 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis, 11230 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si); 11231 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis, 11232 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi); 11233 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis, 11234 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si); 11235 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis, 11236 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi); 11237 def_builtin_const ("__builtin_vis_fcmpeq32", 
CODE_FOR_fcmpeq32si_vis, 11238 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si); 11239 } 11240 11241 /* Addition and subtraction. */ 11242 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3, 11243 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi); 11244 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3, 11245 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi); 11246 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3, 11247 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si); 11248 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3, 11249 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si); 11250 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3, 11251 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi); 11252 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3, 11253 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi); 11254 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3, 11255 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si); 11256 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3, 11257 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si); 11258 11259 /* Three-dimensional array addressing. */ 11260 if (TARGET_ARCH64) 11261 { 11262 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis, 11263 SPARC_BUILTIN_ARRAY8, di_ftype_di_di); 11264 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis, 11265 SPARC_BUILTIN_ARRAY16, di_ftype_di_di); 11266 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis, 11267 SPARC_BUILTIN_ARRAY32, di_ftype_di_di); 11268 } 11269 else 11270 { 11271 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis, 11272 SPARC_BUILTIN_ARRAY8, si_ftype_si_si); 11273 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis, 11274 SPARC_BUILTIN_ARRAY16, si_ftype_si_si); 11275 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis, 11276 SPARC_BUILTIN_ARRAY32, si_ftype_si_si); 11277 } 11278 11279 if (TARGET_VIS2) 11280 { 11281 /* Edge handling. 
*/ 11282 if (TARGET_ARCH64) 11283 { 11284 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis, 11285 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr); 11286 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis, 11287 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr); 11288 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis, 11289 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr); 11290 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis, 11291 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr); 11292 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis, 11293 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr); 11294 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis, 11295 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr); 11296 } 11297 else 11298 { 11299 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis, 11300 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr); 11301 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis, 11302 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr); 11303 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis, 11304 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr); 11305 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis, 11306 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr); 11307 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis, 11308 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr); 11309 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis, 11310 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr); 11311 } 11312 11313 /* Byte mask and shuffle. */ 11314 if (TARGET_ARCH64) 11315 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis, 11316 SPARC_BUILTIN_BMASK, di_ftype_di_di); 11317 else 11318 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis, 11319 SPARC_BUILTIN_BMASK, si_ftype_si_si); 11320 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis, 11321 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi); 11322 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis, 11323 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi); 11324 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis, 11325 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si); 11326 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis, 11327 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di); 11328 } 11329 11330 if (TARGET_VIS3) 11331 { 11332 if (TARGET_ARCH64) 11333 { 11334 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis, 11335 SPARC_BUILTIN_CMASK8, void_ftype_di); 11336 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis, 11337 SPARC_BUILTIN_CMASK16, void_ftype_di); 11338 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis, 11339 SPARC_BUILTIN_CMASK32, void_ftype_di); 11340 } 11341 else 11342 { 11343 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis, 11344 SPARC_BUILTIN_CMASK8, void_ftype_si); 11345 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis, 11346 SPARC_BUILTIN_CMASK16, void_ftype_si); 11347 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis, 11348 SPARC_BUILTIN_CMASK32, void_ftype_si); 11349 } 11350 11351 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis, 11352 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi); 11353 11354 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3, 11355 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi); 11356 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3, 11357 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi); 11358 def_builtin_const 
("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3, 11359 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi); 11360 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3, 11361 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi); 11362 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3, 11363 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si); 11364 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3, 11365 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si); 11366 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3, 11367 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si); 11368 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3, 11369 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si); 11370 11371 if (TARGET_ARCH64) 11372 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis, 11373 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi); 11374 else 11375 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis, 11376 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi); 11377 11378 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis, 11379 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi); 11380 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis, 11381 SPARC_BUILTIN_FPADD64, di_ftype_di_di); 11382 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis, 11383 SPARC_BUILTIN_FPSUB64, di_ftype_di_di); 11384 11385 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3, 11386 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi); 11387 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3, 11388 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi); 11389 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3, 11390 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi); 11391 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3, 11392 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi); 11393 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3, 11394 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si); 11395 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3, 11396 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si); 11397 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3, 11398 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si); 11399 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3, 11400 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si); 11401 11402 if (TARGET_ARCH64) 11403 { 11404 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis, 11405 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi); 11406 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis, 11407 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi); 11408 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis, 11409 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi); 11410 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis, 11411 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi); 11412 } 11413 else 11414 { 11415 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis, 11416 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi); 11417 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis, 11418 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi); 11419 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis, 11420 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi); 11421 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis, 11422 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi); 11423 } 11424 11425 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis, 11426 
SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf); 11427 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis, 11428 SPARC_BUILTIN_FHADDD, df_ftype_df_df); 11429 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis, 11430 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf); 11431 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis, 11432 SPARC_BUILTIN_FHSUBD, df_ftype_df_df); 11433 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis, 11434 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf); 11435 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis, 11436 SPARC_BUILTIN_FNHADDD, df_ftype_df_df); 11437 11438 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis, 11439 SPARC_BUILTIN_UMULXHI, di_ftype_di_di); 11440 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis, 11441 SPARC_BUILTIN_XMULX, di_ftype_di_di); 11442 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis, 11443 SPARC_BUILTIN_XMULXHI, di_ftype_di_di); 11444 } 11445 11446 if (TARGET_VIS4) 11447 { 11448 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3, 11449 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi); 11450 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3, 11451 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi); 11452 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3, 11453 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi); 11454 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3, 11455 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi); 11456 11457 11458 if (TARGET_ARCH64) 11459 { 11460 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis, 11461 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi); 11462 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis, 11463 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi); 11464 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis, 11465 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi); 11466 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis, 11467 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi); 11468 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis, 11469 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si); 11470 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis, 11471 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si); 11472 } 11473 else 11474 { 11475 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis, 11476 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi); 11477 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis, 11478 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi); 11479 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis, 11480 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi); 11481 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis, 11482 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi); 11483 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis, 11484 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si); 11485 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis, 11486 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si); 11487 } 11488 11489 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3, 11490 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi); 11491 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3, 11492 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi); 11493 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3, 11494 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si); 11495
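/* Hypothetical user-level sketch (illustrative only, not part of this file): with -mvis4, code such as typedef signed char v8qi_t __attribute__ ((vector_size (8))); v8qi_t f (v8qi_t a, v8qi_t b) { return __builtin_vis_fpmax8 (a, b); } expands through the CODE_FOR_maxv8qi3 pattern registered just above.  */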
def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3, 11496 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi); 11497 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3, 11498 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi); 11499 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3, 11500 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si); 11501 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3, 11502 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi); 11503 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3, 11504 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi); 11505 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3, 11506 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si); 11507 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3, 11508 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi); 11509 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3, 11510 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi); 11511 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3, 11512 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si); 11513 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3, 11514 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi); 11515 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3, 11516 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi); 11517 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3, 11518 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi); 11519 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3, 11520 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi); 11521 } 11522 11523 if (TARGET_VIS4B) 11524 { 11525 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8, 11526 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si); 11527 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16, 11528 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si); 11529 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32, 11530 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si); 11531 11532 if (TARGET_ARCH64) 11533 { 11534 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node, 11535 v8qi, v8qi, 11536 intSI_type_node, 0); 11537 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node, 11538 v4hi, v4hi, 11539 intSI_type_node, 0); 11540 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node, 11541 v2si, v2si, 11542 intSI_type_node, 0); 11543 11544 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl, 11545 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si); 11546 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl, 11547 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si); 11548 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl, 11549 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si); 11550 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl, 11551 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si); 11552 11553 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl, 11554 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si); 11555 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl, 11556 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si); 11557 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl, 11558 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si); 11559 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl, 11560 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si); 11561 11562 
def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl, 11563 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si); 11564 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl, 11565 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si); 11566 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl, 11567 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si); 11568 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl, 11569 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si); 11570 11571 11572 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl, 11573 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si); 11574 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl, 11575 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si); 11576 11577 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl, 11578 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si); 11579 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl, 11580 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si); 11581 11582 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl, 11583 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si); 11584 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl, 11585 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si); 11586 11587 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl, 11588 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si); 11589 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl, 11590 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si); 11591 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl, 11592 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si); 11593 11594 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl, 11595 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si); 11596 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl, 11597 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si); 11598 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl, 11599 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si); 11600 11601 } 11602 else 11603 { 11604 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node, 11605 v8qi, v8qi, 11606 intSI_type_node, 0); 11607 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node, 11608 v4hi, v4hi, 11609 intSI_type_node, 0); 11610 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node, 11611 v2si, v2si, 11612 intSI_type_node, 0); 11613 11614 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl, 11615 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si); 11616 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl, 11617 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si); 11618 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl, 11619 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si); 11620 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl, 11621 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si); 11622 11623 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl, 11624 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si); 11625 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl, 11626 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si); 11627 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl, 
11628 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si); 11629 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl, 11630 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si); 11631 11632 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl, 11633 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si); 11634 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl, 11635 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si); 11636 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl, 11637 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si); 11638 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl, 11639 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si); 11640 11641 11642 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl, 11643 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si); 11644 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl, 11645 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si); 11646 11647 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl, 11648 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si); 11649 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl, 11650 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si); 11651 11652 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl, 11653 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si); 11654 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl, 11655 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si); 11656 11657 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl, 11658 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si); 11659 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl, 11660 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si); 11661 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl, 11662 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si); 11663 11664 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl, 11665 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si); 11666 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl, 11667 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si); 11668 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl, 11669 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si); 11670 } 11671 } 11672 } 11673 11674 /* Implement TARGET_BUILTIN_DECL hook. */ 11675 11676 static tree 11677 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 11678 { 11679 if (code >= SPARC_BUILTIN_MAX) 11680 return error_mark_node; 11681 11682 return sparc_builtins[code]; 11683 } 11684 11685 /* Implement TARGET_EXPAND_BUILTIN hook. */ 11686 11687 static rtx 11688 sparc_expand_builtin (tree exp, rtx target, 11689 rtx subtarget ATTRIBUTE_UNUSED, 11690 machine_mode tmode ATTRIBUTE_UNUSED, 11691 int ignore ATTRIBUTE_UNUSED) 11692 { 11693 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 11694 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl); 11695 enum insn_code icode = sparc_builtins_icode[code]; 11696 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; 11697 call_expr_arg_iterator iter; 11698 int arg_count = 0; 11699 rtx pat, op[4]; 11700 tree arg; 11701 11702 if (nonvoid) 11703 { 11704 machine_mode tmode = insn_data[icode].operand[0].mode; 11705 if (!target 11706 || GET_MODE (target) != tmode 11707 || !
(*insn_data[icode].operand[0].predicate) (target, tmode)) 11708 op[0] = gen_reg_rtx (tmode); 11709 else 11710 op[0] = target; 11711 } 11712 else 11713 op[0] = NULL_RTX; 11714 11715 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) 11716 { 11717 const struct insn_operand_data *insn_op; 11718 int idx; 11719 11720 if (arg == error_mark_node) 11721 return NULL_RTX; 11722 11723 arg_count++; 11724 idx = arg_count - !nonvoid; 11725 insn_op = &insn_data[icode].operand[idx]; 11726 op[arg_count] = expand_normal (arg); 11727 11728 /* Some of the builtins require constant arguments. We check 11729 for this here. */ 11730 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL 11731 && code <= SPARC_BUILTIN_LAST_FPCMPSHL 11732 && arg_count == 3) 11733 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK 11734 && code <= SPARC_BUILTIN_LAST_DICTUNPACK 11735 && arg_count == 2)) 11736 { 11737 if (!check_constant_argument (icode, idx, op[arg_count])) 11738 return const0_rtx; 11739 } 11740 11741 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR) 11742 { 11743 if (!address_operand (op[arg_count], SImode)) 11744 { 11745 op[arg_count] = convert_memory_address (Pmode, op[arg_count]); 11746 op[arg_count] = copy_addr_to_reg (op[arg_count]); 11747 } 11748 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]); 11749 } 11750 11751 else if (insn_op->mode == V1DImode 11752 && GET_MODE (op[arg_count]) == DImode) 11753 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]); 11754 11755 else if (insn_op->mode == V1SImode 11756 && GET_MODE (op[arg_count]) == SImode) 11757 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]); 11758 11759 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count], 11760 insn_op->mode)) 11761 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]); 11762 } 11763 11764 switch (arg_count) 11765 { 11766 case 0: 11767 pat = GEN_FCN (icode) (op[0]); 11768 break; 11769 case 1: 11770 if (nonvoid) 11771 pat = GEN_FCN (icode) (op[0], op[1]); 11772 else 11773 pat = GEN_FCN (icode) (op[1]); 11774 break; 11775 case 2: 11776 pat = GEN_FCN (icode) (op[0], op[1], op[2]); 11777 break; 11778 case 3: 11779 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); 11780 break; 11781 default: 11782 gcc_unreachable (); 11783 } 11784 11785 if (!pat) 11786 return NULL_RTX; 11787 11788 emit_insn (pat); 11789 11790 return (nonvoid ? op[0] : const0_rtx); 11791 } 11792 11793 /* Return the upper 16 bits of the 8x16 multiplication. */ 11794 11795 static int 11796 sparc_vis_mul8x16 (int e8, int e16) 11797 { 11798 return (e8 * e16 + 128) / 256; 11799 } 11800 11801 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put 11802 the result into the array N_ELTS, whose elements are of INNER_TYPE. 
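Each result element is (e8 * e16 + 128) / 256, i.e. the rounded upper 16 bits of the 8x16 product computed by sparc_vis_mul8x16 above; for example 100 * 256 gives (25600 + 128) / 256 = 100.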
*/ 11803 11804 static void 11805 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode, 11806 tree inner_type, tree cst0, tree cst1) 11807 { 11808 unsigned i, num = VECTOR_CST_NELTS (cst0); 11809 int scale; 11810 11811 switch (fncode) 11812 { 11813 case SPARC_BUILTIN_FMUL8X16: 11814 for (i = 0; i < num; ++i) 11815 { 11816 int val 11817 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 11818 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i))); 11819 n_elts->quick_push (build_int_cst (inner_type, val)); 11820 } 11821 break; 11822 11823 case SPARC_BUILTIN_FMUL8X16AU: 11824 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0)); 11825 11826 for (i = 0; i < num; ++i) 11827 { 11828 int val 11829 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 11830 scale); 11831 n_elts->quick_push (build_int_cst (inner_type, val)); 11832 } 11833 break; 11834 11835 case SPARC_BUILTIN_FMUL8X16AL: 11836 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1)); 11837 11838 for (i = 0; i < num; ++i) 11839 { 11840 int val 11841 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 11842 scale); 11843 n_elts->quick_push (build_int_cst (inner_type, val)); 11844 } 11845 break; 11846 11847 default: 11848 gcc_unreachable (); 11849 } 11850 } 11851 11852 /* Implement TARGET_FOLD_BUILTIN hook. 11853 11854 Fold builtin functions for SPARC intrinsics. If IGNORE is true the 11855 result of the function call is ignored. NULL_TREE is returned if the 11856 function could not be folded. */ 11857 11858 static tree 11859 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, 11860 tree *args, bool ignore) 11861 { 11862 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl); 11863 tree rtype = TREE_TYPE (TREE_TYPE (fndecl)); 11864 tree arg0, arg1, arg2; 11865 11866 if (ignore) 11867 switch (code) 11868 { 11869 case SPARC_BUILTIN_LDFSR: 11870 case SPARC_BUILTIN_STFSR: 11871 case SPARC_BUILTIN_ALIGNADDR: 11872 case SPARC_BUILTIN_WRGSR: 11873 case SPARC_BUILTIN_BMASK: 11874 case SPARC_BUILTIN_CMASK8: 11875 case SPARC_BUILTIN_CMASK16: 11876 case SPARC_BUILTIN_CMASK32: 11877 break; 11878 11879 default: 11880 return build_zero_cst (rtype); 11881 } 11882 11883 switch (code) 11884 { 11885 case SPARC_BUILTIN_FEXPAND: 11886 arg0 = args[0]; 11887 STRIP_NOPS (arg0); 11888 11889 if (TREE_CODE (arg0) == VECTOR_CST) 11890 { 11891 tree inner_type = TREE_TYPE (rtype); 11892 unsigned i; 11893 11894 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1); 11895 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 11896 { 11897 unsigned HOST_WIDE_INT val 11898 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i)); 11899 n_elts.quick_push (build_int_cst (inner_type, val << 4)); 11900 } 11901 return n_elts.build (); 11902 } 11903 break; 11904 11905 case SPARC_BUILTIN_FMUL8X16: 11906 case SPARC_BUILTIN_FMUL8X16AU: 11907 case SPARC_BUILTIN_FMUL8X16AL: 11908 arg0 = args[0]; 11909 arg1 = args[1]; 11910 STRIP_NOPS (arg0); 11911 STRIP_NOPS (arg1); 11912 11913 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) 11914 { 11915 tree inner_type = TREE_TYPE (rtype); 11916 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1); 11917 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1); 11918 return n_elts.build (); 11919 } 11920 break; 11921 11922 case SPARC_BUILTIN_FPMERGE: 11923 arg0 = args[0]; 11924 arg1 = args[1]; 11925 STRIP_NOPS (arg0); 11926 STRIP_NOPS (arg1); 11927 11928 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) 11929 { 11930 
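/* FPMERGE interleaves the two constant input vectors element by element, so the folded result is { a0, b0, a1, b1, ... } with twice as many elements.  */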
tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1); 11931 unsigned i; 11932 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 11933 { 11934 n_elts.quick_push (VECTOR_CST_ELT (arg0, i)); 11935 n_elts.quick_push (VECTOR_CST_ELT (arg1, i)); 11936 } 11937 11938 return n_elts.build (); 11939 } 11940 break; 11941 11942 case SPARC_BUILTIN_PDIST: 11943 case SPARC_BUILTIN_PDISTN: 11944 arg0 = args[0]; 11945 arg1 = args[1]; 11946 STRIP_NOPS (arg0); 11947 STRIP_NOPS (arg1); 11948 if (code == SPARC_BUILTIN_PDIST) 11949 { 11950 arg2 = args[2]; 11951 STRIP_NOPS (arg2); 11952 } 11953 else 11954 arg2 = integer_zero_node; 11955 11956 if (TREE_CODE (arg0) == VECTOR_CST 11957 && TREE_CODE (arg1) == VECTOR_CST 11958 && TREE_CODE (arg2) == INTEGER_CST) 11959 { 11960 bool overflow = false; 11961 widest_int result = wi::to_widest (arg2); 11962 widest_int tmp; 11963 unsigned i; 11964 11965 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 11966 { 11967 tree e0 = VECTOR_CST_ELT (arg0, i); 11968 tree e1 = VECTOR_CST_ELT (arg1, i); 11969 11970 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf; 11971 11972 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf); 11973 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf); 11974 if (wi::neg_p (tmp)) 11975 tmp = wi::neg (tmp, &neg2_ovf); 11976 else 11977 neg2_ovf = wi::OVF_NONE; 11978 result = wi::add (result, tmp, SIGNED, &add2_ovf); 11979 overflow |= ((neg1_ovf != wi::OVF_NONE) 11980 | (neg2_ovf != wi::OVF_NONE) 11981 | (add1_ovf != wi::OVF_NONE) 11982 | (add2_ovf != wi::OVF_NONE)); 11983 } 11984 11985 gcc_assert (!overflow); 11986 11987 return wide_int_to_tree (rtype, result); 11988 } 11989 11990 default: 11991 break; 11992 } 11993 11994 return NULL_TREE; 11995 } 11996 11997 /* ??? This duplicates information provided to the compiler by the 11998 ??? scheduler description. Some day, teach genautomata to output 11999 ??? the latencies and then CSE will just use that. */ 12000 12001 static bool 12002 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code, 12003 int opno ATTRIBUTE_UNUSED, 12004 int *total, bool speed ATTRIBUTE_UNUSED) 12005 { 12006 int code = GET_CODE (x); 12007 bool float_mode_p = FLOAT_MODE_P (mode); 12008 12009 switch (code) 12010 { 12011 case CONST_INT: 12012 if (SMALL_INT (x)) 12013 *total = 0; 12014 else 12015 *total = 2; 12016 return true; 12017 12018 case CONST_WIDE_INT: 12019 *total = 0; 12020 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0))) 12021 *total += 2; 12022 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1))) 12023 *total += 2; 12024 return true; 12025 12026 case HIGH: 12027 *total = 2; 12028 return true; 12029 12030 case CONST: 12031 case LABEL_REF: 12032 case SYMBOL_REF: 12033 *total = 4; 12034 return true; 12035 12036 case CONST_DOUBLE: 12037 *total = 8; 12038 return true; 12039 12040 case MEM: 12041 /* If outer-code was a sign or zero extension, a cost 12042 of COSTS_N_INSNS (1) was already added in. This is 12043 why we are subtracting it back out. 
*/ 12044 if (outer_code == ZERO_EXTEND) 12045 { 12046 *total = sparc_costs->int_zload - COSTS_N_INSNS (1); 12047 } 12048 else if (outer_code == SIGN_EXTEND) 12049 { 12050 *total = sparc_costs->int_sload - COSTS_N_INSNS (1); 12051 } 12052 else if (float_mode_p) 12053 { 12054 *total = sparc_costs->float_load; 12055 } 12056 else 12057 { 12058 *total = sparc_costs->int_load; 12059 } 12060 12061 return true; 12062 12063 case PLUS: 12064 case MINUS: 12065 if (float_mode_p) 12066 *total = sparc_costs->float_plusminus; 12067 else 12068 *total = COSTS_N_INSNS (1); 12069 return false; 12070 12071 case FMA: 12072 { 12073 rtx sub; 12074 12075 gcc_assert (float_mode_p); 12076 *total = sparc_costs->float_mul; 12077 12078 sub = XEXP (x, 0); 12079 if (GET_CODE (sub) == NEG) 12080 sub = XEXP (sub, 0); 12081 *total += rtx_cost (sub, mode, FMA, 0, speed); 12082 12083 sub = XEXP (x, 2); 12084 if (GET_CODE (sub) == NEG) 12085 sub = XEXP (sub, 0); 12086 *total += rtx_cost (sub, mode, FMA, 2, speed); 12087 return true; 12088 } 12089 12090 case MULT: 12091 if (float_mode_p) 12092 *total = sparc_costs->float_mul; 12093 else if (TARGET_ARCH32 && !TARGET_HARD_MUL) 12094 *total = COSTS_N_INSNS (25); 12095 else 12096 { 12097 int bit_cost; 12098 12099 bit_cost = 0; 12100 if (sparc_costs->int_mul_bit_factor) 12101 { 12102 int nbits; 12103 12104 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 12105 { 12106 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 12107 for (nbits = 0; value != 0; value &= value - 1) 12108 nbits++; 12109 } 12110 else 12111 nbits = 7; 12112 12113 if (nbits < 3) 12114 nbits = 3; 12115 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor; 12116 bit_cost = COSTS_N_INSNS (bit_cost); 12117 } 12118 12119 if (mode == DImode || !TARGET_HARD_MUL) 12120 *total = sparc_costs->int_mulX + bit_cost; 12121 else 12122 *total = sparc_costs->int_mul + bit_cost; 12123 } 12124 return false; 12125 12126 case ASHIFT: 12127 case ASHIFTRT: 12128 case LSHIFTRT: 12129 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty; 12130 return false; 12131 12132 case DIV: 12133 case UDIV: 12134 case MOD: 12135 case UMOD: 12136 if (float_mode_p) 12137 { 12138 if (mode == DFmode) 12139 *total = sparc_costs->float_div_df; 12140 else 12141 *total = sparc_costs->float_div_sf; 12142 } 12143 else 12144 { 12145 if (mode == DImode) 12146 *total = sparc_costs->int_divX; 12147 else 12148 *total = sparc_costs->int_div; 12149 } 12150 return false; 12151 12152 case NEG: 12153 if (! float_mode_p) 12154 { 12155 *total = COSTS_N_INSNS (1); 12156 return false; 12157 } 12158 /* FALLTHRU */ 12159 12160 case ABS: 12161 case FLOAT: 12162 case UNSIGNED_FLOAT: 12163 case FIX: 12164 case UNSIGNED_FIX: 12165 case FLOAT_EXTEND: 12166 case FLOAT_TRUNCATE: 12167 *total = sparc_costs->float_move; 12168 return false; 12169 12170 case SQRT: 12171 if (mode == DFmode) 12172 *total = sparc_costs->float_sqrt_df; 12173 else 12174 *total = sparc_costs->float_sqrt_sf; 12175 return false; 12176 12177 case COMPARE: 12178 if (float_mode_p) 12179 *total = sparc_costs->float_cmp; 12180 else 12181 *total = COSTS_N_INSNS (1); 12182 return false; 12183 12184 case IF_THEN_ELSE: 12185 if (float_mode_p) 12186 *total = sparc_costs->float_cmove; 12187 else 12188 *total = sparc_costs->int_cmove; 12189 return false; 12190 12191 case IOR: 12192 /* Handle the NAND vector patterns. 
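These show up as (ior (not x) (not y)) and are costed below as a single instruction.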
*/ 12193 if (sparc_vector_mode_supported_p (mode) 12194 && GET_CODE (XEXP (x, 0)) == NOT 12195 && GET_CODE (XEXP (x, 1)) == NOT) 12196 { 12197 *total = COSTS_N_INSNS (1); 12198 return true; 12199 } 12200 else 12201 return false; 12202 12203 default: 12204 return false; 12205 } 12206 } 12207 12208 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */ 12209 12210 static inline bool 12211 general_or_i64_p (reg_class_t rclass) 12212 { 12213 return (rclass == GENERAL_REGS || rclass == I64_REGS); 12214 } 12215 12216 /* Implement TARGET_REGISTER_MOVE_COST. */ 12217 12218 static int 12219 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, 12220 reg_class_t from, reg_class_t to) 12221 { 12222 bool need_memory = false; 12223 12224 /* This helps postreload CSE to eliminate redundant comparisons. */ 12225 if (from == NO_REGS || to == NO_REGS) 12226 return 100; 12227 12228 if (from == FPCC_REGS || to == FPCC_REGS) 12229 need_memory = true; 12230 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to)) 12231 || (general_or_i64_p (from) && FP_REG_CLASS_P (to))) 12232 { 12233 if (TARGET_VIS3) 12234 { 12235 int size = GET_MODE_SIZE (mode); 12236 if (size == 8 || size == 4) 12237 { 12238 if (! TARGET_ARCH32 || size == 4) 12239 return 4; 12240 else 12241 return 6; 12242 } 12243 } 12244 need_memory = true; 12245 } 12246 12247 if (need_memory) 12248 { 12249 if (sparc_cpu == PROCESSOR_ULTRASPARC 12250 || sparc_cpu == PROCESSOR_ULTRASPARC3 12251 || sparc_cpu == PROCESSOR_NIAGARA 12252 || sparc_cpu == PROCESSOR_NIAGARA2 12253 || sparc_cpu == PROCESSOR_NIAGARA3 12254 || sparc_cpu == PROCESSOR_NIAGARA4 12255 || sparc_cpu == PROCESSOR_NIAGARA7 12256 || sparc_cpu == PROCESSOR_M8) 12257 return 12; 12258 12259 return 6; 12260 } 12261 12262 return 2; 12263 } 12264 12265 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2. 12266 This is achieved by means of a manual dynamic stack space allocation in 12267 the current frame. We make the assumption that SEQ doesn't contain any 12268 function calls, with the possible exception of calls to the GOT helper. */ 12269 12270 static void 12271 emit_and_preserve (rtx seq, rtx reg, rtx reg2) 12272 { 12273 /* We must preserve the lowest 16 words for the register save area. */ 12274 HOST_WIDE_INT offset = 16*UNITS_PER_WORD; 12275 /* We really need only 2 words of fresh stack space. */ 12276 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD); 12277 12278 rtx slot 12279 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx, 12280 SPARC_STACK_BIAS + offset)); 12281 12282 emit_insn (gen_stack_pointer_inc (GEN_INT (-size))); 12283 emit_insn (gen_rtx_SET (slot, reg)); 12284 if (reg2) 12285 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD), 12286 reg2)); 12287 emit_insn (seq); 12288 if (reg2) 12289 emit_insn (gen_rtx_SET (reg2, 12290 adjust_address (slot, word_mode, UNITS_PER_WORD))); 12291 emit_insn (gen_rtx_SET (reg, slot)); 12292 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 12293 } 12294 12295 /* Output the assembler code for a thunk function. THUNK_DECL is the 12296 declaration for the thunk function itself, FUNCTION is the decl for 12297 the target function. DELTA is an immediate constant offset to be 12298 added to THIS. If VCALL_OFFSET is nonzero, the word at address 12299 (*THIS + VCALL_OFFSET) should be additionally added to THIS. 
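In other words, the emitted thunk computes THIS += DELTA, then (if VCALL_OFFSET is nonzero) THIS += *(*THIS + VCALL_OFFSET), and finally tail-calls FUNCTION.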
*/ 12300 12301 static void 12302 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 12303 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 12304 tree function) 12305 { 12306 rtx this_rtx, funexp; 12307 rtx_insn *insn; 12308 unsigned int int_arg_first; 12309 12310 reload_completed = 1; 12311 epilogue_completed = 1; 12312 12313 emit_note (NOTE_INSN_PROLOGUE_END); 12314 12315 if (TARGET_FLAT) 12316 { 12317 sparc_leaf_function_p = 1; 12318 12319 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST; 12320 } 12321 else if (flag_delayed_branch) 12322 { 12323 /* We will emit a regular sibcall below, so we need to instruct 12324 output_sibcall that we are in a leaf function. */ 12325 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1; 12326 12327 /* This will cause final.c to invoke leaf_renumber_regs so we 12328 must behave as if we were in a not-yet-leafified function. */ 12329 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST; 12330 } 12331 else 12332 { 12333 /* We will emit the sibcall manually below, so we will need to 12334 manually spill non-leaf registers. */ 12335 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0; 12336 12337 /* We really are in a leaf function. */ 12338 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST; 12339 } 12340 12341 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function 12342 returns a structure, the structure return pointer is there instead. */ 12343 if (TARGET_ARCH64 12344 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 12345 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1); 12346 else 12347 this_rtx = gen_rtx_REG (Pmode, int_arg_first); 12348 12349 /* Add DELTA. When possible use a plain add, otherwise load it into 12350 a register first. */ 12351 if (delta) 12352 { 12353 rtx delta_rtx = GEN_INT (delta); 12354 12355 if (! SPARC_SIMM13_P (delta)) 12356 { 12357 rtx scratch = gen_rtx_REG (Pmode, 1); 12358 emit_move_insn (scratch, delta_rtx); 12359 delta_rtx = scratch; 12360 } 12361 12362 /* THIS_RTX += DELTA. */ 12363 emit_insn (gen_add2_insn (this_rtx, delta_rtx)); 12364 } 12365 12366 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */ 12367 if (vcall_offset) 12368 { 12369 rtx vcall_offset_rtx = GEN_INT (vcall_offset); 12370 rtx scratch = gen_rtx_REG (Pmode, 1); 12371 12372 gcc_assert (vcall_offset < 0); 12373 12374 /* SCRATCH = *THIS_RTX. */ 12375 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx)); 12376 12377 /* Prepare for adding VCALL_OFFSET. The difficulty is that we 12378 may not have any available scratch register at this point. */ 12379 if (SPARC_SIMM13_P (vcall_offset)) 12380 ; 12381 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */ 12382 else if (! fixed_regs[5] 12383 /* The below sequence is made up of at least 2 insns, 12384 while the default method may need only one. */ 12385 && vcall_offset < -8192) 12386 { 12387 rtx scratch2 = gen_rtx_REG (Pmode, 5); 12388 emit_move_insn (scratch2, vcall_offset_rtx); 12389 vcall_offset_rtx = scratch2; 12390 } 12391 else 12392 { 12393 rtx increment = GEN_INT (-4096); 12394 12395 /* VCALL_OFFSET is a negative number whose typical range can be 12396 estimated as -32768..0 in 32-bit mode. In almost all cases 12397 it is therefore cheaper to emit multiple add insns than 12398 spilling and loading the constant into a register (at least 12399 6 insns). */ 12400 while (! 
SPARC_SIMM13_P (vcall_offset)) 12401 { 12402 emit_insn (gen_add2_insn (scratch, increment)); 12403 vcall_offset += 4096; 12404 } 12405 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */ 12406 } 12407 12408 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */ 12409 emit_move_insn (scratch, gen_rtx_MEM (Pmode, 12410 gen_rtx_PLUS (Pmode, 12411 scratch, 12412 vcall_offset_rtx))); 12413 12414 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */ 12415 emit_insn (gen_add2_insn (this_rtx, scratch)); 12416 } 12417 12418 /* Generate a tail call to the target function. */ 12419 if (! TREE_USED (function)) 12420 { 12421 assemble_external (function); 12422 TREE_USED (function) = 1; 12423 } 12424 funexp = XEXP (DECL_RTL (function), 0); 12425 12426 if (flag_delayed_branch) 12427 { 12428 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 12429 insn = emit_call_insn (gen_sibcall (funexp)); 12430 SIBLING_CALL_P (insn) = 1; 12431 } 12432 else 12433 { 12434 /* The hoops we have to jump through in order to generate a sibcall 12435 without using delay slots... */ 12436 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1); 12437 12438 if (flag_pic) 12439 { 12440 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */ 12441 start_sequence (); 12442 load_got_register (); /* clobbers %o7 */ 12443 if (!TARGET_VXWORKS_RTP) 12444 pic_offset_table_rtx = got_register_rtx; 12445 scratch = sparc_legitimize_pic_address (funexp, scratch); 12446 seq = get_insns (); 12447 end_sequence (); 12448 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx); 12449 } 12450 else if (TARGET_ARCH32) 12451 { 12452 emit_insn (gen_rtx_SET (scratch, 12453 gen_rtx_HIGH (SImode, funexp))); 12454 emit_insn (gen_rtx_SET (scratch, 12455 gen_rtx_LO_SUM (SImode, scratch, funexp))); 12456 } 12457 else /* TARGET_ARCH64 */ 12458 { 12459 switch (sparc_code_model) 12460 { 12461 case CM_MEDLOW: 12462 case CM_MEDMID: 12463 /* The destination can serve as a temporary. */ 12464 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch); 12465 break; 12466 12467 case CM_MEDANY: 12468 case CM_EMBMEDANY: 12469 /* The destination cannot serve as a temporary. */ 12470 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */ 12471 start_sequence (); 12472 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg); 12473 seq = get_insns (); 12474 end_sequence (); 12475 emit_and_preserve (seq, spill_reg, 0); 12476 break; 12477 12478 default: 12479 gcc_unreachable (); 12480 } 12481 } 12482 12483 emit_jump_insn (gen_indirect_jump (scratch)); 12484 } 12485 12486 emit_barrier (); 12487 12488 /* Run just enough of rest_of_compilation to get the insns emitted. 12489 There's not really enough bulk here to make other passes such as 12490 instruction scheduling worth while. Note that use_thunk calls 12491 assemble_start_function and assemble_end_function. */ 12492 insn = get_insns (); 12493 shorten_branches (insn); 12494 final_start_function (insn, file, 1); 12495 final (insn, file, 1); 12496 final_end_function (); 12497 12498 reload_completed = 0; 12499 epilogue_completed = 0; 12500 } 12501 12502 /* Return true if sparc_output_mi_thunk would be able to output the 12503 assembler code for the thunk function specified by the arguments 12504 it is passed, and false otherwise. */ 12505 static bool 12506 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, 12507 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 12508 HOST_WIDE_INT vcall_offset, 12509 const_tree function ATTRIBUTE_UNUSED) 12510 { 12511 /* Bound the loop used in the default method above. 
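That loop emits one add per 4096 bytes of offset, so we accept either a VCALL_OFFSET no smaller than -32768 (at most a handful of adds) or the availability of %g5 as a scratch register.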
*/ 12512 return (vcall_offset >= -32768 || ! fixed_regs[5]); 12513 } 12514 12515 /* How to allocate a 'struct machine_function'. */ 12516 12517 static struct machine_function * 12518 sparc_init_machine_status (void) 12519 { 12520 return ggc_cleared_alloc<machine_function> (); 12521 } 12522 12523 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ 12524 12525 static unsigned HOST_WIDE_INT 12526 sparc_asan_shadow_offset (void) 12527 { 12528 return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29); 12529 } 12530 12531 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 12532 We need to emit DTP-relative relocations. */ 12533 12534 static void 12535 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x) 12536 { 12537 switch (size) 12538 { 12539 case 4: 12540 fputs ("\t.word\t%r_tls_dtpoff32(", file); 12541 break; 12542 case 8: 12543 fputs ("\t.xword\t%r_tls_dtpoff64(", file); 12544 break; 12545 default: 12546 gcc_unreachable (); 12547 } 12548 output_addr_const (file, x); 12549 fputs (")", file); 12550 } 12551 12552 /* Do whatever processing is required at the end of a file. */ 12553 12554 static void 12555 sparc_file_end (void) 12556 { 12557 /* If we need to emit the special GOT helper function, do so now. */ 12558 if (got_helper_needed) 12559 { 12560 const char *name = XSTR (got_helper_rtx, 0); 12561 #ifdef DWARF2_UNWIND_INFO 12562 bool do_cfi; 12563 #endif 12564 12565 if (USE_HIDDEN_LINKONCE) 12566 { 12567 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, 12568 get_identifier (name), 12569 build_function_type_list (void_type_node, 12570 NULL_TREE)); 12571 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, 12572 NULL_TREE, void_type_node); 12573 TREE_PUBLIC (decl) = 1; 12574 TREE_STATIC (decl) = 1; 12575 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); 12576 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; 12577 DECL_VISIBILITY_SPECIFIED (decl) = 1; 12578 resolve_unique_section (decl, 0, flag_function_sections); 12579 allocate_struct_function (decl, true); 12580 cfun->is_thunk = 1; 12581 current_function_decl = decl; 12582 init_varasm_status (); 12583 assemble_start_function (decl, name); 12584 } 12585 else 12586 { 12587 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); 12588 switch_to_section (text_section); 12589 if (align > 0) 12590 ASM_OUTPUT_ALIGN (asm_out_file, align); 12591 ASM_OUTPUT_LABEL (asm_out_file, name); 12592 } 12593 12594 #ifdef DWARF2_UNWIND_INFO 12595 do_cfi = dwarf2out_do_cfi_asm (); 12596 if (do_cfi) 12597 output_asm_insn (".cfi_startproc", NULL); 12598 #endif 12599 if (flag_delayed_branch) 12600 { 12601 output_asm_insn ("jmp\t%%o7+8", NULL); 12602 output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx); 12603 } 12604 else 12605 { 12606 output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx); 12607 output_asm_insn ("jmp\t%%o7+8", NULL); 12608 output_asm_insn (" nop", NULL); 12609 } 12610 #ifdef DWARF2_UNWIND_INFO 12611 if (do_cfi) 12612 output_asm_insn (".cfi_endproc", NULL); 12613 #endif 12614 } 12615 12616 if (NEED_INDICATE_EXEC_STACK) 12617 file_end_indicate_exec_stack (); 12618 12619 #ifdef TARGET_SOLARIS 12620 solaris_file_end (); 12621 #endif 12622 } 12623 12624 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING 12625 /* Implement TARGET_MANGLE_TYPE. 
*/ 12626 12627 static const char * 12628 sparc_mangle_type (const_tree type) 12629 { 12630 if (TARGET_ARCH32 12631 && TYPE_MAIN_VARIANT (type) == long_double_type_node 12632 && TARGET_LONG_DOUBLE_128) 12633 return "g"; 12634 12635 /* For all other types, use normal C++ mangling. */ 12636 return NULL; 12637 } 12638 #endif 12639 12640 /* Expand a membar instruction for various use cases. Both the LOAD_STORE 12641 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where 12642 bit 0 indicates that X is true, and bit 1 indicates Y is true. */ 12643 12644 void 12645 sparc_emit_membar_for_model (enum memmodel model, 12646 int load_store, int before_after) 12647 { 12648 /* Bits for the MEMBAR mmask field. */ 12649 const int LoadLoad = 1; 12650 const int StoreLoad = 2; 12651 const int LoadStore = 4; 12652 const int StoreStore = 8; 12653 12654 int mm = 0, implied = 0; 12655 12656 switch (sparc_memory_model) 12657 { 12658 case SMM_SC: 12659 /* Sequential Consistency. All memory transactions are immediately 12660 visible in sequential execution order. No barriers needed. */ 12661 implied = LoadLoad | StoreLoad | LoadStore | StoreStore; 12662 break; 12663 12664 case SMM_TSO: 12665 /* Total Store Ordering: all memory transactions with store semantics 12666 are followed by an implied StoreStore. */ 12667 implied |= StoreStore; 12668 12669 /* If we're not looking for a raw barrier (before+after), then atomic 12670 operations get the benefit of being both load and store. */ 12671 if (load_store == 3 && before_after == 1) 12672 implied |= StoreLoad; 12673 /* FALLTHRU */ 12674 12675 case SMM_PSO: 12676 /* Partial Store Ordering: all memory transactions with load semantics 12677 are followed by an implied LoadLoad | LoadStore. */ 12678 implied |= LoadLoad | LoadStore; 12679 12680 /* If we're not looking for a raw barrier (before+after), then atomic 12681 operations get the benefit of being both load and store. */ 12682 if (load_store == 3 && before_after == 2) 12683 implied |= StoreLoad | StoreStore; 12684 /* FALLTHRU */ 12685 12686 case SMM_RMO: 12687 /* Relaxed Memory Ordering: no implicit bits. */ 12688 break; 12689 12690 default: 12691 gcc_unreachable (); 12692 } 12693 12694 if (before_after & 1) 12695 { 12696 if (is_mm_release (model) || is_mm_acq_rel (model) 12697 || is_mm_seq_cst (model)) 12698 { 12699 if (load_store & 1) 12700 mm |= LoadLoad | StoreLoad; 12701 if (load_store & 2) 12702 mm |= LoadStore | StoreStore; 12703 } 12704 } 12705 if (before_after & 2) 12706 { 12707 if (is_mm_acquire (model) || is_mm_acq_rel (model) 12708 || is_mm_seq_cst (model)) 12709 { 12710 if (load_store & 1) 12711 mm |= LoadLoad | LoadStore; 12712 if (load_store & 2) 12713 mm |= StoreLoad | StoreStore; 12714 } 12715 } 12716 12717 /* Remove the bits implied by the system memory model. */ 12718 mm &= ~implied; 12719 12720 /* For raw barriers (before+after), always emit a barrier. 12721 This will become a compile-time barrier if needed. */ 12722 if (mm || before_after == 3) 12723 emit_insn (gen_membar (GEN_INT (mm))); 12724 } 12725 12726 /* Expand code to perform an 8 or 16-bit compare and swap by doing 32-bit 12727 compare and swap on the word containing the byte or half-word.
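The narrow value is shifted into its position within the containing word, the other bytes are merged in from the current memory contents, and the 32-bit compare-and-swap is retried in a loop until those other bytes are observed to be unchanged.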
*/ 12728 12729 static void 12730 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem, 12731 rtx oldval, rtx newval) 12732 { 12733 rtx addr1 = force_reg (Pmode, XEXP (mem, 0)); 12734 rtx addr = gen_reg_rtx (Pmode); 12735 rtx off = gen_reg_rtx (SImode); 12736 rtx oldv = gen_reg_rtx (SImode); 12737 rtx newv = gen_reg_rtx (SImode); 12738 rtx oldvalue = gen_reg_rtx (SImode); 12739 rtx newvalue = gen_reg_rtx (SImode); 12740 rtx res = gen_reg_rtx (SImode); 12741 rtx resv = gen_reg_rtx (SImode); 12742 rtx memsi, val, mask, cc; 12743 12744 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4)))); 12745 12746 if (Pmode != SImode) 12747 addr1 = gen_lowpart (SImode, addr1); 12748 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3)))); 12749 12750 memsi = gen_rtx_MEM (SImode, addr); 12751 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER); 12752 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem); 12753 12754 val = copy_to_reg (memsi); 12755 12756 emit_insn (gen_rtx_SET (off, 12757 gen_rtx_XOR (SImode, off, 12758 GEN_INT (GET_MODE (mem) == QImode 12759 ? 3 : 2)))); 12760 12761 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3)))); 12762 12763 if (GET_MODE (mem) == QImode) 12764 mask = force_reg (SImode, GEN_INT (0xff)); 12765 else 12766 mask = force_reg (SImode, GEN_INT (0xffff)); 12767 12768 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off))); 12769 12770 emit_insn (gen_rtx_SET (val, 12771 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), 12772 val))); 12773 12774 oldval = gen_lowpart (SImode, oldval); 12775 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off))); 12776 12777 newval = gen_lowpart_common (SImode, newval); 12778 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off))); 12779 12780 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask))); 12781 12782 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask))); 12783 12784 rtx_code_label *end_label = gen_label_rtx (); 12785 rtx_code_label *loop_label = gen_label_rtx (); 12786 emit_label (loop_label); 12787 12788 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val))); 12789 12790 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val))); 12791 12792 emit_move_insn (bool_result, const1_rtx); 12793 12794 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue)); 12795 12796 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label); 12797 12798 emit_insn (gen_rtx_SET (resv, 12799 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), 12800 res))); 12801 12802 emit_move_insn (bool_result, const0_rtx); 12803 12804 cc = gen_compare_reg_1 (NE, resv, val); 12805 emit_insn (gen_rtx_SET (val, resv)); 12806 12807 /* Use cbranchcc4 to separate the compare and branch! */ 12808 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx), 12809 cc, const0_rtx, loop_label)); 12810 12811 emit_label (end_label); 12812 12813 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask))); 12814 12815 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off))); 12816 12817 emit_move_insn (result, gen_lowpart (GET_MODE (result), res)); 12818 } 12819 12820 /* Expand code to perform a compare-and-swap. 
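Memory barriers appropriate for the memory model are emitted before and after via sparc_emit_membar_for_model; QImode and HImode operands are handled by the word-based helper above.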
*/ 12821 12822 void 12823 sparc_expand_compare_and_swap (rtx operands[]) 12824 { 12825 rtx bval, retval, mem, oldval, newval; 12826 machine_mode mode; 12827 enum memmodel model; 12828 12829 bval = operands[0]; 12830 retval = operands[1]; 12831 mem = operands[2]; 12832 oldval = operands[3]; 12833 newval = operands[4]; 12834 model = (enum memmodel) INTVAL (operands[6]); 12835 mode = GET_MODE (mem); 12836 12837 sparc_emit_membar_for_model (model, 3, 1); 12838 12839 if (reg_overlap_mentioned_p (retval, oldval)) 12840 oldval = copy_to_reg (oldval); 12841 12842 if (mode == QImode || mode == HImode) 12843 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval); 12844 else 12845 { 12846 rtx (*gen) (rtx, rtx, rtx, rtx); 12847 rtx x; 12848 12849 if (mode == SImode) 12850 gen = gen_atomic_compare_and_swapsi_1; 12851 else 12852 gen = gen_atomic_compare_and_swapdi_1; 12853 emit_insn (gen (retval, mem, oldval, newval)); 12854 12855 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1); 12856 if (x != bval) 12857 convert_move (bval, x, 1); 12858 } 12859 12860 sparc_emit_membar_for_model (model, 3, 2); 12861 } 12862 12863 void 12864 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel) 12865 { 12866 rtx t_1, t_2, t_3; 12867 12868 sel = gen_lowpart (DImode, sel); 12869 switch (vmode) 12870 { 12871 case E_V2SImode: 12872 /* inp = xxxxxxxAxxxxxxxB */ 12873 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16), 12874 NULL_RTX, 1, OPTAB_DIRECT); 12875 /* t_1 = ....xxxxxxxAxxx. */ 12876 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel), 12877 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT); 12878 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1), 12879 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT); 12880 /* sel = .......B */ 12881 /* t_1 = ...A.... */ 12882 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT); 12883 /* sel = ...A...B */ 12884 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1); 12885 /* sel = AAAABBBB * 4 */ 12886 t_1 = force_reg (SImode, GEN_INT (0x01230123)); 12887 /* sel = { A*4, A*4+1, A*4+2, ... } */ 12888 break; 12889 12890 case E_V4HImode: 12891 /* inp = xxxAxxxBxxxCxxxD */ 12892 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8), 12893 NULL_RTX, 1, OPTAB_DIRECT); 12894 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16), 12895 NULL_RTX, 1, OPTAB_DIRECT); 12896 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24), 12897 NULL_RTX, 1, OPTAB_DIRECT); 12898 /* t_1 = ..xxxAxxxBxxxCxx */ 12899 /* t_2 = ....xxxAxxxBxxxC */ 12900 /* t_3 = ......xxxAxxxBxx */ 12901 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel), 12902 GEN_INT (0x07), 12903 NULL_RTX, 1, OPTAB_DIRECT); 12904 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1), 12905 GEN_INT (0x0700), 12906 NULL_RTX, 1, OPTAB_DIRECT); 12907 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2), 12908 GEN_INT (0x070000), 12909 NULL_RTX, 1, OPTAB_DIRECT); 12910 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3), 12911 GEN_INT (0x07000000), 12912 NULL_RTX, 1, OPTAB_DIRECT); 12913 /* sel = .......D */ 12914 /* t_1 = .....C.. */ 12915 /* t_2 = ...B.... */ 12916 /* t_3 = .A...... 
*/ 12917 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT); 12918 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT); 12919 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT); 12920 /* sel = .A.B.C.D */ 12921 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1); 12922 /* sel = AABBCCDD * 2 */ 12923 t_1 = force_reg (SImode, GEN_INT (0x01010101)); 12924 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */ 12925 break; 12926 12927 case E_V8QImode: 12928 /* input = xAxBxCxDxExFxGxH */ 12929 sel = expand_simple_binop (DImode, AND, sel, 12930 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32 12931 | 0x0f0f0f0f), 12932 NULL_RTX, 1, OPTAB_DIRECT); 12933 /* sel = .A.B.C.D.E.F.G.H */ 12934 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4), 12935 NULL_RTX, 1, OPTAB_DIRECT); 12936 /* t_1 = ..A.B.C.D.E.F.G. */ 12937 sel = expand_simple_binop (DImode, IOR, sel, t_1, 12938 NULL_RTX, 1, OPTAB_DIRECT); 12939 /* sel = .AABBCCDDEEFFGGH */ 12940 sel = expand_simple_binop (DImode, AND, sel, 12941 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32 12942 | 0xff00ff), 12943 NULL_RTX, 1, OPTAB_DIRECT); 12944 /* sel = ..AB..CD..EF..GH */ 12945 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8), 12946 NULL_RTX, 1, OPTAB_DIRECT); 12947 /* t_1 = ....AB..CD..EF.. */ 12948 sel = expand_simple_binop (DImode, IOR, sel, t_1, 12949 NULL_RTX, 1, OPTAB_DIRECT); 12950 /* sel = ..ABABCDCDEFEFGH */ 12951 sel = expand_simple_binop (DImode, AND, sel, 12952 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff), 12953 NULL_RTX, 1, OPTAB_DIRECT); 12954 /* sel = ....ABCD....EFGH */ 12955 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16), 12956 NULL_RTX, 1, OPTAB_DIRECT); 12957 /* t_1 = ........ABCD.... */ 12958 sel = gen_lowpart (SImode, sel); 12959 t_1 = gen_lowpart (SImode, t_1); 12960 break; 12961 12962 default: 12963 gcc_unreachable (); 12964 } 12965 12966 /* Always perform the final addition/merge within the bmask insn. */ 12967 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1)); 12968 } 12969 12970 /* Implement TARGET_VEC_PERM_CONST. */ 12971 12972 static bool 12973 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, 12974 rtx op1, const vec_perm_indices &sel) 12975 { 12976 if (!TARGET_VIS2) 12977 return false; 12978 12979 /* All permutes are supported. */ 12980 if (!target) 12981 return true; 12982 12983 /* Force target-independent code to convert constant permutations on other 12984 modes down to V8QI. Rely on this to avoid the complexity of the byte 12985 order of the permutation. */ 12986 if (vmode != V8QImode) 12987 return false; 12988 12989 unsigned int i, mask; 12990 for (i = mask = 0; i < 8; ++i) 12991 mask |= (sel[i] & 0xf) << (28 - i*4); 12992 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode)); 12993 12994 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx)); 12995 emit_insn (gen_bshufflev8qi_vis (target, op0, op1)); 12996 return true; 12997 } 12998 12999 /* Implement TARGET_FRAME_POINTER_REQUIRED. */ 13000 13001 static bool 13002 sparc_frame_pointer_required (void) 13003 { 13004 /* If the stack pointer is dynamically modified in the function, it cannot 13005 serve as the frame pointer. */ 13006 if (cfun->calls_alloca) 13007 return true; 13008 13009 /* If the function receives nonlocal gotos, it needs to save the frame 13010 pointer in the nonlocal_goto_save_area object. */ 13011 if (cfun->has_nonlocal_label) 13012 return true; 13013 13014 /* In flat mode, that's it. 
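   Note that leaf register remapping is disabled for -mflat in
   sparc_conditional_register_usage below, so the leaf-function test used
   for the windowed case is not relevant here.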
*/ 13015 if (TARGET_FLAT) 13016 return false; 13017 13018 /* Otherwise, the frame pointer is required if the function isn't leaf, but 13019 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */ 13020 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ()); 13021 } 13022 13023 /* The way this is structured, we can't eliminate SFP in favor of SP 13024 if the frame pointer is required: we want to use the SFP->HFP elimination 13025 in that case. But the test in update_eliminables doesn't know we are 13026 assuming below that we only do the former elimination. */ 13027 13028 static bool 13029 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) 13030 { 13031 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required (); 13032 } 13033 13034 /* Return the hard frame pointer directly to bypass the stack bias. */ 13035 13036 static rtx 13037 sparc_builtin_setjmp_frame_value (void) 13038 { 13039 return hard_frame_pointer_rtx; 13040 } 13041 13042 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that 13043 they won't be allocated. */ 13044 13045 static void 13046 sparc_conditional_register_usage (void) 13047 { 13048 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) 13049 { 13050 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 13051 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 13052 } 13053 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */ 13054 /* then honor it. */ 13055 if (TARGET_ARCH32 && fixed_regs[5]) 13056 fixed_regs[5] = 1; 13057 else if (TARGET_ARCH64 && fixed_regs[5] == 2) 13058 fixed_regs[5] = 0; 13059 if (! TARGET_V9) 13060 { 13061 int regno; 13062 for (regno = SPARC_FIRST_V9_FP_REG; 13063 regno <= SPARC_LAST_V9_FP_REG; 13064 regno++) 13065 fixed_regs[regno] = 1; 13066 /* %fcc0 is used by v8 and v9. */ 13067 for (regno = SPARC_FIRST_V9_FCC_REG + 1; 13068 regno <= SPARC_LAST_V9_FCC_REG; 13069 regno++) 13070 fixed_regs[regno] = 1; 13071 } 13072 if (! TARGET_FPU) 13073 { 13074 int regno; 13075 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++) 13076 fixed_regs[regno] = 1; 13077 } 13078 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */ 13079 /* then honor it. Likewise with g3 and g4. */ 13080 if (fixed_regs[2] == 2) 13081 fixed_regs[2] = ! TARGET_APP_REGS; 13082 if (fixed_regs[3] == 2) 13083 fixed_regs[3] = ! TARGET_APP_REGS; 13084 if (TARGET_ARCH32 && fixed_regs[4] == 2) 13085 fixed_regs[4] = ! TARGET_APP_REGS; 13086 else if (TARGET_CM_EMBMEDANY) 13087 fixed_regs[4] = 1; 13088 else if (fixed_regs[4] == 2) 13089 fixed_regs[4] = 0; 13090 if (TARGET_FLAT) 13091 { 13092 int regno; 13093 /* Disable leaf functions. */ 13094 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER); 13095 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 13096 leaf_reg_remap [regno] = regno; 13097 } 13098 if (TARGET_VIS) 13099 global_regs[SPARC_GSR_REG] = 1; 13100 } 13101 13102 /* Implement TARGET_USE_PSEUDO_PIC_REG. */ 13103 13104 static bool 13105 sparc_use_pseudo_pic_reg (void) 13106 { 13107 return !TARGET_VXWORKS_RTP && flag_pic; 13108 } 13109 13110 /* Implement TARGET_INIT_PIC_REG. */ 13111 13112 static void 13113 sparc_init_pic_reg (void) 13114 { 13115 edge entry_edge; 13116 rtx_insn *seq; 13117 13118 /* In PIC mode, we need to always initialize the PIC register if optimization 13119 is enabled, because we are called from IRA and LRA may later force things 13120 to the constant pool for optimization purposes. 
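   The PIC register is therefore set up whenever we are optimizing, and
   otherwise only if it is already referenced.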
*/ 13121 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize)) 13122 return; 13123 13124 start_sequence (); 13125 load_got_register (); 13126 if (!TARGET_VXWORKS_RTP) 13127 emit_move_insn (pic_offset_table_rtx, got_register_rtx); 13128 seq = get_insns (); 13129 end_sequence (); 13130 13131 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); 13132 insert_insn_on_edge (seq, entry_edge); 13133 commit_one_edge_insertion (entry_edge); 13134 } 13135 13136 /* Implement TARGET_PREFERRED_RELOAD_CLASS: 13137 13138 - We can't load constants into FP registers. 13139 - We can't load FP constants into integer registers when soft-float, 13140 because there is no soft-float pattern with a r/F constraint. 13141 - We can't load FP constants into integer registers for TFmode unless 13142 it is 0.0L, because there is no movtf pattern with a r/F constraint. 13143 - Try and reload integer constants (symbolic or otherwise) back into 13144 registers directly, rather than having them dumped to memory. */ 13145 13146 static reg_class_t 13147 sparc_preferred_reload_class (rtx x, reg_class_t rclass) 13148 { 13149 machine_mode mode = GET_MODE (x); 13150 if (CONSTANT_P (x)) 13151 { 13152 if (FP_REG_CLASS_P (rclass) 13153 || rclass == GENERAL_OR_FP_REGS 13154 || rclass == GENERAL_OR_EXTRA_FP_REGS 13155 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU) 13156 || (mode == TFmode && ! const_zero_operand (x, mode))) 13157 return NO_REGS; 13158 13159 if (GET_MODE_CLASS (mode) == MODE_INT) 13160 return GENERAL_REGS; 13161 13162 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 13163 { 13164 if (! FP_REG_CLASS_P (rclass) 13165 || !(const_zero_operand (x, mode) 13166 || const_all_ones_operand (x, mode))) 13167 return NO_REGS; 13168 } 13169 } 13170 13171 if (TARGET_VIS3 13172 && ! TARGET_ARCH64 13173 && (rclass == EXTRA_FP_REGS 13174 || rclass == GENERAL_OR_EXTRA_FP_REGS)) 13175 { 13176 int regno = true_regnum (x); 13177 13178 if (SPARC_INT_REG_P (regno)) 13179 return (rclass == EXTRA_FP_REGS 13180 ? FP_REGS : GENERAL_OR_FP_REGS); 13181 } 13182 13183 return rclass; 13184 } 13185 13186 /* Return true if we use LRA instead of reload pass. */ 13187 13188 static bool 13189 sparc_lra_p (void) 13190 { 13191 return TARGET_LRA; 13192 } 13193 13194 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction, 13195 OPERANDS are its operands and OPCODE is the mnemonic to be used. */ 13196 13197 const char * 13198 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode) 13199 { 13200 char mulstr[32]; 13201 13202 gcc_assert (! 
TARGET_ARCH64); 13203 13204 if (sparc_check_64 (operands[1], insn) <= 0) 13205 output_asm_insn ("srl\t%L1, 0, %L1", operands); 13206 if (which_alternative == 1) 13207 output_asm_insn ("sllx\t%H1, 32, %H1", operands); 13208 if (GET_CODE (operands[2]) == CONST_INT) 13209 { 13210 if (which_alternative == 1) 13211 { 13212 output_asm_insn ("or\t%L1, %H1, %H1", operands); 13213 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode); 13214 output_asm_insn (mulstr, operands); 13215 return "srlx\t%L0, 32, %H0"; 13216 } 13217 else 13218 { 13219 output_asm_insn ("sllx\t%H1, 32, %3", operands); 13220 output_asm_insn ("or\t%L1, %3, %3", operands); 13221 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode); 13222 output_asm_insn (mulstr, operands); 13223 output_asm_insn ("srlx\t%3, 32, %H0", operands); 13224 return "mov\t%3, %L0"; 13225 } 13226 } 13227 else if (rtx_equal_p (operands[1], operands[2])) 13228 { 13229 if (which_alternative == 1) 13230 { 13231 output_asm_insn ("or\t%L1, %H1, %H1", operands); 13232 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode); 13233 output_asm_insn (mulstr, operands); 13234 return "srlx\t%L0, 32, %H0"; 13235 } 13236 else 13237 { 13238 output_asm_insn ("sllx\t%H1, 32, %3", operands); 13239 output_asm_insn ("or\t%L1, %3, %3", operands); 13240 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode); 13241 output_asm_insn (mulstr, operands); 13242 output_asm_insn ("srlx\t%3, 32, %H0", operands); 13243 return "mov\t%3, %L0"; 13244 } 13245 } 13246 if (sparc_check_64 (operands[2], insn) <= 0) 13247 output_asm_insn ("srl\t%L2, 0, %L2", operands); 13248 if (which_alternative == 1) 13249 { 13250 output_asm_insn ("or\t%L1, %H1, %H1", operands); 13251 output_asm_insn ("sllx\t%H2, 32, %L1", operands); 13252 output_asm_insn ("or\t%L2, %L1, %L1", operands); 13253 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode); 13254 output_asm_insn (mulstr, operands); 13255 return "srlx\t%L0, 32, %H0"; 13256 } 13257 else 13258 { 13259 output_asm_insn ("sllx\t%H1, 32, %3", operands); 13260 output_asm_insn ("sllx\t%H2, 32, %4", operands); 13261 output_asm_insn ("or\t%L1, %3, %3", operands); 13262 output_asm_insn ("or\t%L2, %4, %4", operands); 13263 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode); 13264 output_asm_insn (mulstr, operands); 13265 output_asm_insn ("srlx\t%3, 32, %H0", operands); 13266 return "mov\t%3, %L0"; 13267 } 13268 } 13269 13270 /* Subroutine of sparc_expand_vector_init. Emit code to initialize 13271 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE 13272 and INNER_MODE are the modes describing TARGET. */ 13273 13274 static void 13275 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode, 13276 machine_mode inner_mode) 13277 { 13278 rtx t1, final_insn, sel; 13279 int bmask; 13280 13281 t1 = gen_reg_rtx (mode); 13282 13283 elt = convert_modes (SImode, inner_mode, elt, true); 13284 emit_move_insn (gen_lowpart(SImode, t1), elt); 13285 13286 switch (mode) 13287 { 13288 case E_V2SImode: 13289 final_insn = gen_bshufflev2si_vis (target, t1, t1); 13290 bmask = 0x45674567; 13291 break; 13292 case E_V4HImode: 13293 final_insn = gen_bshufflev4hi_vis (target, t1, t1); 13294 bmask = 0x67676767; 13295 break; 13296 case E_V8QImode: 13297 final_insn = gen_bshufflev8qi_vis (target, t1, t1); 13298 bmask = 0x77777777; 13299 break; 13300 default: 13301 gcc_unreachable (); 13302 } 13303 13304 sel = force_reg (SImode, GEN_INT (bmask)); 13305 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx)); 13306 emit_insn (final_insn); 13307 } 13308 13309 /* Subroutine of sparc_expand_vector_init. 
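   Used when the VIS2 BSHUFFLE instruction is not available.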
Emit code to initialize 13310 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */ 13311 13312 static void 13313 vector_init_fpmerge (rtx target, rtx elt) 13314 { 13315 rtx t1, t2, t2_low, t3, t3_low; 13316 13317 t1 = gen_reg_rtx (V4QImode); 13318 elt = convert_modes (SImode, QImode, elt, true); 13319 emit_move_insn (gen_lowpart (SImode, t1), elt); 13320 13321 t2 = gen_reg_rtx (V8QImode); 13322 t2_low = gen_lowpart (V4QImode, t2); 13323 emit_insn (gen_fpmerge_vis (t2, t1, t1)); 13324 13325 t3 = gen_reg_rtx (V8QImode); 13326 t3_low = gen_lowpart (V4QImode, t3); 13327 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low)); 13328 13329 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low)); 13330 } 13331 13332 /* Subroutine of sparc_expand_vector_init. Emit code to initialize 13333 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */ 13334 13335 static void 13336 vector_init_faligndata (rtx target, rtx elt) 13337 { 13338 rtx t1 = gen_reg_rtx (V4HImode); 13339 int i; 13340 13341 elt = convert_modes (SImode, HImode, elt, true); 13342 emit_move_insn (gen_lowpart (SImode, t1), elt); 13343 13344 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode), 13345 force_reg (SImode, GEN_INT (6)), 13346 const0_rtx)); 13347 13348 for (i = 0; i < 4; i++) 13349 emit_insn (gen_faligndatav4hi_vis (target, t1, target)); 13350 } 13351 13352 /* Emit code to initialize TARGET to values for individual fields VALS. */ 13353 13354 void 13355 sparc_expand_vector_init (rtx target, rtx vals) 13356 { 13357 const machine_mode mode = GET_MODE (target); 13358 const machine_mode inner_mode = GET_MODE_INNER (mode); 13359 const int n_elts = GET_MODE_NUNITS (mode); 13360 int i, n_var = 0; 13361 bool all_same = true; 13362 rtx mem; 13363 13364 for (i = 0; i < n_elts; i++) 13365 { 13366 rtx x = XVECEXP (vals, 0, i); 13367 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x))) 13368 n_var++; 13369 13370 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 13371 all_same = false; 13372 } 13373 13374 if (n_var == 0) 13375 { 13376 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); 13377 return; 13378 } 13379 13380 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode)) 13381 { 13382 if (GET_MODE_SIZE (inner_mode) == 4) 13383 { 13384 emit_move_insn (gen_lowpart (SImode, target), 13385 gen_lowpart (SImode, XVECEXP (vals, 0, 0))); 13386 return; 13387 } 13388 else if (GET_MODE_SIZE (inner_mode) == 8) 13389 { 13390 emit_move_insn (gen_lowpart (DImode, target), 13391 gen_lowpart (DImode, XVECEXP (vals, 0, 0))); 13392 return; 13393 } 13394 } 13395 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode) 13396 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode)) 13397 { 13398 emit_move_insn (gen_highpart (word_mode, target), 13399 gen_lowpart (word_mode, XVECEXP (vals, 0, 0))); 13400 emit_move_insn (gen_lowpart (word_mode, target), 13401 gen_lowpart (word_mode, XVECEXP (vals, 0, 1))); 13402 return; 13403 } 13404 13405 if (all_same && GET_MODE_SIZE (mode) == 8) 13406 { 13407 if (TARGET_VIS2) 13408 { 13409 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode); 13410 return; 13411 } 13412 if (mode == V8QImode) 13413 { 13414 vector_init_fpmerge (target, XVECEXP (vals, 0, 0)); 13415 return; 13416 } 13417 if (mode == V4HImode) 13418 { 13419 vector_init_faligndata (target, XVECEXP (vals, 0, 0)); 13420 return; 13421 } 13422 } 13423 13424 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 13425 for (i = 0; i < n_elts; i++) 13426 emit_move_insn 
(adjust_address_nv (mem, inner_mode, 13427 i * GET_MODE_SIZE (inner_mode)), 13428 XVECEXP (vals, 0, i)); 13429 emit_move_insn (target, mem); 13430 } 13431 13432 /* Implement TARGET_SECONDARY_RELOAD. */ 13433 13434 static reg_class_t 13435 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, 13436 machine_mode mode, secondary_reload_info *sri) 13437 { 13438 enum reg_class rclass = (enum reg_class) rclass_i; 13439 13440 sri->icode = CODE_FOR_nothing; 13441 sri->extra_cost = 0; 13442 13443 /* We need a temporary when loading/storing a HImode/QImode value 13444 between memory and the FPU registers. This can happen when combine puts 13445 a paradoxical subreg in a float/fix conversion insn. */ 13446 if (FP_REG_CLASS_P (rclass) 13447 && (mode == HImode || mode == QImode) 13448 && (GET_CODE (x) == MEM 13449 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 13450 && true_regnum (x) == -1))) 13451 return GENERAL_REGS; 13452 13453 /* On 32-bit we need a temporary when loading/storing a DFmode value 13454 between unaligned memory and the upper FPU registers. */ 13455 if (TARGET_ARCH32 13456 && rclass == EXTRA_FP_REGS 13457 && mode == DFmode 13458 && GET_CODE (x) == MEM 13459 && ! mem_min_alignment (x, 8)) 13460 return FP_REGS; 13461 13462 if (((TARGET_CM_MEDANY 13463 && symbolic_operand (x, mode)) 13464 || (TARGET_CM_EMBMEDANY 13465 && text_segment_operand (x, mode))) 13466 && ! flag_pic) 13467 { 13468 if (in_p) 13469 sri->icode = direct_optab_handler (reload_in_optab, mode); 13470 else 13471 sri->icode = direct_optab_handler (reload_out_optab, mode); 13472 return NO_REGS; 13473 } 13474 13475 if (TARGET_VIS3 && TARGET_ARCH32) 13476 { 13477 int regno = true_regnum (x); 13478 13479 /* When using VIS3 fp<-->int register moves, on 32-bit we have 13480 to move 8-byte values in 4-byte pieces. This only works via 13481 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to 13482 move between EXTRA_FP_REGS and GENERAL_REGS, we will need 13483 an FP_REGS intermediate move. */ 13484 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno)) 13485 || ((general_or_i64_p (rclass) 13486 || rclass == GENERAL_OR_FP_REGS) 13487 && SPARC_FP_REG_P (regno))) 13488 { 13489 sri->extra_cost = 2; 13490 return FP_REGS; 13491 } 13492 } 13493 13494 return NO_REGS; 13495 } 13496 13497 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. 13498 13499 On SPARC when not VIS3 it is not possible to directly move data 13500 between GENERAL_REGS and FP_REGS. */ 13501 13502 static bool 13503 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1, 13504 reg_class_t class2) 13505 { 13506 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2)) 13507 && (! TARGET_VIS3 13508 || GET_MODE_SIZE (mode) > 8 13509 || GET_MODE_SIZE (mode) < 4)); 13510 } 13511 13512 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. 13513 13514 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9 13515 because the movsi and movsf patterns don't handle r/f moves. 13516 For v8 we copy the default definition. 
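   (widen anything narrower than BITS_PER_WORD to a word-sized mode).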
*/ 13517 13518 static machine_mode 13519 sparc_secondary_memory_needed_mode (machine_mode mode) 13520 { 13521 if (TARGET_ARCH64) 13522 { 13523 if (GET_MODE_BITSIZE (mode) < 32) 13524 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); 13525 return mode; 13526 } 13527 else 13528 { 13529 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD) 13530 return mode_for_size (BITS_PER_WORD, 13531 GET_MODE_CLASS (mode), 0).require (); 13532 return mode; 13533 } 13534 } 13535 13536 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into 13537 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */ 13538 13539 bool 13540 sparc_expand_conditional_move (machine_mode mode, rtx *operands) 13541 { 13542 enum rtx_code rc = GET_CODE (operands[1]); 13543 machine_mode cmp_mode; 13544 rtx cc_reg, dst, cmp; 13545 13546 cmp = operands[1]; 13547 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64) 13548 return false; 13549 13550 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD) 13551 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc); 13552 13553 cmp_mode = GET_MODE (XEXP (cmp, 0)); 13554 rc = GET_CODE (cmp); 13555 13556 dst = operands[0]; 13557 if (! rtx_equal_p (operands[2], dst) 13558 && ! rtx_equal_p (operands[3], dst)) 13559 { 13560 if (reg_overlap_mentioned_p (dst, cmp)) 13561 dst = gen_reg_rtx (mode); 13562 13563 emit_move_insn (dst, operands[3]); 13564 } 13565 else if (operands[2] == dst) 13566 { 13567 operands[2] = operands[3]; 13568 13569 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT) 13570 rc = reverse_condition_maybe_unordered (rc); 13571 else 13572 rc = reverse_condition (rc); 13573 } 13574 13575 if (XEXP (cmp, 1) == const0_rtx 13576 && GET_CODE (XEXP (cmp, 0)) == REG 13577 && cmp_mode == DImode 13578 && v9_regcmp_p (rc)) 13579 cc_reg = XEXP (cmp, 0); 13580 else 13581 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1)); 13582 13583 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx); 13584 13585 emit_insn (gen_rtx_SET (dst, 13586 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst))); 13587 13588 if (dst != operands[0]) 13589 emit_move_insn (operands[0], dst); 13590 13591 return true; 13592 } 13593 13594 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2] 13595 into OPERANDS[0] in MODE, depending on the outcome of the comparison of 13596 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition. 13597 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine 13598 code to be used for the condition mask. 
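   LT and GE comparisons are first rewritten by swapping the comparison
   operands.  The expansion then emits the vector comparison (FCODE unspec)
   into a mask register, turns that mask into a byte-shuffle control in %gsr
   by means of the CCODE unspec, and finally merges OPERANDS[1] and
   OPERANDS[2] into OPERANDS[0] with BSHUFFLE under control of %gsr.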
*/ 13599 13600 void 13601 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode) 13602 { 13603 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr; 13604 enum rtx_code code = GET_CODE (operands[3]); 13605 13606 mask = gen_reg_rtx (Pmode); 13607 cop0 = operands[4]; 13608 cop1 = operands[5]; 13609 if (code == LT || code == GE) 13610 { 13611 rtx t; 13612 13613 code = swap_condition (code); 13614 t = cop0; cop0 = cop1; cop1 = t; 13615 } 13616 13617 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG); 13618 13619 fcmp = gen_rtx_UNSPEC (Pmode, 13620 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)), 13621 fcode); 13622 13623 cmask = gen_rtx_UNSPEC (DImode, 13624 gen_rtvec (2, mask, gsr), 13625 ccode); 13626 13627 bshuf = gen_rtx_UNSPEC (mode, 13628 gen_rtvec (3, operands[1], operands[2], gsr), 13629 UNSPEC_BSHUFFLE); 13630 13631 emit_insn (gen_rtx_SET (mask, fcmp)); 13632 emit_insn (gen_rtx_SET (gsr, cmask)); 13633 13634 emit_insn (gen_rtx_SET (operands[0], bshuf)); 13635 } 13636 13637 /* On sparc, any mode which naturally allocates into the float 13638 registers should return 4 here. */ 13639 13640 unsigned int 13641 sparc_regmode_natural_size (machine_mode mode) 13642 { 13643 int size = UNITS_PER_WORD; 13644 13645 if (TARGET_ARCH64) 13646 { 13647 enum mode_class mclass = GET_MODE_CLASS (mode); 13648 13649 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT) 13650 size = 4; 13651 } 13652 13653 return size; 13654 } 13655 13656 /* Implement TARGET_HARD_REGNO_NREGS. 13657 13658 On SPARC, ordinary registers hold 32 bits worth; this means both 13659 integer and floating point registers. On v9, integer regs hold 64 13660 bits worth; floating point regs hold 32 bits worth (this includes the 13661 new fp regs as even the odd ones are included in the hard register 13662 count). */ 13663 13664 static unsigned int 13665 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode) 13666 { 13667 if (regno == SPARC_GSR_REG) 13668 return 1; 13669 if (TARGET_ARCH64) 13670 { 13671 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM) 13672 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); 13673 return CEIL (GET_MODE_SIZE (mode), 4); 13674 } 13675 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); 13676 } 13677 13678 /* Implement TARGET_HARD_REGNO_MODE_OK. 13679 13680 ??? Because of the funny way we pass parameters we should allow certain 13681 ??? types of float/complex values to be in integer registers during 13682 ??? RTL generation. This only matters on arch32. */ 13683 13684 static bool 13685 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode) 13686 { 13687 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0; 13688 } 13689 13690 /* Implement TARGET_MODES_TIEABLE_P. 13691 13692 For V9 we have to deal with the fact that only the lower 32 floating 13693 point registers are 32-bit addressable. */ 13694 13695 static bool 13696 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2) 13697 { 13698 enum mode_class mclass1, mclass2; 13699 unsigned short size1, size2; 13700 13701 if (mode1 == mode2) 13702 return true; 13703 13704 mclass1 = GET_MODE_CLASS (mode1); 13705 mclass2 = GET_MODE_CLASS (mode2); 13706 if (mclass1 != mclass2) 13707 return false; 13708 13709 if (! TARGET_V9) 13710 return true; 13711 13712 /* Classes are the same and we are V9 so we have to deal with upper 13713 vs. lower floating point registers. 
If one of the modes is a 13714 4-byte mode, and the other is not, we have to mark them as not 13715 tieable because only the lower 32 floating point register are 13716 addressable 32-bits at a time. 13717 13718 We can't just test explicitly for SFmode, otherwise we won't 13719 cover the vector mode cases properly. */ 13720 13721 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT) 13722 return true; 13723 13724 size1 = GET_MODE_SIZE (mode1); 13725 size2 = GET_MODE_SIZE (mode2); 13726 if ((size1 > 4 && size2 == 4) 13727 || (size2 > 4 && size1 == 4)) 13728 return false; 13729 13730 return true; 13731 } 13732 13733 /* Implement TARGET_CSTORE_MODE. */ 13734 13735 static scalar_int_mode 13736 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED) 13737 { 13738 return (TARGET_ARCH64 ? DImode : SImode); 13739 } 13740 13741 /* Return the compound expression made of T1 and T2. */ 13742 13743 static inline tree 13744 compound_expr (tree t1, tree t2) 13745 { 13746 return build2 (COMPOUND_EXPR, void_type_node, t1, t2); 13747 } 13748 13749 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ 13750 13751 static void 13752 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 13753 { 13754 if (!TARGET_FPU) 13755 return; 13756 13757 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5; 13758 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23; 13759 13760 /* We generate the equivalent of feholdexcept (&fenv_var): 13761 13762 unsigned int fenv_var; 13763 __builtin_store_fsr (&fenv_var); 13764 13765 unsigned int tmp1_var; 13766 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask); 13767 13768 __builtin_load_fsr (&tmp1_var); */ 13769 13770 tree fenv_var = create_tmp_var_raw (unsigned_type_node); 13771 TREE_ADDRESSABLE (fenv_var) = 1; 13772 tree fenv_addr = build_fold_addr_expr (fenv_var); 13773 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR]; 13774 tree hold_stfsr 13775 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, 13776 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE); 13777 13778 tree tmp1_var = create_tmp_var_raw (unsigned_type_node); 13779 TREE_ADDRESSABLE (tmp1_var) = 1; 13780 tree masked_fenv_var 13781 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, 13782 build_int_cst (unsigned_type_node, 13783 ~(accrued_exception_mask | trap_enable_mask))); 13784 tree hold_mask 13785 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var, 13786 NULL_TREE, NULL_TREE); 13787 13788 tree tmp1_addr = build_fold_addr_expr (tmp1_var); 13789 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR]; 13790 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr); 13791 13792 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr); 13793 13794 /* We reload the value of tmp1_var to clear the exceptions: 13795 13796 __builtin_load_fsr (&tmp1_var); */ 13797 13798 *clear = build_call_expr (ldfsr, 1, tmp1_addr); 13799 13800 /* We generate the equivalent of feupdateenv (&fenv_var): 13801 13802 unsigned int tmp2_var; 13803 __builtin_store_fsr (&tmp2_var); 13804 13805 __builtin_load_fsr (&fenv_var); 13806 13807 if (SPARC_LOW_FE_EXCEPT_VALUES) 13808 tmp2_var >>= 5; 13809 __atomic_feraiseexcept ((int) tmp2_var); */ 13810 13811 tree tmp2_var = create_tmp_var_raw (unsigned_type_node); 13812 TREE_ADDRESSABLE (tmp2_var) = 1; 13813 tree tmp2_addr = build_fold_addr_expr (tmp2_var); 13814 tree update_stfsr 13815 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var, 13816 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE); 13817 
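  /* Restore the FSR image saved on entry to the hold sequence.  */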
13818 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr); 13819 13820 tree atomic_feraiseexcept 13821 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); 13822 tree update_call 13823 = build_call_expr (atomic_feraiseexcept, 1, 13824 fold_convert (integer_type_node, tmp2_var)); 13825 13826 if (SPARC_LOW_FE_EXCEPT_VALUES) 13827 { 13828 tree shifted_tmp2_var 13829 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var, 13830 build_int_cst (unsigned_type_node, 5)); 13831 tree update_shift 13832 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var); 13833 update_call = compound_expr (update_shift, update_call); 13834 } 13835 13836 *update 13837 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call); 13838 } 13839 13840 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port. 13841 13842 SImode loads to floating-point registers are not zero-extended. 13843 The definition for LOAD_EXTEND_OP specifies that integer loads 13844 narrower than BITS_PER_WORD will be zero-extended. As a result, 13845 we inhibit changes from SImode unless they are to a mode that is 13846 identical in size. 13847 13848 Likewise for SFmode, since word-mode paradoxical subregs are 13849 problematic on big-endian architectures. */ 13850 13851 static bool 13852 sparc_can_change_mode_class (machine_mode from, machine_mode to, 13853 reg_class_t rclass) 13854 { 13855 if (TARGET_ARCH64 13856 && GET_MODE_SIZE (from) == 4 13857 && GET_MODE_SIZE (to) != 4) 13858 return !reg_classes_intersect_p (rclass, FP_REGS); 13859 return true; 13860 } 13861 13862 /* Implement TARGET_CONSTANT_ALIGNMENT. */ 13863 13864 static HOST_WIDE_INT 13865 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align) 13866 { 13867 if (TREE_CODE (exp) == STRING_CST) 13868 return MAX (align, FASTEST_ALIGNMENT); 13869 return align; 13870 } 13871 13872 #include "gt-sparc.h" 13873