/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2018 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "tree-vector-builder.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};
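
/* Illustrative note (not part of the original sources): with the
   ultrasparc_costs entries below (int_mul = COSTS_N_INSNS (4) and
   int_mul_bit_factor = 2), the formula above would charge a multiply
   whose rs1 operand has its highest set bit at position 11 as
   COSTS_N_INSNS (4) + (11 - 3) / 2, i.e. the base multiply cost plus
   4 extra cost units.  */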

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
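
/* Illustration (not part of the original sources): under leaf_reg_remap
   above, the incoming registers %i0-%i5 (hard regs 24-29) are rewritten to
   the outgoing registers %o0-%o5 (hard regs 8-13), %i7 (31) to %o7 (15),
   and %sp (14) is left in place; the -1 entries correspond to the 0 entries
   in sparc_leaf_regs and denote registers that may not appear in a function
   eligible for the leaf optimization.  */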

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
static bool sparc_use_pseudo_pic_reg (void);
static void sparc_init_pic_reg (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
					   reg_class_t);
static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
					 reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					    const vec_perm_indices &);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       do_diagnostic, handler, exclude } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG sparc_init_pic_reg

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* True if any of INSN's source register(s) is REG.  */

static bool
insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
{
  extract_insn (insn);
  return ((REG_P (recog_data.operand[1])
	   && REGNO (recog_data.operand[1]) == reg)
	  || (recog_data.n_operands == 3
	      && REG_P (recog_data.operand[2])
	      && REGNO (recog_data.operand[2]) == reg));
}

/* True if INSN is a floating-point division or square-root.  */

static bool
div_sqrt_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is a floating-point instruction.  */

static bool
fpop_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPMOVE:
    case TYPE_FPCMOVE:
    case TYPE_FP:
    case TYPE_FPCMP:
    case TYPE_FPMUL:
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is an atomic instruction.  */

static bool
atomic_insn_for_leon3_p (rtx_insn *insn)
{
  switch (INSN_CODE (insn))
    {
    case CODE_FOR_swapsi:
    case CODE_FOR_ldstub:
    case CODE_FOR_atomic_compare_and_swap_leon3_1:
      return true;
    default:
      return false;
    }
}

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */
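
/* Illustration (not part of the original sources): with -mfix-gr712rc, an
   integer branch whose target begins with a floating-point operation, e.g.

	bne	.L1
	 nop
	...
     .L1:
	faddd	%f0, %f2, %f4

   has a nop inserted right before the faddd at .L1 by the pass below.  */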

/* True if INSN is a md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN) \
  (NONDEBUG_INSN_P (INSN) \
   && GET_CODE (PATTERN (INSN)) != USE \
   && GET_CODE (PATTERN (INSN)) != CLOBBER)

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;
      rtx_insn *jump;
      rtx_sequence *seq;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn)
	  && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
	{
	  jump = seq->insn (0);
	  insn = seq->insn (1);
	}
      else if (JUMP_P (insn))
	jump = insn;
      else
	jump = NULL;

      /* Place a NOP at the branch target of an integer branch if it is a
	 floating-point operation or a floating-point branch.  */
      if (sparc_fix_gr712rc
	  && jump
	  && jump_to_label_p (jump)
	  && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
	{
	  rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	  if (target
	      && (fpop_insn_p (target)
		  || (JUMP_P (target)
		      && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
	    emit_insn_before (gen_nop (), target);
	}

      /* Insert a NOP between load instruction and atomic instruction.  Insert
	 a NOP at branch target if there is a load in delay slot and an atomic
	 instruction at branch target.  */
      if (sparc_fix_ut700
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && mem_ref (SET_SRC (set))
	  && REG_P (SET_DEST (set)))
	{
	  if (jump && jump_to_label_p (jump))
	    {
	      rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	      if (target && atomic_insn_for_leon3_p (target))
		emit_insn_before (gen_nop (), target);
	    }

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  if (atomic_insn_for_leon3_p (next))
	    insert_nop = true;
	}

      /* Look for a sequence that starts with a fdiv or fsqrt instruction and
	 ends with another fdiv or fsqrt instruction with no dependencies on
	 the former, along with an appropriate pattern in between.  */
      if (sparc_fix_lost_divsqrt
	  && NONJUMP_INSN_P (insn)
	  && div_sqrt_insn_p (insn))
	{
	  int i;
	  int fp_found = 0;
	  rtx_insn *after;

	  const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 4; i++)
	    {
	      /* Count floating-point operations.  */
	      if (i != 3 && fpop_insn_p (after))
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (insn_uses_reg_p (after, dest_reg))
		    break;
		  fp_found++;
		}

	      /* Count floating-point loads.  */
	      if (i != 3
		  && (set = single_set (after)) != NULL_RTX
		  && REG_P (SET_DEST (set))
		  && REGNO (SET_DEST (set)) > 31)
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (REGNO (SET_DEST (set)) == dest_reg)
		    break;
		  fp_found++;
		}

	      /* Check if this is a problematic sequence.  */
	      if (i > 1
		  && fp_found >= 2
		  && div_sqrt_insn_p (after))
		{
		  /* If this is the short version of the problematic
		     sequence we add two NOPs in a row to also prevent
		     the long version.  */
		  if (i == 2)
		    emit_insn_before (gen_nop (), next);
		  insert_nop = true;
		  break;
		}

	      /* No need to scan past a second div/sqrt.  */
	      if (div_sqrt_insn_p (after))
		break;

	      /* Insert NOP before branch.  */
	      if (i < 3
		  && (!NONJUMP_INSN_P (after)
		      || GET_CODE (PATTERN (after)) == SEQUENCE))
		{
		  insert_nop = true;
		  break;
		}

	      after = next_active_insn (after);
	      if (!after)
		break;
	    }
	}

      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && MEM_P (SET_DEST (set)))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* Skip empty assembly statements.  */
	      if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
		  || (USEFUL_INSN_P (after)
		      && (asm_noperands (PATTERN (after)) >= 0)
		      && !strcmp (decode_asm_operands (PATTERN (after),
						       NULL, NULL, NULL,
						       NULL, NULL), "")))
		after = next_active_insn (after);
	      if (!after)
		break;

	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if ((set = single_set (after)) != NULL_RTX
		      && MEM_P (SET_DEST (set)))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if ((set = single_set (after)) != NULL_RTX
		      && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
		    break;

		  after = next_active_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && (set = single_set (after)) != NULL_RTX
		  && MEM_P (SET_DEST (set)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && mem_ref (SET_SRC (set))
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31
	       && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		       ld [address], %fx+1
		       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		       ld [address], %fx+1
		       FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		   ld [address], %fx+1
		   fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;

	      /* GOT accesses use LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
		       dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst
	  || sparc_fix_gr712rc || sparc_fix_ut700 || sparc_fix_lost_divsqrt;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FSMULD)
    fprintf (stderr, "FSMULD ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_VIS4B)
    fprintf (stderr, "VIS4B ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { TARGET_CPU_m8, PROCESSOR_M8 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA|MASK_FSMULD, 0 },
    { "cypress", MASK_ISA|MASK_FSMULD, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8 },
    { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
    { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
    { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
    /* UltraSPARC M8 */
    { "m8", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
  };
  const struct cpu_table *cpu;
  unsigned int i;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown -mdebug-%s switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  /* Enable the FsMULd instruction by default if not explicitly specified by
     the user.  It may be later disabled by the CPU (explicitly or not).  */
  if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
    target_flags |= MASK_FSMULD;

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (!TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double.  */
  if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
	{
	  for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
	    if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
	      break;
	  if (cmodel->name == NULL)
	    error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
	  else
	    sparc_cmodel = cmodel->value;
	}
      else
	error ("-mcmodel= is not supported on 32-bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("-fcall-saved-REG is not supported for out registers");
	call_used_regs [i] = 1;
      }

  /* Set the default CPU if no -mcpu option was specified.  */
*/ 1805 if (!global_options_set.x_sparc_cpu_and_features) 1806 { 1807 for (def = &cpu_default[0]; def->cpu != -1; ++def) 1808 if (def->cpu == TARGET_CPU_DEFAULT) 1809 break; 1810 gcc_assert (def->cpu != -1); 1811 sparc_cpu_and_features = def->processor; 1812 } 1813 1814 /* Set the default CPU if no -mtune option was specified. */ 1815 if (!global_options_set.x_sparc_cpu) 1816 sparc_cpu = sparc_cpu_and_features; 1817 1818 cpu = &cpu_table[(int) sparc_cpu_and_features]; 1819 1820 if (TARGET_DEBUG_OPTIONS) 1821 { 1822 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name); 1823 dump_target_flags ("cpu->disable", cpu->disable); 1824 dump_target_flags ("cpu->enable", cpu->enable); 1825 } 1826 1827 target_flags &= ~cpu->disable; 1828 target_flags |= (cpu->enable 1829 #ifndef HAVE_AS_FMAF_HPC_VIS3 1830 & ~(MASK_FMAF | MASK_VIS3) 1831 #endif 1832 #ifndef HAVE_AS_SPARC4 1833 & ~MASK_CBCOND 1834 #endif 1835 #ifndef HAVE_AS_SPARC5_VIS4 1836 & ~(MASK_VIS4 | MASK_SUBXC) 1837 #endif 1838 #ifndef HAVE_AS_SPARC6 1839 & ~(MASK_VIS4B) 1840 #endif 1841 #ifndef HAVE_AS_LEON 1842 & ~(MASK_LEON | MASK_LEON3) 1843 #endif 1844 & ~(target_flags_explicit & MASK_FEATURES) 1845 ); 1846 1847 /* -mvis2 implies -mvis. */ 1848 if (TARGET_VIS2) 1849 target_flags |= MASK_VIS; 1850 1851 /* -mvis3 implies -mvis2 and -mvis. */ 1852 if (TARGET_VIS3) 1853 target_flags |= MASK_VIS2 | MASK_VIS; 1854 1855 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */ 1856 if (TARGET_VIS4) 1857 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS; 1858 1859 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */ 1860 if (TARGET_VIS4B) 1861 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS; 1862 1863 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if 1864 FPU is disabled. */ 1865 if (!TARGET_FPU) 1866 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4 1867 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD); 1868 1869 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions 1870 are available; -m64 also implies v9. */ 1871 if (TARGET_VIS || TARGET_ARCH64) 1872 { 1873 target_flags |= MASK_V9; 1874 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE); 1875 } 1876 1877 /* -mvis also implies -mv8plus on 32-bit. */ 1878 if (TARGET_VIS && !TARGET_ARCH64) 1879 target_flags |= MASK_V8PLUS; 1880 1881 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */ 1882 if (TARGET_V9 && TARGET_ARCH32) 1883 target_flags |= MASK_DEPRECATED_V8_INSNS; 1884 1885 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */ 1886 if (!TARGET_V9 || TARGET_ARCH64) 1887 target_flags &= ~MASK_V8PLUS; 1888 1889 /* Don't use stack biasing in 32-bit mode. */ 1890 if (TARGET_ARCH32) 1891 target_flags &= ~MASK_STACK_BIAS; 1892 1893 /* Use LRA instead of reload, unless otherwise instructed. */ 1894 if (!(target_flags_explicit & MASK_LRA)) 1895 target_flags |= MASK_LRA; 1896 1897 /* Enable applicable errata workarounds for LEON3FT. */ 1898 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc) 1899 { 1900 sparc_fix_b2bst = 1; 1901 sparc_fix_lost_divsqrt = 1; 1902 } 1903 1904 /* Disable FsMULd for the UT699 since it doesn't work correctly. */ 1905 if (sparc_fix_ut699) 1906 target_flags &= ~MASK_FSMULD; 1907 1908 /* Supply a default value for align_functions. 
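When -falign-functions was not given, the code below aligns functions to 32 bytes on UltraSPARC/Niagara-class CPUs and to 64 bytes on Niagara 7 and M8.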
*/ 1909 if (align_functions == 0) 1910 { 1911 if (sparc_cpu == PROCESSOR_ULTRASPARC 1912 || sparc_cpu == PROCESSOR_ULTRASPARC3 1913 || sparc_cpu == PROCESSOR_NIAGARA 1914 || sparc_cpu == PROCESSOR_NIAGARA2 1915 || sparc_cpu == PROCESSOR_NIAGARA3 1916 || sparc_cpu == PROCESSOR_NIAGARA4) 1917 align_functions = 32; 1918 else if (sparc_cpu == PROCESSOR_NIAGARA7 1919 || sparc_cpu == PROCESSOR_M8) 1920 align_functions = 64; 1921 } 1922 1923 /* Validate PCC_STRUCT_RETURN. */ 1924 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN) 1925 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1); 1926 1927 /* Only use .uaxword when compiling for a 64-bit target. */ 1928 if (!TARGET_ARCH64) 1929 targetm.asm_out.unaligned_op.di = NULL; 1930 1931 /* Do various machine dependent initializations. */ 1932 sparc_init_modes (); 1933 1934 /* Set up function hooks. */ 1935 init_machine_status = sparc_init_machine_status; 1936 1937 switch (sparc_cpu) 1938 { 1939 case PROCESSOR_V7: 1940 case PROCESSOR_CYPRESS: 1941 sparc_costs = &cypress_costs; 1942 break; 1943 case PROCESSOR_V8: 1944 case PROCESSOR_SPARCLITE: 1945 case PROCESSOR_SUPERSPARC: 1946 sparc_costs = &supersparc_costs; 1947 break; 1948 case PROCESSOR_F930: 1949 case PROCESSOR_F934: 1950 case PROCESSOR_HYPERSPARC: 1951 case PROCESSOR_SPARCLITE86X: 1952 sparc_costs = &hypersparc_costs; 1953 break; 1954 case PROCESSOR_LEON: 1955 sparc_costs = &leon_costs; 1956 break; 1957 case PROCESSOR_LEON3: 1958 case PROCESSOR_LEON3V7: 1959 sparc_costs = &leon3_costs; 1960 break; 1961 case PROCESSOR_SPARCLET: 1962 case PROCESSOR_TSC701: 1963 sparc_costs = &sparclet_costs; 1964 break; 1965 case PROCESSOR_V9: 1966 case PROCESSOR_ULTRASPARC: 1967 sparc_costs = &ultrasparc_costs; 1968 break; 1969 case PROCESSOR_ULTRASPARC3: 1970 sparc_costs = &ultrasparc3_costs; 1971 break; 1972 case PROCESSOR_NIAGARA: 1973 sparc_costs = &niagara_costs; 1974 break; 1975 case PROCESSOR_NIAGARA2: 1976 sparc_costs = &niagara2_costs; 1977 break; 1978 case PROCESSOR_NIAGARA3: 1979 sparc_costs = &niagara3_costs; 1980 break; 1981 case PROCESSOR_NIAGARA4: 1982 sparc_costs = &niagara4_costs; 1983 break; 1984 case PROCESSOR_NIAGARA7: 1985 sparc_costs = &niagara7_costs; 1986 break; 1987 case PROCESSOR_M8: 1988 sparc_costs = &m8_costs; 1989 break; 1990 case PROCESSOR_NATIVE: 1991 gcc_unreachable (); 1992 }; 1993 1994 if (sparc_memory_model == SMM_DEFAULT) 1995 { 1996 /* Choose the memory model for the operating system. */ 1997 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL; 1998 if (os_default != SMM_DEFAULT) 1999 sparc_memory_model = os_default; 2000 /* Choose the most relaxed model for the processor. */ 2001 else if (TARGET_V9) 2002 sparc_memory_model = SMM_RMO; 2003 else if (TARGET_LEON3) 2004 sparc_memory_model = SMM_TSO; 2005 else if (TARGET_LEON) 2006 sparc_memory_model = SMM_SC; 2007 else if (TARGET_V8) 2008 sparc_memory_model = SMM_PSO; 2009 else 2010 sparc_memory_model = SMM_SC; 2011 } 2012 2013 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128 2014 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) 2015 target_flags |= MASK_LONG_DOUBLE_128; 2016 #endif 2017 2018 if (TARGET_DEBUG_OPTIONS) 2019 dump_target_flags ("Final target_flags", target_flags); 2020 2021 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that 2022 can run at the same time. More important, it is the threshold 2023 defining when additional prefetches will be dropped by the 2024 hardware. 2025 2026 The UltraSPARC-III features a documented prefetch queue with a 2027 size of 8. 
Additional prefetches issued in the cpu are 2028 dropped. 2029 2030 Niagara processors are different. In these processors prefetches 2031 are handled much like regular loads. The L1 miss buffer is 32 2032 entries, but prefetches start getting affected when 30 entries 2033 become occupied. That occupation could be a mix of regular loads 2034 and prefetches though. And that buffer is shared by all threads. 2035 Once the threshold is reached, if the core is running a single 2036 thread the prefetch will retry. If more than one thread is 2037 running, the prefetch will be dropped. 2038 2039 All this makes it very difficult to determine how many 2040 simultaneous prefetches can be issued, even in a 2041 single-threaded program. Experimental results show that setting 2042 this parameter to 32 works well when the number of threads is not 2043 high. */ 2044 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 2045 ((sparc_cpu == PROCESSOR_ULTRASPARC 2046 || sparc_cpu == PROCESSOR_NIAGARA 2047 || sparc_cpu == PROCESSOR_NIAGARA2 2048 || sparc_cpu == PROCESSOR_NIAGARA3 2049 || sparc_cpu == PROCESSOR_NIAGARA4) 2050 ? 2 2051 : (sparc_cpu == PROCESSOR_ULTRASPARC3 2052 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7 2053 || sparc_cpu == PROCESSOR_M8) 2054 ? 32 : 3))), 2055 global_options.x_param_values, 2056 global_options_set.x_param_values); 2057 2058 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in 2059 bytes. 2060 2061 The Oracle SPARC Architecture (previously the UltraSPARC 2062 Architecture) specification states that when a PREFETCH[A] 2063 instruction is executed an implementation-specific amount of data 2064 is prefetched, and that it is at least 64 bytes long (aligned to 2065 at least 64 bytes). 2066 2067 However, this is not correct. The M7 (and implementations prior 2068 to that) does not guarantee a 64B prefetch into a cache if the 2069 line size is smaller. A single cache line is all that is ever 2070 prefetched. So for the M7, where the L1D$ has 32B lines and the 2071 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the 2072 L2 and L3, but only 32B are brought into the L1D$. (Assuming it 2073 is a read_n prefetch, which is the only type which allocates to 2074 the L1.) */ 2075 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 2076 (sparc_cpu == PROCESSOR_M8 2077 ? 64 : 32), 2078 global_options.x_param_values, 2079 global_options_set.x_param_values); 2080 2081 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use 2082 Harvard level-1 caches) in kilobytes. Both UltraSPARC and 2083 Niagara processors feature an L1D$ of 16KB. */ 2084 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 2085 ((sparc_cpu == PROCESSOR_ULTRASPARC 2086 || sparc_cpu == PROCESSOR_ULTRASPARC3 2087 || sparc_cpu == PROCESSOR_NIAGARA 2088 || sparc_cpu == PROCESSOR_NIAGARA2 2089 || sparc_cpu == PROCESSOR_NIAGARA3 2090 || sparc_cpu == PROCESSOR_NIAGARA4 2091 || sparc_cpu == PROCESSOR_NIAGARA7 2092 || sparc_cpu == PROCESSOR_M8) 2093 ? 16 : 64), 2094 global_options.x_param_values, 2095 global_options_set.x_param_values); 2096 2097 2098 /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes. Note 2099 that 512 is the default in params.def. */ 2100 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 2101 ((sparc_cpu == PROCESSOR_NIAGARA4 2102 || sparc_cpu == PROCESSOR_M8) 2103 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7 2104 ?
256 : 512)), 2105 global_options.x_param_values, 2106 global_options_set.x_param_values); 2107 2108 2109 /* Disable save slot sharing for call-clobbered registers by default. 2110 The IRA sharing algorithm works on single registers only and this 2111 pessimizes for double floating-point registers. */ 2112 if (!global_options_set.x_flag_ira_share_save_slots) 2113 flag_ira_share_save_slots = 0; 2114 2115 /* Only enable REE by default in 64-bit mode where it helps to eliminate 2116 redundant 32-to-64-bit extensions. */ 2117 if (!global_options_set.x_flag_ree && TARGET_ARCH32) 2118 flag_ree = 0; 2119 } 2120 2121 /* Miscellaneous utilities. */ 2122 2123 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move 2124 or branch on register contents instructions. */ 2125 2126 int 2127 v9_regcmp_p (enum rtx_code code) 2128 { 2129 return (code == EQ || code == NE || code == GE || code == LT 2130 || code == LE || code == GT); 2131 } 2132 2133 /* Nonzero if OP is a floating point constant which can 2134 be loaded into an integer register using a single 2135 sethi instruction. */ 2136 2137 int 2138 fp_sethi_p (rtx op) 2139 { 2140 if (GET_CODE (op) == CONST_DOUBLE) 2141 { 2142 long i; 2143 2144 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); 2145 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i); 2146 } 2147 2148 return 0; 2149 } 2150 2151 /* Nonzero if OP is a floating point constant which can 2152 be loaded into an integer register using a single 2153 mov instruction. */ 2154 2155 int 2156 fp_mov_p (rtx op) 2157 { 2158 if (GET_CODE (op) == CONST_DOUBLE) 2159 { 2160 long i; 2161 2162 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); 2163 return SPARC_SIMM13_P (i); 2164 } 2165 2166 return 0; 2167 } 2168 2169 /* Nonzero if OP is a floating point constant which can 2170 be loaded into an integer register using a high/losum 2171 instruction sequence. */ 2172 2173 int 2174 fp_high_losum_p (rtx op) 2175 { 2176 /* The constraints calling this should only be in 2177 SFmode move insns, so any constant which cannot 2178 be moved using a single insn will do. */ 2179 if (GET_CODE (op) == CONST_DOUBLE) 2180 { 2181 long i; 2182 2183 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); 2184 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i); 2185 } 2186 2187 return 0; 2188 } 2189 2190 /* Return true if the address of LABEL can be loaded by means of the 2191 mov{si,di}_pic_label_ref patterns in PIC mode. */ 2192 2193 static bool 2194 can_use_mov_pic_label_ref (rtx label) 2195 { 2196 /* VxWorks does not impose a fixed gap between segments; the run-time 2197 gap can be different from the object-file gap. We therefore can't 2198 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we 2199 are absolutely sure that X is in the same segment as the GOT. 2200 Unfortunately, the flexibility of linker scripts means that we 2201 can't be sure of that in general, so assume that GOT-relative 2202 accesses are never valid on VxWorks. */ 2203 if (TARGET_VXWORKS_RTP) 2204 return false; 2205 2206 /* Similarly, if the label is non-local, it might end up being placed 2207 in a different section than the current one; now mov_pic_label_ref 2208 requires the label and the code to be in the same section. */ 2209 if (LABEL_REF_NONLOCAL_P (label)) 2210 return false; 2211 2212 /* Finally, if we are reordering basic blocks and partition into hot 2213 and cold sections, this might happen for any label. 
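(With -freorder-blocks-and-partition the cold blocks of a function can be moved into a separate unlikely-executed text section, so even a label defined in the current function may end up in a different section from the code that references it.)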
*/ 2214 if (flag_reorder_blocks_and_partition) 2215 return false; 2216 2217 return true; 2218 } 2219 2220 /* Expand a move instruction. Return true if all work is done. */ 2221 2222 bool 2223 sparc_expand_move (machine_mode mode, rtx *operands) 2224 { 2225 /* Handle sets of MEM first. */ 2226 if (GET_CODE (operands[0]) == MEM) 2227 { 2228 /* 0 is a register (or a pair of registers) on SPARC. */ 2229 if (register_or_zero_operand (operands[1], mode)) 2230 return false; 2231 2232 if (!reload_in_progress) 2233 { 2234 operands[0] = validize_mem (operands[0]); 2235 operands[1] = force_reg (mode, operands[1]); 2236 } 2237 } 2238 2239 /* Fix up TLS cases. */ 2240 if (TARGET_HAVE_TLS 2241 && CONSTANT_P (operands[1]) 2242 && sparc_tls_referenced_p (operands [1])) 2243 { 2244 operands[1] = sparc_legitimize_tls_address (operands[1]); 2245 return false; 2246 } 2247 2248 /* Fix up PIC cases. */ 2249 if (flag_pic && CONSTANT_P (operands[1])) 2250 { 2251 if (pic_address_needs_scratch (operands[1])) 2252 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX); 2253 2254 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */ 2255 if ((GET_CODE (operands[1]) == LABEL_REF 2256 && can_use_mov_pic_label_ref (operands[1])) 2257 || (GET_CODE (operands[1]) == CONST 2258 && GET_CODE (XEXP (operands[1], 0)) == PLUS 2259 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF 2260 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT 2261 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0)))) 2262 { 2263 if (mode == SImode) 2264 { 2265 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1])); 2266 return true; 2267 } 2268 2269 if (mode == DImode) 2270 { 2271 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1])); 2272 return true; 2273 } 2274 } 2275 2276 if (symbolic_operand (operands[1], mode)) 2277 { 2278 operands[1] 2279 = sparc_legitimize_pic_address (operands[1], 2280 reload_in_progress 2281 ? operands[0] : NULL_RTX); 2282 return false; 2283 } 2284 } 2285 2286 /* If we are trying to toss an integer constant into FP registers, 2287 or loading a FP or vector constant, force it into memory. */ 2288 if (CONSTANT_P (operands[1]) 2289 && REG_P (operands[0]) 2290 && (SPARC_FP_REG_P (REGNO (operands[0])) 2291 || SCALAR_FLOAT_MODE_P (mode) 2292 || VECTOR_MODE_P (mode))) 2293 { 2294 /* emit_group_store will send such bogosity to us when it is 2295 not storing directly into memory. So fix this up to avoid 2296 crashes in output_constant_pool. */ 2297 if (operands [1] == const0_rtx) 2298 operands[1] = CONST0_RTX (mode); 2299 2300 /* We can clear or set to all-ones FP registers if TARGET_VIS, and 2301 always other regs. */ 2302 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG) 2303 && (const_zero_operand (operands[1], mode) 2304 || const_all_ones_operand (operands[1], mode))) 2305 return false; 2306 2307 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG 2308 /* We are able to build any SF constant in integer registers 2309 with at most 2 instructions. */ 2310 && (mode == SFmode 2311 /* And any DF constant in integer registers if needed. */ 2312 || (mode == DFmode && !can_create_pseudo_p ()))) 2313 return false; 2314 2315 operands[1] = force_const_mem (mode, operands[1]); 2316 if (!reload_in_progress) 2317 operands[1] = validize_mem (operands[1]); 2318 return false; 2319 } 2320 2321 /* Accept non-constants and valid constants unmodified. 
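Anything else falls through to the switch below, which synthesizes the constant with sparc_emit_set_const32/64 or, for TImode, splits it into two DImode moves.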
*/ 2322 if (!CONSTANT_P (operands[1]) 2323 || GET_CODE (operands[1]) == HIGH 2324 || input_operand (operands[1], mode)) 2325 return false; 2326 2327 switch (mode) 2328 { 2329 case E_QImode: 2330 /* All QImode constants require only one insn, so proceed. */ 2331 break; 2332 2333 case E_HImode: 2334 case E_SImode: 2335 sparc_emit_set_const32 (operands[0], operands[1]); 2336 return true; 2337 2338 case E_DImode: 2339 /* input_operand should have filtered out 32-bit mode. */ 2340 sparc_emit_set_const64 (operands[0], operands[1]); 2341 return true; 2342 2343 case E_TImode: 2344 { 2345 rtx high, low; 2346 /* TImode isn't available in 32-bit mode. */ 2347 split_double (operands[1], &high, &low); 2348 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode), 2349 high)); 2350 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode), 2351 low)); 2352 } 2353 return true; 2354 2355 default: 2356 gcc_unreachable (); 2357 } 2358 2359 return false; 2360 } 2361 2362 /* Load OP1, a 32-bit constant, into OP0, a register. 2363 We know it can't be done in one insn when we get 2364 here, the move expander guarantees this. */ 2365 2366 static void 2367 sparc_emit_set_const32 (rtx op0, rtx op1) 2368 { 2369 machine_mode mode = GET_MODE (op0); 2370 rtx temp = op0; 2371 2372 if (can_create_pseudo_p ()) 2373 temp = gen_reg_rtx (mode); 2374 2375 if (GET_CODE (op1) == CONST_INT) 2376 { 2377 gcc_assert (!small_int_operand (op1, mode) 2378 && !const_high_operand (op1, mode)); 2379 2380 /* Emit them as real moves instead of a HIGH/LO_SUM, 2381 this way CSE can see everything and reuse intermediate 2382 values if it wants. */ 2383 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1) 2384 & ~(HOST_WIDE_INT) 0x3ff))); 2385 2386 emit_insn (gen_rtx_SET (op0, 2387 gen_rtx_IOR (mode, temp, 2388 GEN_INT (INTVAL (op1) & 0x3ff)))); 2389 } 2390 else 2391 { 2392 /* A symbol, emit in the traditional way. */ 2393 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1))); 2394 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1))); 2395 } 2396 } 2397 2398 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register. 2399 If TEMP is nonzero, we are forbidden to use any other scratch 2400 registers. Otherwise, we are allowed to generate them as needed. 2401 2402 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY 2403 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */ 2404 2405 void 2406 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp) 2407 { 2408 rtx cst, temp1, temp2, temp3, temp4, temp5; 2409 rtx ti_temp = 0; 2410 2411 /* Deal with too large offsets. */ 2412 if (GET_CODE (op1) == CONST 2413 && GET_CODE (XEXP (op1, 0)) == PLUS 2414 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1)) 2415 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst)) 2416 { 2417 gcc_assert (!temp); 2418 temp1 = gen_reg_rtx (DImode); 2419 temp2 = gen_reg_rtx (DImode); 2420 sparc_emit_set_const64 (temp2, cst); 2421 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0), 2422 NULL_RTX); 2423 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2))); 2424 return; 2425 } 2426 2427 if (temp && GET_MODE (temp) == TImode) 2428 { 2429 ti_temp = temp; 2430 temp = gen_rtx_REG (DImode, REGNO (temp)); 2431 } 2432 2433 /* SPARC-V9 code-model support. 
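Each case below documents the instruction sequence it emits, using %tempN for the scratch registers and %reg for the final destination.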
*/ 2434 switch (sparc_cmodel) 2435 { 2436 case CM_MEDLOW: 2437 /* The range spanned by all instructions in the object is less 2438 than 2^31 bytes (2GB) and the distance from any instruction 2439 to the location of the label _GLOBAL_OFFSET_TABLE_ is less 2440 than 2^31 bytes (2GB). 2441 2442 The executable must be in the low 4TB of the virtual address 2443 space. 2444 2445 sethi %hi(symbol), %temp1 2446 or %temp1, %lo(symbol), %reg */ 2447 if (temp) 2448 temp1 = temp; /* op0 is allowed. */ 2449 else 2450 temp1 = gen_reg_rtx (DImode); 2451 2452 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1))); 2453 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1))); 2454 break; 2455 2456 case CM_MEDMID: 2457 /* The range spanned by all instructions in the object is less 2458 than 2^31 bytes (2GB) and the distance from any instruction 2459 to the location of the label _GLOBAL_OFFSET_TABLE_ is less 2460 than 2^31 bytes (2GB). 2461 2462 The executable must be in the low 16TB of the virtual address 2463 space. 2464 2465 sethi %h44(symbol), %temp1 2466 or %temp1, %m44(symbol), %temp2 2467 sllx %temp2, 12, %temp3 2468 or %temp3, %l44(symbol), %reg */ 2469 if (temp) 2470 { 2471 temp1 = op0; 2472 temp2 = op0; 2473 temp3 = temp; /* op0 is allowed. */ 2474 } 2475 else 2476 { 2477 temp1 = gen_reg_rtx (DImode); 2478 temp2 = gen_reg_rtx (DImode); 2479 temp3 = gen_reg_rtx (DImode); 2480 } 2481 2482 emit_insn (gen_seth44 (temp1, op1)); 2483 emit_insn (gen_setm44 (temp2, temp1, op1)); 2484 emit_insn (gen_rtx_SET (temp3, 2485 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12)))); 2486 emit_insn (gen_setl44 (op0, temp3, op1)); 2487 break; 2488 2489 case CM_MEDANY: 2490 /* The range spanned by all instructions in the object is less 2491 than 2^31 bytes (2GB) and the distance from any instruction 2492 to the location of the label _GLOBAL_OFFSET_TABLE_ is less 2493 than 2^31 bytes (2GB). 2494 2495 The executable can be placed anywhere in the virtual address 2496 space. 2497 2498 sethi %hh(symbol), %temp1 2499 sethi %lm(symbol), %temp2 2500 or %temp1, %hm(symbol), %temp3 2501 sllx %temp3, 32, %temp4 2502 or %temp4, %temp2, %temp5 2503 or %temp5, %lo(symbol), %reg */ 2504 if (temp) 2505 { 2506 /* It is possible that one of the registers we got for operands[2] 2507 might coincide with that of operands[0] (which is why we made 2508 it TImode). Pick the other one to use as our scratch. */ 2509 if (rtx_equal_p (temp, op0)) 2510 { 2511 gcc_assert (ti_temp); 2512 temp = gen_rtx_REG (DImode, REGNO (temp) + 1); 2513 } 2514 temp1 = op0; 2515 temp2 = temp; /* op0 is _not_ allowed, see above. */ 2516 temp3 = op0; 2517 temp4 = op0; 2518 temp5 = op0; 2519 } 2520 else 2521 { 2522 temp1 = gen_reg_rtx (DImode); 2523 temp2 = gen_reg_rtx (DImode); 2524 temp3 = gen_reg_rtx (DImode); 2525 temp4 = gen_reg_rtx (DImode); 2526 temp5 = gen_reg_rtx (DImode); 2527 } 2528 2529 emit_insn (gen_sethh (temp1, op1)); 2530 emit_insn (gen_setlm (temp2, op1)); 2531 emit_insn (gen_sethm (temp3, temp1, op1)); 2532 emit_insn (gen_rtx_SET (temp4, 2533 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); 2534 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2))); 2535 emit_insn (gen_setlo (op0, temp5, op1)); 2536 break; 2537 2538 case CM_EMBMEDANY: 2539 /* Old old old backwards compatibility kruft here. 2540 Essentially it is MEDLOW with a fixed 64-bit 2541 virtual base added to all data segment addresses. 2542 Text-segment stuff is computed like MEDANY, we can't 2543 reuse the code above because the relocation knobs 2544 look different. 
2545 2546 Data segment: sethi %hi(symbol), %temp1 2547 add %temp1, EMBMEDANY_BASE_REG, %temp2 2548 or %temp2, %lo(symbol), %reg */ 2549 if (data_segment_operand (op1, GET_MODE (op1))) 2550 { 2551 if (temp) 2552 { 2553 temp1 = temp; /* op0 is allowed. */ 2554 temp2 = op0; 2555 } 2556 else 2557 { 2558 temp1 = gen_reg_rtx (DImode); 2559 temp2 = gen_reg_rtx (DImode); 2560 } 2561 2562 emit_insn (gen_embmedany_sethi (temp1, op1)); 2563 emit_insn (gen_embmedany_brsum (temp2, temp1)); 2564 emit_insn (gen_embmedany_losum (op0, temp2, op1)); 2565 } 2566 2567 /* Text segment: sethi %uhi(symbol), %temp1 2568 sethi %hi(symbol), %temp2 2569 or %temp1, %ulo(symbol), %temp3 2570 sllx %temp3, 32, %temp4 2571 or %temp4, %temp2, %temp5 2572 or %temp5, %lo(symbol), %reg */ 2573 else 2574 { 2575 if (temp) 2576 { 2577 /* It is possible that one of the registers we got for operands[2] 2578 might coincide with that of operands[0] (which is why we made 2579 it TImode). Pick the other one to use as our scratch. */ 2580 if (rtx_equal_p (temp, op0)) 2581 { 2582 gcc_assert (ti_temp); 2583 temp = gen_rtx_REG (DImode, REGNO (temp) + 1); 2584 } 2585 temp1 = op0; 2586 temp2 = temp; /* op0 is _not_ allowed, see above. */ 2587 temp3 = op0; 2588 temp4 = op0; 2589 temp5 = op0; 2590 } 2591 else 2592 { 2593 temp1 = gen_reg_rtx (DImode); 2594 temp2 = gen_reg_rtx (DImode); 2595 temp3 = gen_reg_rtx (DImode); 2596 temp4 = gen_reg_rtx (DImode); 2597 temp5 = gen_reg_rtx (DImode); 2598 } 2599 2600 emit_insn (gen_embmedany_textuhi (temp1, op1)); 2601 emit_insn (gen_embmedany_texthi (temp2, op1)); 2602 emit_insn (gen_embmedany_textulo (temp3, temp1, op1)); 2603 emit_insn (gen_rtx_SET (temp4, 2604 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); 2605 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2))); 2606 emit_insn (gen_embmedany_textlo (op0, temp5, op1)); 2607 } 2608 break; 2609 2610 default: 2611 gcc_unreachable (); 2612 } 2613 } 2614 2615 /* These avoid problems when cross compiling. If we do not 2616 go through all this hair then the optimizer will see 2617 invalid REG_EQUAL notes or in some cases none at all. */ 2618 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT); 2619 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT); 2620 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT); 2621 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT); 2622 2623 /* The optimizer is not to assume anything about exactly 2624 which bits are set for a HIGH, they are unspecified. 2625 Unfortunately this leads to many missed optimizations 2626 during CSE. We mask out the non-HIGH bits, and matches 2627 a plain movdi, to alleviate this problem. */ 2628 static rtx 2629 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val) 2630 { 2631 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff)); 2632 } 2633 2634 static rtx 2635 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val) 2636 { 2637 return gen_rtx_SET (dest, GEN_INT (val)); 2638 } 2639 2640 static rtx 2641 gen_safe_OR64 (rtx src, HOST_WIDE_INT val) 2642 { 2643 return gen_rtx_IOR (DImode, src, GEN_INT (val)); 2644 } 2645 2646 static rtx 2647 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val) 2648 { 2649 return gen_rtx_XOR (DImode, src, GEN_INT (val)); 2650 } 2651 2652 /* Worker routines for 64-bit constant formation on arch64. 2653 One of the key things to be doing in these emissions is 2654 to create as many temp REGs as possible. This makes it 2655 possible for half-built constants to be used later when 2656 such values are similar to something required later on. 
2657 Without doing this, the optimizer cannot see such 2658 opportunities. */ 2659 2660 static void sparc_emit_set_const64_quick1 (rtx, rtx, 2661 unsigned HOST_WIDE_INT, int); 2662 2663 static void 2664 sparc_emit_set_const64_quick1 (rtx op0, rtx temp, 2665 unsigned HOST_WIDE_INT low_bits, int is_neg) 2666 { 2667 unsigned HOST_WIDE_INT high_bits; 2668 2669 if (is_neg) 2670 high_bits = (~low_bits) & 0xffffffff; 2671 else 2672 high_bits = low_bits; 2673 2674 emit_insn (gen_safe_HIGH64 (temp, high_bits)); 2675 if (!is_neg) 2676 { 2677 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff)))); 2678 } 2679 else 2680 { 2681 /* If we are XOR'ing with -1, then we should emit a one's complement 2682 instead. This way the combiner will notice logical operations 2683 such as ANDN later on and substitute. */ 2684 if ((low_bits & 0x3ff) == 0x3ff) 2685 { 2686 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp))); 2687 } 2688 else 2689 { 2690 emit_insn (gen_rtx_SET (op0, 2691 gen_safe_XOR64 (temp, 2692 (-(HOST_WIDE_INT)0x400 2693 | (low_bits & 0x3ff))))); 2694 } 2695 } 2696 } 2697 2698 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT, 2699 unsigned HOST_WIDE_INT, int); 2700 2701 static void 2702 sparc_emit_set_const64_quick2 (rtx op0, rtx temp, 2703 unsigned HOST_WIDE_INT high_bits, 2704 unsigned HOST_WIDE_INT low_immediate, 2705 int shift_count) 2706 { 2707 rtx temp2 = op0; 2708 2709 if ((high_bits & 0xfffffc00) != 0) 2710 { 2711 emit_insn (gen_safe_HIGH64 (temp, high_bits)); 2712 if ((high_bits & ~0xfffffc00) != 0) 2713 emit_insn (gen_rtx_SET (op0, 2714 gen_safe_OR64 (temp, (high_bits & 0x3ff)))); 2715 else 2716 temp2 = temp; 2717 } 2718 else 2719 { 2720 emit_insn (gen_safe_SET64 (temp, high_bits)); 2721 temp2 = temp; 2722 } 2723 2724 /* Now shift it up into place. */ 2725 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2, 2726 GEN_INT (shift_count)))); 2727 2728 /* If there is a low immediate part piece, finish up by 2729 putting that in as well. */ 2730 if (low_immediate != 0) 2731 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate))); 2732 } 2733 2734 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT, 2735 unsigned HOST_WIDE_INT); 2736 2737 /* Full 64-bit constant decomposition. Even though this is the 2738 'worst' case, we still optimize a few things away. 
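With a scratch register available, the code below builds the high word with sethi/or, shifts it left by 32, builds the low word the same way and adds the two; during reload, when no new register can be created, the low word is instead merged in with shift/or steps of 12, 12 and 8 bits.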
*/ 2739 static void 2740 sparc_emit_set_const64_longway (rtx op0, rtx temp, 2741 unsigned HOST_WIDE_INT high_bits, 2742 unsigned HOST_WIDE_INT low_bits) 2743 { 2744 rtx sub_temp = op0; 2745 2746 if (can_create_pseudo_p ()) 2747 sub_temp = gen_reg_rtx (DImode); 2748 2749 if ((high_bits & 0xfffffc00) != 0) 2750 { 2751 emit_insn (gen_safe_HIGH64 (temp, high_bits)); 2752 if ((high_bits & ~0xfffffc00) != 0) 2753 emit_insn (gen_rtx_SET (sub_temp, 2754 gen_safe_OR64 (temp, (high_bits & 0x3ff)))); 2755 else 2756 sub_temp = temp; 2757 } 2758 else 2759 { 2760 emit_insn (gen_safe_SET64 (temp, high_bits)); 2761 sub_temp = temp; 2762 } 2763 2764 if (can_create_pseudo_p ()) 2765 { 2766 rtx temp2 = gen_reg_rtx (DImode); 2767 rtx temp3 = gen_reg_rtx (DImode); 2768 rtx temp4 = gen_reg_rtx (DImode); 2769 2770 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp, 2771 GEN_INT (32)))); 2772 2773 emit_insn (gen_safe_HIGH64 (temp2, low_bits)); 2774 if ((low_bits & ~0xfffffc00) != 0) 2775 { 2776 emit_insn (gen_rtx_SET (temp3, 2777 gen_safe_OR64 (temp2, (low_bits & 0x3ff)))); 2778 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3))); 2779 } 2780 else 2781 { 2782 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2))); 2783 } 2784 } 2785 else 2786 { 2787 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff); 2788 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff); 2789 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff); 2790 int to_shift = 12; 2791 2792 /* We are in the middle of reload, so this is really 2793 painful. However we do still make an attempt to 2794 avoid emitting truly stupid code. */ 2795 if (low1 != const0_rtx) 2796 { 2797 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp, 2798 GEN_INT (to_shift)))); 2799 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1))); 2800 sub_temp = op0; 2801 to_shift = 12; 2802 } 2803 else 2804 { 2805 to_shift += 12; 2806 } 2807 if (low2 != const0_rtx) 2808 { 2809 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp, 2810 GEN_INT (to_shift)))); 2811 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2))); 2812 sub_temp = op0; 2813 to_shift = 8; 2814 } 2815 else 2816 { 2817 to_shift += 8; 2818 } 2819 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp, 2820 GEN_INT (to_shift)))); 2821 if (low3 != const0_rtx) 2822 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3))); 2823 /* phew... */ 2824 } 2825 } 2826 2827 /* Analyze a 64-bit constant for certain properties. 
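Namely the positions of the lowest and highest set bits and whether every bit in between is set; the emitters use this to choose among the shift-based sequences below.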
*/ 2828 static void analyze_64bit_constant (unsigned HOST_WIDE_INT, 2829 unsigned HOST_WIDE_INT, 2830 int *, int *, int *); 2831 2832 static void 2833 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits, 2834 unsigned HOST_WIDE_INT low_bits, 2835 int *hbsp, int *lbsp, int *abbasp) 2836 { 2837 int lowest_bit_set, highest_bit_set, all_bits_between_are_set; 2838 int i; 2839 2840 lowest_bit_set = highest_bit_set = -1; 2841 i = 0; 2842 do 2843 { 2844 if ((lowest_bit_set == -1) 2845 && ((low_bits >> i) & 1)) 2846 lowest_bit_set = i; 2847 if ((highest_bit_set == -1) 2848 && ((high_bits >> (32 - i - 1)) & 1)) 2849 highest_bit_set = (64 - i - 1); 2850 } 2851 while (++i < 32 2852 && ((highest_bit_set == -1) 2853 || (lowest_bit_set == -1))); 2854 if (i == 32) 2855 { 2856 i = 0; 2857 do 2858 { 2859 if ((lowest_bit_set == -1) 2860 && ((high_bits >> i) & 1)) 2861 lowest_bit_set = i + 32; 2862 if ((highest_bit_set == -1) 2863 && ((low_bits >> (32 - i - 1)) & 1)) 2864 highest_bit_set = 32 - i - 1; 2865 } 2866 while (++i < 32 2867 && ((highest_bit_set == -1) 2868 || (lowest_bit_set == -1))); 2869 } 2870 /* If there are no bits set this should have gone out 2871 as one instruction! */ 2872 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1); 2873 all_bits_between_are_set = 1; 2874 for (i = lowest_bit_set; i <= highest_bit_set; i++) 2875 { 2876 if (i < 32) 2877 { 2878 if ((low_bits & (1 << i)) != 0) 2879 continue; 2880 } 2881 else 2882 { 2883 if ((high_bits & (1 << (i - 32))) != 0) 2884 continue; 2885 } 2886 all_bits_between_are_set = 0; 2887 break; 2888 } 2889 *hbsp = highest_bit_set; 2890 *lbsp = lowest_bit_set; 2891 *abbasp = all_bits_between_are_set; 2892 } 2893 2894 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT); 2895 2896 static int 2897 const64_is_2insns (unsigned HOST_WIDE_INT high_bits, 2898 unsigned HOST_WIDE_INT low_bits) 2899 { 2900 int highest_bit_set, lowest_bit_set, all_bits_between_are_set; 2901 2902 if (high_bits == 0 2903 || high_bits == 0xffffffff) 2904 return 1; 2905 2906 analyze_64bit_constant (high_bits, low_bits, 2907 &highest_bit_set, &lowest_bit_set, 2908 &all_bits_between_are_set); 2909 2910 if ((highest_bit_set == 63 2911 || lowest_bit_set == 0) 2912 && all_bits_between_are_set != 0) 2913 return 1; 2914 2915 if ((highest_bit_set - lowest_bit_set) < 21) 2916 return 1; 2917 2918 return 0; 2919 } 2920 2921 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT, 2922 unsigned HOST_WIDE_INT, 2923 int, int); 2924 2925 static unsigned HOST_WIDE_INT 2926 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits, 2927 unsigned HOST_WIDE_INT low_bits, 2928 int lowest_bit_set, int shift) 2929 { 2930 HOST_WIDE_INT hi, lo; 2931 2932 if (lowest_bit_set < 32) 2933 { 2934 lo = (low_bits >> lowest_bit_set) << shift; 2935 hi = ((high_bits << (32 - lowest_bit_set)) << shift); 2936 } 2937 else 2938 { 2939 lo = 0; 2940 hi = ((high_bits >> (lowest_bit_set - 32)) << shift); 2941 } 2942 gcc_assert (! (hi & lo)); 2943 return (hi | lo); 2944 } 2945 2946 /* Here we are sure to be arch64 and this is an integer constant 2947 being loaded into a register. Emit the most efficient 2948 insn sequence possible. Detection of all the 1-insn cases 2949 has been done already. 
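The code below first tries 2-insn sequences (a mov or sethi of the significant bits followed by a single shift, or a sethi/or resp. sethi/xor pair when the high word is all zeros or all ones), then 3-insn sequences, and finally falls back to sparc_emit_set_const64_longway for full decomposition.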
*/ 2950 static void 2951 sparc_emit_set_const64 (rtx op0, rtx op1) 2952 { 2953 unsigned HOST_WIDE_INT high_bits, low_bits; 2954 int lowest_bit_set, highest_bit_set; 2955 int all_bits_between_are_set; 2956 rtx temp = 0; 2957 2958 /* Sanity check that we know what we are working with. */ 2959 gcc_assert (TARGET_ARCH64 2960 && (GET_CODE (op0) == SUBREG 2961 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0))))); 2962 2963 if (! can_create_pseudo_p ()) 2964 temp = op0; 2965 2966 if (GET_CODE (op1) != CONST_INT) 2967 { 2968 sparc_emit_set_symbolic_const64 (op0, op1, temp); 2969 return; 2970 } 2971 2972 if (! temp) 2973 temp = gen_reg_rtx (DImode); 2974 2975 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff); 2976 low_bits = (INTVAL (op1) & 0xffffffff); 2977 2978 /* low_bits bits 0 --> 31 2979 high_bits bits 32 --> 63 */ 2980 2981 analyze_64bit_constant (high_bits, low_bits, 2982 &highest_bit_set, &lowest_bit_set, 2983 &all_bits_between_are_set); 2984 2985 /* First try for a 2-insn sequence. */ 2986 2987 /* These situations are preferred because the optimizer can 2988 * do more things with them: 2989 * 1) mov -1, %reg 2990 * sllx %reg, shift, %reg 2991 * 2) mov -1, %reg 2992 * srlx %reg, shift, %reg 2993 * 3) mov some_small_const, %reg 2994 * sllx %reg, shift, %reg 2995 */ 2996 if (((highest_bit_set == 63 2997 || lowest_bit_set == 0) 2998 && all_bits_between_are_set != 0) 2999 || ((highest_bit_set - lowest_bit_set) < 12)) 3000 { 3001 HOST_WIDE_INT the_const = -1; 3002 int shift = lowest_bit_set; 3003 3004 if ((highest_bit_set != 63 3005 && lowest_bit_set != 0) 3006 || all_bits_between_are_set == 0) 3007 { 3008 the_const = 3009 create_simple_focus_bits (high_bits, low_bits, 3010 lowest_bit_set, 0); 3011 } 3012 else if (lowest_bit_set == 0) 3013 shift = -(63 - highest_bit_set); 3014 3015 gcc_assert (SPARC_SIMM13_P (the_const)); 3016 gcc_assert (shift != 0); 3017 3018 emit_insn (gen_safe_SET64 (temp, the_const)); 3019 if (shift > 0) 3020 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp, 3021 GEN_INT (shift)))); 3022 else if (shift < 0) 3023 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp, 3024 GEN_INT (-shift)))); 3025 return; 3026 } 3027 3028 /* Now a range of 22 or less bits set somewhere. 3029 * 1) sethi %hi(focus_bits), %reg 3030 * sllx %reg, shift, %reg 3031 * 2) sethi %hi(focus_bits), %reg 3032 * srlx %reg, shift, %reg 3033 */ 3034 if ((highest_bit_set - lowest_bit_set) < 21) 3035 { 3036 unsigned HOST_WIDE_INT focus_bits = 3037 create_simple_focus_bits (high_bits, low_bits, 3038 lowest_bit_set, 10); 3039 3040 gcc_assert (SPARC_SETHI_P (focus_bits)); 3041 gcc_assert (lowest_bit_set != 10); 3042 3043 emit_insn (gen_safe_HIGH64 (temp, focus_bits)); 3044 3045 /* If lowest_bit_set == 10 then a sethi alone could have done it. */ 3046 if (lowest_bit_set < 10) 3047 emit_insn (gen_rtx_SET (op0, 3048 gen_rtx_LSHIFTRT (DImode, temp, 3049 GEN_INT (10 - lowest_bit_set)))); 3050 else if (lowest_bit_set > 10) 3051 emit_insn (gen_rtx_SET (op0, 3052 gen_rtx_ASHIFT (DImode, temp, 3053 GEN_INT (lowest_bit_set - 10)))); 3054 return; 3055 } 3056 3057 /* 1) sethi %hi(low_bits), %reg 3058 * or %reg, %lo(low_bits), %reg 3059 * 2) sethi %hi(~low_bits), %reg 3060 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg 3061 */ 3062 if (high_bits == 0 3063 || high_bits == 0xffffffff) 3064 { 3065 sparc_emit_set_const64_quick1 (op0, temp, low_bits, 3066 (high_bits == 0xffffffff)); 3067 return; 3068 } 3069 3070 /* Now, try 3-insn sequences. 
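For instance sethi+or+sllx when the low word is zero, or building the negated constant in two insns and finishing with an xor or a not.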
*/ 3071 3072 /* 1) sethi %hi(high_bits), %reg 3073 * or %reg, %lo(high_bits), %reg 3074 * sllx %reg, 32, %reg 3075 */ 3076 if (low_bits == 0) 3077 { 3078 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32); 3079 return; 3080 } 3081 3082 /* We may be able to do something quick 3083 when the constant is negated, so try that. */ 3084 if (const64_is_2insns ((~high_bits) & 0xffffffff, 3085 (~low_bits) & 0xfffffc00)) 3086 { 3087 /* NOTE: The trailing bits get XOR'd so we need the 3088 non-negated bits, not the negated ones. */ 3089 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff; 3090 3091 if ((((~high_bits) & 0xffffffff) == 0 3092 && ((~low_bits) & 0x80000000) == 0) 3093 || (((~high_bits) & 0xffffffff) == 0xffffffff 3094 && ((~low_bits) & 0x80000000) != 0)) 3095 { 3096 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff); 3097 3098 if ((SPARC_SETHI_P (fast_int) 3099 && (~high_bits & 0xffffffff) == 0) 3100 || SPARC_SIMM13_P (fast_int)) 3101 emit_insn (gen_safe_SET64 (temp, fast_int)); 3102 else 3103 sparc_emit_set_const64 (temp, GEN_INT (fast_int)); 3104 } 3105 else 3106 { 3107 rtx negated_const; 3108 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) | 3109 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32)); 3110 sparc_emit_set_const64 (temp, negated_const); 3111 } 3112 3113 /* If we are XOR'ing with -1, then we should emit a one's complement 3114 instead. This way the combiner will notice logical operations 3115 such as ANDN later on and substitute. */ 3116 if (trailing_bits == 0x3ff) 3117 { 3118 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp))); 3119 } 3120 else 3121 { 3122 emit_insn (gen_rtx_SET (op0, 3123 gen_safe_XOR64 (temp, 3124 (-0x400 | trailing_bits)))); 3125 } 3126 return; 3127 } 3128 3129 /* 1) sethi %hi(xxx), %reg 3130 * or %reg, %lo(xxx), %reg 3131 * sllx %reg, yyy, %reg 3132 * 3133 * ??? This is just a generalized version of the low_bits==0 3134 * thing above, FIXME... 3135 */ 3136 if ((highest_bit_set - lowest_bit_set) < 32) 3137 { 3138 unsigned HOST_WIDE_INT focus_bits = 3139 create_simple_focus_bits (high_bits, low_bits, 3140 lowest_bit_set, 0); 3141 3142 /* We can't get here in this state. */ 3143 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32); 3144 3145 /* So what we know is that the set bits straddle the 3146 middle of the 64-bit word. */ 3147 sparc_emit_set_const64_quick2 (op0, temp, 3148 focus_bits, 0, 3149 lowest_bit_set); 3150 return; 3151 } 3152 3153 /* 1) sethi %hi(high_bits), %reg 3154 * or %reg, %lo(high_bits), %reg 3155 * sllx %reg, 32, %reg 3156 * or %reg, low_bits, %reg 3157 */ 3158 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0)) 3159 { 3160 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32); 3161 return; 3162 } 3163 3164 /* The easiest way when all else fails, is full decomposition. */ 3165 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits); 3166 } 3167 3168 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */ 3169 3170 static bool 3171 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) 3172 { 3173 *p1 = SPARC_ICC_REG; 3174 *p2 = SPARC_FCC_REG; 3175 return true; 3176 } 3177 3178 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */ 3179 3180 static unsigned int 3181 sparc_min_arithmetic_precision (void) 3182 { 3183 return 32; 3184 } 3185 3186 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE, 3187 return the mode to be used for the comparison. For floating-point, 3188 CCFP[E]mode is used. 
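(CCFPEmode is used for the ordered comparisons that must signal on unordered operands, CCFPmode for the others.)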
CCNZmode should be used when the first operand 3189 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special 3190 processing is needed. */ 3191 3192 machine_mode 3193 select_cc_mode (enum rtx_code op, rtx x, rtx y) 3194 { 3195 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 3196 { 3197 switch (op) 3198 { 3199 case EQ: 3200 case NE: 3201 case UNORDERED: 3202 case ORDERED: 3203 case UNLT: 3204 case UNLE: 3205 case UNGT: 3206 case UNGE: 3207 case UNEQ: 3208 case LTGT: 3209 return CCFPmode; 3210 3211 case LT: 3212 case LE: 3213 case GT: 3214 case GE: 3215 return CCFPEmode; 3216 3217 default: 3218 gcc_unreachable (); 3219 } 3220 } 3221 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS 3222 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT) 3223 && y == const0_rtx) 3224 { 3225 if (TARGET_ARCH64 && GET_MODE (x) == DImode) 3226 return CCXNZmode; 3227 else 3228 return CCNZmode; 3229 } 3230 else 3231 { 3232 /* This is for the cmp<mode>_sne pattern. */ 3233 if (GET_CODE (x) == NOT && y == constm1_rtx) 3234 { 3235 if (TARGET_ARCH64 && GET_MODE (x) == DImode) 3236 return CCXCmode; 3237 else 3238 return CCCmode; 3239 } 3240 3241 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */ 3242 if (!TARGET_ARCH64 && GET_MODE (x) == DImode) 3243 { 3244 if (GET_CODE (y) == UNSPEC 3245 && (XINT (y, 1) == UNSPEC_ADDV 3246 || XINT (y, 1) == UNSPEC_SUBV 3247 || XINT (y, 1) == UNSPEC_NEGV)) 3248 return CCVmode; 3249 else 3250 return CCCmode; 3251 } 3252 3253 if (TARGET_ARCH64 && GET_MODE (x) == DImode) 3254 return CCXmode; 3255 else 3256 return CCmode; 3257 } 3258 } 3259 3260 /* Emit the compare insn and return the CC reg for a CODE comparison 3261 with operands X and Y. */ 3262 3263 static rtx 3264 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y) 3265 { 3266 machine_mode mode; 3267 rtx cc_reg; 3268 3269 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC) 3270 return x; 3271 3272 mode = SELECT_CC_MODE (code, x, y); 3273 3274 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the 3275 fcc regs (cse can't tell they're really call clobbered regs and will 3276 remove a duplicate comparison even if there is an intervening function 3277 call - it will then try to reload the cc reg via an int reg which is why 3278 we need the movcc patterns). It is possible to provide the movcc 3279 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two 3280 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be 3281 to tell cse that CCFPE mode registers (even pseudos) are call 3282 clobbered. */ 3283 3284 /* ??? This is an experiment. Rather than making changes to cse which may 3285 or may not be easy/clean, we do our own cse. This is possible because 3286 we will generate hard registers. Cse knows they're call clobbered (it 3287 doesn't know the same thing about pseudos). If we guess wrong, no big 3288 deal, but if we win, great! */ 3289 3290 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 3291 #if 1 /* experiment */ 3292 { 3293 int reg; 3294 /* We cycle through the registers to ensure they're all exercised. */ 3295 static int next_fcc_reg = 0; 3296 /* Previous x,y for each fcc reg. */ 3297 static rtx prev_args[4][2]; 3298 3299 /* Scan prev_args for x,y. 
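If the same X and Y were compared recently, the %fcc register already holding that result is reused; otherwise the next register is taken in round-robin order via next_fcc_reg.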
*/ 3300 for (reg = 0; reg < 4; reg++) 3301 if (prev_args[reg][0] == x && prev_args[reg][1] == y) 3302 break; 3303 if (reg == 4) 3304 { 3305 reg = next_fcc_reg; 3306 prev_args[reg][0] = x; 3307 prev_args[reg][1] = y; 3308 next_fcc_reg = (next_fcc_reg + 1) & 3; 3309 } 3310 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG); 3311 } 3312 #else 3313 cc_reg = gen_reg_rtx (mode); 3314 #endif /* ! experiment */ 3315 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT) 3316 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG); 3317 else 3318 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG); 3319 3320 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this 3321 will only result in an unrecognizable insn so no point in asserting. */ 3322 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y))); 3323 3324 return cc_reg; 3325 } 3326 3327 3328 /* Emit the compare insn and return the CC reg for the comparison in CMP. */ 3329 3330 rtx 3331 gen_compare_reg (rtx cmp) 3332 { 3333 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1)); 3334 } 3335 3336 /* This function is used for v9 only. 3337 DEST is the target of the Scc insn. 3338 CODE is the code for an Scc's comparison. 3339 X and Y are the values we compare. 3340 3341 This function is needed to turn 3342 3343 (set (reg:SI 110) 3344 (gt (reg:CCX 100 %icc) 3345 (const_int 0))) 3346 into 3347 (set (reg:SI 110) 3348 (gt:DI (reg:CCX 100 %icc) 3349 (const_int 0))) 3350 3351 IE: The instruction recognizer needs to see the mode of the comparison to 3352 find the right instruction. We could use "gt:DI" right in the 3353 define_expand, but leaving it out allows us to handle DI, SI, etc. */ 3354 3355 static int 3356 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y) 3357 { 3358 if (! TARGET_ARCH64 3359 && (GET_MODE (x) == DImode 3360 || GET_MODE (dest) == DImode)) 3361 return 0; 3362 3363 /* Try to use the movrCC insns. */ 3364 if (TARGET_ARCH64 3365 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT 3366 && y == const0_rtx 3367 && v9_regcmp_p (compare_code)) 3368 { 3369 rtx op0 = x; 3370 rtx temp; 3371 3372 /* Special case for op0 != 0. This can be done with one instruction if 3373 dest == x. */ 3374 3375 if (compare_code == NE 3376 && GET_MODE (dest) == DImode 3377 && rtx_equal_p (op0, dest)) 3378 { 3379 emit_insn (gen_rtx_SET (dest, 3380 gen_rtx_IF_THEN_ELSE (DImode, 3381 gen_rtx_fmt_ee (compare_code, DImode, 3382 op0, const0_rtx), 3383 const1_rtx, 3384 dest))); 3385 return 1; 3386 } 3387 3388 if (reg_overlap_mentioned_p (dest, op0)) 3389 { 3390 /* Handle the case where dest == x. 3391 We "early clobber" the result. */ 3392 op0 = gen_reg_rtx (GET_MODE (x)); 3393 emit_move_insn (op0, x); 3394 } 3395 3396 emit_insn (gen_rtx_SET (dest, const0_rtx)); 3397 if (GET_MODE (op0) != DImode) 3398 { 3399 temp = gen_reg_rtx (DImode); 3400 convert_move (temp, op0, 0); 3401 } 3402 else 3403 temp = op0; 3404 emit_insn (gen_rtx_SET (dest, 3405 gen_rtx_IF_THEN_ELSE (GET_MODE (dest), 3406 gen_rtx_fmt_ee (compare_code, DImode, 3407 temp, const0_rtx), 3408 const1_rtx, 3409 dest))); 3410 return 1; 3411 } 3412 else 3413 { 3414 x = gen_compare_reg_1 (compare_code, x, y); 3415 y = const0_rtx; 3416 3417 emit_insn (gen_rtx_SET (dest, const0_rtx)); 3418 emit_insn (gen_rtx_SET (dest, 3419 gen_rtx_IF_THEN_ELSE (GET_MODE (dest), 3420 gen_rtx_fmt_ee (compare_code, 3421 GET_MODE (x), x, y), 3422 const1_rtx, dest))); 3423 return 1; 3424 } 3425 } 3426 3427 3428 /* Emit an scc insn. 
For seq, sne, sgeu, and sltu, we can do this 3429 without jumps using the addx/subx instructions. */ 3430 3431 bool 3432 emit_scc_insn (rtx operands[]) 3433 { 3434 rtx tem, x, y; 3435 enum rtx_code code; 3436 machine_mode mode; 3437 3438 /* The quad-word fp compare library routines all return nonzero to indicate 3439 true, which is different from the equivalent libgcc routines, so we must 3440 handle them specially here. */ 3441 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD) 3442 { 3443 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3], 3444 GET_CODE (operands[1])); 3445 operands[2] = XEXP (operands[1], 0); 3446 operands[3] = XEXP (operands[1], 1); 3447 } 3448 3449 code = GET_CODE (operands[1]); 3450 x = operands[2]; 3451 y = operands[3]; 3452 mode = GET_MODE (x); 3453 3454 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has 3455 more applications). The exception to this is "reg != 0" which can 3456 be done in one instruction on v9 (so we do it). */ 3457 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode)) 3458 { 3459 if (y != const0_rtx) 3460 x = force_reg (mode, gen_rtx_XOR (mode, x, y)); 3461 3462 rtx pat = gen_rtx_SET (operands[0], 3463 gen_rtx_fmt_ee (code, GET_MODE (operands[0]), 3464 x, const0_rtx)); 3465 3466 /* If we can use addx/subx or addxc, add a clobber for CC. */ 3467 if (mode == SImode || (code == NE && TARGET_VIS3)) 3468 { 3469 rtx clobber 3470 = gen_rtx_CLOBBER (VOIDmode, 3471 gen_rtx_REG (mode == SImode ? CCmode : CCXmode, 3472 SPARC_ICC_REG)); 3473 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber)); 3474 } 3475 3476 emit_insn (pat); 3477 return true; 3478 } 3479 3480 /* We can do LTU in DImode using the addxc instruction with VIS3. */ 3481 if (TARGET_ARCH64 3482 && mode == DImode 3483 && !((code == LTU || code == GTU) && TARGET_VIS3) 3484 && gen_v9_scc (operands[0], code, x, y)) 3485 return true; 3486 3487 /* We can do LTU and GEU using the addx/subx instructions too. And 3488 for GTU/LEU, if both operands are registers swap them and fall 3489 back to the easy case. */ 3490 if (code == GTU || code == LEU) 3491 { 3492 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 3493 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG)) 3494 { 3495 tem = x; 3496 x = y; 3497 y = tem; 3498 code = swap_condition (code); 3499 } 3500 } 3501 3502 if (code == LTU || code == GEU) 3503 { 3504 emit_insn (gen_rtx_SET (operands[0], 3505 gen_rtx_fmt_ee (code, GET_MODE (operands[0]), 3506 gen_compare_reg_1 (code, x, y), 3507 const0_rtx))); 3508 return true; 3509 } 3510 3511 /* All the possibilities to use addx/subx-based sequences have been 3512 exhausted, so try for a 3-instruction sequence using v9 conditional 3513 moves. */ 3514 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y)) 3515 return true; 3516 3517 /* Nope, do branches. */ 3518 return false; 3519 } 3520 3521 /* Emit a conditional jump insn for the v9 architecture using comparison code 3522 CODE and jump target LABEL. 3523 This function exists to take advantage of the v9 brxx insns. */ 3524 3525 static void 3526 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label) 3527 { 3528 emit_jump_insn (gen_rtx_SET (pc_rtx, 3529 gen_rtx_IF_THEN_ELSE (VOIDmode, 3530 gen_rtx_fmt_ee (code, GET_MODE (op0), 3531 op0, const0_rtx), 3532 gen_rtx_LABEL_REF (VOIDmode, label), 3533 pc_rtx))); 3534 } 3535 3536 /* Emit a conditional jump insn for the UA2011 architecture using 3537 comparison code CODE and jump target LABEL.
This function exists 3538 to take advantage of the UA2011 Compare and Branch insns. */ 3539 3540 static void 3541 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label) 3542 { 3543 rtx if_then_else; 3544 3545 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode, 3546 gen_rtx_fmt_ee(code, GET_MODE(op0), 3547 op0, op1), 3548 gen_rtx_LABEL_REF (VOIDmode, label), 3549 pc_rtx); 3550 3551 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else)); 3552 } 3553 3554 void 3555 emit_conditional_branch_insn (rtx operands[]) 3556 { 3557 /* The quad-word fp compare library routines all return nonzero to indicate 3558 true, which is different from the equivalent libgcc routines, so we must 3559 handle them specially here. */ 3560 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD) 3561 { 3562 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2], 3563 GET_CODE (operands[0])); 3564 operands[1] = XEXP (operands[0], 0); 3565 operands[2] = XEXP (operands[0], 1); 3566 } 3567 3568 /* If we can tell early on that the comparison is against a constant 3569 that won't fit in the 5-bit signed immediate field of a cbcond, 3570 use one of the other v9 conditional branch sequences. */ 3571 if (TARGET_CBCOND 3572 && GET_CODE (operands[1]) == REG 3573 && (GET_MODE (operands[1]) == SImode 3574 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode)) 3575 && (GET_CODE (operands[2]) != CONST_INT 3576 || SPARC_SIMM5_P (INTVAL (operands[2])))) 3577 { 3578 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]); 3579 return; 3580 } 3581 3582 if (TARGET_ARCH64 && operands[2] == const0_rtx 3583 && GET_CODE (operands[1]) == REG 3584 && GET_MODE (operands[1]) == DImode) 3585 { 3586 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]); 3587 return; 3588 } 3589 3590 operands[1] = gen_compare_reg (operands[0]); 3591 operands[2] = const0_rtx; 3592 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode, 3593 operands[1], operands[2]); 3594 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2], 3595 operands[3])); 3596 } 3597 3598 3599 /* Generate a DFmode part of a hard TFmode register. 3600 REG is the TFmode hard register, LOW is 1 for the 3601 low 64bit of the register and 0 otherwise. 3602 */ 3603 rtx 3604 gen_df_reg (rtx reg, int low) 3605 { 3606 int regno = REGNO (reg); 3607 3608 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0)) 3609 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2; 3610 return gen_rtx_REG (DFmode, regno); 3611 } 3612 3613 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value. 3614 Unlike normal calls, TFmode operands are passed by reference. It is 3615 assumed that no more than 3 operands are required. */ 3616 3617 static void 3618 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands) 3619 { 3620 rtx ret_slot = NULL, arg[3], func_sym; 3621 int i; 3622 3623 /* We only expect to be called for conversions, unary, and binary ops. */ 3624 gcc_assert (nargs == 2 || nargs == 3); 3625 3626 for (i = 0; i < nargs; ++i) 3627 { 3628 rtx this_arg = operands[i]; 3629 rtx this_slot; 3630 3631 /* TFmode arguments and return values are passed by reference. */ 3632 if (GET_MODE (this_arg) == TFmode) 3633 { 3634 int force_stack_temp; 3635 3636 force_stack_temp = 0; 3637 if (TARGET_BUGGY_QP_LIB && i == 0) 3638 force_stack_temp = 1; 3639 3640 if (GET_CODE (this_arg) == MEM 3641 && ! 
force_stack_temp) 3642 { 3643 tree expr = MEM_EXPR (this_arg); 3644 if (expr) 3645 mark_addressable (expr); 3646 this_arg = XEXP (this_arg, 0); 3647 } 3648 else if (CONSTANT_P (this_arg) 3649 && ! force_stack_temp) 3650 { 3651 this_slot = force_const_mem (TFmode, this_arg); 3652 this_arg = XEXP (this_slot, 0); 3653 } 3654 else 3655 { 3656 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode)); 3657 3658 /* Operand 0 is the return value. We'll copy it out later. */ 3659 if (i > 0) 3660 emit_move_insn (this_slot, this_arg); 3661 else 3662 ret_slot = this_slot; 3663 3664 this_arg = XEXP (this_slot, 0); 3665 } 3666 } 3667 3668 arg[i] = this_arg; 3669 } 3670 3671 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name); 3672 3673 if (GET_MODE (operands[0]) == TFmode) 3674 { 3675 if (nargs == 2) 3676 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3677 arg[0], GET_MODE (arg[0]), 3678 arg[1], GET_MODE (arg[1])); 3679 else 3680 emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3681 arg[0], GET_MODE (arg[0]), 3682 arg[1], GET_MODE (arg[1]), 3683 arg[2], GET_MODE (arg[2])); 3684 3685 if (ret_slot) 3686 emit_move_insn (operands[0], ret_slot); 3687 } 3688 else 3689 { 3690 rtx ret; 3691 3692 gcc_assert (nargs == 2); 3693 3694 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL, 3695 GET_MODE (operands[0]), 3696 arg[1], GET_MODE (arg[1])); 3697 3698 if (ret != operands[0]) 3699 emit_move_insn (operands[0], ret); 3700 } 3701 } 3702 3703 /* Expand soft-float TFmode calls to sparc abi routines. */ 3704 3705 static void 3706 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands) 3707 { 3708 const char *func; 3709 3710 switch (code) 3711 { 3712 case PLUS: 3713 func = "_Qp_add"; 3714 break; 3715 case MINUS: 3716 func = "_Qp_sub"; 3717 break; 3718 case MULT: 3719 func = "_Qp_mul"; 3720 break; 3721 case DIV: 3722 func = "_Qp_div"; 3723 break; 3724 default: 3725 gcc_unreachable (); 3726 } 3727 3728 emit_soft_tfmode_libcall (func, 3, operands); 3729 } 3730 3731 static void 3732 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands) 3733 { 3734 const char *func; 3735 3736 gcc_assert (code == SQRT); 3737 func = "_Qp_sqrt"; 3738 3739 emit_soft_tfmode_libcall (func, 2, operands); 3740 } 3741 3742 static void 3743 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands) 3744 { 3745 const char *func; 3746 3747 switch (code) 3748 { 3749 case FLOAT_EXTEND: 3750 switch (GET_MODE (operands[1])) 3751 { 3752 case E_SFmode: 3753 func = "_Qp_stoq"; 3754 break; 3755 case E_DFmode: 3756 func = "_Qp_dtoq"; 3757 break; 3758 default: 3759 gcc_unreachable (); 3760 } 3761 break; 3762 3763 case FLOAT_TRUNCATE: 3764 switch (GET_MODE (operands[0])) 3765 { 3766 case E_SFmode: 3767 func = "_Qp_qtos"; 3768 break; 3769 case E_DFmode: 3770 func = "_Qp_qtod"; 3771 break; 3772 default: 3773 gcc_unreachable (); 3774 } 3775 break; 3776 3777 case FLOAT: 3778 switch (GET_MODE (operands[1])) 3779 { 3780 case E_SImode: 3781 func = "_Qp_itoq"; 3782 if (TARGET_ARCH64) 3783 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]); 3784 break; 3785 case E_DImode: 3786 func = "_Qp_xtoq"; 3787 break; 3788 default: 3789 gcc_unreachable (); 3790 } 3791 break; 3792 3793 case UNSIGNED_FLOAT: 3794 switch (GET_MODE (operands[1])) 3795 { 3796 case E_SImode: 3797 func = "_Qp_uitoq"; 3798 if (TARGET_ARCH64) 3799 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]); 3800 break; 3801 case E_DImode: 3802 func = "_Qp_uxtoq"; 3803 break; 3804 default: 3805 gcc_unreachable (); 3806 } 3807 break; 3808 3809 case FIX: 3810 switch (GET_MODE 
(operands[0])) 3811 { 3812 case E_SImode: 3813 func = "_Qp_qtoi"; 3814 break; 3815 case E_DImode: 3816 func = "_Qp_qtox"; 3817 break; 3818 default: 3819 gcc_unreachable (); 3820 } 3821 break; 3822 3823 case UNSIGNED_FIX: 3824 switch (GET_MODE (operands[0])) 3825 { 3826 case E_SImode: 3827 func = "_Qp_qtoui"; 3828 break; 3829 case E_DImode: 3830 func = "_Qp_qtoux"; 3831 break; 3832 default: 3833 gcc_unreachable (); 3834 } 3835 break; 3836 3837 default: 3838 gcc_unreachable (); 3839 } 3840 3841 emit_soft_tfmode_libcall (func, 2, operands); 3842 } 3843 3844 /* Expand a hard-float tfmode operation. All arguments must be in 3845 registers. */ 3846 3847 static void 3848 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands) 3849 { 3850 rtx op, dest; 3851 3852 if (GET_RTX_CLASS (code) == RTX_UNARY) 3853 { 3854 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]); 3855 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]); 3856 } 3857 else 3858 { 3859 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]); 3860 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); 3861 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]), 3862 operands[1], operands[2]); 3863 } 3864 3865 if (register_operand (operands[0], VOIDmode)) 3866 dest = operands[0]; 3867 else 3868 dest = gen_reg_rtx (GET_MODE (operands[0])); 3869 3870 emit_insn (gen_rtx_SET (dest, op)); 3871 3872 if (dest != operands[0]) 3873 emit_move_insn (operands[0], dest); 3874 } 3875 3876 void 3877 emit_tfmode_binop (enum rtx_code code, rtx *operands) 3878 { 3879 if (TARGET_HARD_QUAD) 3880 emit_hard_tfmode_operation (code, operands); 3881 else 3882 emit_soft_tfmode_binop (code, operands); 3883 } 3884 3885 void 3886 emit_tfmode_unop (enum rtx_code code, rtx *operands) 3887 { 3888 if (TARGET_HARD_QUAD) 3889 emit_hard_tfmode_operation (code, operands); 3890 else 3891 emit_soft_tfmode_unop (code, operands); 3892 } 3893 3894 void 3895 emit_tfmode_cvt (enum rtx_code code, rtx *operands) 3896 { 3897 if (TARGET_HARD_QUAD) 3898 emit_hard_tfmode_operation (code, operands); 3899 else 3900 emit_soft_tfmode_cvt (code, operands); 3901 } 3902 3903 /* Return nonzero if a branch/jump/call instruction will be emitting 3904 nop into its delay slot. */ 3905 3906 int 3907 empty_delay_slot (rtx_insn *insn) 3908 { 3909 rtx seq; 3910 3911 /* If no previous instruction (should not happen), return true. */ 3912 if (PREV_INSN (insn) == NULL) 3913 return 1; 3914 3915 seq = NEXT_INSN (PREV_INSN (insn)); 3916 if (GET_CODE (PATTERN (seq)) == SEQUENCE) 3917 return 0; 3918 3919 return 1; 3920 } 3921 3922 /* Return nonzero if we should emit a nop after a cbcond instruction. 3923 The cbcond instruction does not have a delay slot, however there is 3924 a severe performance penalty if a control transfer appears right 3925 after a cbcond. Therefore we emit a nop when we detect this 3926 situation. */ 3927 3928 int 3929 emit_cbcond_nop (rtx_insn *insn) 3930 { 3931 rtx next = next_active_insn (insn); 3932 3933 if (!next) 3934 return 1; 3935 3936 if (NONJUMP_INSN_P (next) 3937 && GET_CODE (PATTERN (next)) == SEQUENCE) 3938 next = XVECEXP (PATTERN (next), 0, 0); 3939 else if (CALL_P (next) 3940 && GET_CODE (PATTERN (next)) == PARALLEL) 3941 { 3942 rtx delay = XVECEXP (PATTERN (next), 0, 1); 3943 3944 if (GET_CODE (delay) == RETURN) 3945 { 3946 /* It's a sibling call. Do not emit the nop if we're going 3947 to emit something other than the jump itself as the first 3948 instruction of the sibcall sequence. 
*/ 3949 if (sparc_leaf_function_p || TARGET_FLAT) 3950 return 0; 3951 } 3952 } 3953 3954 if (NONJUMP_INSN_P (next)) 3955 return 0; 3956 3957 return 1; 3958 } 3959 3960 /* Return nonzero if TRIAL can go into the call delay slot. */ 3961 3962 int 3963 eligible_for_call_delay (rtx_insn *trial) 3964 { 3965 rtx pat; 3966 3967 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE) 3968 return 0; 3969 3970 /* The only problematic cases are TLS sequences with Sun as/ld. */ 3971 if ((TARGET_GNU_TLS && HAVE_GNU_LD) || !TARGET_TLS) 3972 return 1; 3973 3974 pat = PATTERN (trial); 3975 3976 /* We must reject tgd_add{32|64}, i.e. 3977 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD))) 3978 and tldm_add{32|64}, i.e. 3979 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM))) 3980 for Sun as/ld. */ 3981 if (GET_CODE (pat) == SET 3982 && GET_CODE (SET_SRC (pat)) == PLUS) 3983 { 3984 rtx unspec = XEXP (SET_SRC (pat), 1); 3985 3986 if (GET_CODE (unspec) == UNSPEC 3987 && (XINT (unspec, 1) == UNSPEC_TLSGD 3988 || XINT (unspec, 1) == UNSPEC_TLSLDM)) 3989 return 0; 3990 } 3991 3992 return 1; 3993 } 3994 3995 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore' 3996 instruction. RETURN_P is true if the v9 variant 'return' is to be 3997 considered in the test too. 3998 3999 TRIAL must be a SET whose destination is a REG appropriate for the 4000 'restore' instruction or, if RETURN_P is true, for the 'return' 4001 instruction. */ 4002 4003 static int 4004 eligible_for_restore_insn (rtx trial, bool return_p) 4005 { 4006 rtx pat = PATTERN (trial); 4007 rtx src = SET_SRC (pat); 4008 bool src_is_freg = false; 4009 rtx src_reg; 4010 4011 /* Since we now can do moves between float and integer registers when 4012 VIS3 is enabled, we have to catch this case. We can allow such 4013 moves when doing a 'return' however. */ 4014 src_reg = src; 4015 if (GET_CODE (src_reg) == SUBREG) 4016 src_reg = SUBREG_REG (src_reg); 4017 if (GET_CODE (src_reg) == REG 4018 && SPARC_FP_REG_P (REGNO (src_reg))) 4019 src_is_freg = true; 4020 4021 /* The 'restore src,%g0,dest' pattern for word mode and below. */ 4022 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT 4023 && arith_operand (src, GET_MODE (src)) 4024 && ! src_is_freg) 4025 { 4026 if (TARGET_ARCH64) 4027 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode); 4028 else 4029 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode); 4030 } 4031 4032 /* The 'restore src,%g0,dest' pattern for double-word mode. */ 4033 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT 4034 && arith_double_operand (src, GET_MODE (src)) 4035 && ! src_is_freg) 4036 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode); 4037 4038 /* The 'restore src,%g0,dest' pattern for float if no FPU. */ 4039 else if (! TARGET_FPU && register_operand (src, SFmode)) 4040 return 1; 4041 4042 /* The 'restore src,%g0,dest' pattern for double if no FPU. */ 4043 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode)) 4044 return 1; 4045 4046 /* If we have the 'return' instruction, anything that does not use 4047 local or output registers and can go into a delay slot wins. */ 4048 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1)) 4049 return 1; 4050 4051 /* The 'restore src1,src2,dest' pattern for SImode. 
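   For instance (illustrative sketch only, the exact registers depend on
   register allocation), the final addition of a non-leaf function such as

     int f (int a) { return g (a) + 4; }

   gives an insn of the form
     (set (reg:SI %i0) (plus:SI (reg:SI %o0) (const_int 4)))
   which can be folded into the epilogue, yielding a tail sequence along
   the lines of "ret" followed by "restore %o0, 4, %o0".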
*/ 4052 else if (GET_CODE (src) == PLUS 4053 && register_operand (XEXP (src, 0), SImode) 4054 && arith_operand (XEXP (src, 1), SImode)) 4055 return 1; 4056 4057 /* The 'restore src1,src2,dest' pattern for DImode. */ 4058 else if (GET_CODE (src) == PLUS 4059 && register_operand (XEXP (src, 0), DImode) 4060 && arith_double_operand (XEXP (src, 1), DImode)) 4061 return 1; 4062 4063 /* The 'restore src1,%lo(src2),dest' pattern. */ 4064 else if (GET_CODE (src) == LO_SUM 4065 && ! TARGET_CM_MEDMID 4066 && ((register_operand (XEXP (src, 0), SImode) 4067 && immediate_operand (XEXP (src, 1), SImode)) 4068 || (TARGET_ARCH64 4069 && register_operand (XEXP (src, 0), DImode) 4070 && immediate_operand (XEXP (src, 1), DImode)))) 4071 return 1; 4072 4073 /* The 'restore src,src,dest' pattern. */ 4074 else if (GET_CODE (src) == ASHIFT 4075 && (register_operand (XEXP (src, 0), SImode) 4076 || register_operand (XEXP (src, 0), DImode)) 4077 && XEXP (src, 1) == const1_rtx) 4078 return 1; 4079 4080 return 0; 4081 } 4082 4083 /* Return nonzero if TRIAL can go into the function return's delay slot. */ 4084 4085 int 4086 eligible_for_return_delay (rtx_insn *trial) 4087 { 4088 int regno; 4089 rtx pat; 4090 4091 /* If the function uses __builtin_eh_return, the eh_return machinery 4092 occupies the delay slot. */ 4093 if (crtl->calls_eh_return) 4094 return 0; 4095 4096 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE) 4097 return 0; 4098 4099 /* In the case of a leaf or flat function, anything can go into the slot. */ 4100 if (sparc_leaf_function_p || TARGET_FLAT) 4101 return 1; 4102 4103 if (!NONJUMP_INSN_P (trial)) 4104 return 0; 4105 4106 pat = PATTERN (trial); 4107 if (GET_CODE (pat) == PARALLEL) 4108 { 4109 int i; 4110 4111 if (! TARGET_V9) 4112 return 0; 4113 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--) 4114 { 4115 rtx expr = XVECEXP (pat, 0, i); 4116 if (GET_CODE (expr) != SET) 4117 return 0; 4118 if (GET_CODE (SET_DEST (expr)) != REG) 4119 return 0; 4120 regno = REGNO (SET_DEST (expr)); 4121 if (regno >= 8 && regno < 24) 4122 return 0; 4123 } 4124 return !epilogue_renumber (&pat, 1); 4125 } 4126 4127 if (GET_CODE (pat) != SET) 4128 return 0; 4129 4130 if (GET_CODE (SET_DEST (pat)) != REG) 4131 return 0; 4132 4133 regno = REGNO (SET_DEST (pat)); 4134 4135 /* Otherwise, only operations which can be done in tandem with 4136 a `restore' or `return' insn can go into the delay slot. */ 4137 if (regno >= 8 && regno < 24) 4138 return 0; 4139 4140 /* If this instruction sets up floating point register and we have a return 4141 instruction, it can probably go in. But restore will not work 4142 with FP_REGS. */ 4143 if (! SPARC_INT_REG_P (regno)) 4144 return TARGET_V9 && !epilogue_renumber (&pat, 1); 4145 4146 return eligible_for_restore_insn (trial, true); 4147 } 4148 4149 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */ 4150 4151 int 4152 eligible_for_sibcall_delay (rtx_insn *trial) 4153 { 4154 rtx pat; 4155 4156 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE) 4157 return 0; 4158 4159 if (!NONJUMP_INSN_P (trial)) 4160 return 0; 4161 4162 pat = PATTERN (trial); 4163 4164 if (sparc_leaf_function_p || TARGET_FLAT) 4165 { 4166 /* If the tail call is done using the call instruction, 4167 we have to restore %o7 in the delay slot. 
*/ 4168 if (LEAF_SIBCALL_SLOT_RESERVED_P) 4169 return 0; 4170 4171 /* %g1 is used to build the function address */ 4172 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat)) 4173 return 0; 4174 4175 return 1; 4176 } 4177 4178 if (GET_CODE (pat) != SET) 4179 return 0; 4180 4181 /* Otherwise, only operations which can be done in tandem with 4182 a `restore' insn can go into the delay slot. */ 4183 if (GET_CODE (SET_DEST (pat)) != REG 4184 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24) 4185 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat)))) 4186 return 0; 4187 4188 /* If it mentions %o7, it can't go in, because sibcall will clobber it 4189 in most cases. */ 4190 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat)) 4191 return 0; 4192 4193 return eligible_for_restore_insn (trial, false); 4194 } 4195 4196 /* Determine if it's legal to put X into the constant pool. This 4197 is not possible if X contains the address of a symbol that is 4198 not constant (TLS) or not known at final link time (PIC). */ 4199 4200 static bool 4201 sparc_cannot_force_const_mem (machine_mode mode, rtx x) 4202 { 4203 switch (GET_CODE (x)) 4204 { 4205 case CONST_INT: 4206 case CONST_WIDE_INT: 4207 case CONST_DOUBLE: 4208 case CONST_VECTOR: 4209 /* Accept all non-symbolic constants. */ 4210 return false; 4211 4212 case LABEL_REF: 4213 /* Labels are OK iff we are non-PIC. */ 4214 return flag_pic != 0; 4215 4216 case SYMBOL_REF: 4217 /* 'Naked' TLS symbol references are never OK, 4218 non-TLS symbols are OK iff we are non-PIC. */ 4219 if (SYMBOL_REF_TLS_MODEL (x)) 4220 return true; 4221 else 4222 return flag_pic != 0; 4223 4224 case CONST: 4225 return sparc_cannot_force_const_mem (mode, XEXP (x, 0)); 4226 case PLUS: 4227 case MINUS: 4228 return sparc_cannot_force_const_mem (mode, XEXP (x, 0)) 4229 || sparc_cannot_force_const_mem (mode, XEXP (x, 1)); 4230 case UNSPEC: 4231 return true; 4232 default: 4233 gcc_unreachable (); 4234 } 4235 } 4236 4237 /* Global Offset Table support. */ 4238 static GTY(()) rtx got_symbol_rtx = NULL_RTX; 4239 static GTY(()) rtx got_register_rtx = NULL_RTX; 4240 static GTY(()) rtx got_helper_rtx = NULL_RTX; 4241 4242 static GTY(()) bool got_helper_needed = false; 4243 4244 /* Return the SYMBOL_REF for the Global Offset Table. */ 4245 4246 static rtx 4247 sparc_got (void) 4248 { 4249 if (!got_symbol_rtx) 4250 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); 4251 4252 return got_symbol_rtx; 4253 } 4254 4255 /* Wrapper around the load_pcrel_sym{si,di} patterns. */ 4256 4257 static rtx 4258 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2) 4259 { 4260 int orig_flag_pic = flag_pic; 4261 rtx insn; 4262 4263 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */ 4264 flag_pic = 0; 4265 if (TARGET_ARCH64) 4266 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0))); 4267 else 4268 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0))); 4269 flag_pic = orig_flag_pic; 4270 4271 return insn; 4272 } 4273 4274 /* Output the load_pcrel_sym{si,di} patterns. 
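   For reference, a hedged transcription of the templates below: with
   delayed branches enabled the expected shape of the output is

     sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %reg
     call   __sparc_get_pc_thunk.reg
      add   %reg, %lo(_GLOBAL_OFFSET_TABLE_+4), %reg

   where the actual symbol and helper come from operands 1 and 2, so they
   need not be the GOT symbol and thunk shown here.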
*/ 4275 4276 const char * 4277 output_load_pcrel_sym (rtx *operands) 4278 { 4279 if (flag_delayed_branch) 4280 { 4281 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands); 4282 output_asm_insn ("call\t%a2", operands); 4283 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands); 4284 } 4285 else 4286 { 4287 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands); 4288 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands); 4289 output_asm_insn ("call\t%a2", operands); 4290 output_asm_insn (" nop", NULL); 4291 } 4292 4293 if (operands[2] == got_helper_rtx) 4294 got_helper_needed = true; 4295 4296 return ""; 4297 } 4298 4299 #ifdef HAVE_GAS_HIDDEN 4300 # define USE_HIDDEN_LINKONCE 1 4301 #else 4302 # define USE_HIDDEN_LINKONCE 0 4303 #endif 4304 4305 /* Emit code to load the GOT register. */ 4306 4307 void 4308 load_got_register (void) 4309 { 4310 rtx insn; 4311 4312 if (TARGET_VXWORKS_RTP) 4313 { 4314 if (!got_register_rtx) 4315 got_register_rtx = pic_offset_table_rtx; 4316 4317 insn = gen_vxworks_load_got (); 4318 } 4319 else 4320 { 4321 if (!got_register_rtx) 4322 got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM); 4323 4324 /* The GOT symbol is subject to a PC-relative relocation so we need a 4325 helper function to add the PC value and thus get the final value. */ 4326 if (!got_helper_rtx) 4327 { 4328 char name[32]; 4329 4330 /* Skip the leading '%' as that cannot be used in a symbol name. */ 4331 if (USE_HIDDEN_LINKONCE) 4332 sprintf (name, "__sparc_get_pc_thunk.%s", 4333 reg_names[REGNO (got_register_rtx)] + 1); 4334 else 4335 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", 4336 REGNO (got_register_rtx)); 4337 4338 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 4339 } 4340 4341 insn 4342 = gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx); 4343 } 4344 4345 emit_insn (insn); 4346 } 4347 4348 /* Ensure that we are not using patterns that are not OK with PIC. */ 4349 4350 int 4351 check_pic (int i) 4352 { 4353 rtx op; 4354 4355 switch (flag_pic) 4356 { 4357 case 1: 4358 op = recog_data.operand[i]; 4359 gcc_assert (GET_CODE (op) != SYMBOL_REF 4360 && (GET_CODE (op) != CONST 4361 || (GET_CODE (XEXP (op, 0)) == MINUS 4362 && XEXP (XEXP (op, 0), 0) == sparc_got () 4363 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST))); 4364 /* fallthrough */ 4365 case 2: 4366 default: 4367 return 1; 4368 } 4369 } 4370 4371 /* Return true if X is an address which needs a temporary register when 4372 reloaded while generating PIC code. */ 4373 4374 int 4375 pic_address_needs_scratch (rtx x) 4376 { 4377 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */ 4378 if (GET_CODE (x) == CONST 4379 && GET_CODE (XEXP (x, 0)) == PLUS 4380 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF 4381 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT 4382 && !SMALL_INT (XEXP (XEXP (x, 0), 1))) 4383 return 1; 4384 4385 return 0; 4386 } 4387 4388 /* Determine if a given RTX is a valid constant. We already know this 4389 satisfies CONSTANT_P. */ 4390 4391 static bool 4392 sparc_legitimate_constant_p (machine_mode mode, rtx x) 4393 { 4394 switch (GET_CODE (x)) 4395 { 4396 case CONST: 4397 case SYMBOL_REF: 4398 if (sparc_tls_referenced_p (x)) 4399 return false; 4400 break; 4401 4402 case CONST_DOUBLE: 4403 /* Floating point constants are generally not ok. 4404 The only exception is 0.0 and all-ones in VIS. 
*/ 4405 if (TARGET_VIS 4406 && SCALAR_FLOAT_MODE_P (mode) 4407 && (const_zero_operand (x, mode) 4408 || const_all_ones_operand (x, mode))) 4409 return true; 4410 4411 return false; 4412 4413 case CONST_VECTOR: 4414 /* Vector constants are generally not ok. 4415 The only exception is 0 or -1 in VIS. */ 4416 if (TARGET_VIS 4417 && (const_zero_operand (x, mode) 4418 || const_all_ones_operand (x, mode))) 4419 return true; 4420 4421 return false; 4422 4423 default: 4424 break; 4425 } 4426 4427 return true; 4428 } 4429 4430 /* Determine if a given RTX is a valid constant address. */ 4431 4432 bool 4433 constant_address_p (rtx x) 4434 { 4435 switch (GET_CODE (x)) 4436 { 4437 case LABEL_REF: 4438 case CONST_INT: 4439 case HIGH: 4440 return true; 4441 4442 case CONST: 4443 if (flag_pic && pic_address_needs_scratch (x)) 4444 return false; 4445 return sparc_legitimate_constant_p (Pmode, x); 4446 4447 case SYMBOL_REF: 4448 return !flag_pic && sparc_legitimate_constant_p (Pmode, x); 4449 4450 default: 4451 return false; 4452 } 4453 } 4454 4455 /* Nonzero if the constant value X is a legitimate general operand 4456 when generating PIC code. It is given that flag_pic is on and 4457 that X satisfies CONSTANT_P. */ 4458 4459 bool 4460 legitimate_pic_operand_p (rtx x) 4461 { 4462 if (pic_address_needs_scratch (x)) 4463 return false; 4464 if (sparc_tls_referenced_p (x)) 4465 return false; 4466 return true; 4467 } 4468 4469 /* Return true if X is a representation of the PIC register. */ 4470 4471 static bool 4472 sparc_pic_register_p (rtx x) 4473 { 4474 if (!REG_P (x) || !pic_offset_table_rtx) 4475 return false; 4476 4477 if (x == pic_offset_table_rtx) 4478 return true; 4479 4480 if (!HARD_REGISTER_P (pic_offset_table_rtx) 4481 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress) 4482 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx)) 4483 return true; 4484 4485 return false; 4486 } 4487 4488 #define RTX_OK_FOR_OFFSET_P(X, MODE) \ 4489 (CONST_INT_P (X) \ 4490 && INTVAL (X) >= -0x1000 \ 4491 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE))) 4492 4493 #define RTX_OK_FOR_OLO10_P(X, MODE) \ 4494 (CONST_INT_P (X) \ 4495 && INTVAL (X) >= -0x1000 \ 4496 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE))) 4497 4498 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook. 4499 4500 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT 4501 ordinarily. This changes a bit when generating PIC. */ 4502 4503 static bool 4504 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict) 4505 { 4506 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL; 4507 4508 if (REG_P (addr) || GET_CODE (addr) == SUBREG) 4509 rs1 = addr; 4510 else if (GET_CODE (addr) == PLUS) 4511 { 4512 rs1 = XEXP (addr, 0); 4513 rs2 = XEXP (addr, 1); 4514 4515 /* Canonicalize. REG comes first, if there are no regs, 4516 LO_SUM comes first. */ 4517 if (!REG_P (rs1) 4518 && GET_CODE (rs1) != SUBREG 4519 && (REG_P (rs2) 4520 || GET_CODE (rs2) == SUBREG 4521 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM))) 4522 { 4523 rs1 = XEXP (addr, 1); 4524 rs2 = XEXP (addr, 0); 4525 } 4526 4527 if ((flag_pic == 1 4528 && sparc_pic_register_p (rs1) 4529 && !REG_P (rs2) 4530 && GET_CODE (rs2) != SUBREG 4531 && GET_CODE (rs2) != LO_SUM 4532 && GET_CODE (rs2) != MEM 4533 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2)) 4534 && (! 
symbolic_operand (rs2, VOIDmode) || mode == Pmode) 4535 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2))) 4536 || ((REG_P (rs1) 4537 || GET_CODE (rs1) == SUBREG) 4538 && RTX_OK_FOR_OFFSET_P (rs2, mode))) 4539 { 4540 imm1 = rs2; 4541 rs2 = NULL; 4542 } 4543 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG) 4544 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG)) 4545 { 4546 /* We prohibit REG + REG for TFmode when there are no quad move insns 4547 and we consequently need to split. We do this because REG+REG 4548 is not an offsettable address. If we get the situation in reload 4549 where source and destination of a movtf pattern are both MEMs with 4550 REG+REG address, then only one of them gets converted to an 4551 offsettable address. */ 4552 if (mode == TFmode 4553 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD)) 4554 return 0; 4555 4556 /* Likewise for TImode, but in all cases. */ 4557 if (mode == TImode) 4558 return 0; 4559 4560 /* We prohibit REG + REG on ARCH32 if not optimizing for 4561 DFmode/DImode because then mem_min_alignment is likely to be zero 4562 after reload and the forced split would lack a matching splitter 4563 pattern. */ 4564 if (TARGET_ARCH32 && !optimize 4565 && (mode == DFmode || mode == DImode)) 4566 return 0; 4567 } 4568 else if (USE_AS_OFFSETABLE_LO10 4569 && GET_CODE (rs1) == LO_SUM 4570 && TARGET_ARCH64 4571 && ! TARGET_CM_MEDMID 4572 && RTX_OK_FOR_OLO10_P (rs2, mode)) 4573 { 4574 rs2 = NULL; 4575 imm1 = XEXP (rs1, 1); 4576 rs1 = XEXP (rs1, 0); 4577 if (!CONSTANT_P (imm1) 4578 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1))) 4579 return 0; 4580 } 4581 } 4582 else if (GET_CODE (addr) == LO_SUM) 4583 { 4584 rs1 = XEXP (addr, 0); 4585 imm1 = XEXP (addr, 1); 4586 4587 if (!CONSTANT_P (imm1) 4588 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1))) 4589 return 0; 4590 4591 /* We can't allow TFmode in 32-bit mode, because an offset greater 4592 than the alignment (8) may cause the LO_SUM to overflow. */ 4593 if (mode == TFmode && TARGET_ARCH32) 4594 return 0; 4595 4596 /* During reload, accept the HIGH+LO_SUM construct generated by 4597 sparc_legitimize_reload_address. */ 4598 if (reload_in_progress 4599 && GET_CODE (rs1) == HIGH 4600 && XEXP (rs1, 0) == imm1) 4601 return 1; 4602 } 4603 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr)) 4604 return 1; 4605 else 4606 return 0; 4607 4608 if (GET_CODE (rs1) == SUBREG) 4609 rs1 = SUBREG_REG (rs1); 4610 if (!REG_P (rs1)) 4611 return 0; 4612 4613 if (rs2) 4614 { 4615 if (GET_CODE (rs2) == SUBREG) 4616 rs2 = SUBREG_REG (rs2); 4617 if (!REG_P (rs2)) 4618 return 0; 4619 } 4620 4621 if (strict) 4622 { 4623 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1)) 4624 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2)))) 4625 return 0; 4626 } 4627 else 4628 { 4629 if ((! SPARC_INT_REG_P (REGNO (rs1)) 4630 && REGNO (rs1) != FRAME_POINTER_REGNUM 4631 && REGNO (rs1) < FIRST_PSEUDO_REGISTER) 4632 || (rs2 4633 && (! SPARC_INT_REG_P (REGNO (rs2)) 4634 && REGNO (rs2) != FRAME_POINTER_REGNUM 4635 && REGNO (rs2) < FIRST_PSEUDO_REGISTER))) 4636 return 0; 4637 } 4638 return 1; 4639 } 4640 4641 /* Return the SYMBOL_REF for the tls_get_addr function. */ 4642 4643 static GTY(()) rtx sparc_tls_symbol = NULL_RTX; 4644 4645 static rtx 4646 sparc_tls_get_addr (void) 4647 { 4648 if (!sparc_tls_symbol) 4649 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr"); 4650 4651 return sparc_tls_symbol; 4652 } 4653 4654 /* Return the Global Offset Table to be used in TLS mode. 
*/ 4655 4656 static rtx 4657 sparc_tls_got (void) 4658 { 4659 /* In PIC mode, this is just the PIC offset table. */ 4660 if (flag_pic) 4661 { 4662 crtl->uses_pic_offset_table = 1; 4663 return pic_offset_table_rtx; 4664 } 4665 4666 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for 4667 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */ 4668 if (TARGET_SUN_TLS && TARGET_ARCH32) 4669 { 4670 load_got_register (); 4671 return got_register_rtx; 4672 } 4673 4674 /* In all other cases, we load a new pseudo with the GOT symbol. */ 4675 return copy_to_reg (sparc_got ()); 4676 } 4677 4678 /* Return true if X contains a thread-local symbol. */ 4679 4680 static bool 4681 sparc_tls_referenced_p (rtx x) 4682 { 4683 if (!TARGET_HAVE_TLS) 4684 return false; 4685 4686 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) 4687 x = XEXP (XEXP (x, 0), 0); 4688 4689 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) 4690 return true; 4691 4692 /* That's all we handle in sparc_legitimize_tls_address for now. */ 4693 return false; 4694 } 4695 4696 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute 4697 this (thread-local) address. */ 4698 4699 static rtx 4700 sparc_legitimize_tls_address (rtx addr) 4701 { 4702 rtx temp1, temp2, temp3, ret, o0, got; 4703 rtx_insn *insn; 4704 4705 gcc_assert (can_create_pseudo_p ()); 4706 4707 if (GET_CODE (addr) == SYMBOL_REF) 4708 /* Although the various sethi/or sequences generate SImode values, many of 4709 them can be transformed by the linker when relaxing and, if relaxing to 4710 local-exec, will become a sethi/xor pair, which is signed and therefore 4711 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these 4712 values be spilled onto the stack in 64-bit mode. 
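   As an illustration of the global-dynamic case handled first below,
   the expansion is expected to assemble to something like

     sethi  %tgd_hi22(sym), %o1
     add    %o1, %tgd_lo10(sym), %o1
     add    %l7, %o1, %o0, %tgd_add(sym)
     call   __tls_get_addr, %tgd_call(sym)
      nop

   This is a sketch only: the register choice is arbitrary and the PIC
   register need not be %l7 in every configuration.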
*/ 4713 switch (SYMBOL_REF_TLS_MODEL (addr)) 4714 { 4715 case TLS_MODEL_GLOBAL_DYNAMIC: 4716 start_sequence (); 4717 temp1 = gen_reg_rtx (Pmode); 4718 temp2 = gen_reg_rtx (Pmode); 4719 ret = gen_reg_rtx (Pmode); 4720 o0 = gen_rtx_REG (Pmode, 8); 4721 got = sparc_tls_got (); 4722 if (TARGET_ARCH32) 4723 { 4724 emit_insn (gen_tgd_hi22si (temp1, addr)); 4725 emit_insn (gen_tgd_lo10si (temp2, temp1, addr)); 4726 emit_insn (gen_tgd_addsi (o0, got, temp2, addr)); 4727 insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (), 4728 addr, const1_rtx)); 4729 } 4730 else 4731 { 4732 emit_insn (gen_tgd_hi22di (temp1, addr)); 4733 emit_insn (gen_tgd_lo10di (temp2, temp1, addr)); 4734 emit_insn (gen_tgd_adddi (o0, got, temp2, addr)); 4735 insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (), 4736 addr, const1_rtx)); 4737 } 4738 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0); 4739 RTL_CONST_CALL_P (insn) = 1; 4740 insn = get_insns (); 4741 end_sequence (); 4742 emit_libcall_block (insn, ret, o0, addr); 4743 break; 4744 4745 case TLS_MODEL_LOCAL_DYNAMIC: 4746 start_sequence (); 4747 temp1 = gen_reg_rtx (Pmode); 4748 temp2 = gen_reg_rtx (Pmode); 4749 temp3 = gen_reg_rtx (Pmode); 4750 ret = gen_reg_rtx (Pmode); 4751 o0 = gen_rtx_REG (Pmode, 8); 4752 got = sparc_tls_got (); 4753 if (TARGET_ARCH32) 4754 { 4755 emit_insn (gen_tldm_hi22si (temp1)); 4756 emit_insn (gen_tldm_lo10si (temp2, temp1)); 4757 emit_insn (gen_tldm_addsi (o0, got, temp2)); 4758 insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (), 4759 const1_rtx)); 4760 } 4761 else 4762 { 4763 emit_insn (gen_tldm_hi22di (temp1)); 4764 emit_insn (gen_tldm_lo10di (temp2, temp1)); 4765 emit_insn (gen_tldm_adddi (o0, got, temp2)); 4766 insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (), 4767 const1_rtx)); 4768 } 4769 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0); 4770 RTL_CONST_CALL_P (insn) = 1; 4771 insn = get_insns (); 4772 end_sequence (); 4773 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to 4774 share the LD_BASE result with other LD model accesses. 
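   For example (sketch), given

     static __thread int a, b;
     int f (void) { return a + b; }

   local-dynamic code needs a single __tls_get_addr call to obtain the
   module base, and the per-variable offsets are then applied with the
   tldo patterns emitted just below.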
*/ 4775 emit_libcall_block (insn, temp3, o0, 4776 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), 4777 UNSPEC_TLSLD_BASE)); 4778 temp1 = gen_reg_rtx (Pmode); 4779 temp2 = gen_reg_rtx (Pmode); 4780 if (TARGET_ARCH32) 4781 { 4782 emit_insn (gen_tldo_hix22si (temp1, addr)); 4783 emit_insn (gen_tldo_lox10si (temp2, temp1, addr)); 4784 emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr)); 4785 } 4786 else 4787 { 4788 emit_insn (gen_tldo_hix22di (temp1, addr)); 4789 emit_insn (gen_tldo_lox10di (temp2, temp1, addr)); 4790 emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr)); 4791 } 4792 break; 4793 4794 case TLS_MODEL_INITIAL_EXEC: 4795 temp1 = gen_reg_rtx (Pmode); 4796 temp2 = gen_reg_rtx (Pmode); 4797 temp3 = gen_reg_rtx (Pmode); 4798 got = sparc_tls_got (); 4799 if (TARGET_ARCH32) 4800 { 4801 emit_insn (gen_tie_hi22si (temp1, addr)); 4802 emit_insn (gen_tie_lo10si (temp2, temp1, addr)); 4803 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr)); 4804 } 4805 else 4806 { 4807 emit_insn (gen_tie_hi22di (temp1, addr)); 4808 emit_insn (gen_tie_lo10di (temp2, temp1, addr)); 4809 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr)); 4810 } 4811 if (TARGET_SUN_TLS) 4812 { 4813 ret = gen_reg_rtx (Pmode); 4814 if (TARGET_ARCH32) 4815 emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7), 4816 temp3, addr)); 4817 else 4818 emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7), 4819 temp3, addr)); 4820 } 4821 else 4822 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3); 4823 break; 4824 4825 case TLS_MODEL_LOCAL_EXEC: 4826 temp1 = gen_reg_rtx (Pmode); 4827 temp2 = gen_reg_rtx (Pmode); 4828 if (TARGET_ARCH32) 4829 { 4830 emit_insn (gen_tle_hix22si (temp1, addr)); 4831 emit_insn (gen_tle_lox10si (temp2, temp1, addr)); 4832 } 4833 else 4834 { 4835 emit_insn (gen_tle_hix22di (temp1, addr)); 4836 emit_insn (gen_tle_lox10di (temp2, temp1, addr)); 4837 } 4838 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2); 4839 break; 4840 4841 default: 4842 gcc_unreachable (); 4843 } 4844 4845 else if (GET_CODE (addr) == CONST) 4846 { 4847 rtx base, offset; 4848 4849 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS); 4850 4851 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0)); 4852 offset = XEXP (XEXP (addr, 0), 1); 4853 4854 base = force_operand (base, NULL_RTX); 4855 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset))) 4856 offset = force_reg (Pmode, offset); 4857 ret = gen_rtx_PLUS (Pmode, base, offset); 4858 } 4859 4860 else 4861 gcc_unreachable (); /* for now ... */ 4862 4863 return ret; 4864 } 4865 4866 /* Legitimize PIC addresses. If the address is already position-independent, 4867 we return ORIG. Newly generated position-independent addresses go into a 4868 reg. This is REG if nonzero, otherwise we allocate register(s) as 4869 necessary. */ 4870 4871 static rtx 4872 sparc_legitimize_pic_address (rtx orig, rtx reg) 4873 { 4874 if (GET_CODE (orig) == SYMBOL_REF 4875 /* See the comment in sparc_expand_move. */ 4876 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig))) 4877 { 4878 bool gotdata_op = false; 4879 rtx pic_ref, address; 4880 rtx_insn *insn; 4881 4882 if (!reg) 4883 { 4884 gcc_assert (can_create_pseudo_p ()); 4885 reg = gen_reg_rtx (Pmode); 4886 } 4887 4888 if (flag_pic == 2) 4889 { 4890 /* If not during reload, allocate another temp reg here for loading 4891 in the address, so that these instructions can be optimized 4892 properly. */ 4893 rtx temp_reg = can_create_pseudo_p () ? 
gen_reg_rtx (Pmode) : reg; 4894 4895 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse 4896 won't get confused into thinking that these two instructions 4897 are loading in the true address of the symbol. If in the 4898 future a PIC rtx exists, that should be used instead. */ 4899 if (TARGET_ARCH64) 4900 { 4901 emit_insn (gen_movdi_high_pic (temp_reg, orig)); 4902 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig)); 4903 } 4904 else 4905 { 4906 emit_insn (gen_movsi_high_pic (temp_reg, orig)); 4907 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig)); 4908 } 4909 4910 address = temp_reg; 4911 gotdata_op = true; 4912 } 4913 else 4914 address = orig; 4915 4916 crtl->uses_pic_offset_table = 1; 4917 if (gotdata_op) 4918 { 4919 if (TARGET_ARCH64) 4920 insn = emit_insn (gen_movdi_pic_gotdata_op (reg, 4921 pic_offset_table_rtx, 4922 address, orig)); 4923 else 4924 insn = emit_insn (gen_movsi_pic_gotdata_op (reg, 4925 pic_offset_table_rtx, 4926 address, orig)); 4927 } 4928 else 4929 { 4930 pic_ref 4931 = gen_const_mem (Pmode, 4932 gen_rtx_PLUS (Pmode, 4933 pic_offset_table_rtx, address)); 4934 insn = emit_move_insn (reg, pic_ref); 4935 } 4936 4937 /* Put a REG_EQUAL note on this insn, so that it can be optimized 4938 by loop. */ 4939 set_unique_reg_note (insn, REG_EQUAL, orig); 4940 return reg; 4941 } 4942 else if (GET_CODE (orig) == CONST) 4943 { 4944 rtx base, offset; 4945 4946 if (GET_CODE (XEXP (orig, 0)) == PLUS 4947 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0))) 4948 return orig; 4949 4950 if (!reg) 4951 { 4952 gcc_assert (can_create_pseudo_p ()); 4953 reg = gen_reg_rtx (Pmode); 4954 } 4955 4956 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); 4957 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg); 4958 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), 4959 base == reg ? NULL_RTX : reg); 4960 4961 if (GET_CODE (offset) == CONST_INT) 4962 { 4963 if (SMALL_INT (offset)) 4964 return plus_constant (Pmode, base, INTVAL (offset)); 4965 else if (can_create_pseudo_p ()) 4966 offset = force_reg (Pmode, offset); 4967 else 4968 /* If we reach here, then something is seriously wrong. */ 4969 gcc_unreachable (); 4970 } 4971 return gen_rtx_PLUS (Pmode, base, offset); 4972 } 4973 else if (GET_CODE (orig) == LABEL_REF) 4974 /* ??? We ought to be checking that the register is live instead, in case 4975 it is eliminated. */ 4976 crtl->uses_pic_offset_table = 1; 4977 4978 return orig; 4979 } 4980 4981 /* Try machine-dependent ways of modifying an illegitimate address X 4982 to be legitimate. If we find one, return the new, valid address. 4983 4984 OLDX is the address as it was before break_out_memory_refs was called. 4985 In some cases it is useful to look at this to decide what needs to be done. 4986 4987 MODE is the mode of the operand pointed to by X. 4988 4989 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. 
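   For example (a sketch under 32-bit assumptions), a frame slot at
   offset 20000 does not fit in the 13-bit signed immediate of a load,
   so the access ends up as something like

     sethi  %hi(20000), %g1
     or     %g1, %lo(20000), %g1
     ld     [%fp + %g1], %o0

   i.e. REG+N has been turned into REG+REG; the constant itself is
   materialized by the move expanders, this function only reassociates
   the address.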
*/ 4990 4991 static rtx 4992 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, 4993 machine_mode mode) 4994 { 4995 rtx orig_x = x; 4996 4997 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT) 4998 x = gen_rtx_PLUS (Pmode, XEXP (x, 1), 4999 force_operand (XEXP (x, 0), NULL_RTX)); 5000 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT) 5001 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), 5002 force_operand (XEXP (x, 1), NULL_RTX)); 5003 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS) 5004 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX), 5005 XEXP (x, 1)); 5006 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS) 5007 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), 5008 force_operand (XEXP (x, 1), NULL_RTX)); 5009 5010 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE)) 5011 return x; 5012 5013 if (sparc_tls_referenced_p (x)) 5014 x = sparc_legitimize_tls_address (x); 5015 else if (flag_pic) 5016 x = sparc_legitimize_pic_address (x, NULL_RTX); 5017 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1))) 5018 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), 5019 copy_to_mode_reg (Pmode, XEXP (x, 1))); 5020 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0))) 5021 x = gen_rtx_PLUS (Pmode, XEXP (x, 1), 5022 copy_to_mode_reg (Pmode, XEXP (x, 0))); 5023 else if (GET_CODE (x) == SYMBOL_REF 5024 || GET_CODE (x) == CONST 5025 || GET_CODE (x) == LABEL_REF) 5026 x = copy_to_suggested_reg (x, NULL_RTX, Pmode); 5027 5028 return x; 5029 } 5030 5031 /* Delegitimize an address that was legitimized by the above function. */ 5032 5033 static rtx 5034 sparc_delegitimize_address (rtx x) 5035 { 5036 x = delegitimize_mem_from_attrs (x); 5037 5038 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC) 5039 switch (XINT (XEXP (x, 1), 1)) 5040 { 5041 case UNSPEC_MOVE_PIC: 5042 case UNSPEC_TLSLE: 5043 x = XVECEXP (XEXP (x, 1), 0, 0); 5044 gcc_assert (GET_CODE (x) == SYMBOL_REF); 5045 break; 5046 default: 5047 break; 5048 } 5049 5050 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */ 5051 if (GET_CODE (x) == MINUS 5052 && sparc_pic_register_p (XEXP (x, 0)) 5053 && GET_CODE (XEXP (x, 1)) == LO_SUM 5054 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC 5055 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL) 5056 { 5057 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0); 5058 gcc_assert (GET_CODE (x) == LABEL_REF 5059 || (GET_CODE (x) == CONST 5060 && GET_CODE (XEXP (x, 0)) == PLUS 5061 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF 5062 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)); 5063 } 5064 5065 return x; 5066 } 5067 5068 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to 5069 replace the input X, or the original X if no replacement is called for. 5070 The output parameter *WIN is 1 if the calling macro should goto WIN, 5071 0 if it should not. 5072 5073 For SPARC, we wish to handle addresses by splitting them into 5074 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference. 5075 This cuts the number of extra insns by one. 5076 5077 Do nothing when generating PIC code and the address is a symbolic 5078 operand or requires a scratch register. */ 5079 5080 rtx 5081 sparc_legitimize_reload_address (rtx x, machine_mode mode, 5082 int opnum, int type, 5083 int ind_levels ATTRIBUTE_UNUSED, int *win) 5084 { 5085 /* Decompose SImode constants into HIGH+LO_SUM. 
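   For a symbolic address this means reload ends up with a pair of the
   form (sketch)

     sethi  %hi(sym), %g1
     ld     [%g1 + %lo(sym)], %o0

   with the LO_SUM kept inside the memory reference, as explained in the
   header comment of this function.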
*/ 5086 if (CONSTANT_P (x) 5087 && (mode != TFmode || TARGET_ARCH64) 5088 && GET_MODE (x) == SImode 5089 && GET_CODE (x) != LO_SUM 5090 && GET_CODE (x) != HIGH 5091 && sparc_cmodel <= CM_MEDLOW 5092 && !(flag_pic 5093 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x)))) 5094 { 5095 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x); 5096 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, 5097 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, 5098 opnum, (enum reload_type)type); 5099 *win = 1; 5100 return x; 5101 } 5102 5103 /* We have to recognize what we have already generated above. */ 5104 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH) 5105 { 5106 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, 5107 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, 5108 opnum, (enum reload_type)type); 5109 *win = 1; 5110 return x; 5111 } 5112 5113 *win = 0; 5114 return x; 5115 } 5116 5117 /* Return true if ADDR (a legitimate address expression) 5118 has an effect that depends on the machine mode it is used for. 5119 5120 In PIC mode, 5121 5122 (mem:HI [%l7+a]) 5123 5124 is not equivalent to 5125 5126 (mem:QI [%l7+a]) (mem:QI [%l7+a+1]) 5127 5128 because [%l7+a+1] is interpreted as the address of (a+1). */ 5129 5130 5131 static bool 5132 sparc_mode_dependent_address_p (const_rtx addr, 5133 addr_space_t as ATTRIBUTE_UNUSED) 5134 { 5135 if (GET_CODE (addr) == PLUS 5136 && sparc_pic_register_p (XEXP (addr, 0)) 5137 && symbolic_operand (XEXP (addr, 1), VOIDmode)) 5138 return true; 5139 5140 return false; 5141 } 5142 5143 /* Emit a call instruction with the pattern given by PAT. ADDR is the 5144 address of the call target. */ 5145 5146 void 5147 sparc_emit_call_insn (rtx pat, rtx addr) 5148 { 5149 rtx_insn *insn; 5150 5151 insn = emit_call_insn (pat); 5152 5153 /* The PIC register is live on entry to VxWorks PIC PLT entries. */ 5154 if (TARGET_VXWORKS_RTP 5155 && flag_pic 5156 && GET_CODE (addr) == SYMBOL_REF 5157 && (SYMBOL_REF_DECL (addr) 5158 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr)) 5159 : !SYMBOL_REF_LOCAL_P (addr))) 5160 { 5161 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx); 5162 crtl->uses_pic_offset_table = 1; 5163 } 5164 } 5165 5166 /* Return 1 if RTX is a MEM which is known to be aligned to at 5167 least a DESIRED byte boundary. */ 5168 5169 int 5170 mem_min_alignment (rtx mem, int desired) 5171 { 5172 rtx addr, base, offset; 5173 5174 /* If it's not a MEM we can't accept it. */ 5175 if (GET_CODE (mem) != MEM) 5176 return 0; 5177 5178 /* Obviously... */ 5179 if (!TARGET_UNALIGNED_DOUBLES 5180 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired) 5181 return 1; 5182 5183 /* ??? The rest of the function predates MEM_ALIGN so 5184 there is probably a bit of redundancy. */ 5185 addr = XEXP (mem, 0); 5186 base = offset = NULL_RTX; 5187 if (GET_CODE (addr) == PLUS) 5188 { 5189 if (GET_CODE (XEXP (addr, 0)) == REG) 5190 { 5191 base = XEXP (addr, 0); 5192 5193 /* What we are saying here is that if the base 5194 REG is aligned properly, the compiler will make 5195 sure any REG based index upon it will be so 5196 as well. 
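   For instance, with a base register recorded as 8-byte aligned, an
   address of the form base+16 passes a check for desired == 8 whereas
   base+12 does not, which is why only the constant part of the address
   is tested below.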
*/ 5197 if (GET_CODE (XEXP (addr, 1)) == CONST_INT) 5198 offset = XEXP (addr, 1); 5199 else 5200 offset = const0_rtx; 5201 } 5202 } 5203 else if (GET_CODE (addr) == REG) 5204 { 5205 base = addr; 5206 offset = const0_rtx; 5207 } 5208 5209 if (base != NULL_RTX) 5210 { 5211 int regno = REGNO (base); 5212 5213 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM) 5214 { 5215 /* Check if the compiler has recorded some information 5216 about the alignment of the base REG. If reload has 5217 completed, we already matched with proper alignments. 5218 If not running global_alloc, reload might give us 5219 unaligned pointer to local stack though. */ 5220 if (((cfun != 0 5221 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT) 5222 || (optimize && reload_completed)) 5223 && (INTVAL (offset) & (desired - 1)) == 0) 5224 return 1; 5225 } 5226 else 5227 { 5228 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0) 5229 return 1; 5230 } 5231 } 5232 else if (! TARGET_UNALIGNED_DOUBLES 5233 || CONSTANT_P (addr) 5234 || GET_CODE (addr) == LO_SUM) 5235 { 5236 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES 5237 is true, in which case we can only assume that an access is aligned if 5238 it is to a constant address, or the address involves a LO_SUM. */ 5239 return 1; 5240 } 5241 5242 /* An obviously unaligned address. */ 5243 return 0; 5244 } 5245 5246 5247 /* Vectors to keep interesting information about registers where it can easily 5248 be got. We used to use the actual mode value as the bit number, but there 5249 are more than 32 modes now. Instead we use two tables: one indexed by 5250 hard register number, and one indexed by mode. */ 5251 5252 /* The purpose of sparc_mode_class is to shrink the range of modes so that 5253 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is 5254 mapped into one sparc_mode_class mode. */ 5255 5256 enum sparc_mode_class { 5257 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE, 5258 SF_MODE, DF_MODE, TF_MODE, OF_MODE, 5259 CC_MODE, CCFP_MODE 5260 }; 5261 5262 /* Modes for single-word and smaller quantities. */ 5263 #define S_MODES \ 5264 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE)) 5265 5266 /* Modes for double-word and smaller quantities. */ 5267 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE)) 5268 5269 /* Modes for quad-word and smaller quantities. */ 5270 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE)) 5271 5272 /* Modes for 8-word and smaller quantities. */ 5273 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE)) 5274 5275 /* Modes for single-float quantities. */ 5276 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE)) 5277 5278 /* Modes for double-float and smaller quantities. */ 5279 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE)) 5280 5281 /* Modes for quad-float and smaller quantities. */ 5282 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE)) 5283 5284 /* Modes for quad-float pairs and smaller quantities. */ 5285 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE)) 5286 5287 /* Modes for double-float only quantities. */ 5288 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE)) 5289 5290 /* Modes for quad-float and double-float only quantities. */ 5291 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE)) 5292 5293 /* Modes for quad-float pairs and double-float only quantities. 
*/ 5294 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE)) 5295 5296 /* Modes for condition codes. */ 5297 #define CC_MODES (1 << (int) CC_MODE) 5298 #define CCFP_MODES (1 << (int) CCFP_MODE) 5299 5300 /* Value is 1 if register/mode pair is acceptable on sparc. 5301 5302 The funny mixture of D and T modes is because integer operations 5303 do not specially operate on tetra quantities, so non-quad-aligned 5304 registers can hold quadword quantities (except %o4 and %i4 because 5305 they cross fixed registers). 5306 5307 ??? Note that, despite the settings, non-double-aligned parameter 5308 registers can hold double-word quantities in 32-bit mode. */ 5309 5310 /* This points to either the 32-bit or the 64-bit version. */ 5311 static const int *hard_regno_mode_classes; 5312 5313 static const int hard_32bit_mode_classes[] = { 5314 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, 5315 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES, 5316 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, 5317 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES, 5318 5319 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5320 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5321 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5322 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES, 5323 5324 /* FP regs f32 to f63. Only the even numbered registers actually exist, 5325 and none can hold SFmode/SImode values. */ 5326 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5327 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5328 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5329 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5330 5331 /* %fcc[0123] */ 5332 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES, 5333 5334 /* %icc, %sfp, %gsr */ 5335 CC_MODES, 0, D_MODES 5336 }; 5337 5338 static const int hard_64bit_mode_classes[] = { 5339 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 5340 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 5341 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 5342 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, 5343 5344 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5345 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5346 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES, 5347 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES, 5348 5349 /* FP regs f32 to f63. Only the even numbered registers actually exist, 5350 and none can hold SFmode/SImode values. 
*/ 5351 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5352 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5353 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5354 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0, 5355 5356 /* %fcc[0123] */ 5357 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES, 5358 5359 /* %icc, %sfp, %gsr */ 5360 CC_MODES, 0, D_MODES 5361 }; 5362 5363 static int sparc_mode_class [NUM_MACHINE_MODES]; 5364 5365 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER]; 5366 5367 static void 5368 sparc_init_modes (void) 5369 { 5370 int i; 5371 5372 for (i = 0; i < NUM_MACHINE_MODES; i++) 5373 { 5374 machine_mode m = (machine_mode) i; 5375 unsigned int size = GET_MODE_SIZE (m); 5376 5377 switch (GET_MODE_CLASS (m)) 5378 { 5379 case MODE_INT: 5380 case MODE_PARTIAL_INT: 5381 case MODE_COMPLEX_INT: 5382 if (size < 4) 5383 sparc_mode_class[i] = 1 << (int) H_MODE; 5384 else if (size == 4) 5385 sparc_mode_class[i] = 1 << (int) S_MODE; 5386 else if (size == 8) 5387 sparc_mode_class[i] = 1 << (int) D_MODE; 5388 else if (size == 16) 5389 sparc_mode_class[i] = 1 << (int) T_MODE; 5390 else if (size == 32) 5391 sparc_mode_class[i] = 1 << (int) O_MODE; 5392 else 5393 sparc_mode_class[i] = 0; 5394 break; 5395 case MODE_VECTOR_INT: 5396 if (size == 4) 5397 sparc_mode_class[i] = 1 << (int) SF_MODE; 5398 else if (size == 8) 5399 sparc_mode_class[i] = 1 << (int) DF_MODE; 5400 else 5401 sparc_mode_class[i] = 0; 5402 break; 5403 case MODE_FLOAT: 5404 case MODE_COMPLEX_FLOAT: 5405 if (size == 4) 5406 sparc_mode_class[i] = 1 << (int) SF_MODE; 5407 else if (size == 8) 5408 sparc_mode_class[i] = 1 << (int) DF_MODE; 5409 else if (size == 16) 5410 sparc_mode_class[i] = 1 << (int) TF_MODE; 5411 else if (size == 32) 5412 sparc_mode_class[i] = 1 << (int) OF_MODE; 5413 else 5414 sparc_mode_class[i] = 0; 5415 break; 5416 case MODE_CC: 5417 if (m == CCFPmode || m == CCFPEmode) 5418 sparc_mode_class[i] = 1 << (int) CCFP_MODE; 5419 else 5420 sparc_mode_class[i] = 1 << (int) CC_MODE; 5421 break; 5422 default: 5423 sparc_mode_class[i] = 0; 5424 break; 5425 } 5426 } 5427 5428 if (TARGET_ARCH64) 5429 hard_regno_mode_classes = hard_64bit_mode_classes; 5430 else 5431 hard_regno_mode_classes = hard_32bit_mode_classes; 5432 5433 /* Initialize the array used by REGNO_REG_CLASS. */ 5434 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 5435 { 5436 if (i < 16 && TARGET_V8PLUS) 5437 sparc_regno_reg_class[i] = I64_REGS; 5438 else if (i < 32 || i == FRAME_POINTER_REGNUM) 5439 sparc_regno_reg_class[i] = GENERAL_REGS; 5440 else if (i < 64) 5441 sparc_regno_reg_class[i] = FP_REGS; 5442 else if (i < 96) 5443 sparc_regno_reg_class[i] = EXTRA_FP_REGS; 5444 else if (i < 100) 5445 sparc_regno_reg_class[i] = FPCC_REGS; 5446 else 5447 sparc_regno_reg_class[i] = NO_REGS; 5448 } 5449 } 5450 5451 /* Return whether REGNO, a global or FP register, must be saved/restored. */ 5452 5453 static inline bool 5454 save_global_or_fp_reg_p (unsigned int regno, 5455 int leaf_function ATTRIBUTE_UNUSED) 5456 { 5457 return !call_used_regs[regno] && df_regs_ever_live_p (regno); 5458 } 5459 5460 /* Return whether the return address register (%i7) is needed. */ 5461 5462 static inline bool 5463 return_addr_reg_needed_p (int leaf_function) 5464 { 5465 /* If it is live, for example because of __builtin_return_address (0). 
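   e.g. something like

     void *f (void) { return __builtin_return_address (0); }

   keeps the register live and therefore forces it to be saved
   (illustrative example).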
*/ 5466 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM)) 5467 return true; 5468 5469 /* Otherwise, it is needed as save register if %o7 is clobbered. */ 5470 if (!leaf_function 5471 /* Loading the GOT register clobbers %o7. */ 5472 || crtl->uses_pic_offset_table 5473 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM)) 5474 return true; 5475 5476 return false; 5477 } 5478 5479 /* Return whether REGNO, a local or in register, must be saved/restored. */ 5480 5481 static bool 5482 save_local_or_in_reg_p (unsigned int regno, int leaf_function) 5483 { 5484 /* General case: call-saved registers live at some point. */ 5485 if (!call_used_regs[regno] && df_regs_ever_live_p (regno)) 5486 return true; 5487 5488 /* Frame pointer register (%fp) if needed. */ 5489 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) 5490 return true; 5491 5492 /* Return address register (%i7) if needed. */ 5493 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function)) 5494 return true; 5495 5496 /* GOT register (%l7) if needed. */ 5497 if (got_register_rtx && regno == REGNO (got_register_rtx)) 5498 return true; 5499 5500 /* If the function accesses prior frames, the frame pointer and the return 5501 address of the previous frame must be saved on the stack. */ 5502 if (crtl->accesses_prior_frames 5503 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM)) 5504 return true; 5505 5506 return false; 5507 } 5508 5509 /* Compute the frame size required by the function. This function is called 5510 during the reload pass and also by sparc_expand_prologue. */ 5511 5512 HOST_WIDE_INT 5513 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function) 5514 { 5515 HOST_WIDE_INT frame_size, apparent_frame_size; 5516 int args_size, n_global_fp_regs = 0; 5517 bool save_local_in_regs_p = false; 5518 unsigned int i; 5519 5520 /* If the function allocates dynamic stack space, the dynamic offset is 5521 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */ 5522 if (leaf_function && !cfun->calls_alloca) 5523 args_size = 0; 5524 else 5525 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl); 5526 5527 /* Calculate space needed for global registers. */ 5528 if (TARGET_ARCH64) 5529 { 5530 for (i = 0; i < 8; i++) 5531 if (save_global_or_fp_reg_p (i, 0)) 5532 n_global_fp_regs += 2; 5533 } 5534 else 5535 { 5536 for (i = 0; i < 8; i += 2) 5537 if (save_global_or_fp_reg_p (i, 0) 5538 || save_global_or_fp_reg_p (i + 1, 0)) 5539 n_global_fp_regs += 2; 5540 } 5541 5542 /* In the flat window model, find out which local and in registers need to 5543 be saved. We don't reserve space in the current frame for them as they 5544 will be spilled into the register window save area of the caller's frame. 5545 However, as soon as we use this register window save area, we must create 5546 that of the current frame to make it the live one. */ 5547 if (TARGET_FLAT) 5548 for (i = 16; i < 32; i++) 5549 if (save_local_or_in_reg_p (i, leaf_function)) 5550 { 5551 save_local_in_regs_p = true; 5552 break; 5553 } 5554 5555 /* Calculate space needed for FP registers. */ 5556 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2) 5557 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0)) 5558 n_global_fp_regs += 2; 5559 5560 if (size == 0 5561 && n_global_fp_regs == 0 5562 && args_size == 0 5563 && !save_local_in_regs_p) 5564 frame_size = apparent_frame_size = 0; 5565 else 5566 { 5567 /* Start from the apparent frame size. 
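   As a worked example under the 64-bit accounting above: with 20 bytes
   of locals and two global registers to save (n_global_fp_regs == 4),
   this starts at ROUND_UP (20, 8) + 4 * 4 = 40 bytes, before the
   argument area, the register window save area and the final alignment
   are added just below.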
*/ 5568 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4; 5569 5570 /* We need to add the size of the outgoing argument area. */ 5571 frame_size = apparent_frame_size + ROUND_UP (args_size, 8); 5572 5573 /* And that of the register window save area. */ 5574 frame_size += FIRST_PARM_OFFSET (cfun->decl); 5575 5576 /* Finally, bump to the appropriate alignment. */ 5577 frame_size = SPARC_STACK_ALIGN (frame_size); 5578 } 5579 5580 /* Set up values for use in prologue and epilogue. */ 5581 sparc_frame_size = frame_size; 5582 sparc_apparent_frame_size = apparent_frame_size; 5583 sparc_n_global_fp_regs = n_global_fp_regs; 5584 sparc_save_local_in_regs_p = save_local_in_regs_p; 5585 5586 return frame_size; 5587 } 5588 5589 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */ 5590 5591 int 5592 sparc_initial_elimination_offset (int to) 5593 { 5594 int offset; 5595 5596 if (to == STACK_POINTER_REGNUM) 5597 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf); 5598 else 5599 offset = 0; 5600 5601 offset += SPARC_STACK_BIAS; 5602 return offset; 5603 } 5604 5605 /* Output any necessary .register pseudo-ops. */ 5606 5607 void 5608 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED) 5609 { 5610 #ifdef HAVE_AS_REGISTER_PSEUDO_OP 5611 int i; 5612 5613 if (TARGET_ARCH32) 5614 return; 5615 5616 /* Check if %g[2367] were used without 5617 .register being printed for them already. */ 5618 for (i = 2; i < 8; i++) 5619 { 5620 if (df_regs_ever_live_p (i) 5621 && ! sparc_hard_reg_printed [i]) 5622 { 5623 sparc_hard_reg_printed [i] = 1; 5624 /* %g7 is used as TLS base register, use #ignore 5625 for it instead of #scratch. */ 5626 fprintf (file, "\t.register\t%%g%d, #%s\n", i, 5627 i == 7 ? "ignore" : "scratch"); 5628 } 5629 if (i == 3) i = 5; 5630 } 5631 #endif 5632 } 5633 5634 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) 5635 5636 #if PROBE_INTERVAL > 4096 5637 #error Cannot use indexed addressing mode for stack probing 5638 #endif 5639 5640 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, 5641 inclusive. These are offsets from the current stack pointer. 5642 5643 Note that we don't use the REG+REG addressing mode for the probes because 5644 of the stack bias in 64-bit mode. And it doesn't really buy us anything 5645 so the advantages of having a single code win here. */ 5646 5647 static void 5648 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) 5649 { 5650 rtx g1 = gen_rtx_REG (Pmode, 1); 5651 5652 /* See if we have a constant small number of probes to generate. If so, 5653 that's the easy case. */ 5654 if (size <= PROBE_INTERVAL) 5655 { 5656 emit_move_insn (g1, GEN_INT (first)); 5657 emit_insn (gen_rtx_SET (g1, 5658 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); 5659 emit_stack_probe (plus_constant (Pmode, g1, -size)); 5660 } 5661 5662 /* The run-time loop is made up of 9 insns in the generic case while the 5663 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */ 5664 else if (size <= 4 * PROBE_INTERVAL) 5665 { 5666 HOST_WIDE_INT i; 5667 5668 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL)); 5669 emit_insn (gen_rtx_SET (g1, 5670 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); 5671 emit_stack_probe (g1); 5672 5673 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until 5674 it exceeds SIZE. If only two probes are needed, this will not 5675 generate any code. Then probe at FIRST + SIZE. 
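   As a worked example with PROBE_INTERVAL == 4096 and FIRST == 0:
   SIZE == 10000 probes offsets 4096, 8192 and finally 10000, while
   SIZE == 6000 needs only two probes (4096 and 6000) and the loop
   below emits nothing.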
*/ 5676 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) 5677 { 5678 emit_insn (gen_rtx_SET (g1, 5679 plus_constant (Pmode, g1, -PROBE_INTERVAL))); 5680 emit_stack_probe (g1); 5681 } 5682 5683 emit_stack_probe (plus_constant (Pmode, g1, 5684 (i - PROBE_INTERVAL) - size)); 5685 } 5686 5687 /* Otherwise, do the same as above, but in a loop. Note that we must be 5688 extra careful with variables wrapping around because we might be at 5689 the very top (or the very bottom) of the address space and we have 5690 to be able to handle this case properly; in particular, we use an 5691 equality test for the loop condition. */ 5692 else 5693 { 5694 HOST_WIDE_INT rounded_size; 5695 rtx g4 = gen_rtx_REG (Pmode, 4); 5696 5697 emit_move_insn (g1, GEN_INT (first)); 5698 5699 5700 /* Step 1: round SIZE to the previous multiple of the interval. */ 5701 5702 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); 5703 emit_move_insn (g4, GEN_INT (rounded_size)); 5704 5705 5706 /* Step 2: compute initial and final value of the loop counter. */ 5707 5708 /* TEST_ADDR = SP + FIRST. */ 5709 emit_insn (gen_rtx_SET (g1, 5710 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1))); 5711 5712 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ 5713 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4))); 5714 5715 5716 /* Step 3: the loop 5717 5718 while (TEST_ADDR != LAST_ADDR) 5719 { 5720 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL 5721 probe at TEST_ADDR 5722 } 5723 5724 probes at FIRST + N * PROBE_INTERVAL for values of N from 1 5725 until it is equal to ROUNDED_SIZE. */ 5726 5727 if (TARGET_ARCH64) 5728 emit_insn (gen_probe_stack_rangedi (g1, g1, g4)); 5729 else 5730 emit_insn (gen_probe_stack_rangesi (g1, g1, g4)); 5731 5732 5733 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time 5734 that SIZE is equal to ROUNDED_SIZE. */ 5735 5736 if (size != rounded_size) 5737 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size)); 5738 } 5739 5740 /* Make sure nothing is scheduled before we are done. */ 5741 emit_insn (gen_blockage ()); 5742 } 5743 5744 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are 5745 absolute addresses. */ 5746 5747 const char * 5748 output_probe_stack_range (rtx reg1, rtx reg2) 5749 { 5750 static int labelno = 0; 5751 char loop_lab[32]; 5752 rtx xops[2]; 5753 5754 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); 5755 5756 /* Loop. */ 5757 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); 5758 5759 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ 5760 xops[0] = reg1; 5761 xops[1] = GEN_INT (-PROBE_INTERVAL); 5762 output_asm_insn ("add\t%0, %1, %0", xops); 5763 5764 /* Test if TEST_ADDR == LAST_ADDR. */ 5765 xops[1] = reg2; 5766 output_asm_insn ("cmp\t%0, %1", xops); 5767 5768 /* Probe at TEST_ADDR and branch. */ 5769 if (TARGET_ARCH64) 5770 fputs ("\tbne,pt\t%xcc,", asm_out_file); 5771 else 5772 fputs ("\tbne\t", asm_out_file); 5773 assemble_name_raw (asm_out_file, loop_lab); 5774 fputc ('\n', asm_out_file); 5775 xops[1] = GEN_INT (SPARC_STACK_BIAS); 5776 output_asm_insn (" st\t%%g0, [%0+%1]", xops); 5777 5778 return ""; 5779 } 5780 5781 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as 5782 needed. LOW is supposed to be double-word aligned for 32-bit registers. 5783 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE 5784 is the action to be performed if SAVE_P returns true and ACTION_FALSE 5785 the action to be performed if it returns false. Return the new offset. 
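For example, the prologue's global/FP saves below boil down to
  emit_save_or_restore_regs (0, 8, base, offset, 0,
                             save_global_or_fp_reg_p, SORR_SAVE, SORR_NONE)
so the registers that need saving are stored back to back (SORR_NONE means
a skipped register does not consume a slot), whereas the local/in saves
pass SORR_ADVANCE so that every register keeps its fixed slot in the
register window save area.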
*/ 5786 5787 typedef bool (*sorr_pred_t) (unsigned int, int); 5788 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t; 5789 5790 static int 5791 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base, 5792 int offset, int leaf_function, sorr_pred_t save_p, 5793 sorr_act_t action_true, sorr_act_t action_false) 5794 { 5795 unsigned int i; 5796 rtx mem; 5797 rtx_insn *insn; 5798 5799 if (TARGET_ARCH64 && high <= 32) 5800 { 5801 int fp_offset = -1; 5802 5803 for (i = low; i < high; i++) 5804 { 5805 if (save_p (i, leaf_function)) 5806 { 5807 mem = gen_frame_mem (DImode, plus_constant (Pmode, 5808 base, offset)); 5809 if (action_true == SORR_SAVE) 5810 { 5811 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i)); 5812 RTX_FRAME_RELATED_P (insn) = 1; 5813 } 5814 else /* action_true == SORR_RESTORE */ 5815 { 5816 /* The frame pointer must be restored last since its old 5817 value may be used as base address for the frame. This 5818 is problematic in 64-bit mode only because of the lack 5819 of double-word load instruction. */ 5820 if (i == HARD_FRAME_POINTER_REGNUM) 5821 fp_offset = offset; 5822 else 5823 emit_move_insn (gen_rtx_REG (DImode, i), mem); 5824 } 5825 offset += 8; 5826 } 5827 else if (action_false == SORR_ADVANCE) 5828 offset += 8; 5829 } 5830 5831 if (fp_offset >= 0) 5832 { 5833 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset)); 5834 emit_move_insn (hard_frame_pointer_rtx, mem); 5835 } 5836 } 5837 else 5838 { 5839 for (i = low; i < high; i += 2) 5840 { 5841 bool reg0 = save_p (i, leaf_function); 5842 bool reg1 = save_p (i + 1, leaf_function); 5843 machine_mode mode; 5844 int regno; 5845 5846 if (reg0 && reg1) 5847 { 5848 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode; 5849 regno = i; 5850 } 5851 else if (reg0) 5852 { 5853 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode; 5854 regno = i; 5855 } 5856 else if (reg1) 5857 { 5858 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode; 5859 regno = i + 1; 5860 offset += 4; 5861 } 5862 else 5863 { 5864 if (action_false == SORR_ADVANCE) 5865 offset += 8; 5866 continue; 5867 } 5868 5869 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset)); 5870 if (action_true == SORR_SAVE) 5871 { 5872 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno)); 5873 RTX_FRAME_RELATED_P (insn) = 1; 5874 if (mode == DImode) 5875 { 5876 rtx set1, set2; 5877 mem = gen_frame_mem (SImode, plus_constant (Pmode, base, 5878 offset)); 5879 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno)); 5880 RTX_FRAME_RELATED_P (set1) = 1; 5881 mem 5882 = gen_frame_mem (SImode, plus_constant (Pmode, base, 5883 offset + 4)); 5884 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1)); 5885 RTX_FRAME_RELATED_P (set2) = 1; 5886 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 5887 gen_rtx_PARALLEL (VOIDmode, 5888 gen_rtvec (2, set1, set2))); 5889 } 5890 } 5891 else /* action_true == SORR_RESTORE */ 5892 emit_move_insn (gen_rtx_REG (mode, regno), mem); 5893 5894 /* Bump and round down to double word 5895 in case we already bumped by 4. */ 5896 offset = ROUND_DOWN (offset + 8, 8); 5897 } 5898 } 5899 5900 return offset; 5901 } 5902 5903 /* Emit code to adjust BASE to OFFSET. Return the new base. */ 5904 5905 static rtx 5906 emit_adjust_base_to_offset (rtx base, int offset) 5907 { 5908 /* ??? This might be optimized a little as %g1 might already have a 5909 value close enough that a single add insn will do. */ 5910 /* ??? 
Although, all of this is probably only a temporary fix because 5911 if %g1 can hold a function result, then sparc_expand_epilogue will 5912 lose (the result will be clobbered). */ 5913 rtx new_base = gen_rtx_REG (Pmode, 1); 5914 emit_move_insn (new_base, GEN_INT (offset)); 5915 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base))); 5916 return new_base; 5917 } 5918 5919 /* Emit code to save/restore call-saved global and FP registers. */ 5920 5921 static void 5922 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action) 5923 { 5924 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095) 5925 { 5926 base = emit_adjust_base_to_offset (base, offset); 5927 offset = 0; 5928 } 5929 5930 offset 5931 = emit_save_or_restore_regs (0, 8, base, offset, 0, 5932 save_global_or_fp_reg_p, action, SORR_NONE); 5933 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0, 5934 save_global_or_fp_reg_p, action, SORR_NONE); 5935 } 5936 5937 /* Emit code to save/restore call-saved local and in registers. */ 5938 5939 static void 5940 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action) 5941 { 5942 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095) 5943 { 5944 base = emit_adjust_base_to_offset (base, offset); 5945 offset = 0; 5946 } 5947 5948 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p, 5949 save_local_or_in_reg_p, action, SORR_ADVANCE); 5950 } 5951 5952 /* Emit a window_save insn. */ 5953 5954 static rtx_insn * 5955 emit_window_save (rtx increment) 5956 { 5957 rtx_insn *insn = emit_insn (gen_window_save (increment)); 5958 RTX_FRAME_RELATED_P (insn) = 1; 5959 5960 /* The incoming return address (%o7) is saved in %i7. */ 5961 add_reg_note (insn, REG_CFA_REGISTER, 5962 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM), 5963 gen_rtx_REG (Pmode, 5964 INCOMING_RETURN_ADDR_REGNUM))); 5965 5966 /* The window save event. */ 5967 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx); 5968 5969 /* The CFA is %fp, the hard frame pointer. */ 5970 add_reg_note (insn, REG_CFA_DEF_CFA, 5971 plus_constant (Pmode, hard_frame_pointer_rtx, 5972 INCOMING_FRAME_SP_OFFSET)); 5973 5974 return insn; 5975 } 5976 5977 /* Generate an increment for the stack pointer. */ 5978 5979 static rtx 5980 gen_stack_pointer_inc (rtx increment) 5981 { 5982 return gen_rtx_SET (stack_pointer_rtx, 5983 gen_rtx_PLUS (Pmode, 5984 stack_pointer_rtx, 5985 increment)); 5986 } 5987 5988 /* Expand the function prologue. The prologue is responsible for reserving 5989 storage for the frame, saving the call-saved registers and loading the 5990 GOT register if needed. */ 5991 5992 void 5993 sparc_expand_prologue (void) 5994 { 5995 HOST_WIDE_INT size; 5996 rtx_insn *insn; 5997 5998 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying 5999 on the final value of the flag means deferring the prologue/epilogue 6000 expansion until just before the second scheduling pass, which is too 6001 late to emit multiple epilogues or return insns. 6002 6003 Of course we are making the assumption that the value of the flag 6004 will not change between now and its final value. Of the three parts 6005 of the formula, only the last one can reasonably vary. Let's take a 6006 closer look, after assuming that the first two ones are set to true 6007 (otherwise the last value is effectively silenced). 6008 6009 If only_leaf_regs_used returns false, the global predicate will also 6010 be false so the actual frame size calculated below will be positive. 
6011 As a consequence, the save_register_window insn will be emitted in 6012 the instruction stream; now this insn explicitly references %fp 6013 which is not a leaf register so only_leaf_regs_used will always 6014 return false subsequently. 6015 6016 If only_leaf_regs_used returns true, we hope that the subsequent 6017 optimization passes won't cause non-leaf registers to pop up. For 6018 example, the regrename pass has special provisions to not rename to 6019 non-leaf registers in a leaf function. */ 6020 sparc_leaf_function_p 6021 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used (); 6022 6023 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p); 6024 6025 if (flag_stack_usage_info) 6026 current_function_static_stack_size = size; 6027 6028 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK 6029 || flag_stack_clash_protection) 6030 { 6031 if (crtl->is_leaf && !cfun->calls_alloca) 6032 { 6033 if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) 6034 sparc_emit_probe_stack_range (get_stack_check_protect (), 6035 size - get_stack_check_protect ()); 6036 } 6037 else if (size > 0) 6038 sparc_emit_probe_stack_range (get_stack_check_protect (), size); 6039 } 6040 6041 if (size == 0) 6042 ; /* do nothing. */ 6043 else if (sparc_leaf_function_p) 6044 { 6045 rtx size_int_rtx = GEN_INT (-size); 6046 6047 if (size <= 4096) 6048 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx)); 6049 else if (size <= 8192) 6050 { 6051 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096))); 6052 RTX_FRAME_RELATED_P (insn) = 1; 6053 6054 /* %sp is still the CFA register. */ 6055 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 6056 } 6057 else 6058 { 6059 rtx size_rtx = gen_rtx_REG (Pmode, 1); 6060 emit_move_insn (size_rtx, size_int_rtx); 6061 insn = emit_insn (gen_stack_pointer_inc (size_rtx)); 6062 add_reg_note (insn, REG_FRAME_RELATED_EXPR, 6063 gen_stack_pointer_inc (size_int_rtx)); 6064 } 6065 6066 RTX_FRAME_RELATED_P (insn) = 1; 6067 } 6068 else 6069 { 6070 rtx size_int_rtx = GEN_INT (-size); 6071 6072 if (size <= 4096) 6073 emit_window_save (size_int_rtx); 6074 else if (size <= 8192) 6075 { 6076 emit_window_save (GEN_INT (-4096)); 6077 6078 /* %sp is not the CFA register anymore. */ 6079 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 6080 6081 /* Make sure no %fp-based store is issued until after the frame is 6082 established. The offset between the frame pointer and the stack 6083 pointer is calculated relative to the value of the stack pointer 6084 at the end of the function prologue, and moving instructions that 6085 access the stack via the frame pointer between the instructions 6086 that decrement the stack pointer could result in accessing the 6087 register window save area, which is volatile. */ 6088 emit_insn (gen_frame_blockage ()); 6089 } 6090 else 6091 { 6092 rtx size_rtx = gen_rtx_REG (Pmode, 1); 6093 emit_move_insn (size_rtx, size_int_rtx); 6094 emit_window_save (size_rtx); 6095 } 6096 } 6097 6098 if (sparc_leaf_function_p) 6099 { 6100 sparc_frame_base_reg = stack_pointer_rtx; 6101 sparc_frame_base_offset = size + SPARC_STACK_BIAS; 6102 } 6103 else 6104 { 6105 sparc_frame_base_reg = hard_frame_pointer_rtx; 6106 sparc_frame_base_offset = SPARC_STACK_BIAS; 6107 } 6108 6109 if (sparc_n_global_fp_regs > 0) 6110 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 6111 sparc_frame_base_offset 6112 - sparc_apparent_frame_size, 6113 SORR_SAVE); 6114 6115 /* Advertise that the data calculated just above are now valid. 
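The data in question are sparc_frame_base_reg and sparc_frame_base_offset
set just above, together with the sizes recorded by
sparc_compute_frame_size; sparc_can_use_return_insn_p checks
sparc_prologue_data_valid_p before relying on them.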
*/ 6116 sparc_prologue_data_valid_p = true; 6117 } 6118 6119 /* Expand the function prologue. The prologue is responsible for reserving 6120 storage for the frame, saving the call-saved registers and loading the 6121 GOT register if needed. */ 6122 6123 void 6124 sparc_flat_expand_prologue (void) 6125 { 6126 HOST_WIDE_INT size; 6127 rtx_insn *insn; 6128 6129 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf; 6130 6131 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p); 6132 6133 if (flag_stack_usage_info) 6134 current_function_static_stack_size = size; 6135 6136 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK 6137 || flag_stack_clash_protection) 6138 { 6139 if (crtl->is_leaf && !cfun->calls_alloca) 6140 { 6141 if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) 6142 sparc_emit_probe_stack_range (get_stack_check_protect (), 6143 size - get_stack_check_protect ()); 6144 } 6145 else if (size > 0) 6146 sparc_emit_probe_stack_range (get_stack_check_protect (), size); 6147 } 6148 6149 if (sparc_save_local_in_regs_p) 6150 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS, 6151 SORR_SAVE); 6152 6153 if (size == 0) 6154 ; /* do nothing. */ 6155 else 6156 { 6157 rtx size_int_rtx, size_rtx; 6158 6159 size_rtx = size_int_rtx = GEN_INT (-size); 6160 6161 /* We establish the frame (i.e. decrement the stack pointer) first, even 6162 if we use a frame pointer, because we cannot clobber any call-saved 6163 registers, including the frame pointer, if we haven't created a new 6164 register save area, for the sake of compatibility with the ABI. */ 6165 if (size <= 4096) 6166 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx)); 6167 else if (size <= 8192 && !frame_pointer_needed) 6168 { 6169 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096))); 6170 RTX_FRAME_RELATED_P (insn) = 1; 6171 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size))); 6172 } 6173 else 6174 { 6175 size_rtx = gen_rtx_REG (Pmode, 1); 6176 emit_move_insn (size_rtx, size_int_rtx); 6177 insn = emit_insn (gen_stack_pointer_inc (size_rtx)); 6178 add_reg_note (insn, REG_CFA_ADJUST_CFA, 6179 gen_stack_pointer_inc (size_int_rtx)); 6180 } 6181 RTX_FRAME_RELATED_P (insn) = 1; 6182 6183 /* Ensure nothing is scheduled until after the frame is established. */ 6184 emit_insn (gen_blockage ()); 6185 6186 if (frame_pointer_needed) 6187 { 6188 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx, 6189 gen_rtx_MINUS (Pmode, 6190 stack_pointer_rtx, 6191 size_rtx))); 6192 RTX_FRAME_RELATED_P (insn) = 1; 6193 6194 add_reg_note (insn, REG_CFA_ADJUST_CFA, 6195 gen_rtx_SET (hard_frame_pointer_rtx, 6196 plus_constant (Pmode, stack_pointer_rtx, 6197 size))); 6198 } 6199 6200 if (return_addr_reg_needed_p (sparc_leaf_function_p)) 6201 { 6202 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM); 6203 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); 6204 6205 insn = emit_move_insn (i7, o7); 6206 RTX_FRAME_RELATED_P (insn) = 1; 6207 6208 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7)); 6209 6210 /* Prevent this instruction from ever being considered dead, 6211 even if this function has no epilogue. 
*/ 6212 emit_use (i7); 6213 } 6214 } 6215 6216 if (frame_pointer_needed) 6217 { 6218 sparc_frame_base_reg = hard_frame_pointer_rtx; 6219 sparc_frame_base_offset = SPARC_STACK_BIAS; 6220 } 6221 else 6222 { 6223 sparc_frame_base_reg = stack_pointer_rtx; 6224 sparc_frame_base_offset = size + SPARC_STACK_BIAS; 6225 } 6226 6227 if (sparc_n_global_fp_regs > 0) 6228 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 6229 sparc_frame_base_offset 6230 - sparc_apparent_frame_size, 6231 SORR_SAVE); 6232 6233 /* Advertise that the data calculated just above are now valid. */ 6234 sparc_prologue_data_valid_p = true; 6235 } 6236 6237 /* This function generates the assembly code for function entry, which boils 6238 down to emitting the necessary .register directives. */ 6239 6240 static void 6241 sparc_asm_function_prologue (FILE *file) 6242 { 6243 /* Check that the assumption we made in sparc_expand_prologue is valid. */ 6244 if (!TARGET_FLAT) 6245 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs); 6246 6247 sparc_output_scratch_registers (file); 6248 } 6249 6250 /* Expand the function epilogue, either normal or part of a sibcall. 6251 We emit all the instructions except the return or the call. */ 6252 6253 void 6254 sparc_expand_epilogue (bool for_eh) 6255 { 6256 HOST_WIDE_INT size = sparc_frame_size; 6257 6258 if (cfun->calls_alloca) 6259 emit_insn (gen_frame_blockage ()); 6260 6261 if (sparc_n_global_fp_regs > 0) 6262 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 6263 sparc_frame_base_offset 6264 - sparc_apparent_frame_size, 6265 SORR_RESTORE); 6266 6267 if (size == 0 || for_eh) 6268 ; /* do nothing. */ 6269 else if (sparc_leaf_function_p) 6270 { 6271 if (size <= 4096) 6272 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 6273 else if (size <= 8192) 6274 { 6275 emit_insn (gen_stack_pointer_inc (GEN_INT (4096))); 6276 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096))); 6277 } 6278 else 6279 { 6280 rtx reg = gen_rtx_REG (Pmode, 1); 6281 emit_move_insn (reg, GEN_INT (size)); 6282 emit_insn (gen_stack_pointer_inc (reg)); 6283 } 6284 } 6285 } 6286 6287 /* Expand the function epilogue, either normal or part of a sibcall. 6288 We emit all the instructions except the return or the call. */ 6289 6290 void 6291 sparc_flat_expand_epilogue (bool for_eh) 6292 { 6293 HOST_WIDE_INT size = sparc_frame_size; 6294 6295 if (sparc_n_global_fp_regs > 0) 6296 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg, 6297 sparc_frame_base_offset 6298 - sparc_apparent_frame_size, 6299 SORR_RESTORE); 6300 6301 /* If we have a frame pointer, we'll need both to restore it before the 6302 frame is destroyed and use its current value in destroying the frame. 6303 Since we don't have an atomic way to do that in the flat window model, 6304 we save the current value into a temporary register (%g1). */ 6305 if (frame_pointer_needed && !for_eh) 6306 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx); 6307 6308 if (return_addr_reg_needed_p (sparc_leaf_function_p)) 6309 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM), 6310 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM)); 6311 6312 if (sparc_save_local_in_regs_p) 6313 emit_save_or_restore_local_in_regs (sparc_frame_base_reg, 6314 sparc_frame_base_offset, 6315 SORR_RESTORE); 6316 6317 if (size == 0 || for_eh) 6318 ; /* do nothing. */ 6319 else if (frame_pointer_needed) 6320 { 6321 /* Make sure the frame is destroyed after everything else is done. 
*/ 6322 emit_insn (gen_blockage ()); 6323 6324 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1)); 6325 } 6326 else 6327 { 6328 /* Likewise. */ 6329 emit_insn (gen_blockage ()); 6330 6331 if (size <= 4096) 6332 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 6333 else if (size <= 8192) 6334 { 6335 emit_insn (gen_stack_pointer_inc (GEN_INT (4096))); 6336 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096))); 6337 } 6338 else 6339 { 6340 rtx reg = gen_rtx_REG (Pmode, 1); 6341 emit_move_insn (reg, GEN_INT (size)); 6342 emit_insn (gen_stack_pointer_inc (reg)); 6343 } 6344 } 6345 } 6346 6347 /* Return true if it is appropriate to emit `return' instructions in the 6348 body of a function. */ 6349 6350 bool 6351 sparc_can_use_return_insn_p (void) 6352 { 6353 return sparc_prologue_data_valid_p 6354 && sparc_n_global_fp_regs == 0 6355 && TARGET_FLAT 6356 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p) 6357 : (sparc_frame_size == 0 || !sparc_leaf_function_p); 6358 } 6359 6360 /* This function generates the assembly code for function exit. */ 6361 6362 static void 6363 sparc_asm_function_epilogue (FILE *file) 6364 { 6365 /* If the last two instructions of a function are "call foo; dslot;" 6366 the return address might point to the first instruction in the next 6367 function and we have to output a dummy nop for the sake of sane 6368 backtraces in such cases. This is pointless for sibling calls since 6369 the return address is explicitly adjusted. */ 6370 6371 rtx_insn *insn = get_last_insn (); 6372 6373 rtx last_real_insn = prev_real_insn (insn); 6374 if (last_real_insn 6375 && NONJUMP_INSN_P (last_real_insn) 6376 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE) 6377 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0); 6378 6379 if (last_real_insn 6380 && CALL_P (last_real_insn) 6381 && !SIBLING_CALL_P (last_real_insn)) 6382 fputs("\tnop\n", file); 6383 6384 sparc_output_deferred_case_vectors (); 6385 } 6386 6387 /* Output a 'restore' instruction. */ 6388 6389 static void 6390 output_restore (rtx pat) 6391 { 6392 rtx operands[3]; 6393 6394 if (! pat) 6395 { 6396 fputs ("\t restore\n", asm_out_file); 6397 return; 6398 } 6399 6400 gcc_assert (GET_CODE (pat) == SET); 6401 6402 operands[0] = SET_DEST (pat); 6403 pat = SET_SRC (pat); 6404 6405 switch (GET_CODE (pat)) 6406 { 6407 case PLUS: 6408 operands[1] = XEXP (pat, 0); 6409 operands[2] = XEXP (pat, 1); 6410 output_asm_insn (" restore %r1, %2, %Y0", operands); 6411 break; 6412 case LO_SUM: 6413 operands[1] = XEXP (pat, 0); 6414 operands[2] = XEXP (pat, 1); 6415 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands); 6416 break; 6417 case ASHIFT: 6418 operands[1] = XEXP (pat, 0); 6419 gcc_assert (XEXP (pat, 1) == const1_rtx); 6420 output_asm_insn (" restore %r1, %r1, %Y0", operands); 6421 break; 6422 default: 6423 operands[1] = pat; 6424 output_asm_insn (" restore %%g0, %1, %Y0", operands); 6425 break; 6426 } 6427 } 6428 6429 /* Output a return. */ 6430 6431 const char * 6432 output_return (rtx_insn *insn) 6433 { 6434 if (crtl->calls_eh_return) 6435 { 6436 /* If the function uses __builtin_eh_return, the eh_return 6437 machinery occupies the delay slot. 
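For illustration, on a V9 non-flat function compiled with delayed
branches, the code below emits
  return %i7+8
   add %sp, %g1, %sp
with the eh_return stack adjustment (in %g1) applied in the delay slot.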
*/ 6438 gcc_assert (!final_sequence); 6439 6440 if (flag_delayed_branch) 6441 { 6442 if (!TARGET_FLAT && TARGET_V9) 6443 fputs ("\treturn\t%i7+8\n", asm_out_file); 6444 else 6445 { 6446 if (!TARGET_FLAT) 6447 fputs ("\trestore\n", asm_out_file); 6448 6449 fputs ("\tjmp\t%o7+8\n", asm_out_file); 6450 } 6451 6452 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file); 6453 } 6454 else 6455 { 6456 if (!TARGET_FLAT) 6457 fputs ("\trestore\n", asm_out_file); 6458 6459 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file); 6460 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file); 6461 } 6462 } 6463 else if (sparc_leaf_function_p || TARGET_FLAT) 6464 { 6465 /* This is a leaf or flat function so we don't have to bother restoring 6466 the register window, which frees us from dealing with the convoluted 6467 semantics of restore/return. We simply output the jump to the 6468 return address and the insn in the delay slot (if any). */ 6469 6470 return "jmp\t%%o7+%)%#"; 6471 } 6472 else 6473 { 6474 /* This is a regular function so we have to restore the register window. 6475 We may have a pending insn for the delay slot, which will be either 6476 combined with the 'restore' instruction or put in the delay slot of 6477 the 'return' instruction. */ 6478 6479 if (final_sequence) 6480 { 6481 rtx_insn *delay; 6482 rtx pat; 6483 6484 delay = NEXT_INSN (insn); 6485 gcc_assert (delay); 6486 6487 pat = PATTERN (delay); 6488 6489 if (TARGET_V9 && ! epilogue_renumber (&pat, 1)) 6490 { 6491 epilogue_renumber (&pat, 0); 6492 return "return\t%%i7+%)%#"; 6493 } 6494 else 6495 { 6496 output_asm_insn ("jmp\t%%i7+%)", NULL); 6497 6498 /* We're going to output the insn in the delay slot manually. 6499 Make sure to output its source location first. */ 6500 PATTERN (delay) = gen_blockage (); 6501 INSN_CODE (delay) = -1; 6502 final_scan_insn (delay, asm_out_file, optimize, 0, NULL); 6503 INSN_LOCATION (delay) = UNKNOWN_LOCATION; 6504 6505 output_restore (pat); 6506 } 6507 } 6508 else 6509 { 6510 /* The delay slot is empty. */ 6511 if (TARGET_V9) 6512 return "return\t%%i7+%)\n\t nop"; 6513 else if (flag_delayed_branch) 6514 return "jmp\t%%i7+%)\n\t restore"; 6515 else 6516 return "restore\n\tjmp\t%%o7+%)\n\t nop"; 6517 } 6518 } 6519 6520 return ""; 6521 } 6522 6523 /* Output a sibling call. */ 6524 6525 const char * 6526 output_sibcall (rtx_insn *insn, rtx call_operand) 6527 { 6528 rtx operands[1]; 6529 6530 gcc_assert (flag_delayed_branch); 6531 6532 operands[0] = call_operand; 6533 6534 if (sparc_leaf_function_p || TARGET_FLAT) 6535 { 6536 /* This is a leaf or flat function so we don't have to bother restoring 6537 the register window. We simply output the jump to the function and 6538 the insn in the delay slot (if any). */ 6539 6540 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence)); 6541 6542 if (final_sequence) 6543 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#", 6544 operands); 6545 else 6546 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize 6547 it into branch if possible. */ 6548 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7", 6549 operands); 6550 } 6551 else 6552 { 6553 /* This is a regular function so we have to restore the register window. 6554 We may have a pending insn for the delay slot, which will be combined 6555 with the 'restore' instruction. 
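For instance, with nothing pending for the delay slot, this emits the
call below followed by a bare
   restore
in the slot; a pending SET insn is instead folded into the operands of
the restore by output_restore.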
*/ 6556 6557 output_asm_insn ("call\t%a0, 0", operands); 6558 6559 if (final_sequence) 6560 { 6561 rtx_insn *delay; 6562 rtx pat; 6563 6564 delay = NEXT_INSN (insn); 6565 gcc_assert (delay); 6566 6567 pat = PATTERN (delay); 6568 6569 /* We're going to output the insn in the delay slot manually. 6570 Make sure to output its source location first. */ 6571 PATTERN (delay) = gen_blockage (); 6572 INSN_CODE (delay) = -1; 6573 final_scan_insn (delay, asm_out_file, optimize, 0, NULL); 6574 INSN_LOCATION (delay) = UNKNOWN_LOCATION; 6575 6576 output_restore (pat); 6577 } 6578 else 6579 output_restore (NULL_RTX); 6580 } 6581 6582 return ""; 6583 } 6584 6585 /* Functions for handling argument passing. 6586 6587 For 32-bit, the first 6 args are normally in registers and the rest are 6588 pushed. Any arg that starts within the first 6 words is at least 6589 partially passed in a register unless its data type forbids. 6590 6591 For 64-bit, the argument registers are laid out as an array of 16 elements 6592 and arguments are added sequentially. The first 6 int args and up to the 6593 first 16 fp args (depending on size) are passed in regs. 6594 6595 Slot Stack Integral Float Float in structure Double Long Double 6596 ---- ----- -------- ----- ------------------ ------ ----------- 6597 15 [SP+248] %f31 %f30,%f31 %d30 6598 14 [SP+240] %f29 %f28,%f29 %d28 %q28 6599 13 [SP+232] %f27 %f26,%f27 %d26 6600 12 [SP+224] %f25 %f24,%f25 %d24 %q24 6601 11 [SP+216] %f23 %f22,%f23 %d22 6602 10 [SP+208] %f21 %f20,%f21 %d20 %q20 6603 9 [SP+200] %f19 %f18,%f19 %d18 6604 8 [SP+192] %f17 %f16,%f17 %d16 %q16 6605 7 [SP+184] %f15 %f14,%f15 %d14 6606 6 [SP+176] %f13 %f12,%f13 %d12 %q12 6607 5 [SP+168] %o5 %f11 %f10,%f11 %d10 6608 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8 6609 3 [SP+152] %o3 %f7 %f6,%f7 %d6 6610 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4 6611 1 [SP+136] %o1 %f3 %f2,%f3 %d2 6612 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0 6613 6614 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise. 6615 6616 Integral arguments are always passed as 64-bit quantities appropriately 6617 extended. 6618 6619 Passing of floating point values is handled as follows. 6620 If a prototype is in scope: 6621 If the value is in a named argument (i.e. not a stdarg function or a 6622 value not part of the `...') then the value is passed in the appropriate 6623 fp reg. 6624 If the value is part of the `...' and is passed in one of the first 6 6625 slots then the value is passed in the appropriate int reg. 6626 If the value is part of the `...' and is not passed in one of the first 6 6627 slots then the value is passed in memory. 6628 If a prototype is not in scope: 6629 If the value is one of the first 6 arguments the value is passed in the 6630 appropriate integer reg and the appropriate fp reg. 6631 If the value is not one of the first 6 arguments the value is passed in 6632 the appropriate fp reg and in memory. 6633 6634 6635 Summary of the calling conventions implemented by GCC on the SPARC: 6636 6637 32-bit ABI: 6638 size argument return value 6639 6640 small integer <4 int. reg. int. reg. 6641 word 4 int. reg. int. reg. 6642 double word 8 int. reg. int. reg. 6643 6644 _Complex small integer <8 int. reg. int. reg. 6645 _Complex word 8 int. reg. int. reg. 6646 _Complex double word 16 memory int. reg. 6647 6648 vector integer <=8 int. reg. FP reg. 6649 vector integer >8 memory memory 6650 6651 float 4 int. reg. FP reg. 6652 double 8 int. reg. FP reg. 6653 long double 16 memory memory 6654 6655 _Complex float 8 memory FP reg. 
6656 _Complex double 16 memory FP reg. 6657 _Complex long double 32 memory FP reg. 6658 6659 vector float any memory memory 6660 6661 aggregate any memory memory 6662 6663 6664 6665 64-bit ABI: 6666 size argument return value 6667 6668 small integer <8 int. reg. int. reg. 6669 word 8 int. reg. int. reg. 6670 double word 16 int. reg. int. reg. 6671 6672 _Complex small integer <16 int. reg. int. reg. 6673 _Complex word 16 int. reg. int. reg. 6674 _Complex double word 32 memory int. reg. 6675 6676 vector integer <=16 FP reg. FP reg. 6677 vector integer 16<s<=32 memory FP reg. 6678 vector integer >32 memory memory 6679 6680 float 4 FP reg. FP reg. 6681 double 8 FP reg. FP reg. 6682 long double 16 FP reg. FP reg. 6683 6684 _Complex float 8 FP reg. FP reg. 6685 _Complex double 16 FP reg. FP reg. 6686 _Complex long double 32 memory FP reg. 6687 6688 vector float <=16 FP reg. FP reg. 6689 vector float 16<s<=32 memory FP reg. 6690 vector float >32 memory memory 6691 6692 aggregate <=16 reg. reg. 6693 aggregate 16<s<=32 memory reg. 6694 aggregate >32 memory memory 6695 6696 6697 6698 Note #1: complex floating-point types follow the extended SPARC ABIs as 6699 implemented by the Sun compiler. 6700 6701 Note #2: integral vector types follow the scalar floating-point types 6702 conventions to match what is implemented by the Sun VIS SDK. 6703 6704 Note #3: floating-point vector types follow the aggregate types 6705 conventions. */ 6706 6707 6708 /* Maximum number of int regs for args. */ 6709 #define SPARC_INT_ARG_MAX 6 6710 /* Maximum number of fp regs for args. */ 6711 #define SPARC_FP_ARG_MAX 16 6712 /* Number of words (partially) occupied for a given size in units. */ 6713 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD) 6714 6715 /* Handle the INIT_CUMULATIVE_ARGS macro. 6716 Initialize a variable CUM of type CUMULATIVE_ARGS 6717 for a call to a function whose data type is FNTYPE. 6718 For a library call, FNTYPE is 0. */ 6719 6720 void 6721 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree) 6722 { 6723 cum->words = 0; 6724 cum->prototype_p = fntype && prototype_p (fntype); 6725 cum->libcall_p = !fntype; 6726 } 6727 6728 /* Handle promotion of pointer and integer arguments. */ 6729 6730 static machine_mode 6731 sparc_promote_function_mode (const_tree type, machine_mode mode, 6732 int *punsignedp, const_tree, int) 6733 { 6734 if (type && POINTER_TYPE_P (type)) 6735 { 6736 *punsignedp = POINTERS_EXTEND_UNSIGNED; 6737 return Pmode; 6738 } 6739 6740 /* Integral arguments are passed as full words, as per the ABI. */ 6741 if (GET_MODE_CLASS (mode) == MODE_INT 6742 && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 6743 return word_mode; 6744 6745 return mode; 6746 } 6747 6748 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */ 6749 6750 static bool 6751 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED) 6752 { 6753 return TARGET_ARCH64 ? true : false; 6754 } 6755 6756 /* Traverse the record TYPE recursively and call FUNC on its fields. 6757 NAMED is true if this is for a named parameter. DATA is passed 6758 to FUNC for each field. OFFSET is the starting position and 6759 PACKED is true if we are inside a packed record. */ 6760 6761 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)> 6762 static void 6763 traverse_record_type (const_tree type, bool named, T *data, 6764 HOST_WIDE_INT offset = 0, bool packed = false) 6765 { 6766 /* The ABI obviously doesn't specify how packed structures are passed. 
6767 These are passed in integer regs if possible, otherwise memory. */ 6768 if (!packed) 6769 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6770 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field)) 6771 { 6772 packed = true; 6773 break; 6774 } 6775 6776 /* Walk the real fields, but skip those with no size or a zero size. 6777 ??? Fields with variable offset are handled as having zero offset. */ 6778 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) 6779 if (TREE_CODE (field) == FIELD_DECL) 6780 { 6781 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field))) 6782 continue; 6783 6784 HOST_WIDE_INT bitpos = offset; 6785 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST) 6786 bitpos += int_bit_position (field); 6787 6788 tree field_type = TREE_TYPE (field); 6789 if (TREE_CODE (field_type) == RECORD_TYPE) 6790 traverse_record_type<T, Func> (field_type, named, data, bitpos, 6791 packed); 6792 else 6793 { 6794 const bool fp_type 6795 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type); 6796 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU, 6797 data); 6798 } 6799 } 6800 } 6801 6802 /* Handle recursive register classifying for structure layout. */ 6803 6804 typedef struct 6805 { 6806 bool fp_regs; /* true if field eligible to FP registers. */ 6807 bool fp_regs_in_first_word; /* true if such field in first word. */ 6808 } classify_data_t; 6809 6810 /* A subroutine of function_arg_slotno. Classify the field. */ 6811 6812 inline void 6813 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp, 6814 classify_data_t *data) 6815 { 6816 if (fp) 6817 { 6818 data->fp_regs = true; 6819 if (bitpos < BITS_PER_WORD) 6820 data->fp_regs_in_first_word = true; 6821 } 6822 } 6823 6824 /* Compute the slot number to pass an argument in. 6825 Return the slot number or -1 if passing on the stack. 6826 6827 CUM is a variable of type CUMULATIVE_ARGS which gives info about 6828 the preceding args and about the function being called. 6829 MODE is the argument's machine mode. 6830 TYPE is the data type of the argument (as a tree). 6831 This is null for libcalls where that information may 6832 not be available. 6833 NAMED is nonzero if this argument is a named parameter 6834 (otherwise it is an extra parameter matching an ellipsis). 6835 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG. 6836 *PREGNO records the register number to use if scalar type. 6837 *PPADDING records the amount of padding needed in words. */ 6838 6839 static int 6840 function_arg_slotno (const struct sparc_args *cum, machine_mode mode, 6841 const_tree type, bool named, bool incoming, 6842 int *pregno, int *ppadding) 6843 { 6844 int regbase = (incoming 6845 ? SPARC_INCOMING_INT_ARG_FIRST 6846 : SPARC_OUTGOING_INT_ARG_FIRST); 6847 int slotno = cum->words; 6848 enum mode_class mclass; 6849 int regno; 6850 6851 *ppadding = 0; 6852 6853 if (type && TREE_ADDRESSABLE (type)) 6854 return -1; 6855 6856 if (TARGET_ARCH32 6857 && mode == BLKmode 6858 && type 6859 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0) 6860 return -1; 6861 6862 /* For SPARC64, objects requiring 16-byte alignment get it. */ 6863 if (TARGET_ARCH64 6864 && (type ? 
TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128 6865 && (slotno & 1) != 0) 6866 slotno++, *ppadding = 1; 6867 6868 mclass = GET_MODE_CLASS (mode); 6869 if (type && TREE_CODE (type) == VECTOR_TYPE) 6870 { 6871 /* Vector types deserve special treatment because they are 6872 polymorphic wrt their mode, depending upon whether VIS 6873 instructions are enabled. */ 6874 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) 6875 { 6876 /* The SPARC port defines no floating-point vector modes. */ 6877 gcc_assert (mode == BLKmode); 6878 } 6879 else 6880 { 6881 /* Integral vector types should either have a vector 6882 mode or an integral mode, because we are guaranteed 6883 by pass_by_reference that their size is not greater 6884 than 16 bytes and TImode is 16-byte wide. */ 6885 gcc_assert (mode != BLKmode); 6886 6887 /* Vector integers are handled like floats according to 6888 the Sun VIS SDK. */ 6889 mclass = MODE_FLOAT; 6890 } 6891 } 6892 6893 switch (mclass) 6894 { 6895 case MODE_FLOAT: 6896 case MODE_COMPLEX_FLOAT: 6897 case MODE_VECTOR_INT: 6898 if (TARGET_ARCH64 && TARGET_FPU && named) 6899 { 6900 /* If all arg slots are filled, then must pass on stack. */ 6901 if (slotno >= SPARC_FP_ARG_MAX) 6902 return -1; 6903 6904 regno = SPARC_FP_ARG_FIRST + slotno * 2; 6905 /* Arguments filling only one single FP register are 6906 right-justified in the outer double FP register. */ 6907 if (GET_MODE_SIZE (mode) <= 4) 6908 regno++; 6909 break; 6910 } 6911 /* fallthrough */ 6912 6913 case MODE_INT: 6914 case MODE_COMPLEX_INT: 6915 /* If all arg slots are filled, then must pass on stack. */ 6916 if (slotno >= SPARC_INT_ARG_MAX) 6917 return -1; 6918 6919 regno = regbase + slotno; 6920 break; 6921 6922 case MODE_RANDOM: 6923 if (mode == VOIDmode) 6924 /* MODE is VOIDmode when generating the actual call. */ 6925 return -1; 6926 6927 gcc_assert (mode == BLKmode); 6928 6929 if (TARGET_ARCH32 6930 || !type 6931 || (TREE_CODE (type) != RECORD_TYPE 6932 && TREE_CODE (type) != VECTOR_TYPE)) 6933 { 6934 /* If all arg slots are filled, then must pass on stack. */ 6935 if (slotno >= SPARC_INT_ARG_MAX) 6936 return -1; 6937 6938 regno = regbase + slotno; 6939 } 6940 else /* TARGET_ARCH64 && type */ 6941 { 6942 /* If all arg slots are filled, then must pass on stack. */ 6943 if (slotno >= SPARC_FP_ARG_MAX) 6944 return -1; 6945 6946 if (TREE_CODE (type) == RECORD_TYPE) 6947 { 6948 classify_data_t data = { false, false }; 6949 traverse_record_type<classify_data_t, classify_registers> 6950 (type, named, &data); 6951 6952 if (data.fp_regs) 6953 { 6954 /* If all FP slots are filled except for the last one and 6955 there is no FP field in the first word, then must pass 6956 on stack. */ 6957 if (slotno >= SPARC_FP_ARG_MAX - 1 6958 && !data.fp_regs_in_first_word) 6959 return -1; 6960 } 6961 else 6962 { 6963 /* If all int slots are filled, then must pass on stack. */ 6964 if (slotno >= SPARC_INT_ARG_MAX) 6965 return -1; 6966 } 6967 } 6968 6969 /* PREGNO isn't set since both int and FP regs can be used. */ 6970 return slotno; 6971 } 6972 break; 6973 6974 default : 6975 gcc_unreachable (); 6976 } 6977 6978 *pregno = regno; 6979 return slotno; 6980 } 6981 6982 /* Handle recursive register counting/assigning for structure layout. */ 6983 6984 typedef struct 6985 { 6986 int slotno; /* slot number of the argument. */ 6987 int regbase; /* regno of the base register. */ 6988 int intoffset; /* offset of the first pending integer field. */ 6989 int nregs; /* number of words passed in registers. 
*/ 6990 bool stack; /* true if part of the argument is on the stack. */ 6991 rtx ret; /* return expression being built. */ 6992 } assign_data_t; 6993 6994 /* A subroutine of function_arg_record_value. Compute the number of integer 6995 registers to be assigned between PARMS->intoffset and BITPOS. Return 6996 true if at least one integer register is assigned or false otherwise. */ 6997 6998 static bool 6999 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs) 7000 { 7001 if (data->intoffset < 0) 7002 return false; 7003 7004 const int intoffset = data->intoffset; 7005 data->intoffset = -1; 7006 7007 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD; 7008 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD); 7009 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD); 7010 int nregs = (endbit - startbit) / BITS_PER_WORD; 7011 7012 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno) 7013 { 7014 nregs = SPARC_INT_ARG_MAX - this_slotno; 7015 7016 /* We need to pass this field (partly) on the stack. */ 7017 data->stack = 1; 7018 } 7019 7020 if (nregs <= 0) 7021 return false; 7022 7023 *pnregs = nregs; 7024 return true; 7025 } 7026 7027 /* A subroutine of function_arg_record_value. Compute the number and the mode 7028 of the FP registers to be assigned for FIELD. Return true if at least one 7029 FP register is assigned or false otherwise. */ 7030 7031 static bool 7032 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos, 7033 assign_data_t *data, 7034 int *pnregs, machine_mode *pmode) 7035 { 7036 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD; 7037 machine_mode mode = DECL_MODE (field); 7038 int nregs, nslots; 7039 7040 /* Slots are counted as words while regs are counted as having the size of 7041 the (inner) mode. */ 7042 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode) 7043 { 7044 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); 7045 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field)); 7046 } 7047 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE) 7048 { 7049 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field))); 7050 nregs = 2; 7051 } 7052 else 7053 nregs = 1; 7054 7055 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode)); 7056 7057 if (nslots > SPARC_FP_ARG_MAX - this_slotno) 7058 { 7059 nslots = SPARC_FP_ARG_MAX - this_slotno; 7060 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode); 7061 7062 /* We need to pass this field (partly) on the stack. */ 7063 data->stack = 1; 7064 7065 if (nregs <= 0) 7066 return false; 7067 } 7068 7069 *pnregs = nregs; 7070 *pmode = mode; 7071 return true; 7072 } 7073 7074 /* A subroutine of function_arg_record_value. Count the number of registers 7075 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */ 7076 7077 inline void 7078 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp, 7079 assign_data_t *data) 7080 { 7081 if (fp) 7082 { 7083 int nregs; 7084 machine_mode mode; 7085 7086 if (compute_int_layout (bitpos, data, &nregs)) 7087 data->nregs += nregs; 7088 7089 if (compute_fp_layout (field, bitpos, data, &nregs, &mode)) 7090 data->nregs += nregs; 7091 } 7092 else 7093 { 7094 if (data->intoffset < 0) 7095 data->intoffset = bitpos; 7096 } 7097 } 7098 7099 /* A subroutine of function_arg_record_value. Assign the bits of the 7100 structure between PARMS->intoffset and BITPOS to integer registers. 
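For example, assuming a 64-bit target (BITS_PER_WORD == 64), a pending
run that starts at bit offset 32 within its word is shipped in an SImode
piece, per smallest_int_mode_for_size (64 - 32), while any following
full words go out in word_mode (DImode).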
*/ 7101 7102 static void 7103 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data) 7104 { 7105 int intoffset = data->intoffset; 7106 machine_mode mode; 7107 int nregs; 7108 7109 if (!compute_int_layout (bitpos, data, &nregs)) 7110 return; 7111 7112 /* If this is the trailing part of a word, only load that much into 7113 the register. Otherwise load the whole register. Note that in 7114 the latter case we may pick up unwanted bits. It's not a problem 7115 at the moment but may wish to revisit. */ 7116 if (intoffset % BITS_PER_WORD != 0) 7117 mode = smallest_int_mode_for_size (BITS_PER_WORD 7118 - intoffset % BITS_PER_WORD); 7119 else 7120 mode = word_mode; 7121 7122 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD; 7123 unsigned int regno = data->regbase + this_slotno; 7124 intoffset /= BITS_PER_UNIT; 7125 7126 do 7127 { 7128 rtx reg = gen_rtx_REG (mode, regno); 7129 XVECEXP (data->ret, 0, data->stack + data->nregs) 7130 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset)); 7131 data->nregs += 1; 7132 mode = word_mode; 7133 regno += 1; 7134 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1; 7135 } 7136 while (--nregs > 0); 7137 } 7138 7139 /* A subroutine of function_arg_record_value. Assign FIELD at position 7140 BITPOS to FP registers. */ 7141 7142 static void 7143 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos, 7144 assign_data_t *data) 7145 { 7146 int nregs; 7147 machine_mode mode; 7148 7149 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode)) 7150 return; 7151 7152 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD; 7153 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2; 7154 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0) 7155 regno++; 7156 int pos = bitpos / BITS_PER_UNIT; 7157 7158 do 7159 { 7160 rtx reg = gen_rtx_REG (mode, regno); 7161 XVECEXP (data->ret, 0, data->stack + data->nregs) 7162 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos)); 7163 data->nregs += 1; 7164 regno += GET_MODE_SIZE (mode) / 4; 7165 pos += GET_MODE_SIZE (mode); 7166 } 7167 while (--nregs > 0); 7168 } 7169 7170 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of 7171 the structure between PARMS->intoffset and BITPOS to registers. */ 7172 7173 inline void 7174 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp, 7175 assign_data_t *data) 7176 { 7177 if (fp) 7178 { 7179 assign_int_registers (bitpos, data); 7180 7181 assign_fp_registers (field, bitpos, data); 7182 } 7183 else 7184 { 7185 if (data->intoffset < 0) 7186 data->intoffset = bitpos; 7187 } 7188 } 7189 7190 /* Used by function_arg and sparc_function_value_1 to implement the complex 7191 conventions of the 64-bit ABI for passing and returning structures. 7192 Return an expression valid as a return value for the FUNCTION_ARG 7193 and TARGET_FUNCTION_VALUE. 7194 7195 TYPE is the data type of the argument (as a tree). 7196 This is null for libcalls where that information may 7197 not be available. 7198 MODE is the argument's machine mode. 7199 SLOTNO is the index number of the argument's slot in the parameter array. 7200 NAMED is true if this argument is a named parameter 7201 (otherwise it is an extra parameter matching an ellipsis). 7202 REGBASE is the regno of the base register for the parameter array. 
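As a sketch of the result, a 16-byte struct { int i; double d; } passed
outgoing in slot 0 of the 64-bit ABI comes back roughly as
  (parallel [(expr_list (reg:DI %o0) (const_int 0))
             (expr_list (reg:DF %f2) (const_int 8))])
i.e. the integer field travels in the first integer argument register and
the double in the FP registers of the second slot, as in the slot table
further above.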
*/ 7203 7204 static rtx 7205 function_arg_record_value (const_tree type, machine_mode mode, 7206 int slotno, bool named, int regbase) 7207 { 7208 HOST_WIDE_INT typesize = int_size_in_bytes (type); 7209 assign_data_t data; 7210 int nregs; 7211 7212 data.slotno = slotno; 7213 data.regbase = regbase; 7214 7215 /* Count how many registers we need. */ 7216 data.nregs = 0; 7217 data.intoffset = 0; 7218 data.stack = false; 7219 traverse_record_type<assign_data_t, count_registers> (type, named, &data); 7220 7221 /* Take into account pending integer fields. */ 7222 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs)) 7223 data.nregs += nregs; 7224 7225 /* Allocate the vector and handle some annoying special cases. */ 7226 nregs = data.nregs; 7227 7228 if (nregs == 0) 7229 { 7230 /* ??? Empty structure has no value? Duh? */ 7231 if (typesize <= 0) 7232 { 7233 /* Though there's nothing really to store, return a word register 7234 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL 7235 leads to breakage due to the fact that there are zero bytes to 7236 load. */ 7237 return gen_rtx_REG (mode, regbase); 7238 } 7239 7240 /* ??? C++ has structures with no fields, and yet a size. Give up 7241 for now and pass everything back in integer registers. */ 7242 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 7243 if (nregs + slotno > SPARC_INT_ARG_MAX) 7244 nregs = SPARC_INT_ARG_MAX - slotno; 7245 } 7246 7247 gcc_assert (nregs > 0); 7248 7249 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs)); 7250 7251 /* If at least one field must be passed on the stack, generate 7252 (parallel [(expr_list (nil) ...) ...]) so that all fields will 7253 also be passed on the stack. We can't do much better because the 7254 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case 7255 of structures for which the fields passed exclusively in registers 7256 are not at the beginning of the structure. */ 7257 if (data.stack) 7258 XVECEXP (data.ret, 0, 0) 7259 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); 7260 7261 /* Assign the registers. */ 7262 data.nregs = 0; 7263 data.intoffset = 0; 7264 traverse_record_type<assign_data_t, assign_registers> (type, named, &data); 7265 7266 /* Assign pending integer fields. */ 7267 assign_int_registers (typesize * BITS_PER_UNIT, &data); 7268 7269 gcc_assert (data.nregs == nregs); 7270 7271 return data.ret; 7272 } 7273 7274 /* Used by function_arg and sparc_function_value_1 to implement the conventions 7275 of the 64-bit ABI for passing and returning unions. 7276 Return an expression valid as a return value for the FUNCTION_ARG 7277 and TARGET_FUNCTION_VALUE. 7278 7279 SIZE is the size in bytes of the union. 7280 MODE is the argument's machine mode. 7281 REGNO is the hard register the union will be passed in. */ 7282 7283 static rtx 7284 function_arg_union_value (int size, machine_mode mode, int slotno, 7285 int regno) 7286 { 7287 int nwords = CEIL_NWORDS (size), i; 7288 rtx regs; 7289 7290 /* See comment in previous function for empty structures. */ 7291 if (nwords == 0) 7292 return gen_rtx_REG (mode, regno); 7293 7294 if (slotno == SPARC_INT_ARG_MAX - 1) 7295 nwords = 1; 7296 7297 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords)); 7298 7299 for (i = 0; i < nwords; i++) 7300 { 7301 /* Unions are passed left-justified. 
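For example, a 12-byte union passed outgoing in slot 0 of the 64-bit ABI
yields a 2-element parallel here, (reg:DI %o0) at byte offset 0 and
(reg:DI %o1) at byte offset 8, since nwords = CEIL_NWORDS (12) = 2.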
*/
7302 XVECEXP (regs, 0, i)
7303 = gen_rtx_EXPR_LIST (VOIDmode,
7304 gen_rtx_REG (word_mode, regno),
7305 GEN_INT (UNITS_PER_WORD * i));
7306 regno++;
7307 }
7308
7309 return regs;
7310 }
7311
7312 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7313 for passing and returning BLKmode vectors.
7314 Return an expression valid as a return value for the FUNCTION_ARG
7315 and TARGET_FUNCTION_VALUE.
7316
7317 SIZE is the size in bytes of the vector.
7318 REGNO is the FP hard register the vector will be passed in. */
7319
7320 static rtx
7321 function_arg_vector_value (int size, int regno)
7322 {
7323 const int nregs = MAX (1, size / 8);
7324 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7325
7326 if (size < 8)
7327 XVECEXP (regs, 0, 0)
7328 = gen_rtx_EXPR_LIST (VOIDmode,
7329 gen_rtx_REG (SImode, regno),
7330 const0_rtx);
7331 else
7332 for (int i = 0; i < nregs; i++)
7333 XVECEXP (regs, 0, i)
7334 = gen_rtx_EXPR_LIST (VOIDmode,
7335 gen_rtx_REG (DImode, regno + 2*i),
7336 GEN_INT (i*8));
7337
7338 return regs;
7339 }
7340
7341 /* Determine where to put an argument to a function.
7342 Value is zero to push the argument on the stack,
7343 or a hard register in which to store the argument.
7344
7345 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7346 the preceding args and about the function being called.
7347 MODE is the argument's machine mode.
7348 TYPE is the data type of the argument (as a tree).
7349 This is null for libcalls where that information may
7350 not be available.
7351 NAMED is true if this argument is a named parameter
7352 (otherwise it is an extra parameter matching an ellipsis).
7353 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7354 TARGET_FUNCTION_INCOMING_ARG. */
7355
7356 static rtx
7357 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7358 const_tree type, bool named, bool incoming)
7359 {
7360 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7361
7362 int regbase = (incoming
7363 ? SPARC_INCOMING_INT_ARG_FIRST
7364 : SPARC_OUTGOING_INT_ARG_FIRST);
7365 int slotno, regno, padding;
7366 enum mode_class mclass = GET_MODE_CLASS (mode);
7367
7368 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7369 &regno, &padding);
7370 if (slotno == -1)
7371 return 0;
7372
7373 /* Vector types deserve special treatment because they are polymorphic wrt
7374 their mode, depending upon whether VIS instructions are enabled. */
7375 if (type && TREE_CODE (type) == VECTOR_TYPE)
7376 {
7377 HOST_WIDE_INT size = int_size_in_bytes (type);
7378 gcc_assert ((TARGET_ARCH32 && size <= 8)
7379 || (TARGET_ARCH64 && size <= 16));
7380
7381 if (mode == BLKmode)
7382 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7383
7384 mclass = MODE_FLOAT;
7385 }
7386
7387 if (TARGET_ARCH32)
7388 return gen_rtx_REG (mode, regno);
7389
7390 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7391 and are promoted to registers if possible. */
7392 if (type && TREE_CODE (type) == RECORD_TYPE)
7393 {
7394 HOST_WIDE_INT size = int_size_in_bytes (type);
7395 gcc_assert (size <= 16);
7396
7397 return function_arg_record_value (type, mode, slotno, named, regbase);
7398 }
7399
7400 /* Unions up to 16 bytes in size are passed in integer registers.
*/ 7401 else if (type && TREE_CODE (type) == UNION_TYPE) 7402 { 7403 HOST_WIDE_INT size = int_size_in_bytes (type); 7404 gcc_assert (size <= 16); 7405 7406 return function_arg_union_value (size, mode, slotno, regno); 7407 } 7408 7409 /* v9 fp args in reg slots beyond the int reg slots get passed in regs 7410 but also have the slot allocated for them. 7411 If no prototype is in scope fp values in register slots get passed 7412 in two places, either fp regs and int regs or fp regs and memory. */ 7413 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) 7414 && SPARC_FP_REG_P (regno)) 7415 { 7416 rtx reg = gen_rtx_REG (mode, regno); 7417 if (cum->prototype_p || cum->libcall_p) 7418 return reg; 7419 else 7420 { 7421 rtx v0, v1; 7422 7423 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2) 7424 { 7425 int intreg; 7426 7427 /* On incoming, we don't need to know that the value 7428 is passed in %f0 and %i0, and it confuses other parts 7429 causing needless spillage even on the simplest cases. */ 7430 if (incoming) 7431 return reg; 7432 7433 intreg = (SPARC_OUTGOING_INT_ARG_FIRST 7434 + (regno - SPARC_FP_ARG_FIRST) / 2); 7435 7436 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); 7437 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg), 7438 const0_rtx); 7439 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); 7440 } 7441 else 7442 { 7443 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); 7444 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx); 7445 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1)); 7446 } 7447 } 7448 } 7449 7450 /* All other aggregate types are passed in an integer register in a mode 7451 corresponding to the size of the type. */ 7452 else if (type && AGGREGATE_TYPE_P (type)) 7453 { 7454 HOST_WIDE_INT size = int_size_in_bytes (type); 7455 gcc_assert (size <= 16); 7456 7457 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk (); 7458 } 7459 7460 return gen_rtx_REG (mode, regno); 7461 } 7462 7463 /* Handle the TARGET_FUNCTION_ARG target hook. */ 7464 7465 static rtx 7466 sparc_function_arg (cumulative_args_t cum, machine_mode mode, 7467 const_tree type, bool named) 7468 { 7469 return sparc_function_arg_1 (cum, mode, type, named, false); 7470 } 7471 7472 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */ 7473 7474 static rtx 7475 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode, 7476 const_tree type, bool named) 7477 { 7478 return sparc_function_arg_1 (cum, mode, type, named, true); 7479 } 7480 7481 /* For sparc64, objects requiring 16 byte alignment are passed that way. */ 7482 7483 static unsigned int 7484 sparc_function_arg_boundary (machine_mode mode, const_tree type) 7485 { 7486 return ((TARGET_ARCH64 7487 && (GET_MODE_ALIGNMENT (mode) == 128 7488 || (type && TYPE_ALIGN (type) == 128))) 7489 ? 128 7490 : PARM_BOUNDARY); 7491 } 7492 7493 /* For an arg passed partly in registers and partly in memory, 7494 this is the number of bytes of registers used. 7495 For args passed entirely in registers or entirely in memory, zero. 7496 7497 Any arg that starts in the first 6 regs but won't entirely fit in them 7498 needs partial registers on v8. On v9, structures with integer 7499 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp 7500 values that begin in the last fp reg [where "last fp reg" varies with the 7501 mode] will be split between that reg and memory. 
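For instance, in 32-bit mode a DImode 'long long' whose first word lands
in slot 5 has CEIL_NWORDS (8) == 2 and 5 + 2 > SPARC_INT_ARG_MAX, so the
hook below returns (6 - 5) * 4 == 4: four bytes travel in %o5 and the
remaining four on the stack.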
*/ 7502 7503 static int 7504 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode, 7505 tree type, bool named) 7506 { 7507 int slotno, regno, padding; 7508 7509 /* We pass false for incoming here; it doesn't matter. */ 7510 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named, 7511 false, &regno, &padding); 7512 7513 if (slotno == -1) 7514 return 0; 7515 7516 if (TARGET_ARCH32) 7517 { 7518 if ((slotno + (mode == BLKmode 7519 ? CEIL_NWORDS (int_size_in_bytes (type)) 7520 : CEIL_NWORDS (GET_MODE_SIZE (mode)))) 7521 > SPARC_INT_ARG_MAX) 7522 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD; 7523 } 7524 else 7525 { 7526 /* We are guaranteed by pass_by_reference that the size of the 7527 argument is not greater than 16 bytes, so we only need to return 7528 one word if the argument is partially passed in registers. */ 7529 7530 if (type && AGGREGATE_TYPE_P (type)) 7531 { 7532 int size = int_size_in_bytes (type); 7533 7534 if (size > UNITS_PER_WORD 7535 && (slotno == SPARC_INT_ARG_MAX - 1 7536 || slotno == SPARC_FP_ARG_MAX - 1)) 7537 return UNITS_PER_WORD; 7538 } 7539 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT 7540 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT 7541 && ! (TARGET_FPU && named))) 7542 { 7543 /* The complex types are passed as packed types. */ 7544 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD 7545 && slotno == SPARC_INT_ARG_MAX - 1) 7546 return UNITS_PER_WORD; 7547 } 7548 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) 7549 { 7550 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD) 7551 > SPARC_FP_ARG_MAX) 7552 return UNITS_PER_WORD; 7553 } 7554 } 7555 7556 return 0; 7557 } 7558 7559 /* Handle the TARGET_PASS_BY_REFERENCE target hook. 7560 Specify whether to pass the argument by reference. */ 7561 7562 static bool 7563 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, 7564 machine_mode mode, const_tree type, 7565 bool named ATTRIBUTE_UNUSED) 7566 { 7567 if (TARGET_ARCH32) 7568 /* Original SPARC 32-bit ABI says that structures and unions, 7569 and quad-precision floats are passed by reference. For Pascal, 7570 also pass arrays by reference. All other base types are passed 7571 in registers. 7572 7573 Extended ABI (as implemented by the Sun compiler) says that all 7574 complex floats are passed by reference. Pass complex integers 7575 in registers up to 8 bytes. More generally, enforce the 2-word 7576 cap for passing arguments in registers. 7577 7578 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7579 integers are passed like floats of the same size, that is in 7580 registers up to 8 bytes. Pass all vector floats by reference 7581 like structure and unions. */ 7582 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type))) 7583 || mode == SCmode 7584 /* Catch CDImode, TFmode, DCmode and TCmode. */ 7585 || GET_MODE_SIZE (mode) > 8 7586 || (type 7587 && TREE_CODE (type) == VECTOR_TYPE 7588 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); 7589 else 7590 /* Original SPARC 64-bit ABI says that structures and unions 7591 smaller than 16 bytes are passed in registers, as well as 7592 all other base types. 7593 7594 Extended ABI (as implemented by the Sun compiler) says that 7595 complex floats are passed in registers up to 16 bytes. Pass 7596 all complex integers in registers up to 16 bytes. More generally, 7597 enforce the 2-word cap for passing arguments in registers.
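(Illustratively, a 24-byte struct is therefore passed by reference, while a 16-byte struct still travels by value; this just restates the size test below.)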
7598 7599 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7600 integers are passed like floats of the same size, that is in 7601 registers (up to 16 bytes). Pass all vector floats like structure 7602 and unions. */ 7603 return ((type 7604 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE) 7605 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16) 7606 /* Catch CTImode and TCmode. */ 7607 || GET_MODE_SIZE (mode) > 16); 7608 } 7609 7610 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook. 7611 Update the data in CUM to advance over an argument 7612 of mode MODE and data type TYPE. 7613 TYPE is null for libcalls where that information may not be available. */ 7614 7615 static void 7616 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode, 7617 const_tree type, bool named) 7618 { 7619 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); 7620 int regno, padding; 7621 7622 /* We pass false for incoming here; it doesn't matter. */ 7623 function_arg_slotno (cum, mode, type, named, false, &regno, &padding); 7624 7625 /* If argument requires leading padding, add it. */ 7626 cum->words += padding; 7627 7628 if (TARGET_ARCH32) 7629 cum->words += (mode == BLKmode 7630 ? CEIL_NWORDS (int_size_in_bytes (type)) 7631 : CEIL_NWORDS (GET_MODE_SIZE (mode))); 7632 else 7633 { 7634 if (type && AGGREGATE_TYPE_P (type)) 7635 { 7636 int size = int_size_in_bytes (type); 7637 7638 if (size <= 8) 7639 ++cum->words; 7640 else if (size <= 16) 7641 cum->words += 2; 7642 else /* passed by reference */ 7643 ++cum->words; 7644 } 7645 else 7646 cum->words += (mode == BLKmode 7647 ? CEIL_NWORDS (int_size_in_bytes (type)) 7648 : CEIL_NWORDS (GET_MODE_SIZE (mode))); 7649 } 7650 } 7651 7652 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI, structs 7653 are always stored left-shifted in their argument slot. */ 7654 7655 static pad_direction 7656 sparc_function_arg_padding (machine_mode mode, const_tree type) 7657 { 7658 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type)) 7659 return PAD_UPWARD; 7660 7661 /* Fall back to the default. */ 7662 return default_function_arg_padding (mode, type); 7663 } 7664 7665 /* Handle the TARGET_RETURN_IN_MEMORY target hook. 7666 Specify whether to return the return value in memory. */ 7667 7668 static bool 7669 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 7670 { 7671 if (TARGET_ARCH32) 7672 /* Original SPARC 32-bit ABI says that structures and unions, 7673 and quad-precision floats are returned in memory. All other 7674 base types are returned in registers. 7675 7676 Extended ABI (as implemented by the Sun compiler) says that 7677 all complex floats are returned in registers (8 FP registers 7678 at most for '_Complex long double'). Return all complex integers 7679 in registers (4 at most for '_Complex long long'). 7680 7681 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7682 integers are returned like floats of the same size, that is in 7683 registers up to 8 bytes and in memory otherwise. Return all 7684 vector floats in memory like structure and unions; note that 7685 they always have BLKmode like the latter. */ 7686 return (TYPE_MODE (type) == BLKmode 7687 || TYPE_MODE (type) == TFmode 7688 || (TREE_CODE (type) == VECTOR_TYPE 7689 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8)); 7690 else 7691 /* Original SPARC 64-bit ABI says that structures and unions 7692 smaller than 32 bytes are returned in registers, as well as 7693 all other base types.
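(So, as an illustration of the size check below, a 24-byte struct comes back in registers while a 40-byte one is returned in memory.)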
7694 7695 Extended ABI (as implemented by the Sun compiler) says that all 7696 complex floats are returned in registers (8 FP registers at most 7697 for '_Complex long double'). Return all complex integers in 7698 registers (4 at most for '_Complex TItype'). 7699 7700 Vector ABI (as implemented by the Sun VIS SDK) says that vector 7701 integers are returned like floats of the same size, that is in 7702 registers. Return all vector floats like structure and unions; 7703 note that they always have BLKmode like the latter. */ 7704 return (TYPE_MODE (type) == BLKmode 7705 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32); 7706 } 7707 7708 /* Handle the TARGET_STRUCT_VALUE target hook. 7709 Return where to find the structure return value address. */ 7710 7711 static rtx 7712 sparc_struct_value_rtx (tree fndecl, int incoming) 7713 { 7714 if (TARGET_ARCH64) 7715 return 0; 7716 else 7717 { 7718 rtx mem; 7719 7720 if (incoming) 7721 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx, 7722 STRUCT_VALUE_OFFSET)); 7723 else 7724 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx, 7725 STRUCT_VALUE_OFFSET)); 7726 7727 /* Only follow the SPARC ABI for fixed-size structure returns. 7728 Variable size structure returns are handled per the normal 7729 procedures in GCC. This is enabled by -mstd-struct-return */ 7730 if (incoming == 2 7731 && sparc_std_struct_return 7732 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl)) 7733 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST) 7734 { 7735 /* We must check and adjust the return address, as it is optional 7736 as to whether the return object is really provided. */ 7737 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); 7738 rtx scratch = gen_reg_rtx (SImode); 7739 rtx_code_label *endlab = gen_label_rtx (); 7740 7741 /* Calculate the return object size. */ 7742 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl)); 7743 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff); 7744 /* Construct a temporary return value. */ 7745 rtx temp_val 7746 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0); 7747 7748 /* Implement SPARC 32-bit psABI callee return struct checking: 7749 7750 Fetch the instruction where we will return to and see if 7751 it's an unimp instruction (the most significant 10 bits 7752 will be zero). */ 7753 emit_move_insn (scratch, gen_rtx_MEM (SImode, 7754 plus_constant (Pmode, 7755 ret_reg, 8))); 7756 /* Assume the size is valid and pre-adjust. */ 7757 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4))); 7758 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode, 7759 0, endlab); 7760 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4))); 7761 /* Write the address of the memory pointed to by temp_val into 7762 the memory pointed to by mem. */ 7763 emit_move_insn (mem, XEXP (temp_val, 0)); 7764 emit_label (endlab); 7765 } 7766 7767 return mem; 7768 } 7769 } 7770 7771 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook. 7772 For v9, function return values are subject to the same rules as arguments, 7773 except that up to 32 bytes may be returned in registers. */ 7774 7775 static rtx 7776 sparc_function_value_1 (const_tree type, machine_mode mode, 7777 bool outgoing) 7778 { 7779 /* Beware that the two values are swapped here wrt function_arg. */ 7780 int regbase = (outgoing 7781 ? 
SPARC_INCOMING_INT_ARG_FIRST 7782 : SPARC_OUTGOING_INT_ARG_FIRST); 7783 enum mode_class mclass = GET_MODE_CLASS (mode); 7784 int regno; 7785 7786 /* Vector types deserve special treatment because they are polymorphic wrt 7787 their mode, depending upon whether VIS instructions are enabled. */ 7788 if (type && TREE_CODE (type) == VECTOR_TYPE) 7789 { 7790 HOST_WIDE_INT size = int_size_in_bytes (type); 7791 gcc_assert ((TARGET_ARCH32 && size <= 8) 7792 || (TARGET_ARCH64 && size <= 32)); 7793 7794 if (mode == BLKmode) 7795 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST); 7796 7797 mclass = MODE_FLOAT; 7798 } 7799 7800 if (TARGET_ARCH64 && type) 7801 { 7802 /* Structures up to 32 bytes in size are returned in registers. */ 7803 if (TREE_CODE (type) == RECORD_TYPE) 7804 { 7805 HOST_WIDE_INT size = int_size_in_bytes (type); 7806 gcc_assert (size <= 32); 7807 7808 return function_arg_record_value (type, mode, 0, 1, regbase); 7809 } 7810 7811 /* Unions up to 32 bytes in size are returned in integer registers. */ 7812 else if (TREE_CODE (type) == UNION_TYPE) 7813 { 7814 HOST_WIDE_INT size = int_size_in_bytes (type); 7815 gcc_assert (size <= 32); 7816 7817 return function_arg_union_value (size, mode, 0, regbase); 7818 } 7819 7820 /* Objects that require it are returned in FP registers. */ 7821 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) 7822 ; 7823 7824 /* All other aggregate types are returned in an integer register in a 7825 mode corresponding to the size of the type. */ 7826 else if (AGGREGATE_TYPE_P (type)) 7827 { 7828 /* All other aggregate types are passed in an integer register 7829 in a mode corresponding to the size of the type. */ 7830 HOST_WIDE_INT size = int_size_in_bytes (type); 7831 gcc_assert (size <= 32); 7832 7833 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk (); 7834 7835 /* ??? We probably should have made the same ABI change in 7836 3.4.0 as the one we made for unions. The latter was 7837 required by the SCD though, while the former is not 7838 specified, so we favored compatibility and efficiency. 7839 7840 Now we're stuck for aggregates larger than 16 bytes, 7841 because OImode vanished in the meantime. Let's not 7842 try to be unduly clever, and simply follow the ABI 7843 for unions in that case. */ 7844 if (mode == BLKmode) 7845 return function_arg_union_value (size, mode, 0, regbase); 7846 else 7847 mclass = MODE_INT; 7848 } 7849 7850 /* We should only have pointer and integer types at this point. This 7851 must match sparc_promote_function_mode. */ 7852 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 7853 mode = word_mode; 7854 } 7855 7856 /* We should only have pointer and integer types at this point, except with 7857 -freg-struct-return. This must match sparc_promote_function_mode. */ 7858 else if (TARGET_ARCH32 7859 && !(type && AGGREGATE_TYPE_P (type)) 7860 && mclass == MODE_INT 7861 && GET_MODE_SIZE (mode) < UNITS_PER_WORD) 7862 mode = word_mode; 7863 7864 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU) 7865 regno = SPARC_FP_ARG_FIRST; 7866 else 7867 regno = regbase; 7868 7869 return gen_rtx_REG (mode, regno); 7870 } 7871 7872 /* Handle TARGET_FUNCTION_VALUE. 7873 On the SPARC, the value is found in the first "output" register, but the 7874 called function leaves it in the first "input" register. 
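Concretely, the caller reads an integer result from %o0 while the callee writes it to %i0; the register window shift performed by the callee's restore makes these the same physical register (standard SPARC register-window behavior).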
*/ 7875 7876 static rtx 7877 sparc_function_value (const_tree valtype, 7878 const_tree fn_decl_or_type ATTRIBUTE_UNUSED, 7879 bool outgoing) 7880 { 7881 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing); 7882 } 7883 7884 /* Handle TARGET_LIBCALL_VALUE. */ 7885 7886 static rtx 7887 sparc_libcall_value (machine_mode mode, 7888 const_rtx fun ATTRIBUTE_UNUSED) 7889 { 7890 return sparc_function_value_1 (NULL_TREE, mode, false); 7891 } 7892 7893 /* Handle FUNCTION_VALUE_REGNO_P. 7894 On the SPARC, the first "output" reg is used for integer values, and the 7895 first floating point register is used for floating point values. */ 7896 7897 static bool 7898 sparc_function_value_regno_p (const unsigned int regno) 7899 { 7900 return (regno == 8 || (TARGET_FPU && regno == 32)); 7901 } 7902 7903 /* Do what is necessary for `va_start'. We look at the current function 7904 to determine if stdarg or varargs is used and return the address of 7905 the first unnamed parameter. */ 7906 7907 static rtx 7908 sparc_builtin_saveregs (void) 7909 { 7910 int first_reg = crtl->args.info.words; 7911 rtx address; 7912 int regno; 7913 7914 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++) 7915 emit_move_insn (gen_rtx_MEM (word_mode, 7916 gen_rtx_PLUS (Pmode, 7917 frame_pointer_rtx, 7918 GEN_INT (FIRST_PARM_OFFSET (0) 7919 + (UNITS_PER_WORD 7920 * regno)))), 7921 gen_rtx_REG (word_mode, 7922 SPARC_INCOMING_INT_ARG_FIRST + regno)); 7923 7924 address = gen_rtx_PLUS (Pmode, 7925 frame_pointer_rtx, 7926 GEN_INT (FIRST_PARM_OFFSET (0) 7927 + UNITS_PER_WORD * first_reg)); 7928 7929 return address; 7930 } 7931 7932 /* Implement `va_start' for stdarg. */ 7933 7934 static void 7935 sparc_va_start (tree valist, rtx nextarg) 7936 { 7937 nextarg = expand_builtin_saveregs (); 7938 std_expand_builtin_va_start (valist, nextarg); 7939 } 7940 7941 /* Implement `va_arg' for stdarg. */ 7942 7943 static tree 7944 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, 7945 gimple_seq *post_p) 7946 { 7947 HOST_WIDE_INT size, rsize, align; 7948 tree addr, incr; 7949 bool indirect; 7950 tree ptrtype = build_pointer_type (type); 7951 7952 if (pass_by_reference (NULL, TYPE_MODE (type), type, false)) 7953 { 7954 indirect = true; 7955 size = rsize = UNITS_PER_WORD; 7956 align = 0; 7957 } 7958 else 7959 { 7960 indirect = false; 7961 size = int_size_in_bytes (type); 7962 rsize = ROUND_UP (size, UNITS_PER_WORD); 7963 align = 0; 7964 7965 if (TARGET_ARCH64) 7966 { 7967 /* For SPARC64, objects requiring 16-byte alignment get it. */ 7968 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD) 7969 align = 2 * UNITS_PER_WORD; 7970 7971 /* SPARC-V9 ABI states that structures up to 16 bytes in size 7972 are left-justified in their slots. 
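Hence, as an illustration of the code below, a 12-byte struct is widened to its rounded size of 16 bytes (size = rsize), so the big-endian address adjustment a few lines further down never applies to aggregates.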
*/ 7973 if (AGGREGATE_TYPE_P (type)) 7974 { 7975 if (size == 0) 7976 size = rsize = UNITS_PER_WORD; 7977 else 7978 size = rsize; 7979 } 7980 } 7981 } 7982 7983 incr = valist; 7984 if (align) 7985 { 7986 incr = fold_build_pointer_plus_hwi (incr, align - 1); 7987 incr = fold_convert (sizetype, incr); 7988 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr, 7989 size_int (-align)); 7990 incr = fold_convert (ptr_type_node, incr); 7991 } 7992 7993 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue); 7994 addr = incr; 7995 7996 if (BYTES_BIG_ENDIAN && size < rsize) 7997 addr = fold_build_pointer_plus_hwi (incr, rsize - size); 7998 7999 if (indirect) 8000 { 8001 addr = fold_convert (build_pointer_type (ptrtype), addr); 8002 addr = build_va_arg_indirect_ref (addr); 8003 } 8004 8005 /* If the address isn't aligned properly for the type, we need a temporary. 8006 FIXME: This is inefficient, usually we can do this in registers. */ 8007 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD) 8008 { 8009 tree tmp = create_tmp_var (type, "va_arg_tmp"); 8010 tree dest_addr = build_fold_addr_expr (tmp); 8011 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY), 8012 3, dest_addr, addr, size_int (rsize)); 8013 TREE_ADDRESSABLE (tmp) = 1; 8014 gimplify_and_add (copy, pre_p); 8015 addr = dest_addr; 8016 } 8017 8018 else 8019 addr = fold_convert (ptrtype, addr); 8020 8021 incr = fold_build_pointer_plus_hwi (incr, rsize); 8022 gimplify_assign (valist, incr, post_p); 8023 8024 return build_va_arg_indirect_ref (addr); 8025 } 8026 8027 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook. 8028 Specify whether the vector mode is supported by the hardware. */ 8029 8030 static bool 8031 sparc_vector_mode_supported_p (machine_mode mode) 8032 { 8033 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false; 8034 } 8035 8036 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */ 8037 8038 static machine_mode 8039 sparc_preferred_simd_mode (scalar_mode mode) 8040 { 8041 if (TARGET_VIS) 8042 switch (mode) 8043 { 8044 case E_SImode: 8045 return V2SImode; 8046 case E_HImode: 8047 return V4HImode; 8048 case E_QImode: 8049 return V8QImode; 8050 8051 default:; 8052 } 8053 8054 return word_mode; 8055 } 8056 8057 /* Return the string to output an unconditional branch to LABEL, which is 8058 the operand number of the label. 8059 8060 DEST is the destination insn (i.e. the label), INSN is the source. */ 8061 8062 const char * 8063 output_ubranch (rtx dest, rtx_insn *insn) 8064 { 8065 static char string[64]; 8066 bool v9_form = false; 8067 int delta; 8068 char *p; 8069 8070 /* Even if we are trying to use cbcond for this, evaluate 8071 whether we can use V9 branches as our backup plan. */ 8072 8073 delta = 5000000; 8074 if (INSN_ADDRESSES_SET_P ()) 8075 delta = (INSN_ADDRESSES (INSN_UID (dest)) 8076 - INSN_ADDRESSES (INSN_UID (insn))); 8077 8078 /* Leave some instructions for "slop". 
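(Roughly: a V9 branch spans about +-262144 instructions, i.e. about +-1MB of code, so the +-260000 test below keeps a margin of a couple of thousand instructions; this gloss is an informal reading of the constant.)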
*/ 8079 if (TARGET_V9 && delta >= -260000 && delta < 260000) 8080 v9_form = true; 8081 8082 if (TARGET_CBCOND) 8083 { 8084 bool emit_nop = emit_cbcond_nop (insn); 8085 bool far = false; 8086 const char *rval; 8087 8088 if (delta < -500 || delta > 500) 8089 far = true; 8090 8091 if (far) 8092 { 8093 if (v9_form) 8094 rval = "ba,a,pt\t%%xcc, %l0"; 8095 else 8096 rval = "b,a\t%l0"; 8097 } 8098 else 8099 { 8100 if (emit_nop) 8101 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop"; 8102 else 8103 rval = "cwbe\t%%g0, %%g0, %l0"; 8104 } 8105 return rval; 8106 } 8107 8108 if (v9_form) 8109 strcpy (string, "ba%*,pt\t%%xcc, "); 8110 else 8111 strcpy (string, "b%*\t"); 8112 8113 p = strchr (string, '\0'); 8114 *p++ = '%'; 8115 *p++ = 'l'; 8116 *p++ = '0'; 8117 *p++ = '%'; 8118 *p++ = '('; 8119 *p = '\0'; 8120 8121 return string; 8122 } 8123 8124 /* Return the string to output a conditional branch to LABEL, which is 8125 the operand number of the label. OP is the conditional expression. 8126 XEXP (OP, 0) is assumed to be a condition code register (integer or 8127 floating point) and its mode specifies what kind of comparison we made. 8128 8129 DEST is the destination insn (i.e. the label), INSN is the source. 8130 8131 REVERSED is nonzero if we should reverse the sense of the comparison. 8132 8133 ANNUL is nonzero if we should generate an annulling branch. */ 8134 8135 const char * 8136 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul, 8137 rtx_insn *insn) 8138 { 8139 static char string[64]; 8140 enum rtx_code code = GET_CODE (op); 8141 rtx cc_reg = XEXP (op, 0); 8142 machine_mode mode = GET_MODE (cc_reg); 8143 const char *labelno, *branch; 8144 int spaces = 8, far; 8145 char *p; 8146 8147 /* v9 branches are limited to +-1MB. If it is too far away, 8148 change 8149 8150 bne,pt %xcc, .LC30 8151 8152 to 8153 8154 be,pn %xcc, .+12 8155 nop 8156 ba .LC30 8157 8158 and 8159 8160 fbne,a,pn %fcc2, .LC29 8161 8162 to 8163 8164 fbe,pt %fcc2, .+16 8165 nop 8166 ba .LC29 */ 8167 8168 far = TARGET_V9 && (get_attr_length (insn) >= 3); 8169 if (reversed ^ far) 8170 { 8171 /* Reversal of FP compares takes care -- an ordered compare 8172 becomes an unordered compare and vice versa. */ 8173 if (mode == CCFPmode || mode == CCFPEmode) 8174 code = reverse_condition_maybe_unordered (code); 8175 else 8176 code = reverse_condition (code); 8177 } 8178 8179 /* Start by writing the branch condition. */ 8180 if (mode == CCFPmode || mode == CCFPEmode) 8181 { 8182 switch (code) 8183 { 8184 case NE: 8185 branch = "fbne"; 8186 break; 8187 case EQ: 8188 branch = "fbe"; 8189 break; 8190 case GE: 8191 branch = "fbge"; 8192 break; 8193 case GT: 8194 branch = "fbg"; 8195 break; 8196 case LE: 8197 branch = "fble"; 8198 break; 8199 case LT: 8200 branch = "fbl"; 8201 break; 8202 case UNORDERED: 8203 branch = "fbu"; 8204 break; 8205 case ORDERED: 8206 branch = "fbo"; 8207 break; 8208 case UNGT: 8209 branch = "fbug"; 8210 break; 8211 case UNLT: 8212 branch = "fbul"; 8213 break; 8214 case UNEQ: 8215 branch = "fbue"; 8216 break; 8217 case UNGE: 8218 branch = "fbuge"; 8219 break; 8220 case UNLE: 8221 branch = "fbule"; 8222 break; 8223 case LTGT: 8224 branch = "fblg"; 8225 break; 8226 default: 8227 gcc_unreachable (); 8228 } 8229 8230 /* ??? !v9: FP branches cannot be preceded by another floating point 8231 insn. Because there is currently no concept of pre-delay slots, 8232 we can fix this only by always emitting a nop before a floating 8233 point branch. */ 8234 8235 string[0] = '\0'; 8236 if (! 
TARGET_V9) 8237 strcpy (string, "nop\n\t"); 8238 strcat (string, branch); 8239 } 8240 else 8241 { 8242 switch (code) 8243 { 8244 case NE: 8245 if (mode == CCVmode || mode == CCXVmode) 8246 branch = "bvs"; 8247 else 8248 branch = "bne"; 8249 break; 8250 case EQ: 8251 if (mode == CCVmode || mode == CCXVmode) 8252 branch = "bvc"; 8253 else 8254 branch = "be"; 8255 break; 8256 case GE: 8257 if (mode == CCNZmode || mode == CCXNZmode) 8258 branch = "bpos"; 8259 else 8260 branch = "bge"; 8261 break; 8262 case GT: 8263 branch = "bg"; 8264 break; 8265 case LE: 8266 branch = "ble"; 8267 break; 8268 case LT: 8269 if (mode == CCNZmode || mode == CCXNZmode) 8270 branch = "bneg"; 8271 else 8272 branch = "bl"; 8273 break; 8274 case GEU: 8275 branch = "bgeu"; 8276 break; 8277 case GTU: 8278 branch = "bgu"; 8279 break; 8280 case LEU: 8281 branch = "bleu"; 8282 break; 8283 case LTU: 8284 branch = "blu"; 8285 break; 8286 default: 8287 gcc_unreachable (); 8288 } 8289 strcpy (string, branch); 8290 } 8291 spaces -= strlen (branch); 8292 p = strchr (string, '\0'); 8293 8294 /* Now add the annulling, the label, and a possible noop. */ 8295 if (annul && ! far) 8296 { 8297 strcpy (p, ",a"); 8298 p += 2; 8299 spaces -= 2; 8300 } 8301 8302 if (TARGET_V9) 8303 { 8304 rtx note; 8305 int v8 = 0; 8306 8307 if (! far && insn && INSN_ADDRESSES_SET_P ()) 8308 { 8309 int delta = (INSN_ADDRESSES (INSN_UID (dest)) 8310 - INSN_ADDRESSES (INSN_UID (insn))); 8311 /* Leave some instructions for "slop". */ 8312 if (delta < -260000 || delta >= 260000) 8313 v8 = 1; 8314 } 8315 8316 switch (mode) 8317 { 8318 case E_CCmode: 8319 case E_CCNZmode: 8320 case E_CCCmode: 8321 case E_CCVmode: 8322 labelno = "%%icc, "; 8323 if (v8) 8324 labelno = ""; 8325 break; 8326 case E_CCXmode: 8327 case E_CCXNZmode: 8328 case E_CCXCmode: 8329 case E_CCXVmode: 8330 labelno = "%%xcc, "; 8331 gcc_assert (!v8); 8332 break; 8333 case E_CCFPmode: 8334 case E_CCFPEmode: 8335 { 8336 static char v9_fcc_labelno[] = "%%fccX, "; 8337 /* Set the char indicating the number of the fcc reg to use. */ 8338 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0'; 8339 labelno = v9_fcc_labelno; 8340 if (v8) 8341 { 8342 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG); 8343 labelno = ""; 8344 } 8345 } 8346 break; 8347 default: 8348 gcc_unreachable (); 8349 } 8350 8351 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) 8352 { 8353 strcpy (p, 8354 ((profile_probability::from_reg_br_prob_note (XINT (note, 0)) 8355 >= profile_probability::even ()) ^ far) 8356 ? ",pt" : ",pn"); 8357 p += 3; 8358 spaces -= 3; 8359 } 8360 } 8361 else 8362 labelno = ""; 8363 8364 if (spaces > 0) 8365 *p++ = '\t'; 8366 else 8367 *p++ = ' '; 8368 strcpy (p, labelno); 8369 p = strchr (p, '\0'); 8370 if (far) 8371 { 8372 strcpy (p, ".+12\n\t nop\n\tb\t"); 8373 /* Skip the next insn if requested or 8374 if we know that it will be a nop. */ 8375 if (annul || ! final_sequence) 8376 p[3] = '6'; 8377 p += 14; 8378 } 8379 *p++ = '%'; 8380 *p++ = 'l'; 8381 *p++ = label + '0'; 8382 *p++ = '%'; 8383 *p++ = '#'; 8384 *p = '\0'; 8385 8386 return string; 8387 } 8388 8389 /* Emit a library call comparison between floating point X and Y. 8390 COMPARISON is the operator to compare with (EQ, NE, GT, etc). 8391 Return the new operator to be used in the comparison sequence. 8392 8393 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode 8394 values as arguments instead of the TFmode registers themselves, 8395 that's why we cannot call emit_float_lib_cmp. 
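For instance, a 64-bit EQ comparison of x and y effectively becomes the call _Qp_feq (&x, &y) on stack-resident TFmode copies, whereas the 32-bit path calls _Q_feq with the TFmode values themselves (an informal sketch of the code below).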
*/ 8396 8397 rtx 8398 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison) 8399 { 8400 const char *qpfunc; 8401 rtx slot0, slot1, result, tem, tem2, libfunc; 8402 machine_mode mode; 8403 enum rtx_code new_comparison; 8404 8405 switch (comparison) 8406 { 8407 case EQ: 8408 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq"); 8409 break; 8410 8411 case NE: 8412 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne"); 8413 break; 8414 8415 case GT: 8416 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt"); 8417 break; 8418 8419 case GE: 8420 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge"); 8421 break; 8422 8423 case LT: 8424 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt"); 8425 break; 8426 8427 case LE: 8428 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle"); 8429 break; 8430 8431 case ORDERED: 8432 case UNORDERED: 8433 case UNGT: 8434 case UNLT: 8435 case UNEQ: 8436 case UNGE: 8437 case UNLE: 8438 case LTGT: 8439 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp"); 8440 break; 8441 8442 default: 8443 gcc_unreachable (); 8444 } 8445 8446 if (TARGET_ARCH64) 8447 { 8448 if (MEM_P (x)) 8449 { 8450 tree expr = MEM_EXPR (x); 8451 if (expr) 8452 mark_addressable (expr); 8453 slot0 = x; 8454 } 8455 else 8456 { 8457 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode)); 8458 emit_move_insn (slot0, x); 8459 } 8460 8461 if (MEM_P (y)) 8462 { 8463 tree expr = MEM_EXPR (y); 8464 if (expr) 8465 mark_addressable (expr); 8466 slot1 = y; 8467 } 8468 else 8469 { 8470 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode)); 8471 emit_move_insn (slot1, y); 8472 } 8473 8474 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc); 8475 emit_library_call (libfunc, LCT_NORMAL, 8476 DImode, 8477 XEXP (slot0, 0), Pmode, 8478 XEXP (slot1, 0), Pmode); 8479 mode = DImode; 8480 } 8481 else 8482 { 8483 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc); 8484 emit_library_call (libfunc, LCT_NORMAL, 8485 SImode, 8486 x, TFmode, y, TFmode); 8487 mode = SImode; 8488 } 8489 8490 8491 /* Immediately move the result of the libcall into a pseudo 8492 register so reload doesn't clobber the value if it needs 8493 the return register for a spill reg. */ 8494 result = gen_reg_rtx (mode); 8495 emit_move_insn (result, hard_libcall_value (mode, libfunc)); 8496 8497 switch (comparison) 8498 { 8499 default: 8500 return gen_rtx_NE (VOIDmode, result, const0_rtx); 8501 case ORDERED: 8502 case UNORDERED: 8503 new_comparison = (comparison == UNORDERED ? EQ : NE); 8504 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3)); 8505 case UNGT: 8506 case UNGE: 8507 new_comparison = (comparison == UNGT ? GT : NE); 8508 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx); 8509 case UNLE: 8510 return gen_rtx_NE (VOIDmode, result, const2_rtx); 8511 case UNLT: 8512 tem = gen_reg_rtx (mode); 8513 if (TARGET_ARCH32) 8514 emit_insn (gen_andsi3 (tem, result, const1_rtx)); 8515 else 8516 emit_insn (gen_anddi3 (tem, result, const1_rtx)); 8517 return gen_rtx_NE (VOIDmode, tem, const0_rtx); 8518 case UNEQ: 8519 case LTGT: 8520 tem = gen_reg_rtx (mode); 8521 if (TARGET_ARCH32) 8522 emit_insn (gen_addsi3 (tem, result, const1_rtx)); 8523 else 8524 emit_insn (gen_adddi3 (tem, result, const1_rtx)); 8525 tem2 = gen_reg_rtx (mode); 8526 if (TARGET_ARCH32) 8527 emit_insn (gen_andsi3 (tem2, tem, const2_rtx)); 8528 else 8529 emit_insn (gen_anddi3 (tem2, tem, const2_rtx)); 8530 new_comparison = (comparison == UNEQ ? 
EQ : NE); 8531 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx); 8532 } 8533 8534 gcc_unreachable (); 8535 } 8536 8537 /* Generate an unsigned DImode to FP conversion. This is the same code 8538 optabs would emit if we didn't have TFmode patterns. */ 8539 8540 void 8541 sparc_emit_floatunsdi (rtx *operands, machine_mode mode) 8542 { 8543 rtx i0, i1, f0, in, out; 8544 8545 out = operands[0]; 8546 in = force_reg (DImode, operands[1]); 8547 rtx_code_label *neglab = gen_label_rtx (); 8548 rtx_code_label *donelab = gen_label_rtx (); 8549 i0 = gen_reg_rtx (DImode); 8550 i1 = gen_reg_rtx (DImode); 8551 f0 = gen_reg_rtx (mode); 8552 8553 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); 8554 8555 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in))); 8556 emit_jump_insn (gen_jump (donelab)); 8557 emit_barrier (); 8558 8559 emit_label (neglab); 8560 8561 emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); 8562 emit_insn (gen_anddi3 (i1, in, const1_rtx)); 8563 emit_insn (gen_iordi3 (i0, i0, i1)); 8564 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0))); 8565 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); 8566 8567 emit_label (donelab); 8568 } 8569 8570 /* Generate an FP to unsigned DImode conversion. This is the same code 8571 optabs would emit if we didn't have TFmode patterns. */ 8572 8573 void 8574 sparc_emit_fixunsdi (rtx *operands, machine_mode mode) 8575 { 8576 rtx i0, i1, f0, in, out, limit; 8577 8578 out = operands[0]; 8579 in = force_reg (mode, operands[1]); 8580 rtx_code_label *neglab = gen_label_rtx (); 8581 rtx_code_label *donelab = gen_label_rtx (); 8582 i0 = gen_reg_rtx (DImode); 8583 i1 = gen_reg_rtx (DImode); 8584 limit = gen_reg_rtx (mode); 8585 f0 = gen_reg_rtx (mode); 8586 8587 emit_move_insn (limit, 8588 const_double_from_real_value ( 8589 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode)); 8590 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab); 8591 8592 emit_insn (gen_rtx_SET (out, 8593 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in)))); 8594 emit_jump_insn (gen_jump (donelab)); 8595 emit_barrier (); 8596 8597 emit_label (neglab); 8598 8599 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit))); 8600 emit_insn (gen_rtx_SET (i0, 8601 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0)))); 8602 emit_insn (gen_movdi (i1, const1_rtx)); 8603 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63))); 8604 emit_insn (gen_xordi3 (out, i0, i1)); 8605 8606 emit_label (donelab); 8607 } 8608 8609 /* Return the string to output a compare and branch instruction to DEST. 8610 DEST is the destination insn (i.e. the label), INSN is the source, 8611 and OP is the conditional expression. */ 8612 8613 const char * 8614 output_cbcond (rtx op, rtx dest, rtx_insn *insn) 8615 { 8616 machine_mode mode = GET_MODE (XEXP (op, 0)); 8617 enum rtx_code code = GET_CODE (op); 8618 const char *cond_str, *tmpl; 8619 int far, emit_nop, len; 8620 static char string[64]; 8621 char size_char; 8622 8623 /* Compare and Branch is limited to +-2KB. If it is too far away, 8624 change 8625 8626 cxbne X, Y, .LC30 8627 8628 to 8629 8630 cxbe X, Y, .+16 8631 nop 8632 ba,pt xcc, .LC30 8633 nop */ 8634 8635 len = get_attr_length (insn); 8636 8637 far = len == 4; 8638 emit_nop = len == 2; 8639 8640 if (far) 8641 code = reverse_condition (code); 8642 8643 size_char = ((mode == SImode) ? 
'w' : 'x'); 8644 8645 switch (code) 8646 { 8647 case NE: 8648 cond_str = "ne"; 8649 break; 8650 8651 case EQ: 8652 cond_str = "e"; 8653 break; 8654 8655 case GE: 8656 cond_str = "ge"; 8657 break; 8658 8659 case GT: 8660 cond_str = "g"; 8661 break; 8662 8663 case LE: 8664 cond_str = "le"; 8665 break; 8666 8667 case LT: 8668 cond_str = "l"; 8669 break; 8670 8671 case GEU: 8672 cond_str = "cc"; 8673 break; 8674 8675 case GTU: 8676 cond_str = "gu"; 8677 break; 8678 8679 case LEU: 8680 cond_str = "leu"; 8681 break; 8682 8683 case LTU: 8684 cond_str = "cs"; 8685 break; 8686 8687 default: 8688 gcc_unreachable (); 8689 } 8690 8691 if (far) 8692 { 8693 int veryfar = 1, delta; 8694 8695 if (INSN_ADDRESSES_SET_P ()) 8696 { 8697 delta = (INSN_ADDRESSES (INSN_UID (dest)) 8698 - INSN_ADDRESSES (INSN_UID (insn))); 8699 /* Leave some instructions for "slop". */ 8700 if (delta >= -260000 && delta < 260000) 8701 veryfar = 0; 8702 } 8703 8704 if (veryfar) 8705 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop"; 8706 else 8707 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop"; 8708 } 8709 else 8710 { 8711 if (emit_nop) 8712 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop"; 8713 else 8714 tmpl = "c%cb%s\t%%1, %%2, %%3"; 8715 } 8716 8717 snprintf (string, sizeof(string), tmpl, size_char, cond_str); 8718 8719 return string; 8720 } 8721 8722 /* Return the string to output a conditional branch to LABEL, testing 8723 register REG. LABEL is the operand number of the label; REG is the 8724 operand number of the reg. OP is the conditional expression. The mode 8725 of REG says what kind of comparison we made. 8726 8727 DEST is the destination insn (i.e. the label), INSN is the source. 8728 8729 REVERSED is nonzero if we should reverse the sense of the comparison. 8730 8731 ANNUL is nonzero if we should generate an annulling branch. */ 8732 8733 const char * 8734 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed, 8735 int annul, rtx_insn *insn) 8736 { 8737 static char string[64]; 8738 enum rtx_code code = GET_CODE (op); 8739 machine_mode mode = GET_MODE (XEXP (op, 0)); 8740 rtx note; 8741 int far; 8742 char *p; 8743 8744 /* branch on register are limited to +-128KB. If it is too far away, 8745 change 8746 8747 brnz,pt %g1, .LC30 8748 8749 to 8750 8751 brz,pn %g1, .+12 8752 nop 8753 ba,pt %xcc, .LC30 8754 8755 and 8756 8757 brgez,a,pn %o1, .LC29 8758 8759 to 8760 8761 brlz,pt %o1, .+16 8762 nop 8763 ba,pt %xcc, .LC29 */ 8764 8765 far = get_attr_length (insn) >= 3; 8766 8767 /* If not floating-point or if EQ or NE, we can just reverse the code. */ 8768 if (reversed ^ far) 8769 code = reverse_condition (code); 8770 8771 /* Only 64-bit versions of these instructions exist. */ 8772 gcc_assert (mode == DImode); 8773 8774 /* Start by writing the branch condition. */ 8775 8776 switch (code) 8777 { 8778 case NE: 8779 strcpy (string, "brnz"); 8780 break; 8781 8782 case EQ: 8783 strcpy (string, "brz"); 8784 break; 8785 8786 case GE: 8787 strcpy (string, "brgez"); 8788 break; 8789 8790 case LT: 8791 strcpy (string, "brlz"); 8792 break; 8793 8794 case LE: 8795 strcpy (string, "brlez"); 8796 break; 8797 8798 case GT: 8799 strcpy (string, "brgz"); 8800 break; 8801 8802 default: 8803 gcc_unreachable (); 8804 } 8805 8806 p = strchr (string, '\0'); 8807 8808 /* Now add the annulling, reg, label, and nop. */ 8809 if (annul && ! 
far) 8810 { 8811 strcpy (p, ",a"); 8812 p += 2; 8813 } 8814 8815 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX))) 8816 { 8817 strcpy (p, 8818 ((profile_probability::from_reg_br_prob_note (XINT (note, 0)) 8819 >= profile_probability::even ()) ^ far) 8820 ? ",pt" : ",pn"); 8821 p += 3; 8822 } 8823 8824 *p = p < string + 8 ? '\t' : ' '; 8825 p++; 8826 *p++ = '%'; 8827 *p++ = '0' + reg; 8828 *p++ = ','; 8829 *p++ = ' '; 8830 if (far) 8831 { 8832 int veryfar = 1, delta; 8833 8834 if (INSN_ADDRESSES_SET_P ()) 8835 { 8836 delta = (INSN_ADDRESSES (INSN_UID (dest)) 8837 - INSN_ADDRESSES (INSN_UID (insn))); 8838 /* Leave some instructions for "slop". */ 8839 if (delta >= -260000 && delta < 260000) 8840 veryfar = 0; 8841 } 8842 8843 strcpy (p, ".+12\n\t nop\n\t"); 8844 /* Skip the next insn if requested or 8845 if we know that it will be a nop. */ 8846 if (annul || ! final_sequence) 8847 p[3] = '6'; 8848 p += 12; 8849 if (veryfar) 8850 { 8851 strcpy (p, "b\t"); 8852 p += 2; 8853 } 8854 else 8855 { 8856 strcpy (p, "ba,pt\t%%xcc, "); 8857 p += 13; 8858 } 8859 } 8860 *p++ = '%'; 8861 *p++ = 'l'; 8862 *p++ = '0' + label; 8863 *p++ = '%'; 8864 *p++ = '#'; 8865 *p = '\0'; 8866 8867 return string; 8868 } 8869 8870 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7]. 8871 Such instructions cannot be used in the delay slot of return insn on v9. 8872 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts. 8873 */ 8874 8875 static int 8876 epilogue_renumber (register rtx *where, int test) 8877 { 8878 register const char *fmt; 8879 register int i; 8880 register enum rtx_code code; 8881 8882 if (*where == 0) 8883 return 0; 8884 8885 code = GET_CODE (*where); 8886 8887 switch (code) 8888 { 8889 case REG: 8890 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */ 8891 return 1; 8892 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32) 8893 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where))); 8894 /* fallthrough */ 8895 case SCRATCH: 8896 case CC0: 8897 case PC: 8898 case CONST_INT: 8899 case CONST_WIDE_INT: 8900 case CONST_DOUBLE: 8901 return 0; 8902 8903 /* Do not replace the frame pointer with the stack pointer because 8904 it can cause the delayed instruction to load below the stack. 8905 This occurs when instructions like: 8906 8907 (set (reg/i:SI 24 %i0) 8908 (mem/f:SI (plus:SI (reg/f:SI 30 %fp) 8909 (const_int -20 [0xffffffec])) 0)) 8910 8911 are in the return delayed slot. */ 8912 case PLUS: 8913 if (GET_CODE (XEXP (*where, 0)) == REG 8914 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM 8915 && (GET_CODE (XEXP (*where, 1)) != CONST_INT 8916 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS)) 8917 return 1; 8918 break; 8919 8920 case MEM: 8921 if (SPARC_STACK_BIAS 8922 && GET_CODE (XEXP (*where, 0)) == REG 8923 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM) 8924 return 1; 8925 break; 8926 8927 default: 8928 break; 8929 } 8930 8931 fmt = GET_RTX_FORMAT (code); 8932 8933 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) 8934 { 8935 if (fmt[i] == 'E') 8936 { 8937 register int j; 8938 for (j = XVECLEN (*where, i) - 1; j >= 0; j--) 8939 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test)) 8940 return 1; 8941 } 8942 else if (fmt[i] == 'e' 8943 && epilogue_renumber (&(XEXP (*where, i)), test)) 8944 return 1; 8945 } 8946 return 0; 8947 } 8948 8949 /* Leaf functions and non-leaf functions have different needs. 
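A leaf function never executes a save instruction, so it does best working out of the registers it shares with its caller (%o and %g), whereas a non-leaf function prefers its own %l and %i registers; the two allocation orders below encode that preference (general SPARC register-window background).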
*/ 8950 8951 static const int 8952 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER; 8953 8954 static const int 8955 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER; 8956 8957 static const int *const reg_alloc_orders[] = { 8958 reg_leaf_alloc_order, 8959 reg_nonleaf_alloc_order}; 8960 8961 void 8962 order_regs_for_local_alloc (void) 8963 { 8964 static int last_order_nonleaf = 1; 8965 8966 if (df_regs_ever_live_p (15) != last_order_nonleaf) 8967 { 8968 last_order_nonleaf = !last_order_nonleaf; 8969 memcpy ((char *) reg_alloc_order, 8970 (const char *) reg_alloc_orders[last_order_nonleaf], 8971 FIRST_PSEUDO_REGISTER * sizeof (int)); 8972 } 8973 } 8974 8975 /* Return 1 if REG and MEM are legitimate enough to allow the various 8976 MEM<-->REG splits to be run. */ 8977 8978 int 8979 sparc_split_reg_mem_legitimate (rtx reg, rtx mem) 8980 { 8981 /* Punt if we are here by mistake. */ 8982 gcc_assert (reload_completed); 8983 8984 /* We must have an offsettable memory reference. */ 8985 if (!offsettable_memref_p (mem)) 8986 return 0; 8987 8988 /* If we have legitimate args for ldd/std, we do not want 8989 the split to happen. */ 8990 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8)) 8991 return 0; 8992 8993 /* Success. */ 8994 return 1; 8995 } 8996 8997 /* Split a REG <-- MEM move into a pair of moves in MODE. */ 8998 8999 void 9000 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode) 9001 { 9002 rtx high_part = gen_highpart (mode, dest); 9003 rtx low_part = gen_lowpart (mode, dest); 9004 rtx word0 = adjust_address (src, mode, 0); 9005 rtx word1 = adjust_address (src, mode, 4); 9006 9007 if (reg_overlap_mentioned_p (high_part, word1)) 9008 { 9009 emit_move_insn_1 (low_part, word1); 9010 emit_move_insn_1 (high_part, word0); 9011 } 9012 else 9013 { 9014 emit_move_insn_1 (high_part, word0); 9015 emit_move_insn_1 (low_part, word1); 9016 } 9017 } 9018 9019 /* Split a MEM <-- REG move into a pair of moves in MODE. */ 9020 9021 void 9022 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode) 9023 { 9024 rtx word0 = adjust_address (dest, mode, 0); 9025 rtx word1 = adjust_address (dest, mode, 4); 9026 rtx high_part = gen_highpart (mode, src); 9027 rtx low_part = gen_lowpart (mode, src); 9028 9029 emit_move_insn_1 (word0, high_part); 9030 emit_move_insn_1 (word1, low_part); 9031 } 9032 9033 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */ 9034 9035 int 9036 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2) 9037 { 9038 /* Punt if we are here by mistake. */ 9039 gcc_assert (reload_completed); 9040 9041 if (GET_CODE (reg1) == SUBREG) 9042 reg1 = SUBREG_REG (reg1); 9043 if (GET_CODE (reg1) != REG) 9044 return 0; 9045 const int regno1 = REGNO (reg1); 9046 9047 if (GET_CODE (reg2) == SUBREG) 9048 reg2 = SUBREG_REG (reg2); 9049 if (GET_CODE (reg2) != REG) 9050 return 0; 9051 const int regno2 = REGNO (reg2); 9052 9053 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2)) 9054 return 1; 9055 9056 if (TARGET_VIS3) 9057 { 9058 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2)) 9059 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2))) 9060 return 1; 9061 } 9062 9063 return 0; 9064 } 9065 9066 /* Split a REG <--> REG move into a pair of moves in MODE. 
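E.g. a DImode copy between integer register pairs becomes two SImode moves; the high parts are copied first unless the destination high part overlaps the source low part, in which case the order is swapped (see the overlap check below).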
*/ 9067 9068 void 9069 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode) 9070 { 9071 rtx dest1 = gen_highpart (mode, dest); 9072 rtx dest2 = gen_lowpart (mode, dest); 9073 rtx src1 = gen_highpart (mode, src); 9074 rtx src2 = gen_lowpart (mode, src); 9075 9076 /* Now emit using the real source and destination we found, swapping 9077 the order if we detect overlap. */ 9078 if (reg_overlap_mentioned_p (dest1, src2)) 9079 { 9080 emit_move_insn_1 (dest2, src2); 9081 emit_move_insn_1 (dest1, src1); 9082 } 9083 else 9084 { 9085 emit_move_insn_1 (dest1, src1); 9086 emit_move_insn_1 (dest2, src2); 9087 } 9088 } 9089 9090 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1. 9091 This makes them candidates for using ldd and std insns. 9092 9093 Note reg1 and reg2 *must* be hard registers. */ 9094 9095 int 9096 registers_ok_for_ldd_peep (rtx reg1, rtx reg2) 9097 { 9098 /* We might have been passed a SUBREG. */ 9099 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG) 9100 return 0; 9101 9102 if (REGNO (reg1) % 2 != 0) 9103 return 0; 9104 9105 /* Integer ldd is deprecated in SPARC V9 */ 9106 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1))) 9107 return 0; 9108 9109 return (REGNO (reg1) == REGNO (reg2) - 1); 9110 } 9111 9112 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in 9113 an ldd or std insn. 9114 9115 This can only happen when addr1 and addr2, the addresses in mem1 9116 and mem2, are consecutive memory locations (addr1 + 4 == addr2). 9117 addr1 must also be aligned on a 64-bit boundary. 9118 9119 Also iff dependent_reg_rtx is not null it should not be used to 9120 compute the address for mem1, i.e. we cannot optimize a sequence 9121 like: 9122 ld [%o0], %o0 9123 ld [%o0 + 4], %o1 9124 to 9125 ldd [%o0], %o0 9126 nor: 9127 ld [%g3 + 4], %g3 9128 ld [%g3], %g2 9129 to 9130 ldd [%g3], %g2 9131 9132 But, note that the transformation from: 9133 ld [%g2 + 4], %g3 9134 ld [%g2], %g2 9135 to 9136 ldd [%g2], %g2 9137 is perfectly fine. Thus, the peephole2 patterns always pass us 9138 the destination register of the first load, never the second one. 9139 9140 For stores we don't have a similar problem, so dependent_reg_rtx is 9141 NULL_RTX. */ 9142 9143 int 9144 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx) 9145 { 9146 rtx addr1, addr2; 9147 unsigned int reg1; 9148 HOST_WIDE_INT offset1; 9149 9150 /* The mems cannot be volatile. */ 9151 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) 9152 return 0; 9153 9154 /* MEM1 should be aligned on a 64-bit boundary. */ 9155 if (MEM_ALIGN (mem1) < 64) 9156 return 0; 9157 9158 addr1 = XEXP (mem1, 0); 9159 addr2 = XEXP (mem2, 0); 9160 9161 /* Extract a register number and offset (if used) from the first addr. */ 9162 if (GET_CODE (addr1) == PLUS) 9163 { 9164 /* If not a REG, return zero. */ 9165 if (GET_CODE (XEXP (addr1, 0)) != REG) 9166 return 0; 9167 else 9168 { 9169 reg1 = REGNO (XEXP (addr1, 0)); 9170 /* The offset must be constant! */ 9171 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT) 9172 return 0; 9173 offset1 = INTVAL (XEXP (addr1, 1)); 9174 } 9175 } 9176 else if (GET_CODE (addr1) != REG) 9177 return 0; 9178 else 9179 { 9180 reg1 = REGNO (addr1); 9181 /* This was a simple (mem (reg)) expression. Offset is 0. */ 9182 offset1 = 0; 9183 } 9184 9185 /* Make sure the second address is a (mem (plus (reg) (const_int). 
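For example (purely illustrative), accesses [%o0] and [%o0+4] with %o0 known to be 8-byte aligned satisfy all of the checks in this function and can be widened into a single ldd [%o0], whereas [%o1+4] and [%o1+8] are rejected because the first offset is not a multiple of 8.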
*/ 9186 if (GET_CODE (addr2) != PLUS) 9187 return 0; 9188 9189 if (GET_CODE (XEXP (addr2, 0)) != REG 9190 || GET_CODE (XEXP (addr2, 1)) != CONST_INT) 9191 return 0; 9192 9193 if (reg1 != REGNO (XEXP (addr2, 0))) 9194 return 0; 9195 9196 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx)) 9197 return 0; 9198 9199 /* The first offset must be evenly divisible by 8 to ensure the 9200 address is 64-bit aligned. */ 9201 if (offset1 % 8 != 0) 9202 return 0; 9203 9204 /* The offset for the second addr must be 4 more than the first addr. */ 9205 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4) 9206 return 0; 9207 9208 /* All the tests passed. addr1 and addr2 are valid for ldd and std 9209 instructions. */ 9210 return 1; 9211 } 9212 9213 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */ 9214 9215 rtx 9216 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode) 9217 { 9218 rtx x = widen_memory_access (mem1, mode, 0); 9219 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2); 9220 return x; 9221 } 9222 9223 /* Return 1 if reg is a pseudo, or is the first register in 9224 a hard register pair. This makes it suitable for use in 9225 ldd and std insns. */ 9226 9227 int 9228 register_ok_for_ldd (rtx reg) 9229 { 9230 /* We might have been passed a SUBREG. */ 9231 if (!REG_P (reg)) 9232 return 0; 9233 9234 if (REGNO (reg) < FIRST_PSEUDO_REGISTER) 9235 return (REGNO (reg) % 2 == 0); 9236 9237 return 1; 9238 } 9239 9240 /* Return 1 if OP, a MEM, has an address which is known to be 9241 aligned to an 8-byte boundary. */ 9242 9243 int 9244 memory_ok_for_ldd (rtx op) 9245 { 9246 /* In 64-bit mode, we assume that the address is word-aligned. */ 9247 if (TARGET_ARCH32 && !mem_min_alignment (op, 8)) 9248 return 0; 9249 9250 if (! can_create_pseudo_p () 9251 && !strict_memory_address_p (Pmode, XEXP (op, 0))) 9252 return 0; 9253 9254 return 1; 9255 } 9256 9257 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ 9258 9259 static bool 9260 sparc_print_operand_punct_valid_p (unsigned char code) 9261 { 9262 if (code == '#' 9263 || code == '*' 9264 || code == '(' 9265 || code == ')' 9266 || code == '_' 9267 || code == '&') 9268 return true; 9269 9270 return false; 9271 } 9272 9273 /* Implement TARGET_PRINT_OPERAND. 9274 Print operand X (an rtx) in assembler syntax to file FILE. 9275 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. 9276 For `%' followed by punctuation, CODE is the punctuation and X is null. */ 9277 9278 static void 9279 sparc_print_operand (FILE *file, rtx x, int code) 9280 { 9281 const char *s; 9282 9283 switch (code) 9284 { 9285 case '#': 9286 /* Output an insn in a delay slot. */ 9287 if (final_sequence) 9288 sparc_indent_opcode = 1; 9289 else 9290 fputs ("\n\t nop", file); 9291 return; 9292 case '*': 9293 /* Output an annul flag if there's nothing for the delay slot and we 9294 are optimizing. This is always used with '(' below. 9295 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch; 9296 this is a dbx bug. So, we only do this when optimizing. 9297 On UltraSPARC, a branch in a delay slot causes a pipeline flush. 9298 Always emit a nop in case the next instruction is a branch. */ 9299 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9)) 9300 fputs (",a", file); 9301 return; 9302 case '(': 9303 /* Output a 'nop' if there's nothing for the delay slot and we are 9304 not optimizing. This is always used with '*' above. */ 9305 if (! final_sequence && ! 
(optimize && (int)sparc_cpu < PROCESSOR_V9)) 9306 fputs ("\n\t nop", file); 9307 else if (final_sequence) 9308 sparc_indent_opcode = 1; 9309 return; 9310 case ')': 9311 /* Output the right displacement from the saved PC on function return. 9312 The caller may have placed an "unimp" insn immediately after the call 9313 so we have to account for it. This insn is used in the 32-bit ABI 9314 when calling a function that returns a non zero-sized structure. The 9315 64-bit ABI doesn't have it. Be careful to have this test be the same 9316 as that for the call. The exception is when sparc_std_struct_return 9317 is enabled, the psABI is followed exactly and the adjustment is made 9318 by the code in sparc_struct_value_rtx. The call emitted is the same 9319 when sparc_std_struct_return is enabled. */ 9320 if (!TARGET_ARCH64 9321 && cfun->returns_struct 9322 && !sparc_std_struct_return 9323 && DECL_SIZE (DECL_RESULT (current_function_decl)) 9324 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl))) 9325 == INTEGER_CST 9326 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl)))) 9327 fputs ("12", file); 9328 else 9329 fputc ('8', file); 9330 return; 9331 case '_': 9332 /* Output the Embedded Medium/Anywhere code model base register. */ 9333 fputs (EMBMEDANY_BASE_REG, file); 9334 return; 9335 case '&': 9336 /* Print some local dynamic TLS name. */ 9337 if (const char *name = get_some_local_dynamic_name ()) 9338 assemble_name (file, name); 9339 else 9340 output_operand_lossage ("'%%&' used without any " 9341 "local dynamic TLS references"); 9342 return; 9343 9344 case 'Y': 9345 /* Adjust the operand to take into account a RESTORE operation. */ 9346 if (GET_CODE (x) == CONST_INT) 9347 break; 9348 else if (GET_CODE (x) != REG) 9349 output_operand_lossage ("invalid %%Y operand"); 9350 else if (REGNO (x) < 8) 9351 fputs (reg_names[REGNO (x)], file); 9352 else if (REGNO (x) >= 24 && REGNO (x) < 32) 9353 fputs (reg_names[REGNO (x)-16], file); 9354 else 9355 output_operand_lossage ("invalid %%Y operand"); 9356 return; 9357 case 'L': 9358 /* Print out the low order register name of a register pair. */ 9359 if (WORDS_BIG_ENDIAN) 9360 fputs (reg_names[REGNO (x)+1], file); 9361 else 9362 fputs (reg_names[REGNO (x)], file); 9363 return; 9364 case 'H': 9365 /* Print out the high order register name of a register pair. */ 9366 if (WORDS_BIG_ENDIAN) 9367 fputs (reg_names[REGNO (x)], file); 9368 else 9369 fputs (reg_names[REGNO (x)+1], file); 9370 return; 9371 case 'R': 9372 /* Print out the second register name of a register pair or quad. 9373 I.e., R (%o0) => %o1. */ 9374 fputs (reg_names[REGNO (x)+1], file); 9375 return; 9376 case 'S': 9377 /* Print out the third register name of a register quad. 9378 I.e., S (%o0) => %o2. */ 9379 fputs (reg_names[REGNO (x)+2], file); 9380 return; 9381 case 'T': 9382 /* Print out the fourth register name of a register quad. 9383 I.e., T (%o0) => %o3. */ 9384 fputs (reg_names[REGNO (x)+3], file); 9385 return; 9386 case 'x': 9387 /* Print a condition code register. 
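E.g. %x applied to the integer CC register prints %icc or %xcc depending on the mode of the comparison, and prints the %fccN register name otherwise (a summary of the switch below).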
*/ 9388 if (REGNO (x) == SPARC_ICC_REG) 9389 { 9390 switch (GET_MODE (x)) 9391 { 9392 case E_CCmode: 9393 case E_CCNZmode: 9394 case E_CCCmode: 9395 case E_CCVmode: 9396 s = "%icc"; 9397 break; 9398 case E_CCXmode: 9399 case E_CCXNZmode: 9400 case E_CCXCmode: 9401 case E_CCXVmode: 9402 s = "%xcc"; 9403 break; 9404 default: 9405 gcc_unreachable (); 9406 } 9407 fputs (s, file); 9408 } 9409 else 9410 /* %fccN register */ 9411 fputs (reg_names[REGNO (x)], file); 9412 return; 9413 case 'm': 9414 /* Print the operand's address only. */ 9415 output_address (GET_MODE (x), XEXP (x, 0)); 9416 return; 9417 case 'r': 9418 /* In this case we need a register. Use %g0 if the 9419 operand is const0_rtx. */ 9420 if (x == const0_rtx 9421 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x)))) 9422 { 9423 fputs ("%g0", file); 9424 return; 9425 } 9426 else 9427 break; 9428 9429 case 'A': 9430 switch (GET_CODE (x)) 9431 { 9432 case IOR: 9433 s = "or"; 9434 break; 9435 case AND: 9436 s = "and"; 9437 break; 9438 case XOR: 9439 s = "xor"; 9440 break; 9441 default: 9442 output_operand_lossage ("invalid %%A operand"); 9443 s = ""; 9444 break; 9445 } 9446 fputs (s, file); 9447 return; 9448 9449 case 'B': 9450 switch (GET_CODE (x)) 9451 { 9452 case IOR: 9453 s = "orn"; 9454 break; 9455 case AND: 9456 s = "andn"; 9457 break; 9458 case XOR: 9459 s = "xnor"; 9460 break; 9461 default: 9462 output_operand_lossage ("invalid %%B operand"); 9463 s = ""; 9464 break; 9465 } 9466 fputs (s, file); 9467 return; 9468 9469 /* This is used by the conditional move instructions. */ 9470 case 'C': 9471 { 9472 machine_mode mode = GET_MODE (XEXP (x, 0)); 9473 switch (GET_CODE (x)) 9474 { 9475 case NE: 9476 if (mode == CCVmode || mode == CCXVmode) 9477 s = "vs"; 9478 else 9479 s = "ne"; 9480 break; 9481 case EQ: 9482 if (mode == CCVmode || mode == CCXVmode) 9483 s = "vc"; 9484 else 9485 s = "e"; 9486 break; 9487 case GE: 9488 if (mode == CCNZmode || mode == CCXNZmode) 9489 s = "pos"; 9490 else 9491 s = "ge"; 9492 break; 9493 case GT: 9494 s = "g"; 9495 break; 9496 case LE: 9497 s = "le"; 9498 break; 9499 case LT: 9500 if (mode == CCNZmode || mode == CCXNZmode) 9501 s = "neg"; 9502 else 9503 s = "l"; 9504 break; 9505 case GEU: 9506 s = "geu"; 9507 break; 9508 case GTU: 9509 s = "gu"; 9510 break; 9511 case LEU: 9512 s = "leu"; 9513 break; 9514 case LTU: 9515 s = "lu"; 9516 break; 9517 case LTGT: 9518 s = "lg"; 9519 break; 9520 case UNORDERED: 9521 s = "u"; 9522 break; 9523 case ORDERED: 9524 s = "o"; 9525 break; 9526 case UNLT: 9527 s = "ul"; 9528 break; 9529 case UNLE: 9530 s = "ule"; 9531 break; 9532 case UNGT: 9533 s = "ug"; 9534 break; 9535 case UNGE: 9536 s = "uge" 9537 ; break; 9538 case UNEQ: 9539 s = "ue"; 9540 break; 9541 default: 9542 output_operand_lossage ("invalid %%C operand"); 9543 s = ""; 9544 break; 9545 } 9546 fputs (s, file); 9547 return; 9548 } 9549 9550 /* This are used by the movr instruction pattern. */ 9551 case 'D': 9552 { 9553 switch (GET_CODE (x)) 9554 { 9555 case NE: 9556 s = "ne"; 9557 break; 9558 case EQ: 9559 s = "e"; 9560 break; 9561 case GE: 9562 s = "gez"; 9563 break; 9564 case LT: 9565 s = "lz"; 9566 break; 9567 case LE: 9568 s = "lez"; 9569 break; 9570 case GT: 9571 s = "gz"; 9572 break; 9573 default: 9574 output_operand_lossage ("invalid %%D operand"); 9575 s = ""; 9576 break; 9577 } 9578 fputs (s, file); 9579 return; 9580 } 9581 9582 case 'b': 9583 { 9584 /* Print a sign-extended character. 
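For instance, the constant 0xff prints as -1 and 0x7f prints as 127 after the QImode truncation below.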
*/ 9585 int i = trunc_int_for_mode (INTVAL (x), QImode); 9586 fprintf (file, "%d", i); 9587 return; 9588 } 9589 9590 case 'f': 9591 /* Operand must be a MEM; write its address. */ 9592 if (GET_CODE (x) != MEM) 9593 output_operand_lossage ("invalid %%f operand"); 9594 output_address (GET_MODE (x), XEXP (x, 0)); 9595 return; 9596 9597 case 's': 9598 { 9599 /* Print a sign-extended 32-bit value. */ 9600 HOST_WIDE_INT i; 9601 if (GET_CODE(x) == CONST_INT) 9602 i = INTVAL (x); 9603 else 9604 { 9605 output_operand_lossage ("invalid %%s operand"); 9606 return; 9607 } 9608 i = trunc_int_for_mode (i, SImode); 9609 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i); 9610 return; 9611 } 9612 9613 case 0: 9614 /* Do nothing special. */ 9615 break; 9616 9617 default: 9618 /* Undocumented flag. */ 9619 output_operand_lossage ("invalid operand output code"); 9620 } 9621 9622 if (GET_CODE (x) == REG) 9623 fputs (reg_names[REGNO (x)], file); 9624 else if (GET_CODE (x) == MEM) 9625 { 9626 fputc ('[', file); 9627 /* Poor Sun assembler doesn't understand absolute addressing. */ 9628 if (CONSTANT_P (XEXP (x, 0))) 9629 fputs ("%g0+", file); 9630 output_address (GET_MODE (x), XEXP (x, 0)); 9631 fputc (']', file); 9632 } 9633 else if (GET_CODE (x) == HIGH) 9634 { 9635 fputs ("%hi(", file); 9636 output_addr_const (file, XEXP (x, 0)); 9637 fputc (')', file); 9638 } 9639 else if (GET_CODE (x) == LO_SUM) 9640 { 9641 sparc_print_operand (file, XEXP (x, 0), 0); 9642 if (TARGET_CM_MEDMID) 9643 fputs ("+%l44(", file); 9644 else 9645 fputs ("+%lo(", file); 9646 output_addr_const (file, XEXP (x, 1)); 9647 fputc (')', file); 9648 } 9649 else if (GET_CODE (x) == CONST_DOUBLE) 9650 output_operand_lossage ("floating-point constant not a valid immediate operand"); 9651 else 9652 output_addr_const (file, x); 9653 } 9654 9655 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */ 9656 9657 static void 9658 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x) 9659 { 9660 register rtx base, index = 0; 9661 int offset = 0; 9662 register rtx addr = x; 9663 9664 if (REG_P (addr)) 9665 fputs (reg_names[REGNO (addr)], file); 9666 else if (GET_CODE (addr) == PLUS) 9667 { 9668 if (CONST_INT_P (XEXP (addr, 0))) 9669 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1); 9670 else if (CONST_INT_P (XEXP (addr, 1))) 9671 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0); 9672 else 9673 base = XEXP (addr, 0), index = XEXP (addr, 1); 9674 if (GET_CODE (base) == LO_SUM) 9675 { 9676 gcc_assert (USE_AS_OFFSETABLE_LO10 9677 && TARGET_ARCH64 9678 && ! 
TARGET_CM_MEDMID); 9679 output_operand (XEXP (base, 0), 0); 9680 fputs ("+%lo(", file); 9681 output_address (VOIDmode, XEXP (base, 1)); 9682 fprintf (file, ")+%d", offset); 9683 } 9684 else 9685 { 9686 fputs (reg_names[REGNO (base)], file); 9687 if (index == 0) 9688 fprintf (file, "%+d", offset); 9689 else if (REG_P (index)) 9690 fprintf (file, "+%s", reg_names[REGNO (index)]); 9691 else if (GET_CODE (index) == SYMBOL_REF 9692 || GET_CODE (index) == LABEL_REF 9693 || GET_CODE (index) == CONST) 9694 fputc ('+', file), output_addr_const (file, index); 9695 else gcc_unreachable (); 9696 } 9697 } 9698 else if (GET_CODE (addr) == MINUS 9699 && GET_CODE (XEXP (addr, 1)) == LABEL_REF) 9700 { 9701 output_addr_const (file, XEXP (addr, 0)); 9702 fputs ("-(", file); 9703 output_addr_const (file, XEXP (addr, 1)); 9704 fputs ("-.)", file); 9705 } 9706 else if (GET_CODE (addr) == LO_SUM) 9707 { 9708 output_operand (XEXP (addr, 0), 0); 9709 if (TARGET_CM_MEDMID) 9710 fputs ("+%l44(", file); 9711 else 9712 fputs ("+%lo(", file); 9713 output_address (VOIDmode, XEXP (addr, 1)); 9714 fputc (')', file); 9715 } 9716 else if (flag_pic 9717 && GET_CODE (addr) == CONST 9718 && GET_CODE (XEXP (addr, 0)) == MINUS 9719 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST 9720 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS 9721 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx) 9722 { 9723 addr = XEXP (addr, 0); 9724 output_addr_const (file, XEXP (addr, 0)); 9725 /* Group the args of the second CONST in parenthesis. */ 9726 fputs ("-(", file); 9727 /* Skip past the second CONST--it does nothing for us. */ 9728 output_addr_const (file, XEXP (XEXP (addr, 1), 0)); 9729 /* Close the parenthesis. */ 9730 fputc (')', file); 9731 } 9732 else 9733 { 9734 output_addr_const (file, addr); 9735 } 9736 } 9737 9738 /* Target hook for assembling integer objects. The sparc version has 9739 special handling for aligned DI-mode objects. */ 9740 9741 static bool 9742 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p) 9743 { 9744 /* ??? We only output .xword's for symbols and only then in environments 9745 where the assembler can handle them. */ 9746 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT) 9747 { 9748 if (TARGET_V9) 9749 { 9750 assemble_integer_with_op ("\t.xword\t", x); 9751 return true; 9752 } 9753 else 9754 { 9755 assemble_aligned_integer (4, const0_rtx); 9756 assemble_aligned_integer (4, x); 9757 return true; 9758 } 9759 } 9760 return default_assemble_integer (x, size, aligned_p); 9761 } 9762 9763 /* Return the value of a code used in the .proc pseudo-op that says 9764 what kind of result this function returns. For non-C types, we pick 9765 the closest C type. 
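   A rough worked example of the encoding (assuming the usual 32-bit int;
   see sparc_type_code below for the authoritative rules): a function
   returning plain int gets code 4, one returning unsigned int gets 14,
   and each level of indirection ORs in a 2-bit "pointer" field starting
   at bit 6, so a function returning int * gets

     (1 << 6) | 4 == 0x44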
*/ 9766 9767 #ifndef SHORT_TYPE_SIZE 9768 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2) 9769 #endif 9770 9771 #ifndef INT_TYPE_SIZE 9772 #define INT_TYPE_SIZE BITS_PER_WORD 9773 #endif 9774 9775 #ifndef LONG_TYPE_SIZE 9776 #define LONG_TYPE_SIZE BITS_PER_WORD 9777 #endif 9778 9779 #ifndef LONG_LONG_TYPE_SIZE 9780 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2) 9781 #endif 9782 9783 #ifndef FLOAT_TYPE_SIZE 9784 #define FLOAT_TYPE_SIZE BITS_PER_WORD 9785 #endif 9786 9787 #ifndef DOUBLE_TYPE_SIZE 9788 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) 9789 #endif 9790 9791 #ifndef LONG_DOUBLE_TYPE_SIZE 9792 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2) 9793 #endif 9794 9795 unsigned long 9796 sparc_type_code (register tree type) 9797 { 9798 register unsigned long qualifiers = 0; 9799 register unsigned shift; 9800 9801 /* Only the first 30 bits of the qualifier are valid. We must refrain from 9802 setting more, since some assemblers will give an error for this. Also, 9803 we must be careful to avoid shifts of 32 bits or more to avoid getting 9804 unpredictable results. */ 9805 9806 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type)) 9807 { 9808 switch (TREE_CODE (type)) 9809 { 9810 case ERROR_MARK: 9811 return qualifiers; 9812 9813 case ARRAY_TYPE: 9814 qualifiers |= (3 << shift); 9815 break; 9816 9817 case FUNCTION_TYPE: 9818 case METHOD_TYPE: 9819 qualifiers |= (2 << shift); 9820 break; 9821 9822 case POINTER_TYPE: 9823 case REFERENCE_TYPE: 9824 case OFFSET_TYPE: 9825 qualifiers |= (1 << shift); 9826 break; 9827 9828 case RECORD_TYPE: 9829 return (qualifiers | 8); 9830 9831 case UNION_TYPE: 9832 case QUAL_UNION_TYPE: 9833 return (qualifiers | 9); 9834 9835 case ENUMERAL_TYPE: 9836 return (qualifiers | 10); 9837 9838 case VOID_TYPE: 9839 return (qualifiers | 16); 9840 9841 case INTEGER_TYPE: 9842 /* If this is a range type, consider it to be the underlying 9843 type. */ 9844 if (TREE_TYPE (type) != 0) 9845 break; 9846 9847 /* Carefully distinguish all the standard types of C, 9848 without messing up if the language is not C. We do this by 9849 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to 9850 look at both the names and the above fields, but that's redundant. 9851 Any type whose size is between two C types will be considered 9852 to be the wider of the two types. Also, we do not have a 9853 special code to use for "long long", so anything wider than 9854 long is treated the same. Note that we can't distinguish 9855 between "int" and "long" in this code if they are the same 9856 size, but that's fine, since neither can the assembler. */ 9857 9858 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE) 9859 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2)); 9860 9861 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE) 9862 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3)); 9863 9864 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE) 9865 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4)); 9866 9867 else 9868 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5)); 9869 9870 case REAL_TYPE: 9871 /* If this is a range type, consider it to be the underlying 9872 type. */ 9873 if (TREE_TYPE (type) != 0) 9874 break; 9875 9876 /* Carefully distinguish all the standard types of C, 9877 without messing up if the language is not C. */ 9878 9879 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE) 9880 return (qualifiers | 6); 9881 9882 else 9883 return (qualifiers | 7); 9884 9885 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */ 9886 /* ??? 
We need to distinguish between double and float complex types, 9887 but I don't know how yet because I can't reach this code from 9888 existing front-ends. */ 9889 return (qualifiers | 7); /* Who knows? */ 9890 9891 case VECTOR_TYPE: 9892 case BOOLEAN_TYPE: /* Boolean truth value type. */ 9893 case LANG_TYPE: 9894 case NULLPTR_TYPE: 9895 return qualifiers; 9896 9897 default: 9898 gcc_unreachable (); /* Not a type! */ 9899 } 9900 } 9901 9902 return qualifiers; 9903 } 9904 9905 /* Nested function support. */ 9906 9907 /* Emit RTL insns to initialize the variable parts of a trampoline. 9908 FNADDR is an RTX for the address of the function's pure code. 9909 CXT is an RTX for the static chain value for the function. 9910 9911 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi 9912 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes 9913 (to store insns). This is a bit excessive. Perhaps a different 9914 mechanism would be better here. 9915 9916 Emit enough FLUSH insns to synchronize the data and instruction caches. */ 9917 9918 static void 9919 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt) 9920 { 9921 /* SPARC 32-bit trampoline: 9922 9923 sethi %hi(fn), %g1 9924 sethi %hi(static), %g2 9925 jmp %g1+%lo(fn) 9926 or %g2, %lo(static), %g2 9927 9928 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii 9929 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii 9930 */ 9931 9932 emit_move_insn 9933 (adjust_address (m_tramp, SImode, 0), 9934 expand_binop (SImode, ior_optab, 9935 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1), 9936 GEN_INT (trunc_int_for_mode (0x03000000, SImode)), 9937 NULL_RTX, 1, OPTAB_DIRECT)); 9938 9939 emit_move_insn 9940 (adjust_address (m_tramp, SImode, 4), 9941 expand_binop (SImode, ior_optab, 9942 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1), 9943 GEN_INT (trunc_int_for_mode (0x05000000, SImode)), 9944 NULL_RTX, 1, OPTAB_DIRECT)); 9945 9946 emit_move_insn 9947 (adjust_address (m_tramp, SImode, 8), 9948 expand_binop (SImode, ior_optab, 9949 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX), 9950 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)), 9951 NULL_RTX, 1, OPTAB_DIRECT)); 9952 9953 emit_move_insn 9954 (adjust_address (m_tramp, SImode, 12), 9955 expand_binop (SImode, ior_optab, 9956 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX), 9957 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)), 9958 NULL_RTX, 1, OPTAB_DIRECT)); 9959 9960 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is 9961 aligned on a 16 byte boundary so one flush clears it all. */ 9962 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0)))); 9963 if (sparc_cpu != PROCESSOR_ULTRASPARC 9964 && sparc_cpu != PROCESSOR_ULTRASPARC3 9965 && sparc_cpu != PROCESSOR_NIAGARA 9966 && sparc_cpu != PROCESSOR_NIAGARA2 9967 && sparc_cpu != PROCESSOR_NIAGARA3 9968 && sparc_cpu != PROCESSOR_NIAGARA4 9969 && sparc_cpu != PROCESSOR_NIAGARA7 9970 && sparc_cpu != PROCESSOR_M8) 9971 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8)))); 9972 9973 /* Call __enable_execute_stack after writing onto the stack to make sure 9974 the stack address is accessible. */ 9975 #ifdef HAVE_ENABLE_EXECUTE_STACK 9976 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 9977 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); 9978 #endif 9979 9980 } 9981 9982 /* The 64-bit version is simpler because it makes more sense to load the 9983 values as "immediate" data out of the trampoline. 
It's also easier since 9984 we can read the PC without clobbering a register. */ 9985 9986 static void 9987 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt) 9988 { 9989 /* SPARC 64-bit trampoline: 9990 9991 rd %pc, %g1 9992 ldx [%g1+24], %g5 9993 jmp %g5 9994 ldx [%g1+16], %g5 9995 +16 bytes data 9996 */ 9997 9998 emit_move_insn (adjust_address (m_tramp, SImode, 0), 9999 GEN_INT (trunc_int_for_mode (0x83414000, SImode))); 10000 emit_move_insn (adjust_address (m_tramp, SImode, 4), 10001 GEN_INT (trunc_int_for_mode (0xca586018, SImode))); 10002 emit_move_insn (adjust_address (m_tramp, SImode, 8), 10003 GEN_INT (trunc_int_for_mode (0x81c14000, SImode))); 10004 emit_move_insn (adjust_address (m_tramp, SImode, 12), 10005 GEN_INT (trunc_int_for_mode (0xca586010, SImode))); 10006 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt); 10007 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr); 10008 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0)))); 10009 10010 if (sparc_cpu != PROCESSOR_ULTRASPARC 10011 && sparc_cpu != PROCESSOR_ULTRASPARC3 10012 && sparc_cpu != PROCESSOR_NIAGARA 10013 && sparc_cpu != PROCESSOR_NIAGARA2 10014 && sparc_cpu != PROCESSOR_NIAGARA3 10015 && sparc_cpu != PROCESSOR_NIAGARA4 10016 && sparc_cpu != PROCESSOR_NIAGARA7 10017 && sparc_cpu != PROCESSOR_M8) 10018 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8)))); 10019 10020 /* Call __enable_execute_stack after writing onto the stack to make sure 10021 the stack address is accessible. */ 10022 #ifdef HAVE_ENABLE_EXECUTE_STACK 10023 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 10024 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); 10025 #endif 10026 } 10027 10028 /* Worker for TARGET_TRAMPOLINE_INIT. */ 10029 10030 static void 10031 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) 10032 { 10033 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0)); 10034 cxt = force_reg (Pmode, cxt); 10035 if (TARGET_ARCH64) 10036 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt); 10037 else 10038 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt); 10039 } 10040 10041 /* Adjust the cost of a scheduling dependency. Return the new cost of 10042 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ 10043 10044 static int 10045 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, 10046 int cost) 10047 { 10048 enum attr_type insn_type; 10049 10050 if (recog_memoized (insn) < 0) 10051 return cost; 10052 10053 insn_type = get_attr_type (insn); 10054 10055 if (dep_type == 0) 10056 { 10057 /* Data dependency; DEP_INSN writes a register that INSN reads some 10058 cycles later. */ 10059 10060 /* if a load, then the dependence must be on the memory address; 10061 add an extra "cycle". Note that the cost could be two cycles 10062 if the reg was written late in an instruction group; we ca not tell 10063 here. */ 10064 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD) 10065 return cost + 3; 10066 10067 /* Get the delay only if the address of the store is the dependence. */ 10068 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE) 10069 { 10070 rtx pat = PATTERN(insn); 10071 rtx dep_pat = PATTERN (dep_insn); 10072 10073 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 10074 return cost; /* This should not happen! */ 10075 10076 /* The dependency between the two instructions was on the data that 10077 is being stored. 
Assume that this implies that the address of the 10078 store is not dependent. */ 10079 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) 10080 return cost; 10081 10082 return cost + 3; /* An approximation. */ 10083 } 10084 10085 /* A shift instruction cannot receive its data from an instruction 10086 in the same cycle; add a one cycle penalty. */ 10087 if (insn_type == TYPE_SHIFT) 10088 return cost + 3; /* Split before cascade into shift. */ 10089 } 10090 else 10091 { 10092 /* Anti- or output- dependency; DEP_INSN reads/writes a register that 10093 INSN writes some cycles later. */ 10094 10095 /* These are only significant for the fpu unit; writing a fp reg before 10096 the fpu has finished with it stalls the processor. */ 10097 10098 /* Reusing an integer register causes no problems. */ 10099 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT) 10100 return 0; 10101 } 10102 10103 return cost; 10104 } 10105 10106 static int 10107 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn, 10108 int cost) 10109 { 10110 enum attr_type insn_type, dep_type; 10111 rtx pat = PATTERN(insn); 10112 rtx dep_pat = PATTERN (dep_insn); 10113 10114 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) 10115 return cost; 10116 10117 insn_type = get_attr_type (insn); 10118 dep_type = get_attr_type (dep_insn); 10119 10120 switch (dtype) 10121 { 10122 case 0: 10123 /* Data dependency; DEP_INSN writes a register that INSN reads some 10124 cycles later. */ 10125 10126 switch (insn_type) 10127 { 10128 case TYPE_STORE: 10129 case TYPE_FPSTORE: 10130 /* Get the delay iff the address of the store is the dependence. */ 10131 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) 10132 return cost; 10133 10134 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) 10135 return cost; 10136 return cost + 3; 10137 10138 case TYPE_LOAD: 10139 case TYPE_SLOAD: 10140 case TYPE_FPLOAD: 10141 /* If a load, then the dependence must be on the memory address. If 10142 the addresses aren't equal, then it might be a false dependency */ 10143 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE) 10144 { 10145 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET 10146 || GET_CODE (SET_DEST (dep_pat)) != MEM 10147 || GET_CODE (SET_SRC (pat)) != MEM 10148 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0), 10149 XEXP (SET_SRC (pat), 0))) 10150 return cost + 2; 10151 10152 return cost + 8; 10153 } 10154 break; 10155 10156 case TYPE_BRANCH: 10157 /* Compare to branch latency is 0. There is no benefit from 10158 separating compare and branch. */ 10159 if (dep_type == TYPE_COMPARE) 10160 return 0; 10161 /* Floating point compare to branch latency is less than 10162 compare to conditional move. */ 10163 if (dep_type == TYPE_FPCMP) 10164 return cost - 1; 10165 break; 10166 default: 10167 break; 10168 } 10169 break; 10170 10171 case REG_DEP_ANTI: 10172 /* Anti-dependencies only penalize the fpu unit. 
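   A minimal sketch of such an anti-dependence (register choice arbitrary):

     dep_insn:   st    %o1, [%o3]      (reads %o1)
     insn:       add   %o2, 4, %o1     (later overwrites %o1)

   Because the overwriting insn is an integer ALU or shift operation, no
   stall is modelled and 0 is returned below; only rewriting a live
   floating-point register stalls the FPU.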
*/ 10173 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT) 10174 return 0; 10175 break; 10176 10177 default: 10178 break; 10179 } 10180 10181 return cost; 10182 } 10183 10184 static int 10185 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost, 10186 unsigned int) 10187 { 10188 switch (sparc_cpu) 10189 { 10190 case PROCESSOR_SUPERSPARC: 10191 cost = supersparc_adjust_cost (insn, dep_type, dep, cost); 10192 break; 10193 case PROCESSOR_HYPERSPARC: 10194 case PROCESSOR_SPARCLITE86X: 10195 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost); 10196 break; 10197 default: 10198 break; 10199 } 10200 return cost; 10201 } 10202 10203 static void 10204 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED, 10205 int sched_verbose ATTRIBUTE_UNUSED, 10206 int max_ready ATTRIBUTE_UNUSED) 10207 {} 10208 10209 static int 10210 sparc_use_sched_lookahead (void) 10211 { 10212 if (sparc_cpu == PROCESSOR_NIAGARA 10213 || sparc_cpu == PROCESSOR_NIAGARA2 10214 || sparc_cpu == PROCESSOR_NIAGARA3) 10215 return 0; 10216 if (sparc_cpu == PROCESSOR_NIAGARA4 10217 || sparc_cpu == PROCESSOR_NIAGARA7 10218 || sparc_cpu == PROCESSOR_M8) 10219 return 2; 10220 if (sparc_cpu == PROCESSOR_ULTRASPARC 10221 || sparc_cpu == PROCESSOR_ULTRASPARC3) 10222 return 4; 10223 if ((1 << sparc_cpu) & 10224 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) | 10225 (1 << PROCESSOR_SPARCLITE86X))) 10226 return 3; 10227 return 0; 10228 } 10229 10230 static int 10231 sparc_issue_rate (void) 10232 { 10233 switch (sparc_cpu) 10234 { 10235 case PROCESSOR_NIAGARA: 10236 case PROCESSOR_NIAGARA2: 10237 case PROCESSOR_NIAGARA3: 10238 default: 10239 return 1; 10240 case PROCESSOR_NIAGARA4: 10241 case PROCESSOR_NIAGARA7: 10242 case PROCESSOR_V9: 10243 /* Assume V9 processors are capable of at least dual-issue. */ 10244 return 2; 10245 case PROCESSOR_SUPERSPARC: 10246 return 3; 10247 case PROCESSOR_HYPERSPARC: 10248 case PROCESSOR_SPARCLITE86X: 10249 return 2; 10250 case PROCESSOR_ULTRASPARC: 10251 case PROCESSOR_ULTRASPARC3: 10252 case PROCESSOR_M8: 10253 return 4; 10254 } 10255 } 10256 10257 static int 10258 set_extends (rtx_insn *insn) 10259 { 10260 register rtx pat = PATTERN (insn); 10261 10262 switch (GET_CODE (SET_SRC (pat))) 10263 { 10264 /* Load and some shift instructions zero extend. */ 10265 case MEM: 10266 case ZERO_EXTEND: 10267 /* sethi clears the high bits */ 10268 case HIGH: 10269 /* LO_SUM is used with sethi. 
sethi cleared the high 10270 bits and the values used with lo_sum are positive */ 10271 case LO_SUM: 10272 /* Store flag stores 0 or 1 */ 10273 case LT: case LTU: 10274 case GT: case GTU: 10275 case LE: case LEU: 10276 case GE: case GEU: 10277 case EQ: 10278 case NE: 10279 return 1; 10280 case AND: 10281 { 10282 rtx op0 = XEXP (SET_SRC (pat), 0); 10283 rtx op1 = XEXP (SET_SRC (pat), 1); 10284 if (GET_CODE (op1) == CONST_INT) 10285 return INTVAL (op1) >= 0; 10286 if (GET_CODE (op0) != REG) 10287 return 0; 10288 if (sparc_check_64 (op0, insn) == 1) 10289 return 1; 10290 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1); 10291 } 10292 case IOR: 10293 case XOR: 10294 { 10295 rtx op0 = XEXP (SET_SRC (pat), 0); 10296 rtx op1 = XEXP (SET_SRC (pat), 1); 10297 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0) 10298 return 0; 10299 if (GET_CODE (op1) == CONST_INT) 10300 return INTVAL (op1) >= 0; 10301 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1); 10302 } 10303 case LSHIFTRT: 10304 return GET_MODE (SET_SRC (pat)) == SImode; 10305 /* Positive integers leave the high bits zero. */ 10306 case CONST_INT: 10307 return !(INTVAL (SET_SRC (pat)) & 0x80000000); 10308 case ASHIFTRT: 10309 case SIGN_EXTEND: 10310 return - (GET_MODE (SET_SRC (pat)) == SImode); 10311 case REG: 10312 return sparc_check_64 (SET_SRC (pat), insn); 10313 default: 10314 return 0; 10315 } 10316 } 10317 10318 /* We _ought_ to have only one kind per function, but... */ 10319 static GTY(()) rtx sparc_addr_diff_list; 10320 static GTY(()) rtx sparc_addr_list; 10321 10322 void 10323 sparc_defer_case_vector (rtx lab, rtx vec, int diff) 10324 { 10325 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec); 10326 if (diff) 10327 sparc_addr_diff_list 10328 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list); 10329 else 10330 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list); 10331 } 10332 10333 static void 10334 sparc_output_addr_vec (rtx vec) 10335 { 10336 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); 10337 int idx, vlen = XVECLEN (body, 0); 10338 10339 #ifdef ASM_OUTPUT_ADDR_VEC_START 10340 ASM_OUTPUT_ADDR_VEC_START (asm_out_file); 10341 #endif 10342 10343 #ifdef ASM_OUTPUT_CASE_LABEL 10344 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), 10345 NEXT_INSN (lab)); 10346 #else 10347 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); 10348 #endif 10349 10350 for (idx = 0; idx < vlen; idx++) 10351 { 10352 ASM_OUTPUT_ADDR_VEC_ELT 10353 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0))); 10354 } 10355 10356 #ifdef ASM_OUTPUT_ADDR_VEC_END 10357 ASM_OUTPUT_ADDR_VEC_END (asm_out_file); 10358 #endif 10359 } 10360 10361 static void 10362 sparc_output_addr_diff_vec (rtx vec) 10363 { 10364 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1); 10365 rtx base = XEXP (XEXP (body, 0), 0); 10366 int idx, vlen = XVECLEN (body, 1); 10367 10368 #ifdef ASM_OUTPUT_ADDR_VEC_START 10369 ASM_OUTPUT_ADDR_VEC_START (asm_out_file); 10370 #endif 10371 10372 #ifdef ASM_OUTPUT_CASE_LABEL 10373 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab), 10374 NEXT_INSN (lab)); 10375 #else 10376 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab)); 10377 #endif 10378 10379 for (idx = 0; idx < vlen; idx++) 10380 { 10381 ASM_OUTPUT_ADDR_DIFF_ELT 10382 (asm_out_file, 10383 body, 10384 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)), 10385 CODE_LABEL_NUMBER (base)); 10386 } 10387 10388 #ifdef ASM_OUTPUT_ADDR_VEC_END 10389 
ASM_OUTPUT_ADDR_VEC_END (asm_out_file); 10390 #endif 10391 } 10392 10393 static void 10394 sparc_output_deferred_case_vectors (void) 10395 { 10396 rtx t; 10397 int align; 10398 10399 if (sparc_addr_list == NULL_RTX 10400 && sparc_addr_diff_list == NULL_RTX) 10401 return; 10402 10403 /* Align to cache line in the function's code section. */ 10404 switch_to_section (current_function_section ()); 10405 10406 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); 10407 if (align > 0) 10408 ASM_OUTPUT_ALIGN (asm_out_file, align); 10409 10410 for (t = sparc_addr_list; t ; t = XEXP (t, 1)) 10411 sparc_output_addr_vec (XEXP (t, 0)); 10412 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1)) 10413 sparc_output_addr_diff_vec (XEXP (t, 0)); 10414 10415 sparc_addr_list = sparc_addr_diff_list = NULL_RTX; 10416 } 10417 10418 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are 10419 unknown. Return 1 if the high bits are zero, -1 if the register is 10420 sign extended. */ 10421 int 10422 sparc_check_64 (rtx x, rtx_insn *insn) 10423 { 10424 /* If a register is set only once it is safe to ignore insns this 10425 code does not know how to handle. The loop will either recognize 10426 the single set and return the correct value or fail to recognize 10427 it and return 0. */ 10428 int set_once = 0; 10429 rtx y = x; 10430 10431 gcc_assert (GET_CODE (x) == REG); 10432 10433 if (GET_MODE (x) == DImode) 10434 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN); 10435 10436 if (flag_expensive_optimizations 10437 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1) 10438 set_once = 1; 10439 10440 if (insn == 0) 10441 { 10442 if (set_once) 10443 insn = get_last_insn_anywhere (); 10444 else 10445 return 0; 10446 } 10447 10448 while ((insn = PREV_INSN (insn))) 10449 { 10450 switch (GET_CODE (insn)) 10451 { 10452 case JUMP_INSN: 10453 case NOTE: 10454 break; 10455 case CODE_LABEL: 10456 case CALL_INSN: 10457 default: 10458 if (! set_once) 10459 return 0; 10460 break; 10461 case INSN: 10462 { 10463 rtx pat = PATTERN (insn); 10464 if (GET_CODE (pat) != SET) 10465 return 0; 10466 if (rtx_equal_p (x, SET_DEST (pat))) 10467 return set_extends (insn); 10468 if (y && rtx_equal_p (y, SET_DEST (pat))) 10469 return set_extends (insn); 10470 if (reg_overlap_mentioned_p (SET_DEST (pat), y)) 10471 return 0; 10472 } 10473 } 10474 } 10475 return 0; 10476 } 10477 10478 /* Output a wide shift instruction in V8+ mode. INSN is the instruction, 10479 OPERANDS are its operands and OPCODE is the mnemonic to be used. */ 10480 10481 const char * 10482 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode) 10483 { 10484 static char asm_code[60]; 10485 10486 /* The scratch register is only required when the destination 10487 register is not a 64-bit global or out register. */ 10488 if (which_alternative != 2) 10489 operands[3] = operands[0]; 10490 10491 /* We can only shift by constants <= 63. 
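   For instance (a trivial arithmetic check, not tied to any particular
   caller), a (const_int 65) shift count is reduced to 1 by the masking
   below:

     65 & 0x3f == 1

   which matches the 6-bit shift-count field of the V9 64-bit shift
   instructions.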
*/ 10492 if (GET_CODE (operands[2]) == CONST_INT) 10493 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f); 10494 10495 if (GET_CODE (operands[1]) == CONST_INT) 10496 { 10497 output_asm_insn ("mov\t%1, %3", operands); 10498 } 10499 else 10500 { 10501 output_asm_insn ("sllx\t%H1, 32, %3", operands); 10502 if (sparc_check_64 (operands[1], insn) <= 0) 10503 output_asm_insn ("srl\t%L1, 0, %L1", operands); 10504 output_asm_insn ("or\t%L1, %3, %3", operands); 10505 } 10506 10507 strcpy (asm_code, opcode); 10508 10509 if (which_alternative != 2) 10510 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0"); 10511 else 10512 return 10513 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"); 10514 } 10515 10516 /* Output rtl to increment the profiler label LABELNO 10517 for profiling a function entry. */ 10518 10519 void 10520 sparc_profile_hook (int labelno) 10521 { 10522 char buf[32]; 10523 rtx lab, fun; 10524 10525 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION); 10526 if (NO_PROFILE_COUNTERS) 10527 { 10528 emit_library_call (fun, LCT_NORMAL, VOIDmode); 10529 } 10530 else 10531 { 10532 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno); 10533 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf)); 10534 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode); 10535 } 10536 } 10537 10538 #ifdef TARGET_SOLARIS 10539 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ 10540 10541 static void 10542 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags, 10543 tree decl ATTRIBUTE_UNUSED) 10544 { 10545 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE) 10546 { 10547 solaris_elf_asm_comdat_section (name, flags, decl); 10548 return; 10549 } 10550 10551 fprintf (asm_out_file, "\t.section\t\"%s\"", name); 10552 10553 if (!(flags & SECTION_DEBUG)) 10554 fputs (",#alloc", asm_out_file); 10555 if (flags & SECTION_WRITE) 10556 fputs (",#write", asm_out_file); 10557 if (flags & SECTION_TLS) 10558 fputs (",#tls", asm_out_file); 10559 if (flags & SECTION_CODE) 10560 fputs (",#execinstr", asm_out_file); 10561 10562 if (flags & SECTION_NOTYPE) 10563 ; 10564 else if (flags & SECTION_BSS) 10565 fputs (",#nobits", asm_out_file); 10566 else 10567 fputs (",#progbits", asm_out_file); 10568 10569 fputc ('\n', asm_out_file); 10570 } 10571 #endif /* TARGET_SOLARIS */ 10572 10573 /* We do not allow indirect calls to be optimized into sibling calls. 10574 10575 We cannot use sibling calls when delayed branches are disabled 10576 because they will likely require the call delay slot to be filled. 10577 10578 Also, on SPARC 32-bit we cannot emit a sibling call when the 10579 current function returns a structure. This is because the "unimp 10580 after call" convention would cause the callee to return to the 10581 wrong place. The generic code already disallows cases where the 10582 function being called returns a structure. 10583 10584 It may seem strange how this last case could occur. Usually there 10585 is code after the call which jumps to epilogue code which dumps the 10586 return value into the struct return area. That ought to invalidate 10587 the sibling call right? Well, in the C++ case we can end up passing 10588 the pointer to the struct return area to a constructor (which returns 10589 void) and then nothing else happens. Such a sibling call would look 10590 valid without the added check here. 10591 10592 VxWorks PIC PLT entries require the global pointer to be initialized 10593 on entry. We therefore can't emit sibling calls to them. 
*/ 10594 static bool 10595 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) 10596 { 10597 return (decl 10598 && flag_delayed_branch 10599 && (TARGET_ARCH64 || ! cfun->returns_struct) 10600 && !(TARGET_VXWORKS_RTP 10601 && flag_pic 10602 && !targetm.binds_local_p (decl))); 10603 } 10604 10605 /* libfunc renaming. */ 10606 10607 static void 10608 sparc_init_libfuncs (void) 10609 { 10610 if (TARGET_ARCH32) 10611 { 10612 /* Use the subroutines that Sun's library provides for integer 10613 multiply and divide. The `*' prevents an underscore from 10614 being prepended by the compiler. .umul is a little faster 10615 than .mul. */ 10616 set_optab_libfunc (smul_optab, SImode, "*.umul"); 10617 set_optab_libfunc (sdiv_optab, SImode, "*.div"); 10618 set_optab_libfunc (udiv_optab, SImode, "*.udiv"); 10619 set_optab_libfunc (smod_optab, SImode, "*.rem"); 10620 set_optab_libfunc (umod_optab, SImode, "*.urem"); 10621 10622 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */ 10623 set_optab_libfunc (add_optab, TFmode, "_Q_add"); 10624 set_optab_libfunc (sub_optab, TFmode, "_Q_sub"); 10625 set_optab_libfunc (neg_optab, TFmode, "_Q_neg"); 10626 set_optab_libfunc (smul_optab, TFmode, "_Q_mul"); 10627 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div"); 10628 10629 /* We can define the TFmode sqrt optab only if TARGET_FPU. This 10630 is because with soft-float, the SFmode and DFmode sqrt 10631 instructions will be absent, and the compiler will notice and 10632 try to use the TFmode sqrt instruction for calls to the 10633 builtin function sqrt, but this fails. */ 10634 if (TARGET_FPU) 10635 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt"); 10636 10637 set_optab_libfunc (eq_optab, TFmode, "_Q_feq"); 10638 set_optab_libfunc (ne_optab, TFmode, "_Q_fne"); 10639 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt"); 10640 set_optab_libfunc (ge_optab, TFmode, "_Q_fge"); 10641 set_optab_libfunc (lt_optab, TFmode, "_Q_flt"); 10642 set_optab_libfunc (le_optab, TFmode, "_Q_fle"); 10643 10644 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq"); 10645 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq"); 10646 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos"); 10647 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod"); 10648 10649 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi"); 10650 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou"); 10651 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq"); 10652 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq"); 10653 10654 if (DITF_CONVERSION_LIBFUNCS) 10655 { 10656 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll"); 10657 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull"); 10658 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq"); 10659 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq"); 10660 } 10661 10662 if (SUN_CONVERSION_LIBFUNCS) 10663 { 10664 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll"); 10665 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull"); 10666 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll"); 10667 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull"); 10668 } 10669 } 10670 if (TARGET_ARCH64) 10671 { 10672 /* In the SPARC 64bit ABI, SImode multiply and divide functions 10673 do not exist in the library. Make sure the compiler does not 10674 emit calls to them by accident. (It should always use the 10675 hardware instructions.) 
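   (Illustrative expectation only: a simple 32-bit multiply such as

      int f (int a, int b) { return a * b; }

   should compile to a hardware multiply instruction on any 64-bit
   configuration, never to a call to .umul.)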
*/ 10676 set_optab_libfunc (smul_optab, SImode, 0); 10677 set_optab_libfunc (sdiv_optab, SImode, 0); 10678 set_optab_libfunc (udiv_optab, SImode, 0); 10679 set_optab_libfunc (smod_optab, SImode, 0); 10680 set_optab_libfunc (umod_optab, SImode, 0); 10681 10682 if (SUN_INTEGER_MULTIPLY_64) 10683 { 10684 set_optab_libfunc (smul_optab, DImode, "__mul64"); 10685 set_optab_libfunc (sdiv_optab, DImode, "__div64"); 10686 set_optab_libfunc (udiv_optab, DImode, "__udiv64"); 10687 set_optab_libfunc (smod_optab, DImode, "__rem64"); 10688 set_optab_libfunc (umod_optab, DImode, "__urem64"); 10689 } 10690 10691 if (SUN_CONVERSION_LIBFUNCS) 10692 { 10693 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol"); 10694 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul"); 10695 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol"); 10696 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul"); 10697 } 10698 } 10699 } 10700 10701 /* SPARC builtins. */ 10702 enum sparc_builtins 10703 { 10704 /* FPU builtins. */ 10705 SPARC_BUILTIN_LDFSR, 10706 SPARC_BUILTIN_STFSR, 10707 10708 /* VIS 1.0 builtins. */ 10709 SPARC_BUILTIN_FPACK16, 10710 SPARC_BUILTIN_FPACK32, 10711 SPARC_BUILTIN_FPACKFIX, 10712 SPARC_BUILTIN_FEXPAND, 10713 SPARC_BUILTIN_FPMERGE, 10714 SPARC_BUILTIN_FMUL8X16, 10715 SPARC_BUILTIN_FMUL8X16AU, 10716 SPARC_BUILTIN_FMUL8X16AL, 10717 SPARC_BUILTIN_FMUL8SUX16, 10718 SPARC_BUILTIN_FMUL8ULX16, 10719 SPARC_BUILTIN_FMULD8SUX16, 10720 SPARC_BUILTIN_FMULD8ULX16, 10721 SPARC_BUILTIN_FALIGNDATAV4HI, 10722 SPARC_BUILTIN_FALIGNDATAV8QI, 10723 SPARC_BUILTIN_FALIGNDATAV2SI, 10724 SPARC_BUILTIN_FALIGNDATADI, 10725 SPARC_BUILTIN_WRGSR, 10726 SPARC_BUILTIN_RDGSR, 10727 SPARC_BUILTIN_ALIGNADDR, 10728 SPARC_BUILTIN_ALIGNADDRL, 10729 SPARC_BUILTIN_PDIST, 10730 SPARC_BUILTIN_EDGE8, 10731 SPARC_BUILTIN_EDGE8L, 10732 SPARC_BUILTIN_EDGE16, 10733 SPARC_BUILTIN_EDGE16L, 10734 SPARC_BUILTIN_EDGE32, 10735 SPARC_BUILTIN_EDGE32L, 10736 SPARC_BUILTIN_FCMPLE16, 10737 SPARC_BUILTIN_FCMPLE32, 10738 SPARC_BUILTIN_FCMPNE16, 10739 SPARC_BUILTIN_FCMPNE32, 10740 SPARC_BUILTIN_FCMPGT16, 10741 SPARC_BUILTIN_FCMPGT32, 10742 SPARC_BUILTIN_FCMPEQ16, 10743 SPARC_BUILTIN_FCMPEQ32, 10744 SPARC_BUILTIN_FPADD16, 10745 SPARC_BUILTIN_FPADD16S, 10746 SPARC_BUILTIN_FPADD32, 10747 SPARC_BUILTIN_FPADD32S, 10748 SPARC_BUILTIN_FPSUB16, 10749 SPARC_BUILTIN_FPSUB16S, 10750 SPARC_BUILTIN_FPSUB32, 10751 SPARC_BUILTIN_FPSUB32S, 10752 SPARC_BUILTIN_ARRAY8, 10753 SPARC_BUILTIN_ARRAY16, 10754 SPARC_BUILTIN_ARRAY32, 10755 10756 /* VIS 2.0 builtins. */ 10757 SPARC_BUILTIN_EDGE8N, 10758 SPARC_BUILTIN_EDGE8LN, 10759 SPARC_BUILTIN_EDGE16N, 10760 SPARC_BUILTIN_EDGE16LN, 10761 SPARC_BUILTIN_EDGE32N, 10762 SPARC_BUILTIN_EDGE32LN, 10763 SPARC_BUILTIN_BMASK, 10764 SPARC_BUILTIN_BSHUFFLEV4HI, 10765 SPARC_BUILTIN_BSHUFFLEV8QI, 10766 SPARC_BUILTIN_BSHUFFLEV2SI, 10767 SPARC_BUILTIN_BSHUFFLEDI, 10768 10769 /* VIS 3.0 builtins. 
*/ 10770 SPARC_BUILTIN_CMASK8, 10771 SPARC_BUILTIN_CMASK16, 10772 SPARC_BUILTIN_CMASK32, 10773 SPARC_BUILTIN_FCHKSM16, 10774 SPARC_BUILTIN_FSLL16, 10775 SPARC_BUILTIN_FSLAS16, 10776 SPARC_BUILTIN_FSRL16, 10777 SPARC_BUILTIN_FSRA16, 10778 SPARC_BUILTIN_FSLL32, 10779 SPARC_BUILTIN_FSLAS32, 10780 SPARC_BUILTIN_FSRL32, 10781 SPARC_BUILTIN_FSRA32, 10782 SPARC_BUILTIN_PDISTN, 10783 SPARC_BUILTIN_FMEAN16, 10784 SPARC_BUILTIN_FPADD64, 10785 SPARC_BUILTIN_FPSUB64, 10786 SPARC_BUILTIN_FPADDS16, 10787 SPARC_BUILTIN_FPADDS16S, 10788 SPARC_BUILTIN_FPSUBS16, 10789 SPARC_BUILTIN_FPSUBS16S, 10790 SPARC_BUILTIN_FPADDS32, 10791 SPARC_BUILTIN_FPADDS32S, 10792 SPARC_BUILTIN_FPSUBS32, 10793 SPARC_BUILTIN_FPSUBS32S, 10794 SPARC_BUILTIN_FUCMPLE8, 10795 SPARC_BUILTIN_FUCMPNE8, 10796 SPARC_BUILTIN_FUCMPGT8, 10797 SPARC_BUILTIN_FUCMPEQ8, 10798 SPARC_BUILTIN_FHADDS, 10799 SPARC_BUILTIN_FHADDD, 10800 SPARC_BUILTIN_FHSUBS, 10801 SPARC_BUILTIN_FHSUBD, 10802 SPARC_BUILTIN_FNHADDS, 10803 SPARC_BUILTIN_FNHADDD, 10804 SPARC_BUILTIN_UMULXHI, 10805 SPARC_BUILTIN_XMULX, 10806 SPARC_BUILTIN_XMULXHI, 10807 10808 /* VIS 4.0 builtins. */ 10809 SPARC_BUILTIN_FPADD8, 10810 SPARC_BUILTIN_FPADDS8, 10811 SPARC_BUILTIN_FPADDUS8, 10812 SPARC_BUILTIN_FPADDUS16, 10813 SPARC_BUILTIN_FPCMPLE8, 10814 SPARC_BUILTIN_FPCMPGT8, 10815 SPARC_BUILTIN_FPCMPULE16, 10816 SPARC_BUILTIN_FPCMPUGT16, 10817 SPARC_BUILTIN_FPCMPULE32, 10818 SPARC_BUILTIN_FPCMPUGT32, 10819 SPARC_BUILTIN_FPMAX8, 10820 SPARC_BUILTIN_FPMAX16, 10821 SPARC_BUILTIN_FPMAX32, 10822 SPARC_BUILTIN_FPMAXU8, 10823 SPARC_BUILTIN_FPMAXU16, 10824 SPARC_BUILTIN_FPMAXU32, 10825 SPARC_BUILTIN_FPMIN8, 10826 SPARC_BUILTIN_FPMIN16, 10827 SPARC_BUILTIN_FPMIN32, 10828 SPARC_BUILTIN_FPMINU8, 10829 SPARC_BUILTIN_FPMINU16, 10830 SPARC_BUILTIN_FPMINU32, 10831 SPARC_BUILTIN_FPSUB8, 10832 SPARC_BUILTIN_FPSUBS8, 10833 SPARC_BUILTIN_FPSUBUS8, 10834 SPARC_BUILTIN_FPSUBUS16, 10835 10836 /* VIS 4.0B builtins. */ 10837 10838 /* Note that all the DICTUNPACK* entries should be kept 10839 contiguous. */ 10840 SPARC_BUILTIN_FIRST_DICTUNPACK, 10841 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK, 10842 SPARC_BUILTIN_DICTUNPACK16, 10843 SPARC_BUILTIN_DICTUNPACK32, 10844 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32, 10845 10846 /* Note that all the FPCMP*SHL entries should be kept 10847 contiguous. 
*/ 10848 SPARC_BUILTIN_FIRST_FPCMPSHL, 10849 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL, 10850 SPARC_BUILTIN_FPCMPGT8SHL, 10851 SPARC_BUILTIN_FPCMPEQ8SHL, 10852 SPARC_BUILTIN_FPCMPNE8SHL, 10853 SPARC_BUILTIN_FPCMPLE16SHL, 10854 SPARC_BUILTIN_FPCMPGT16SHL, 10855 SPARC_BUILTIN_FPCMPEQ16SHL, 10856 SPARC_BUILTIN_FPCMPNE16SHL, 10857 SPARC_BUILTIN_FPCMPLE32SHL, 10858 SPARC_BUILTIN_FPCMPGT32SHL, 10859 SPARC_BUILTIN_FPCMPEQ32SHL, 10860 SPARC_BUILTIN_FPCMPNE32SHL, 10861 SPARC_BUILTIN_FPCMPULE8SHL, 10862 SPARC_BUILTIN_FPCMPUGT8SHL, 10863 SPARC_BUILTIN_FPCMPULE16SHL, 10864 SPARC_BUILTIN_FPCMPUGT16SHL, 10865 SPARC_BUILTIN_FPCMPULE32SHL, 10866 SPARC_BUILTIN_FPCMPUGT32SHL, 10867 SPARC_BUILTIN_FPCMPDE8SHL, 10868 SPARC_BUILTIN_FPCMPDE16SHL, 10869 SPARC_BUILTIN_FPCMPDE32SHL, 10870 SPARC_BUILTIN_FPCMPUR8SHL, 10871 SPARC_BUILTIN_FPCMPUR16SHL, 10872 SPARC_BUILTIN_FPCMPUR32SHL, 10873 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL, 10874 10875 SPARC_BUILTIN_MAX 10876 }; 10877 10878 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX]; 10879 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX]; 10880 10881 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE. 10882 The instruction should require a constant operand of some sort. The 10883 function prints an error if OPVAL is not valid. */ 10884 10885 static int 10886 check_constant_argument (enum insn_code icode, int opnum, rtx opval) 10887 { 10888 if (GET_CODE (opval) != CONST_INT) 10889 { 10890 error ("%qs expects a constant argument", insn_data[icode].name); 10891 return false; 10892 } 10893 10894 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode)) 10895 { 10896 error ("constant argument out of range for %qs", insn_data[icode].name); 10897 return false; 10898 } 10899 return true; 10900 } 10901 10902 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the 10903 function decl or NULL_TREE if the builtin was not added. */ 10904 10905 static tree 10906 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code, 10907 tree type) 10908 { 10909 tree t 10910 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE); 10911 10912 if (t) 10913 { 10914 sparc_builtins[code] = t; 10915 sparc_builtins_icode[code] = icode; 10916 } 10917 10918 return t; 10919 } 10920 10921 /* Likewise, but also marks the function as "const". */ 10922 10923 static tree 10924 def_builtin_const (const char *name, enum insn_code icode, 10925 enum sparc_builtins code, tree type) 10926 { 10927 tree t = def_builtin (name, icode, code, type); 10928 10929 if (t) 10930 TREE_READONLY (t) = 1; 10931 10932 return t; 10933 } 10934 10935 /* Implement the TARGET_INIT_BUILTINS target hook. 10936 Create builtin functions for special SPARC instructions. */ 10937 10938 static void 10939 sparc_init_builtins (void) 10940 { 10941 if (TARGET_FPU) 10942 sparc_fpu_init_builtins (); 10943 10944 if (TARGET_VIS) 10945 sparc_vis_init_builtins (); 10946 } 10947 10948 /* Create builtin functions for FPU instructions. */ 10949 10950 static void 10951 sparc_fpu_init_builtins (void) 10952 { 10953 tree ftype 10954 = build_function_type_list (void_type_node, 10955 build_pointer_type (unsigned_type_node), 0); 10956 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr, 10957 SPARC_BUILTIN_LDFSR, ftype); 10958 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr, 10959 SPARC_BUILTIN_STFSR, ftype); 10960 } 10961 10962 /* Create builtin functions for VIS instructions. 
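   A minimal user-level sketch of how the builtins registered below are
   used (purely illustrative; the helper name is hypothetical and the
   example assumes -mvis and a vector typedef as in the GCC documentation):

     typedef unsigned char vec8 __attribute__ ((vector_size (8)));

     vec8
     merge (vec8 hi, vec8 lo, void *addr)
     {
       __builtin_vis_alignaddr (addr, 0);
       return __builtin_vis_faligndatav8qi (hi, lo);
     }

   The alignaddr call sets the GSR alignment field that the faligndata
   call then consumes.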
*/ 10963 10964 static void 10965 sparc_vis_init_builtins (void) 10966 { 10967 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4); 10968 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8); 10969 tree v4hi = build_vector_type (intHI_type_node, 4); 10970 tree v2hi = build_vector_type (intHI_type_node, 2); 10971 tree v2si = build_vector_type (intSI_type_node, 2); 10972 tree v1si = build_vector_type (intSI_type_node, 1); 10973 10974 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0); 10975 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0); 10976 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0); 10977 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0); 10978 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0); 10979 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0); 10980 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0); 10981 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0); 10982 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0); 10983 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0); 10984 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0); 10985 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0); 10986 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0); 10987 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0); 10988 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node, 10989 v8qi, v8qi, 10990 intDI_type_node, 0); 10991 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node, 10992 v8qi, v8qi, 0); 10993 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node, 10994 v8qi, v8qi, 0); 10995 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node, 10996 intSI_type_node, 0); 10997 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node, 10998 intSI_type_node, 0); 10999 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node, 11000 intDI_type_node, 0); 11001 tree di_ftype_di_di = build_function_type_list (intDI_type_node, 11002 intDI_type_node, 11003 intDI_type_node, 0); 11004 tree si_ftype_si_si = build_function_type_list (intSI_type_node, 11005 intSI_type_node, 11006 intSI_type_node, 0); 11007 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node, 11008 ptr_type_node, 11009 intSI_type_node, 0); 11010 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node, 11011 ptr_type_node, 11012 intDI_type_node, 0); 11013 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node, 11014 ptr_type_node, 11015 ptr_type_node, 0); 11016 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node, 11017 ptr_type_node, 11018 ptr_type_node, 0); 11019 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node, 11020 v4hi, v4hi, 0); 11021 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node, 11022 v2si, v2si, 0); 11023 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node, 11024 v4hi, v4hi, 0); 11025 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node, 11026 v2si, v2si, 0); 11027 tree void_ftype_di = build_function_type_list (void_type_node, 11028 intDI_type_node, 0); 11029 tree di_ftype_void = build_function_type_list (intDI_type_node, 11030 void_type_node, 0); 11031 tree void_ftype_si = build_function_type_list 
(void_type_node, 11032 intSI_type_node, 0); 11033 tree sf_ftype_sf_sf = build_function_type_list (float_type_node, 11034 float_type_node, 11035 float_type_node, 0); 11036 tree df_ftype_df_df = build_function_type_list (double_type_node, 11037 double_type_node, 11038 double_type_node, 0); 11039 11040 /* Packing and expanding vectors. */ 11041 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, 11042 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi); 11043 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis, 11044 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi); 11045 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis, 11046 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si); 11047 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, 11048 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi); 11049 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis, 11050 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi); 11051 11052 /* Multiplications. */ 11053 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis, 11054 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi); 11055 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis, 11056 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi); 11057 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis, 11058 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi); 11059 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis, 11060 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi); 11061 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis, 11062 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi); 11063 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis, 11064 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi); 11065 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis, 11066 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi); 11067 11068 /* Data aligning. */ 11069 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis, 11070 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi); 11071 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis, 11072 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi); 11073 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis, 11074 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si); 11075 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis, 11076 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di); 11077 11078 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis, 11079 SPARC_BUILTIN_WRGSR, void_ftype_di); 11080 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis, 11081 SPARC_BUILTIN_RDGSR, di_ftype_void); 11082 11083 if (TARGET_ARCH64) 11084 { 11085 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis, 11086 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di); 11087 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis, 11088 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di); 11089 } 11090 else 11091 { 11092 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis, 11093 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si); 11094 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis, 11095 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si); 11096 } 11097 11098 /* Pixel distance. */ 11099 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis, 11100 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di); 11101 11102 /* Edge handling. 
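   The edge builtins take two address operands and return an integer whose
   mode matches the word size (DImode on 64-bit, SImode on 32-bit), as the
   prototypes below show.  A purely illustrative user-level declaration
   for the 64-bit case:

     long __builtin_vis_edge8 (void *, void *);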
*/ 11103 if (TARGET_ARCH64) 11104 { 11105 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis, 11106 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr); 11107 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis, 11108 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr); 11109 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis, 11110 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr); 11111 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis, 11112 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr); 11113 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis, 11114 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr); 11115 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis, 11116 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr); 11117 } 11118 else 11119 { 11120 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis, 11121 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr); 11122 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis, 11123 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr); 11124 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis, 11125 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr); 11126 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis, 11127 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr); 11128 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis, 11129 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr); 11130 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis, 11131 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr); 11132 } 11133 11134 /* Pixel compare. */ 11135 if (TARGET_ARCH64) 11136 { 11137 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis, 11138 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi); 11139 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis, 11140 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si); 11141 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis, 11142 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi); 11143 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis, 11144 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si); 11145 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis, 11146 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi); 11147 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis, 11148 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si); 11149 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis, 11150 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi); 11151 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis, 11152 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si); 11153 } 11154 else 11155 { 11156 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis, 11157 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi); 11158 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis, 11159 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si); 11160 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis, 11161 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi); 11162 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis, 11163 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si); 11164 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis, 11165 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi); 11166 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis, 11167 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si); 11168 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis, 11169 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi); 11170 def_builtin_const ("__builtin_vis_fcmpeq32", 
CODE_FOR_fcmpeq32si_vis, 11171 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si); 11172 } 11173 11174 /* Addition and subtraction. */ 11175 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3, 11176 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi); 11177 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3, 11178 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi); 11179 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3, 11180 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si); 11181 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3, 11182 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si); 11183 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3, 11184 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi); 11185 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3, 11186 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi); 11187 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3, 11188 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si); 11189 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3, 11190 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si); 11191 11192 /* Three-dimensional array addressing. */ 11193 if (TARGET_ARCH64) 11194 { 11195 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis, 11196 SPARC_BUILTIN_ARRAY8, di_ftype_di_di); 11197 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis, 11198 SPARC_BUILTIN_ARRAY16, di_ftype_di_di); 11199 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis, 11200 SPARC_BUILTIN_ARRAY32, di_ftype_di_di); 11201 } 11202 else 11203 { 11204 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis, 11205 SPARC_BUILTIN_ARRAY8, si_ftype_si_si); 11206 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis, 11207 SPARC_BUILTIN_ARRAY16, si_ftype_si_si); 11208 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis, 11209 SPARC_BUILTIN_ARRAY32, si_ftype_si_si); 11210 } 11211 11212 if (TARGET_VIS2) 11213 { 11214 /* Edge handling. 
*/ 11215 if (TARGET_ARCH64) 11216 { 11217 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis, 11218 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr); 11219 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis, 11220 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr); 11221 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis, 11222 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr); 11223 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis, 11224 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr); 11225 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis, 11226 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr); 11227 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis, 11228 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr); 11229 } 11230 else 11231 { 11232 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis, 11233 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr); 11234 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis, 11235 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr); 11236 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis, 11237 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr); 11238 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis, 11239 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr); 11240 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis, 11241 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr); 11242 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis, 11243 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr); 11244 } 11245 11246 /* Byte mask and shuffle. */ 11247 if (TARGET_ARCH64) 11248 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis, 11249 SPARC_BUILTIN_BMASK, di_ftype_di_di); 11250 else 11251 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis, 11252 SPARC_BUILTIN_BMASK, si_ftype_si_si); 11253 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis, 11254 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi); 11255 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis, 11256 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi); 11257 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis, 11258 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si); 11259 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis, 11260 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di); 11261 } 11262 11263 if (TARGET_VIS3) 11264 { 11265 if (TARGET_ARCH64) 11266 { 11267 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis, 11268 SPARC_BUILTIN_CMASK8, void_ftype_di); 11269 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis, 11270 SPARC_BUILTIN_CMASK16, void_ftype_di); 11271 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis, 11272 SPARC_BUILTIN_CMASK32, void_ftype_di); 11273 } 11274 else 11275 { 11276 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis, 11277 SPARC_BUILTIN_CMASK8, void_ftype_si); 11278 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis, 11279 SPARC_BUILTIN_CMASK16, void_ftype_si); 11280 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis, 11281 SPARC_BUILTIN_CMASK32, void_ftype_si); 11282 } 11283 11284 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis, 11285 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi); 11286 11287 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3, 11288 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi); 11289 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3, 11290 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi); 11291 def_builtin_const 
("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3, 11292 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi); 11293 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3, 11294 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi); 11295 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3, 11296 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si); 11297 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3, 11298 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si); 11299 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3, 11300 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si); 11301 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3, 11302 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si); 11303 11304 if (TARGET_ARCH64) 11305 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis, 11306 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi); 11307 else 11308 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis, 11309 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi); 11310 11311 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis, 11312 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi); 11313 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis, 11314 SPARC_BUILTIN_FPADD64, di_ftype_di_di); 11315 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis, 11316 SPARC_BUILTIN_FPSUB64, di_ftype_di_di); 11317 11318 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3, 11319 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi); 11320 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3, 11321 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi); 11322 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3, 11323 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi); 11324 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3, 11325 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi); 11326 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3, 11327 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si); 11328 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3, 11329 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si); 11330 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3, 11331 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si); 11332 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3, 11333 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si); 11334 11335 if (TARGET_ARCH64) 11336 { 11337 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis, 11338 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi); 11339 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis, 11340 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi); 11341 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis, 11342 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi); 11343 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis, 11344 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi); 11345 } 11346 else 11347 { 11348 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis, 11349 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi); 11350 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis, 11351 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi); 11352 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis, 11353 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi); 11354 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis, 11355 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi); 11356 } 11357 11358 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis, 11359 
                         SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
                         SPARC_BUILTIN_FHADDD, df_ftype_df_df);
      def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
                         SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
                         SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
      def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
                         SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
      def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
                         SPARC_BUILTIN_FNHADDD, df_ftype_df_df);

      def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
                         SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
                         SPARC_BUILTIN_XMULX, di_ftype_di_di);
      def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
                         SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
    }

  if (TARGET_VIS4)
    {
      def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
                         SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
                         SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
                         SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
                         SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);

      if (TARGET_ARCH64)
        {
          def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
                             SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
          def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
                             SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
          def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
                             SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
          def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
                             SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
          def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
                             SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
          def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
                             SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
        }
      else
        {
          def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
                             SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
          def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
                             SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
          def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
                             SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
          def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
                             SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
          def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
                             SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
          def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
                             SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
        }

      def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
                         SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
      def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
                         SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
      def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
                         SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3, 11429 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi); 11430 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3, 11431 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi); 11432 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3, 11433 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si); 11434 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3, 11435 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi); 11436 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3, 11437 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi); 11438 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3, 11439 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si); 11440 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3, 11441 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi); 11442 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3, 11443 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi); 11444 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3, 11445 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si); 11446 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3, 11447 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi); 11448 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3, 11449 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi); 11450 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3, 11451 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi); 11452 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3, 11453 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi); 11454 } 11455 11456 if (TARGET_VIS4B) 11457 { 11458 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8, 11459 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si); 11460 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16, 11461 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si); 11462 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32, 11463 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si); 11464 11465 if (TARGET_ARCH64) 11466 { 11467 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node, 11468 v8qi, v8qi, 11469 intSI_type_node, 0); 11470 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node, 11471 v4hi, v4hi, 11472 intSI_type_node, 0); 11473 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node, 11474 v2si, v2si, 11475 intSI_type_node, 0); 11476 11477 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl, 11478 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si); 11479 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl, 11480 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si); 11481 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl, 11482 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si); 11483 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl, 11484 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si); 11485 11486 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl, 11487 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si); 11488 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl, 11489 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si); 11490 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl, 11491 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si); 11492 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl, 11493 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si); 11494 11495 
def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl, 11496 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si); 11497 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl, 11498 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si); 11499 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl, 11500 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si); 11501 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl, 11502 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si); 11503 11504 11505 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl, 11506 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si); 11507 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl, 11508 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si); 11509 11510 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl, 11511 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si); 11512 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl, 11513 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si); 11514 11515 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl, 11516 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si); 11517 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl, 11518 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si); 11519 11520 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl, 11521 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si); 11522 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl, 11523 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si); 11524 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl, 11525 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si); 11526 11527 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl, 11528 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si); 11529 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl, 11530 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si); 11531 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl, 11532 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si); 11533 11534 } 11535 else 11536 { 11537 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node, 11538 v8qi, v8qi, 11539 intSI_type_node, 0); 11540 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node, 11541 v4hi, v4hi, 11542 intSI_type_node, 0); 11543 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node, 11544 v2si, v2si, 11545 intSI_type_node, 0); 11546 11547 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl, 11548 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si); 11549 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl, 11550 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si); 11551 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl, 11552 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si); 11553 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl, 11554 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si); 11555 11556 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl, 11557 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si); 11558 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl, 11559 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si); 11560 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl, 
11561 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si); 11562 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl, 11563 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si); 11564 11565 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl, 11566 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si); 11567 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl, 11568 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si); 11569 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl, 11570 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si); 11571 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl, 11572 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si); 11573 11574 11575 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl, 11576 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si); 11577 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl, 11578 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si); 11579 11580 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl, 11581 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si); 11582 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl, 11583 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si); 11584 11585 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl, 11586 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si); 11587 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl, 11588 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si); 11589 11590 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl, 11591 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si); 11592 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl, 11593 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si); 11594 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl, 11595 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si); 11596 11597 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl, 11598 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si); 11599 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl, 11600 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si); 11601 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl, 11602 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si); 11603 } 11604 } 11605 } 11606 11607 /* Implement TARGET_BUILTIN_DECL hook. */ 11608 11609 static tree 11610 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) 11611 { 11612 if (code >= SPARC_BUILTIN_MAX) 11613 return error_mark_node; 11614 11615 return sparc_builtins[code]; 11616 } 11617 11618 /* Implemented TARGET_EXPAND_BUILTIN hook. */ 11619 11620 static rtx 11621 sparc_expand_builtin (tree exp, rtx target, 11622 rtx subtarget ATTRIBUTE_UNUSED, 11623 machine_mode tmode ATTRIBUTE_UNUSED, 11624 int ignore ATTRIBUTE_UNUSED) 11625 { 11626 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 11627 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl); 11628 enum insn_code icode = sparc_builtins_icode[code]; 11629 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; 11630 call_expr_arg_iterator iter; 11631 int arg_count = 0; 11632 rtx pat, op[4]; 11633 tree arg; 11634 11635 if (nonvoid) 11636 { 11637 machine_mode tmode = insn_data[icode].operand[0].mode; 11638 if (!target 11639 || GET_MODE (target) != tmode 11640 || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) 11641 op[0] = gen_reg_rtx (tmode); 11642 else 11643 op[0] = target; 11644 } 11645 11646 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) 11647 { 11648 const struct insn_operand_data *insn_op; 11649 int idx; 11650 11651 if (arg == error_mark_node) 11652 return NULL_RTX; 11653 11654 arg_count++; 11655 idx = arg_count - !nonvoid; 11656 insn_op = &insn_data[icode].operand[idx]; 11657 op[arg_count] = expand_normal (arg); 11658 11659 /* Some of the builtins require constant arguments. We check 11660 for this here. */ 11661 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL 11662 && code <= SPARC_BUILTIN_LAST_FPCMPSHL 11663 && arg_count == 3) 11664 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK 11665 && code <= SPARC_BUILTIN_LAST_DICTUNPACK 11666 && arg_count == 2)) 11667 { 11668 if (!check_constant_argument (icode, idx, op[arg_count])) 11669 return const0_rtx; 11670 } 11671 11672 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR) 11673 { 11674 if (!address_operand (op[arg_count], SImode)) 11675 { 11676 op[arg_count] = convert_memory_address (Pmode, op[arg_count]); 11677 op[arg_count] = copy_addr_to_reg (op[arg_count]); 11678 } 11679 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]); 11680 } 11681 11682 else if (insn_op->mode == V1DImode 11683 && GET_MODE (op[arg_count]) == DImode) 11684 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]); 11685 11686 else if (insn_op->mode == V1SImode 11687 && GET_MODE (op[arg_count]) == SImode) 11688 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]); 11689 11690 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count], 11691 insn_op->mode)) 11692 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]); 11693 } 11694 11695 switch (arg_count) 11696 { 11697 case 0: 11698 pat = GEN_FCN (icode) (op[0]); 11699 break; 11700 case 1: 11701 if (nonvoid) 11702 pat = GEN_FCN (icode) (op[0], op[1]); 11703 else 11704 pat = GEN_FCN (icode) (op[1]); 11705 break; 11706 case 2: 11707 pat = GEN_FCN (icode) (op[0], op[1], op[2]); 11708 break; 11709 case 3: 11710 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); 11711 break; 11712 default: 11713 gcc_unreachable (); 11714 } 11715 11716 if (!pat) 11717 return NULL_RTX; 11718 11719 emit_insn (pat); 11720 11721 return (nonvoid ? op[0] : const0_rtx); 11722 } 11723 11724 /* Return the upper 16 bits of the 8x16 multiplication. */ 11725 11726 static int 11727 sparc_vis_mul8x16 (int e8, int e16) 11728 { 11729 return (e8 * e16 + 128) / 256; 11730 } 11731 11732 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put 11733 the result into the array N_ELTS, whose elements are of INNER_TYPE. 
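   Each product uses the rounding performed by sparc_vis_mul8x16 above; for
   example (illustrative values only), e8 = 100 and e16 = 300 give
   (100 * 300 + 128) / 256 = 117.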
*/ 11734 11735 static void 11736 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode, 11737 tree inner_type, tree cst0, tree cst1) 11738 { 11739 unsigned i, num = VECTOR_CST_NELTS (cst0); 11740 int scale; 11741 11742 switch (fncode) 11743 { 11744 case SPARC_BUILTIN_FMUL8X16: 11745 for (i = 0; i < num; ++i) 11746 { 11747 int val 11748 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 11749 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i))); 11750 n_elts->quick_push (build_int_cst (inner_type, val)); 11751 } 11752 break; 11753 11754 case SPARC_BUILTIN_FMUL8X16AU: 11755 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0)); 11756 11757 for (i = 0; i < num; ++i) 11758 { 11759 int val 11760 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 11761 scale); 11762 n_elts->quick_push (build_int_cst (inner_type, val)); 11763 } 11764 break; 11765 11766 case SPARC_BUILTIN_FMUL8X16AL: 11767 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1)); 11768 11769 for (i = 0; i < num; ++i) 11770 { 11771 int val 11772 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)), 11773 scale); 11774 n_elts->quick_push (build_int_cst (inner_type, val)); 11775 } 11776 break; 11777 11778 default: 11779 gcc_unreachable (); 11780 } 11781 } 11782 11783 /* Implement TARGET_FOLD_BUILTIN hook. 11784 11785 Fold builtin functions for SPARC intrinsics. If IGNORE is true the 11786 result of the function call is ignored. NULL_TREE is returned if the 11787 function could not be folded. */ 11788 11789 static tree 11790 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, 11791 tree *args, bool ignore) 11792 { 11793 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl); 11794 tree rtype = TREE_TYPE (TREE_TYPE (fndecl)); 11795 tree arg0, arg1, arg2; 11796 11797 if (ignore) 11798 switch (code) 11799 { 11800 case SPARC_BUILTIN_LDFSR: 11801 case SPARC_BUILTIN_STFSR: 11802 case SPARC_BUILTIN_ALIGNADDR: 11803 case SPARC_BUILTIN_WRGSR: 11804 case SPARC_BUILTIN_BMASK: 11805 case SPARC_BUILTIN_CMASK8: 11806 case SPARC_BUILTIN_CMASK16: 11807 case SPARC_BUILTIN_CMASK32: 11808 break; 11809 11810 default: 11811 return build_zero_cst (rtype); 11812 } 11813 11814 switch (code) 11815 { 11816 case SPARC_BUILTIN_FEXPAND: 11817 arg0 = args[0]; 11818 STRIP_NOPS (arg0); 11819 11820 if (TREE_CODE (arg0) == VECTOR_CST) 11821 { 11822 tree inner_type = TREE_TYPE (rtype); 11823 unsigned i; 11824 11825 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1); 11826 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 11827 { 11828 unsigned HOST_WIDE_INT val 11829 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i)); 11830 n_elts.quick_push (build_int_cst (inner_type, val << 4)); 11831 } 11832 return n_elts.build (); 11833 } 11834 break; 11835 11836 case SPARC_BUILTIN_FMUL8X16: 11837 case SPARC_BUILTIN_FMUL8X16AU: 11838 case SPARC_BUILTIN_FMUL8X16AL: 11839 arg0 = args[0]; 11840 arg1 = args[1]; 11841 STRIP_NOPS (arg0); 11842 STRIP_NOPS (arg1); 11843 11844 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) 11845 { 11846 tree inner_type = TREE_TYPE (rtype); 11847 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1); 11848 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1); 11849 return n_elts.build (); 11850 } 11851 break; 11852 11853 case SPARC_BUILTIN_FPMERGE: 11854 arg0 = args[0]; 11855 arg1 = args[1]; 11856 STRIP_NOPS (arg0); 11857 STRIP_NOPS (arg1); 11858 11859 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST) 11860 { 11861 
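          /* fpmerge interleaves the elements of its two operands, so the
             folded constant simply alternates the elements of ARG0 and
             ARG1.  */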
tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1); 11862 unsigned i; 11863 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 11864 { 11865 n_elts.quick_push (VECTOR_CST_ELT (arg0, i)); 11866 n_elts.quick_push (VECTOR_CST_ELT (arg1, i)); 11867 } 11868 11869 return n_elts.build (); 11870 } 11871 break; 11872 11873 case SPARC_BUILTIN_PDIST: 11874 case SPARC_BUILTIN_PDISTN: 11875 arg0 = args[0]; 11876 arg1 = args[1]; 11877 STRIP_NOPS (arg0); 11878 STRIP_NOPS (arg1); 11879 if (code == SPARC_BUILTIN_PDIST) 11880 { 11881 arg2 = args[2]; 11882 STRIP_NOPS (arg2); 11883 } 11884 else 11885 arg2 = integer_zero_node; 11886 11887 if (TREE_CODE (arg0) == VECTOR_CST 11888 && TREE_CODE (arg1) == VECTOR_CST 11889 && TREE_CODE (arg2) == INTEGER_CST) 11890 { 11891 bool overflow = false; 11892 widest_int result = wi::to_widest (arg2); 11893 widest_int tmp; 11894 unsigned i; 11895 11896 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i) 11897 { 11898 tree e0 = VECTOR_CST_ELT (arg0, i); 11899 tree e1 = VECTOR_CST_ELT (arg1, i); 11900 11901 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf; 11902 11903 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf); 11904 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf); 11905 if (wi::neg_p (tmp)) 11906 tmp = wi::neg (tmp, &neg2_ovf); 11907 else 11908 neg2_ovf = false; 11909 result = wi::add (result, tmp, SIGNED, &add2_ovf); 11910 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf; 11911 } 11912 11913 gcc_assert (!overflow); 11914 11915 return wide_int_to_tree (rtype, result); 11916 } 11917 11918 default: 11919 break; 11920 } 11921 11922 return NULL_TREE; 11923 } 11924 11925 /* ??? This duplicates information provided to the compiler by the 11926 ??? scheduler description. Some day, teach genautomata to output 11927 ??? the latencies and then CSE will just use that. */ 11928 11929 static bool 11930 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code, 11931 int opno ATTRIBUTE_UNUSED, 11932 int *total, bool speed ATTRIBUTE_UNUSED) 11933 { 11934 int code = GET_CODE (x); 11935 bool float_mode_p = FLOAT_MODE_P (mode); 11936 11937 switch (code) 11938 { 11939 case CONST_INT: 11940 if (SMALL_INT (x)) 11941 *total = 0; 11942 else 11943 *total = 2; 11944 return true; 11945 11946 case CONST_WIDE_INT: 11947 *total = 0; 11948 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0))) 11949 *total += 2; 11950 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1))) 11951 *total += 2; 11952 return true; 11953 11954 case HIGH: 11955 *total = 2; 11956 return true; 11957 11958 case CONST: 11959 case LABEL_REF: 11960 case SYMBOL_REF: 11961 *total = 4; 11962 return true; 11963 11964 case CONST_DOUBLE: 11965 *total = 8; 11966 return true; 11967 11968 case MEM: 11969 /* If outer-code was a sign or zero extension, a cost 11970 of COSTS_N_INSNS (1) was already added in. This is 11971 why we are subtracting it back out. 
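   For example, with int_zload == COSTS_N_INSNS (2), a (zero_extend (mem ...))
   is costed as COSTS_N_INSNS (1) for the extension plus the COSTS_N_INSNS (1)
   returned here, i.e. the intended COSTS_N_INSNS (2) overall.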
*/ 11972 if (outer_code == ZERO_EXTEND) 11973 { 11974 *total = sparc_costs->int_zload - COSTS_N_INSNS (1); 11975 } 11976 else if (outer_code == SIGN_EXTEND) 11977 { 11978 *total = sparc_costs->int_sload - COSTS_N_INSNS (1); 11979 } 11980 else if (float_mode_p) 11981 { 11982 *total = sparc_costs->float_load; 11983 } 11984 else 11985 { 11986 *total = sparc_costs->int_load; 11987 } 11988 11989 return true; 11990 11991 case PLUS: 11992 case MINUS: 11993 if (float_mode_p) 11994 *total = sparc_costs->float_plusminus; 11995 else 11996 *total = COSTS_N_INSNS (1); 11997 return false; 11998 11999 case FMA: 12000 { 12001 rtx sub; 12002 12003 gcc_assert (float_mode_p); 12004 *total = sparc_costs->float_mul; 12005 12006 sub = XEXP (x, 0); 12007 if (GET_CODE (sub) == NEG) 12008 sub = XEXP (sub, 0); 12009 *total += rtx_cost (sub, mode, FMA, 0, speed); 12010 12011 sub = XEXP (x, 2); 12012 if (GET_CODE (sub) == NEG) 12013 sub = XEXP (sub, 0); 12014 *total += rtx_cost (sub, mode, FMA, 2, speed); 12015 return true; 12016 } 12017 12018 case MULT: 12019 if (float_mode_p) 12020 *total = sparc_costs->float_mul; 12021 else if (TARGET_ARCH32 && !TARGET_HARD_MUL) 12022 *total = COSTS_N_INSNS (25); 12023 else 12024 { 12025 int bit_cost; 12026 12027 bit_cost = 0; 12028 if (sparc_costs->int_mul_bit_factor) 12029 { 12030 int nbits; 12031 12032 if (GET_CODE (XEXP (x, 1)) == CONST_INT) 12033 { 12034 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 12035 for (nbits = 0; value != 0; value &= value - 1) 12036 nbits++; 12037 } 12038 else 12039 nbits = 7; 12040 12041 if (nbits < 3) 12042 nbits = 3; 12043 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor; 12044 bit_cost = COSTS_N_INSNS (bit_cost); 12045 } 12046 12047 if (mode == DImode || !TARGET_HARD_MUL) 12048 *total = sparc_costs->int_mulX + bit_cost; 12049 else 12050 *total = sparc_costs->int_mul + bit_cost; 12051 } 12052 return false; 12053 12054 case ASHIFT: 12055 case ASHIFTRT: 12056 case LSHIFTRT: 12057 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty; 12058 return false; 12059 12060 case DIV: 12061 case UDIV: 12062 case MOD: 12063 case UMOD: 12064 if (float_mode_p) 12065 { 12066 if (mode == DFmode) 12067 *total = sparc_costs->float_div_df; 12068 else 12069 *total = sparc_costs->float_div_sf; 12070 } 12071 else 12072 { 12073 if (mode == DImode) 12074 *total = sparc_costs->int_divX; 12075 else 12076 *total = sparc_costs->int_div; 12077 } 12078 return false; 12079 12080 case NEG: 12081 if (! float_mode_p) 12082 { 12083 *total = COSTS_N_INSNS (1); 12084 return false; 12085 } 12086 /* FALLTHRU */ 12087 12088 case ABS: 12089 case FLOAT: 12090 case UNSIGNED_FLOAT: 12091 case FIX: 12092 case UNSIGNED_FIX: 12093 case FLOAT_EXTEND: 12094 case FLOAT_TRUNCATE: 12095 *total = sparc_costs->float_move; 12096 return false; 12097 12098 case SQRT: 12099 if (mode == DFmode) 12100 *total = sparc_costs->float_sqrt_df; 12101 else 12102 *total = sparc_costs->float_sqrt_sf; 12103 return false; 12104 12105 case COMPARE: 12106 if (float_mode_p) 12107 *total = sparc_costs->float_cmp; 12108 else 12109 *total = COSTS_N_INSNS (1); 12110 return false; 12111 12112 case IF_THEN_ELSE: 12113 if (float_mode_p) 12114 *total = sparc_costs->float_cmove; 12115 else 12116 *total = sparc_costs->int_cmove; 12117 return false; 12118 12119 case IOR: 12120 /* Handle the NAND vector patterns. 
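   By De Morgan, (not (and x y)) is canonicalized as (ior (not x) (not y)),
   which the vector patterns can implement with a single fnand-type VIS
   instruction, hence the single-insn cost below.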
*/ 12121 if (sparc_vector_mode_supported_p (mode) 12122 && GET_CODE (XEXP (x, 0)) == NOT 12123 && GET_CODE (XEXP (x, 1)) == NOT) 12124 { 12125 *total = COSTS_N_INSNS (1); 12126 return true; 12127 } 12128 else 12129 return false; 12130 12131 default: 12132 return false; 12133 } 12134 } 12135 12136 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */ 12137 12138 static inline bool 12139 general_or_i64_p (reg_class_t rclass) 12140 { 12141 return (rclass == GENERAL_REGS || rclass == I64_REGS); 12142 } 12143 12144 /* Implement TARGET_REGISTER_MOVE_COST. */ 12145 12146 static int 12147 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, 12148 reg_class_t from, reg_class_t to) 12149 { 12150 bool need_memory = false; 12151 12152 /* This helps postreload CSE to eliminate redundant comparisons. */ 12153 if (from == NO_REGS || to == NO_REGS) 12154 return 100; 12155 12156 if (from == FPCC_REGS || to == FPCC_REGS) 12157 need_memory = true; 12158 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to)) 12159 || (general_or_i64_p (from) && FP_REG_CLASS_P (to))) 12160 { 12161 if (TARGET_VIS3) 12162 { 12163 int size = GET_MODE_SIZE (mode); 12164 if (size == 8 || size == 4) 12165 { 12166 if (! TARGET_ARCH32 || size == 4) 12167 return 4; 12168 else 12169 return 6; 12170 } 12171 } 12172 need_memory = true; 12173 } 12174 12175 if (need_memory) 12176 { 12177 if (sparc_cpu == PROCESSOR_ULTRASPARC 12178 || sparc_cpu == PROCESSOR_ULTRASPARC3 12179 || sparc_cpu == PROCESSOR_NIAGARA 12180 || sparc_cpu == PROCESSOR_NIAGARA2 12181 || sparc_cpu == PROCESSOR_NIAGARA3 12182 || sparc_cpu == PROCESSOR_NIAGARA4 12183 || sparc_cpu == PROCESSOR_NIAGARA7 12184 || sparc_cpu == PROCESSOR_M8) 12185 return 12; 12186 12187 return 6; 12188 } 12189 12190 return 2; 12191 } 12192 12193 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2. 12194 This is achieved by means of a manual dynamic stack space allocation in 12195 the current frame. We make the assumption that SEQ doesn't contain any 12196 function calls, with the possible exception of calls to the GOT helper. */ 12197 12198 static void 12199 emit_and_preserve (rtx seq, rtx reg, rtx reg2) 12200 { 12201 /* We must preserve the lowest 16 words for the register save area. */ 12202 HOST_WIDE_INT offset = 16*UNITS_PER_WORD; 12203 /* We really need only 2 words of fresh stack space. */ 12204 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD); 12205 12206 rtx slot 12207 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx, 12208 SPARC_STACK_BIAS + offset)); 12209 12210 emit_insn (gen_stack_pointer_inc (GEN_INT (-size))); 12211 emit_insn (gen_rtx_SET (slot, reg)); 12212 if (reg2) 12213 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD), 12214 reg2)); 12215 emit_insn (seq); 12216 if (reg2) 12217 emit_insn (gen_rtx_SET (reg2, 12218 adjust_address (slot, word_mode, UNITS_PER_WORD))); 12219 emit_insn (gen_rtx_SET (reg, slot)); 12220 emit_insn (gen_stack_pointer_inc (GEN_INT (size))); 12221 } 12222 12223 /* Output the assembler code for a thunk function. THUNK_DECL is the 12224 declaration for the thunk function itself, FUNCTION is the decl for 12225 the target function. DELTA is an immediate constant offset to be 12226 added to THIS. If VCALL_OFFSET is nonzero, the word at address 12227 (*THIS + VCALL_OFFSET) should be additionally added to THIS. 
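   In C-like pseudo-code, the emitted thunk is roughly (a sketch of the
   semantics, not of the actual RTL generated below):

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     return FUNCTION (this, ...);   -- sibling call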
*/ 12228 12229 static void 12230 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, 12231 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, 12232 tree function) 12233 { 12234 rtx this_rtx, funexp; 12235 rtx_insn *insn; 12236 unsigned int int_arg_first; 12237 12238 reload_completed = 1; 12239 epilogue_completed = 1; 12240 12241 emit_note (NOTE_INSN_PROLOGUE_END); 12242 12243 if (TARGET_FLAT) 12244 { 12245 sparc_leaf_function_p = 1; 12246 12247 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST; 12248 } 12249 else if (flag_delayed_branch) 12250 { 12251 /* We will emit a regular sibcall below, so we need to instruct 12252 output_sibcall that we are in a leaf function. */ 12253 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1; 12254 12255 /* This will cause final.c to invoke leaf_renumber_regs so we 12256 must behave as if we were in a not-yet-leafified function. */ 12257 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST; 12258 } 12259 else 12260 { 12261 /* We will emit the sibcall manually below, so we will need to 12262 manually spill non-leaf registers. */ 12263 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0; 12264 12265 /* We really are in a leaf function. */ 12266 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST; 12267 } 12268 12269 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function 12270 returns a structure, the structure return pointer is there instead. */ 12271 if (TARGET_ARCH64 12272 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) 12273 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1); 12274 else 12275 this_rtx = gen_rtx_REG (Pmode, int_arg_first); 12276 12277 /* Add DELTA. When possible use a plain add, otherwise load it into 12278 a register first. */ 12279 if (delta) 12280 { 12281 rtx delta_rtx = GEN_INT (delta); 12282 12283 if (! SPARC_SIMM13_P (delta)) 12284 { 12285 rtx scratch = gen_rtx_REG (Pmode, 1); 12286 emit_move_insn (scratch, delta_rtx); 12287 delta_rtx = scratch; 12288 } 12289 12290 /* THIS_RTX += DELTA. */ 12291 emit_insn (gen_add2_insn (this_rtx, delta_rtx)); 12292 } 12293 12294 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */ 12295 if (vcall_offset) 12296 { 12297 rtx vcall_offset_rtx = GEN_INT (vcall_offset); 12298 rtx scratch = gen_rtx_REG (Pmode, 1); 12299 12300 gcc_assert (vcall_offset < 0); 12301 12302 /* SCRATCH = *THIS_RTX. */ 12303 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx)); 12304 12305 /* Prepare for adding VCALL_OFFSET. The difficulty is that we 12306 may not have any available scratch register at this point. */ 12307 if (SPARC_SIMM13_P (vcall_offset)) 12308 ; 12309 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */ 12310 else if (! fixed_regs[5] 12311 /* The below sequence is made up of at least 2 insns, 12312 while the default method may need only one. */ 12313 && vcall_offset < -8192) 12314 { 12315 rtx scratch2 = gen_rtx_REG (Pmode, 5); 12316 emit_move_insn (scratch2, vcall_offset_rtx); 12317 vcall_offset_rtx = scratch2; 12318 } 12319 else 12320 { 12321 rtx increment = GEN_INT (-4096); 12322 12323 /* VCALL_OFFSET is a negative number whose typical range can be 12324 estimated as -32768..0 in 32-bit mode. In almost all cases 12325 it is therefore cheaper to emit multiple add insns than 12326 spilling and loading the constant into a register (at least 12327 6 insns). */ 12328 while (! 
SPARC_SIMM13_P (vcall_offset)) 12329 { 12330 emit_insn (gen_add2_insn (scratch, increment)); 12331 vcall_offset += 4096; 12332 } 12333 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */ 12334 } 12335 12336 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */ 12337 emit_move_insn (scratch, gen_rtx_MEM (Pmode, 12338 gen_rtx_PLUS (Pmode, 12339 scratch, 12340 vcall_offset_rtx))); 12341 12342 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */ 12343 emit_insn (gen_add2_insn (this_rtx, scratch)); 12344 } 12345 12346 /* Generate a tail call to the target function. */ 12347 if (! TREE_USED (function)) 12348 { 12349 assemble_external (function); 12350 TREE_USED (function) = 1; 12351 } 12352 funexp = XEXP (DECL_RTL (function), 0); 12353 12354 if (flag_delayed_branch) 12355 { 12356 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); 12357 insn = emit_call_insn (gen_sibcall (funexp)); 12358 SIBLING_CALL_P (insn) = 1; 12359 } 12360 else 12361 { 12362 /* The hoops we have to jump through in order to generate a sibcall 12363 without using delay slots... */ 12364 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1); 12365 12366 if (flag_pic) 12367 { 12368 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */ 12369 start_sequence (); 12370 load_got_register (); /* clobbers %o7 */ 12371 if (!TARGET_VXWORKS_RTP) 12372 pic_offset_table_rtx = got_register_rtx; 12373 scratch = sparc_legitimize_pic_address (funexp, scratch); 12374 seq = get_insns (); 12375 end_sequence (); 12376 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx); 12377 } 12378 else if (TARGET_ARCH32) 12379 { 12380 emit_insn (gen_rtx_SET (scratch, 12381 gen_rtx_HIGH (SImode, funexp))); 12382 emit_insn (gen_rtx_SET (scratch, 12383 gen_rtx_LO_SUM (SImode, scratch, funexp))); 12384 } 12385 else /* TARGET_ARCH64 */ 12386 { 12387 switch (sparc_cmodel) 12388 { 12389 case CM_MEDLOW: 12390 case CM_MEDMID: 12391 /* The destination can serve as a temporary. */ 12392 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch); 12393 break; 12394 12395 case CM_MEDANY: 12396 case CM_EMBMEDANY: 12397 /* The destination cannot serve as a temporary. */ 12398 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */ 12399 start_sequence (); 12400 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg); 12401 seq = get_insns (); 12402 end_sequence (); 12403 emit_and_preserve (seq, spill_reg, 0); 12404 break; 12405 12406 default: 12407 gcc_unreachable (); 12408 } 12409 } 12410 12411 emit_jump_insn (gen_indirect_jump (scratch)); 12412 } 12413 12414 emit_barrier (); 12415 12416 /* Run just enough of rest_of_compilation to get the insns emitted. 12417 There's not really enough bulk here to make other passes such as 12418 instruction scheduling worth while. Note that use_thunk calls 12419 assemble_start_function and assemble_end_function. */ 12420 insn = get_insns (); 12421 shorten_branches (insn); 12422 final_start_function (insn, file, 1); 12423 final (insn, file, 1); 12424 final_end_function (); 12425 12426 reload_completed = 0; 12427 epilogue_completed = 0; 12428 } 12429 12430 /* Return true if sparc_output_mi_thunk would be able to output the 12431 assembler code for the thunk function specified by the arguments 12432 it is passed, and false otherwise. */ 12433 static bool 12434 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED, 12435 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 12436 HOST_WIDE_INT vcall_offset, 12437 const_tree function ATTRIBUTE_UNUSED) 12438 { 12439 /* Bound the loop used in the default method above. */ 12440 return (vcall_offset >= -32768 || ! 
fixed_regs[5]); 12441 } 12442 12443 /* How to allocate a 'struct machine_function'. */ 12444 12445 static struct machine_function * 12446 sparc_init_machine_status (void) 12447 { 12448 return ggc_cleared_alloc<machine_function> (); 12449 } 12450 12451 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 12452 We need to emit DTP-relative relocations. */ 12453 12454 static void 12455 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x) 12456 { 12457 switch (size) 12458 { 12459 case 4: 12460 fputs ("\t.word\t%r_tls_dtpoff32(", file); 12461 break; 12462 case 8: 12463 fputs ("\t.xword\t%r_tls_dtpoff64(", file); 12464 break; 12465 default: 12466 gcc_unreachable (); 12467 } 12468 output_addr_const (file, x); 12469 fputs (")", file); 12470 } 12471 12472 /* Do whatever processing is required at the end of a file. */ 12473 12474 static void 12475 sparc_file_end (void) 12476 { 12477 /* If we need to emit the special GOT helper function, do so now. */ 12478 if (got_helper_needed) 12479 { 12480 const char *name = XSTR (got_helper_rtx, 0); 12481 #ifdef DWARF2_UNWIND_INFO 12482 bool do_cfi; 12483 #endif 12484 12485 if (USE_HIDDEN_LINKONCE) 12486 { 12487 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, 12488 get_identifier (name), 12489 build_function_type_list (void_type_node, 12490 NULL_TREE)); 12491 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, 12492 NULL_TREE, void_type_node); 12493 TREE_PUBLIC (decl) = 1; 12494 TREE_STATIC (decl) = 1; 12495 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl)); 12496 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; 12497 DECL_VISIBILITY_SPECIFIED (decl) = 1; 12498 resolve_unique_section (decl, 0, flag_function_sections); 12499 allocate_struct_function (decl, true); 12500 cfun->is_thunk = 1; 12501 current_function_decl = decl; 12502 init_varasm_status (); 12503 assemble_start_function (decl, name); 12504 } 12505 else 12506 { 12507 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT); 12508 switch_to_section (text_section); 12509 if (align > 0) 12510 ASM_OUTPUT_ALIGN (asm_out_file, align); 12511 ASM_OUTPUT_LABEL (asm_out_file, name); 12512 } 12513 12514 #ifdef DWARF2_UNWIND_INFO 12515 do_cfi = dwarf2out_do_cfi_asm (); 12516 if (do_cfi) 12517 output_asm_insn (".cfi_startproc", NULL); 12518 #endif 12519 if (flag_delayed_branch) 12520 { 12521 output_asm_insn ("jmp\t%%o7+8", NULL); 12522 output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx); 12523 } 12524 else 12525 { 12526 output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx); 12527 output_asm_insn ("jmp\t%%o7+8", NULL); 12528 output_asm_insn (" nop", NULL); 12529 } 12530 #ifdef DWARF2_UNWIND_INFO 12531 if (do_cfi) 12532 output_asm_insn (".cfi_endproc", NULL); 12533 #endif 12534 } 12535 12536 if (NEED_INDICATE_EXEC_STACK) 12537 file_end_indicate_exec_stack (); 12538 12539 #ifdef TARGET_SOLARIS 12540 solaris_file_end (); 12541 #endif 12542 } 12543 12544 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING 12545 /* Implement TARGET_MANGLE_TYPE. */ 12546 12547 static const char * 12548 sparc_mangle_type (const_tree type) 12549 { 12550 if (TARGET_ARCH32 12551 && TYPE_MAIN_VARIANT (type) == long_double_type_node 12552 && TARGET_LONG_DOUBLE_128) 12553 return "g"; 12554 12555 /* For all other types, use normal C++ mangling. */ 12556 return NULL; 12557 } 12558 #endif 12559 12560 /* Expand a membar instruction for various use cases. Both the LOAD_STORE 12561 and BEFORE_AFTER arguments of the form X_Y. 
They are two-bit masks where 12562 bit 0 indicates that X is true, and bit 1 indicates Y is true. */ 12563 12564 void 12565 sparc_emit_membar_for_model (enum memmodel model, 12566 int load_store, int before_after) 12567 { 12568 /* Bits for the MEMBAR mmask field. */ 12569 const int LoadLoad = 1; 12570 const int StoreLoad = 2; 12571 const int LoadStore = 4; 12572 const int StoreStore = 8; 12573 12574 int mm = 0, implied = 0; 12575 12576 switch (sparc_memory_model) 12577 { 12578 case SMM_SC: 12579 /* Sequential Consistency. All memory transactions are immediately 12580 visible in sequential execution order. No barriers needed. */ 12581 implied = LoadLoad | StoreLoad | LoadStore | StoreStore; 12582 break; 12583 12584 case SMM_TSO: 12585 /* Total Store Ordering: all memory transactions with store semantics 12586 are followed by an implied StoreStore. */ 12587 implied |= StoreStore; 12588 12589 /* If we're not looking for a raw barrer (before+after), then atomic 12590 operations get the benefit of being both load and store. */ 12591 if (load_store == 3 && before_after == 1) 12592 implied |= StoreLoad; 12593 /* FALLTHRU */ 12594 12595 case SMM_PSO: 12596 /* Partial Store Ordering: all memory transactions with load semantics 12597 are followed by an implied LoadLoad | LoadStore. */ 12598 implied |= LoadLoad | LoadStore; 12599 12600 /* If we're not looking for a raw barrer (before+after), then atomic 12601 operations get the benefit of being both load and store. */ 12602 if (load_store == 3 && before_after == 2) 12603 implied |= StoreLoad | StoreStore; 12604 /* FALLTHRU */ 12605 12606 case SMM_RMO: 12607 /* Relaxed Memory Ordering: no implicit bits. */ 12608 break; 12609 12610 default: 12611 gcc_unreachable (); 12612 } 12613 12614 if (before_after & 1) 12615 { 12616 if (is_mm_release (model) || is_mm_acq_rel (model) 12617 || is_mm_seq_cst (model)) 12618 { 12619 if (load_store & 1) 12620 mm |= LoadLoad | StoreLoad; 12621 if (load_store & 2) 12622 mm |= LoadStore | StoreStore; 12623 } 12624 } 12625 if (before_after & 2) 12626 { 12627 if (is_mm_acquire (model) || is_mm_acq_rel (model) 12628 || is_mm_seq_cst (model)) 12629 { 12630 if (load_store & 1) 12631 mm |= LoadLoad | LoadStore; 12632 if (load_store & 2) 12633 mm |= StoreLoad | StoreStore; 12634 } 12635 } 12636 12637 /* Remove the bits implied by the system memory model. */ 12638 mm &= ~implied; 12639 12640 /* For raw barriers (before+after), always emit a barrier. 12641 This will become a compile-time barrier if needed. */ 12642 if (mm || before_after == 3) 12643 emit_insn (gen_membar (GEN_INT (mm))); 12644 } 12645 12646 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit 12647 compare and swap on the word containing the byte or half-word. 
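   Roughly: align the address down to the containing word, derive the
   sub-word's bit offset from the low address bits (the XOR with 3 or 2
   accounts for big-endian byte numbering), build a shifted mask, splice the
   shifted OLDVAL and NEWVAL into the bytes that must not be modified, and
   retry the word-sized compare-and-swap whenever those surrounding bytes
   changed under us.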
*/ 12648 12649 static void 12650 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem, 12651 rtx oldval, rtx newval) 12652 { 12653 rtx addr1 = force_reg (Pmode, XEXP (mem, 0)); 12654 rtx addr = gen_reg_rtx (Pmode); 12655 rtx off = gen_reg_rtx (SImode); 12656 rtx oldv = gen_reg_rtx (SImode); 12657 rtx newv = gen_reg_rtx (SImode); 12658 rtx oldvalue = gen_reg_rtx (SImode); 12659 rtx newvalue = gen_reg_rtx (SImode); 12660 rtx res = gen_reg_rtx (SImode); 12661 rtx resv = gen_reg_rtx (SImode); 12662 rtx memsi, val, mask, cc; 12663 12664 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4)))); 12665 12666 if (Pmode != SImode) 12667 addr1 = gen_lowpart (SImode, addr1); 12668 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3)))); 12669 12670 memsi = gen_rtx_MEM (SImode, addr); 12671 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER); 12672 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem); 12673 12674 val = copy_to_reg (memsi); 12675 12676 emit_insn (gen_rtx_SET (off, 12677 gen_rtx_XOR (SImode, off, 12678 GEN_INT (GET_MODE (mem) == QImode 12679 ? 3 : 2)))); 12680 12681 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3)))); 12682 12683 if (GET_MODE (mem) == QImode) 12684 mask = force_reg (SImode, GEN_INT (0xff)); 12685 else 12686 mask = force_reg (SImode, GEN_INT (0xffff)); 12687 12688 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off))); 12689 12690 emit_insn (gen_rtx_SET (val, 12691 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), 12692 val))); 12693 12694 oldval = gen_lowpart (SImode, oldval); 12695 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off))); 12696 12697 newval = gen_lowpart_common (SImode, newval); 12698 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off))); 12699 12700 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask))); 12701 12702 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask))); 12703 12704 rtx_code_label *end_label = gen_label_rtx (); 12705 rtx_code_label *loop_label = gen_label_rtx (); 12706 emit_label (loop_label); 12707 12708 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val))); 12709 12710 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val))); 12711 12712 emit_move_insn (bool_result, const1_rtx); 12713 12714 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue)); 12715 12716 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label); 12717 12718 emit_insn (gen_rtx_SET (resv, 12719 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask), 12720 res))); 12721 12722 emit_move_insn (bool_result, const0_rtx); 12723 12724 cc = gen_compare_reg_1 (NE, resv, val); 12725 emit_insn (gen_rtx_SET (val, resv)); 12726 12727 /* Use cbranchcc4 to separate the compare and branch! */ 12728 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx), 12729 cc, const0_rtx, loop_label)); 12730 12731 emit_label (end_label); 12732 12733 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask))); 12734 12735 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off))); 12736 12737 emit_move_insn (result, gen_lowpart (GET_MODE (result), res)); 12738 } 12739 12740 /* Expand code to perform a compare-and-swap. 
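   OPERANDS follows the atomic_compare_and_swap optab layout: operands[0] is
   the boolean success result, operands[1] receives the old memory value,
   operands[2] is the memory operand, operands[3] the expected value,
   operands[4] the new value and operands[6] the success memory model.  At the
   source level this is what backs, e.g. (illustrative snippet only):

     unsigned short v;
     unsigned short expected = 1;
     __atomic_compare_exchange_n (&v, &expected, 2, 0,
                                  __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);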
*/ 12741 12742 void 12743 sparc_expand_compare_and_swap (rtx operands[]) 12744 { 12745 rtx bval, retval, mem, oldval, newval; 12746 machine_mode mode; 12747 enum memmodel model; 12748 12749 bval = operands[0]; 12750 retval = operands[1]; 12751 mem = operands[2]; 12752 oldval = operands[3]; 12753 newval = operands[4]; 12754 model = (enum memmodel) INTVAL (operands[6]); 12755 mode = GET_MODE (mem); 12756 12757 sparc_emit_membar_for_model (model, 3, 1); 12758 12759 if (reg_overlap_mentioned_p (retval, oldval)) 12760 oldval = copy_to_reg (oldval); 12761 12762 if (mode == QImode || mode == HImode) 12763 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval); 12764 else 12765 { 12766 rtx (*gen) (rtx, rtx, rtx, rtx); 12767 rtx x; 12768 12769 if (mode == SImode) 12770 gen = gen_atomic_compare_and_swapsi_1; 12771 else 12772 gen = gen_atomic_compare_and_swapdi_1; 12773 emit_insn (gen (retval, mem, oldval, newval)); 12774 12775 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1); 12776 if (x != bval) 12777 convert_move (bval, x, 1); 12778 } 12779 12780 sparc_emit_membar_for_model (model, 3, 2); 12781 } 12782 12783 void 12784 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel) 12785 { 12786 rtx t_1, t_2, t_3; 12787 12788 sel = gen_lowpart (DImode, sel); 12789 switch (vmode) 12790 { 12791 case E_V2SImode: 12792 /* inp = xxxxxxxAxxxxxxxB */ 12793 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16), 12794 NULL_RTX, 1, OPTAB_DIRECT); 12795 /* t_1 = ....xxxxxxxAxxx. */ 12796 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel), 12797 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT); 12798 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1), 12799 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT); 12800 /* sel = .......B */ 12801 /* t_1 = ...A.... */ 12802 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT); 12803 /* sel = ...A...B */ 12804 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1); 12805 /* sel = AAAABBBB * 4 */ 12806 t_1 = force_reg (SImode, GEN_INT (0x01230123)); 12807 /* sel = { A*4, A*4+1, A*4+2, ... } */ 12808 break; 12809 12810 case E_V4HImode: 12811 /* inp = xxxAxxxBxxxCxxxD */ 12812 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8), 12813 NULL_RTX, 1, OPTAB_DIRECT); 12814 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16), 12815 NULL_RTX, 1, OPTAB_DIRECT); 12816 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24), 12817 NULL_RTX, 1, OPTAB_DIRECT); 12818 /* t_1 = ..xxxAxxxBxxxCxx */ 12819 /* t_2 = ....xxxAxxxBxxxC */ 12820 /* t_3 = ......xxxAxxxBxx */ 12821 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel), 12822 GEN_INT (0x07), 12823 NULL_RTX, 1, OPTAB_DIRECT); 12824 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1), 12825 GEN_INT (0x0700), 12826 NULL_RTX, 1, OPTAB_DIRECT); 12827 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2), 12828 GEN_INT (0x070000), 12829 NULL_RTX, 1, OPTAB_DIRECT); 12830 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3), 12831 GEN_INT (0x07000000), 12832 NULL_RTX, 1, OPTAB_DIRECT); 12833 /* sel = .......D */ 12834 /* t_1 = .....C.. */ 12835 /* t_2 = ...B.... */ 12836 /* t_3 = .A...... 
*/ 12837 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT); 12838 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT); 12839 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT); 12840 /* sel = .A.B.C.D */ 12841 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1); 12842 /* sel = AABBCCDD * 2 */ 12843 t_1 = force_reg (SImode, GEN_INT (0x01010101)); 12844 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */ 12845 break; 12846 12847 case E_V8QImode: 12848 /* input = xAxBxCxDxExFxGxH */ 12849 sel = expand_simple_binop (DImode, AND, sel, 12850 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32 12851 | 0x0f0f0f0f), 12852 NULL_RTX, 1, OPTAB_DIRECT); 12853 /* sel = .A.B.C.D.E.F.G.H */ 12854 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4), 12855 NULL_RTX, 1, OPTAB_DIRECT); 12856 /* t_1 = ..A.B.C.D.E.F.G. */ 12857 sel = expand_simple_binop (DImode, IOR, sel, t_1, 12858 NULL_RTX, 1, OPTAB_DIRECT); 12859 /* sel = .AABBCCDDEEFFGGH */ 12860 sel = expand_simple_binop (DImode, AND, sel, 12861 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32 12862 | 0xff00ff), 12863 NULL_RTX, 1, OPTAB_DIRECT); 12864 /* sel = ..AB..CD..EF..GH */ 12865 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8), 12866 NULL_RTX, 1, OPTAB_DIRECT); 12867 /* t_1 = ....AB..CD..EF.. */ 12868 sel = expand_simple_binop (DImode, IOR, sel, t_1, 12869 NULL_RTX, 1, OPTAB_DIRECT); 12870 /* sel = ..ABABCDCDEFEFGH */ 12871 sel = expand_simple_binop (DImode, AND, sel, 12872 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff), 12873 NULL_RTX, 1, OPTAB_DIRECT); 12874 /* sel = ....ABCD....EFGH */ 12875 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16), 12876 NULL_RTX, 1, OPTAB_DIRECT); 12877 /* t_1 = ........ABCD.... */ 12878 sel = gen_lowpart (SImode, sel); 12879 t_1 = gen_lowpart (SImode, t_1); 12880 break; 12881 12882 default: 12883 gcc_unreachable (); 12884 } 12885 12886 /* Always perform the final addition/merge within the bmask insn. */ 12887 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1)); 12888 } 12889 12890 /* Implement TARGET_VEC_PERM_CONST. */ 12891 12892 static bool 12893 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, 12894 rtx op1, const vec_perm_indices &sel) 12895 { 12896 if (!TARGET_VIS2) 12897 return false; 12898 12899 /* All permutes are supported. */ 12900 if (!target) 12901 return true; 12902 12903 /* Force target-independent code to convert constant permutations on other 12904 modes down to V8QI. Rely on this to avoid the complexity of the byte 12905 order of the permutation. */ 12906 if (vmode != V8QImode) 12907 return false; 12908 12909 unsigned int i, mask; 12910 for (i = mask = 0; i < 8; ++i) 12911 mask |= (sel[i] & 0xf) << (28 - i*4); 12912 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode)); 12913 12914 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx)); 12915 emit_insn (gen_bshufflev8qi_vis (target, op0, op1)); 12916 return true; 12917 } 12918 12919 /* Implement TARGET_FRAME_POINTER_REQUIRED. */ 12920 12921 static bool 12922 sparc_frame_pointer_required (void) 12923 { 12924 /* If the stack pointer is dynamically modified in the function, it cannot 12925 serve as the frame pointer. */ 12926 if (cfun->calls_alloca) 12927 return true; 12928 12929 /* If the function receives nonlocal gotos, it needs to save the frame 12930 pointer in the nonlocal_goto_save_area object. */ 12931 if (cfun->has_nonlocal_label) 12932 return true; 12933 12934 /* In flat mode, that's it. 
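   The flat ABI does not use register windows, so the leaf-function reasoning
   below does not apply and the frame pointer is only ever required for the
   alloca and nonlocal-goto cases already handled above.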
*/ 12935 if (TARGET_FLAT) 12936 return false; 12937 12938 /* Otherwise, the frame pointer is required if the function isn't leaf, but 12939 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */ 12940 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ()); 12941 } 12942 12943 /* The way this is structured, we can't eliminate SFP in favor of SP 12944 if the frame pointer is required: we want to use the SFP->HFP elimination 12945 in that case. But the test in update_eliminables doesn't know we are 12946 assuming below that we only do the former elimination. */ 12947 12948 static bool 12949 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to) 12950 { 12951 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required (); 12952 } 12953 12954 /* Return the hard frame pointer directly to bypass the stack bias. */ 12955 12956 static rtx 12957 sparc_builtin_setjmp_frame_value (void) 12958 { 12959 return hard_frame_pointer_rtx; 12960 } 12961 12962 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that 12963 they won't be allocated. */ 12964 12965 static void 12966 sparc_conditional_register_usage (void) 12967 { 12968 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) 12969 { 12970 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 12971 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; 12972 } 12973 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */ 12974 /* then honor it. */ 12975 if (TARGET_ARCH32 && fixed_regs[5]) 12976 fixed_regs[5] = 1; 12977 else if (TARGET_ARCH64 && fixed_regs[5] == 2) 12978 fixed_regs[5] = 0; 12979 if (! TARGET_V9) 12980 { 12981 int regno; 12982 for (regno = SPARC_FIRST_V9_FP_REG; 12983 regno <= SPARC_LAST_V9_FP_REG; 12984 regno++) 12985 fixed_regs[regno] = 1; 12986 /* %fcc0 is used by v8 and v9. */ 12987 for (regno = SPARC_FIRST_V9_FCC_REG + 1; 12988 regno <= SPARC_LAST_V9_FCC_REG; 12989 regno++) 12990 fixed_regs[regno] = 1; 12991 } 12992 if (! TARGET_FPU) 12993 { 12994 int regno; 12995 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++) 12996 fixed_regs[regno] = 1; 12997 } 12998 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */ 12999 /* then honor it. Likewise with g3 and g4. */ 13000 if (fixed_regs[2] == 2) 13001 fixed_regs[2] = ! TARGET_APP_REGS; 13002 if (fixed_regs[3] == 2) 13003 fixed_regs[3] = ! TARGET_APP_REGS; 13004 if (TARGET_ARCH32 && fixed_regs[4] == 2) 13005 fixed_regs[4] = ! TARGET_APP_REGS; 13006 else if (TARGET_CM_EMBMEDANY) 13007 fixed_regs[4] = 1; 13008 else if (fixed_regs[4] == 2) 13009 fixed_regs[4] = 0; 13010 if (TARGET_FLAT) 13011 { 13012 int regno; 13013 /* Disable leaf functions. */ 13014 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER); 13015 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 13016 leaf_reg_remap [regno] = regno; 13017 } 13018 if (TARGET_VIS) 13019 global_regs[SPARC_GSR_REG] = 1; 13020 } 13021 13022 /* Implement TARGET_USE_PSEUDO_PIC_REG. */ 13023 13024 static bool 13025 sparc_use_pseudo_pic_reg (void) 13026 { 13027 return !TARGET_VXWORKS_RTP && flag_pic; 13028 } 13029 13030 /* Implement TARGET_INIT_PIC_REG. */ 13031 13032 static void 13033 sparc_init_pic_reg (void) 13034 { 13035 edge entry_edge; 13036 rtx_insn *seq; 13037 13038 /* In PIC mode, we need to always initialize the PIC register if optimization 13039 is enabled, because we are called from IRA and LRA may later force things 13040 to the constant pool for optimization purposes. 
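   (Constant-pool references are themselves PIC references on SPARC, so the
   PIC register must be usable even if no use of it is visible at this
   point.)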
*/ 13041 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize)) 13042 return; 13043 13044 start_sequence (); 13045 load_got_register (); 13046 if (!TARGET_VXWORKS_RTP) 13047 emit_move_insn (pic_offset_table_rtx, got_register_rtx); 13048 seq = get_insns (); 13049 end_sequence (); 13050 13051 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); 13052 insert_insn_on_edge (seq, entry_edge); 13053 commit_one_edge_insertion (entry_edge); 13054 } 13055 13056 /* Implement TARGET_PREFERRED_RELOAD_CLASS: 13057 13058 - We can't load constants into FP registers. 13059 - We can't load FP constants into integer registers when soft-float, 13060 because there is no soft-float pattern with a r/F constraint. 13061 - We can't load FP constants into integer registers for TFmode unless 13062 it is 0.0L, because there is no movtf pattern with a r/F constraint. 13063 - Try and reload integer constants (symbolic or otherwise) back into 13064 registers directly, rather than having them dumped to memory. */ 13065 13066 static reg_class_t 13067 sparc_preferred_reload_class (rtx x, reg_class_t rclass) 13068 { 13069 machine_mode mode = GET_MODE (x); 13070 if (CONSTANT_P (x)) 13071 { 13072 if (FP_REG_CLASS_P (rclass) 13073 || rclass == GENERAL_OR_FP_REGS 13074 || rclass == GENERAL_OR_EXTRA_FP_REGS 13075 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU) 13076 || (mode == TFmode && ! const_zero_operand (x, mode))) 13077 return NO_REGS; 13078 13079 if (GET_MODE_CLASS (mode) == MODE_INT) 13080 return GENERAL_REGS; 13081 13082 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 13083 { 13084 if (! FP_REG_CLASS_P (rclass) 13085 || !(const_zero_operand (x, mode) 13086 || const_all_ones_operand (x, mode))) 13087 return NO_REGS; 13088 } 13089 } 13090 13091 if (TARGET_VIS3 13092 && ! TARGET_ARCH64 13093 && (rclass == EXTRA_FP_REGS 13094 || rclass == GENERAL_OR_EXTRA_FP_REGS)) 13095 { 13096 int regno = true_regnum (x); 13097 13098 if (SPARC_INT_REG_P (regno)) 13099 return (rclass == EXTRA_FP_REGS 13100 ? FP_REGS : GENERAL_OR_FP_REGS); 13101 } 13102 13103 return rclass; 13104 } 13105 13106 /* Return true if we use LRA instead of reload pass. */ 13107 13108 static bool 13109 sparc_lra_p (void) 13110 { 13111 return TARGET_LRA; 13112 } 13113 13114 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction, 13115 OPERANDS are its operands and OPCODE is the mnemonic to be used. */ 13116 13117 const char * 13118 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode) 13119 { 13120 char mulstr[32]; 13121 13122 gcc_assert (! 
TARGET_ARCH64); 13123 13124 if (sparc_check_64 (operands[1], insn) <= 0) 13125 output_asm_insn ("srl\t%L1, 0, %L1", operands); 13126 if (which_alternative == 1) 13127 output_asm_insn ("sllx\t%H1, 32, %H1", operands); 13128 if (GET_CODE (operands[2]) == CONST_INT) 13129 { 13130 if (which_alternative == 1) 13131 { 13132 output_asm_insn ("or\t%L1, %H1, %H1", operands); 13133 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode); 13134 output_asm_insn (mulstr, operands); 13135 return "srlx\t%L0, 32, %H0"; 13136 } 13137 else 13138 { 13139 output_asm_insn ("sllx\t%H1, 32, %3", operands); 13140 output_asm_insn ("or\t%L1, %3, %3", operands); 13141 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode); 13142 output_asm_insn (mulstr, operands); 13143 output_asm_insn ("srlx\t%3, 32, %H0", operands); 13144 return "mov\t%3, %L0"; 13145 } 13146 } 13147 else if (rtx_equal_p (operands[1], operands[2])) 13148 { 13149 if (which_alternative == 1) 13150 { 13151 output_asm_insn ("or\t%L1, %H1, %H1", operands); 13152 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode); 13153 output_asm_insn (mulstr, operands); 13154 return "srlx\t%L0, 32, %H0"; 13155 } 13156 else 13157 { 13158 output_asm_insn ("sllx\t%H1, 32, %3", operands); 13159 output_asm_insn ("or\t%L1, %3, %3", operands); 13160 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode); 13161 output_asm_insn (mulstr, operands); 13162 output_asm_insn ("srlx\t%3, 32, %H0", operands); 13163 return "mov\t%3, %L0"; 13164 } 13165 } 13166 if (sparc_check_64 (operands[2], insn) <= 0) 13167 output_asm_insn ("srl\t%L2, 0, %L2", operands); 13168 if (which_alternative == 1) 13169 { 13170 output_asm_insn ("or\t%L1, %H1, %H1", operands); 13171 output_asm_insn ("sllx\t%H2, 32, %L1", operands); 13172 output_asm_insn ("or\t%L2, %L1, %L1", operands); 13173 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode); 13174 output_asm_insn (mulstr, operands); 13175 return "srlx\t%L0, 32, %H0"; 13176 } 13177 else 13178 { 13179 output_asm_insn ("sllx\t%H1, 32, %3", operands); 13180 output_asm_insn ("sllx\t%H2, 32, %4", operands); 13181 output_asm_insn ("or\t%L1, %3, %3", operands); 13182 output_asm_insn ("or\t%L2, %4, %4", operands); 13183 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode); 13184 output_asm_insn (mulstr, operands); 13185 output_asm_insn ("srlx\t%3, 32, %H0", operands); 13186 return "mov\t%3, %L0"; 13187 } 13188 } 13189 13190 /* Subroutine of sparc_expand_vector_init. Emit code to initialize 13191 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE 13192 and INNER_MODE are the modes describing TARGET. */ 13193 13194 static void 13195 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode, 13196 machine_mode inner_mode) 13197 { 13198 rtx t1, final_insn, sel; 13199 int bmask; 13200 13201 t1 = gen_reg_rtx (mode); 13202 13203 elt = convert_modes (SImode, inner_mode, elt, true); 13204 emit_move_insn (gen_lowpart(SImode, t1), elt); 13205 13206 switch (mode) 13207 { 13208 case E_V2SImode: 13209 final_insn = gen_bshufflev2si_vis (target, t1, t1); 13210 bmask = 0x45674567; 13211 break; 13212 case E_V4HImode: 13213 final_insn = gen_bshufflev4hi_vis (target, t1, t1); 13214 bmask = 0x67676767; 13215 break; 13216 case E_V8QImode: 13217 final_insn = gen_bshufflev8qi_vis (target, t1, t1); 13218 bmask = 0x77777777; 13219 break; 13220 default: 13221 gcc_unreachable (); 13222 } 13223 13224 sel = force_reg (SImode, GEN_INT (bmask)); 13225 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx)); 13226 emit_insn (final_insn); 13227 } 13228 13229 /* Subroutine of sparc_expand_vector_init. 
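Used when VIS2 BSHUFFLE is not available.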
Emit code to initialize 13230 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */ 13231 13232 static void 13233 vector_init_fpmerge (rtx target, rtx elt) 13234 { 13235 rtx t1, t2, t2_low, t3, t3_low; 13236 13237 t1 = gen_reg_rtx (V4QImode); 13238 elt = convert_modes (SImode, QImode, elt, true); 13239 emit_move_insn (gen_lowpart (SImode, t1), elt); 13240 13241 t2 = gen_reg_rtx (V8QImode); 13242 t2_low = gen_lowpart (V4QImode, t2); 13243 emit_insn (gen_fpmerge_vis (t2, t1, t1)); 13244 13245 t3 = gen_reg_rtx (V8QImode); 13246 t3_low = gen_lowpart (V4QImode, t3); 13247 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low)); 13248 13249 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low)); 13250 } 13251 13252 /* Subroutine of sparc_expand_vector_init. Emit code to initialize 13253 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */ 13254 13255 static void 13256 vector_init_faligndata (rtx target, rtx elt) 13257 { 13258 rtx t1 = gen_reg_rtx (V4HImode); 13259 int i; 13260 13261 elt = convert_modes (SImode, HImode, elt, true); 13262 emit_move_insn (gen_lowpart (SImode, t1), elt); 13263 13264 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode), 13265 force_reg (SImode, GEN_INT (6)), 13266 const0_rtx)); 13267 13268 for (i = 0; i < 4; i++) 13269 emit_insn (gen_faligndatav4hi_vis (target, t1, target)); 13270 } 13271 13272 /* Emit code to initialize TARGET to values for individual fields VALS. */ 13273 13274 void 13275 sparc_expand_vector_init (rtx target, rtx vals) 13276 { 13277 const machine_mode mode = GET_MODE (target); 13278 const machine_mode inner_mode = GET_MODE_INNER (mode); 13279 const int n_elts = GET_MODE_NUNITS (mode); 13280 int i, n_var = 0; 13281 bool all_same = true; 13282 rtx mem; 13283 13284 for (i = 0; i < n_elts; i++) 13285 { 13286 rtx x = XVECEXP (vals, 0, i); 13287 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x))) 13288 n_var++; 13289 13290 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 13291 all_same = false; 13292 } 13293 13294 if (n_var == 0) 13295 { 13296 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); 13297 return; 13298 } 13299 13300 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode)) 13301 { 13302 if (GET_MODE_SIZE (inner_mode) == 4) 13303 { 13304 emit_move_insn (gen_lowpart (SImode, target), 13305 gen_lowpart (SImode, XVECEXP (vals, 0, 0))); 13306 return; 13307 } 13308 else if (GET_MODE_SIZE (inner_mode) == 8) 13309 { 13310 emit_move_insn (gen_lowpart (DImode, target), 13311 gen_lowpart (DImode, XVECEXP (vals, 0, 0))); 13312 return; 13313 } 13314 } 13315 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode) 13316 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode)) 13317 { 13318 emit_move_insn (gen_highpart (word_mode, target), 13319 gen_lowpart (word_mode, XVECEXP (vals, 0, 0))); 13320 emit_move_insn (gen_lowpart (word_mode, target), 13321 gen_lowpart (word_mode, XVECEXP (vals, 0, 1))); 13322 return; 13323 } 13324 13325 if (all_same && GET_MODE_SIZE (mode) == 8) 13326 { 13327 if (TARGET_VIS2) 13328 { 13329 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode); 13330 return; 13331 } 13332 if (mode == V8QImode) 13333 { 13334 vector_init_fpmerge (target, XVECEXP (vals, 0, 0)); 13335 return; 13336 } 13337 if (mode == V4HImode) 13338 { 13339 vector_init_faligndata (target, XVECEXP (vals, 0, 0)); 13340 return; 13341 } 13342 } 13343 13344 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); 13345 for (i = 0; i < n_elts; i++) 13346 emit_move_insn 
(adjust_address_nv (mem, inner_mode, 13347 i * GET_MODE_SIZE (inner_mode)), 13348 XVECEXP (vals, 0, i)); 13349 emit_move_insn (target, mem); 13350 } 13351 13352 /* Implement TARGET_SECONDARY_RELOAD. */ 13353 13354 static reg_class_t 13355 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, 13356 machine_mode mode, secondary_reload_info *sri) 13357 { 13358 enum reg_class rclass = (enum reg_class) rclass_i; 13359 13360 sri->icode = CODE_FOR_nothing; 13361 sri->extra_cost = 0; 13362 13363 /* We need a temporary when loading/storing a HImode/QImode value 13364 between memory and the FPU registers. This can happen when combine puts 13365 a paradoxical subreg in a float/fix conversion insn. */ 13366 if (FP_REG_CLASS_P (rclass) 13367 && (mode == HImode || mode == QImode) 13368 && (GET_CODE (x) == MEM 13369 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG) 13370 && true_regnum (x) == -1))) 13371 return GENERAL_REGS; 13372 13373 /* On 32-bit we need a temporary when loading/storing a DFmode value 13374 between unaligned memory and the upper FPU registers. */ 13375 if (TARGET_ARCH32 13376 && rclass == EXTRA_FP_REGS 13377 && mode == DFmode 13378 && GET_CODE (x) == MEM 13379 && ! mem_min_alignment (x, 8)) 13380 return FP_REGS; 13381 13382 if (((TARGET_CM_MEDANY 13383 && symbolic_operand (x, mode)) 13384 || (TARGET_CM_EMBMEDANY 13385 && text_segment_operand (x, mode))) 13386 && ! flag_pic) 13387 { 13388 if (in_p) 13389 sri->icode = direct_optab_handler (reload_in_optab, mode); 13390 else 13391 sri->icode = direct_optab_handler (reload_out_optab, mode); 13392 return NO_REGS; 13393 } 13394 13395 if (TARGET_VIS3 && TARGET_ARCH32) 13396 { 13397 int regno = true_regnum (x); 13398 13399 /* When using VIS3 fp<-->int register moves, on 32-bit we have 13400 to move 8-byte values in 4-byte pieces. This only works via 13401 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to 13402 move between EXTRA_FP_REGS and GENERAL_REGS, we will need 13403 an FP_REGS intermediate move. */ 13404 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno)) 13405 || ((general_or_i64_p (rclass) 13406 || rclass == GENERAL_OR_FP_REGS) 13407 && SPARC_FP_REG_P (regno))) 13408 { 13409 sri->extra_cost = 2; 13410 return FP_REGS; 13411 } 13412 } 13413 13414 return NO_REGS; 13415 } 13416 13417 /* Implement TARGET_SECONDARY_MEMORY_NEEDED. 13418 13419 On SPARC when not VIS3 it is not possible to directly move data 13420 between GENERAL_REGS and FP_REGS. */ 13421 13422 static bool 13423 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1, 13424 reg_class_t class2) 13425 { 13426 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2)) 13427 && (! TARGET_VIS3 13428 || GET_MODE_SIZE (mode) > 8 13429 || GET_MODE_SIZE (mode) < 4)); 13430 } 13431 13432 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. 13433 13434 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9 13435 because the movsi and movsf patterns don't handle r/f moves. 13436 For v8 we copy the default definition. 
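Either way, sub-32-bit values are widened to a 32-bit mode, since BITS_PER_WORD is 32 on v8.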
*/ 13437 13438 static machine_mode 13439 sparc_secondary_memory_needed_mode (machine_mode mode) 13440 { 13441 if (TARGET_ARCH64) 13442 { 13443 if (GET_MODE_BITSIZE (mode) < 32) 13444 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require (); 13445 return mode; 13446 } 13447 else 13448 { 13449 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD) 13450 return mode_for_size (BITS_PER_WORD, 13451 GET_MODE_CLASS (mode), 0).require (); 13452 return mode; 13453 } 13454 } 13455 13456 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into 13457 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */ 13458 13459 bool 13460 sparc_expand_conditional_move (machine_mode mode, rtx *operands) 13461 { 13462 enum rtx_code rc = GET_CODE (operands[1]); 13463 machine_mode cmp_mode; 13464 rtx cc_reg, dst, cmp; 13465 13466 cmp = operands[1]; 13467 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64) 13468 return false; 13469 13470 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD) 13471 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc); 13472 13473 cmp_mode = GET_MODE (XEXP (cmp, 0)); 13474 rc = GET_CODE (cmp); 13475 13476 dst = operands[0]; 13477 if (! rtx_equal_p (operands[2], dst) 13478 && ! rtx_equal_p (operands[3], dst)) 13479 { 13480 if (reg_overlap_mentioned_p (dst, cmp)) 13481 dst = gen_reg_rtx (mode); 13482 13483 emit_move_insn (dst, operands[3]); 13484 } 13485 else if (operands[2] == dst) 13486 { 13487 operands[2] = operands[3]; 13488 13489 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT) 13490 rc = reverse_condition_maybe_unordered (rc); 13491 else 13492 rc = reverse_condition (rc); 13493 } 13494 13495 if (XEXP (cmp, 1) == const0_rtx 13496 && GET_CODE (XEXP (cmp, 0)) == REG 13497 && cmp_mode == DImode 13498 && v9_regcmp_p (rc)) 13499 cc_reg = XEXP (cmp, 0); 13500 else 13501 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1)); 13502 13503 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx); 13504 13505 emit_insn (gen_rtx_SET (dst, 13506 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst))); 13507 13508 if (dst != operands[0]) 13509 emit_move_insn (operands[0], dst); 13510 13511 return true; 13512 } 13513 13514 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2] 13515 into OPERANDS[0] in MODE, depending on the outcome of the comparison of 13516 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition. 13517 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine 13518 code to be used for the condition mask. 
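(The callers in sparc.md are expected to pass one of the UNSPEC_CMASK* codes for CCODE and UNSPEC_FCMP or UNSPEC_FUCMP for FCODE.)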
*/ 13519 13520 void 13521 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode) 13522 { 13523 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr; 13524 enum rtx_code code = GET_CODE (operands[3]); 13525 13526 mask = gen_reg_rtx (Pmode); 13527 cop0 = operands[4]; 13528 cop1 = operands[5]; 13529 if (code == LT || code == GE) 13530 { 13531 rtx t; 13532 13533 code = swap_condition (code); 13534 t = cop0; cop0 = cop1; cop1 = t; 13535 } 13536 13537 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG); 13538 13539 fcmp = gen_rtx_UNSPEC (Pmode, 13540 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)), 13541 fcode); 13542 13543 cmask = gen_rtx_UNSPEC (DImode, 13544 gen_rtvec (2, mask, gsr), 13545 ccode); 13546 13547 bshuf = gen_rtx_UNSPEC (mode, 13548 gen_rtvec (3, operands[1], operands[2], gsr), 13549 UNSPEC_BSHUFFLE); 13550 13551 emit_insn (gen_rtx_SET (mask, fcmp)); 13552 emit_insn (gen_rtx_SET (gsr, cmask)); 13553 13554 emit_insn (gen_rtx_SET (operands[0], bshuf)); 13555 } 13556 13557 /* On sparc, any mode which naturally allocates into the float 13558 registers should return 4 here. */ 13559 13560 unsigned int 13561 sparc_regmode_natural_size (machine_mode mode) 13562 { 13563 int size = UNITS_PER_WORD; 13564 13565 if (TARGET_ARCH64) 13566 { 13567 enum mode_class mclass = GET_MODE_CLASS (mode); 13568 13569 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT) 13570 size = 4; 13571 } 13572 13573 return size; 13574 } 13575 13576 /* Implement TARGET_HARD_REGNO_NREGS. 13577 13578 On SPARC, ordinary registers hold 32 bits worth; this means both 13579 integer and floating point registers. On v9, integer regs hold 64 13580 bits worth; floating point regs hold 32 bits worth (this includes the 13581 new fp regs as even the odd ones are included in the hard register 13582 count). */ 13583 13584 static unsigned int 13585 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode) 13586 { 13587 if (regno == SPARC_GSR_REG) 13588 return 1; 13589 if (TARGET_ARCH64) 13590 { 13591 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM) 13592 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); 13593 return CEIL (GET_MODE_SIZE (mode), 4); 13594 } 13595 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); 13596 } 13597 13598 /* Implement TARGET_HARD_REGNO_MODE_OK. 13599 13600 ??? Because of the funny way we pass parameters we should allow certain 13601 ??? types of float/complex values to be in integer registers during 13602 ??? RTL generation. This only matters on arch32. */ 13603 13604 static bool 13605 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode) 13606 { 13607 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0; 13608 } 13609 13610 /* Implement TARGET_MODES_TIEABLE_P. 13611 13612 For V9 we have to deal with the fact that only the lower 32 floating 13613 point registers are 32-bit addressable. */ 13614 13615 static bool 13616 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2) 13617 { 13618 enum mode_class mclass1, mclass2; 13619 unsigned short size1, size2; 13620 13621 if (mode1 == mode2) 13622 return true; 13623 13624 mclass1 = GET_MODE_CLASS (mode1); 13625 mclass2 = GET_MODE_CLASS (mode2); 13626 if (mclass1 != mclass2) 13627 return false; 13628 13629 if (! TARGET_V9) 13630 return true; 13631 13632 /* Classes are the same and we are V9 so we have to deal with upper 13633 vs. lower floating point registers. 
If one of the modes is a 13634 4-byte mode, and the other is not, we have to mark them as not 13635 tieable because only the lower 32 floating point registers are 13636 addressable 32 bits at a time. 13637 13638 We can't just test explicitly for SFmode, otherwise we won't 13639 cover the vector mode cases properly. */ 13640 13641 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT) 13642 return true; 13643 13644 size1 = GET_MODE_SIZE (mode1); 13645 size2 = GET_MODE_SIZE (mode2); 13646 if ((size1 > 4 && size2 == 4) 13647 || (size2 > 4 && size1 == 4)) 13648 return false; 13649 13650 return true; 13651 } 13652 13653 /* Implement TARGET_CSTORE_MODE. */ 13654 13655 static scalar_int_mode 13656 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED) 13657 { 13658 return (TARGET_ARCH64 ? DImode : SImode); 13659 } 13660 13661 /* Return the compound expression made of T1 and T2. */ 13662 13663 static inline tree 13664 compound_expr (tree t1, tree t2) 13665 { 13666 return build2 (COMPOUND_EXPR, void_type_node, t1, t2); 13667 } 13668 13669 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ 13670 13671 static void 13672 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) 13673 { 13674 if (!TARGET_FPU) 13675 return; 13676 13677 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5; 13678 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23; 13679 13680 /* We generate the equivalent of feholdexcept (&fenv_var): 13681 13682 unsigned int fenv_var; 13683 __builtin_store_fsr (&fenv_var); 13684 13685 unsigned int tmp1_var; 13686 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask); 13687 13688 __builtin_load_fsr (&tmp1_var); */ 13689 13690 tree fenv_var = create_tmp_var_raw (unsigned_type_node); 13691 TREE_ADDRESSABLE (fenv_var) = 1; 13692 tree fenv_addr = build_fold_addr_expr (fenv_var); 13693 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR]; 13694 tree hold_stfsr 13695 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, 13696 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE); 13697 13698 tree tmp1_var = create_tmp_var_raw (unsigned_type_node); 13699 TREE_ADDRESSABLE (tmp1_var) = 1; 13700 tree masked_fenv_var 13701 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, 13702 build_int_cst (unsigned_type_node, 13703 ~(accrued_exception_mask | trap_enable_mask))); 13704 tree hold_mask 13705 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var, 13706 NULL_TREE, NULL_TREE); 13707 13708 tree tmp1_addr = build_fold_addr_expr (tmp1_var); 13709 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR]; 13710 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr); 13711 13712 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr); 13713 13714 /* We reload the value of tmp1_var to clear the exceptions: 13715 13716 __builtin_load_fsr (&tmp1_var); */ 13717 13718 *clear = build_call_expr (ldfsr, 1, tmp1_addr); 13719 13720 /* We generate the equivalent of feupdateenv (&fenv_var): 13721 13722 unsigned int tmp2_var; 13723 __builtin_store_fsr (&tmp2_var); 13724 13725 __builtin_load_fsr (&fenv_var); 13726 13727 if (SPARC_LOW_FE_EXCEPT_VALUES) 13728 tmp2_var >>= 5; 13729 __atomic_feraiseexcept ((int) tmp2_var); */ 13730 13731 tree tmp2_var = create_tmp_var_raw (unsigned_type_node); 13732 TREE_ADDRESSABLE (tmp2_var) = 1; 13733 tree tmp2_addr = build_fold_addr_expr (tmp2_var); 13734 tree update_stfsr 13735 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var, 13736 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE); 13737
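  /* Reloading fenv_var restores the original FSR contents saved above, including the trap enable bits masked off during the hold phase.  */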
13738 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr); 13739 13740 tree atomic_feraiseexcept 13741 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); 13742 tree update_call 13743 = build_call_expr (atomic_feraiseexcept, 1, 13744 fold_convert (integer_type_node, tmp2_var)); 13745 13746 if (SPARC_LOW_FE_EXCEPT_VALUES) 13747 { 13748 tree shifted_tmp2_var 13749 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var, 13750 build_int_cst (unsigned_type_node, 5)); 13751 tree update_shift 13752 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var); 13753 update_call = compound_expr (update_shift, update_call); 13754 } 13755 13756 *update 13757 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call); 13758 } 13759 13760 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. Borrowed from the PA port. 13761 13762 SImode loads to floating-point registers are not zero-extended. 13763 The definition for LOAD_EXTEND_OP specifies that integer loads 13764 narrower than BITS_PER_WORD will be zero-extended. As a result, 13765 we inhibit changes from SImode unless they are to a mode that is 13766 identical in size. 13767 13768 Likewise for SFmode, since word-mode paradoxical subregs are 13769 problematic on big-endian architectures. */ 13770 13771 static bool 13772 sparc_can_change_mode_class (machine_mode from, machine_mode to, 13773 reg_class_t rclass) 13774 { 13775 if (TARGET_ARCH64 13776 && GET_MODE_SIZE (from) == 4 13777 && GET_MODE_SIZE (to) != 4) 13778 return !reg_classes_intersect_p (rclass, FP_REGS); 13779 return true; 13780 } 13781 13782 /* Implement TARGET_CONSTANT_ALIGNMENT. */ 13783 13784 static HOST_WIDE_INT 13785 sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align) 13786 { 13787 if (TREE_CODE (exp) == STRING_CST) 13788 return MAX (align, FASTEST_ALIGNMENT); 13789 return align; 13790 } 13791 13792 #include "gt-sparc.h" 13793