xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/s390/s390.c (revision 23f5f46327e37e7811da3520f4bb933f9489322f)
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2    Copyright (C) 1999-2020 Free Software Foundation, Inc.
3    Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4                   Ulrich Weigand (uweigand@de.ibm.com) and
5                   Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #define IN_TARGET_CODE 1
24 
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimplify.h"
76 #include "opts.h"
77 #include "tree-pass.h"
78 #include "context.h"
79 #include "builtins.h"
80 #include "rtl-iter.h"
81 #include "intl.h"
82 #include "tm-constrs.h"
83 #include "tree-vrp.h"
84 #include "symbol-summary.h"
85 #include "ipa-prop.h"
86 #include "ipa-fnsummary.h"
87 #include "sched-int.h"
88 
89 /* This file should be included last.  */
90 #include "target-def.h"
91 
92 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
93 
94 /* Remember the last target of s390_set_current_function.  */
95 static GTY(()) tree s390_previous_fndecl;
96 
97 /* Define the specific costs for a given cpu.  */
98 
99 struct processor_costs
100 {
101   /* multiplication */
102   const int m;        /* cost of an M instruction.  */
103   const int mghi;     /* cost of an MGHI instruction.  */
104   const int mh;       /* cost of an MH instruction.  */
105   const int mhi;      /* cost of an MHI instruction.  */
106   const int ml;       /* cost of an ML instruction.  */
107   const int mr;       /* cost of an MR instruction.  */
108   const int ms;       /* cost of an MS instruction.  */
109   const int msg;      /* cost of an MSG instruction.  */
110   const int msgf;     /* cost of an MSGF instruction.  */
111   const int msgfr;    /* cost of an MSGFR instruction.  */
112   const int msgr;     /* cost of an MSGR instruction.  */
113   const int msr;      /* cost of an MSR instruction.  */
114   const int mult_df;  /* cost of multiplication in DFmode.  */
115   const int mxbr;     /* cost of an MXBR instruction (TFmode multiplication).  */
116   /* square root */
117   const int sqxbr;    /* cost of square root in TFmode.  */
118   const int sqdbr;    /* cost of square root in DFmode.  */
119   const int sqebr;    /* cost of square root in SFmode.  */
120   /* multiply and add */
121   const int madbr;    /* cost of multiply and add in DFmode.  */
122   const int maebr;    /* cost of multiply and add in SFmode.  */
123   /* division */
124   const int dxbr;     /* cost of a DXBR instruction (TFmode division).  */
125   const int ddbr;     /* cost of a DDBR instruction (DFmode division).  */
126   const int debr;     /* cost of a DEBR instruction (SFmode division).  */
127   const int dlgr;     /* cost of a DLGR instruction.  */
128   const int dlr;      /* cost of a DLR instruction.  */
129   const int dr;       /* cost of a DR instruction.  */
130   const int dsgfr;    /* cost of a DSGFR instruction.  */
131   const int dsgr;     /* cost of a DSGR instruction.  */
132 };
133 
134 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
135 
136 static const
137 struct processor_costs z900_cost =
138 {
139   COSTS_N_INSNS (5),     /* M     */
140   COSTS_N_INSNS (10),    /* MGHI  */
141   COSTS_N_INSNS (5),     /* MH    */
142   COSTS_N_INSNS (4),     /* MHI   */
143   COSTS_N_INSNS (5),     /* ML    */
144   COSTS_N_INSNS (5),     /* MR    */
145   COSTS_N_INSNS (4),     /* MS    */
146   COSTS_N_INSNS (15),    /* MSG   */
147   COSTS_N_INSNS (7),     /* MSGF  */
148   COSTS_N_INSNS (7),     /* MSGFR */
149   COSTS_N_INSNS (10),    /* MSGR  */
150   COSTS_N_INSNS (4),     /* MSR   */
151   COSTS_N_INSNS (7),     /* multiplication in DFmode */
152   COSTS_N_INSNS (13),    /* MXBR */
153   COSTS_N_INSNS (136),   /* SQXBR */
154   COSTS_N_INSNS (44),    /* SQDBR */
155   COSTS_N_INSNS (35),    /* SQEBR */
156   COSTS_N_INSNS (18),    /* MADBR */
157   COSTS_N_INSNS (13),    /* MAEBR */
158   COSTS_N_INSNS (134),   /* DXBR */
159   COSTS_N_INSNS (30),    /* DDBR */
160   COSTS_N_INSNS (27),    /* DEBR */
161   COSTS_N_INSNS (220),   /* DLGR */
162   COSTS_N_INSNS (34),    /* DLR */
163   COSTS_N_INSNS (34),    /* DR */
164   COSTS_N_INSNS (32),    /* DSGFR */
165   COSTS_N_INSNS (32),    /* DSGR */
166 };
167 
168 static const
169 struct processor_costs z990_cost =
170 {
171   COSTS_N_INSNS (4),     /* M     */
172   COSTS_N_INSNS (2),     /* MGHI  */
173   COSTS_N_INSNS (2),     /* MH    */
174   COSTS_N_INSNS (2),     /* MHI   */
175   COSTS_N_INSNS (4),     /* ML    */
176   COSTS_N_INSNS (4),     /* MR    */
177   COSTS_N_INSNS (5),     /* MS    */
178   COSTS_N_INSNS (6),     /* MSG   */
179   COSTS_N_INSNS (4),     /* MSGF  */
180   COSTS_N_INSNS (4),     /* MSGFR */
181   COSTS_N_INSNS (4),     /* MSGR  */
182   COSTS_N_INSNS (4),     /* MSR   */
183   COSTS_N_INSNS (1),     /* multiplication in DFmode */
184   COSTS_N_INSNS (28),    /* MXBR */
185   COSTS_N_INSNS (130),   /* SQXBR */
186   COSTS_N_INSNS (66),    /* SQDBR */
187   COSTS_N_INSNS (38),    /* SQEBR */
188   COSTS_N_INSNS (1),     /* MADBR */
189   COSTS_N_INSNS (1),     /* MAEBR */
190   COSTS_N_INSNS (60),    /* DXBR */
191   COSTS_N_INSNS (40),    /* DDBR */
192   COSTS_N_INSNS (26),    /* DEBR */
193   COSTS_N_INSNS (176),   /* DLGR */
194   COSTS_N_INSNS (31),    /* DLR */
195   COSTS_N_INSNS (31),    /* DR */
196   COSTS_N_INSNS (31),    /* DSGFR */
197   COSTS_N_INSNS (31),    /* DSGR */
198 };
199 
200 static const
201 struct processor_costs z9_109_cost =
202 {
203   COSTS_N_INSNS (4),     /* M     */
204   COSTS_N_INSNS (2),     /* MGHI  */
205   COSTS_N_INSNS (2),     /* MH    */
206   COSTS_N_INSNS (2),     /* MHI   */
207   COSTS_N_INSNS (4),     /* ML    */
208   COSTS_N_INSNS (4),     /* MR    */
209   COSTS_N_INSNS (5),     /* MS    */
210   COSTS_N_INSNS (6),     /* MSG   */
211   COSTS_N_INSNS (4),     /* MSGF  */
212   COSTS_N_INSNS (4),     /* MSGFR */
213   COSTS_N_INSNS (4),     /* MSGR  */
214   COSTS_N_INSNS (4),     /* MSR   */
215   COSTS_N_INSNS (1),     /* multiplication in DFmode */
216   COSTS_N_INSNS (28),    /* MXBR */
217   COSTS_N_INSNS (130),   /* SQXBR */
218   COSTS_N_INSNS (66),    /* SQDBR */
219   COSTS_N_INSNS (38),    /* SQEBR */
220   COSTS_N_INSNS (1),     /* MADBR */
221   COSTS_N_INSNS (1),     /* MAEBR */
222   COSTS_N_INSNS (60),    /* DXBR */
223   COSTS_N_INSNS (40),    /* DDBR */
224   COSTS_N_INSNS (26),    /* DEBR */
225   COSTS_N_INSNS (30),    /* DLGR */
226   COSTS_N_INSNS (23),    /* DLR */
227   COSTS_N_INSNS (23),    /* DR */
228   COSTS_N_INSNS (24),    /* DSGFR */
229   COSTS_N_INSNS (24),    /* DSGR */
230 };
231 
232 static const
233 struct processor_costs z10_cost =
234 {
235   COSTS_N_INSNS (10),    /* M     */
236   COSTS_N_INSNS (10),    /* MGHI  */
237   COSTS_N_INSNS (10),    /* MH    */
238   COSTS_N_INSNS (10),    /* MHI   */
239   COSTS_N_INSNS (10),    /* ML    */
240   COSTS_N_INSNS (10),    /* MR    */
241   COSTS_N_INSNS (10),    /* MS    */
242   COSTS_N_INSNS (10),    /* MSG   */
243   COSTS_N_INSNS (10),    /* MSGF  */
244   COSTS_N_INSNS (10),    /* MSGFR */
245   COSTS_N_INSNS (10),    /* MSGR  */
246   COSTS_N_INSNS (10),    /* MSR   */
247   COSTS_N_INSNS (1) ,    /* multiplication in DFmode */
248   COSTS_N_INSNS (50),    /* MXBR */
249   COSTS_N_INSNS (120),   /* SQXBR */
250   COSTS_N_INSNS (52),    /* SQDBR */
251   COSTS_N_INSNS (38),    /* SQEBR */
252   COSTS_N_INSNS (1),     /* MADBR */
253   COSTS_N_INSNS (1),     /* MAEBR */
254   COSTS_N_INSNS (111),   /* DXBR */
255   COSTS_N_INSNS (39),    /* DDBR */
256   COSTS_N_INSNS (32),    /* DEBR */
257   COSTS_N_INSNS (160),   /* DLGR */
258   COSTS_N_INSNS (71),    /* DLR */
259   COSTS_N_INSNS (71),    /* DR */
260   COSTS_N_INSNS (71),    /* DSGFR */
261   COSTS_N_INSNS (71),    /* DSGR */
262 };
263 
264 static const
265 struct processor_costs z196_cost =
266 {
267   COSTS_N_INSNS (7),     /* M     */
268   COSTS_N_INSNS (5),     /* MGHI  */
269   COSTS_N_INSNS (5),     /* MH    */
270   COSTS_N_INSNS (5),     /* MHI   */
271   COSTS_N_INSNS (7),     /* ML    */
272   COSTS_N_INSNS (7),     /* MR    */
273   COSTS_N_INSNS (6),     /* MS    */
274   COSTS_N_INSNS (8),     /* MSG   */
275   COSTS_N_INSNS (6),     /* MSGF  */
276   COSTS_N_INSNS (6),     /* MSGFR */
277   COSTS_N_INSNS (8),     /* MSGR  */
278   COSTS_N_INSNS (6),     /* MSR   */
279   COSTS_N_INSNS (1) ,    /* multiplication in DFmode */
280   COSTS_N_INSNS (40),    /* MXBR B+40 */
281   COSTS_N_INSNS (100),   /* SQXBR B+100 */
282   COSTS_N_INSNS (42),    /* SQDBR B+42 */
283   COSTS_N_INSNS (28),    /* SQEBR B+28 */
284   COSTS_N_INSNS (1),     /* MADBR B */
285   COSTS_N_INSNS (1),     /* MAEBR B */
286   COSTS_N_INSNS (101),   /* DXBR B+101 */
287   COSTS_N_INSNS (29),    /* DDBR */
288   COSTS_N_INSNS (22),    /* DEBR */
289   COSTS_N_INSNS (160),   /* DLGR cracked */
290   COSTS_N_INSNS (160),   /* DLR cracked */
291   COSTS_N_INSNS (160),   /* DR expanded */
292   COSTS_N_INSNS (160),   /* DSGFR cracked */
293   COSTS_N_INSNS (160),   /* DSGR cracked */
294 };
295 
296 static const
297 struct processor_costs zEC12_cost =
298 {
299   COSTS_N_INSNS (7),     /* M     */
300   COSTS_N_INSNS (5),     /* MGHI  */
301   COSTS_N_INSNS (5),     /* MH    */
302   COSTS_N_INSNS (5),     /* MHI   */
303   COSTS_N_INSNS (7),     /* ML    */
304   COSTS_N_INSNS (7),     /* MR    */
305   COSTS_N_INSNS (6),     /* MS    */
306   COSTS_N_INSNS (8),     /* MSG   */
307   COSTS_N_INSNS (6),     /* MSGF  */
308   COSTS_N_INSNS (6),     /* MSGFR */
309   COSTS_N_INSNS (8),     /* MSGR  */
310   COSTS_N_INSNS (6),     /* MSR   */
311   COSTS_N_INSNS (1) ,    /* multiplication in DFmode */
312   COSTS_N_INSNS (40),    /* MXBR B+40 */
313   COSTS_N_INSNS (100),   /* SQXBR B+100 */
314   COSTS_N_INSNS (42),    /* SQDBR B+42 */
315   COSTS_N_INSNS (28),    /* SQEBR B+28 */
316   COSTS_N_INSNS (1),     /* MADBR B */
317   COSTS_N_INSNS (1),     /* MAEBR B */
318   COSTS_N_INSNS (131),   /* DXBR B+131 */
319   COSTS_N_INSNS (29),    /* DDBR */
320   COSTS_N_INSNS (22),    /* DEBR */
321   COSTS_N_INSNS (160),   /* DLGR cracked */
322   COSTS_N_INSNS (160),   /* DLR cracked */
323   COSTS_N_INSNS (160),   /* DR expanded */
324   COSTS_N_INSNS (160),   /* DSGFR cracked */
325   COSTS_N_INSNS (160),   /* DSGR cracked */
326 };
327 
328 const struct s390_processor processor_table[] =
329 {
330   { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost,   5  },
331   { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost,   6  },
332   { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7  },
333   { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost, 7  },
334   { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost,    8  },
335   { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost,   9  },
336   { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost,  10 },
337   { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost,  11 },
338   { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost,  12 },
339   { "z15",    "arch13", PROCESSOR_8561_Z15,    &zEC12_cost,  13 },
340   { "native", "",       PROCESSOR_NATIVE,      NULL,         0  }
341 };
342 
343 extern int reload_completed;
344 
345 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
346 static rtx_insn *last_scheduled_insn;
347 #define NUM_SIDES 2
348 
349 #define MAX_SCHED_UNITS 4
350 static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
351 
352 /* Estimate of the number of cycles a long-running insn occupies an
353    execution unit.  */
354 static int fxd_longrunning[NUM_SIDES];
355 static int fpd_longrunning[NUM_SIDES];
356 
357 /* The maximum score added for an instruction whose unit hasn't been
358    in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
359    give instruction mix scheduling more priority over instruction
360    grouping.  */
361 #define MAX_SCHED_MIX_SCORE      2
362 
363 /* The maximum distance up to which individual scores will be
364    calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
365    Increase this with the OOO window size of the machine.  */
366 #define MAX_SCHED_MIX_DISTANCE 70
367 
368 /* Structure used to hold the components of a S/390 memory
369    address.  A legitimate address on S/390 is of the general
370    form
371           base + index + displacement
372    where any of the components is optional.
373 
374    base and index are registers of the class ADDR_REGS,
375    displacement is an unsigned 12-bit immediate constant.  */
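/* Illustrative example (not from the original sources): in assembler
   syntax such an operand is written D(X,B), e.g. 160(%r1,%r15) with
   base register %r15, index register %r1 and displacement 160.  */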
376 
377 /* The max number of insns of backend generated memset/memcpy/memcmp
378    loops.  This value is used in the unroll adjust hook to detect such
379    loops.  Current max is 9 coming from the memcmp loop.  */
380 #define BLOCK_MEM_OPS_LOOP_INSNS 9
381 
382 struct s390_address
383 {
384   rtx base;
385   rtx indx;
386   rtx disp;
387   bool pointer;
388   bool literal_pool;
389 };
390 
391 /* Few accessor macros for struct cfun->machine->s390_frame_layout.  */
392 
393 #define cfun_frame_layout (cfun->machine->frame_layout)
394 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
395 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT				\
396 				 ? cfun_frame_layout.fpr_bitmap & 0x0f	\
397 				 : cfun_frame_layout.fpr_bitmap & 0x03))
398 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
399   cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
400 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |=    \
401   (1 << (REGNO - FPR0_REGNUM)))
402 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap &    \
403   (1 << (REGNO - FPR0_REGNUM))))
404 #define cfun_gpr_save_slot(REGNO) \
405   cfun->machine->frame_layout.gpr_save_slots[REGNO]
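/* Note (illustrative): fpr_bitmap is indexed relative to FPR0_REGNUM, so
   cfun_set_fpr_save (FPR0_REGNUM + 4) sets bit 4 of the bitmap and
   cfun_fpr_save_p (FPR0_REGNUM + 4) tests it.  */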
406 
407 /* Number of GPRs and FPRs used for argument passing.  */
408 #define GP_ARG_NUM_REG 5
409 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
410 #define VEC_ARG_NUM_REG 8
411 
412 /* A couple of shortcuts.  */
413 #define CONST_OK_FOR_J(x) \
414 	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
415 #define CONST_OK_FOR_K(x) \
416 	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
417 #define CONST_OK_FOR_Os(x) \
418 	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
419 #define CONST_OK_FOR_Op(x) \
420 	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
421 #define CONST_OK_FOR_On(x) \
422 	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
423 
424 #define REGNO_PAIR_OK(REGNO, MODE)                               \
425   (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
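/* In other words (paraphrasing the macro above): a value that needs more
   than one hard register must start on an even-numbered register so that
   it forms a valid register pair.  */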
426 
427 /* That's the read ahead of the dynamic branch prediction unit in
428    bytes on a z10 (or higher) CPU.  */
429 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
430 
431 /* Masks per jump target register indicating which thunks need to be
432    generated.  */
433 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
434 static GTY(()) int indirect_branch_z10thunk_mask = 0;
435 
436 #define INDIRECT_BRANCH_NUM_OPTIONS 4
437 
438 enum s390_indirect_branch_option
439   {
440     s390_opt_indirect_branch_jump = 0,
441     s390_opt_indirect_branch_call,
442     s390_opt_function_return_reg,
443     s390_opt_function_return_mem
444   };
445 
446 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
447 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
448   { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
449 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] =	\
450   { ".s390_indirect_jump", ".s390_indirect_call",
451     ".s390_return_reg", ".s390_return_mem" };
452 
453 bool
454 s390_return_addr_from_memory ()
455 {
456   return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
457 }
458 
459 /* Indicate which ABI has been used for passing vector args.
460    0 - no vector type arguments have been passed where the ABI is relevant
461    1 - the old ABI has been used
462    2 - a vector type argument has been passed either in a vector register
463        or on the stack by value  */
464 static int s390_vector_abi = 0;
465 
466 /* Set the vector ABI marker if TYPE is subject to the vector ABI
467    switch.  The vector ABI affects only vector data types.  There are
468    two aspects of the vector ABI relevant here:
469 
470    1. vectors >= 16 bytes have an alignment of 8 bytes with the new
471    ABI and natural alignment with the old.
472 
473    2. vectors <= 16 bytes are passed in VRs or by value on the stack
474    with the new ABI but by reference on the stack with the old.
475 
476    If ARG_P is true TYPE is used for a function argument or return
477    value.  The ABI marker then is set for all vector data types.  If
478    ARG_P is false only type 1 vectors are being checked.  */
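/* Illustrative example (not from the original sources): a 16-byte
   argument such as "vector signed int" is passed in a vector register
   under the new (TARGET_VX_ABI) ABI but by reference on the stack under
   the old one, so seeing it as an argument or return value sets the ABI
   marker below.  */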
479 
480 static void
481 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
482 {
483   static hash_set<const_tree> visited_types_hash;
484 
485   if (s390_vector_abi)
486     return;
487 
488   if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
489     return;
490 
491   if (visited_types_hash.contains (type))
492     return;
493 
494   visited_types_hash.add (type);
495 
496   if (VECTOR_TYPE_P (type))
497     {
498       int type_size = int_size_in_bytes (type);
499 
500       /* Outside of arguments only the alignment changes, and this
501 	 only happens for vector types >= 16 bytes.  */
502       if (!arg_p && type_size < 16)
503 	return;
504 
505       /* In arguments, vector types > 16 bytes are passed as before (GCC
506 	 never enforced the bigger alignment for arguments which was
507 	 required by the old vector ABI).  However, it might still be
508 	 ABI relevant due to the changed alignment if it is a struct
509 	 member.  */
510       if (arg_p && type_size > 16 && !in_struct_p)
511 	return;
512 
513       s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
514     }
515   else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
516     {
517       /* ARRAY_TYPE: Since neither of the ABIs uses more than natural
518 	 alignment, there will never be ABI-dependent padding
519 	 in an array type.  That's why we do not set in_struct_p to
520 	 true here.  */
521       s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
522     }
523   else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
524     {
525       tree arg_chain;
526 
527       /* Check the return type.  */
528       s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
529 
530       for (arg_chain = TYPE_ARG_TYPES (type);
531 	   arg_chain;
532 	   arg_chain = TREE_CHAIN (arg_chain))
533 	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
534     }
535   else if (RECORD_OR_UNION_TYPE_P (type))
536     {
537       tree field;
538 
539       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
540 	{
541 	  if (TREE_CODE (field) != FIELD_DECL)
542 	    continue;
543 
544 	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
545 	}
546     }
547 }
548 
549 
550 /* System z builtins.  */
551 
552 #include "s390-builtins.h"
553 
554 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
555   {
556 #undef B_DEF
557 #undef OB_DEF
558 #undef OB_DEF_VAR
559 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
560 #define OB_DEF(...)
561 #define OB_DEF_VAR(...)
562 #include "s390-builtins.def"
563     0
564   };
565 
566 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
567   {
568 #undef B_DEF
569 #undef OB_DEF
570 #undef OB_DEF_VAR
571 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
572 #define OB_DEF(...)
573 #define OB_DEF_VAR(...)
574 #include "s390-builtins.def"
575     0
576   };
577 
578 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
579   {
580 #undef B_DEF
581 #undef OB_DEF
582 #undef OB_DEF_VAR
583 #define B_DEF(...)
584 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
585 #define OB_DEF_VAR(...)
586 #include "s390-builtins.def"
587     0
588   };
589 
590 const unsigned int
591 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
592   {
593 #undef B_DEF
594 #undef OB_DEF
595 #undef OB_DEF_VAR
596 #define B_DEF(...)
597 #define OB_DEF(...)
598 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
599 #include "s390-builtins.def"
600     0
601   };
602 
603 const unsigned int
604 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
605   {
606 #undef B_DEF
607 #undef OB_DEF
608 #undef OB_DEF_VAR
609 #define B_DEF(...)
610 #define OB_DEF(...)
611 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
612 #include "s390-builtins.def"
613     0
614   };
615 
616 tree s390_builtin_types[BT_MAX];
617 tree s390_builtin_fn_types[BT_FN_MAX];
618 tree s390_builtin_decls[S390_BUILTIN_MAX +
619 			S390_OVERLOADED_BUILTIN_MAX +
620 			S390_OVERLOADED_BUILTIN_VAR_MAX];
621 
622 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
623 #undef B_DEF
624 #undef OB_DEF
625 #undef OB_DEF_VAR
626 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
627 #define OB_DEF(...)
628 #define OB_DEF_VAR(...)
629 
630 #include "s390-builtins.def"
631   CODE_FOR_nothing
632 };
633 
634 static void
635 s390_init_builtins (void)
636 {
637   /* These definitions are being used in s390-builtins.def.  */
638   tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
639 				       NULL, NULL);
640   tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
641   tree c_uint64_type_node;
642 
643   /* The uint64_type_node from tree.c is not compatible to the C99
644      uint64_t data type.  What we want is c_uint64_type_node from
645      c-common.c.  But since backend code is not supposed to interface
646      with the frontend we recreate it here.  */
647   if (TARGET_64BIT)
648     c_uint64_type_node = long_unsigned_type_node;
649   else
650     c_uint64_type_node = long_long_unsigned_type_node;
651 
652 #undef DEF_TYPE
653 #define DEF_TYPE(INDEX, NODE, CONST_P)			\
654   if (s390_builtin_types[INDEX] == NULL)		\
655     s390_builtin_types[INDEX] = (!CONST_P) ?		\
656       (NODE) : build_type_variant ((NODE), 1, 0);
657 
658 #undef DEF_POINTER_TYPE
659 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE)				\
660   if (s390_builtin_types[INDEX] == NULL)				\
661     s390_builtin_types[INDEX] =						\
662       build_pointer_type (s390_builtin_types[INDEX_BASE]);
663 
664 #undef DEF_DISTINCT_TYPE
665 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE)				\
666   if (s390_builtin_types[INDEX] == NULL)				\
667     s390_builtin_types[INDEX] =						\
668       build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
669 
670 #undef DEF_VECTOR_TYPE
671 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)			\
672   if (s390_builtin_types[INDEX] == NULL)				\
673     s390_builtin_types[INDEX] =						\
674       build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
675 
676 #undef DEF_OPAQUE_VECTOR_TYPE
677 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS)		\
678   if (s390_builtin_types[INDEX] == NULL)				\
679     s390_builtin_types[INDEX] =						\
680       build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
681 
682 #undef DEF_FN_TYPE
683 #define DEF_FN_TYPE(INDEX, args...)				\
684   if (s390_builtin_fn_types[INDEX] == NULL)			\
685     s390_builtin_fn_types[INDEX] =				\
686       build_function_type_list (args, NULL_TREE);
687 #undef DEF_OV_TYPE
688 #define DEF_OV_TYPE(...)
689 #include "s390-builtin-types.def"
690 
691 #undef B_DEF
692 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)		\
693   if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL)			\
694     s390_builtin_decls[S390_BUILTIN_##NAME] =				\
695       add_builtin_function ("__builtin_" #NAME,				\
696 			    s390_builtin_fn_types[FNTYPE],		\
697 			    S390_BUILTIN_##NAME,			\
698 			    BUILT_IN_MD,				\
699 			    NULL,					\
700 			    ATTRS);
701 #undef OB_DEF
702 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE)	\
703   if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
704       == NULL)								\
705     s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
706       add_builtin_function ("__builtin_" #NAME,				\
707 			    s390_builtin_fn_types[FNTYPE],		\
708 			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
709 			    BUILT_IN_MD,				\
710 			    NULL,					\
711 			    0);
712 #undef OB_DEF_VAR
713 #define OB_DEF_VAR(...)
714 #include "s390-builtins.def"
715 
716 }
717 
718 /* Return true if ARG is appropriate as argument number ARGNUM of
719    builtin DECL.  The operand flags from s390-builtins.def have to
720    be passed as OP_FLAGS.  */
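/* For example (derived from the bitwidth tables below): an O_U4 operand
   accepts the unsigned range 0..15 and an O_S8 operand the signed range
   -128..127.  */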
721 bool
722 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
723 {
724   if (O_UIMM_P (op_flags))
725     {
726       int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
727       int bitwidth = bitwidths[op_flags - O_U1];
728 
729       if (!tree_fits_uhwi_p (arg)
730 	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
731 	{
732 	  error ("constant argument %d for builtin %qF is out of range "
733 		 "(0..%wu)", argnum, decl,
734 		 (HOST_WIDE_INT_1U << bitwidth) - 1);
735 	  return false;
736 	}
737     }
738 
739   if (O_SIMM_P (op_flags))
740     {
741       int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
742       int bitwidth = bitwidths[op_flags - O_S2];
743 
744       if (!tree_fits_shwi_p (arg)
745 	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
746 	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
747 	{
748 	  error ("constant argument %d for builtin %qF is out of range "
749 		 "(%wd..%wd)", argnum, decl,
750 		 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
751 		 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
752 	  return false;
753 	}
754     }
755   return true;
756 }
757 
758 /* Expand an expression EXP that calls a built-in function,
759    with result going to TARGET if that's convenient
760    (and in mode MODE if that's convenient).
761    SUBTARGET may be used as the target for computing one of EXP's operands.
762    IGNORE is nonzero if the value is to be ignored.  */
763 
764 static rtx
765 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
766 		     machine_mode mode ATTRIBUTE_UNUSED,
767 		     int ignore ATTRIBUTE_UNUSED)
768 {
769 #define MAX_ARGS 6
770 
771   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
772   unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
773   enum insn_code icode;
774   rtx op[MAX_ARGS], pat;
775   int arity;
776   bool nonvoid;
777   tree arg;
778   call_expr_arg_iterator iter;
779   unsigned int all_op_flags = opflags_for_builtin (fcode);
780   machine_mode last_vec_mode = VOIDmode;
781 
782   if (TARGET_DEBUG_ARG)
783     {
784       fprintf (stderr,
785 	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
786 	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
787 	       bflags_for_builtin (fcode));
788     }
789 
790   if (S390_USE_TARGET_ATTRIBUTE)
791     {
792       unsigned int bflags;
793 
794       bflags = bflags_for_builtin (fcode);
795       if ((bflags & B_HTM) && !TARGET_HTM)
796 	{
797 	  error ("builtin %qF is not supported without %<-mhtm%> "
798 		 "(default with %<-march=zEC12%> and higher).", fndecl);
799 	  return const0_rtx;
800 	}
801       if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
802 	{
803 	  error ("builtin %qF requires %<-mvx%> "
804 		 "(default with %<-march=z13%> and higher).", fndecl);
805 	  return const0_rtx;
806 	}
807 
808       if ((bflags & B_VXE) && !TARGET_VXE)
809 	{
810 	  error ("Builtin %qF requires z14 or higher.", fndecl);
811 	  return const0_rtx;
812 	}
813 
814       if ((bflags & B_VXE2) && !TARGET_VXE2)
815 	{
816 	  error ("Builtin %qF requires z15 or higher.", fndecl);
817 	  return const0_rtx;
818 	}
819     }
820   if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
821       && fcode < S390_ALL_BUILTIN_MAX)
822     {
823       gcc_unreachable ();
824     }
825   else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
826     {
827       icode = code_for_builtin[fcode];
828       /* Set a flag in the machine specific cfun part in order to support
829 	 saving/restoring of FPRs.  */
830       if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
831 	cfun->machine->tbegin_p = true;
832     }
833   else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
834     {
835       error ("unresolved overloaded builtin");
836       return const0_rtx;
837     }
838   else
839     internal_error ("bad builtin fcode");
840 
841   if (icode == 0)
842     internal_error ("bad builtin icode");
843 
844   nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
845 
846   if (nonvoid)
847     {
848       machine_mode tmode = insn_data[icode].operand[0].mode;
849       if (!target
850 	  || GET_MODE (target) != tmode
851 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
852 	target = gen_reg_rtx (tmode);
853 
854       /* There are builtins (e.g. vec_promote) with no vector
855 	 arguments but an element selector.  So we have to also look
856 	 at the vector return type when emitting the modulo
857 	 operation.  */
858       if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
859 	last_vec_mode = insn_data[icode].operand[0].mode;
860     }
861 
862   arity = 0;
863   FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
864     {
865       rtx tmp_rtx;
866       const struct insn_operand_data *insn_op;
867       unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
868 
869       all_op_flags = all_op_flags >> O_SHIFT;
870 
871       if (arg == error_mark_node)
872 	return NULL_RTX;
873       if (arity >= MAX_ARGS)
874 	return NULL_RTX;
875 
876       if (O_IMM_P (op_flags)
877 	  && TREE_CODE (arg) != INTEGER_CST)
878 	{
879 	  error ("constant value required for builtin %qF argument %d",
880 		 fndecl, arity + 1);
881 	  return const0_rtx;
882 	}
883 
884       if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
885 	return const0_rtx;
886 
887       insn_op = &insn_data[icode].operand[arity + nonvoid];
888       op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
889 
890       /* expand_expr truncates constants to the target mode only if it
891 	 is "convenient".  However, our checks below rely on this
892 	 being done.  */
893       if (CONST_INT_P (op[arity])
894 	  && SCALAR_INT_MODE_P (insn_op->mode)
895 	  && GET_MODE (op[arity]) != insn_op->mode)
896 	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
897 						 insn_op->mode));
898 
899       /* Wrap the expanded RTX for pointer types into a MEM expr with
900 	 the proper mode.  This allows us to use e.g. (match_operand
901 	 "memory_operand"..) in the insn patterns instead of (mem
902 	 (match_operand "address_operand)).  This is helpful for
903 	 patterns not just accepting MEMs.  */
904       if (POINTER_TYPE_P (TREE_TYPE (arg))
905 	  && insn_op->predicate != address_operand)
906 	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
907 
908       /* Expand the modulo operation required on element selectors.  */
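      /* E.g. for a V4SI vector operand the selector is ANDed with
	 GET_MODE_NUNITS (V4SImode) - 1 == 3, i.e. taken modulo 4.  */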
909       if (op_flags == O_ELEM)
910 	{
911 	  gcc_assert (last_vec_mode != VOIDmode);
912 	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
913 					     op[arity],
914 					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
915 					     NULL_RTX, 1, OPTAB_DIRECT);
916 	}
917 
918       /* Record the vector mode used for an element selector.  This assumes:
919 	 1. There is no builtin with two different vector modes and an element selector
920 	 2. The element selector comes after the vector type it is referring to.
921 	 This is currently true for all the builtins, but FIXME: we
922 	 should check for that.  */
923       if (VECTOR_MODE_P (insn_op->mode))
924 	last_vec_mode = insn_op->mode;
925 
926       if (insn_op->predicate (op[arity], insn_op->mode))
927 	{
928 	  arity++;
929 	  continue;
930 	}
931 
932       /* A memory operand is rejected by the memory_operand predicate.
933 	 Try making the address legal by copying it into a register.  */
934       if (MEM_P (op[arity])
935 	  && insn_op->predicate == memory_operand
936 	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
937 	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
938 	{
939 	  op[arity] = replace_equiv_address (op[arity],
940 					     copy_to_mode_reg (Pmode,
941 					       XEXP (op[arity], 0)));
942 	}
943       /* Some of the builtins require different modes/types than the
944 	 pattern in order to implement a specific API.  Instead of
945 	 adding many expanders which do the mode change we do it here.
946 	 E.g. s390_vec_add_u128, which is required to have vector unsigned
947 	 char arguments, is mapped to addti3.  */
948       else if (insn_op->mode != VOIDmode
949 	       && GET_MODE (op[arity]) != VOIDmode
950 	       && GET_MODE (op[arity]) != insn_op->mode
951 	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
952 						   GET_MODE (op[arity]), 0))
953 		   != NULL_RTX))
954 	{
955 	  op[arity] = tmp_rtx;
956 	}
957 
958       /* The predicate rejects the operand although the mode is fine.
959 	 Copy the operand to register.  */
960       if (!insn_op->predicate (op[arity], insn_op->mode)
961 	  && (GET_MODE (op[arity]) == insn_op->mode
962 	      || GET_MODE (op[arity]) == VOIDmode
963 	      || (insn_op->predicate == address_operand
964 		  && GET_MODE (op[arity]) == Pmode)))
965 	{
966 	  /* An address_operand usually has VOIDmode in the expander
967 	     so we cannot use this.  */
968 	  machine_mode target_mode =
969 	    (insn_op->predicate == address_operand
970 	     ? (machine_mode) Pmode : insn_op->mode);
971 	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
972 	}
973 
974       if (!insn_op->predicate (op[arity], insn_op->mode))
975 	{
976 	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
977 	  return const0_rtx;
978 	}
979       arity++;
980     }
981 
982   switch (arity)
983     {
984     case 0:
985       pat = GEN_FCN (icode) (target);
986       break;
987     case 1:
988       if (nonvoid)
989 	pat = GEN_FCN (icode) (target, op[0]);
990       else
991 	pat = GEN_FCN (icode) (op[0]);
992       break;
993     case 2:
994       if (nonvoid)
995 	pat = GEN_FCN (icode) (target, op[0], op[1]);
996       else
997 	pat = GEN_FCN (icode) (op[0], op[1]);
998       break;
999     case 3:
1000       if (nonvoid)
1001 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1002       else
1003 	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1004       break;
1005     case 4:
1006       if (nonvoid)
1007 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1008       else
1009 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1010       break;
1011     case 5:
1012       if (nonvoid)
1013 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1014       else
1015 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1016       break;
1017     case 6:
1018       if (nonvoid)
1019 	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1020       else
1021 	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1022       break;
1023     default:
1024       gcc_unreachable ();
1025     }
1026   if (!pat)
1027     return NULL_RTX;
1028   emit_insn (pat);
1029 
1030   if (nonvoid)
1031     return target;
1032   else
1033     return const0_rtx;
1034 }
1035 
1036 
1037 static const int s390_hotpatch_hw_max = 1000000;
1038 static int s390_hotpatch_hw_before_label = 0;
1039 static int s390_hotpatch_hw_after_label = 0;
1040 
1041 /* Check whether the hotpatch attribute is applied to a function and, if it has
1042    an argument, the argument is valid.  */
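/* Illustrative usage (the two arguments give the number of halfwords of
   hotpatch space before and after the function label, see
   s390_hotpatch_hw_before_label/_after_label above):

     void foo (void) __attribute__ ((hotpatch (1, 2)));  */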
1043 
1044 static tree
1045 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1046 				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1047 {
1048   tree expr;
1049   tree expr2;
1050   int err;
1051 
1052   if (TREE_CODE (*node) != FUNCTION_DECL)
1053     {
1054       warning (OPT_Wattributes, "%qE attribute only applies to functions",
1055 	       name);
1056       *no_add_attrs = true;
1057     }
1058   if (args != NULL && TREE_CHAIN (args) != NULL)
1059     {
1060       expr = TREE_VALUE (args);
1061       expr2 = TREE_VALUE (TREE_CHAIN (args));
1062     }
1063   if (args == NULL || TREE_CHAIN (args) == NULL)
1064     err = 1;
1065   else if (TREE_CODE (expr) != INTEGER_CST
1066 	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1067 	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1068     err = 1;
1069   else if (TREE_CODE (expr2) != INTEGER_CST
1070 	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1071 	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1072     err = 1;
1073   else
1074     err = 0;
1075   if (err)
1076     {
1077       error ("requested %qE attribute is not a comma separated pair of"
1078 	     " non-negative integer constants or too large (max. %d)", name,
1079 	     s390_hotpatch_hw_max);
1080       *no_add_attrs = true;
1081     }
1082 
1083   return NULL_TREE;
1084 }
1085 
1086 /* Expand the s390_vector_bool type attribute.  */
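/* E.g. (illustrative): applied to a "vector signed int" (V4SImode) the
   attribute yields the BT_BV4SI boolean vector type, which backs the
   zvector "vector bool int" type.  */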
1087 
1088 static tree
1089 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1090 				  tree args ATTRIBUTE_UNUSED,
1091 				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1092 {
1093   tree type = *node, result = NULL_TREE;
1094   machine_mode mode;
1095 
1096   while (POINTER_TYPE_P (type)
1097 	 || TREE_CODE (type) == FUNCTION_TYPE
1098 	 || TREE_CODE (type) == METHOD_TYPE
1099 	 || TREE_CODE (type) == ARRAY_TYPE)
1100     type = TREE_TYPE (type);
1101 
1102   mode = TYPE_MODE (type);
1103   switch (mode)
1104     {
1105     case E_DImode: case E_V2DImode:
1106       result = s390_builtin_types[BT_BV2DI];
1107       break;
1108     case E_SImode: case E_V4SImode:
1109       result = s390_builtin_types[BT_BV4SI];
1110       break;
1111     case E_HImode: case E_V8HImode:
1112       result = s390_builtin_types[BT_BV8HI];
1113       break;
1114     case E_QImode: case E_V16QImode:
1115       result = s390_builtin_types[BT_BV16QI];
1116       break;
1117     default:
1118       break;
1119     }
1120 
1121   *no_add_attrs = true;  /* No need to hang on to the attribute.  */
1122 
1123   if (result)
1124     *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1125 
1126   return NULL_TREE;
1127 }
1128 
1129 /* Check syntax of function decl attributes having a string type value.  */
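/* Illustrative usage, with the accepted string values checked below:

     void foo (void) __attribute__ ((indirect_branch ("thunk")));  */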
1130 
1131 static tree
1132 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1133 			      tree args ATTRIBUTE_UNUSED,
1134 			      int flags ATTRIBUTE_UNUSED,
1135 			      bool *no_add_attrs)
1136 {
1137   tree cst;
1138 
1139   if (TREE_CODE (*node) != FUNCTION_DECL)
1140     {
1141       warning (OPT_Wattributes, "%qE attribute only applies to functions",
1142 	       name);
1143       *no_add_attrs = true;
1144     }
1145 
1146   cst = TREE_VALUE (args);
1147 
1148   if (TREE_CODE (cst) != STRING_CST)
1149     {
1150       warning (OPT_Wattributes,
1151 	       "%qE attribute requires a string constant argument",
1152 	       name);
1153       *no_add_attrs = true;
1154     }
1155 
1156   if (is_attribute_p ("indirect_branch", name)
1157       || is_attribute_p ("indirect_branch_call", name)
1158       || is_attribute_p ("function_return", name)
1159       || is_attribute_p ("function_return_reg", name)
1160       || is_attribute_p ("function_return_mem", name))
1161     {
1162       if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1163 	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1164 	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1165       {
1166 	warning (OPT_Wattributes,
1167 		 "argument to %qE attribute is not "
1168 		 "(keep|thunk|thunk-extern)", name);
1169 	*no_add_attrs = true;
1170       }
1171     }
1172 
1173   if (is_attribute_p ("indirect_branch_jump", name)
1174       && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1175       && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1176       && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1177       && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1178     {
1179       warning (OPT_Wattributes,
1180 	       "argument to %qE attribute is not "
1181 	       "(keep|thunk|thunk-inline|thunk-extern)", name);
1182       *no_add_attrs = true;
1183     }
1184 
1185   return NULL_TREE;
1186 }
1187 
1188 static const struct attribute_spec s390_attribute_table[] = {
1189   { "hotpatch", 2, 2, true, false, false, false,
1190     s390_handle_hotpatch_attribute, NULL },
1191   { "s390_vector_bool", 0, 0, false, true, false, true,
1192     s390_handle_vectorbool_attribute, NULL },
1193   { "indirect_branch", 1, 1, true, false, false, false,
1194     s390_handle_string_attribute, NULL },
1195   { "indirect_branch_jump", 1, 1, true, false, false, false,
1196     s390_handle_string_attribute, NULL },
1197   { "indirect_branch_call", 1, 1, true, false, false, false,
1198     s390_handle_string_attribute, NULL },
1199   { "function_return", 1, 1, true, false, false, false,
1200     s390_handle_string_attribute, NULL },
1201   { "function_return_reg", 1, 1, true, false, false, false,
1202     s390_handle_string_attribute, NULL },
1203   { "function_return_mem", 1, 1, true, false, false, false,
1204     s390_handle_string_attribute, NULL },
1205 
1206   /* End element.  */
1207   { NULL,        0, 0, false, false, false, false, NULL, NULL }
1208 };
1209 
1210 /* Return the alignment for LABEL.  We default to the -falign-labels
1211    value except for the literal pool base label.  */
1212 int
1213 s390_label_align (rtx_insn *label)
1214 {
1215   rtx_insn *prev_insn = prev_active_insn (label);
1216   rtx set, src;
1217 
1218   if (prev_insn == NULL_RTX)
1219     goto old;
1220 
1221   set = single_set (prev_insn);
1222 
1223   if (set == NULL_RTX)
1224     goto old;
1225 
1226   src = SET_SRC (set);
1227 
1228   /* Don't align literal pool base labels.  */
1229   if (GET_CODE (src) == UNSPEC
1230       && XINT (src, 1) == UNSPEC_MAIN_BASE)
1231     return 0;
1232 
1233  old:
1234   return align_labels.levels[0].log;
1235 }
1236 
1237 static GTY(()) rtx got_symbol;
1238 
1239 /* Return the GOT table symbol.  The symbol will be created when the
1240    function is invoked for the first time.  */
1241 
1242 static rtx
1243 s390_got_symbol (void)
1244 {
1245   if (!got_symbol)
1246     {
1247       got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1248       SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1249     }
1250 
1251   return got_symbol;
1252 }
1253 
1254 static scalar_int_mode
1255 s390_libgcc_cmp_return_mode (void)
1256 {
1257   return TARGET_64BIT ? DImode : SImode;
1258 }
1259 
1260 static scalar_int_mode
1261 s390_libgcc_shift_count_mode (void)
1262 {
1263   return TARGET_64BIT ? DImode : SImode;
1264 }
1265 
1266 static scalar_int_mode
1267 s390_unwind_word_mode (void)
1268 {
1269   return TARGET_64BIT ? DImode : SImode;
1270 }
1271 
1272 /* Return true if the back end supports mode MODE.  */
1273 static bool
1274 s390_scalar_mode_supported_p (scalar_mode mode)
1275 {
1276   /* In contrast to the default implementation reject TImode constants on 31bit
1277      TARGET_ZARCH for ABI compliance.  */
1278   if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1279     return false;
1280 
1281   if (DECIMAL_FLOAT_MODE_P (mode))
1282     return default_decimal_float_supported_p ();
1283 
1284   return default_scalar_mode_supported_p (mode);
1285 }
1286 
1287 /* Return true if the back end supports vector mode MODE.  */
1288 static bool
1289 s390_vector_mode_supported_p (machine_mode mode)
1290 {
1291   machine_mode inner;
1292 
1293   if (!VECTOR_MODE_P (mode)
1294       || !TARGET_VX
1295       || GET_MODE_SIZE (mode) > 16)
1296     return false;
1297 
1298   inner = GET_MODE_INNER (mode);
1299 
1300   switch (inner)
1301     {
1302     case E_QImode:
1303     case E_HImode:
1304     case E_SImode:
1305     case E_DImode:
1306     case E_TImode:
1307     case E_SFmode:
1308     case E_DFmode:
1309     case E_TFmode:
1310       return true;
1311     default:
1312       return false;
1313     }
1314 }
1315 
1316 /* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */
1317 
1318 void
1319 s390_set_has_landing_pad_p (bool value)
1320 {
1321   cfun->machine->has_landing_pad_p = value;
1322 }
1323 
1324 /* If two condition code modes are compatible, return a condition code
1325    mode which is compatible with both.  Otherwise, return
1326    VOIDmode.  */
1327 
1328 static machine_mode
1329 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1330 {
1331   if (m1 == m2)
1332     return m1;
1333 
1334   switch (m1)
1335     {
1336     case E_CCZmode:
1337       if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1338 	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1339 	return m2;
1340       return VOIDmode;
1341 
1342     case E_CCSmode:
1343     case E_CCUmode:
1344     case E_CCTmode:
1345     case E_CCSRmode:
1346     case E_CCURmode:
1347     case E_CCZ1mode:
1348       if (m2 == CCZmode)
1349 	return m1;
1350 
1351       return VOIDmode;
1352 
1353     default:
1354       return VOIDmode;
1355     }
1356   return VOIDmode;
1357 }
1358 
1359 /* Return true if SET either doesn't set the CC register, or else
1360    the source and destination have matching CC modes and that
1361    CC mode is at least as constrained as REQ_MODE.  */
1362 
1363 static bool
1364 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1365 {
1366   machine_mode set_mode;
1367 
1368   gcc_assert (GET_CODE (set) == SET);
1369 
1370   /* These modes are supposed to be used only in CC consumer
1371      patterns.  */
1372   gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1373 	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1374 
1375   if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1376     return 1;
1377 
1378   set_mode = GET_MODE (SET_DEST (set));
1379   switch (set_mode)
1380     {
1381     case E_CCZ1mode:
1382     case E_CCSmode:
1383     case E_CCSRmode:
1384     case E_CCSFPSmode:
1385     case E_CCUmode:
1386     case E_CCURmode:
1387     case E_CCOmode:
1388     case E_CCLmode:
1389     case E_CCL1mode:
1390     case E_CCL2mode:
1391     case E_CCL3mode:
1392     case E_CCT1mode:
1393     case E_CCT2mode:
1394     case E_CCT3mode:
1395     case E_CCVEQmode:
1396     case E_CCVIHmode:
1397     case E_CCVIHUmode:
1398     case E_CCVFHmode:
1399     case E_CCVFHEmode:
1400       if (req_mode != set_mode)
1401 	return 0;
1402       break;
1403 
1404     case E_CCZmode:
1405       if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1406 	  && req_mode != CCSRmode && req_mode != CCURmode
1407 	  && req_mode != CCZ1mode)
1408 	return 0;
1409       break;
1410 
1411     case E_CCAPmode:
1412     case E_CCANmode:
1413       if (req_mode != CCAmode)
1414 	return 0;
1415       break;
1416 
1417     default:
1418       gcc_unreachable ();
1419     }
1420 
1421   return (GET_MODE (SET_SRC (set)) == set_mode);
1422 }
1423 
1424 /* Return true if every SET in INSN that sets the CC register
1425    has source and destination with matching CC modes and that
1426    CC mode is at least as constrained as REQ_MODE.
1427    If REQ_MODE is VOIDmode, always return false.  */
1428 
1429 bool
1430 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1431 {
1432   int i;
1433 
1434   /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
1435   if (req_mode == VOIDmode)
1436     return false;
1437 
1438   if (GET_CODE (PATTERN (insn)) == SET)
1439     return s390_match_ccmode_set (PATTERN (insn), req_mode);
1440 
1441   if (GET_CODE (PATTERN (insn)) == PARALLEL)
1442       for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1443 	{
1444 	  rtx set = XVECEXP (PATTERN (insn), 0, i);
1445 	  if (GET_CODE (set) == SET)
1446 	    if (!s390_match_ccmode_set (set, req_mode))
1447 	      return false;
1448 	}
1449 
1450   return true;
1451 }
1452 
1453 /* If a test-under-mask instruction can be used to implement
1454    (compare (and ... OP1) OP2), return the CC mode required
1455    to do that.  Otherwise, return VOIDmode.
1456    MIXED is true if the instruction can distinguish between
1457    CC1 and CC2 for mixed selected bits (TMxx), it is false
1458    if the instruction cannot (TM).  */
1459 
1460 machine_mode
1461 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1462 {
1463   int bit0, bit1;
1464 
1465   /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
1466   if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1467     return VOIDmode;
1468 
1469   /* Selected bits all zero: CC0.
1470      e.g.: int a; if ((a & (16 + 128)) == 0) */
1471   if (INTVAL (op2) == 0)
1472     return CCTmode;
1473 
1474   /* Selected bits all one: CC3.
1475      e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1476   if (INTVAL (op2) == INTVAL (op1))
1477     return CCT3mode;
1478 
1479   /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1480      int a;
1481      if ((a & (16 + 128)) == 16)         -> CCT1
1482      if ((a & (16 + 128)) == 128)        -> CCT2  */
1483   if (mixed)
1484     {
1485       bit1 = exact_log2 (INTVAL (op2));
1486       bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1487       if (bit0 != -1 && bit1 != -1)
1488 	return bit0 > bit1 ? CCT1mode : CCT2mode;
1489     }
1490 
1491   return VOIDmode;
1492 }
1493 
1494 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1495    OP0 and OP1 of a COMPARE, return the mode to be used for the
1496    comparison.  */
1497 
1498 machine_mode
1499 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1500 {
1501   switch (code)
1502     {
1503       case EQ:
1504       case NE:
1505 	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1506 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1507 	  return CCAPmode;
1508 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1509 	    && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1510 	  return CCAPmode;
1511 	if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1512 	     || GET_CODE (op1) == NEG)
1513 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1514 	  return CCLmode;
1515 
1516 	if (GET_CODE (op0) == AND)
1517 	  {
1518 	    /* Check whether we can potentially do it via TM.  */
1519 	    machine_mode ccmode;
1520 	    ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1521 	    if (ccmode != VOIDmode)
1522 	      {
1523 		/* Relax CCTmode to CCZmode to allow fall-back to AND
1524 		   if that turns out to be beneficial.  */
1525 		return ccmode == CCTmode ? CCZmode : ccmode;
1526 	      }
1527 	  }
1528 
1529 	if (register_operand (op0, HImode)
1530 	    && GET_CODE (op1) == CONST_INT
1531 	    && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1532 	  return CCT3mode;
1533 	if (register_operand (op0, QImode)
1534 	    && GET_CODE (op1) == CONST_INT
1535 	    && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1536 	  return CCT3mode;
1537 
1538 	return CCZmode;
1539 
1540       case LE:
1541       case LT:
1542       case GE:
1543       case GT:
1544 	/* The only overflow condition of NEG and ABS happens when
1545 	   INT_MIN is used as parameter, which stays negative. So
1546 	   we have an overflow from a positive value to a negative.
1547 	   Using CCAP mode the resulting cc can be used for comparisons.  */
1548 	if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1549 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1550 	  return CCAPmode;
1551 
1552 	/* If constants are involved in an add instruction it is possible to use
1553 	   the resulting cc for comparisons with zero. Knowing the sign of the
1554 	   constant the overflow behavior gets predictable. e.g.:
1555 	     int a, b; if ((b = a + c) > 0)
1556 	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
1557 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1558 	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1559 		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1560 		    /* Avoid INT32_MIN on 32 bit.  */
1561 		    && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1562 	  {
1563 	    if (INTVAL (XEXP((op0), 1)) < 0)
1564 	      return CCANmode;
1565 	    else
1566 	      return CCAPmode;
1567 	  }
1568 
1569 	/* Fall through.  */
1570       case LTGT:
1571 	if (HONOR_NANS (op0) || HONOR_NANS (op1))
1572 	  return CCSFPSmode;
1573 
1574 	/* Fall through.  */
1575       case UNORDERED:
1576       case ORDERED:
1577       case UNEQ:
1578       case UNLE:
1579       case UNLT:
1580       case UNGE:
1581       case UNGT:
1582 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1583 	    && GET_CODE (op1) != CONST_INT)
1584 	  return CCSRmode;
1585 	return CCSmode;
1586 
1587       case LTU:
1588       case GEU:
1589 	if (GET_CODE (op0) == PLUS
1590 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1591 	  return CCL1mode;
1592 
1593 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1594 	    && GET_CODE (op1) != CONST_INT)
1595 	  return CCURmode;
1596 	return CCUmode;
1597 
1598       case LEU:
1599       case GTU:
1600 	if (GET_CODE (op0) == MINUS
1601 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1602 	  return CCL2mode;
1603 
1604 	if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1605 	    && GET_CODE (op1) != CONST_INT)
1606 	  return CCURmode;
1607 	return CCUmode;
1608 
1609       default:
1610 	gcc_unreachable ();
1611     }
1612 }
1613 
1614 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1615    that we can implement more efficiently.  */
1616 
1617 static void
1618 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1619 			      bool op0_preserve_value)
1620 {
1621   if (op0_preserve_value)
1622     return;
1623 
1624   /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
1625   if ((*code == EQ || *code == NE)
1626       && *op1 == const0_rtx
1627       && GET_CODE (*op0) == ZERO_EXTRACT
1628       && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1629       && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1630       && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1631     {
1632       rtx inner = XEXP (*op0, 0);
1633       HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1634       HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1635       HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1636 
1637       if (len > 0 && len < modesize
1638 	  && pos >= 0 && pos + len <= modesize
1639 	  && modesize <= HOST_BITS_PER_WIDE_INT)
1640 	{
1641 	  unsigned HOST_WIDE_INT block;
1642 	  block = (HOST_WIDE_INT_1U << len) - 1;
1643 	  block <<= modesize - pos - len;
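	  /* E.g. len = 4, pos = 8 and modesize = 32 give block = 0x00f00000:
	     as the shift amount above shows, the extracted field starts at
	     bit POS counting from the most significant bit.  */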
1644 
1645 	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1646 			      gen_int_mode (block, GET_MODE (inner)));
1647 	}
1648     }
1649 
1650   /* Narrow AND of memory against immediate to enable TM.  */
1651   if ((*code == EQ || *code == NE)
1652       && *op1 == const0_rtx
1653       && GET_CODE (*op0) == AND
1654       && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1655       && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1656     {
1657       rtx inner = XEXP (*op0, 0);
1658       rtx mask = XEXP (*op0, 1);
1659 
1660       /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
1661       if (GET_CODE (inner) == SUBREG
1662 	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1663 	  && (GET_MODE_SIZE (GET_MODE (inner))
1664 	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1665 	  && ((INTVAL (mask)
1666 	       & GET_MODE_MASK (GET_MODE (inner))
1667 	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1668 	      == 0))
1669 	inner = SUBREG_REG (inner);
1670 
1671       /* Do not change volatile MEMs.  */
1672       if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1673 	{
1674 	  int part = s390_single_part (XEXP (*op0, 1),
1675 				       GET_MODE (inner), QImode, 0);
1676 	  if (part >= 0)
1677 	    {
1678 	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1679 	      inner = adjust_address_nv (inner, QImode, part);
1680 	      *op0 = gen_rtx_AND (QImode, inner, mask);
1681 	    }
1682 	}
1683     }
1684 
1685   /* Narrow comparisons against 0xffff to HImode if possible.  */
1686   if ((*code == EQ || *code == NE)
1687       && GET_CODE (*op1) == CONST_INT
1688       && INTVAL (*op1) == 0xffff
1689       && SCALAR_INT_MODE_P (GET_MODE (*op0))
1690       && (nonzero_bits (*op0, GET_MODE (*op0))
1691 	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1692     {
1693       *op0 = gen_lowpart (HImode, *op0);
1694       *op1 = constm1_rtx;
1695     }
1696 
1697   /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
1698   if (GET_CODE (*op0) == UNSPEC
1699       && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1700       && XVECLEN (*op0, 0) == 1
1701       && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1702       && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1703       && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1704       && *op1 == const0_rtx)
1705     {
1706       enum rtx_code new_code = UNKNOWN;
1707       switch (*code)
1708 	{
1709 	  case EQ: new_code = EQ;  break;
1710 	  case NE: new_code = NE;  break;
1711 	  case LT: new_code = GTU; break;
1712 	  case GT: new_code = LTU; break;
1713 	  case LE: new_code = GEU; break;
1714 	  case GE: new_code = LEU; break;
1715 	  default: break;
1716 	}
1717 
1718       if (new_code != UNKNOWN)
1719 	{
1720 	  *op0 = XVECEXP (*op0, 0, 0);
1721 	  *code = new_code;
1722 	}
1723     }
1724 
1725   /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
1726   if (GET_CODE (*op0) == UNSPEC
1727       && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1728       && XVECLEN (*op0, 0) == 1
1729       && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1730       && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1731       && CONST_INT_P (*op1))
1732     {
1733       enum rtx_code new_code = UNKNOWN;
1734       switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1735 	{
1736 	case E_CCZmode:
1737 	case E_CCRAWmode:
1738 	  switch (*code)
1739 	    {
1740 	    case EQ: new_code = EQ;  break;
1741 	    case NE: new_code = NE;  break;
1742 	    default: break;
1743 	    }
1744 	  break;
1745 	default: break;
1746 	}
1747 
1748       if (new_code != UNKNOWN)
1749 	{
1750 	  /* For CCRAWmode put the required cc mask into the second
1751 	     operand.  */
1752 	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1753 	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1754 	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1755 	  *op0 = XVECEXP (*op0, 0, 0);
1756 	  *code = new_code;
1757 	}
1758     }
1759 
1760   /* Simplify cascaded EQ, NE with const0_rtx.  */
1761   if ((*code == NE || *code == EQ)
1762       && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1763       && GET_MODE (*op0) == SImode
1764       && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1765       && REG_P (XEXP (*op0, 0))
1766       && XEXP (*op0, 1) == const0_rtx
1767       && *op1 == const0_rtx)
1768     {
1769       if ((*code == EQ && GET_CODE (*op0) == NE)
1770 	  || (*code == NE && GET_CODE (*op0) == EQ))
1771 	*code = EQ;
1772       else
1773 	*code = NE;
1774       *op0 = XEXP (*op0, 0);
1775     }
1776 
1777   /* Prefer register over memory as first operand.  */
1778   if (MEM_P (*op0) && REG_P (*op1))
1779     {
1780       rtx tem = *op0; *op0 = *op1; *op1 = tem;
1781       *code = (int)swap_condition ((enum rtx_code)*code);
1782     }
1783 
1784   /* A comparison result is compared against zero.  Replace it with
1785      the (perhaps inverted) original comparison.
1786      This probably should be done by simplify_relational_operation.  */
1787   if ((*code == EQ || *code == NE)
1788       && *op1 == const0_rtx
1789       && COMPARISON_P (*op0)
1790       && CC_REG_P (XEXP (*op0, 0)))
1791     {
1792       enum rtx_code new_code;
1793 
1794       if (*code == EQ)
1795 	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1796 						   XEXP (*op0, 0),
1797 						   XEXP (*op0, 1), NULL);
1798       else
1799 	new_code = GET_CODE (*op0);
1800 
1801       if (new_code != UNKNOWN)
1802 	{
1803 	  *code = new_code;
1804 	  *op1 = XEXP (*op0, 1);
1805 	  *op0 = XEXP (*op0, 0);
1806 	}
1807     }
1808 
1809   /* ~a==b -> ~(a^b)==0   ~a!=b -> ~(a^b)!=0 */
1810   if (TARGET_Z15
1811       && (*code == EQ || *code == NE)
1812       && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1813       && GET_CODE (*op0) == NOT)
1814     {
1815       machine_mode mode = GET_MODE (*op0);
1816       *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
1817       *op0 = gen_rtx_NOT (mode, *op0);
1818       *op1 = const0_rtx;
1819     }
1820 
1821   /* a&b == -1 -> ~a|~b == 0    a|b == -1 -> ~a&~b == 0  */
1822   if (TARGET_Z15
1823       && (*code == EQ || *code == NE)
1824       && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
1825       && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1826       && CONST_INT_P (*op1)
1827       && *op1 == constm1_rtx)
1828     {
1829       machine_mode mode = GET_MODE (*op0);
1830       rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
1831       rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
1832 
1833       if (GET_CODE (*op0) == AND)
1834 	*op0 = gen_rtx_IOR (mode, op00, op01);
1835       else
1836 	*op0 = gen_rtx_AND (mode, op00, op01);
1837 
1838       *op1 = const0_rtx;
1839     }
1840 }
1841 
1842 
1843 /* Emit a compare instruction suitable to implement the comparison
1844    OP0 CODE OP1.  Return the correct condition RTL to be placed in
1845    the IF_THEN_ELSE of the conditional branch testing the result.  */
1846 
1847 rtx
1848 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1849 {
1850   machine_mode mode = s390_select_ccmode (code, op0, op1);
1851   rtx cc;
1852 
1853   if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1854     {
1855       /* Do not output a redundant compare instruction if a
1856 	 compare_and_swap pattern already computed the result and the
1857 	 machine modes are compatible.  */
1858       gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1859 		  == GET_MODE (op0));
1860       cc = op0;
1861     }
1862   else
1863     {
1864       cc = gen_rtx_REG (mode, CC_REGNUM);
1865       emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1866     }
1867 
1868   return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1869 }
1870 
1871 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1872    MEM, whose address is a pseudo containing the original MEM's address.  */
1873 
1874 static rtx
1875 s390_legitimize_cs_operand (rtx mem)
1876 {
1877   rtx tmp;
1878 
1879   if (!contains_symbol_ref_p (mem))
1880     return mem;
1881   tmp = gen_reg_rtx (Pmode);
1882   emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
1883   return change_address (mem, VOIDmode, tmp);
1884 }
1885 
1886 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1887    matches CMP.
1888    Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1889    conditional branch testing the result.  */
1890 
1891 static rtx
1892 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1893 			    rtx cmp, rtx new_rtx, machine_mode ccmode)
1894 {
1895   rtx cc;
1896 
1897   mem = s390_legitimize_cs_operand (mem);
1898   cc = gen_rtx_REG (ccmode, CC_REGNUM);
1899   switch (GET_MODE (mem))
1900     {
1901     case E_SImode:
1902       emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1903 							 new_rtx, cc));
1904       break;
1905     case E_DImode:
1906       emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1907 							 new_rtx, cc));
1908       break;
1909     case E_TImode:
1910 	emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1911 							   new_rtx, cc));
1912       break;
1913     case E_QImode:
1914     case E_HImode:
1915     default:
1916       gcc_unreachable ();
1917     }
1918   return s390_emit_compare (code, cc, const0_rtx);
1919 }
1920 
1921 /* Emit a jump instruction to TARGET and return it.  If COND is
1922    NULL_RTX, emit an unconditional jump, else a conditional jump under
1923    condition COND.  */
1924 
1925 rtx_insn *
1926 s390_emit_jump (rtx target, rtx cond)
1927 {
1928   rtx insn;
1929 
1930   target = gen_rtx_LABEL_REF (VOIDmode, target);
1931   if (cond)
1932     target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1933 
1934   insn = gen_rtx_SET (pc_rtx, target);
1935   return emit_jump_insn (insn);
1936 }
1937 
1938 /* Return branch condition mask to implement a branch
1939    specified by CODE.  Return -1 for invalid comparisons.  */
1940 
1941 int
1942 s390_branch_condition_mask (rtx code)
1943 {
1944   const int CC0 = 1 << 3;
1945   const int CC1 = 1 << 2;
1946   const int CC2 = 1 << 1;
1947   const int CC3 = 1 << 0;
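  /* Condition code i is represented by mask bit 1 << (3 - i), i.e. CC0 is
     the most significant of the four mask bits returned below.  */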
1948 
1949   gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1950   gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1951   gcc_assert (XEXP (code, 1) == const0_rtx
1952 	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1953 		  && CONST_INT_P (XEXP (code, 1))));
1954 
1955 
1956   switch (GET_MODE (XEXP (code, 0)))
1957     {
1958     case E_CCZmode:
1959     case E_CCZ1mode:
1960       switch (GET_CODE (code))
1961 	{
1962 	case EQ:	return CC0;
1963 	case NE:	return CC1 | CC2 | CC3;
1964 	default:	return -1;
1965 	}
1966       break;
1967 
1968     case E_CCT1mode:
1969       switch (GET_CODE (code))
1970 	{
1971 	case EQ:	return CC1;
1972 	case NE:	return CC0 | CC2 | CC3;
1973 	default:	return -1;
1974 	}
1975       break;
1976 
1977     case E_CCT2mode:
1978       switch (GET_CODE (code))
1979 	{
1980 	case EQ:	return CC2;
1981 	case NE:	return CC0 | CC1 | CC3;
1982 	default:	return -1;
1983 	}
1984       break;
1985 
1986     case E_CCT3mode:
1987       switch (GET_CODE (code))
1988 	{
1989 	case EQ:	return CC3;
1990 	case NE:	return CC0 | CC1 | CC2;
1991 	default:	return -1;
1992 	}
1993       break;
1994 
1995     case E_CCLmode:
1996       switch (GET_CODE (code))
1997 	{
1998 	case EQ:	return CC0 | CC2;
1999 	case NE:	return CC1 | CC3;
2000 	default:	return -1;
2001 	}
2002       break;
2003 
2004     case E_CCL1mode:
2005       switch (GET_CODE (code))
2006 	{
2007 	case LTU:	return CC2 | CC3;  /* carry */
2008 	case GEU:	return CC0 | CC1;  /* no carry */
2009 	default:	return -1;
2010 	}
2011       break;
2012 
2013     case E_CCL2mode:
2014       switch (GET_CODE (code))
2015 	{
2016 	case GTU:	return CC0 | CC1;  /* borrow */
2017 	case LEU:	return CC2 | CC3;  /* no borrow */
2018 	default:	return -1;
2019 	}
2020       break;
2021 
2022     case E_CCL3mode:
2023       switch (GET_CODE (code))
2024 	{
2025 	case EQ:	return CC0 | CC2;
2026 	case NE:	return CC1 | CC3;
2027 	case LTU:	return CC1;
2028 	case GTU:	return CC3;
2029 	case LEU:	return CC1 | CC2;
2030 	case GEU:	return CC2 | CC3;
2031 	default:	return -1;
2032 	}
2033 
2034     case E_CCUmode:
2035       switch (GET_CODE (code))
2036 	{
2037 	case EQ:	return CC0;
2038 	case NE:	return CC1 | CC2 | CC3;
2039 	case LTU:	return CC1;
2040 	case GTU:	return CC2;
2041 	case LEU:	return CC0 | CC1;
2042 	case GEU:	return CC0 | CC2;
2043 	default:	return -1;
2044 	}
2045       break;
2046 
2047     case E_CCURmode:
2048       switch (GET_CODE (code))
2049 	{
2050 	case EQ:	return CC0;
2051 	case NE:	return CC2 | CC1 | CC3;
2052 	case LTU:	return CC2;
2053 	case GTU:	return CC1;
2054 	case LEU:	return CC0 | CC2;
2055 	case GEU:	return CC0 | CC1;
2056 	default:	return -1;
2057 	}
2058       break;
2059 
2060     case E_CCAPmode:
2061       switch (GET_CODE (code))
2062 	{
2063 	case EQ:	return CC0;
2064 	case NE:	return CC1 | CC2 | CC3;
2065 	case LT:	return CC1 | CC3;
2066 	case GT:	return CC2;
2067 	case LE:	return CC0 | CC1 | CC3;
2068 	case GE:	return CC0 | CC2;
2069 	default:	return -1;
2070 	}
2071       break;
2072 
2073     case E_CCANmode:
2074       switch (GET_CODE (code))
2075 	{
2076 	case EQ:	return CC0;
2077 	case NE:	return CC1 | CC2 | CC3;
2078 	case LT:	return CC1;
2079 	case GT:	return CC2 | CC3;
2080 	case LE:	return CC0 | CC1;
2081 	case GE:	return CC0 | CC2 | CC3;
2082 	default:	return -1;
2083 	}
2084       break;
2085 
2086     case E_CCOmode:
2087       switch (GET_CODE (code))
2088 	{
2089 	case EQ:	return CC0 | CC1 | CC2;
2090 	case NE:	return CC3;
2091 	default:	return -1;
2092 	}
2093       break;
2094 
2095     case E_CCSmode:
2096     case E_CCSFPSmode:
2097       switch (GET_CODE (code))
2098 	{
2099 	case EQ:	return CC0;
2100 	case NE:	return CC1 | CC2 | CC3;
2101 	case LT:	return CC1;
2102 	case GT:	return CC2;
2103 	case LE:	return CC0 | CC1;
2104 	case GE:	return CC0 | CC2;
2105 	case UNORDERED:	return CC3;
2106 	case ORDERED:	return CC0 | CC1 | CC2;
2107 	case UNEQ:	return CC0 | CC3;
2108 	case UNLT:	return CC1 | CC3;
2109 	case UNGT:	return CC2 | CC3;
2110 	case UNLE:	return CC0 | CC1 | CC3;
2111 	case UNGE:	return CC0 | CC2 | CC3;
2112 	case LTGT:	return CC1 | CC2;
2113 	default:	return -1;
2114 	}
2115       break;
2116 
2117     case E_CCSRmode:
2118       switch (GET_CODE (code))
2119 	{
2120 	case EQ:	return CC0;
2121 	case NE:	return CC2 | CC1 | CC3;
2122 	case LT:	return CC2;
2123 	case GT:	return CC1;
2124 	case LE:	return CC0 | CC2;
2125 	case GE:	return CC0 | CC1;
2126 	case UNORDERED:	return CC3;
2127 	case ORDERED:	return CC0 | CC2 | CC1;
2128 	case UNEQ:	return CC0 | CC3;
2129 	case UNLT:	return CC2 | CC3;
2130 	case UNGT:	return CC1 | CC3;
2131 	case UNLE:	return CC0 | CC2 | CC3;
2132 	case UNGE:	return CC0 | CC1 | CC3;
2133 	case LTGT:	return CC2 | CC1;
2134 	default:	return -1;
2135 	}
2136       break;
2137 
2138       /* Vector comparison modes.  */
2139       /* CC2 will never be set.  It is, however, part of the negated
2140 	 masks.  */
2141     case E_CCVIALLmode:
2142       switch (GET_CODE (code))
2143 	{
2144 	case EQ:
2145 	case GTU:
2146 	case GT:
2147 	case GE:        return CC0;
2148 	  /* The inverted modes are in fact *any* modes.  */
2149 	case NE:
2150 	case LEU:
2151 	case LE:
2152 	case LT:        return CC3 | CC1 | CC2;
2153 	default:        return -1;
2154 	}
2155 
2156     case E_CCVIANYmode:
2157       switch (GET_CODE (code))
2158 	{
2159 	case EQ:
2160 	case GTU:
2161 	case GT:
2162 	case GE:        return CC0 | CC1;
2163 	  /* The inverted modes are in fact *all* modes.  */
2164 	case NE:
2165 	case LEU:
2166 	case LE:
2167 	case LT:        return CC3 | CC2;
2168 	default:        return -1;
2169 	}
2170     case E_CCVFALLmode:
2171       switch (GET_CODE (code))
2172 	{
2173 	case EQ:
2174 	case GT:
2175 	case GE:        return CC0;
2176 	  /* The inverted modes are in fact *any* modes.  */
2177 	case NE:
2178 	case UNLE:
2179 	case UNLT:      return CC3 | CC1 | CC2;
2180 	default:        return -1;
2181 	}
2182 
2183     case E_CCVFANYmode:
2184       switch (GET_CODE (code))
2185 	{
2186 	case EQ:
2187 	case GT:
2188 	case GE:        return CC0 | CC1;
2189 	  /* The inverted modes are in fact *all* modes.  */
2190 	case NE:
2191 	case UNLE:
2192 	case UNLT:      return CC3 | CC2;
2193 	default:        return -1;
2194 	}
2195 
2196     case E_CCRAWmode:
2197       switch (GET_CODE (code))
2198 	{
2199 	case EQ:
2200 	  return INTVAL (XEXP (code, 1));
2201 	case NE:
2202 	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
2203 	default:
2204 	  gcc_unreachable ();
2205 	}
2206 
2207     default:
2208       return -1;
2209     }
2210 }
2211 
2212 
2213 /* Return branch condition mask to implement a compare and branch
2214    specified by CODE.  Return -1 for invalid comparisons.  */
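   Compare-and-branch only distinguishes equal/low/high, so only the
   three mask bits CC0, CC1 and CC2 are used here; there is no CC3.  */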
2215 
2216 int
2217 s390_compare_and_branch_condition_mask (rtx code)
2218 {
2219   const int CC0 = 1 << 3;
2220   const int CC1 = 1 << 2;
2221   const int CC2 = 1 << 1;
2222 
2223   switch (GET_CODE (code))
2224     {
2225     case EQ:
2226       return CC0;
2227     case NE:
2228       return CC1 | CC2;
2229     case LT:
2230     case LTU:
2231       return CC1;
2232     case GT:
2233     case GTU:
2234       return CC2;
2235     case LE:
2236     case LEU:
2237       return CC0 | CC1;
2238     case GE:
2239     case GEU:
2240       return CC0 | CC2;
2241     default:
2242       gcc_unreachable ();
2243     }
2244   return -1;
2245 }
2246 
2247 /* If INV is false, return assembler mnemonic string to implement
2248    a branch specified by CODE.  If INV is true, return mnemonic
2249    for the corresponding inverted branch.  */
2250 
2251 static const char *
2252 s390_branch_condition_mnemonic (rtx code, int inv)
2253 {
2254   int mask;
2255 
2256   static const char *const mnemonic[16] =
2257     {
2258       NULL, "o", "h", "nle",
2259       "l", "nhe", "lh", "ne",
2260       "e", "nlh", "he", "nl",
2261       "le", "nh", "no", NULL
2262     };
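  /* The table above is indexed by the 4-bit condition mask computed
     below, e.g. mask 8 (CC0 only) yields "e" and its inverse, mask 7,
     yields "ne".  */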
2263 
2264   if (GET_CODE (XEXP (code, 0)) == REG
2265       && REGNO (XEXP (code, 0)) == CC_REGNUM
2266       && (XEXP (code, 1) == const0_rtx
2267 	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2268 	      && CONST_INT_P (XEXP (code, 1)))))
2269     mask = s390_branch_condition_mask (code);
2270   else
2271     mask = s390_compare_and_branch_condition_mask (code);
2272 
2273   gcc_assert (mask >= 0);
2274 
2275   if (inv)
2276     mask ^= 15;
2277 
2278   gcc_assert (mask >= 1 && mask <= 14);
2279 
2280   return mnemonic[mask];
2281 }
2282 
2283 /* Return the part of op which has a value different from def.
2284    The size of the part is determined by mode.
2285    Use this function only if you already know that op really
2286    contains such a part.  */
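/* E.g. s390_extract_part (GEN_INT (0x0000ff00), QImode, 0) scans the
   QImode chunks starting with the least significant one and returns the
   first chunk different from DEF, here 0xff.  */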
2287 
2288 unsigned HOST_WIDE_INT
2289 s390_extract_part (rtx op, machine_mode mode, int def)
2290 {
2291   unsigned HOST_WIDE_INT value = 0;
2292   int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2293   int part_bits = GET_MODE_BITSIZE (mode);
2294   unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2295   int i;
2296 
2297   for (i = 0; i < max_parts; i++)
2298     {
2299       if (i == 0)
2300 	value = UINTVAL (op);
2301       else
2302 	value >>= part_bits;
2303 
2304       if ((value & part_mask) != (def & part_mask))
2305 	return value & part_mask;
2306     }
2307 
2308   gcc_unreachable ();
2309 }
2310 
2311 /* If OP is an integer constant of mode MODE with exactly one
2312    part of mode PART_MODE unequal to DEF, return the number of that
2313    part. Otherwise, return -1.  */
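/* Parts are numbered starting with the most significant one, e.g.
   s390_single_part (GEN_INT (0x0000ff00), SImode, QImode, 0) returns 2,
   the only byte of the SImode value that differs from zero.  */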
2314 
2315 int
2316 s390_single_part (rtx op,
2317 		  machine_mode mode,
2318 		  machine_mode part_mode,
2319 		  int def)
2320 {
2321   unsigned HOST_WIDE_INT value = 0;
2322   int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2323   unsigned HOST_WIDE_INT part_mask
2324     = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2325   int i, part = -1;
2326 
2327   if (GET_CODE (op) != CONST_INT)
2328     return -1;
2329 
2330   for (i = 0; i < n_parts; i++)
2331     {
2332       if (i == 0)
2333 	value = UINTVAL (op);
2334       else
2335 	value >>= GET_MODE_BITSIZE (part_mode);
2336 
2337       if ((value & part_mask) != (def & part_mask))
2338 	{
2339 	  if (part != -1)
2340 	    return -1;
2341 	  else
2342 	    part = i;
2343 	}
2344     }
2345   return part == -1 ? -1 : n_parts - 1 - part;
2346 }
2347 
2348 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2349    bits and no other bits are set in (the lower SIZE bits of) IN.
2350 
2351    PSTART and PEND can be used to obtain the start and end
2352    position (inclusive) of the bitfield relative to 64
2353    bits. *PSTART / *PEND gives the position of the first/last bit
2354    of the bitfield counting from the highest order bit starting
2355    with zero.  */
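/* E.g. IN = 0x0ff0 with SIZE = 16 yields true with *PSTART == 52 and
   *PEND == 59.  */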
2356 
2357 bool
2358 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2359 				  int *pstart, int *pend)
2360 {
2361   int start;
2362   int end = -1;
2363   int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2364   int highbit = HOST_BITS_PER_WIDE_INT - size;
2365   unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2366 
2367   gcc_assert (!!pstart == !!pend);
2368   for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2369     if (end == -1)
2370       {
2371 	/* Look for the rightmost bit of a contiguous range of ones.  */
2372 	if (bitmask & in)
2373 	  /* Found it.  */
2374 	  end = start;
2375       }
2376     else
2377       {
2378 	/* Look for the first zero bit after the range of ones.  */
2379 	if (! (bitmask & in))
2380 	  /* Found it.  */
2381 	  break;
2382       }
2383   /* We're one past the last one-bit.  */
2384   start++;
2385 
2386   if (end == -1)
2387     /* No one bits found.  */
2388     return false;
2389 
2390   if (start > highbit)
2391     {
2392       unsigned HOST_WIDE_INT mask;
2393 
2394       /* Calculate a mask for all bits beyond the contiguous bits.  */
2395       mask = ((~HOST_WIDE_INT_0U >> highbit)
2396 	      & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2397       if (mask & in)
2398 	/* There are more bits set beyond the first range of one bits.  */
2399 	return false;
2400     }
2401 
2402   if (pstart)
2403     {
2404       *pstart = start;
2405       *pend = end;
2406     }
2407 
2408   return true;
2409 }
2410 
2411 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2412    if ~IN contains a contiguous bitfield.  In that case, *END is <
2413    *START.
2414 
2415    If WRAP_P is true, a bitmask that wraps around is also tested.
2416    When a wraparound occurs, *START is greater than *END (in
2417    non-null pointers), and the uppermost (64 - SIZE) bits are thus
2418    part of the range.  If WRAP_P is false, no wraparound is
2419    tested.  */
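/* E.g. IN = 0xff000000000000ff with SIZE = 64 and WRAP_P = true yields
   true with *START == 56 and *END == 7, i.e. the field covers bit
   positions 56-63 and wraps around to positions 0-7.  */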
2420 
2421 bool
2422 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2423 			   int size, int *start, int *end)
2424 {
2425   int bs = HOST_BITS_PER_WIDE_INT;
2426   bool b;
2427 
2428   gcc_assert (!!start == !!end);
2429   if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2430     /* This cannot be expressed as a contiguous bitmask.  Exit early because
2431        the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2432        a valid bitmask.  */
2433     return false;
2434   b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2435   if (b)
2436     return true;
2437   if (! wrap_p)
2438     return false;
2439   b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2440   if (b && start)
2441     {
2442       int s = *start;
2443       int e = *end;
2444 
2445       gcc_assert (s >= 1);
2446       *start = ((e + 1) & (bs - 1));
2447       *end = ((s - 1 + bs) & (bs - 1));
2448     }
2449 
2450   return b;
2451 }
2452 
2453 /* Return true if OP contains the same contiguous bitfield in *all*
2454    its elements.  START and END can be used to obtain the start and
2455    end position of the bitfield.
2456 
2457    START/END give the position of the first/last bit of the bitfield
2458    counting from the lowest order bit starting with zero.  In order to
2459    use these values for S/390 instructions this has to be converted to
2460    "bits big endian" style.  */
2461 
2462 bool
2463 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2464 {
2465   unsigned HOST_WIDE_INT mask;
2466   int size;
2467   rtx elt;
2468   bool b;
2469 
2470   gcc_assert (!!start == !!end);
2471   if (!const_vec_duplicate_p (op, &elt)
2472       || !CONST_INT_P (elt))
2473     return false;
2474 
2475   size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2476 
2477   /* We cannot deal with V1TI/V1TF. This would require a vgmq.  */
2478   if (size > 64)
2479     return false;
2480 
2481   mask = UINTVAL (elt);
2482 
2483   b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2484   if (b)
2485     {
2486       if (start)
2487 	{
2488 	  *start -= (HOST_BITS_PER_WIDE_INT - size);
2489 	  *end -= (HOST_BITS_PER_WIDE_INT - size);
2490 	}
2491       return true;
2492     }
2493   else
2494     return false;
2495 }
2496 
2497 /* Return true if OP consists only of byte chunks being either 0 or
2498    0xff.  If MASK is !=NULL a byte mask is generated which is
2499    appropriate for the vector generate byte mask instruction.  */
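/* E.g. a V2DImode constant vector with both elements equal to 0xff
   yields *MASK == 0x0101, selecting the least significant byte of each
   doubleword.  */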
2500 
2501 bool
2502 s390_bytemask_vector_p (rtx op, unsigned *mask)
2503 {
2504   int i;
2505   unsigned tmp_mask = 0;
2506   int nunit, unit_size;
2507 
2508   if (!VECTOR_MODE_P (GET_MODE (op))
2509       || GET_CODE (op) != CONST_VECTOR
2510       || !CONST_INT_P (XVECEXP (op, 0, 0)))
2511     return false;
2512 
2513   nunit = GET_MODE_NUNITS (GET_MODE (op));
2514   unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2515 
2516   for (i = 0; i < nunit; i++)
2517     {
2518       unsigned HOST_WIDE_INT c;
2519       int j;
2520 
2521       if (!CONST_INT_P (XVECEXP (op, 0, i)))
2522 	return false;
2523 
2524       c = UINTVAL (XVECEXP (op, 0, i));
2525       for (j = 0; j < unit_size; j++)
2526 	{
2527 	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2528 	    return false;
2529 	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2530 	  c = c >> BITS_PER_UNIT;
2531 	}
2532     }
2533 
2534   if (mask != NULL)
2535     *mask = tmp_mask;
2536 
2537   return true;
2538 }
2539 
2540 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2541    equivalent to a shift followed by the AND.  In particular, CONTIG
2542    should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
2543    for ROTL indicate a rotate to the right.  */
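/* E.g. s390_extzv_shift_ok (64, 8, 0xff00) is true since none of the
   masked bits crosses the wrap-around gap, whereas
   s390_extzv_shift_ok (64, 10, 0xff00) is false.  */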
2544 
2545 bool
2546 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2547 {
2548   int start, end;
2549   bool ok;
2550 
2551   ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2552   gcc_assert (ok);
2553 
2554   if (rotl >= 0)
2555     return (64 - end >= rotl);
2556   else
2557     {
2558       /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2559 	 DImode.  */
2560       rotl = -rotl + (64 - bitsize);
2561       return (start >= rotl);
2562     }
2563 }
2564 
2565 /* Check whether we can (and want to) split a double-word
2566    move in mode MODE from SRC to DST into two single-word
2567    moves, moving the subword FIRST_SUBWORD first.  */
2568 
2569 bool
2570 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2571 {
2572   /* Floating point and vector registers cannot be split.  */
2573   if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2574     return false;
2575 
2576   /* Non-offsettable memory references cannot be split.  */
2577   if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2578       || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2579     return false;
2580 
2581   /* Moving the first subword must not clobber a register
2582      needed to move the second subword.  */
2583   if (register_operand (dst, mode))
2584     {
2585       rtx subreg = operand_subword (dst, first_subword, 0, mode);
2586       if (reg_overlap_mentioned_p (subreg, src))
2587 	return false;
2588     }
2589 
2590   return true;
2591 }
2592 
2593 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2594    and [MEM2, MEM2 + SIZE] do overlap and false
2595    otherwise.  */
2596 
2597 bool
2598 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2599 {
2600   rtx addr1, addr2, addr_delta;
2601   HOST_WIDE_INT delta;
2602 
2603   if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2604     return true;
2605 
2606   if (size == 0)
2607     return false;
2608 
2609   addr1 = XEXP (mem1, 0);
2610   addr2 = XEXP (mem2, 0);
2611 
2612   addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2613 
2614   /* This overlapping check is used by peepholes merging memory block operations.
2615      Overlapping operations would otherwise be recognized by the S/390 hardware
2616      and would fall back to a slower implementation. Allowing overlapping
2617      operations would lead to slow code but not to wrong code. Therefore we are
2618      somewhat optimistic if we cannot prove that the memory blocks are
2619      overlapping.
2620      That's why we return false here although this may accept operations on
2621      overlapping memory areas.  */
2622   if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2623     return false;
2624 
2625   delta = INTVAL (addr_delta);
2626 
2627   if (delta == 0
2628       || (delta > 0 && delta < size)
2629       || (delta < 0 && -delta < size))
2630     return true;
2631 
2632   return false;
2633 }
2634 
2635 /* Check whether the address of memory reference MEM2 equals exactly
2636    the address of memory reference MEM1 plus DELTA.  Return true if
2637    we can prove this to be the case, false otherwise.  */
2638 
2639 bool
2640 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2641 {
2642   rtx addr1, addr2, addr_delta;
2643 
2644   if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2645     return false;
2646 
2647   addr1 = XEXP (mem1, 0);
2648   addr2 = XEXP (mem2, 0);
2649 
2650   addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2651   if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2652     return false;
2653 
2654   return true;
2655 }
2656 
2657 /* Expand logical operator CODE in mode MODE with operands OPERANDS.  */
2658 
2659 void
2660 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2661 			      rtx *operands)
2662 {
2663   machine_mode wmode = mode;
2664   rtx dst = operands[0];
2665   rtx src1 = operands[1];
2666   rtx src2 = operands[2];
2667   rtx op, clob, tem;
2668 
2669   /* If we cannot handle the operation directly, use a temp register.  */
2670   if (!s390_logical_operator_ok_p (operands))
2671     dst = gen_reg_rtx (mode);
2672 
2673   /* QImode and HImode patterns make sense only if we have a destination
2674      in memory.  Otherwise perform the operation in SImode.  */
2675   if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2676     wmode = SImode;
2677 
2678   /* Widen operands if required.  */
2679   if (mode != wmode)
2680     {
2681       if (GET_CODE (dst) == SUBREG
2682 	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2683 	dst = tem;
2684       else if (REG_P (dst))
2685 	dst = gen_rtx_SUBREG (wmode, dst, 0);
2686       else
2687 	dst = gen_reg_rtx (wmode);
2688 
2689       if (GET_CODE (src1) == SUBREG
2690 	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2691 	src1 = tem;
2692       else if (GET_MODE (src1) != VOIDmode)
2693 	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2694 
2695       if (GET_CODE (src2) == SUBREG
2696 	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2697 	src2 = tem;
2698       else if (GET_MODE (src2) != VOIDmode)
2699 	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2700     }
2701 
2702   /* Emit the instruction.  */
2703   op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2704   clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2705   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2706 
2707   /* Fix up the destination if needed.  */
2708   if (dst != operands[0])
2709     emit_move_insn (operands[0], gen_lowpart (mode, dst));
2710 }
2711 
2712 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */
2713 
2714 bool
2715 s390_logical_operator_ok_p (rtx *operands)
2716 {
2717   /* If the destination operand is in memory, it needs to coincide
2718      with one of the source operands.  After reload, it has to be
2719      the first source operand.  */
2720   if (GET_CODE (operands[0]) == MEM)
2721     return rtx_equal_p (operands[0], operands[1])
2722 	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2723 
2724   return true;
2725 }
2726 
2727 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2728    operand IMMOP to switch from SS to SI type instructions.  */
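/* E.g. an SImode AND with the constant 0xffff00ff only changes the byte
   at offset 2, so *MEMOP is narrowed to that QImode byte and *IMMOP
   becomes 0.  */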
2729 
2730 void
2731 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2732 {
2733   int def = code == AND ? -1 : 0;
2734   HOST_WIDE_INT mask;
2735   int part;
2736 
2737   gcc_assert (GET_CODE (*memop) == MEM);
2738   gcc_assert (!MEM_VOLATILE_P (*memop));
2739 
2740   mask = s390_extract_part (*immop, QImode, def);
2741   part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2742   gcc_assert (part >= 0);
2743 
2744   *memop = adjust_address (*memop, QImode, part);
2745   *immop = gen_int_mode (mask, QImode);
2746 }
2747 
2748 
2749 /* How to allocate a 'struct machine_function'.  */
2750 
2751 static struct machine_function *
2752 s390_init_machine_status (void)
2753 {
2754   return ggc_cleared_alloc<machine_function> ();
2755 }
2756 
2757 /* Map for smallest class containing reg regno.  */
2758 
2759 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2760 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  0 */
2761   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  4 */
2762   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /*  8 */
2763   ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,  /* 12 */
2764   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 16 */
2765   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 20 */
2766   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 24 */
2767   FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,    /* 28 */
2768   ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,  /* 32 */
2769   ACCESS_REGS,	ACCESS_REGS, VEC_REGS, VEC_REGS,  /* 36 */
2770   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 40 */
2771   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 44 */
2772   VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS,         /* 48 */
2773   VEC_REGS, VEC_REGS                              /* 52 */
2774 };
2775 
2776 /* Return attribute type of insn.  */
2777 
2778 static enum attr_type
2779 s390_safe_attr_type (rtx_insn *insn)
2780 {
2781   if (recog_memoized (insn) >= 0)
2782     return get_attr_type (insn);
2783   else
2784     return TYPE_NONE;
2785 }
2786 
2787 /* Return attribute relative_long of insn.  */
2788 
2789 static bool
2790 s390_safe_relative_long_p (rtx_insn *insn)
2791 {
2792   if (recog_memoized (insn) >= 0)
2793     return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
2794   else
2795     return false;
2796 }
2797 
2798 /* Return true if DISP is a valid short displacement.  */
2799 
2800 static bool
2801 s390_short_displacement (rtx disp)
2802 {
2803   /* No displacement is OK.  */
2804   if (!disp)
2805     return true;
2806 
2807   /* Without the long displacement facility we don't need to
2808      distinguish between long and short displacements.  */
2809   if (!TARGET_LONG_DISPLACEMENT)
2810     return true;
2811 
2812   /* Integer displacement in range.  */
2813   if (GET_CODE (disp) == CONST_INT)
2814     return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2815 
2816   /* GOT offset is not OK, the GOT can be large.  */
2817   if (GET_CODE (disp) == CONST
2818       && GET_CODE (XEXP (disp, 0)) == UNSPEC
2819       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2820 	  || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2821     return false;
2822 
2823   /* All other symbolic constants are literal pool references,
2824      which are OK as the literal pool must be small.  */
2825   if (GET_CODE (disp) == CONST)
2826     return true;
2827 
2828   return false;
2829 }
2830 
2831 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2832    If successful, also determines the
2833    following characteristics of `ref': `is_ptr' - whether it can be an
2834    LA argument, `is_base_ptr' - whether the resulting base is a well-known
2835    base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2836    considered a literal pool pointer for purposes of avoiding two different
2837    literal pool pointers per insn during or after reload (`B' constraint).  */
2838 static bool
2839 s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
2840 				  bool *is_base_ptr, bool *is_pool_ptr)
2841 {
2842   if (!*ref)
2843     return true;
2844 
2845   if (GET_CODE (*ref) == UNSPEC)
2846     switch (XINT (*ref, 1))
2847       {
2848       case UNSPEC_LTREF:
2849 	if (!*disp)
2850 	  *disp = gen_rtx_UNSPEC (Pmode,
2851 				  gen_rtvec (1, XVECEXP (*ref, 0, 0)),
2852 				  UNSPEC_LTREL_OFFSET);
2853 	else
2854 	  return false;
2855 
2856 	*ref = XVECEXP (*ref, 0, 1);
2857 	break;
2858 
2859       default:
2860 	return false;
2861       }
2862 
2863   if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
2864     return false;
2865 
2866   if (REGNO (*ref) == STACK_POINTER_REGNUM
2867       || REGNO (*ref) == FRAME_POINTER_REGNUM
2868       || ((reload_completed || reload_in_progress)
2869 	  && frame_pointer_needed
2870 	  && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
2871       || REGNO (*ref) == ARG_POINTER_REGNUM
2872       || (flag_pic
2873 	  && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
2874     *is_ptr = *is_base_ptr = true;
2875 
2876   if ((reload_completed || reload_in_progress)
2877       && *ref == cfun->machine->base_reg)
2878     *is_ptr = *is_base_ptr = *is_pool_ptr = true;
2879 
2880   return true;
2881 }
2882 
2883 /* Decompose a RTL expression ADDR for a memory address into
2884    its components, returned in OUT.
2885 
2886    Returns false if ADDR is not a valid memory address, true
2887    otherwise.  If OUT is NULL, don't return the components,
2888    but check for validity only.
2889 
2890    Note: Only addresses in canonical form are recognized.
2891    LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2892    canonical form so that they will be recognized.  */
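/* A fully general address in canonical form thus looks like
   (plus (plus (reg index) (reg base)) (const_int disp)), where any of
   the three components may be missing.  */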
2893 
2894 static int
2895 s390_decompose_address (rtx addr, struct s390_address *out)
2896 {
2897   HOST_WIDE_INT offset = 0;
2898   rtx base = NULL_RTX;
2899   rtx indx = NULL_RTX;
2900   rtx disp = NULL_RTX;
2901   rtx orig_disp;
2902   bool pointer = false;
2903   bool base_ptr = false;
2904   bool indx_ptr = false;
2905   bool literal_pool = false;
2906 
2907   /* We may need to substitute the literal pool base register into the address
2908      below.  However, at this point we do not know which register is going to
2909      be used as base, so we substitute the arg pointer register.  This is going
2910      to be treated as holding a pointer below -- it shouldn't be used for any
2911      other purpose.  */
2912   rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2913 
2914   /* Decompose address into base + index + displacement.  */
2915 
2916   if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2917     base = addr;
2918 
2919   else if (GET_CODE (addr) == PLUS)
2920     {
2921       rtx op0 = XEXP (addr, 0);
2922       rtx op1 = XEXP (addr, 1);
2923       enum rtx_code code0 = GET_CODE (op0);
2924       enum rtx_code code1 = GET_CODE (op1);
2925 
2926       if (code0 == REG || code0 == UNSPEC)
2927 	{
2928 	  if (code1 == REG || code1 == UNSPEC)
2929 	    {
2930 	      indx = op0;	/* index + base */
2931 	      base = op1;
2932 	    }
2933 
2934 	  else
2935 	    {
2936 	      base = op0;	/* base + displacement */
2937 	      disp = op1;
2938 	    }
2939 	}
2940 
2941       else if (code0 == PLUS)
2942 	{
2943 	  indx = XEXP (op0, 0);	/* index + base + disp */
2944 	  base = XEXP (op0, 1);
2945 	  disp = op1;
2946 	}
2947 
2948       else
2949 	{
2950 	  return false;
2951 	}
2952     }
2953 
2954   else
2955     disp = addr;		/* displacement */
2956 
2957   /* Extract integer part of displacement.  */
2958   orig_disp = disp;
2959   if (disp)
2960     {
2961       if (GET_CODE (disp) == CONST_INT)
2962 	{
2963 	  offset = INTVAL (disp);
2964 	  disp = NULL_RTX;
2965 	}
2966       else if (GET_CODE (disp) == CONST
2967 	       && GET_CODE (XEXP (disp, 0)) == PLUS
2968 	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2969 	{
2970 	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2971 	  disp = XEXP (XEXP (disp, 0), 0);
2972 	}
2973     }
2974 
2975   /* Strip off CONST here to avoid special case tests later.  */
2976   if (disp && GET_CODE (disp) == CONST)
2977     disp = XEXP (disp, 0);
2978 
2979   /* We can convert literal pool addresses to
2980      displacements by basing them off the base register.  */
2981   if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2982     {
2983       if (base || indx)
2984 	return false;
2985 
2986       base = fake_pool_base, literal_pool = true;
2987 
2988       /* Mark up the displacement.  */
2989       disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2990 			     UNSPEC_LTREL_OFFSET);
2991     }
2992 
2993   /* Validate base register.  */
2994   if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
2995 					 &literal_pool))
2996     return false;
2997 
2998   /* Validate index register.  */
2999   if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
3000 					 &literal_pool))
3001     return false;
3002 
3003   /* Prefer to use pointer as base, not index.  */
3004   if (base && indx && !base_ptr
3005       && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
3006     {
3007       rtx tmp = base;
3008       base = indx;
3009       indx = tmp;
3010     }
3011 
3012   /* Validate displacement.  */
3013   if (!disp)
3014     {
3015       /* If virtual registers are involved, the displacement will change later
3016 	 anyway as the virtual registers get eliminated.  This could make a
3017 	 valid displacement invalid, but it is more likely to make an invalid
3018 	 displacement valid, because we sometimes access the register save area
3019 	 via negative offsets to one of those registers.
3020 	 Thus we don't check the displacement for validity here.  If after
3021 	 elimination the displacement turns out to be invalid after all,
3022 	 this is fixed up by reload in any case.  */
3023       /* LRA always keeps displacements up to date, and we need to know
3024 	 that the displacement is right during all of LRA, not only at the
3025 	 final elimination.  */
3026       if (lra_in_progress
3027 	  || (base != arg_pointer_rtx
3028 	      && indx != arg_pointer_rtx
3029 	      && base != return_address_pointer_rtx
3030 	      && indx != return_address_pointer_rtx
3031 	      && base != frame_pointer_rtx
3032 	      && indx != frame_pointer_rtx
3033 	      && base != virtual_stack_vars_rtx
3034 	      && indx != virtual_stack_vars_rtx))
3035 	if (!DISP_IN_RANGE (offset))
3036 	  return false;
3037     }
3038   else
3039     {
3040       /* All the special cases are pointers.  */
3041       pointer = true;
3042 
3043       /* In the small-PIC case, the linker converts @GOT
3044 	 and @GOTNTPOFF offsets to possible displacements.  */
3045       if (GET_CODE (disp) == UNSPEC
3046 	  && (XINT (disp, 1) == UNSPEC_GOT
3047 	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3048 	  && flag_pic == 1)
3049 	{
3050 	  ;
3051 	}
3052 
3053       /* Accept pool label offsets.  */
3054       else if (GET_CODE (disp) == UNSPEC
3055 	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3056 	;
3057 
3058       /* Accept literal pool references.  */
3059       else if (GET_CODE (disp) == UNSPEC
3060 	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3061 	{
3062 	  /* In case CSE pulled a non literal pool reference out of
3063 	     the pool we have to reject the address.  This is
3064 	     especially important when loading the GOT pointer on non
3065 	     zarch CPUs.  In this case the literal pool contains an lt
3066 	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3067 	     will most likely exceed the displacement.  */
3068 	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3069 	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3070 	    return false;
3071 
3072 	  orig_disp = gen_rtx_CONST (Pmode, disp);
3073 	  if (offset)
3074 	    {
3075 	      /* If we have an offset, make sure it does not
3076 		 exceed the size of the constant pool entry.
3077 		 Otherwise we might generate an out-of-range
3078 		 displacement for the base register form.  */
3079 	      rtx sym = XVECEXP (disp, 0, 0);
3080 	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3081 		return false;
3082 
3083 	      orig_disp = plus_constant (Pmode, orig_disp, offset);
3084 	    }
3085 	}
3086 
3087       else
3088 	return false;
3089     }
3090 
3091   if (!base && !indx)
3092     pointer = true;
3093 
3094   if (out)
3095     {
3096       out->base = base;
3097       out->indx = indx;
3098       out->disp = orig_disp;
3099       out->pointer = pointer;
3100       out->literal_pool = literal_pool;
3101     }
3102 
3103   return true;
3104 }
3105 
3106 /* Decompose a RTL expression OP for an address style operand into its
3107    components, and return the base register in BASE and the offset in
3108    OFFSET.  While OP looks like an address it is never supposed to be
3109    used as such.
3110 
3111    Return true if OP is a valid address operand, false if not.  */
3112 
3113 bool
3114 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3115 					HOST_WIDE_INT *offset)
3116 {
3117   rtx off = NULL_RTX;
3118 
3119   /* We can have an integer constant, an address register,
3120      or a sum of the two.  */
3121   if (CONST_SCALAR_INT_P (op))
3122     {
3123       off = op;
3124       op = NULL_RTX;
3125     }
3126   if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3127     {
3128       off = XEXP (op, 1);
3129       op = XEXP (op, 0);
3130     }
3131   while (op && GET_CODE (op) == SUBREG)
3132     op = SUBREG_REG (op);
3133 
3134   if (op && GET_CODE (op) != REG)
3135     return false;
3136 
3137   if (offset)
3138     {
3139       if (off == NULL_RTX)
3140 	*offset = 0;
3141       else if (CONST_INT_P (off))
3142 	*offset = INTVAL (off);
3143       else if (CONST_WIDE_INT_P (off))
3144 	/* The offset will anyway be cut down to 12 bits so take just
3145 	   the lowest order chunk of the wide int.  */
3146 	*offset = CONST_WIDE_INT_ELT (off, 0);
3147       else
3148 	gcc_unreachable ();
3149     }
3150   if (base)
3151     *base = op;
3152 
3153    return true;
3154 }
3155 
3156 /*  Check that OP is a valid shift count operand.
3157     It should be of the following structure:
3158       (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3159     where the subreg, the and, and the plus are optional.
3160 
3161     If IMPLICIT_MASK is > 0 and OP contains an
3162       (AND ... immediate)
3163     it is checked whether IMPLICIT_MASK and the immediate match.
3164     Otherwise, no checking is performed.
3165   */
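/* E.g. with IMPLICIT_MASK == 63 (only the low six bits of the count
   matter) an operand like (and (reg) (const_int 63)) or
   (and (reg) (const_int 127)) is accepted, since the AND keeps all bits
   of the implicit mask.  */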
3166 bool
3167 s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
3168 {
3169   /* Strip subreg.  */
3170   while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
3171     op = XEXP (op, 0);
3172 
3173   /* Check for an and with proper constant.  */
3174   if (GET_CODE (op) == AND)
3175   {
3176     rtx op1 = XEXP (op, 0);
3177     rtx imm = XEXP (op, 1);
3178 
3179     if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
3180       op1 = XEXP (op1, 0);
3181 
3182     if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
3183       return false;
3184 
3185     if (!immediate_operand (imm, GET_MODE (imm)))
3186       return false;
3187 
3188     HOST_WIDE_INT val = INTVAL (imm);
3189     if (implicit_mask > 0
3190 	&& (val & implicit_mask) != implicit_mask)
3191       return false;
3192 
3193     op = op1;
3194   }
3195 
3196   /* Check the rest.  */
3197   return s390_decompose_addrstyle_without_index (op, NULL, NULL);
3198 }
3199 
3200 /* Return true if OP is a valid address without index.  */
3201 
3202 bool
3203 s390_legitimate_address_without_index_p (rtx op)
3204 {
3205   struct s390_address addr;
3206 
3207   if (!s390_decompose_address (XEXP (op, 0), &addr))
3208     return false;
3209   if (addr.indx)
3210     return false;
3211 
3212   return true;
3213 }
3214 
3215 
3216 /* Return TRUE if ADDR is an operand valid for a load/store relative
3217    instruction.  Be aware that the alignment of the operand needs to
3218    be checked separately.
3219    Valid addresses are single references or a sum of a reference and a
3220    constant integer. Return these parts in SYMREF and ADDEND.  You can
3221    pass NULL in SYMREF and/or ADDEND if you are not interested in these
3222    values.  */
3223 
3224 static bool
3225 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3226 {
3227   HOST_WIDE_INT tmpaddend = 0;
3228 
3229   if (GET_CODE (addr) == CONST)
3230     addr = XEXP (addr, 0);
3231 
3232   if (GET_CODE (addr) == PLUS)
3233     {
3234       if (!CONST_INT_P (XEXP (addr, 1)))
3235 	return false;
3236 
3237       tmpaddend = INTVAL (XEXP (addr, 1));
3238       addr = XEXP (addr, 0);
3239     }
3240 
3241   if (GET_CODE (addr) == SYMBOL_REF
3242       || (GET_CODE (addr) == UNSPEC
3243 	  && (XINT (addr, 1) == UNSPEC_GOTENT
3244 	      || XINT (addr, 1) == UNSPEC_PLT)))
3245     {
3246       if (symref)
3247 	*symref = addr;
3248       if (addend)
3249 	*addend = tmpaddend;
3250 
3251       return true;
3252     }
3253   return false;
3254 }
3255 
3256 /* Return true if the address in OP is valid for constraint letter C
3257    if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
3258    pool MEMs should be accepted.  Only the Q, R, S, T constraint
3259    letters are allowed for C.  */
3260 
3261 static int
3262 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3263 {
3264   rtx symref;
3265   struct s390_address addr;
3266   bool decomposed = false;
3267 
3268   if (!address_operand (op, GET_MODE (op)))
3269     return 0;
3270 
3271   /* This check makes sure that no symbolic address (except literal
3272      pool references) are accepted by the R or T constraints.  */
3273   if (s390_loadrelative_operand_p (op, &symref, NULL)
3274       && (!lit_pool_ok
3275           || !SYMBOL_REF_P (symref)
3276           || !CONSTANT_POOL_ADDRESS_P (symref)))
3277     return 0;
3278 
3279   /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
3280   if (!lit_pool_ok)
3281     {
3282       if (!s390_decompose_address (op, &addr))
3283 	return 0;
3284       if (addr.literal_pool)
3285 	return 0;
3286       decomposed = true;
3287     }
3288 
3289   /* With reload, we sometimes get intermediate address forms that are
3290      actually invalid as-is, but we need to accept them in the most
3291      generic cases below ('R' or 'T'), since reload will in fact fix
3292      them up.  LRA behaves differently here; we never see such forms,
3293      but on the other hand, we need to strictly reject every invalid
3294      address form.  After both reload and LRA invalid address forms
3295      must be rejected, because nothing will fix them up later.  Perform
3296      this check right up front.  */
3297   if (lra_in_progress || reload_completed)
3298     {
3299       if (!decomposed && !s390_decompose_address (op, &addr))
3300 	return 0;
3301       decomposed = true;
3302     }
3303 
3304   switch (c)
3305     {
3306     case 'Q': /* no index short displacement */
3307       if (!decomposed && !s390_decompose_address (op, &addr))
3308 	return 0;
3309       if (addr.indx)
3310 	return 0;
3311       if (!s390_short_displacement (addr.disp))
3312 	return 0;
3313       break;
3314 
3315     case 'R': /* with index short displacement */
3316       if (TARGET_LONG_DISPLACEMENT)
3317 	{
3318 	  if (!decomposed && !s390_decompose_address (op, &addr))
3319 	    return 0;
3320 	  if (!s390_short_displacement (addr.disp))
3321 	    return 0;
3322 	}
3323       /* Any invalid address here will be fixed up by reload,
3324 	 so accept it for the most generic constraint.  */
3325       break;
3326 
3327     case 'S': /* no index long displacement */
3328       if (!decomposed && !s390_decompose_address (op, &addr))
3329 	return 0;
3330       if (addr.indx)
3331 	return 0;
3332       break;
3333 
3334     case 'T': /* with index long displacement */
3335       /* Any invalid address here will be fixed up by reload,
3336 	 so accept it for the most generic constraint.  */
3337       break;
3338 
3339     default:
3340       return 0;
3341     }
3342   return 1;
3343 }
3344 
3345 
3346 /* Evaluates constraint strings described by the regular expression
3347    ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3348    the constraint given in STR, and 0 otherwise.  */
3349 
3350 int
3351 s390_mem_constraint (const char *str, rtx op)
3352 {
3353   char c = str[0];
3354 
3355   switch (c)
3356     {
3357     case 'A':
3358       /* Check for offsettable variants of memory constraints.  */
3359       if (!MEM_P (op) || MEM_VOLATILE_P (op))
3360 	return 0;
3361       if ((reload_completed || reload_in_progress)
3362 	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3363 	return 0;
3364       return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3365     case 'B':
3366       /* Check for non-literal-pool variants of memory constraints.  */
3367       if (!MEM_P (op))
3368 	return 0;
3369       return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3370     case 'Q':
3371     case 'R':
3372     case 'S':
3373     case 'T':
3374       if (GET_CODE (op) != MEM)
3375 	return 0;
3376       return s390_check_qrst_address (c, XEXP (op, 0), true);
3377     case 'Y':
3378       /* Simply check for the basic form of a shift count.  Reload will
3379 	 take care of making sure we have a proper base register.  */
3380       if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3381 	return 0;
3382       break;
3383     case 'Z':
3384       return s390_check_qrst_address (str[1], op, true);
3385     default:
3386       return 0;
3387     }
3388   return 1;
3389 }
3390 
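/* A rough sketch of the resulting letter combinations: "AQ" only
   matches a non-volatile, offsettable MEM whose address satisfies
   'Q'; "BT" matches a MEM whose address satisfies 'T' but does not
   reference the literal pool; a plain "T" requires OP itself to be a
   MEM; "ZT" applies the 'T' check directly to an address rtx without
   a surrounding MEM; and "Y" merely checks the address-style shape
   used for shift counts.  */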
3391 
3392 /* Evaluates constraint strings starting with letter O.  Input
3393    parameter C is the letter following the "O" in the constraint
3394    string.  Returns 1 if VALUE meets the respective constraint and 0
3395    otherwise.  */
3396 
3397 int
3398 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3399 {
3400   if (!TARGET_EXTIMM)
3401     return 0;
3402 
3403   switch (c)
3404     {
3405     case 's':
3406       return trunc_int_for_mode (value, SImode) == value;
3407 
3408     case 'p':
3409       return value == 0
3410 	|| s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3411 
3412     case 'n':
3413       return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3414 
3415     default:
3416       gcc_unreachable ();
3417     }
3418 }
3419 
3420 
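/* Rough examples, assuming the extended-immediate facility: "Os"
   accepts any value that survives truncation to SImode (e.g. -1 or
   0x7fffffff, but not 0x80000000); "Op" accepts 0 and any value whose
   upper 32 bits are clear (e.g. 0x80000000); "On" accepts negative
   values such as -1 for which, after subtracting one, only the low
   32 bits differ from all-ones.  */
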
3421 /* Evaluates constraint strings starting with letter N.  Parameter STR
3422    contains the letters following letter "N" in the constraint string.
3423    Returns true if VALUE matches the constraint.  */
3424 
3425 int
3426 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3427 {
3428   machine_mode mode, part_mode;
3429   int def;
3430   int part, part_goal;
3431 
3432 
3433   if (str[0] == 'x')
3434     part_goal = -1;
3435   else
3436     part_goal = str[0] - '0';
3437 
3438   switch (str[1])
3439     {
3440     case 'Q':
3441       part_mode = QImode;
3442       break;
3443     case 'H':
3444       part_mode = HImode;
3445       break;
3446     case 'S':
3447       part_mode = SImode;
3448       break;
3449     default:
3450       return 0;
3451     }
3452 
3453   switch (str[2])
3454     {
3455     case 'H':
3456       mode = HImode;
3457       break;
3458     case 'S':
3459       mode = SImode;
3460       break;
3461     case 'D':
3462       mode = DImode;
3463       break;
3464     default:
3465       return 0;
3466     }
3467 
3468   switch (str[3])
3469     {
3470     case '0':
3471       def = 0;
3472       break;
3473     case 'F':
3474       def = -1;
3475       break;
3476     default:
3477       return 0;
3478     }
3479 
3480   if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3481     return 0;
3482 
3483   part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3484   if (part < 0)
3485     return 0;
3486   if (part_goal != -1 && part_goal != part)
3487     return 0;
3488 
3489   return 1;
3490 }
3491 
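/* For instance, with STR == "xHD0" the value 0x0000ffff00000000 is
   accepted: viewed as a DImode constant split into HImode chunks,
   exactly one chunk differs from the default value 0 ('F' would ask
   for all-ones instead).  A leading digit instead of 'x' additionally
   pins which of the chunks is the one allowed to differ.  */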
3492 
3493 /* Returns true if the input parameter VALUE is a float zero.  */
3494 
3495 int
3496 s390_float_const_zero_p (rtx value)
3497 {
3498   return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3499 	  && value == CONST0_RTX (GET_MODE (value)));
3500 }
3501 
3502 /* Implement TARGET_REGISTER_MOVE_COST.  */
3503 
3504 static int
3505 s390_register_move_cost (machine_mode mode,
3506 			 reg_class_t from, reg_class_t to)
3507 {
3508   /* On s390, copy between fprs and gprs is expensive.  */
3509 
3510   /* It becomes somewhat faster when ldgr/lgdr are available.  */
3511   if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3512     {
3513       /* ldgr is single cycle. */
3514       if (reg_classes_intersect_p (from, GENERAL_REGS)
3515 	  && reg_classes_intersect_p (to, FP_REGS))
3516 	return 1;
3517       /* lgdr needs 3 cycles. */
3518       if (reg_classes_intersect_p (to, GENERAL_REGS)
3519 	  && reg_classes_intersect_p (from, FP_REGS))
3520 	return 3;
3521     }
3522 
3523   /* Otherwise copying is done via memory.  */
3524   if ((reg_classes_intersect_p (from, GENERAL_REGS)
3525        && reg_classes_intersect_p (to, FP_REGS))
3526       || (reg_classes_intersect_p (from, FP_REGS)
3527 	  && reg_classes_intersect_p (to, GENERAL_REGS)))
3528     return 10;
3529 
3530   /* We usually do not want to copy via CC.  */
3531   if (reg_classes_intersect_p (from, CC_REGS)
3532        || reg_classes_intersect_p (to, CC_REGS))
3533     return 5;
3534 
3535   return 1;
3536 }
3537 
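/* For example, on z10 and later a DImode GPR -> FPR copy is rated 1
   (ldgr) and FPR -> GPR is rated 3 (lgdr), while an SImode copy
   between the two register files still goes through memory and is
   rated 10; copies touching CC are rated 5 and everything else
   defaults to 1.  */
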
3538 /* Implement TARGET_MEMORY_MOVE_COST.  */
3539 
3540 static int
3541 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3542 		       reg_class_t rclass ATTRIBUTE_UNUSED,
3543 		       bool in ATTRIBUTE_UNUSED)
3544 {
3545   return 2;
3546 }
3547 
3548 /* Compute a (partial) cost for rtx X.  Return true if the complete
3549    cost has been computed, and false if subexpressions should be
3550    scanned.  In either case, *TOTAL contains the cost result.  The
3551    initial value of *TOTAL is the default value computed by
3552    rtx_cost.  It may be left unmodified.  OUTER_CODE contains the
3553    code of the superexpression of x.  */
3554 
3555 static bool
3556 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3557 		int opno ATTRIBUTE_UNUSED,
3558 		int *total, bool speed ATTRIBUTE_UNUSED)
3559 {
3560   int code = GET_CODE (x);
3561   switch (code)
3562     {
3563     case CONST:
3564     case CONST_INT:
3565     case LABEL_REF:
3566     case SYMBOL_REF:
3567     case CONST_DOUBLE:
3568     case CONST_WIDE_INT:
3569     case MEM:
3570       *total = 0;
3571       return true;
3572 
3573     case SET:
3574       {
3575 	/* Without this a conditional move instruction would be
3576 	   accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3577 	   comparison operator).  That's a bit pessimistic.  */
3578 
3579 	if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3580 	  return false;
3581 
3582 	rtx cond = XEXP (SET_SRC (x), 0);
3583 
3584 	if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3585 	  return false;
3586 
3587 	/* It is going to be a load/store on condition.  Make it
3588 	   slightly more expensive than a normal load.  */
3589 	*total = COSTS_N_INSNS (1) + 1;
3590 
3591 	rtx dst = SET_DEST (x);
3592 	rtx then = XEXP (SET_SRC (x), 1);
3593 	rtx els = XEXP (SET_SRC (x), 2);
3594 
3595 	/* It is a real IF-THEN-ELSE.  An additional move will be
3596 	   needed to implement that.  */
3597 	if (!TARGET_Z15
3598 	    && reload_completed
3599 	    && !rtx_equal_p (dst, then)
3600 	    && !rtx_equal_p (dst, els))
3601 	  *total += COSTS_N_INSNS (1) / 2;
3602 
3603 	/* A minor penalty for constants we cannot directly handle.  */
3604 	if ((CONST_INT_P (then) || CONST_INT_P (els))
3605 	    && (!TARGET_Z13 || MEM_P (dst)
3606 		|| (CONST_INT_P (then) && !satisfies_constraint_K (then))
3607 		|| (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3608 	  *total += COSTS_N_INSNS (1) / 2;
3609 
3610 	/* A store on condition can only handle register src operands.  */
3611 	if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3612 	  *total += COSTS_N_INSNS (1) / 2;
3613 
3614 	return true;
3615       }
3616     case IOR:
3617 
3618       /* nnrk, nngrk */
3619       if (TARGET_Z15
3620 	  && (mode == SImode || mode == DImode)
3621 	  && GET_CODE (XEXP (x, 0)) == NOT
3622 	  && GET_CODE (XEXP (x, 1)) == NOT)
3623 	{
3624 	  *total = COSTS_N_INSNS (1);
3625 	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
3626 	    *total += 1;
3627 	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
3628 	    *total += 1;
3629 	  return true;
3630 	}
3631 
3632       /* risbg */
3633       if (GET_CODE (XEXP (x, 0)) == AND
3634 	  && GET_CODE (XEXP (x, 1)) == ASHIFT
3635 	  && REG_P (XEXP (XEXP (x, 0), 0))
3636 	  && REG_P (XEXP (XEXP (x, 1), 0))
3637 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3638 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3639 	  && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3640 	      (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3641 	{
3642 	  *total = COSTS_N_INSNS (2);
3643 	  return true;
3644 	}
3645 
3646       /* ~AND on a 128 bit mode.  This can be done using a vector
3647 	 instruction.  */
3648       if (TARGET_VXE
3649 	  && GET_CODE (XEXP (x, 0)) == NOT
3650 	  && GET_CODE (XEXP (x, 1)) == NOT
3651 	  && REG_P (XEXP (XEXP (x, 0), 0))
3652 	  && REG_P (XEXP (XEXP (x, 1), 0))
3653 	  && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3654 	  && s390_hard_regno_mode_ok (VR0_REGNUM,
3655 				      GET_MODE (XEXP (XEXP (x, 0), 0))))
3656 	{
3657 	  *total = COSTS_N_INSNS (1);
3658 	  return true;
3659 	}
3660 
3661       *total = COSTS_N_INSNS (1);
3662       return false;
3663 
3664     case AND:
3665       /* nork, nogrk */
3666       if (TARGET_Z15
3667 	  && (mode == SImode || mode == DImode)
3668 	  && GET_CODE (XEXP (x, 0)) == NOT
3669 	  && GET_CODE (XEXP (x, 1)) == NOT)
3670 	{
3671 	  *total = COSTS_N_INSNS (1);
3672 	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
3673 	    *total += 1;
3674 	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
3675 	    *total += 1;
3676 	  return true;
3677 	}
3678       /* fallthrough */
3679     case ASHIFT:
3680     case ASHIFTRT:
3681     case LSHIFTRT:
3682     case ROTATE:
3683     case ROTATERT:
3684     case XOR:
3685     case NEG:
3686     case NOT:
3687     case PLUS:
3688     case MINUS:
3689       *total = COSTS_N_INSNS (1);
3690       return false;
3691 
3692     case MULT:
3693       switch (mode)
3694 	{
3695 	case E_SImode:
3696 	  {
3697 	    rtx left = XEXP (x, 0);
3698 	    rtx right = XEXP (x, 1);
3699 	    if (GET_CODE (right) == CONST_INT
3700 		&& CONST_OK_FOR_K (INTVAL (right)))
3701 	      *total = s390_cost->mhi;
3702 	    else if (GET_CODE (left) == SIGN_EXTEND)
3703 	      *total = s390_cost->mh;
3704 	    else
3705 	      *total = s390_cost->ms;  /* msr, ms, msy */
3706 	    break;
3707 	  }
3708 	case E_DImode:
3709 	  {
3710 	    rtx left = XEXP (x, 0);
3711 	    rtx right = XEXP (x, 1);
3712 	    if (TARGET_ZARCH)
3713 	      {
3714 		if (GET_CODE (right) == CONST_INT
3715 		    && CONST_OK_FOR_K (INTVAL (right)))
3716 		  *total = s390_cost->mghi;
3717 		else if (GET_CODE (left) == SIGN_EXTEND)
3718 		  *total = s390_cost->msgf;
3719 		else
3720 		  *total = s390_cost->msg;  /* msgr, msg */
3721 	      }
3722 	    else /* TARGET_31BIT */
3723 	      {
3724 		if (GET_CODE (left) == SIGN_EXTEND
3725 		    && GET_CODE (right) == SIGN_EXTEND)
3726 		  /* mulsidi case: mr, m */
3727 		  *total = s390_cost->m;
3728 		else if (GET_CODE (left) == ZERO_EXTEND
3729 			 && GET_CODE (right) == ZERO_EXTEND)
3730 		  /* umulsidi case: ml, mlr */
3731 		  *total = s390_cost->ml;
3732 		else
3733 		  /* Complex calculation is required.  */
3734 		  *total = COSTS_N_INSNS (40);
3735 	      }
3736 	    break;
3737 	  }
3738 	case E_SFmode:
3739 	case E_DFmode:
3740 	  *total = s390_cost->mult_df;
3741 	  break;
3742 	case E_TFmode:
3743 	  *total = s390_cost->mxbr;
3744 	  break;
3745 	default:
3746 	  return false;
3747 	}
3748       return false;
3749 
3750     case FMA:
3751       switch (mode)
3752 	{
3753 	case E_DFmode:
3754 	  *total = s390_cost->madbr;
3755 	  break;
3756 	case E_SFmode:
3757 	  *total = s390_cost->maebr;
3758 	  break;
3759 	default:
3760 	  return false;
3761 	}
3762       /* Negate in the third argument is free: FMSUB.  */
3763       if (GET_CODE (XEXP (x, 2)) == NEG)
3764 	{
3765 	  *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3766 		     + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3767 		     + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3768 	  return true;
3769 	}
3770       return false;
3771 
3772     case UDIV:
3773     case UMOD:
3774       if (mode == TImode)	       /* 128 bit division */
3775 	*total = s390_cost->dlgr;
3776       else if (mode == DImode)
3777 	{
3778 	  rtx right = XEXP (x, 1);
3779 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3780 	    *total = s390_cost->dlr;
3781 	  else				       /* 64 by 64 bit division */
3782 	    *total = s390_cost->dlgr;
3783 	}
3784       else if (mode == SImode)         /* 32 bit division */
3785 	*total = s390_cost->dlr;
3786       return false;
3787 
3788     case DIV:
3789     case MOD:
3790       if (mode == DImode)
3791 	{
3792 	  rtx right = XEXP (x, 1);
3793 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3794 	    if (TARGET_ZARCH)
3795 	      *total = s390_cost->dsgfr;
3796 	    else
3797 	      *total = s390_cost->dr;
3798 	  else				       /* 64 by 64 bit division */
3799 	    *total = s390_cost->dsgr;
3800 	}
3801       else if (mode == SImode)         /* 32 bit division */
3802 	*total = s390_cost->dlr;
3803       else if (mode == SFmode)
3804 	{
3805 	  *total = s390_cost->debr;
3806 	}
3807       else if (mode == DFmode)
3808 	{
3809 	  *total = s390_cost->ddbr;
3810 	}
3811       else if (mode == TFmode)
3812 	{
3813 	  *total = s390_cost->dxbr;
3814 	}
3815       return false;
3816 
3817     case SQRT:
3818       if (mode == SFmode)
3819 	*total = s390_cost->sqebr;
3820       else if (mode == DFmode)
3821 	*total = s390_cost->sqdbr;
3822       else /* TFmode */
3823 	*total = s390_cost->sqxbr;
3824       return false;
3825 
3826     case SIGN_EXTEND:
3827     case ZERO_EXTEND:
3828       if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3829 	  || outer_code == PLUS || outer_code == MINUS
3830 	  || outer_code == COMPARE)
3831 	*total = 0;
3832       return false;
3833 
3834     case COMPARE:
3835       *total = COSTS_N_INSNS (1);
3836 
3837       /* nxrk, nxgrk ~(a^b)==0 */
3838       if (TARGET_Z15
3839 	  && GET_CODE (XEXP (x, 0)) == NOT
3840 	  && XEXP (x, 1) == const0_rtx
3841 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3842 	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3843 	  && mode == CCZmode)
3844 	{
3845 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3846 	    *total += 1;
3847 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3848 	    *total += 1;
3849 	  return true;
3850 	}
3851 
3852       /* nnrk, nngrk, nork, nogrk */
3853       if (TARGET_Z15
3854 	  && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
3855 	  && XEXP (x, 1) == const0_rtx
3856 	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3857 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
3858 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
3859 	  && mode == CCZmode)
3860 	{
3861 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3862 	    *total += 1;
3863 	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
3864 	    *total += 1;
3865 	  return true;
3866 	}
3867 
3868       if (GET_CODE (XEXP (x, 0)) == AND
3869 	  && GET_CODE (XEXP (x, 1)) == CONST_INT
3870 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3871 	{
3872 	  rtx op0 = XEXP (XEXP (x, 0), 0);
3873 	  rtx op1 = XEXP (XEXP (x, 0), 1);
3874 	  rtx op2 = XEXP (x, 1);
3875 
3876 	  if (memory_operand (op0, GET_MODE (op0))
3877 	      && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3878 	    return true;
3879 	  if (register_operand (op0, GET_MODE (op0))
3880 	      && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3881 	    return true;
3882 	}
3883       return false;
3884 
3885     default:
3886       return false;
3887     }
3888 }
3889 
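/* Two examples of the special cases above: an IOR of the form
     (ior (and (reg A) (const_int 63)) (ashift (reg B) (const_int 6)))
   is recognized as a single risbg and rated COSTS_N_INSNS (2) since
   the mask 63 equals (1 << 6) - 1, and on z196 a conditional move
     (set (reg D) (if_then_else (eq (reg CC) (const_int 0)) ...))
   is rated just above a normal load instead of three full insns.  */
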
3890 /* Return the cost of an address rtx ADDR.  */
3891 
3892 static int
3893 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3894 		   addr_space_t as ATTRIBUTE_UNUSED,
3895 		   bool speed ATTRIBUTE_UNUSED)
3896 {
3897   struct s390_address ad;
3898   if (!s390_decompose_address (addr, &ad))
3899     return 1000;
3900 
3901   return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3902 }
3903 
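/* So a plain base + displacement address is rated COSTS_N_INSNS (1),
   an address that also needs an index register costs one unit more,
   and anything that cannot be decomposed at all gets the prohibitive
   cost of 1000.  */
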
3904 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
3905 static int
3906 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3907 				 tree vectype,
3908 				 int misalign ATTRIBUTE_UNUSED)
3909 {
3910   switch (type_of_cost)
3911     {
3912       case scalar_stmt:
3913       case scalar_load:
3914       case scalar_store:
3915       case vector_stmt:
3916       case vector_load:
3917       case vector_store:
3918       case vector_gather_load:
3919       case vector_scatter_store:
3920       case vec_to_scalar:
3921       case scalar_to_vec:
3922       case cond_branch_not_taken:
3923       case vec_perm:
3924       case vec_promote_demote:
3925       case unaligned_load:
3926       case unaligned_store:
3927 	return 1;
3928 
3929       case cond_branch_taken:
3930 	return 3;
3931 
3932       case vec_construct:
3933 	return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3934 
3935       default:
3936 	gcc_unreachable ();
3937     }
3938 }
3939 
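/* For example, constructing a V4SI vector from scalars is costed at
   TYPE_VECTOR_SUBPARTS - 1 = 3, a taken conditional branch at 3, and
   all other statement kinds listed above at 1.  */
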
3940 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3941    otherwise return 0.  */
3942 
3943 int
3944 tls_symbolic_operand (rtx op)
3945 {
3946   if (GET_CODE (op) != SYMBOL_REF)
3947     return 0;
3948   return SYMBOL_REF_TLS_MODEL (op);
3949 }
3950 
3951 /* Split DImode access register reference REG (on 64-bit) into its constituent
3952    low and high parts, and store them into LO and HI.  Note that gen_lowpart/
3953    gen_highpart cannot be used as they assume all registers are word-sized,
3954    while our access registers have only half that size.  */
3955 
3956 void
3957 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3958 {
3959   gcc_assert (TARGET_64BIT);
3960   gcc_assert (ACCESS_REG_P (reg));
3961   gcc_assert (GET_MODE (reg) == DImode);
3962   gcc_assert (!(REGNO (reg) & 1));
3963 
3964   *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3965   *hi = gen_rtx_REG (SImode, REGNO (reg));
3966 }
3967 
3968 /* Return true if OP contains a symbol reference.  */
3969 
3970 bool
3971 symbolic_reference_mentioned_p (rtx op)
3972 {
3973   const char *fmt;
3974   int i;
3975 
3976   if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3977     return 1;
3978 
3979   fmt = GET_RTX_FORMAT (GET_CODE (op));
3980   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3981     {
3982       if (fmt[i] == 'E')
3983 	{
3984 	  int j;
3985 
3986 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3987 	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3988 	      return 1;
3989 	}
3990 
3991       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3992 	return 1;
3993     }
3994 
3995   return 0;
3996 }
3997 
3998 /* Return true if OP contains a reference to a thread-local symbol.  */
3999 
4000 bool
4001 tls_symbolic_reference_mentioned_p (rtx op)
4002 {
4003   const char *fmt;
4004   int i;
4005 
4006   if (GET_CODE (op) == SYMBOL_REF)
4007     return tls_symbolic_operand (op);
4008 
4009   fmt = GET_RTX_FORMAT (GET_CODE (op));
4010   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4011     {
4012       if (fmt[i] == 'E')
4013 	{
4014 	  int j;
4015 
4016 	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4017 	    if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4018 	      return true;
4019 	}
4020 
4021       else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
4022 	return true;
4023     }
4024 
4025   return false;
4026 }
4027 
4028 
4029 /* Return true if OP is a legitimate general operand when
4030    generating PIC code.  It is given that flag_pic is on
4031    and that OP satisfies CONSTANT_P.  */
4032 
4033 int
4034 legitimate_pic_operand_p (rtx op)
4035 {
4036   /* Accept all non-symbolic constants.  */
4037   if (!SYMBOLIC_CONST (op))
4038     return 1;
4039 
4040   /* Accept addresses that can be expressed relative to (pc).  */
4041   if (larl_operand (op, VOIDmode))
4042     return 1;
4043 
4044   /* Reject everything else; must be handled
4045      via emit_symbolic_move.  */
4046   return 0;
4047 }
4048 
4049 /* Returns true if the constant value OP is a legitimate general operand.
4050    It is given that OP satisfies CONSTANT_P.  */
4051 
4052 static bool
4053 s390_legitimate_constant_p (machine_mode mode, rtx op)
4054 {
4055   if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
4056     {
4057       if (GET_MODE_SIZE (mode) != 16)
4058 	return 0;
4059 
4060       if (!satisfies_constraint_j00 (op)
4061 	  && !satisfies_constraint_jm1 (op)
4062 	  && !satisfies_constraint_jKK (op)
4063 	  && !satisfies_constraint_jxx (op)
4064 	  && !satisfies_constraint_jyy (op))
4065 	return 0;
4066     }
4067 
4068   /* Accept all non-symbolic constants.  */
4069   if (!SYMBOLIC_CONST (op))
4070     return 1;
4071 
4072   /* Accept immediate LARL operands.  */
4073   if (larl_operand (op, mode))
4074     return 1;
4075 
4076   /* Thread-local symbols are never legal constants.  This is
4077      so that emit_call knows that computing such addresses
4078      might require a function call.  */
4079   if (TLS_SYMBOLIC_CONST (op))
4080     return 0;
4081 
4082   /* In the PIC case, symbolic constants must *not* be
4083      forced into the literal pool.  We accept them here,
4084      so that they will be handled by emit_symbolic_move.  */
4085   if (flag_pic)
4086     return 1;
4087 
4088   /* All remaining non-PIC symbolic constants are
4089      forced into the literal pool.  */
4090   return 0;
4091 }
4092 
4093 /* Determine if it's legal to put X into the constant pool.  This
4094    is not possible if X contains the address of a symbol that is
4095    not constant (TLS) or not known at final link time (PIC).  */
4096 
4097 static bool
4098 s390_cannot_force_const_mem (machine_mode mode, rtx x)
4099 {
4100   switch (GET_CODE (x))
4101     {
4102     case CONST_INT:
4103     case CONST_DOUBLE:
4104     case CONST_WIDE_INT:
4105     case CONST_VECTOR:
4106       /* Accept all non-symbolic constants.  */
4107       return false;
4108 
4109     case LABEL_REF:
4110       /* Labels are OK iff we are non-PIC.  */
4111       return flag_pic != 0;
4112 
4113     case SYMBOL_REF:
4114       /* 'Naked' TLS symbol references are never OK,
4115 	 non-TLS symbols are OK iff we are non-PIC.  */
4116       if (tls_symbolic_operand (x))
4117 	return true;
4118       else
4119 	return flag_pic != 0;
4120 
4121     case CONST:
4122       return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4123     case PLUS:
4124     case MINUS:
4125       return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4126 	     || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4127 
4128     case UNSPEC:
4129       switch (XINT (x, 1))
4130 	{
4131 	/* Only lt-relative or GOT-relative UNSPECs are OK.  */
4132 	case UNSPEC_LTREL_OFFSET:
4133 	case UNSPEC_GOT:
4134 	case UNSPEC_GOTOFF:
4135 	case UNSPEC_PLTOFF:
4136 	case UNSPEC_TLSGD:
4137 	case UNSPEC_TLSLDM:
4138 	case UNSPEC_NTPOFF:
4139 	case UNSPEC_DTPOFF:
4140 	case UNSPEC_GOTNTPOFF:
4141 	case UNSPEC_INDNTPOFF:
4142 	  return false;
4143 
4144 	/* If the literal pool shares the code section, execute
4145 	   template placeholders may be put into the pool as well.  */
4146 	case UNSPEC_INSN:
4147 	default:
4148 	  return true;
4149 	}
4150       break;
4151 
4152     default:
4153       gcc_unreachable ();
4154     }
4155 }
4156 
4157 /* Returns true if the constant value OP is a legitimate general
4158    operand during and after reload.  The difference to
4159    legitimate_constant_p is that this function will not accept
4160    a constant that would need to be forced to the literal pool
4161    before it can be used as operand.
4162    This function accepts all constants which can be loaded directly
4163    into a GPR.  */
4164 
4165 bool
4166 legitimate_reload_constant_p (rtx op)
4167 {
4168   /* Accept la(y) operands.  */
4169   if (GET_CODE (op) == CONST_INT
4170       && DISP_IN_RANGE (INTVAL (op)))
4171     return true;
4172 
4173   /* Accept l(g)hi/l(g)fi operands.  */
4174   if (GET_CODE (op) == CONST_INT
4175       && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4176     return true;
4177 
4178   /* Accept lliXX operands.  */
4179   if (TARGET_ZARCH
4180       && GET_CODE (op) == CONST_INT
4181       && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4182       && s390_single_part (op, word_mode, HImode, 0) >= 0)
4183   return true;
4184 
4185   if (TARGET_EXTIMM
4186       && GET_CODE (op) == CONST_INT
4187       && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4188       && s390_single_part (op, word_mode, SImode, 0) >= 0)
4189     return true;
4190 
4191   /* Accept larl operands.  */
4192   if (larl_operand (op, VOIDmode))
4193     return true;
4194 
4195   /* Accept floating-point zero operands that fit into a single GPR.  */
4196   if (GET_CODE (op) == CONST_DOUBLE
4197       && s390_float_const_zero_p (op)
4198       && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4199     return true;
4200 
4201   /* Accept double-word operands that can be split.  */
4202   if (GET_CODE (op) == CONST_WIDE_INT
4203       || (GET_CODE (op) == CONST_INT
4204 	  && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4205     {
4206       machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4207       rtx hi = operand_subword (op, 0, 0, dword_mode);
4208       rtx lo = operand_subword (op, 1, 0, dword_mode);
4209       return legitimate_reload_constant_p (hi)
4210 	     && legitimate_reload_constant_p (lo);
4211     }
4212 
4213   /* Everything else cannot be handled without reload.  */
4214   return false;
4215 }
4216 
4217 /* Returns true if the constant value OP is a legitimate fp operand
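/* A few values that pass, assuming a 64-bit target with extended
   immediates: 42 (la/lay displacement), -30000 (l(g)hi), 0x12345678
   (l(g)fi), 0xffff00000000 (a single non-zero halfword, so an lli??
   instruction applies), any larl symbol, a floating-point 0.0 no
   wider than a word, and double-word constants both of whose halves
   are acceptable on their own.  */
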
4218    during and after reload.
4219    This function accepts all constants which can be loaded directly
4220    into an FPR.  */
4221 
4222 static bool
4223 legitimate_reload_fp_constant_p (rtx op)
4224 {
4225   /* Accept floating-point zero operands if the load zero instruction
4226      can be used.  Prior to z196 the load fp zero instruction caused a
4227      performance penalty if the result is used as BFP number.  */
4228   if (TARGET_Z196
4229       && GET_CODE (op) == CONST_DOUBLE
4230       && s390_float_const_zero_p (op))
4231     return true;
4232 
4233   return false;
4234 }
4235 
4236 /* Returns true if the constant value OP is a legitimate vector operand
4237    during and after reload.
4238    This function accepts all constants which can be loaded directly
4239    into a VR.  */
4240 
4241 static bool
4242 legitimate_reload_vector_constant_p (rtx op)
4243 {
4244   if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4245       && (satisfies_constraint_j00 (op)
4246 	  || satisfies_constraint_jm1 (op)
4247 	  || satisfies_constraint_jKK (op)
4248 	  || satisfies_constraint_jxx (op)
4249 	  || satisfies_constraint_jyy (op)))
4250     return true;
4251 
4252   return false;
4253 }
4254 
4255 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4256    return the class of reg to actually use.  */
4257 
4258 static reg_class_t
4259 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4260 {
4261   switch (GET_CODE (op))
4262     {
4263       /* Constants we cannot reload into general registers
4264 	 must be forced into the literal pool.  */
4265       case CONST_VECTOR:
4266       case CONST_DOUBLE:
4267       case CONST_INT:
4268       case CONST_WIDE_INT:
4269 	if (reg_class_subset_p (GENERAL_REGS, rclass)
4270 	    && legitimate_reload_constant_p (op))
4271 	  return GENERAL_REGS;
4272 	else if (reg_class_subset_p (ADDR_REGS, rclass)
4273 		 && legitimate_reload_constant_p (op))
4274 	  return ADDR_REGS;
4275 	else if (reg_class_subset_p (FP_REGS, rclass)
4276 		 && legitimate_reload_fp_constant_p (op))
4277 	  return FP_REGS;
4278 	else if (reg_class_subset_p (VEC_REGS, rclass)
4279 		 && legitimate_reload_vector_constant_p (op))
4280 	  return VEC_REGS;
4281 
4282 	return NO_REGS;
4283 
4284       /* If a symbolic constant or a PLUS is reloaded,
4285 	 it is most likely being used as an address, so
4286 	 prefer ADDR_REGS.  If 'class' is not a superset
4287 	 of ADDR_REGS, e.g. FP_REGS, reject this reload.  */
4288       case CONST:
4289 	/* Symrefs cannot be pushed into the literal pool with -fPIC
4290 	   so we *MUST NOT* return NO_REGS for these cases
4291 	   (s390_cannot_force_const_mem will return true).
4292 
4293 	   On the other hand we MUST return NO_REGS for symrefs with
4294 	   invalid addend which might have been pushed to the literal
4295 	   pool (no -fPIC).  Usually we would expect them to be
4296 	   handled via secondary reload but this does not happen if
4297 	   they are used as literal pool slot replacement in reload
4298 	   inheritance (see emit_input_reload_insns).  */
4299 	if (GET_CODE (XEXP (op, 0)) == PLUS
4300 	    && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4301 	    && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4302 	  {
4303 	    if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4304 	      return ADDR_REGS;
4305 	    else
4306 	      return NO_REGS;
4307 	  }
4308 	/* fallthrough */
4309       case LABEL_REF:
4310       case SYMBOL_REF:
4311 	if (!legitimate_reload_constant_p (op))
4312 	  return NO_REGS;
4313 	/* fallthrough */
4314       case PLUS:
4315 	/* load address will be used.  */
4316 	if (reg_class_subset_p (ADDR_REGS, rclass))
4317 	  return ADDR_REGS;
4318 	else
4319 	  return NO_REGS;
4320 
4321       default:
4322 	break;
4323     }
4324 
4325   return rclass;
4326 }
4327 
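/* For instance, reloading (const_int 42) into FP_REGS yields NO_REGS
   (the constant has to go through the literal pool), while reloading
   it into GENERAL_REGS keeps GENERAL_REGS; a SYMBOL_REF or PLUS being
   reloaded prefers ADDR_REGS, or is rejected if the requested class
   does not contain the address registers.  */
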
4328 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4329    multiple of ALIGNMENT and the SYMBOL_REF being naturally
4330    aligned.  */
4331 
4332 bool
4333 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4334 {
4335   HOST_WIDE_INT addend;
4336   rtx symref;
4337 
4338   /* The "required alignment" might be 0 (e.g. for certain structs
4339      accessed via BLKmode).  Early abort in this case, as well as when
4340      an alignment > 8 is required.  */
4341   if (alignment < 2 || alignment > 8)
4342     return false;
4343 
4344   if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4345     return false;
4346 
4347   if (addend & (alignment - 1))
4348     return false;
4349 
4350   if (GET_CODE (symref) == SYMBOL_REF)
4351     {
4352       /* s390_encode_section_info is not called for anchors, since they don't
4353 	 have corresponding VAR_DECLs.  Therefore, we cannot rely on
4354 	 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information.  */
4355       if (SYMBOL_REF_ANCHOR_P (symref))
4356 	{
4357 	  HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4358 	  unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4359 					  / BITS_PER_UNIT);
4360 
4361 	  gcc_assert (block_offset >= 0);
4362 	  return ((block_offset & (alignment - 1)) == 0
4363 		  && block_alignment >= alignment);
4364 	}
4365 
4366       /* We have load-relative instructions for 2-byte, 4-byte, and
4367 	 8-byte alignment so allow only these.  */
4368       switch (alignment)
4369 	{
4370 	case 8:	return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4371 	case 4:	return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4372 	case 2:	return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4373 	default: return false;
4374 	}
4375     }
4376 
4377   if (GET_CODE (symref) == UNSPEC
4378       && alignment <= UNITS_PER_LONG)
4379     return true;
4380 
4381   return false;
4382 }
4383 
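/* E.g. for an 8-byte access, sym + 16 is accepted as long as sym is
   not marked as possibly misaligned (SYMBOL_FLAG_NOTALIGN8), whereas
   sym + 12 is rejected because the addend is not a multiple of 8.  */
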
4384 /* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
4385    operand, SCRATCH is used to reload the even part of the address,
4386    and one is then added.  */
4387 
4388 void
4389 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4390 {
4391   HOST_WIDE_INT addend;
4392   rtx symref;
4393 
4394   if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4395     gcc_unreachable ();
4396 
4397   if (!(addend & 1))
4398     /* Easy case.  The addend is even so larl will do fine.  */
4399     emit_move_insn (reg, addr);
4400   else
4401     {
4402       /* We can leave the scratch register untouched if the target
4403 	 register is a valid base register.  */
4404       if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4405 	  && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4406 	scratch = reg;
4407 
4408       gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4409       gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4410 
4411       if (addend != 1)
4412 	emit_move_insn (scratch,
4413 			gen_rtx_CONST (Pmode,
4414 				       gen_rtx_PLUS (Pmode, symref,
4415 						     GEN_INT (addend - 1))));
4416       else
4417 	emit_move_insn (scratch, symref);
4418 
4419       /* Increment the address using la in order to avoid clobbering cc.  */
4420       s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4421     }
4422 }
4423 
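/* Example: for ADDR == sym + 5 the addend is odd, so this emits
     larl  SCRATCH, sym + 4
     la    REG, 1(SCRATCH)
   whereas sym + 6 would become a single larl into REG.  */
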
4424 /* Generate what is necessary to move between REG and MEM using
4425    SCRATCH.  The direction is given by TOMEM.  */
4426 
4427 void
4428 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4429 {
4430   /* Reload might have pulled a constant out of the literal pool.
4431      Force it back in.  */
4432   if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4433       || GET_CODE (mem) == CONST_WIDE_INT
4434       || GET_CODE (mem) == CONST_VECTOR
4435       || GET_CODE (mem) == CONST)
4436     mem = force_const_mem (GET_MODE (reg), mem);
4437 
4438   gcc_assert (MEM_P (mem));
4439 
4440   /* For a load from memory we can leave the scratch register
4441      untouched if the target register is a valid base register.  */
4442   if (!tomem
4443       && REGNO (reg) < FIRST_PSEUDO_REGISTER
4444       && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4445       && GET_MODE (reg) == GET_MODE (scratch))
4446     scratch = reg;
4447 
4448   /* Load address into scratch register.  Since we can't have a
4449      secondary reload for a secondary reload we have to cover the case
4450      where larl would need a secondary reload here as well.  */
4451   s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4452 
4453   /* Now we can use a standard load/store to do the move.  */
4454   if (tomem)
4455     emit_move_insn (replace_equiv_address (mem, scratch), reg);
4456   else
4457     emit_move_insn (reg, replace_equiv_address (mem, scratch));
4458 }
4459 
4460 /* Inform reload about cases where moving X with a mode MODE to a register in
4461    RCLASS requires an extra scratch or immediate register.  Return the class
4462    needed for the immediate register.  */
4463 
4464 static reg_class_t
4465 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4466 		       machine_mode mode, secondary_reload_info *sri)
4467 {
4468   enum reg_class rclass = (enum reg_class) rclass_i;
4469 
4470   /* Intermediate register needed.  */
4471   if (reg_classes_intersect_p (CC_REGS, rclass))
4472     return GENERAL_REGS;
4473 
4474   if (TARGET_VX)
4475     {
4476       /* The vst/vl vector move instructions allow only for short
4477 	 displacements.  */
4478       if (MEM_P (x)
4479 	  && GET_CODE (XEXP (x, 0)) == PLUS
4480 	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4481 	  && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4482 	  && reg_class_subset_p (rclass, VEC_REGS)
4483 	  && (!reg_class_subset_p (rclass, FP_REGS)
4484 	      || (GET_MODE_SIZE (mode) > 8
4485 		  && s390_class_max_nregs (FP_REGS, mode) == 1)))
4486 	{
4487 	  if (in_p)
4488 	    sri->icode = (TARGET_64BIT ?
4489 			  CODE_FOR_reloaddi_la_in :
4490 			  CODE_FOR_reloadsi_la_in);
4491 	  else
4492 	    sri->icode = (TARGET_64BIT ?
4493 			  CODE_FOR_reloaddi_la_out :
4494 			  CODE_FOR_reloadsi_la_out);
4495 	}
4496     }
4497 
4498   if (TARGET_Z10)
4499     {
4500       HOST_WIDE_INT offset;
4501       rtx symref;
4502 
4503       /* On z10 several optimizer steps may generate larl operands with
4504 	 an odd addend.  */
4505       if (in_p
4506 	  && s390_loadrelative_operand_p (x, &symref, &offset)
4507 	  && mode == Pmode
4508 	  && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4509 	  && (offset & 1) == 1)
4510 	sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4511 		      : CODE_FOR_reloadsi_larl_odd_addend_z10);
4512 
4513       /* Handle all the (mem (symref)) accesses we cannot use the z10
4514 	 instructions for.  */
4515       if (MEM_P (x)
4516 	  && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4517 	  && (mode == QImode
4518 	      || !reg_class_subset_p (rclass, GENERAL_REGS)
4519 	      || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4520 	      || !s390_check_symref_alignment (XEXP (x, 0),
4521 					       GET_MODE_SIZE (mode))))
4522 	{
4523 #define __SECONDARY_RELOAD_CASE(M,m)					\
4524 	  case E_##M##mode:						\
4525 	    if (TARGET_64BIT)						\
4526 	      sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 :	\
4527 				  CODE_FOR_reload##m##di_tomem_z10;	\
4528 	    else							\
4529 	      sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 :	\
4530 				  CODE_FOR_reload##m##si_tomem_z10;	\
4531 	  break;
4532 
4533 	  switch (GET_MODE (x))
4534 	    {
4535 	      __SECONDARY_RELOAD_CASE (QI, qi);
4536 	      __SECONDARY_RELOAD_CASE (HI, hi);
4537 	      __SECONDARY_RELOAD_CASE (SI, si);
4538 	      __SECONDARY_RELOAD_CASE (DI, di);
4539 	      __SECONDARY_RELOAD_CASE (TI, ti);
4540 	      __SECONDARY_RELOAD_CASE (SF, sf);
4541 	      __SECONDARY_RELOAD_CASE (DF, df);
4542 	      __SECONDARY_RELOAD_CASE (TF, tf);
4543 	      __SECONDARY_RELOAD_CASE (SD, sd);
4544 	      __SECONDARY_RELOAD_CASE (DD, dd);
4545 	      __SECONDARY_RELOAD_CASE (TD, td);
4546 	      __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4547 	      __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4548 	      __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4549 	      __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4550 	      __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4551 	      __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4552 	      __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4553 	      __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4554 	      __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4555 	      __SECONDARY_RELOAD_CASE (V1SI, v1si);
4556 	      __SECONDARY_RELOAD_CASE (V2SI, v2si);
4557 	      __SECONDARY_RELOAD_CASE (V4SI, v4si);
4558 	      __SECONDARY_RELOAD_CASE (V1DI, v1di);
4559 	      __SECONDARY_RELOAD_CASE (V2DI, v2di);
4560 	      __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4561 	      __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4562 	      __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4563 	      __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4564 	      __SECONDARY_RELOAD_CASE (V1DF, v1df);
4565 	      __SECONDARY_RELOAD_CASE (V2DF, v2df);
4566 	      __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4567 	    default:
4568 	      gcc_unreachable ();
4569 	    }
4570 #undef __SECONDARY_RELOAD_CASE
4571 	}
4572     }
4573 
4574   /* We need a scratch register when loading a PLUS expression which
4575      is not a legitimate operand of the LOAD ADDRESS instruction.  */
4576   /* LRA can deal with the transformation of a plus op very well -- so
4577      we don't need to prompt LRA in this case.  */
4578   if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4579     sri->icode = (TARGET_64BIT ?
4580 		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4581 
4582   /* Performing a multiword move from or to memory we have to make sure the
4583      second chunk in memory is addressable without causing a displacement
4584      overflow.  If that would be the case we calculate the address in
4585      a scratch register.  */
4586   if (MEM_P (x)
4587       && GET_CODE (XEXP (x, 0)) == PLUS
4588       && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4589       && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4590 			 + GET_MODE_SIZE (mode) - 1))
4591     {
4592       /* For GENERAL_REGS a displacement overflow is no problem if occurring
4593 	 in a s_operand address since we may fallback to lm/stm.  So we only
4594 	 have to care about overflows in the b+i+d case.  */
4595       if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4596 	   && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4597 	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4598 	  /* For FP_REGS no lm/stm is available so this check is triggered
4599 	     for displacement overflows in b+i+d and b+d like addresses.  */
4600 	  || (reg_classes_intersect_p (FP_REGS, rclass)
4601 	      && s390_class_max_nregs (FP_REGS, mode) > 1))
4602 	{
4603 	  if (in_p)
4604 	    sri->icode = (TARGET_64BIT ?
4605 			  CODE_FOR_reloaddi_la_in :
4606 			  CODE_FOR_reloadsi_la_in);
4607 	  else
4608 	    sri->icode = (TARGET_64BIT ?
4609 			  CODE_FOR_reloaddi_la_out :
4610 			  CODE_FOR_reloadsi_la_out);
4611 	}
4612     }
4613 
4614   /* A scratch address register is needed when a symbolic constant is
4615      copied to r0 compiling with -fPIC.  In other cases the target
4616      register might be used as temporary (see legitimize_pic_address).  */
4617   if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4618     sri->icode = (TARGET_64BIT ?
4619 		  CODE_FOR_reloaddi_PIC_addr :
4620 		  CODE_FOR_reloadsi_PIC_addr);
4621 
4622   /* Either scratch or no register needed.  */
4623   return NO_REGS;
4624 }
4625 
4626 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4627 
4628    We need secondary memory to move data between GPRs and FPRs.
4629 
4630    - With DFP the ldgr lgdr instructions are available.  Due to the
4631      different alignment we cannot use them for SFmode.  For 31 bit a
4632      64 bit value in GPR would be a register pair so here we still
4633      need to go via memory.
4634 
4635    - With z13 we can do the SF/SImode moves with vlgvf.  Due to the
4636      overlapping of FPRs and VRs we still disallow TF/TD modes to be
4637      in full VRs so as before also on z13 we do these moves via
4638      memory.
4639 
4640      FIXME: Should we try splitting it into two vlgvg's/vlvg's instead?  */
4641 
4642 static bool
4643 s390_secondary_memory_needed (machine_mode mode,
4644 			      reg_class_t class1, reg_class_t class2)
4645 {
4646   return (((reg_classes_intersect_p (class1, VEC_REGS)
4647 	    && reg_classes_intersect_p (class2, GENERAL_REGS))
4648 	   || (reg_classes_intersect_p (class1, GENERAL_REGS)
4649 	       && reg_classes_intersect_p (class2, VEC_REGS)))
4650 	  && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
4651 	      || GET_MODE_SIZE (mode) != 8)
4652 	  && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4653 			     && GET_MODE_SIZE (mode) > 8)));
4654 }
4655 
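/* In other words: with the vector facility only scalar float values
   wider than 8 bytes (TFmode/TDmode) still take the detour through
   memory; without it, a 64-bit DFP-enabled target (other than TPF)
   can move 8-byte values via ldgr/lgdr, and every other move between
   GPRs and FPRs/VRs goes through memory.  */
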
4656 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4657 
4658    get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4659    because the movsi and movsf patterns don't handle r/f moves.  */
4660 
4661 static machine_mode
4662 s390_secondary_memory_needed_mode (machine_mode mode)
4663 {
4664   if (GET_MODE_BITSIZE (mode) < 32)
4665     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4666   return mode;
4667 }
4668 
4669 /* Generate code to load SRC, which is PLUS that is not a
4670    legitimate operand for the LA instruction, into TARGET.
4671    SCRATCH may be used as scratch register.  */
4672 
4673 void
4674 s390_expand_plus_operand (rtx target, rtx src,
4675 			  rtx scratch)
4676 {
4677   rtx sum1, sum2;
4678   struct s390_address ad;
4679 
4680   /* src must be a PLUS; get its two operands.  */
4681   gcc_assert (GET_CODE (src) == PLUS);
4682   gcc_assert (GET_MODE (src) == Pmode);
4683 
4684   /* Check if any of the two operands is already scheduled
4685      for replacement by reload.  This can happen e.g. when
4686      float registers occur in an address.  */
4687   sum1 = find_replacement (&XEXP (src, 0));
4688   sum2 = find_replacement (&XEXP (src, 1));
4689   src = gen_rtx_PLUS (Pmode, sum1, sum2);
4690 
4691   /* If the address is already strictly valid, there's nothing to do.  */
4692   if (!s390_decompose_address (src, &ad)
4693       || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4694       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4695     {
4696       /* Otherwise, one of the operands cannot be an address register;
4697 	 we reload its value into the scratch register.  */
4698       if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4699 	{
4700 	  emit_move_insn (scratch, sum1);
4701 	  sum1 = scratch;
4702 	}
4703       if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4704 	{
4705 	  emit_move_insn (scratch, sum2);
4706 	  sum2 = scratch;
4707 	}
4708 
4709       /* According to the way these invalid addresses are generated
4710 	 in reload.c, it should never happen (at least on s390) that
4711 	 *neither* of the PLUS components, after find_replacements
4712 	 was applied, is an address register.  */
4713       if (sum1 == scratch && sum2 == scratch)
4714 	{
4715 	  debug_rtx (src);
4716 	  gcc_unreachable ();
4717 	}
4718 
4719       src = gen_rtx_PLUS (Pmode, sum1, sum2);
4720     }
4721 
4722   /* Emit the LOAD ADDRESS pattern.  Note that reload of PLUS
4723      is only ever performed on addresses, so we can mark the
4724      sum as legitimate for LA in any case.  */
4725   s390_load_address (target, src);
4726 }
4727 
4728 
4729 /* Return true if ADDR is a valid memory address.
4730    STRICT specifies whether strict register checking applies.  */
4731 
4732 static bool
4733 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4734 {
4735   struct s390_address ad;
4736 
4737   if (TARGET_Z10
4738       && larl_operand (addr, VOIDmode)
4739       && (mode == VOIDmode
4740 	  || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4741     return true;
4742 
4743   if (!s390_decompose_address (addr, &ad))
4744     return false;
4745 
4746   /* The vector memory instructions only support short displacements.
4747      Reject invalid displacements early to prevent plenty of lay
4748      instructions to be generated later which then cannot be merged
4749      properly.  */
4750   if (TARGET_VX
4751       && VECTOR_MODE_P (mode)
4752       && ad.disp != NULL_RTX
4753       && CONST_INT_P (ad.disp)
4754       && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
4755     return false;
4756 
4757   if (strict)
4758     {
4759       if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4760 	return false;
4761 
4762       if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4763 	return false;
4764     }
4765   else
4766     {
4767       if (ad.base
4768 	  && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4769 	       || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4770 	return false;
4771 
4772       if (ad.indx
4773 	  && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4774 	       || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4775 	  return false;
4776     }
4777   return true;
4778 }
4779 
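/* Example of the strict/non-strict distinction: a base + displacement
   address built on a pseudo register is fine before register
   allocation but rejected once strict checking demands a hard address
   register, and for vector modes something like
   (plus (reg %r2) (const_int 5000)) is rejected outright because the
   vector loads/stores only have the short displacement field.  */
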
4780 /* Return true if OP is a valid operand for the LA instruction.
4781    In 31-bit, we need to prove that the result is used as an
4782    address, as LA performs only a 31-bit addition.  */
4783 
4784 bool
4785 legitimate_la_operand_p (rtx op)
4786 {
4787   struct s390_address addr;
4788   if (!s390_decompose_address (op, &addr))
4789     return false;
4790 
4791   return (TARGET_64BIT || addr.pointer);
4792 }
4793 
4794 /* Return true if it is valid *and* preferable to use LA to
4795    compute the sum of OP1 and OP2.  */
4796 
4797 bool
4798 preferred_la_operand_p (rtx op1, rtx op2)
4799 {
4800   struct s390_address addr;
4801 
4802   if (op2 != const0_rtx)
4803     op1 = gen_rtx_PLUS (Pmode, op1, op2);
4804 
4805   if (!s390_decompose_address (op1, &addr))
4806     return false;
4807   if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4808     return false;
4809   if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4810     return false;
4811 
4812   /* Avoid LA instructions with index (and base) register on z196 or
4813      later; it is preferable to use regular add instructions when
4814      possible.  Starting with zEC12 the la with index register is
4815      "uncracked" again but still slower than a regular add.  */
4816   if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4817     return false;
4818 
4819   if (!TARGET_64BIT && !addr.pointer)
4820     return false;
4821 
4822   if (addr.pointer)
4823     return true;
4824 
4825   if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4826       || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4827     return true;
4828 
4829   return false;
4830 }
4831 
4832 /* Emit a forced load-address operation to load SRC into DST.
4833    This will use the LOAD ADDRESS instruction even in situations
4834    where legitimate_la_operand_p (SRC) returns false.  */
4835 
4836 void
4837 s390_load_address (rtx dst, rtx src)
4838 {
4839   if (TARGET_64BIT)
4840     emit_move_insn (dst, src);
4841   else
4842     emit_insn (gen_force_la_31 (dst, src));
4843 }
4844 
4845 /* Return true if it is OK to use SYMBOL_REF in a relative address.  */
4846 
4847 bool
4848 s390_rel_address_ok_p (rtx symbol_ref)
4849 {
4850   tree decl;
4851 
4852   if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4853     return true;
4854 
4855   decl = SYMBOL_REF_DECL (symbol_ref);
4856 
4857   if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4858     return (s390_pic_data_is_text_relative
4859 	    || (decl
4860 		&& TREE_CODE (decl) == FUNCTION_DECL));
4861 
4862   return false;
4863 }
4864 
4865 /* Return a legitimate reference for ORIG (an address) using the
4866    register REG.  If REG is 0, a new pseudo is generated.
4867 
4868    There are two types of references that must be handled:
4869 
4870    1. Global data references must load the address from the GOT, via
4871       the PIC reg.  An insn is emitted to do this load, and the reg is
4872       returned.
4873 
4874    2. Static data references, constant pool addresses, and code labels
4875       compute the address as an offset from the GOT, whose base is in
4876       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
4877       differentiate them from global data objects.  The returned
4878       address is the PIC reg + an unspec constant.
4879 
4880    TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4881    reg also appears in the address.  */
4882 
4883 rtx
4884 legitimize_pic_address (rtx orig, rtx reg)
4885 {
4886   rtx addr = orig;
4887   rtx addend = const0_rtx;
4888   rtx new_rtx = orig;
4889 
4890   gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4891 
4892   if (GET_CODE (addr) == CONST)
4893     addr = XEXP (addr, 0);
4894 
4895   if (GET_CODE (addr) == PLUS)
4896     {
4897       addend = XEXP (addr, 1);
4898       addr = XEXP (addr, 0);
4899     }
4900 
4901   if ((GET_CODE (addr) == LABEL_REF
4902        || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4903        || (GET_CODE (addr) == UNSPEC &&
4904 	   (XINT (addr, 1) == UNSPEC_GOTENT
4905 	    || XINT (addr, 1) == UNSPEC_PLT)))
4906       && GET_CODE (addend) == CONST_INT)
4907     {
4908       /* This can be locally addressed.  */
4909 
4910       /* larl_operand requires UNSPECs to be wrapped in a const rtx.  */
4911       rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4912 			gen_rtx_CONST (Pmode, addr) : addr);
4913 
4914       if (larl_operand (const_addr, VOIDmode)
4915 	  && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4916 	  && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4917 	{
4918 	  if (INTVAL (addend) & 1)
4919 	    {
4920 	      /* LARL can't handle odd offsets, so emit a pair of LARL
4921 		 and LA.  */
4922 	      rtx temp = reg? reg : gen_reg_rtx (Pmode);
4923 
4924 	      if (!DISP_IN_RANGE (INTVAL (addend)))
4925 		{
4926 		  HOST_WIDE_INT even = INTVAL (addend) - 1;
4927 		  addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4928 		  addr = gen_rtx_CONST (Pmode, addr);
4929 		  addend = const1_rtx;
4930 		}
4931 
4932 	      emit_move_insn (temp, addr);
4933 	      new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4934 
4935 	      if (reg != 0)
4936 		{
4937 		  s390_load_address (reg, new_rtx);
4938 		  new_rtx = reg;
4939 		}
4940 	    }
4941 	  else
4942 	    {
4943 	      /* If the offset is even, we can just use LARL.  This
4944 		 will happen automatically.  */
4945 	    }
4946 	}
4947       else
4948 	{
4949 	  /* No larl - Access local symbols relative to the GOT.  */
4950 
4951 	  rtx temp = reg? reg : gen_reg_rtx (Pmode);
4952 
4953 	  if (reload_in_progress || reload_completed)
4954 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4955 
4956 	  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4957 	  if (addend != const0_rtx)
4958 	    addr = gen_rtx_PLUS (Pmode, addr, addend);
4959 	  addr = gen_rtx_CONST (Pmode, addr);
4960 	  addr = force_const_mem (Pmode, addr);
4961 	  emit_move_insn (temp, addr);
4962 
4963 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4964 	  if (reg != 0)
4965 	    {
4966 	      s390_load_address (reg, new_rtx);
4967 	      new_rtx = reg;
4968 	    }
4969 	}
4970     }
4971   else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4972     {
4973       /* A non-local symbol reference without addend.
4974 
4975 	 The symbol ref is wrapped into an UNSPEC to make sure the
4976 	 proper operand modifier (@GOT or @GOTENT) will be emitted.
4977 	 This will tell the linker to put the symbol into the GOT.
4978 
4979 	 Additionally the code dereferencing the GOT slot is emitted here.
4980 
4981 	 An addend to the symref needs to be added afterwards.
4982 	 legitimize_pic_address calls itself recursively to handle
4983 	 that case.  So no need to do it here.  */
4984 
4985       if (reg == 0)
4986 	reg = gen_reg_rtx (Pmode);
4987 
4988       if (TARGET_Z10)
4989 	{
4990 	  /* Use load relative if possible.
4991 	     lgrl <target>, sym@GOTENT  */
4992 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4993 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4994 	  new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4995 
4996 	  emit_move_insn (reg, new_rtx);
4997 	  new_rtx = reg;
4998 	}
4999       else if (flag_pic == 1)
5000 	{
5001 	  /* Assume GOT offset is a valid displacement operand (< 4k
5002 	     or < 512k with z990).  This is handled the same way in
5003 	     both 31- and 64-bit code (@GOT).
5004 	     lg <target>, sym@GOT(r12)  */
5005 
5006 	  if (reload_in_progress || reload_completed)
5007 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5008 
5009 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5010 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5011 	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5012 	  new_rtx = gen_const_mem (Pmode, new_rtx);
5013 	  emit_move_insn (reg, new_rtx);
5014 	  new_rtx = reg;
5015 	}
5016       else
5017 	{
5018 	  /* If the GOT offset might be >= 4k, we determine the position
5019 	     of the GOT entry via a PC-relative LARL (@GOTENT).
5020 	     larl temp, sym@GOTENT
5021 	     lg   <target>, 0(temp) */
5022 
5023 	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);
5024 
5025 	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
5026 		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
5027 
5028 	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5029 	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5030 	  emit_move_insn (temp, new_rtx);
5031 	  new_rtx = gen_const_mem (Pmode, temp);
5032 	  emit_move_insn (reg, new_rtx);
5033 
5034 	  new_rtx = reg;
5035 	}
5036     }
5037   else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
5038     {
5039       gcc_assert (XVECLEN (addr, 0) == 1);
5040       switch (XINT (addr, 1))
5041 	{
5042 	  /* These address symbols (or PLT slots) relative to the GOT
5043 	     (not GOT slots!).  In general this will exceed the
5044 	     displacement range, so these values belong in the literal
5045 	     pool.  */
5046 	case UNSPEC_GOTOFF:
5047 	case UNSPEC_PLTOFF:
5048 	  new_rtx = force_const_mem (Pmode, orig);
5049 	  break;
5050 
5051 	  /* For -fPIC the GOT size might exceed the displacement
5052 	     range so make sure the value is in the literal pool.  */
5053 	case UNSPEC_GOT:
5054 	  if (flag_pic == 2)
5055 	    new_rtx = force_const_mem (Pmode, orig);
5056 	  break;
5057 
5058 	  /* For @GOTENT larl is used.  This is handled like local
5059 	     symbol refs.  */
5060 	case UNSPEC_GOTENT:
5061 	  gcc_unreachable ();
5062 	  break;
5063 
5064 	  /* For @PLT larl is used.  This is handled like local
5065 	     symbol refs.  */
5066 	case UNSPEC_PLT:
5067 	  gcc_unreachable ();
5068 	  break;
5069 
5070 	  /* Everything else cannot happen.  */
5071 	default:
5072 	  gcc_unreachable ();
5073 	}
5074     }
5075   else if (addend != const0_rtx)
5076     {
5077       /* Otherwise, compute the sum.  */
5078 
5079       rtx base = legitimize_pic_address (addr, reg);
5080       new_rtx  = legitimize_pic_address (addend,
5081 					 base == reg ? NULL_RTX : reg);
5082       if (GET_CODE (new_rtx) == CONST_INT)
5083 	new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5084       else
5085 	{
5086 	  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5087 	    {
5088 	      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5089 	      new_rtx = XEXP (new_rtx, 1);
5090 	    }
5091 	  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5092 	}
5093 
5094       if (GET_CODE (new_rtx) == CONST)
5095 	new_rtx = XEXP (new_rtx, 0);
5096       new_rtx = force_operand (new_rtx, 0);
5097     }
5098 
5099   return new_rtx;
5100 }
5101 
5102 /* Load the thread pointer into a register.  */
5103 
5104 rtx
5105 s390_get_thread_pointer (void)
5106 {
5107   rtx tp = gen_reg_rtx (Pmode);
5108 
5109   emit_insn (gen_get_thread_pointer (Pmode, tp));
5110 
5111   mark_reg_pointer (tp, BITS_PER_WORD);
5112 
5113   return tp;
5114 }
5115 
5116 /* Emit a TLS call insn.  The call target is the SYMBOL_REF stored
5117    in s390_tls_symbol, which always refers to __tls_get_offset.
5118    The returned offset is written to RESULT_REG and a USE rtx is
5119    generated for TLS_CALL.  */
5120 
5121 static GTY(()) rtx s390_tls_symbol;
5122 
5123 static void
5124 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5125 {
5126   rtx insn;
5127 
5128   if (!flag_pic)
5129     emit_insn (s390_load_got ());
5130 
5131   if (!s390_tls_symbol)
5132     s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5133 
5134   insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5135 			 gen_rtx_REG (Pmode, RETURN_REGNUM));
5136 
5137   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5138   RTL_CONST_CALL_P (insn) = 1;
5139 }
5140 
5141 /* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
5142    this (thread-local) address.  REG may be used as a temporary.  */
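/* Editor's note -- illustrative sketch, not part of the original source:
   the TLS models handled below roughly correspond to the following
   access sequences:
     global-dynamic: load &sym@TLSGD from the literal pool into r2,
	call __tls_get_offset and add the thread pointer;
     local-dynamic: call __tls_get_offset once for the module base
	(@TLSLDM), then add the per-symbol @DTPOFF offset;
     initial-exec: load the offset from the GOT (@GOTNTPOFF or
	@INDNTPOFF) and add the thread pointer;
     local-exec: add the constant @NTPOFF offset to the thread
	pointer directly.  */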
5143 
5144 static rtx
5145 legitimize_tls_address (rtx addr, rtx reg)
5146 {
5147   rtx new_rtx, tls_call, temp, base, r2;
5148   rtx_insn *insn;
5149 
5150   if (GET_CODE (addr) == SYMBOL_REF)
5151     switch (tls_symbolic_operand (addr))
5152       {
5153       case TLS_MODEL_GLOBAL_DYNAMIC:
5154 	start_sequence ();
5155 	r2 = gen_rtx_REG (Pmode, 2);
5156 	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5157 	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5158 	new_rtx = force_const_mem (Pmode, new_rtx);
5159 	emit_move_insn (r2, new_rtx);
5160 	s390_emit_tls_call_insn (r2, tls_call);
5161 	insn = get_insns ();
5162 	end_sequence ();
5163 
5164 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5165 	temp = gen_reg_rtx (Pmode);
5166 	emit_libcall_block (insn, temp, r2, new_rtx);
5167 
5168 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5169 	if (reg != 0)
5170 	  {
5171 	    s390_load_address (reg, new_rtx);
5172 	    new_rtx = reg;
5173 	  }
5174 	break;
5175 
5176       case TLS_MODEL_LOCAL_DYNAMIC:
5177 	start_sequence ();
5178 	r2 = gen_rtx_REG (Pmode, 2);
5179 	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5180 	new_rtx = gen_rtx_CONST (Pmode, tls_call);
5181 	new_rtx = force_const_mem (Pmode, new_rtx);
5182 	emit_move_insn (r2, new_rtx);
5183 	s390_emit_tls_call_insn (r2, tls_call);
5184 	insn = get_insns ();
5185 	end_sequence ();
5186 
5187 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5188 	temp = gen_reg_rtx (Pmode);
5189 	emit_libcall_block (insn, temp, r2, new_rtx);
5190 
5191 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5192 	base = gen_reg_rtx (Pmode);
5193 	s390_load_address (base, new_rtx);
5194 
5195 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5196 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5197 	new_rtx = force_const_mem (Pmode, new_rtx);
5198 	temp = gen_reg_rtx (Pmode);
5199 	emit_move_insn (temp, new_rtx);
5200 
5201 	new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5202 	if (reg != 0)
5203 	  {
5204 	    s390_load_address (reg, new_rtx);
5205 	    new_rtx = reg;
5206 	  }
5207 	break;
5208 
5209       case TLS_MODEL_INITIAL_EXEC:
5210 	if (flag_pic == 1)
5211 	  {
5212 	    /* Assume GOT offset < 4k.  This is handled the same way
5213 	       in both 31- and 64-bit code.  */
5214 
5215 	    if (reload_in_progress || reload_completed)
5216 	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5217 
5218 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5219 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5220 	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5221 	    new_rtx = gen_const_mem (Pmode, new_rtx);
5222 	    temp = gen_reg_rtx (Pmode);
5223 	    emit_move_insn (temp, new_rtx);
5224 	  }
5225 	else
5226 	  {
5227 	    /* If the GOT offset might be >= 4k, we determine the position
5228 	       of the GOT entry via a PC-relative LARL.  */
5229 
5230 	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5231 	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5232 	    temp = gen_reg_rtx (Pmode);
5233 	    emit_move_insn (temp, new_rtx);
5234 
5235 	    new_rtx = gen_const_mem (Pmode, temp);
5236 	    temp = gen_reg_rtx (Pmode);
5237 	    emit_move_insn (temp, new_rtx);
5238 	  }
5239 
5240 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5241 	if (reg != 0)
5242 	  {
5243 	    s390_load_address (reg, new_rtx);
5244 	    new_rtx = reg;
5245 	  }
5246 	break;
5247 
5248       case TLS_MODEL_LOCAL_EXEC:
5249 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5250 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5251 	new_rtx = force_const_mem (Pmode, new_rtx);
5252 	temp = gen_reg_rtx (Pmode);
5253 	emit_move_insn (temp, new_rtx);
5254 
5255 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5256 	if (reg != 0)
5257 	  {
5258 	    s390_load_address (reg, new_rtx);
5259 	    new_rtx = reg;
5260 	  }
5261 	break;
5262 
5263       default:
5264 	gcc_unreachable ();
5265       }
5266 
5267   else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5268     {
5269       switch (XINT (XEXP (addr, 0), 1))
5270 	{
5271 	case UNSPEC_INDNTPOFF:
5272 	  new_rtx = addr;
5273 	  break;
5274 
5275 	default:
5276 	  gcc_unreachable ();
5277 	}
5278     }
5279 
5280   else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5281 	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5282     {
5283       new_rtx = XEXP (XEXP (addr, 0), 0);
5284       if (GET_CODE (new_rtx) != SYMBOL_REF)
5285 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5286 
5287       new_rtx = legitimize_tls_address (new_rtx, reg);
5288       new_rtx = plus_constant (Pmode, new_rtx,
5289 			       INTVAL (XEXP (XEXP (addr, 0), 1)));
5290       new_rtx = force_operand (new_rtx, 0);
5291     }
5292 
5293   else
5294     gcc_unreachable ();  /* for now ... */
5295 
5296   return new_rtx;
5297 }
5298 
5299 /* Emit insns making the address in operands[1] valid for a standard
5300    move to operands[0].  operands[1] is replaced by an address which
5301    should be used instead of the former RTX to emit the move
5302    pattern.  */
5303 
5304 void
5305 emit_symbolic_move (rtx *operands)
5306 {
5307   rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5308 
5309   if (GET_CODE (operands[0]) == MEM)
5310     operands[1] = force_reg (Pmode, operands[1]);
5311   else if (TLS_SYMBOLIC_CONST (operands[1]))
5312     operands[1] = legitimize_tls_address (operands[1], temp);
5313   else if (flag_pic)
5314     operands[1] = legitimize_pic_address (operands[1], temp);
5315 }
5316 
5317 /* Try machine-dependent ways of modifying an illegitimate address X
5318    to be legitimate.  If we find one, return the new, valid address.
5319 
5320    OLDX is the address as it was before break_out_memory_refs was called.
5321    In some cases it is useful to look at this to decide what needs to be done.
5322 
5323    MODE is the mode of the operand pointed to by X.
5324 
5325    When -fpic is used, special handling is needed for symbolic references.
5326    See comments by legitimize_pic_address for details.  */
5327 
5328 static rtx
5329 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5330 			 machine_mode mode ATTRIBUTE_UNUSED)
5331 {
5332   rtx constant_term = const0_rtx;
5333 
5334   if (TLS_SYMBOLIC_CONST (x))
5335     {
5336       x = legitimize_tls_address (x, 0);
5337 
5338       if (s390_legitimate_address_p (mode, x, FALSE))
5339 	return x;
5340     }
5341   else if (GET_CODE (x) == PLUS
5342 	   && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5343 	       || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5344     {
5345       return x;
5346     }
5347   else if (flag_pic)
5348     {
5349       if (SYMBOLIC_CONST (x)
5350 	  || (GET_CODE (x) == PLUS
5351 	      && (SYMBOLIC_CONST (XEXP (x, 0))
5352 		  || SYMBOLIC_CONST (XEXP (x, 1)))))
5353 	  x = legitimize_pic_address (x, 0);
5354 
5355       if (s390_legitimate_address_p (mode, x, FALSE))
5356 	return x;
5357     }
5358 
5359   x = eliminate_constant_term (x, &constant_term);
5360 
5361   /* Optimize loading of large displacements by splitting them
5362      into the multiple of 4K and the rest; this allows the
5363      former to be CSE'd if possible.
5364 
5365      Don't do this if the displacement is added to a register
5366      pointing into the stack frame, as the offsets will
5367      change later anyway.  */
5368 
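  /* Editor's note -- worked example, not part of the original source:
     assuming a displacement of 0x12345, the code below computes
     lower = 0x12345 & 0xfff = 0x345 and upper = 0x12345 ^ 0x345 = 0x12000.
     The 4K-aligned part 0x12000 goes into a fresh register (and can be
     CSE'd with neighbouring addresses), while 0x345 remains as a short
     displacement.  */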
5369   if (GET_CODE (constant_term) == CONST_INT
5370       && !TARGET_LONG_DISPLACEMENT
5371       && !DISP_IN_RANGE (INTVAL (constant_term))
5372       && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5373     {
5374       HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5375       HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5376 
5377       rtx temp = gen_reg_rtx (Pmode);
5378       rtx val  = force_operand (GEN_INT (upper), temp);
5379       if (val != temp)
5380 	emit_move_insn (temp, val);
5381 
5382       x = gen_rtx_PLUS (Pmode, x, temp);
5383       constant_term = GEN_INT (lower);
5384     }
5385 
5386   if (GET_CODE (x) == PLUS)
5387     {
5388       if (GET_CODE (XEXP (x, 0)) == REG)
5389 	{
5390 	  rtx temp = gen_reg_rtx (Pmode);
5391 	  rtx val  = force_operand (XEXP (x, 1), temp);
5392 	  if (val != temp)
5393 	    emit_move_insn (temp, val);
5394 
5395 	  x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5396 	}
5397 
5398       else if (GET_CODE (XEXP (x, 1)) == REG)
5399 	{
5400 	  rtx temp = gen_reg_rtx (Pmode);
5401 	  rtx val  = force_operand (XEXP (x, 0), temp);
5402 	  if (val != temp)
5403 	    emit_move_insn (temp, val);
5404 
5405 	  x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5406 	}
5407     }
5408 
5409   if (constant_term != const0_rtx)
5410     x = gen_rtx_PLUS (Pmode, x, constant_term);
5411 
5412   return x;
5413 }
5414 
5415 /* Try a machine-dependent way of reloading an illegitimate address AD
5416    operand.  If we find one, push the reload and return the new address.
5417 
5418    MODE is the mode of the enclosing MEM.  OPNUM is the operand number
5419    and TYPE is the reload type of the current reload.  */
5420 
5421 rtx
5422 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5423 			   int opnum, int type)
5424 {
5425   if (!optimize || TARGET_LONG_DISPLACEMENT)
5426     return NULL_RTX;
5427 
5428   if (GET_CODE (ad) == PLUS)
5429     {
5430       rtx tem = simplify_binary_operation (PLUS, Pmode,
5431 					   XEXP (ad, 0), XEXP (ad, 1));
5432       if (tem)
5433 	ad = tem;
5434     }
5435 
5436   if (GET_CODE (ad) == PLUS
5437       && GET_CODE (XEXP (ad, 0)) == REG
5438       && GET_CODE (XEXP (ad, 1)) == CONST_INT
5439       && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5440     {
5441       HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5442       HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5443       rtx cst, tem, new_rtx;
5444 
5445       cst = GEN_INT (upper);
5446       if (!legitimate_reload_constant_p (cst))
5447 	cst = force_const_mem (Pmode, cst);
5448 
5449       tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5450       new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5451 
5452       push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5453 		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5454 		   opnum, (enum reload_type) type);
5455       return new_rtx;
5456     }
5457 
5458   return NULL_RTX;
5459 }
5460 
5461 /* Emit code to copy LEN bytes from SRC to DST.  */
5462 
5463 bool
5464 s390_expand_cpymem (rtx dst, rtx src, rtx len)
5465 {
5466   /* When tuning for z10 or higher we rely on the Glibc functions to
5467      do the right thing.  We only generate inline code for constant
5468      lengths below 64k.  */
5469   if (s390_tune >= PROCESSOR_2097_Z10
5470       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5471     return false;
5472 
5473   /* Expand memcpy for constant length operands without a loop if it
5474      is shorter that way.
5475 
5476      With a constant length argument a
5477      memcpy loop (without pfd) is 36 bytes -> 6 * mvc  */
5478   if (GET_CODE (len) == CONST_INT
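  /* Editor's note -- worked example, not part of the original source:
     with these thresholds a constant length of 600 bytes (and MVCLE not
     preferred) is expanded by the loop below into three MVCs covering
     256 + 256 + 88 bytes, i.e. length operands 255, 255 and 87, since
     MVC encodes length minus one.  */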
5479       && INTVAL (len) >= 0
5480       && INTVAL (len) <= 256 * 6
5481       && (!TARGET_MVCLE || INTVAL (len) <= 256))
5482     {
5483       HOST_WIDE_INT o, l;
5484 
5485       for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5486 	{
5487 	  rtx newdst = adjust_address (dst, BLKmode, o);
5488 	  rtx newsrc = adjust_address (src, BLKmode, o);
5489 	  emit_insn (gen_cpymem_short (newdst, newsrc,
5490 				       GEN_INT (l > 256 ? 255 : l - 1)));
5491 	}
5492     }
5493 
5494   else if (TARGET_MVCLE)
5495     {
5496       emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5497     }
5498 
5499   else
5500     {
5501       rtx dst_addr, src_addr, count, blocks, temp;
5502       rtx_code_label *loop_start_label = gen_label_rtx ();
5503       rtx_code_label *loop_end_label = gen_label_rtx ();
5504       rtx_code_label *end_label = gen_label_rtx ();
5505       machine_mode mode;
5506 
5507       mode = GET_MODE (len);
5508       if (mode == VOIDmode)
5509 	mode = Pmode;
5510 
5511       dst_addr = gen_reg_rtx (Pmode);
5512       src_addr = gen_reg_rtx (Pmode);
5513       count = gen_reg_rtx (mode);
5514       blocks = gen_reg_rtx (mode);
5515 
5516       convert_move (count, len, 1);
5517       emit_cmp_and_jump_insns (count, const0_rtx,
5518 			       EQ, NULL_RTX, mode, 1, end_label);
5519 
5520       emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5521       emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5522       dst = change_address (dst, VOIDmode, dst_addr);
5523       src = change_address (src, VOIDmode, src_addr);
5524 
5525       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5526 			   OPTAB_DIRECT);
5527       if (temp != count)
5528 	emit_move_insn (count, temp);
5529 
5530       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5531 			   OPTAB_DIRECT);
5532       if (temp != blocks)
5533 	emit_move_insn (blocks, temp);
5534 
5535       emit_cmp_and_jump_insns (blocks, const0_rtx,
5536 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5537 
5538       emit_label (loop_start_label);
5539 
5540       if (TARGET_Z10
5541 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5542 	{
5543 	  rtx prefetch;
5544 
5545 	  /* Issue a read prefetch for the +3 cache line.  */
5546 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5547 				   const0_rtx, const0_rtx);
5548 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5549 	  emit_insn (prefetch);
5550 
5551 	  /* Issue a write prefetch for the +3 cache line.  */
5552 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5553 				   const1_rtx, const0_rtx);
5554 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5555 	  emit_insn (prefetch);
5556 	}
5557 
5558       emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
5559       s390_load_address (dst_addr,
5560 			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5561       s390_load_address (src_addr,
5562 			 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5563 
5564       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5565 			   OPTAB_DIRECT);
5566       if (temp != blocks)
5567 	emit_move_insn (blocks, temp);
5568 
5569       emit_cmp_and_jump_insns (blocks, const0_rtx,
5570 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5571 
5572       emit_jump (loop_start_label);
5573       emit_label (loop_end_label);
5574 
5575       emit_insn (gen_cpymem_short (dst, src,
5576 				   convert_to_mode (Pmode, count, 1)));
5577       emit_label (end_label);
5578     }
5579   return true;
5580 }
5581 
5582 /* Emit code to set LEN bytes at DST to VAL.
5583    Make use of clrmem if VAL is zero.  */
5584 
5585 void
5586 s390_expand_setmem (rtx dst, rtx len, rtx val)
5587 {
5588   if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5589     return;
5590 
5591   gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5592 
5593   /* Expand setmem/clrmem for a constant length operand without a
5594      loop if it will be shorter that way.
5595      clrmem loop (with PFD)    is 30 bytes -> 5 * xc
5596      clrmem loop (without PFD) is 24 bytes -> 4 * xc
5597      setmem loop (with PFD)    is 38 bytes -> ~4 * (mvi/stc + mvc)
5598      setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
5599   if (GET_CODE (len) == CONST_INT
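  /* Editor's note -- worked example, not part of the original source:
     clearing 1024 constant bytes takes four 256-byte XCs below, roughly
     4 * 6 = 24 bytes of code, which matches the quoted size of the
     clrmem loop; longer lengths fall through to the MVCLE or loop
     variants.  */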
5600       && ((val == const0_rtx
5601 	   && (INTVAL (len) <= 256 * 4
5602 	       || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD(val,len))))
5603 	  || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5604       && (!TARGET_MVCLE || INTVAL (len) <= 256))
5605     {
5606       HOST_WIDE_INT o, l;
5607 
5608       if (val == const0_rtx)
5609 	/* clrmem: emit 256 byte blockwise XCs.  */
5610 	for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5611 	  {
5612 	    rtx newdst = adjust_address (dst, BLKmode, o);
5613 	    emit_insn (gen_clrmem_short (newdst,
5614 					 GEN_INT (l > 256 ? 255 : l - 1)));
5615 	  }
5616       else
5617 	/* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5618 	   setting first byte to val and using a 256 byte mvc with one
5619 	   byte overlap to propagate the byte.  */
5620 	for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
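	/* Editor's note -- illustrative, not part of the original source:
	   the propagation works because MVC copies strictly byte by byte
	   from left to right, so with a destructive one-byte overlap each
	   copied byte re-reads the byte just written and the single value
	   stored by the mvi/stc is replicated across the block.  */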
5621 	  {
5622 	    rtx newdst = adjust_address (dst, BLKmode, o);
5623 	    emit_move_insn (adjust_address (dst, QImode, o), val);
5624 	    if (l > 1)
5625 	      {
5626 		rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5627 		emit_insn (gen_cpymem_short (newdstp1, newdst,
5628 					     GEN_INT (l > 257 ? 255 : l - 2)));
5629 	      }
5630 	  }
5631     }
5632 
5633   else if (TARGET_MVCLE)
5634     {
5635       val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5636       if (TARGET_64BIT)
5637 	emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5638 				       val));
5639       else
5640 	emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5641 				       val));
5642     }
5643 
5644   else
5645     {
5646       rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5647       rtx_code_label *loop_start_label = gen_label_rtx ();
5648       rtx_code_label *onebyte_end_label = gen_label_rtx ();
5649       rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5650       rtx_code_label *restbyte_end_label = gen_label_rtx ();
5651       machine_mode mode;
5652 
5653       mode = GET_MODE (len);
5654       if (mode == VOIDmode)
5655 	mode = Pmode;
5656 
5657       dst_addr = gen_reg_rtx (Pmode);
5658       count = gen_reg_rtx (mode);
5659       blocks = gen_reg_rtx (mode);
5660 
5661       convert_move (count, len, 1);
5662       emit_cmp_and_jump_insns (count, const0_rtx,
5663 			       EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5664 			       profile_probability::very_unlikely ());
5665 
5666       /* We need to make a copy of the target address since memset is
5667 	 supposed to return it unmodified.  We have to make it here
5668 	 already since the new reg is used at onebyte_end_label.  */
5669       emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5670       dst = change_address (dst, VOIDmode, dst_addr);
5671 
5672       if (val != const0_rtx)
5673 	{
5674 	  /* When using the overlapping mvc the original target
5675 	     address is only accessed as single byte entity (even by
5676 	     address is only accessed as a single byte entity (even by
5677 	  set_mem_size (dst, 1);
5678 	  dstp1 = adjust_address (dst, VOIDmode, 1);
5679 	  emit_cmp_and_jump_insns (count,
5680 				   const1_rtx, EQ, NULL_RTX, mode, 1,
5681 				   onebyte_end_label,
5682 				   profile_probability::very_unlikely ());
5683 	}
5684 
5685       /* There is one unconditional (mvi+mvc)/xc after the loop
5686 	 dealing with the rest of the bytes; subtracting two (mvi+mvc)
5687 	 or one (xc) here leaves this number of bytes to be handled by
5688 	 it.  */
5689       temp = expand_binop (mode, add_optab, count,
5690 			   val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5691 			   count, 1, OPTAB_DIRECT);
5692       if (temp != count)
5693 	emit_move_insn (count, temp);
5694 
5695       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5696 			   OPTAB_DIRECT);
5697       if (temp != blocks)
5698 	emit_move_insn (blocks, temp);
5699 
5700       emit_cmp_and_jump_insns (blocks, const0_rtx,
5701 			       EQ, NULL_RTX, mode, 1, restbyte_end_label);
5702 
5703       emit_jump (loop_start_label);
5704 
5705       if (val != const0_rtx)
5706 	{
5707 	  /* The 1 byte != 0 special case.  Not handled efficiently
5708 	     since we require two jumps for that.  However, this
5709 	     should be very rare.  */
5710 	  emit_label (onebyte_end_label);
5711 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5712 	  emit_jump (zerobyte_end_label);
5713 	}
5714 
5715       emit_label (loop_start_label);
5716 
5717       if (TARGET_SETMEM_PFD (val, len))
5718 	{
5719 	  /* Issue a write prefetch.  */
5720 	  rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5721 	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5722 				       const1_rtx, const0_rtx);
5723 	  emit_insn (prefetch);
5724 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5725 	}
5726 
5727       if (val == const0_rtx)
5728 	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5729       else
5730 	{
5731 	  /* Set the first byte in the block to the value and use an
5732 	     overlapping mvc for the block.  */
5733 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5734 	  emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
5735 	}
5736       s390_load_address (dst_addr,
5737 			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5738 
5739       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5740 			   OPTAB_DIRECT);
5741       if (temp != blocks)
5742 	emit_move_insn (blocks, temp);
5743 
5744       emit_cmp_and_jump_insns (blocks, const0_rtx,
5745 			       NE, NULL_RTX, mode, 1, loop_start_label);
5746 
5747       emit_label (restbyte_end_label);
5748 
5749       if (val == const0_rtx)
5750 	emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5751       else
5752 	{
5753 	  /* Set the first byte in the block to the value and use an
5754 	     overlapping mvc for the block.  */
5755 	  emit_move_insn (adjust_address (dst, QImode, 0), val);
5756 	  /* execute only uses the lowest 8 bits of count, which is
5757 	     exactly what we need here.  */
5758 	  emit_insn (gen_cpymem_short (dstp1, dst,
5759 				       convert_to_mode (Pmode, count, 1)));
5760 	}
5761 
5762       emit_label (zerobyte_end_label);
5763     }
5764 }
5765 
5766 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5767    and return the result in TARGET.  */
5768 
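/* Editor's note -- illustrative, not part of the original source:
   in the inline loop below the operands are compared in 256-byte blocks
   with CLC; as soon as a block differs the code branches out of the
   loop, and the final cmpint pattern converts the resulting condition
   code into the usual negative/zero/positive memcmp-style value in
   TARGET.  */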
5769 bool
5770 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5771 {
5772   rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5773   rtx tmp;
5774 
5775   /* When tuning for z10 or higher we rely on the Glibc functions to
5776      do the right thing.  We only generate inline code for constant
5777      lengths below 64k.  */
5778   if (s390_tune >= PROCESSOR_2097_Z10
5779       && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5780     return false;
5781 
5782   /* As the result of CMPINT is inverted compared to what we need,
5783      we have to swap the operands.  */
5784   tmp = op0; op0 = op1; op1 = tmp;
5785 
5786   if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5787     {
5788       if (INTVAL (len) > 0)
5789 	{
5790 	  emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5791 	  emit_insn (gen_cmpint (target, ccreg));
5792 	}
5793       else
5794 	emit_move_insn (target, const0_rtx);
5795     }
5796   else if (TARGET_MVCLE)
5797     {
5798       emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5799       emit_insn (gen_cmpint (target, ccreg));
5800     }
5801   else
5802     {
5803       rtx addr0, addr1, count, blocks, temp;
5804       rtx_code_label *loop_start_label = gen_label_rtx ();
5805       rtx_code_label *loop_end_label = gen_label_rtx ();
5806       rtx_code_label *end_label = gen_label_rtx ();
5807       machine_mode mode;
5808 
5809       mode = GET_MODE (len);
5810       if (mode == VOIDmode)
5811 	mode = Pmode;
5812 
5813       addr0 = gen_reg_rtx (Pmode);
5814       addr1 = gen_reg_rtx (Pmode);
5815       count = gen_reg_rtx (mode);
5816       blocks = gen_reg_rtx (mode);
5817 
5818       convert_move (count, len, 1);
5819       emit_cmp_and_jump_insns (count, const0_rtx,
5820 			       EQ, NULL_RTX, mode, 1, end_label);
5821 
5822       emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5823       emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5824       op0 = change_address (op0, VOIDmode, addr0);
5825       op1 = change_address (op1, VOIDmode, addr1);
5826 
5827       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5828 			   OPTAB_DIRECT);
5829       if (temp != count)
5830 	emit_move_insn (count, temp);
5831 
5832       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5833 			   OPTAB_DIRECT);
5834       if (temp != blocks)
5835 	emit_move_insn (blocks, temp);
5836 
5837       emit_cmp_and_jump_insns (blocks, const0_rtx,
5838 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5839 
5840       emit_label (loop_start_label);
5841 
5842       if (TARGET_Z10
5843 	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5844 	{
5845 	  rtx prefetch;
5846 
5847 	  /* Issue a read prefetch for the +2 cache line of operand 1.  */
5848 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5849 				   const0_rtx, const0_rtx);
5850 	  emit_insn (prefetch);
5851 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5852 
5853 	  /* Issue a read prefetch for the +2 cache line of operand 2.  */
5854 	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5855 				   const0_rtx, const0_rtx);
5856 	  emit_insn (prefetch);
5857 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5858 	}
5859 
5860       emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5861       temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5862       temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5863 			gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5864       temp = gen_rtx_SET (pc_rtx, temp);
5865       emit_jump_insn (temp);
5866 
5867       s390_load_address (addr0,
5868 			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5869       s390_load_address (addr1,
5870 			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5871 
5872       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5873 			   OPTAB_DIRECT);
5874       if (temp != blocks)
5875 	emit_move_insn (blocks, temp);
5876 
5877       emit_cmp_and_jump_insns (blocks, const0_rtx,
5878 			       EQ, NULL_RTX, mode, 1, loop_end_label);
5879 
5880       emit_jump (loop_start_label);
5881       emit_label (loop_end_label);
5882 
5883       emit_insn (gen_cmpmem_short (op0, op1,
5884 				   convert_to_mode (Pmode, count, 1)));
5885       emit_label (end_label);
5886 
5887       emit_insn (gen_cmpint (target, ccreg));
5888     }
5889   return true;
5890 }
5891 
5892 /* Emit a conditional jump to LABEL for condition code mask MASK using
5893    comparison operator COMPARISON.  Return the emitted jump insn.  */
5894 
5895 static rtx_insn *
5896 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5897 {
5898   rtx temp;
5899 
5900   gcc_assert (comparison == EQ || comparison == NE);
5901   gcc_assert (mask > 0 && mask < 15);
5902 
5903   temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5904 			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5905   temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5906 			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5907   temp = gen_rtx_SET (pc_rtx, temp);
5908   return emit_jump_insn (temp);
5909 }
5910 
5911 /* Emit the instructions to implement strlen of STRING and store the
5912    result in TARGET.  The string has the known ALIGNMENT.  This
5913    version uses vector instructions and is therefore not appropriate
5914    for targets prior to z13.  */
5915 
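/* Editor's note -- algorithm sketch, not part of the original source:
   the expansion below first handles a possibly unaligned head with a
   length-limited vector load (vll) so that no access crosses into the
   next 16-byte block, then iterates over aligned 16-byte chunks using
   vfene (vector find element not equal) with zero search to locate the
   terminating zero byte, and finally combines the block index with the
   byte index extracted from the vfene result to form the length.  */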
5916 void
5917 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5918 {
5919   rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5920   rtx str_reg = gen_reg_rtx (V16QImode);
5921   rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5922   rtx str_idx_reg = gen_reg_rtx (Pmode);
5923   rtx result_reg = gen_reg_rtx (V16QImode);
5924   rtx is_aligned_label = gen_label_rtx ();
5925   rtx into_loop_label = NULL_RTX;
5926   rtx loop_start_label = gen_label_rtx ();
5927   rtx temp;
5928   rtx len = gen_reg_rtx (QImode);
5929   rtx cond;
5930 
5931   s390_load_address (str_addr_base_reg, XEXP (string, 0));
5932   emit_move_insn (str_idx_reg, const0_rtx);
5933 
5934   if (INTVAL (alignment) < 16)
5935     {
5936       /* Check whether the address happens to be properly aligned and,
5937 	 if so, jump directly to the aligned loop.  */
5938       emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5939 					    str_addr_base_reg, GEN_INT (15)),
5940 			       const0_rtx, EQ, NULL_RTX,
5941 			       Pmode, 1, is_aligned_label);
5942 
5943       temp = gen_reg_rtx (Pmode);
5944       temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5945 			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
5946       gcc_assert (REG_P (temp));
5947       highest_index_to_load_reg =
5948 	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5949 		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
5950       gcc_assert (REG_P (highest_index_to_load_reg));
5951       emit_insn (gen_vllv16qi (str_reg,
5952 		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
5953 		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5954 
5955       into_loop_label = gen_label_rtx ();
5956       s390_emit_jump (into_loop_label, NULL_RTX);
5957       emit_barrier ();
5958     }
5959 
5960   emit_label (is_aligned_label);
5961   LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5962 
5963   /* From this point on we only perform 16-byte aligned
5964      loads.  */
5965   emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5966 
5967   emit_label (loop_start_label);
5968   LABEL_NUSES (loop_start_label) = 1;
5969 
5970   /* Load 16 bytes of the string into VR.  */
5971   emit_move_insn (str_reg,
5972 		  gen_rtx_MEM (V16QImode,
5973 			       gen_rtx_PLUS (Pmode, str_idx_reg,
5974 					     str_addr_base_reg)));
5975   if (into_loop_label != NULL_RTX)
5976     {
5977       emit_label (into_loop_label);
5978       LABEL_NUSES (into_loop_label) = 1;
5979     }
5980 
5981   /* Increment string index by 16 bytes.  */
5982   expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5983 		str_idx_reg, 1, OPTAB_DIRECT);
5984 
5985   emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5986 				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5987 
5988   add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5989 		    REG_BR_PROB,
5990 		    profile_probability::very_likely ().to_reg_br_prob_note ());
5991   emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5992 
5993   /* If the string pointer wasn't aligned we have loaded less than 16
5994      bytes and the remaining bytes got filled with zeros (by vll).
5995      Now we have to check whether the resulting index lies within the
5996      bytes actually part of the string.  */
5997 
5998   cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5999 			    highest_index_to_load_reg);
6000   s390_load_address (highest_index_to_load_reg,
6001 		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
6002 				   const1_rtx));
6003   if (TARGET_64BIT)
6004     emit_insn (gen_movdicc (str_idx_reg, cond,
6005 			    highest_index_to_load_reg, str_idx_reg));
6006   else
6007     emit_insn (gen_movsicc (str_idx_reg, cond,
6008 			    highest_index_to_load_reg, str_idx_reg));
6009 
6010   add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
6011 			profile_probability::very_unlikely ());
6012 
6013   expand_binop (Pmode, add_optab, str_idx_reg,
6014 		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
6015   /* FIXME: len is already zero extended - so avoid the llgcr emitted
6016      here.  */
6017   temp = expand_binop (Pmode, add_optab, str_idx_reg,
6018 		       convert_to_mode (Pmode, len, 1),
6019 		       target, 1, OPTAB_DIRECT);
6020   if (temp != target)
6021     emit_move_insn (target, temp);
6022 }
6023 
6024 void
6025 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
6026 {
6027   rtx temp = gen_reg_rtx (Pmode);
6028   rtx src_addr = XEXP (src, 0);
6029   rtx dst_addr = XEXP (dst, 0);
6030   rtx src_addr_reg = gen_reg_rtx (Pmode);
6031   rtx dst_addr_reg = gen_reg_rtx (Pmode);
6032   rtx offset = gen_reg_rtx (Pmode);
6033   rtx vsrc = gen_reg_rtx (V16QImode);
6034   rtx vpos = gen_reg_rtx (V16QImode);
6035   rtx loadlen = gen_reg_rtx (SImode);
6036   rtx gpos_qi = gen_reg_rtx (QImode);
6037   rtx gpos = gen_reg_rtx (SImode);
6038   rtx done_label = gen_label_rtx ();
6039   rtx loop_label = gen_label_rtx ();
6040   rtx exit_label = gen_label_rtx ();
6041   rtx full_label = gen_label_rtx ();
6042 
6043   /* Perform a quick check for a terminating zero within the first
6044      (at most) 16 bytes and exit early if one is found.  */
6045 
6046   emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
6047   emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
6048   emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
6049   emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6050   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6051   /* gpos is the byte index if a zero was found and 16 otherwise.
6052      So if it is lower than the number of loaded bytes we have a hit.  */
6053   emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
6054 			   full_label);
6055   emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
6056 
6057   force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
6058 		      1, OPTAB_DIRECT);
6059   emit_jump (exit_label);
6060   emit_barrier ();
6061 
6062   emit_label (full_label);
6063   LABEL_NUSES (full_label) = 1;
6064 
6065   /* Calculate `offset' so that src + offset points to the last byte
6066      before 16 byte alignment.  */
6067 
6068   /* temp = src_addr & 0xf */
6069   force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6070 		      1, OPTAB_DIRECT);
6071 
6072   /* offset = 0xf - temp */
6073   emit_move_insn (offset, GEN_INT (15));
6074   force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6075 		      1, OPTAB_DIRECT);
6076 
6077   /* Store `offset' bytes in the destination string.  The quick check
6078      has loaded at least `offset' bytes into vsrc.  */
6079 
6080   emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6081 
6082   /* Advance to the next byte to be loaded.  */
6083   force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6084 		      1, OPTAB_DIRECT);
6085 
6086   /* Make sure the addresses are single regs which can be used as a
6087      base.  */
6088   emit_move_insn (src_addr_reg, src_addr);
6089   emit_move_insn (dst_addr_reg, dst_addr);
6090 
6091   /* MAIN LOOP */
6092 
6093   emit_label (loop_label);
6094   LABEL_NUSES (loop_label) = 1;
6095 
6096   emit_move_insn (vsrc,
6097 		  gen_rtx_MEM (V16QImode,
6098 			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6099 
6100   emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6101 				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6102   add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6103 		    REG_BR_PROB, profile_probability::very_unlikely ()
6104 				  .to_reg_br_prob_note ());
6105 
6106   emit_move_insn (gen_rtx_MEM (V16QImode,
6107 			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6108 		  vsrc);
6109   /* offset += 16 */
6110   force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6111 		      offset,  1, OPTAB_DIRECT);
6112 
6113   emit_jump (loop_label);
6114   emit_barrier ();
6115 
6116   /* REGULAR EXIT */
6117 
6118   /* We are done.  Add the offset of the zero character to the dst_addr
6119      pointer to get the result.  */
6120 
6121   emit_label (done_label);
6122   LABEL_NUSES (done_label) = 1;
6123 
6124   force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6125 		      1, OPTAB_DIRECT);
6126 
6127   emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6128   emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6129 
6130   emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6131 
6132   force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6133 		      1, OPTAB_DIRECT);
6134 
6135   /* EARLY EXIT */
6136 
6137   emit_label (exit_label);
6138   LABEL_NUSES (exit_label) = 1;
6139 }
6140 
6141 
6142 /* Expand conditional increment or decrement using alc/slb instructions.
6143    Should generate code setting DST to either SRC or SRC + INCREMENT,
6144    depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6145    Returns true if successful, false otherwise.
6146 
6147    That makes it possible to implement some if-constructs without jumps e.g.:
6148    (borrow = CC0 | CC1 and carry = CC2 | CC3)
6149    unsigned int a, b, c;
6150    if (a < b)  c++; -> CCU  b > a  -> CC2;    c += carry;
6151    if (a < b)  c--; -> CCL3 a - b  -> borrow; c -= borrow;
6152    if (a <= b) c++; -> CCL3 b - a  -> borrow; c += carry;
6153    if (a <= b) c--; -> CCU  a <= b -> borrow; c -= borrow;
6154 
6155    Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6156    if (a == b) c++; -> CCL3 a ^= b; 0 - a  -> borrow;    c += carry;
6157    if (a == b) c--; -> CCU  a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6158    if (a != b) c++; -> CCU  a ^= b; a > 0  -> CC2;       c += carry;
6159    if (a != b) c--; -> CCL3 a ^= b; 0 - a  -> borrow;    c -= borrow; */
6160 
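/* Editor's note -- illustrative sketch, not part of the original source:
   for the C fragment

     unsigned int a, b, c;
     if (a < b) c++;

   the expansion below emits an unsigned compare that leaves the carry in
   the condition code followed by an add-logical-with-carry of zero, so
   the increment happens without a branch; roughly:

     clr   b,a        (CC2, i.e. carry, when b > a)
     alcr  c,zero     (c += carry)

   The operand names are purely illustrative.  */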
6161 bool
6162 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6163 		   rtx dst, rtx src, rtx increment)
6164 {
6165   machine_mode cmp_mode;
6166   machine_mode cc_mode;
6167   rtx op_res;
6168   rtx insn;
6169   rtvec p;
6170   int ret;
6171 
6172   if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6173       && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6174     cmp_mode = SImode;
6175   else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6176 	   && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6177     cmp_mode = DImode;
6178   else
6179     return false;
6180 
6181   /* Try ADD LOGICAL WITH CARRY.  */
6182   if (increment == const1_rtx)
6183     {
6184       /* Determine CC mode to use.  */
6185       if (cmp_code == EQ || cmp_code == NE)
6186 	{
6187 	  if (cmp_op1 != const0_rtx)
6188 	    {
6189 	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6190 					     NULL_RTX, 0, OPTAB_WIDEN);
6191 	      cmp_op1 = const0_rtx;
6192 	    }
6193 
6194 	  cmp_code = cmp_code == EQ ? LEU : GTU;
6195 	}
6196 
6197       if (cmp_code == LTU || cmp_code == LEU)
6198 	{
6199 	  rtx tem = cmp_op0;
6200 	  cmp_op0 = cmp_op1;
6201 	  cmp_op1 = tem;
6202 	  cmp_code = swap_condition (cmp_code);
6203 	}
6204 
6205       switch (cmp_code)
6206 	{
6207 	  case GTU:
6208 	    cc_mode = CCUmode;
6209 	    break;
6210 
6211 	  case GEU:
6212 	    cc_mode = CCL3mode;
6213 	    break;
6214 
6215 	  default:
6216 	    return false;
6217 	}
6218 
6219       /* Emit comparison instruction pattern. */
6220       if (!register_operand (cmp_op0, cmp_mode))
6221 	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6222 
6223       insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6224 			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6225       /* We use insn_invalid_p here to add clobbers if required.  */
6226       ret = insn_invalid_p (emit_insn (insn), false);
6227       gcc_assert (!ret);
6228 
6229       /* Emit ALC instruction pattern.  */
6230       op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6231 			       gen_rtx_REG (cc_mode, CC_REGNUM),
6232 			       const0_rtx);
6233 
6234       if (src != const0_rtx)
6235 	{
6236 	  if (!register_operand (src, GET_MODE (dst)))
6237 	    src = force_reg (GET_MODE (dst), src);
6238 
6239 	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6240 	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6241 	}
6242 
6243       p = rtvec_alloc (2);
6244       RTVEC_ELT (p, 0) =
6245 	gen_rtx_SET (dst, op_res);
6246       RTVEC_ELT (p, 1) =
6247 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6248       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6249 
6250       return true;
6251     }
6252 
6253   /* Try SUBTRACT LOGICAL WITH BORROW.  */
6254   if (increment == constm1_rtx)
6255     {
6256       /* Determine CC mode to use.  */
6257       if (cmp_code == EQ || cmp_code == NE)
6258 	{
6259 	  if (cmp_op1 != const0_rtx)
6260 	    {
6261 	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6262 					     NULL_RTX, 0, OPTAB_WIDEN);
6263 	      cmp_op1 = const0_rtx;
6264 	    }
6265 
6266 	  cmp_code = cmp_code == EQ ? LEU : GTU;
6267 	}
6268 
6269       if (cmp_code == GTU || cmp_code == GEU)
6270 	{
6271 	  rtx tem = cmp_op0;
6272 	  cmp_op0 = cmp_op1;
6273 	  cmp_op1 = tem;
6274 	  cmp_code = swap_condition (cmp_code);
6275 	}
6276 
6277       switch (cmp_code)
6278 	{
6279 	  case LEU:
6280 	    cc_mode = CCUmode;
6281 	    break;
6282 
6283 	  case LTU:
6284 	    cc_mode = CCL3mode;
6285 	    break;
6286 
6287 	  default:
6288 	    return false;
6289 	}
6290 
6291       /* Emit comparison instruction pattern. */
6292       if (!register_operand (cmp_op0, cmp_mode))
6293 	cmp_op0 = force_reg (cmp_mode, cmp_op0);
6294 
6295       insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6296 			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6297       /* We use insn_invalid_p here to add clobbers if required.  */
6298       ret = insn_invalid_p (emit_insn (insn), false);
6299       gcc_assert (!ret);
6300 
6301       /* Emit SLB instruction pattern.  */
6302       if (!register_operand (src, GET_MODE (dst)))
6303 	src = force_reg (GET_MODE (dst), src);
6304 
6305       op_res = gen_rtx_MINUS (GET_MODE (dst),
6306 			      gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6307 			      gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6308 					      gen_rtx_REG (cc_mode, CC_REGNUM),
6309 					      const0_rtx));
6310       p = rtvec_alloc (2);
6311       RTVEC_ELT (p, 0) =
6312 	gen_rtx_SET (dst, op_res);
6313       RTVEC_ELT (p, 1) =
6314 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6315       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6316 
6317       return true;
6318     }
6319 
6320   return false;
6321 }
6322 
6323 /* Expand code for the insv template. Return true if successful.  */
6324 
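/* Editor's note -- summary sketch, not part of the original source:
   depending on the operands the expansion below picks one of several
   strategies: INSERT IMMEDIATE (IILL et al.) for constant sources into
   registers, STORE CHARACTERS UNDER MASK (STCM et al.) for stores of
   byte-aligned bit-fields, a strict_low_part move for loads into the
   low end of a register, and ROTATE THEN INSERT SELECTED BITS (RISBG)
   on z10 and later for the general register case.  */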
6325 bool
6326 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6327 {
6328   int bitsize = INTVAL (op1);
6329   int bitpos = INTVAL (op2);
6330   machine_mode mode = GET_MODE (dest);
6331   machine_mode smode;
6332   int smode_bsize, mode_bsize;
6333   rtx op, clobber;
6334 
6335   if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6336     return false;
6337 
6338   /* Generate INSERT IMMEDIATE (IILL et al).  */
6339   /* (set (ze (reg)) (const_int)).  */
6340   if (TARGET_ZARCH
6341       && register_operand (dest, word_mode)
6342       && (bitpos % 16) == 0
6343       && (bitsize % 16) == 0
6344       && const_int_operand (src, VOIDmode))
6345     {
6346       HOST_WIDE_INT val = INTVAL (src);
6347       int regpos = bitpos + bitsize;
6348 
6349       while (regpos > bitpos)
6350 	{
6351 	  machine_mode putmode;
6352 	  int putsize;
6353 
6354 	  if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6355 	    putmode = SImode;
6356 	  else
6357 	    putmode = HImode;
6358 
6359 	  putsize = GET_MODE_BITSIZE (putmode);
6360 	  regpos -= putsize;
6361 	  emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6362 						GEN_INT (putsize),
6363 						GEN_INT (regpos)),
6364 			  gen_int_mode (val, putmode));
6365 	  val >>= putsize;
6366 	}
6367       gcc_assert (regpos == bitpos);
6368       return true;
6369     }
6370 
6371   smode = smallest_int_mode_for_size (bitsize);
6372   smode_bsize = GET_MODE_BITSIZE (smode);
6373   mode_bsize = GET_MODE_BITSIZE (mode);
6374 
6375   /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
6376   if (bitpos == 0
6377       && (bitsize % BITS_PER_UNIT) == 0
6378       && MEM_P (dest)
6379       && (register_operand (src, word_mode)
6380 	  || const_int_operand (src, VOIDmode)))
6381     {
6382       /* Emit standard pattern if possible.  */
6383       if (smode_bsize == bitsize)
6384 	{
6385 	  emit_move_insn (adjust_address (dest, smode, 0),
6386 			  gen_lowpart (smode, src));
6387 	  return true;
6388 	}
6389 
6390       /* (set (ze (mem)) (const_int)).  */
6391       else if (const_int_operand (src, VOIDmode))
6392 	{
6393 	  int size = bitsize / BITS_PER_UNIT;
6394 	  rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6395 					BLKmode,
6396 					UNITS_PER_WORD - size);
6397 
6398 	  dest = adjust_address (dest, BLKmode, 0);
6399 	  set_mem_size (dest, size);
6400 	  s390_expand_cpymem (dest, src_mem, GEN_INT (size));
6401 	  return true;
6402 	}
6403 
6404       /* (set (ze (mem)) (reg)).  */
6405       else if (register_operand (src, word_mode))
6406 	{
6407 	  if (bitsize <= 32)
6408 	    emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6409 						  const0_rtx), src);
6410 	  else
6411 	    {
6412 	      /* Emit st,stcmh sequence.  */
6413 	      int stcmh_width = bitsize - 32;
6414 	      int size = stcmh_width / BITS_PER_UNIT;
6415 
6416 	      emit_move_insn (adjust_address (dest, SImode, size),
6417 			      gen_lowpart (SImode, src));
6418 	      set_mem_size (dest, size);
6419 	      emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6420 						    GEN_INT (stcmh_width),
6421 						    const0_rtx),
6422 			      gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6423 	    }
6424 	  return true;
6425 	}
6426     }
6427 
6428   /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
6429   if ((bitpos % BITS_PER_UNIT) == 0
6430       && (bitsize % BITS_PER_UNIT) == 0
6431       && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6432       && MEM_P (src)
6433       && (mode == DImode || mode == SImode)
6434       && register_operand (dest, mode))
6435     {
6436       /* Emit a strict_low_part pattern if possible.  */
6437       if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6438 	{
6439 	  op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6440 	  op = gen_rtx_SET (op, gen_lowpart (smode, src));
6441 	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6442 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6443 	  return true;
6444 	}
6445 
6446       /* ??? There are more powerful versions of ICM that are not
6447 	 completely represented in the md file.  */
6448     }
6449 
6450   /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
6451   if (TARGET_Z10 && (mode == DImode || mode == SImode))
6452     {
6453       machine_mode mode_s = GET_MODE (src);
6454 
6455       if (CONSTANT_P (src))
6456 	{
6457 	  /* For constant zero values the representation with AND
6458 	     appears to be folded in more situations than the (set
6459 	     (zero_extract) ...).
6460 	     We only do this when the start and end of the bitfield
6461 	     remain in the same SImode chunk.  That way nihf or nilf
6462 	     can be used.
6463 	     The AND patterns might still generate a risbg for this.  */
6464 	  if (src == const0_rtx && bitpos / 32  == (bitpos + bitsize - 1) / 32)
6465 	    return false;
6466 	  else
6467 	    src = force_reg (mode, src);
6468 	}
6469       else if (mode_s != mode)
6470 	{
6471 	  gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6472 	  src = force_reg (mode_s, src);
6473 	  src = gen_lowpart (mode, src);
6474 	}
6475 
6476       op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6477       op = gen_rtx_SET (op, src);
6478 
6479       if (!TARGET_ZEC12)
6480 	{
6481 	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6482 	  op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6483 	}
6484       emit_insn (op);
6485 
6486       return true;
6487     }
6488 
6489   return false;
6490 }
6491 
6492 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6493    register that holds VAL of mode MODE shifted by COUNT bits.  */
6494 
6495 static inline rtx
6496 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6497 {
6498   val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6499 			     NULL_RTX, 1, OPTAB_DIRECT);
6500   return expand_simple_binop (SImode, ASHIFT, val, count,
6501 			      NULL_RTX, 1, OPTAB_DIRECT);
6502 }
6503 
6504 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6505    the result in TARGET.  */
6506 
6507 void
6508 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6509 			 rtx cmp_op1, rtx cmp_op2)
6510 {
6511   machine_mode mode = GET_MODE (target);
6512   bool neg_p = false, swap_p = false;
6513   rtx tmp;
6514 
6515   if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6516     {
6517       switch (cond)
6518 	{
6519 	  /* NE a != b -> !(a == b) */
6520 	case NE:   cond = EQ; neg_p = true;                break;
6521 	case UNGT:
6522 	  emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
6523 	  return;
6524 	case UNGE:
6525 	  emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
6526 	  return;
6527 	case LE:   cond = GE;               swap_p = true; break;
6528 	  /* UNLE: (a u<= b) -> (b u>= a).  */
6529 	case UNLE:
6530 	  emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
6531 	  return;
6532 	  /* LT: a < b -> b > a */
6533 	case LT:   cond = GT;               swap_p = true; break;
6534 	  /* UNLT: (a u< b) -> (b u> a).  */
6535 	case UNLT:
6536 	  emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
6537 	  return;
6538 	case UNEQ:
6539 	  emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6540 	  return;
6541 	case LTGT:
6542 	  emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6543 	  return;
6544 	case ORDERED:
6545 	  emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
6546 	  return;
6547 	case UNORDERED:
6548 	  emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
6549 	  return;
6550 	default: break;
6551 	}
6552     }
6553   else
6554     {
6555       switch (cond)
6556 	{
6557 	  /* NE: a != b -> !(a == b) */
6558 	case NE:  cond = EQ;  neg_p = true;                break;
6559 	  /* GE: a >= b -> !(b > a) */
6560 	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
6561 	  /* GEU: a >= b -> !(b > a) */
6562 	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6563 	  /* LE: a <= b -> !(a > b) */
6564 	case LE:  cond = GT;  neg_p = true;                break;
6565 	  /* LEU: a <= b -> !(a > b) */
6566 	case LEU: cond = GTU; neg_p = true;                break;
6567 	  /* LT: a < b -> b > a */
6568 	case LT:  cond = GT;                swap_p = true; break;
6569 	  /* LTU: a < b -> b > a */
6570 	case LTU: cond = GTU;               swap_p = true; break;
6571 	default: break;
6572 	}
6573     }
6574 
6575   if (swap_p)
6576     {
6577       tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6578     }
6579 
6580   emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6581 						  mode,
6582 						  cmp_op1, cmp_op2)));
6583   if (neg_p)
6584     emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6585 }
6586 
6587 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6588    TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6589    elements in CMP1 and CMP2 fulfill the comparison.
6590    This function is only used to emit patterns for the vx builtins and
6591    therefore only handles comparison codes required by the
6592    builtins.  */
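/* Editor's note -- illustrative, not part of the original source:
   the vector compare below sets the condition code once for the whole
   vector; the producer mode (e.g. CCVEQ, CCVIH) records which relation
   was tested, while the consumer mode (CCVIALL/CCVIANY resp.
   CCVFALL/CCVFANY) records whether TARGET becomes 1 when all or when
   any elements satisfy it.  The final IF_THEN_ELSE selects 1 or 0
   accordingly.  */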
6593 void
6594 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6595 			    rtx cmp1, rtx cmp2, bool all_p)
6596 {
6597   machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6598   rtx tmp_reg = gen_reg_rtx (SImode);
6599   bool swap_p = false;
6600 
6601   if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6602     {
6603       switch (code)
6604 	{
6605 	case EQ:
6606 	case NE:
6607 	  cc_producer_mode = CCVEQmode;
6608 	  break;
6609 	case GE:
6610 	case LT:
6611 	  code = swap_condition (code);
6612 	  swap_p = true;
6613 	  /* fallthrough */
6614 	case GT:
6615 	case LE:
6616 	  cc_producer_mode = CCVIHmode;
6617 	  break;
6618 	case GEU:
6619 	case LTU:
6620 	  code = swap_condition (code);
6621 	  swap_p = true;
6622 	  /* fallthrough */
6623 	case GTU:
6624 	case LEU:
6625 	  cc_producer_mode = CCVIHUmode;
6626 	  break;
6627 	default:
6628 	  gcc_unreachable ();
6629 	}
6630 
6631       scratch_mode = GET_MODE (cmp1);
6632       /* These codes represent inverted CC interpretations.  Inverting
6633 	 an ALL CC mode results in an ANY CC mode and the other way
6634 	 around.  Invert the all_p flag here to compensate for
6635 	 that.  */
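      /* E.g. "all (a != b)" is the same as "!any (a == b)", which is why
	 the ALL/ANY interpretation is flipped for these codes.  */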
6636       if (code == NE || code == LE || code == LEU)
6637 	all_p = !all_p;
6638 
6639       cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6640     }
6641   else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6642     {
6643       bool inv_p = false;
6644 
6645       switch (code)
6646 	{
6647 	case EQ:   cc_producer_mode = CCVEQmode;  break;
6648 	case NE:   cc_producer_mode = CCVEQmode;  inv_p = true; break;
6649 	case GT:   cc_producer_mode = CCVFHmode;  break;
6650 	case GE:   cc_producer_mode = CCVFHEmode; break;
6651 	case UNLE: cc_producer_mode = CCVFHmode;  inv_p = true; break;
6652 	case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6653 	case LT:   cc_producer_mode = CCVFHmode;  code = GT; swap_p = true; break;
6654 	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6655 	default: gcc_unreachable ();
6656 	}
6657       scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
6658 
6659       if (inv_p)
6660 	all_p = !all_p;
6661 
6662       cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6663     }
6664   else
6665     gcc_unreachable ();
6666 
6667   if (swap_p)
6668     {
6669       rtx tmp = cmp2;
6670       cmp2 = cmp1;
6671       cmp1 = tmp;
6672     }
6673 
6674   emit_insn (gen_rtx_PARALLEL (VOIDmode,
6675 	       gen_rtvec (2, gen_rtx_SET (
6676 			       gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6677 			       gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6678 			  gen_rtx_CLOBBER (VOIDmode,
6679 					   gen_rtx_SCRATCH (scratch_mode)))));
6680   emit_move_insn (target, const0_rtx);
6681   emit_move_insn (tmp_reg, const1_rtx);
6682 
6683   emit_move_insn (target,
6684 		  gen_rtx_IF_THEN_ELSE (SImode,
6685 		    gen_rtx_fmt_ee (code, VOIDmode,
6686 				    gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6687 				    const0_rtx),
6688 					tmp_reg, target));
6689 }
6690 
6691 /* Invert the comparison CODE applied to a CC mode.  This is only safe
6692    if we know whether there result was created by a floating point
6693    compare or not.  For the CCV modes this is encoded as part of the
6694    mode.  */
6695 enum rtx_code
6696 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6697 {
6698   /* Reversing an FP compare needs care -- an ordered compare
6699      becomes an unordered compare and vice versa.  */
6700   if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode)
6701     return reverse_condition_maybe_unordered (code);
6702   else if (mode == CCVIALLmode || mode == CCVIANYmode)
6703     return reverse_condition (code);
6704   else
6705     gcc_unreachable ();
6706 }
6707 
6708 /* Generate a vector comparison expression loading either elements of
6709    THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6710    and CMP_OP2.  */
6711 
6712 void
6713 s390_expand_vcond (rtx target, rtx then, rtx els,
6714 		   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6715 {
6716   rtx tmp;
6717   machine_mode result_mode;
6718   rtx result_target;
6719 
6720   machine_mode target_mode = GET_MODE (target);
6721   machine_mode cmp_mode = GET_MODE (cmp_op1);
6722   rtx op = (cond == LT) ? els : then;
6723 
6724   /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6725      and x < 0 ? 1 : 0 into (unsigned) x >> 31.  Likewise
6726      for short and byte (x >> 15 and x >> 7 respectively).  */
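  /* E.g. for V4SI, an arithmetic shift right by 31 replicates the sign bit
     into all bits of each element, giving -1 for negative elements and 0
     otherwise, while a logical shift right by 31 gives 1 or 0.  */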
6727   if ((cond == LT || cond == GE)
6728       && target_mode == cmp_mode
6729       && cmp_op2 == CONST0_RTX (cmp_mode)
6730       && op == CONST0_RTX (target_mode)
6731       && s390_vector_mode_supported_p (target_mode)
6732       && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6733     {
6734       rtx negop = (cond == LT) ? then : els;
6735 
6736       int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6737 
6738       /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6739       if (negop == CONST1_RTX (target_mode))
6740 	{
6741 	  rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6742 					 GEN_INT (shift), target,
6743 					 1, OPTAB_DIRECT);
6744 	  if (res != target)
6745 	    emit_move_insn (target, res);
6746 	  return;
6747 	}
6748 
6749       /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6750       else if (all_ones_operand (negop, target_mode))
6751 	{
6752 	  rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6753 					 GEN_INT (shift), target,
6754 					 0, OPTAB_DIRECT);
6755 	  if (res != target)
6756 	    emit_move_insn (target, res);
6757 	  return;
6758 	}
6759     }
6760 
6761   /* We always use an integral type vector to hold the comparison
6762      result.  */
6763   result_mode = related_int_vector_mode (cmp_mode).require ();
6764   result_target = gen_reg_rtx (result_mode);
6765 
6766   /* We allow vector immediates as comparison operands that
6767      can be handled by the optimization above but not by the
6768      following code.  Hence, force them into registers here.  */
6769   if (!REG_P (cmp_op1))
6770     cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6771 
6772   if (!REG_P (cmp_op2))
6773     cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6774 
6775   s390_expand_vec_compare (result_target, cond,
6776 			   cmp_op1, cmp_op2);
6777 
6778   /* If the results are supposed to be either -1 or 0 we are done
6779      since this is what our compare instructions generate anyway.  */
6780   if (all_ones_operand (then, GET_MODE (then))
6781       && const0_operand (els, GET_MODE (els)))
6782     {
6783       emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6784 					      result_target, 0));
6785       return;
6786     }
6787 
6788   /* Otherwise we will do a vsel afterwards.  */
6789   /* This gets triggered e.g.
6790      with gcc.c-torture/compile/pr53410-1.c */
6791   if (!REG_P (then))
6792     then = force_reg (target_mode, then);
6793 
6794   if (!REG_P (els))
6795     els = force_reg (target_mode, els);
6796 
6797   tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6798 			result_target,
6799 			CONST0_RTX (result_mode));
6800 
6801   /* We compared the result against zero above so we have to swap then
6802      and els here.  */
6803   tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6804 
6805   gcc_assert (target_mode == GET_MODE (then));
6806   emit_insn (gen_rtx_SET (target, tmp));
6807 }
6808 
6809 /* Emit the RTX necessary to initialize the vector TARGET with values
6810    in VALS.  */
6811 void
6812 s390_expand_vec_init (rtx target, rtx vals)
6813 {
6814   machine_mode mode = GET_MODE (target);
6815   machine_mode inner_mode = GET_MODE_INNER (mode);
6816   int n_elts = GET_MODE_NUNITS (mode);
6817   bool all_same = true, all_regs = true, all_const_int = true;
6818   rtx x;
6819   int i;
6820 
6821   for (i = 0; i < n_elts; ++i)
6822     {
6823       x = XVECEXP (vals, 0, i);
6824 
6825       if (!CONST_INT_P (x))
6826 	all_const_int = false;
6827 
6828       if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6829 	all_same = false;
6830 
6831       if (!REG_P (x))
6832 	all_regs = false;
6833     }
6834 
6835   /* Use vector gen mask or vector gen byte mask if possible.  */
6836   if (all_same && all_const_int
6837       && (XVECEXP (vals, 0, 0) == const0_rtx
6838 	  || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6839 					       NULL, NULL)
6840 	  || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6841     {
6842       emit_insn (gen_rtx_SET (target,
6843 			      gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6844       return;
6845     }
6846 
6847   /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
6848   if (all_same)
6849     {
6850       rtx elem = XVECEXP (vals, 0, 0);
6851 
6852       /* vec_splats accepts general_operand as source.  */
6853       if (!general_operand (elem, GET_MODE (elem)))
6854 	elem = force_reg (inner_mode, elem);
6855 
6856       emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6857       return;
6858     }
6859 
6860   if (all_regs
6861       && REG_P (target)
6862       && n_elts == 2
6863       && GET_MODE_SIZE (inner_mode) == 8)
6864     {
6865       /* Use vector load pair.  */
6866       emit_insn (gen_rtx_SET (target,
6867 			      gen_rtx_VEC_CONCAT (mode,
6868 						  XVECEXP (vals, 0, 0),
6869 						  XVECEXP (vals, 0, 1))));
6870       return;
6871     }
6872 
6873   /* Use vector load logical element and zero.  */
6874   if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6875     {
6876       bool found = true;
6877 
6878       x = XVECEXP (vals, 0, 0);
6879       if (memory_operand (x, inner_mode))
6880 	{
6881 	  for (i = 1; i < n_elts; ++i)
6882 	    found = found && XVECEXP (vals, 0, i) == const0_rtx;
6883 
6884 	  if (found)
6885 	    {
6886 	      machine_mode half_mode = (inner_mode == SFmode
6887 					? V2SFmode : V2SImode);
6888 	      emit_insn (gen_rtx_SET (target,
6889 			      gen_rtx_VEC_CONCAT (mode,
6890 						  gen_rtx_VEC_CONCAT (half_mode,
6891 								      x,
6892 								      const0_rtx),
6893 						  gen_rtx_VEC_CONCAT (half_mode,
6894 								      const0_rtx,
6895 								      const0_rtx))));
6896 	      return;
6897 	    }
6898 	}
6899     }
6900 
6901   /* We are about to set the vector elements one by one.  Zero out the
6902      full register first in order to help the data flow framework to
6903      detect it as a full VR set.  */
6904   emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6905 
6906   /* Unfortunately the vec_init expander is not allowed to fail.  So
6907      we have to implement the fallback ourselves.  */
6908   for (i = 0; i < n_elts; i++)
6909     {
6910       rtx elem = XVECEXP (vals, 0, i);
6911       if (!general_operand (elem, GET_MODE (elem)))
6912 	elem = force_reg (inner_mode, elem);
6913 
6914       emit_insn (gen_rtx_SET (target,
6915 			      gen_rtx_UNSPEC (mode,
6916 					      gen_rtvec (3, elem,
6917 							 GEN_INT (i), target),
6918 					      UNSPEC_VEC_SET)));
6919     }
6920 }
6921 
6922 /* Structure to hold the initial parameters for a compare_and_swap operation
6923    in HImode and QImode.  */
6924 
6925 struct alignment_context
6926 {
6927   rtx memsi;	  /* SI aligned memory location.  */
6928   rtx shift;	  /* Bit offset with regard to lsb.  */
6929   rtx modemask;	  /* Mask of the HQImode shifted by SHIFT bits.  */
6930   rtx modemaski;  /* ~modemask */
6931   bool aligned;	  /* True if memory is aligned, false else.  */
6932 };
6933 
6934 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6935    structure AC for transparent simplification, if the memory alignment is known
6936    to be at least 32 bits.  MEM is the memory location for the actual operation
6937    and MODE its mode.  */
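/* For example, a QImode value at byte offset 1 of its (big-endian)
   containing word occupies bits 16..23 of the SImode word, so SHIFT ends
   up as 16 and MODEMASK as 0x00ff0000.  */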
6938 
6939 static void
6940 init_alignment_context (struct alignment_context *ac, rtx mem,
6941 			machine_mode mode)
6942 {
6943   ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6944   ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6945 
6946   if (ac->aligned)
6947     ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
6948   else
6949     {
6950       /* Alignment is unknown.  */
6951       rtx byteoffset, addr, align;
6952 
6953       /* Force the address into a register.  */
6954       addr = force_reg (Pmode, XEXP (mem, 0));
6955 
6956       /* Align it to SImode.  */
6957       align = expand_simple_binop (Pmode, AND, addr,
6958 				   GEN_INT (-GET_MODE_SIZE (SImode)),
6959 				   NULL_RTX, 1, OPTAB_DIRECT);
6960       /* Generate MEM.  */
6961       ac->memsi = gen_rtx_MEM (SImode, align);
6962       MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6963       set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6964       set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6965 
6966       /* Calculate shiftcount.  */
6967       byteoffset = expand_simple_binop (Pmode, AND, addr,
6968 					GEN_INT (GET_MODE_SIZE (SImode) - 1),
6969 					NULL_RTX, 1, OPTAB_DIRECT);
6970       /* As we already have some offset, evaluate the remaining distance.  */
6971       ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6972 				      NULL_RTX, 1, OPTAB_DIRECT);
6973     }
6974 
6975   /* Shift is the byte count, but we need the bitcount.  */
6976   ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6977 				   NULL_RTX, 1, OPTAB_DIRECT);
6978 
6979   /* Calculate masks.  */
6980   ac->modemask = expand_simple_binop (SImode, ASHIFT,
6981 				      GEN_INT (GET_MODE_MASK (mode)),
6982 				      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6983   ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6984 				      NULL_RTX, 1);
6985 }
6986 
6987 /* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
6988    use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
6989    perform the merge in SEQ2.  */
6990 
6991 static rtx
6992 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6993 		    machine_mode mode, rtx val, rtx ins)
6994 {
6995   rtx tmp;
6996 
6997   if (ac->aligned)
6998     {
6999       start_sequence ();
7000       tmp = copy_to_mode_reg (SImode, val);
7001       if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
7002 			    const0_rtx, ins))
7003 	{
7004 	  *seq1 = NULL;
7005 	  *seq2 = get_insns ();
7006 	  end_sequence ();
7007 	  return tmp;
7008 	}
7009       end_sequence ();
7010     }
7011 
7012   /* Failed to use insv.  Generate a two part shift and mask.  */
7013   start_sequence ();
7014   tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
7015   *seq1 = get_insns ();
7016   end_sequence ();
7017 
7018   start_sequence ();
7019   tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
7020   *seq2 = get_insns ();
7021   end_sequence ();
7022 
7023   return tmp;
7024 }
7025 
7026 /* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
7027    the memory location, CMP the old value to compare MEM with and NEW_RTX the
7028    value to set if CMP == MEM.  */
7029 
7030 static void
7031 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7032 		    rtx cmp, rtx new_rtx, bool is_weak)
7033 {
7034   struct alignment_context ac;
7035   rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
7036   rtx res = gen_reg_rtx (SImode);
7037   rtx_code_label *csloop = NULL, *csend = NULL;
7038 
7039   gcc_assert (MEM_P (mem));
7040 
7041   init_alignment_context (&ac, mem, mode);
7042 
7043   /* Load full word.  Subsequent loads are performed by CS.  */
7044   val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
7045 			     NULL_RTX, 1, OPTAB_DIRECT);
7046 
7047   /* Prepare insertions of cmp and new_rtx into the loaded value.  When
7048      possible, we try to use insv to make this happen efficiently.  If
7049      that fails we'll generate code both inside and outside the loop.  */
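  /* SEQ0 and SEQ1 hold preparation insns that do not depend on the loaded
     word and are emitted before the CS loop; SEQ2 and SEQ3 merge CMP and
     NEW_RTX into the loaded word and are placed inside the retry loop when
     one is emitted.  */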
7050   cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
7051   newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
7052 
7053   if (seq0)
7054     emit_insn (seq0);
7055   if (seq1)
7056     emit_insn (seq1);
7057 
7058   /* Start CS loop.  */
7059   if (!is_weak)
7060     {
7061       /* Begin assuming success.  */
7062       emit_move_insn (btarget, const1_rtx);
7063 
7064       csloop = gen_label_rtx ();
7065       csend = gen_label_rtx ();
7066       emit_label (csloop);
7067     }
7068 
7069   /* val = "<mem>00..0<mem>"
7070    * cmp = "00..0<cmp>00..0"
7071    * new = "00..0<new>00..0"
7072    */
7073 
7074   emit_insn (seq2);
7075   emit_insn (seq3);
7076 
7077   cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7078   if (is_weak)
7079     emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7080   else
7081     {
7082       rtx tmp;
7083 
7084       /* Jump to end if we're done (likely?).  */
7085       s390_emit_jump (csend, cc);
7086 
7087       /* Check for changes outside MODE, and loop internally if so.
7088 	 Arrange the moves so that the compare is adjacent to the
7089 	 branch so that we can generate CRJ.  */
7090       tmp = copy_to_reg (val);
7091       force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7092 			  1, OPTAB_DIRECT);
7093       cc = s390_emit_compare (NE, val, tmp);
7094       s390_emit_jump (csloop, cc);
7095 
7096       /* Failed.  */
7097       emit_move_insn (btarget, const0_rtx);
7098       emit_label (csend);
7099     }
7100 
7101   /* Return the correct part of the bitfield.  */
7102   convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7103 					      NULL_RTX, 1, OPTAB_DIRECT), 1);
7104 }
7105 
7106 /* Variant of s390_expand_cs for SI, DI and TI modes.  */
7107 static void
7108 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7109 		     rtx cmp, rtx new_rtx, bool is_weak)
7110 {
7111   rtx output = vtarget;
7112   rtx_code_label *skip_cs_label = NULL;
7113   bool do_const_opt = false;
7114 
7115   if (!register_operand (output, mode))
7116     output = gen_reg_rtx (mode);
7117 
7118   /* If IS_WEAK is true and the expected value CMP is a constant, compare the
7119      memory with the constant first and skip the compare_and_swap because it is
7120      very expensive and likely to fail anyway.
7121      Note 1: This is done only for IS_WEAK.  C11 allows optimizations that may
7122      cause spurious failures in that case.
7123      Note 2: It may be useful to do this also for a non-constant CMP.
7124      Note 3: Currently only targets with "load on condition" are supported
7125      (z196 and newer).  */
7126 
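  /* In that case the emitted sequence is roughly: load MEM and compare it
     against the constant expected value; if they already differ, branch
     around the CS and leave BTARGET at 0; otherwise perform the CS using
     the loaded value as the expected operand.  */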
7127   if (TARGET_Z196
7128       && (mode == SImode || mode == DImode))
7129     do_const_opt = (is_weak && CONST_INT_P (cmp));
7130 
7131   if (do_const_opt)
7132     {
7133       rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7134 
7135       skip_cs_label = gen_label_rtx ();
7136       emit_move_insn (btarget, const0_rtx);
7137       if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7138 	{
7139 	  rtvec lt = rtvec_alloc (2);
7140 
7141 	  /* Load-and-test + conditional jump.  */
7142 	  RTVEC_ELT (lt, 0)
7143 	    = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7144 	  RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7145 	  emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7146 	}
7147       else
7148 	{
7149 	  emit_move_insn (output, mem);
7150 	  emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7151 	}
7152       s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7153       add_reg_br_prob_note (get_last_insn (),
7154 			    profile_probability::very_unlikely ());
7155       /* If the jump is not taken, OUTPUT is the expected value.  */
7156       cmp = output;
7157       /* Reload newval to a register manually, *after* the compare and jump
7158 	 above.  Otherwise Reload might place it before the jump.  */
7159     }
7160   else
7161     cmp = force_reg (mode, cmp);
7162   new_rtx = force_reg (mode, new_rtx);
7163   s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7164 			      (do_const_opt) ? CCZmode : CCZ1mode);
7165   if (skip_cs_label != NULL)
7166     emit_label (skip_cs_label);
7167 
7168   /* We deliberately accept non-register operands in the predicate
7169      to ensure the write back to the output operand happens *before*
7170      the store-flags code below.  This makes it easier for combine
7171      to merge the store-flags code with a potential test-and-branch
7172      pattern following (immediately!) afterwards.  */
7173   if (output != vtarget)
7174     emit_move_insn (vtarget, output);
7175 
7176   if (do_const_opt)
7177     {
7178       rtx cc, cond, ite;
7179 
7180       /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7181 	 btarget has already been initialized with 0 above.  */
7182       cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7183       cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7184       ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7185       emit_insn (gen_rtx_SET (btarget, ite));
7186     }
7187   else
7188     {
7189       rtx cc, cond;
7190 
7191       cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7192       cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7193       emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7194     }
7195 }
7196 
7197 /* Expand an atomic compare and swap operation.  MEM is the memory location,
7198    CMP the old value to compare MEM with and NEW_RTX the value to set if
7199    CMP == MEM.  */
7200 
7201 void
7202 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7203 		rtx cmp, rtx new_rtx, bool is_weak)
7204 {
7205   switch (mode)
7206     {
7207     case E_TImode:
7208     case E_DImode:
7209     case E_SImode:
7210       s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7211       break;
7212     case E_HImode:
7213     case E_QImode:
7214       s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7215       break;
7216     default:
7217       gcc_unreachable ();
7218     }
7219 }
7220 
7221 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7222    The memory location MEM is set to INPUT.  OUTPUT is set to the previous value
7223    of MEM.  */
7224 
7225 void
7226 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7227 {
7228   machine_mode mode = GET_MODE (mem);
7229   rtx_code_label *csloop;
7230 
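  /* Exchanging in a zero can be done with an atomic fetch-and-AND with
     zero: it stores zero into MEM and returns the previous contents, so
     the compare-and-swap loop below is not needed in that case.  */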
7231   if (TARGET_Z196
7232       && (mode == DImode || mode == SImode)
7233       && CONST_INT_P (input) && INTVAL (input) == 0)
7234     {
7235       emit_move_insn (output, const0_rtx);
7236       if (mode == DImode)
7237 	emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7238       else
7239 	emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7240       return;
7241     }
7242 
7243   input = force_reg (mode, input);
7244   emit_move_insn (output, mem);
7245   csloop = gen_label_rtx ();
7246   emit_label (csloop);
7247   s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7248 						      input, CCZ1mode));
7249 }
7250 
7251 /* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
7252    and VAL the value to play with.  If AFTER is true then store the value
7253    MEM holds after the operation, if AFTER is false then store the value MEM
7254    holds before the operation.  If TARGET is zero then discard that value, else
7255    store it to TARGET.  */
7256 
7257 void
7258 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7259 		    rtx target, rtx mem, rtx val, bool after)
7260 {
7261   struct alignment_context ac;
7262   rtx cmp;
7263   rtx new_rtx = gen_reg_rtx (SImode);
7264   rtx orig = gen_reg_rtx (SImode);
7265   rtx_code_label *csloop = gen_label_rtx ();
7266 
7267   gcc_assert (!target || register_operand (target, VOIDmode));
7268   gcc_assert (MEM_P (mem));
7269 
7270   init_alignment_context (&ac, mem, mode);
7271 
7272   /* Shift val to the correct bit positions.
7273      Preserve "icm", but prevent "ex icm".  */
7274   if (!(ac.aligned && code == SET && MEM_P (val)))
7275     val = s390_expand_mask_and_shift (val, mode, ac.shift);
7276 
7277   /* Further preparation insns.  */
7278   if (code == PLUS || code == MINUS)
7279     emit_move_insn (orig, val);
7280   else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7281     val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7282 			       NULL_RTX, 1, OPTAB_DIRECT);
7283 
7284   /* Load full word.  Subsequent loads are performed by CS.  */
7285   cmp = force_reg (SImode, ac.memsi);
7286 
7287   /* Start CS loop.  */
7288   emit_label (csloop);
7289   emit_move_insn (new_rtx, cmp);
7290 
7291   /* Patch new with val at correct position.  */
7292   switch (code)
7293     {
7294     case PLUS:
7295     case MINUS:
7296       val = expand_simple_binop (SImode, code, new_rtx, orig,
7297 				 NULL_RTX, 1, OPTAB_DIRECT);
7298       val = expand_simple_binop (SImode, AND, val, ac.modemask,
7299 				 NULL_RTX, 1, OPTAB_DIRECT);
7300       /* FALLTHRU */
7301     case SET:
7302       if (ac.aligned && MEM_P (val))
7303 	store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7304 			 0, 0, SImode, val, false);
7305       else
7306 	{
7307 	  new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7308 				     NULL_RTX, 1, OPTAB_DIRECT);
7309 	  new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7310 				     NULL_RTX, 1, OPTAB_DIRECT);
7311 	}
7312       break;
7313     case AND:
7314     case IOR:
7315     case XOR:
7316       new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7317 				 NULL_RTX, 1, OPTAB_DIRECT);
7318       break;
7319     case MULT: /* NAND */
7320       new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7321 				 NULL_RTX, 1, OPTAB_DIRECT);
7322       new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7323 				 NULL_RTX, 1, OPTAB_DIRECT);
7324       break;
7325     default:
7326       gcc_unreachable ();
7327     }
7328 
7329   s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7330 						      ac.memsi, cmp, new_rtx,
7331 						      CCZ1mode));
7332 
7333   /* Return the correct part of the bitfield.  */
7334   if (target)
7335     convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7336 					       after ? new_rtx : cmp, ac.shift,
7337 					       NULL_RTX, 1, OPTAB_DIRECT), 1);
7338 }
7339 
7340 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7341    We need to emit DTP-relative relocations.  */
7342 
7343 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7344 
7345 static void
7346 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7347 {
7348   switch (size)
7349     {
7350     case 4:
7351       fputs ("\t.long\t", file);
7352       break;
7353     case 8:
7354       fputs ("\t.quad\t", file);
7355       break;
7356     default:
7357       gcc_unreachable ();
7358     }
7359   output_addr_const (file, x);
7360   fputs ("@DTPOFF", file);
7361 }
7362 
7363 /* Return the proper mode for REGNO being represented in the dwarf
7364    unwind table.  */
7365 machine_mode
7366 s390_dwarf_frame_reg_mode (int regno)
7367 {
7368   machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7369 
7370   /* Make sure not to return DImode for any GPR with -m31 -mzarch.  */
7371   if (GENERAL_REGNO_P (regno))
7372     save_mode = Pmode;
7373 
7374   /* The rightmost 64 bits of vector registers are call-clobbered.  */
7375   if (GET_MODE_SIZE (save_mode) > 8)
7376     save_mode = DImode;
7377 
7378   return save_mode;
7379 }
7380 
7381 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7382 /* Implement TARGET_MANGLE_TYPE.  */
7383 
7384 static const char *
7385 s390_mangle_type (const_tree type)
7386 {
7387   type = TYPE_MAIN_VARIANT (type);
7388 
7389   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7390       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7391     return NULL;
7392 
7393   if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7394   if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7395   if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7396   if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7397 
7398   if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7399       && TARGET_LONG_DOUBLE_128)
7400     return "g";
7401 
7402   /* For all other types, use normal C++ mangling.  */
7403   return NULL;
7404 }
7405 #endif
7406 
7407 /* In the name of slightly smaller debug output, and to cater to
7408    general assembler lossage, recognize various UNSPEC sequences
7409    and turn them back into a direct symbol reference.  */
7410 
7411 static rtx
7412 s390_delegitimize_address (rtx orig_x)
7413 {
7414   rtx x, y;
7415 
7416   orig_x = delegitimize_mem_from_attrs (orig_x);
7417   x = orig_x;
7418 
7419   /* Extract the symbol ref from:
7420      (plus:SI (reg:SI 12 %r12)
7421 	      (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7422 				    UNSPEC_GOTOFF/PLTOFF)))
7423      and
7424      (plus:SI (reg:SI 12 %r12)
7425 	      (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7426 					     UNSPEC_GOTOFF/PLTOFF)
7427 				 (const_int 4 [0x4]))))  */
7428   if (GET_CODE (x) == PLUS
7429       && REG_P (XEXP (x, 0))
7430       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7431       && GET_CODE (XEXP (x, 1)) == CONST)
7432     {
7433       HOST_WIDE_INT offset = 0;
7434 
7435       /* The const operand.  */
7436       y = XEXP (XEXP (x, 1), 0);
7437 
7438       if (GET_CODE (y) == PLUS
7439 	  && GET_CODE (XEXP (y, 1)) == CONST_INT)
7440 	{
7441 	  offset = INTVAL (XEXP (y, 1));
7442 	  y = XEXP (y, 0);
7443 	}
7444 
7445       if (GET_CODE (y) == UNSPEC
7446 	  && (XINT (y, 1) == UNSPEC_GOTOFF
7447 	      || XINT (y, 1) == UNSPEC_PLTOFF))
7448 	return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7449     }
7450 
7451   if (GET_CODE (x) != MEM)
7452     return orig_x;
7453 
7454   x = XEXP (x, 0);
7455   if (GET_CODE (x) == PLUS
7456       && GET_CODE (XEXP (x, 1)) == CONST
7457       && GET_CODE (XEXP (x, 0)) == REG
7458       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7459     {
7460       y = XEXP (XEXP (x, 1), 0);
7461       if (GET_CODE (y) == UNSPEC
7462 	  && XINT (y, 1) == UNSPEC_GOT)
7463 	y = XVECEXP (y, 0, 0);
7464       else
7465 	return orig_x;
7466     }
7467   else if (GET_CODE (x) == CONST)
7468     {
7469       /* Extract the symbol ref from:
7470 	 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7471 				       UNSPEC_PLT/GOTENT)))  */
7472 
7473       y = XEXP (x, 0);
7474       if (GET_CODE (y) == UNSPEC
7475 	  && (XINT (y, 1) == UNSPEC_GOTENT
7476 	      || XINT (y, 1) == UNSPEC_PLT))
7477 	y = XVECEXP (y, 0, 0);
7478       else
7479 	return orig_x;
7480     }
7481   else
7482     return orig_x;
7483 
7484   if (GET_MODE (orig_x) != Pmode)
7485     {
7486       if (GET_MODE (orig_x) == BLKmode)
7487 	return orig_x;
7488       y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7489       if (y == NULL_RTX)
7490 	return orig_x;
7491     }
7492   return y;
7493 }
7494 
7495 /* Output operand OP to stdio stream FILE.
7496    OP is an address (register + offset) which is not used to address data;
7497    instead the rightmost bits are interpreted as the value.  */
7498 
7499 static void
7500 print_addrstyle_operand (FILE *file, rtx op)
7501 {
7502   HOST_WIDE_INT offset;
7503   rtx base;
7504 
7505   /* Extract base register and offset.  */
7506   if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7507     gcc_unreachable ();
7508 
7509   /* Sanity check.  */
7510   if (base)
7511     {
7512       gcc_assert (GET_CODE (base) == REG);
7513       gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7514       gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7515     }
7516 
7517   /* Offsets are restricted to twelve bits.  */
7518   fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7519   if (base)
7520     fprintf (file, "(%s)", reg_names[REGNO (base)]);
7521 }
7522 
7523 /* Print the shift count operand OP to FILE.
7524    OP is an address-style operand in a form which
7525    s390_valid_shift_count permits.  Subregs and no-op
7526    and-masking of the operand are stripped.  */
7527 
7528 static void
7529 print_shift_count_operand (FILE *file, rtx op)
7530 {
7531   /* No checking of the and mask required here.  */
7532   if (!s390_valid_shift_count (op, 0))
7533     gcc_unreachable ();
7534 
7535   while (op && GET_CODE (op) == SUBREG)
7536     op = SUBREG_REG (op);
7537 
7538   if (GET_CODE (op) == AND)
7539     op = XEXP (op, 0);
7540 
7541   print_addrstyle_operand (file, op);
7542 }
7543 
7544 /* Assigns the number of NOP halfwords to be emitted before and after the
7545    function label to *HW_BEFORE and *HW_AFTER.  Both pointers must not be NULL.
7546    If hotpatching is disabled for the function, the values are set to zero.
7547 */
7548 
7549 static void
7550 s390_function_num_hotpatch_hw (tree decl,
7551 			       int *hw_before,
7552 			       int *hw_after)
7553 {
7554   tree attr;
7555 
7556   attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7557 
7558   /* Handle the arguments of the hotpatch attribute.  The values
7559      specified via attribute might override the cmdline argument
7560      values.  */
7561   if (attr)
7562     {
7563       tree args = TREE_VALUE (attr);
7564 
7565       *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7566       *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7567     }
7568   else
7569     {
7570       /* Use the values specified by the cmdline arguments.  */
7571       *hw_before = s390_hotpatch_hw_before_label;
7572       *hw_after = s390_hotpatch_hw_after_label;
7573     }
7574 }
7575 
7576 /* Write the current .machine and .machinemode specification to the assembler
7577    file.  */
7578 
7579 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7580 static void
7581 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7582 {
7583   fprintf (asm_out_file, "\t.machinemode %s\n",
7584 	   (TARGET_ZARCH) ? "zarch" : "esa");
7585   fprintf (asm_out_file, "\t.machine \"%s",
7586 	   processor_table[s390_arch].binutils_name);
7587   if (S390_USE_ARCHITECTURE_MODIFIERS)
7588     {
7589       int cpu_flags;
7590 
7591       cpu_flags = processor_flags_table[(int) s390_arch];
7592       if (TARGET_HTM && !(cpu_flags & PF_TX))
7593 	fprintf (asm_out_file, "+htm");
7594       else if (!TARGET_HTM && (cpu_flags & PF_TX))
7595 	fprintf (asm_out_file, "+nohtm");
7596       if (TARGET_VX && !(cpu_flags & PF_VX))
7597 	fprintf (asm_out_file, "+vx");
7598       else if (!TARGET_VX && (cpu_flags & PF_VX))
7599 	fprintf (asm_out_file, "+novx");
7600     }
7601   fprintf (asm_out_file, "\"\n");
7602 }
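
/* For example, a function compiled with -march=z13 -mno-vx would get
   directives along the lines of:

	.machinemode zarch
	.machine "z13+novx"

   where the exact CPU name is taken from processor_table.  */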
7603 
7604 /* Write an extra function header before the very start of the function.  */
7605 
7606 void
7607 s390_asm_output_function_prefix (FILE *asm_out_file,
7608 				 const char *fnname ATTRIBUTE_UNUSED)
7609 {
7610   if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7611     return;
7612   /* Since only the function specific options are saved but not the indications
7613      which options are set, it's too much work here to figure out which options
7614      have actually changed.  Thus, generate .machine and .machinemode whenever a
7615      function has the target attribute or pragma.  */
7616   fprintf (asm_out_file, "\t.machinemode push\n");
7617   fprintf (asm_out_file, "\t.machine push\n");
7618   s390_asm_output_machine_for_arch (asm_out_file);
7619 }
7620 
7621 /* Write an extra function footer after the very end of the function.  */
7622 
7623 void
7624 s390_asm_declare_function_size (FILE *asm_out_file,
7625 				const char *fnname, tree decl)
7626 {
7627   if (!flag_inhibit_size_directive)
7628     ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7629   if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7630     return;
7631   fprintf (asm_out_file, "\t.machine pop\n");
7632   fprintf (asm_out_file, "\t.machinemode pop\n");
7633 }
7634 #endif
7635 
7636 /* Write the extra assembler code needed to declare a function properly.  */
7637 
7638 void
7639 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7640 				tree decl)
7641 {
7642   int hw_before, hw_after;
7643 
7644   s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7645   if (hw_before > 0)
7646     {
7647       unsigned int function_alignment;
7648       int i;
7649 
7650       /* Add a trampoline code area before the function label and initialize it
7651 	 with two-byte nop instructions.  This area can be overwritten with code
7652 	 that jumps to a patched version of the function.  */
7653       asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7654 		   "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7655 		   hw_before);
7656       for (i = 1; i < hw_before; i++)
7657 	fputs ("\tnopr\t%r0\n", asm_out_file);
7658 
7659       /* Note:  The function label must be aligned so that (a) the bytes of the
7660 	 following nop do not cross a cacheline boundary, and (b) a jump address
7661 	 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7662 	 stored directly before the label without crossing a cacheline
7663 	 boundary.  All this is necessary to make sure the trampoline code can
7664 	 be changed atomically.
7665 	 This alignment is done automatically using the FOUNCTION_BOUNDARY, but
7666 	 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7667 	 before them.  So it is necessary to duplicate the alignment after the
7668 	 NOPs.  */
7669       function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7670       if (! DECL_USER_ALIGN (decl))
7671 	function_alignment
7672 	  = MAX (function_alignment,
7673 		 (unsigned int) align_functions.levels[0].get_value ());
7674       fputs ("\t# alignment for hotpatch\n", asm_out_file);
7675       ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
7676     }
7677 
7678   if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7679     {
7680       asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7681       asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7682       asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7683       asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7684       asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7685       asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7686 		   s390_warn_framesize);
7687       asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7688       asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7689       asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7690       asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7691       asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7692       asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7693 		   TARGET_PACKED_STACK);
7694       asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7695       asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7696       asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7697       asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7698 		   s390_warn_dynamicstack_p);
7699     }
7700   ASM_OUTPUT_LABEL (asm_out_file, fname);
7701   if (hw_after > 0)
7702     asm_fprintf (asm_out_file,
7703 		 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7704 		 hw_after);
7705 }
7706 
7707 /* Output machine-dependent UNSPECs occurring in address constant X
7708    in assembler syntax to stdio stream FILE.  Returns true if the
7709    constant X could be recognized, false otherwise.  */
7710 
7711 static bool
7712 s390_output_addr_const_extra (FILE *file, rtx x)
7713 {
7714   if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7715     switch (XINT (x, 1))
7716       {
7717       case UNSPEC_GOTENT:
7718 	output_addr_const (file, XVECEXP (x, 0, 0));
7719 	fprintf (file, "@GOTENT");
7720 	return true;
7721       case UNSPEC_GOT:
7722 	output_addr_const (file, XVECEXP (x, 0, 0));
7723 	fprintf (file, "@GOT");
7724 	return true;
7725       case UNSPEC_GOTOFF:
7726 	output_addr_const (file, XVECEXP (x, 0, 0));
7727 	fprintf (file, "@GOTOFF");
7728 	return true;
7729       case UNSPEC_PLT:
7730 	output_addr_const (file, XVECEXP (x, 0, 0));
7731 	fprintf (file, "@PLT");
7732 	return true;
7733       case UNSPEC_PLTOFF:
7734 	output_addr_const (file, XVECEXP (x, 0, 0));
7735 	fprintf (file, "@PLTOFF");
7736 	return true;
7737       case UNSPEC_TLSGD:
7738 	output_addr_const (file, XVECEXP (x, 0, 0));
7739 	fprintf (file, "@TLSGD");
7740 	return true;
7741       case UNSPEC_TLSLDM:
7742 	assemble_name (file, get_some_local_dynamic_name ());
7743 	fprintf (file, "@TLSLDM");
7744 	return true;
7745       case UNSPEC_DTPOFF:
7746 	output_addr_const (file, XVECEXP (x, 0, 0));
7747 	fprintf (file, "@DTPOFF");
7748 	return true;
7749       case UNSPEC_NTPOFF:
7750 	output_addr_const (file, XVECEXP (x, 0, 0));
7751 	fprintf (file, "@NTPOFF");
7752 	return true;
7753       case UNSPEC_GOTNTPOFF:
7754 	output_addr_const (file, XVECEXP (x, 0, 0));
7755 	fprintf (file, "@GOTNTPOFF");
7756 	return true;
7757       case UNSPEC_INDNTPOFF:
7758 	output_addr_const (file, XVECEXP (x, 0, 0));
7759 	fprintf (file, "@INDNTPOFF");
7760 	return true;
7761       }
7762 
7763   if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7764     switch (XINT (x, 1))
7765       {
7766       case UNSPEC_POOL_OFFSET:
7767 	x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7768 	output_addr_const (file, x);
7769 	return true;
7770       }
7771   return false;
7772 }
7773 
7774 /* Output address operand ADDR in assembler syntax to
7775    stdio stream FILE.  */
7776 
7777 void
7778 print_operand_address (FILE *file, rtx addr)
7779 {
7780   struct s390_address ad;
7781   memset (&ad, 0, sizeof (s390_address));
7782 
7783   if (s390_loadrelative_operand_p (addr, NULL, NULL))
7784     {
7785       if (!TARGET_Z10)
7786 	{
7787 	  output_operand_lossage ("symbolic memory references are "
7788 				  "only supported on z10 or later");
7789 	  return;
7790 	}
7791       output_addr_const (file, addr);
7792       return;
7793     }
7794 
7795   if (!s390_decompose_address (addr, &ad)
7796       || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7797       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7798     output_operand_lossage ("cannot decompose address");
7799 
7800   if (ad.disp)
7801     output_addr_const (file, ad.disp);
7802   else
7803     fprintf (file, "0");
7804 
7805   if (ad.base && ad.indx)
7806     fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7807 			      reg_names[REGNO (ad.base)]);
7808   else if (ad.base)
7809     fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7810 }
7811 
7812 /* Output operand X in assembler syntax to stdio stream FILE.
7813    CODE specified the format flag.  The following format flags
7814    are recognized:
7815 
7816     'A': On z14 or higher: If operand is a mem print the alignment
7817 	 hint usable with vl/vst prefixed by a comma.
7818     'C': print opcode suffix for branch condition.
7819     'D': print opcode suffix for inverse branch condition.
7820     'E': print opcode suffix for branch on index instruction.
7821     'G': print the size of the operand in bytes.
7822     'J': print tls_load/tls_gdcall/tls_ldcall suffix
7823     'M': print the second word of a TImode operand.
7824     'N': print the second word of a DImode operand.
7825     'O': print only the displacement of a memory reference or address.
7826     'R': print only the base register of a memory reference or address.
7827     'S': print S-type memory reference (base+displacement).
7828     'Y': print address style operand without index (e.g. shift count or setmem
7829 	 operand).
7830 
7831     'b': print integer X as if it's an unsigned byte.
7832     'c': print integer X as if it's a signed byte.
7833     'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7834     'f': "end" contiguous bitmask X in SImode.
7835     'h': print integer X as if it's a signed halfword.
7836     'i': print the first nonzero HImode part of X.
7837     'j': print the first HImode part unequal to -1 of X.
7838     'k': print the first nonzero SImode part of X.
7839     'm': print the first SImode part unequal to -1 of X.
7840     'o': print integer X as if it's an unsigned 32-bit word.
7841     's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7842     't': CONST_INT: "start" of contiguous bitmask X in SImode.
7843 	 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7844     'x': print integer X as if it's an unsigned halfword.
7845     'v': print register number as vector register (v1 instead of f1).
7846 */
7847 
7848 void
7849 print_operand (FILE *file, rtx x, int code)
7850 {
7851   HOST_WIDE_INT ival;
7852 
7853   switch (code)
7854     {
7855     case 'A':
7856       if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
7857 	{
7858 	  if (MEM_ALIGN (x) >= 128)
7859 	    fprintf (file, ",4");
7860 	  else if (MEM_ALIGN (x) == 64)
7861 	    fprintf (file, ",3");
7862 	}
7863       return;
7864     case 'C':
7865       fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7866       return;
7867 
7868     case 'D':
7869       fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7870       return;
7871 
7872     case 'E':
7873       if (GET_CODE (x) == LE)
7874 	fprintf (file, "l");
7875       else if (GET_CODE (x) == GT)
7876 	fprintf (file, "h");
7877       else
7878 	output_operand_lossage ("invalid comparison operator "
7879 				"for 'E' output modifier");
7880       return;
7881 
7882     case 'J':
7883       if (GET_CODE (x) == SYMBOL_REF)
7884 	{
7885 	  fprintf (file, "%s", ":tls_load:");
7886 	  output_addr_const (file, x);
7887 	}
7888       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7889 	{
7890 	  fprintf (file, "%s", ":tls_gdcall:");
7891 	  output_addr_const (file, XVECEXP (x, 0, 0));
7892 	}
7893       else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7894 	{
7895 	  fprintf (file, "%s", ":tls_ldcall:");
7896 	  const char *name = get_some_local_dynamic_name ();
7897 	  gcc_assert (name);
7898 	  assemble_name (file, name);
7899 	}
7900       else
7901 	output_operand_lossage ("invalid reference for 'J' output modifier");
7902       return;
7903 
7904     case 'G':
7905       fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7906       return;
7907 
7908     case 'O':
7909       {
7910 	struct s390_address ad;
7911 	int ret;
7912 
7913 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7914 
7915 	if (!ret
7916 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7917 	    || ad.indx)
7918 	  {
7919 	    output_operand_lossage ("invalid address for 'O' output modifier");
7920 	    return;
7921 	  }
7922 
7923 	if (ad.disp)
7924 	  output_addr_const (file, ad.disp);
7925 	else
7926 	  fprintf (file, "0");
7927       }
7928       return;
7929 
7930     case 'R':
7931       {
7932 	struct s390_address ad;
7933 	int ret;
7934 
7935 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7936 
7937 	if (!ret
7938 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7939 	    || ad.indx)
7940 	  {
7941 	    output_operand_lossage ("invalid address for 'R' output modifier");
7942 	    return;
7943 	  }
7944 
7945 	if (ad.base)
7946 	  fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7947 	else
7948 	  fprintf (file, "0");
7949       }
7950       return;
7951 
7952     case 'S':
7953       {
7954 	struct s390_address ad;
7955 	int ret;
7956 
7957 	if (!MEM_P (x))
7958 	  {
7959 	    output_operand_lossage ("memory reference expected for "
7960 				    "'S' output modifier");
7961 	    return;
7962 	  }
7963 	ret = s390_decompose_address (XEXP (x, 0), &ad);
7964 
7965 	if (!ret
7966 	    || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7967 	    || ad.indx)
7968 	  {
7969 	    output_operand_lossage ("invalid address for 'S' output modifier");
7970 	    return;
7971 	  }
7972 
7973 	if (ad.disp)
7974 	  output_addr_const (file, ad.disp);
7975 	else
7976 	  fprintf (file, "0");
7977 
7978 	if (ad.base)
7979 	  fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7980       }
7981       return;
7982 
7983     case 'N':
7984       if (GET_CODE (x) == REG)
7985 	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7986       else if (GET_CODE (x) == MEM)
7987 	x = change_address (x, VOIDmode,
7988 			    plus_constant (Pmode, XEXP (x, 0), 4));
7989       else
7990 	output_operand_lossage ("register or memory expression expected "
7991 				"for 'N' output modifier");
7992       break;
7993 
7994     case 'M':
7995       if (GET_CODE (x) == REG)
7996 	x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7997       else if (GET_CODE (x) == MEM)
7998 	x = change_address (x, VOIDmode,
7999 			    plus_constant (Pmode, XEXP (x, 0), 8));
8000       else
8001 	output_operand_lossage ("register or memory expression expected "
8002 				"for 'M' output modifier");
8003       break;
8004 
8005     case 'Y':
8006       print_shift_count_operand (file, x);
8007       return;
8008     }
8009 
8010   switch (GET_CODE (x))
8011     {
8012     case REG:
8013       /* Print FP regs as fx instead of vx when they are accessed
8014 	 through non-vector mode.  */
8015       if (code == 'v'
8016 	  || VECTOR_NOFP_REG_P (x)
8017 	  || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
8018 	  || (VECTOR_REG_P (x)
8019 	      && (GET_MODE_SIZE (GET_MODE (x)) /
8020 		  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
8021 	fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
8022       else
8023 	fprintf (file, "%s", reg_names[REGNO (x)]);
8024       break;
8025 
8026     case MEM:
8027       output_address (GET_MODE (x), XEXP (x, 0));
8028       break;
8029 
8030     case CONST:
8031     case CODE_LABEL:
8032     case LABEL_REF:
8033     case SYMBOL_REF:
8034       output_addr_const (file, x);
8035       break;
8036 
8037     case CONST_INT:
8038       ival = INTVAL (x);
8039       switch (code)
8040 	{
8041 	case 0:
8042 	  break;
8043 	case 'b':
8044 	  ival &= 0xff;
8045 	  break;
8046 	case 'c':
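	  /* Sign-extend the low byte: e.g. 0xff prints as -1.  */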
8047 	  ival = ((ival & 0xff) ^ 0x80) - 0x80;
8048 	  break;
8049 	case 'x':
8050 	  ival &= 0xffff;
8051 	  break;
8052 	case 'h':
8053 	  ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
8054 	  break;
8055 	case 'i':
8056 	  ival = s390_extract_part (x, HImode, 0);
8057 	  break;
8058 	case 'j':
8059 	  ival = s390_extract_part (x, HImode, -1);
8060 	  break;
8061 	case 'k':
8062 	  ival = s390_extract_part (x, SImode, 0);
8063 	  break;
8064 	case 'm':
8065 	  ival = s390_extract_part (x, SImode, -1);
8066 	  break;
8067 	case 'o':
8068 	  ival &= 0xffffffff;
8069 	  break;
8070 	case 'e': case 'f':
8071 	case 's': case 't':
8072 	  {
8073 	    int start, end;
8074 	    int len;
8075 	    bool ok;
8076 
8077 	    len = (code == 's' || code == 'e' ? 64 : 32);
8078 	    ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
8079 	    gcc_assert (ok);
8080 	    if (code == 's' || code == 't')
8081 	      ival = start;
8082 	    else
8083 	      ival = end;
8084 	  }
8085 	  break;
8086 	default:
8087 	  output_operand_lossage ("invalid constant for output modifier '%c'", code);
8088 	}
8089       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8090       break;
8091 
8092     case CONST_WIDE_INT:
8093       if (code == 'b')
8094 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8095 		 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8096       else if (code == 'x')
8097 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8098 		 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8099       else if (code == 'h')
8100 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8101 		 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8102       else
8103 	{
8104 	  if (code == 0)
8105 	    output_operand_lossage ("invalid constant - try using "
8106 				    "an output modifier");
8107 	  else
8108 	    output_operand_lossage ("invalid constant for output modifier '%c'",
8109 				    code);
8110 	}
8111       break;
8112     case CONST_VECTOR:
8113       switch (code)
8114 	{
8115 	case 'h':
8116 	  gcc_assert (const_vec_duplicate_p (x));
8117 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8118 		   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8119 	  break;
8120 	case 'e':
8121 	case 's':
8122 	  {
8123 	    int start, end;
8124 	    bool ok;
8125 
8126 	    ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8127 	    gcc_assert (ok);
8128 	    ival = (code == 's') ? start : end;
8129 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8130 	  }
8131 	  break;
8132 	case 't':
8133 	  {
8134 	    unsigned mask;
8135 	    bool ok = s390_bytemask_vector_p (x, &mask);
8136 	    gcc_assert (ok);
8137 	    fprintf (file, "%u", mask);
8138 	  }
8139 	  break;
8140 
8141 	default:
8142 	  output_operand_lossage ("invalid constant vector for output "
8143 				  "modifier '%c'", code);
8144 	}
8145       break;
8146 
8147     default:
8148       if (code == 0)
8149 	output_operand_lossage ("invalid expression - try using "
8150 				"an output modifier");
8151       else
8152 	output_operand_lossage ("invalid expression for output "
8153 				"modifier '%c'", code);
8154       break;
8155     }
8156 }
8157 
8158 /* Target hook for assembling integer objects.  We need to define it
8159    here to work around a bug in some versions of GAS, which couldn't
8160    handle values smaller than INT_MIN when printed in decimal.  */
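/* E.g. an aligned 8-byte constant below INT_MIN is then emitted as
   "\t.quad\t0x..." rather than in decimal.  */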
8161 
8162 static bool
8163 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8164 {
8165   if (size == 8 && aligned_p
8166       && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8167     {
8168       fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8169 	       INTVAL (x));
8170       return true;
8171     }
8172   return default_assemble_integer (x, size, aligned_p);
8173 }
8174 
8175 /* Returns true if register REGNO is used for forming
8176    a memory address in expression X.  */
8177 
8178 static bool
8179 reg_used_in_mem_p (int regno, rtx x)
8180 {
8181   enum rtx_code code = GET_CODE (x);
8182   int i, j;
8183   const char *fmt;
8184 
8185   if (code == MEM)
8186     {
8187       if (refers_to_regno_p (regno, XEXP (x, 0)))
8188 	return true;
8189     }
8190   else if (code == SET
8191 	   && GET_CODE (SET_DEST (x)) == PC)
8192     {
8193       if (refers_to_regno_p (regno, SET_SRC (x)))
8194 	return true;
8195     }
8196 
8197   fmt = GET_RTX_FORMAT (code);
8198   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8199     {
8200       if (fmt[i] == 'e'
8201 	  && reg_used_in_mem_p (regno, XEXP (x, i)))
8202 	return true;
8203 
8204       else if (fmt[i] == 'E')
8205 	for (j = 0; j < XVECLEN (x, i); j++)
8206 	  if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8207 	    return true;
8208     }
8209   return false;
8210 }
8211 
8212 /* Returns true if expression DEP_RTX sets an address register
8213    used by instruction INSN to address memory.  */
8214 
8215 static bool
8216 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8217 {
8218   rtx target, pat;
8219 
8220   if (NONJUMP_INSN_P (dep_rtx))
8221     dep_rtx = PATTERN (dep_rtx);
8222 
8223   if (GET_CODE (dep_rtx) == SET)
8224     {
8225       target = SET_DEST (dep_rtx);
8226       if (GET_CODE (target) == STRICT_LOW_PART)
8227 	target = XEXP (target, 0);
8228       while (GET_CODE (target) == SUBREG)
8229 	target = SUBREG_REG (target);
8230 
8231       if (GET_CODE (target) == REG)
8232 	{
8233 	  int regno = REGNO (target);
8234 
8235 	  if (s390_safe_attr_type (insn) == TYPE_LA)
8236 	    {
8237 	      pat = PATTERN (insn);
8238 	      if (GET_CODE (pat) == PARALLEL)
8239 		{
8240 		  gcc_assert (XVECLEN (pat, 0) == 2);
8241 		  pat = XVECEXP (pat, 0, 0);
8242 		}
8243 	      gcc_assert (GET_CODE (pat) == SET);
8244 	      return refers_to_regno_p (regno, SET_SRC (pat));
8245 	    }
8246 	  else if (get_attr_atype (insn) == ATYPE_AGEN)
8247 	    return reg_used_in_mem_p (regno, PATTERN (insn));
8248 	}
8249     }
8250   return false;
8251 }
8252 
8253 /* Return 1 if DEP_INSN sets a register that INSN uses in the agen unit.  */
8254 
8255 int
8256 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8257 {
8258   rtx dep_rtx = PATTERN (dep_insn);
8259   int i;
8260 
8261   if (GET_CODE (dep_rtx) == SET
8262       && addr_generation_dependency_p (dep_rtx, insn))
8263     return 1;
8264   else if (GET_CODE (dep_rtx) == PARALLEL)
8265     {
8266       for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8267 	{
8268 	  if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8269 	    return 1;
8270 	}
8271     }
8272   return 0;
8273 }
8274 
8275 
8276 /* A C statement (sans semicolon) to update the integer scheduling priority
8277    INSN_PRIORITY (INSN).  Increase the priority to execute the INSN earlier,
8278    reduce the priority to execute INSN later.  Do not define this macro if
8279    you do not need to adjust the scheduling priorities of insns.
8280 
8281    A STD instruction should be scheduled earlier,
8282    in order to use the bypass.  */
8283 static int
8284 s390_adjust_priority (rtx_insn *insn, int priority)
8285 {
8286   if (! INSN_P (insn))
8287     return priority;
8288 
8289   if (s390_tune <= PROCESSOR_2064_Z900)
8290     return priority;
8291 
8292   switch (s390_safe_attr_type (insn))
8293     {
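      /* FP stores get the largest priority boost so they are issued
	 early enough to use the bypass (see above); other stores get a
	 smaller boost.  */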
8294       case TYPE_FSTOREDF:
8295       case TYPE_FSTORESF:
8296 	priority = priority << 3;
8297 	break;
8298       case TYPE_STORE:
8299       case TYPE_STM:
8300 	priority = priority << 1;
8301 	break;
8302       default:
8303 	break;
8304     }
8305   return priority;
8306 }
8307 
8308 
8309 /* The number of instructions that can be issued per cycle.  */
8310 
8311 static int
8312 s390_issue_rate (void)
8313 {
8314   switch (s390_tune)
8315     {
8316     case PROCESSOR_2084_Z990:
8317     case PROCESSOR_2094_Z9_109:
8318     case PROCESSOR_2094_Z9_EC:
8319     case PROCESSOR_2817_Z196:
8320       return 3;
8321     case PROCESSOR_2097_Z10:
8322       return 2;
8323     case PROCESSOR_2064_Z900:
8324       /* Starting with EC12 we use the sched_reorder hook to take care
8325 	 of instruction dispatch constraints.  The algorithm only
8326 	 picks the best instruction and assumes only a single
8327 	 instruction gets issued per cycle.  */
8328     case PROCESSOR_2827_ZEC12:
8329     case PROCESSOR_2964_Z13:
8330     case PROCESSOR_3906_Z14:
8331     default:
8332       return 1;
8333     }
8334 }
8335 
8336 static int
8337 s390_first_cycle_multipass_dfa_lookahead (void)
8338 {
8339   return 4;
8340 }
8341 
8342 static void
8343 annotate_constant_pool_refs_1 (rtx *x)
8344 {
8345   int i, j;
8346   const char *fmt;
8347 
8348   gcc_assert (GET_CODE (*x) != SYMBOL_REF
8349 	      || !CONSTANT_POOL_ADDRESS_P (*x));
8350 
8351   /* Literal pool references can only occur inside a MEM ...  */
8352   if (GET_CODE (*x) == MEM)
8353     {
8354       rtx memref = XEXP (*x, 0);
8355 
8356       if (GET_CODE (memref) == SYMBOL_REF
8357 	  && CONSTANT_POOL_ADDRESS_P (memref))
8358 	{
8359 	  rtx base = cfun->machine->base_reg;
8360 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8361 				     UNSPEC_LTREF);
8362 
8363 	  *x = replace_equiv_address (*x, addr);
8364 	  return;
8365 	}
8366 
8367       if (GET_CODE (memref) == CONST
8368 	  && GET_CODE (XEXP (memref, 0)) == PLUS
8369 	  && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8370 	  && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8371 	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8372 	{
8373 	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8374 	  rtx sym = XEXP (XEXP (memref, 0), 0);
8375 	  rtx base = cfun->machine->base_reg;
8376 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8377 				     UNSPEC_LTREF);
8378 
8379 	  *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8380 	  return;
8381 	}
8382     }
8383 
8384   /* ... or a load-address type pattern.  */
8385   if (GET_CODE (*x) == SET)
8386     {
8387       rtx addrref = SET_SRC (*x);
8388 
8389       if (GET_CODE (addrref) == SYMBOL_REF
8390 	  && CONSTANT_POOL_ADDRESS_P (addrref))
8391 	{
8392 	  rtx base = cfun->machine->base_reg;
8393 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8394 				     UNSPEC_LTREF);
8395 
8396 	  SET_SRC (*x) = addr;
8397 	  return;
8398 	}
8399 
8400       if (GET_CODE (addrref) == CONST
8401 	  && GET_CODE (XEXP (addrref, 0)) == PLUS
8402 	  && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8403 	  && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8404 	  && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8405 	{
8406 	  HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8407 	  rtx sym = XEXP (XEXP (addrref, 0), 0);
8408 	  rtx base = cfun->machine->base_reg;
8409 	  rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8410 				     UNSPEC_LTREF);
8411 
8412 	  SET_SRC (*x) = plus_constant (Pmode, addr, off);
8413 	  return;
8414 	}
8415     }
8416 
8417   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8418   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8419     {
8420       if (fmt[i] == 'e')
8421 	{
8422 	  annotate_constant_pool_refs_1 (&XEXP (*x, i));
8423 	}
8424       else if (fmt[i] == 'E')
8425 	{
8426 	  for (j = 0; j < XVECLEN (*x, i); j++)
8427 	    annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
8428 	}
8429     }
8430 }
8431 
8432 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8433    Fix up MEMs as required.
8434    Skip insns which support relative addressing, because they do not use a base
8435    register.  */
8436 
8437 static void
8438 annotate_constant_pool_refs (rtx_insn *insn)
8439 {
8440   if (s390_safe_relative_long_p (insn))
8441     return;
8442   annotate_constant_pool_refs_1 (&PATTERN (insn));
8443 }
8444 
8445 static void
8446 find_constant_pool_ref_1 (rtx x, rtx *ref)
8447 {
8448   int i, j;
8449   const char *fmt;
8450 
8451   /* Skip POOL_ENTRY insns.  */
8452   if (GET_CODE (x) == UNSPEC_VOLATILE
8453       && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8454     return;
8455 
8456   gcc_assert (GET_CODE (x) != SYMBOL_REF
8457 	      || !CONSTANT_POOL_ADDRESS_P (x));
8458 
8459   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8460     {
8461       rtx sym = XVECEXP (x, 0, 0);
8462       gcc_assert (GET_CODE (sym) == SYMBOL_REF
8463 		  && CONSTANT_POOL_ADDRESS_P (sym));
8464 
8465       if (*ref == NULL_RTX)
8466 	*ref = sym;
8467       else
8468 	gcc_assert (*ref == sym);
8469 
8470       return;
8471     }
8472 
8473   fmt = GET_RTX_FORMAT (GET_CODE (x));
8474   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8475     {
8476       if (fmt[i] == 'e')
8477 	{
8478 	  find_constant_pool_ref_1 (XEXP (x, i), ref);
8479 	}
8480       else if (fmt[i] == 'E')
8481 	{
8482 	  for (j = 0; j < XVECLEN (x, i); j++)
8483 	    find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8484 	}
8485     }
8486 }
8487 
8488 /* Find an annotated literal pool symbol referenced in INSN,
8489    and store it at REF.  Will abort if INSN contains references to
8490    more than one such pool symbol; multiple references to the same
8491    symbol are allowed, however.
8492 
8493    The rtx pointed to by REF must be initialized to NULL_RTX
8494    by the caller before calling this routine.
8495 
8496    Skip insns which support relative addressing, because they do not use a base
8497    register.  */
8498 
8499 static void
8500 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8501 {
8502   if (s390_safe_relative_long_p (insn))
8503     return;
8504   find_constant_pool_ref_1 (PATTERN (insn), ref);
8505 }
8506 
8507 static void
8508 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8509 {
8510   int i, j;
8511   const char *fmt;
8512 
8513   gcc_assert (*x != ref);
8514 
8515   if (GET_CODE (*x) == UNSPEC
8516       && XINT (*x, 1) == UNSPEC_LTREF
8517       && XVECEXP (*x, 0, 0) == ref)
8518     {
8519       *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8520       return;
8521     }
8522 
8523   if (GET_CODE (*x) == PLUS
8524       && GET_CODE (XEXP (*x, 1)) == CONST_INT
8525       && GET_CODE (XEXP (*x, 0)) == UNSPEC
8526       && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8527       && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8528     {
8529       rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8530       *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8531       return;
8532     }
8533 
8534   fmt = GET_RTX_FORMAT (GET_CODE (*x));
8535   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8536     {
8537       if (fmt[i] == 'e')
8538 	{
8539 	  replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8540 	}
8541       else if (fmt[i] == 'E')
8542 	{
8543 	  for (j = 0; j < XVECLEN (*x, i); j++)
8544 	    replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8545 	}
8546     }
8547 }
8548 
8549 /* Replace every reference to the annotated literal pool
8550    symbol REF in INSN by its base plus OFFSET.
8551    Skip insns which support relative addressing, because they do not use a base
8552    register.  */
8553 
8554 static void
8555 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8556 {
8557   if (s390_safe_relative_long_p (insn))
8558     return;
8559   replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
8560 }
8561 
8562 /* We keep a list of constants which we have to add to internal
8563    constant tables in the middle of large functions.  */
8564 
8565 #define NR_C_MODES 32
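/* Note: the modes are listed in order of decreasing size/alignment;
   s390_dump_pool relies on this to emit every entry with sufficient
   alignment.  */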
8566 machine_mode constant_modes[NR_C_MODES] =
8567 {
8568   TFmode, TImode, TDmode,
8569   V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8570   V4SFmode, V2DFmode, V1TFmode,
8571   DFmode, DImode, DDmode,
8572   V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8573   SFmode, SImode, SDmode,
8574   V4QImode, V2HImode, V1SImode,  V1SFmode,
8575   HImode,
8576   V2QImode, V1HImode,
8577   QImode,
8578   V1QImode
8579 };
8580 
8581 struct constant
8582 {
8583   struct constant *next;
8584   rtx value;
8585   rtx_code_label *label;
8586 };
8587 
8588 struct constant_pool
8589 {
8590   struct constant_pool *next;
8591   rtx_insn *first_insn;
8592   rtx_insn *pool_insn;
8593   bitmap insns;
8594   rtx_insn *emit_pool_after;
8595 
8596   struct constant *constants[NR_C_MODES];
8597   struct constant *execute;
8598   rtx_code_label *label;
8599   int size;
8600 };
8601 
8602 /* Allocate new constant_pool structure.  */
8603 
8604 static struct constant_pool *
8605 s390_alloc_pool (void)
8606 {
8607   struct constant_pool *pool;
8608   int i;
8609 
8610   pool = (struct constant_pool *) xmalloc (sizeof *pool);
8611   pool->next = NULL;
8612   for (i = 0; i < NR_C_MODES; i++)
8613     pool->constants[i] = NULL;
8614 
8615   pool->execute = NULL;
8616   pool->label = gen_label_rtx ();
8617   pool->first_insn = NULL;
8618   pool->pool_insn = NULL;
8619   pool->insns = BITMAP_ALLOC (NULL);
8620   pool->size = 0;
8621   pool->emit_pool_after = NULL;
8622 
8623   return pool;
8624 }
8625 
8626 /* Create new constant pool covering instructions starting at INSN
8627    and chain it to the end of POOL_LIST.  */
8628 
8629 static struct constant_pool *
8630 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8631 {
8632   struct constant_pool *pool, **prev;
8633 
8634   pool = s390_alloc_pool ();
8635   pool->first_insn = insn;
8636 
8637   for (prev = pool_list; *prev; prev = &(*prev)->next)
8638     ;
8639   *prev = pool;
8640 
8641   return pool;
8642 }
8643 
8644 /* End range of instructions covered by POOL at INSN and emit
8645    placeholder insn representing the pool.  */
8646 
8647 static void
8648 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8649 {
8650   rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8651 
8652   if (!insn)
8653     insn = get_last_insn ();
8654 
8655   pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8656   INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8657 }
8658 
8659 /* Add INSN to the list of insns covered by POOL.  */
8660 
8661 static void
8662 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8663 {
8664   bitmap_set_bit (pool->insns, INSN_UID (insn));
8665 }
8666 
8667 /* Return pool out of POOL_LIST that covers INSN.  */
8668 
8669 static struct constant_pool *
8670 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8671 {
8672   struct constant_pool *pool;
8673 
8674   for (pool = pool_list; pool; pool = pool->next)
8675     if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8676       break;
8677 
8678   return pool;
8679 }
8680 
8681 /* Add constant VAL of mode MODE to the constant pool POOL.  */
8682 
8683 static void
8684 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8685 {
8686   struct constant *c;
8687   int i;
8688 
8689   for (i = 0; i < NR_C_MODES; i++)
8690     if (constant_modes[i] == mode)
8691       break;
8692   gcc_assert (i != NR_C_MODES);
8693 
8694   for (c = pool->constants[i]; c != NULL; c = c->next)
8695     if (rtx_equal_p (val, c->value))
8696       break;
8697 
8698   if (c == NULL)
8699     {
8700       c = (struct constant *) xmalloc (sizeof *c);
8701       c->value = val;
8702       c->label = gen_label_rtx ();
8703       c->next = pool->constants[i];
8704       pool->constants[i] = c;
8705       pool->size += GET_MODE_SIZE (mode);
8706     }
8707 }
8708 
8709 /* Return an rtx that represents the offset of X from the start of
8710    pool POOL.  */
8711 
8712 static rtx
8713 s390_pool_offset (struct constant_pool *pool, rtx x)
8714 {
8715   rtx label;
8716 
8717   label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8718   x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8719 		      UNSPEC_POOL_OFFSET);
8720   return gen_rtx_CONST (GET_MODE (x), x);
8721 }
8722 
8723 /* Find constant VAL of mode MODE in the constant pool POOL.
8724    Return an RTX describing the distance from the start of
8725    the pool to the location of the new constant.  */
8726 
8727 static rtx
8728 s390_find_constant (struct constant_pool *pool, rtx val,
8729 		    machine_mode mode)
8730 {
8731   struct constant *c;
8732   int i;
8733 
8734   for (i = 0; i < NR_C_MODES; i++)
8735     if (constant_modes[i] == mode)
8736       break;
8737   gcc_assert (i != NR_C_MODES);
8738 
8739   for (c = pool->constants[i]; c != NULL; c = c->next)
8740     if (rtx_equal_p (val, c->value))
8741       break;
8742 
8743   gcc_assert (c);
8744 
8745   return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8746 }
8747 
8748 /* Check whether INSN is an execute.  Return the label_ref to its
8749    execute target template if so, NULL_RTX otherwise.  */
8750 
8751 static rtx
8752 s390_execute_label (rtx insn)
8753 {
8754   if (INSN_P (insn)
8755       && GET_CODE (PATTERN (insn)) == PARALLEL
8756       && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8757       && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8758 	  || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8759     {
8760       if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8761 	return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8762       else
8763 	{
8764 	  gcc_assert (JUMP_P (insn));
8765 	  /* For jump insns as execute target:
8766 	     - There is one operand less in the parallel (the
8767 	       modification register of the execute is always 0).
8768 	     - The execute target label is wrapped into an
8769 	       if_then_else in order to hide it from jump analysis.  */
8770 	  return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8771 	}
8772     }
8773 
8774   return NULL_RTX;
8775 }
8776 
8777 /* Find execute target for INSN in the constant pool POOL.
8778    Return an RTX describing the distance from the start of
8779    the pool to the location of the execute target.  */
8780 
8781 static rtx
8782 s390_find_execute (struct constant_pool *pool, rtx insn)
8783 {
8784   struct constant *c;
8785 
8786   for (c = pool->execute; c != NULL; c = c->next)
8787     if (INSN_UID (insn) == INSN_UID (c->value))
8788       break;
8789 
8790   gcc_assert (c);
8791 
8792   return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8793 }
8794 
8795 /* For an execute INSN, extract the execute target template.  */
8796 
8797 static rtx
8798 s390_execute_target (rtx insn)
8799 {
8800   rtx pattern = PATTERN (insn);
8801   gcc_assert (s390_execute_label (insn));
8802 
8803   if (XVECLEN (pattern, 0) == 2)
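  /* A two-element PARALLEL means the target template is the single insn
     in element 1; otherwise rebuild the PARALLEL from elements 1..n-1,
     dropping the leading UNSPEC.  */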
8804     {
8805       pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8806     }
8807   else
8808     {
8809       rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8810       int i;
8811 
8812       for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8813 	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8814 
8815       pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8816     }
8817 
8818   return pattern;
8819 }
8820 
8821 /* Indicate that INSN cannot be duplicated.  This is the case for
8822    execute insns that carry a unique label.  */
8823 
8824 static bool
8825 s390_cannot_copy_insn_p (rtx_insn *insn)
8826 {
8827   rtx label = s390_execute_label (insn);
8828   return label && label != const0_rtx;
8829 }
8830 
8831 /* Dump out the constants in POOL.  If REMOTE_LABEL is true,
8832    do not emit the pool base label.  */
8833 
8834 static void
8835 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8836 {
8837   struct constant *c;
8838   rtx_insn *insn = pool->pool_insn;
8839   int i;
8840 
8841   /* Switch to rodata section.  */
8842   insn = emit_insn_after (gen_pool_section_start (), insn);
8843   INSN_ADDRESSES_NEW (insn, -1);
8844 
8845   /* Ensure minimum pool alignment.  */
8846   insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8847   INSN_ADDRESSES_NEW (insn, -1);
8848 
8849   /* Emit pool base label.  */
8850   if (!remote_label)
8851     {
8852       insn = emit_label_after (pool->label, insn);
8853       INSN_ADDRESSES_NEW (insn, -1);
8854     }
8855 
8856   /* Dump constants in descending alignment requirement order,
8857      ensuring proper alignment for every constant.  */
8858   for (i = 0; i < NR_C_MODES; i++)
8859     for (c = pool->constants[i]; c; c = c->next)
8860       {
8861 	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references.  */
8862 	rtx value = copy_rtx (c->value);
8863 	if (GET_CODE (value) == CONST
8864 	    && GET_CODE (XEXP (value, 0)) == UNSPEC
8865 	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8866 	    && XVECLEN (XEXP (value, 0), 0) == 1)
8867 	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8868 
8869 	insn = emit_label_after (c->label, insn);
8870 	INSN_ADDRESSES_NEW (insn, -1);
8871 
8872 	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8873 					 gen_rtvec (1, value),
8874 					 UNSPECV_POOL_ENTRY);
8875 	insn = emit_insn_after (value, insn);
8876 	INSN_ADDRESSES_NEW (insn, -1);
8877       }
8878 
8879   /* Ensure minimum alignment for instructions.  */
8880   insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8881   INSN_ADDRESSES_NEW (insn, -1);
8882 
8883   /* Output in-pool execute template insns.  */
8884   for (c = pool->execute; c; c = c->next)
8885     {
8886       insn = emit_label_after (c->label, insn);
8887       INSN_ADDRESSES_NEW (insn, -1);
8888 
8889       insn = emit_insn_after (s390_execute_target (c->value), insn);
8890       INSN_ADDRESSES_NEW (insn, -1);
8891     }
8892 
8893   /* Switch back to previous section.  */
8894   insn = emit_insn_after (gen_pool_section_end (), insn);
8895   INSN_ADDRESSES_NEW (insn, -1);
8896 
8897   insn = emit_barrier_after (insn);
8898   INSN_ADDRESSES_NEW (insn, -1);
8899 
8900   /* Remove placeholder insn.  */
8901   remove_insn (pool->pool_insn);
8902 }
8903 
8904 /* Free all memory used by POOL.  */
8905 
8906 static void
8907 s390_free_pool (struct constant_pool *pool)
8908 {
8909   struct constant *c, *next;
8910   int i;
8911 
8912   for (i = 0; i < NR_C_MODES; i++)
8913     for (c = pool->constants[i]; c; c = next)
8914       {
8915 	next = c->next;
8916 	free (c);
8917       }
8918 
8919   for (c = pool->execute; c; c = next)
8920     {
8921       next = c->next;
8922       free (c);
8923     }
8924 
8925   BITMAP_FREE (pool->insns);
8926   free (pool);
8927 }
8928 
8929 
8930 /* Collect main literal pool.  Return NULL on overflow.  */
8931 
8932 static struct constant_pool *
8933 s390_mainpool_start (void)
8934 {
8935   struct constant_pool *pool;
8936   rtx_insn *insn;
8937 
8938   pool = s390_alloc_pool ();
8939 
8940   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8941     {
8942       if (NONJUMP_INSN_P (insn)
8943 	  && GET_CODE (PATTERN (insn)) == SET
8944 	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8945 	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8946 	{
8947 	  /* There might be two main_pool instructions if base_reg
8948 	     is call-clobbered; one for shrink-wrapped code and one
8949 	     for the rest.  We want to keep the first.  */
8950 	  if (pool->pool_insn)
8951 	    {
8952 	      insn = PREV_INSN (insn);
8953 	      delete_insn (NEXT_INSN (insn));
8954 	      continue;
8955 	    }
8956 	  pool->pool_insn = insn;
8957 	}
8958 
8959       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8960 	{
8961 	  rtx pool_ref = NULL_RTX;
8962 	  find_constant_pool_ref (insn, &pool_ref);
8963 	  if (pool_ref)
8964 	    {
8965 	      rtx constant = get_pool_constant (pool_ref);
8966 	      machine_mode mode = get_pool_mode (pool_ref);
8967 	      s390_add_constant (pool, constant, mode);
8968 	    }
8969 	}
8970 
8971       /* If hot/cold partitioning is enabled we have to make sure that
8972 	 the literal pool is emitted in the same section where the
8973 	 initialization of the literal pool base pointer takes place.
8974 	 emit_pool_after is only used in the non-overflow case on non
8975 	 Z cpus where we can emit the literal pool at the end of the
8976 	 function body within the text section.  */
8977       if (NOTE_P (insn)
8978 	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8979 	  && !pool->emit_pool_after)
8980 	pool->emit_pool_after = PREV_INSN (insn);
8981     }
8982 
8983   gcc_assert (pool->pool_insn || pool->size == 0);
8984 
8985   if (pool->size >= 4096)
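  /* Pools of 4 KB or more exceed the reach of a 12-bit displacement from
     a single base register, so fall back to chunkified pools.  */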
8986     {
8987       /* We're going to chunkify the pool, so remove the main
8988 	 pool placeholder insn.  */
8989       remove_insn (pool->pool_insn);
8990 
8991       s390_free_pool (pool);
8992       pool = NULL;
8993     }
8994 
8995   /* If the functions ends with the section where the literal pool
8996      should be emitted set the marker to its end.  */
8997   if (pool && !pool->emit_pool_after)
8998     pool->emit_pool_after = get_last_insn ();
8999 
9000   return pool;
9001 }
9002 
9003 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9004    Modify the current function to output the pool constants as well as
9005    the pool register setup instruction.  */
9006 
9007 static void
9008 s390_mainpool_finish (struct constant_pool *pool)
9009 {
9010   rtx base_reg = cfun->machine->base_reg;
9011   rtx set;
9012   rtx_insn *insn;
9013 
9014   /* If the pool is empty, we're done.  */
9015   if (pool->size == 0)
9016     {
9017       /* We don't actually need a base register after all.  */
9018       cfun->machine->base_reg = NULL_RTX;
9019 
9020       if (pool->pool_insn)
9021 	remove_insn (pool->pool_insn);
9022       s390_free_pool (pool);
9023       return;
9024     }
9025 
9026   /* We need correct insn addresses.  */
9027   shorten_branches (get_insns ());
9028 
9029   /* Use a LARL to load the pool register.  The pool is
9030      located in the .rodata section, so we emit it after the function.  */
9031   set = gen_main_base_64 (base_reg, pool->label);
9032   insn = emit_insn_after (set, pool->pool_insn);
9033   INSN_ADDRESSES_NEW (insn, -1);
9034   remove_insn (pool->pool_insn);
9035 
9036   insn = get_last_insn ();
9037   pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9038   INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9039 
9040   s390_dump_pool (pool, 0);
9041 
9042   /* Replace all literal pool references.  */
9043 
9044   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9045     {
9046       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9047 	{
9048 	  rtx addr, pool_ref = NULL_RTX;
9049 	  find_constant_pool_ref (insn, &pool_ref);
9050 	  if (pool_ref)
9051 	    {
9052 	      if (s390_execute_label (insn))
9053 		addr = s390_find_execute (pool, insn);
9054 	      else
9055 		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9056 						 get_pool_mode (pool_ref));
9057 
9058 	      replace_constant_pool_ref (insn, pool_ref, addr);
9059 	      INSN_CODE (insn) = -1;
9060 	    }
9061 	}
9062     }
9063 
9064 
9065   /* Free the pool.  */
9066   s390_free_pool (pool);
9067 }
9068 
9069 /* Chunkify the literal pool.  */
9070 
9071 #define S390_POOL_CHUNK_MIN	0xc00
9072 #define S390_POOL_CHUNK_MAX	0xe00
9073 
9074 static struct constant_pool *
9075 s390_chunkify_start (void)
9076 {
9077   struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9078   bitmap far_labels;
9079   rtx_insn *insn;
9080 
9081   /* We need correct insn addresses.  */
9082 
9083   shorten_branches (get_insns ());
9084 
9085   /* Scan all insns and move literals to pool chunks.  */
9086 
9087   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9088     {
9089       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9090 	{
9091 	  rtx pool_ref = NULL_RTX;
9092 	  find_constant_pool_ref (insn, &pool_ref);
9093 	  if (pool_ref)
9094 	    {
9095 	      rtx constant = get_pool_constant (pool_ref);
9096 	      machine_mode mode = get_pool_mode (pool_ref);
9097 
9098 	      if (!curr_pool)
9099 		curr_pool = s390_start_pool (&pool_list, insn);
9100 
9101 	      s390_add_constant (curr_pool, constant, mode);
9102 	      s390_add_pool_insn (curr_pool, insn);
9103 	    }
9104 	}
9105 
9106       if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9107 	{
9108 	  if (curr_pool)
9109 	    s390_add_pool_insn (curr_pool, insn);
9110 	}
9111 
9112       if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9113 	continue;
9114 
9115       if (!curr_pool
9116 	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9117 	  || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9118 	continue;
9119 
9120       if (curr_pool->size < S390_POOL_CHUNK_MAX)
9121 	continue;
9122 
9123       s390_end_pool (curr_pool, NULL);
9124       curr_pool = NULL;
9125     }
9126 
9127   if (curr_pool)
9128     s390_end_pool (curr_pool, NULL);
9129 
9130   /* Find all labels that are branched into
9131      from an insn belonging to a different chunk.  */
9132 
9133   far_labels = BITMAP_ALLOC (NULL);
9134 
9135   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9136     {
9137       rtx_jump_table_data *table;
9138 
9139       /* Labels marked with LABEL_PRESERVE_P can be the target
9140 	 of non-local jumps, so we have to mark them.
9141 	 The same holds for named labels.
9142 
9143 	 Don't do that, however, if it is the label before
9144 	 a jump table.  */
9145 
9146       if (LABEL_P (insn)
9147 	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9148 	{
9149 	  rtx_insn *vec_insn = NEXT_INSN (insn);
9150 	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9151 	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9152 	}
9153       /* Check potential targets in a table jump (casesi_jump).  */
9154       else if (tablejump_p (insn, NULL, &table))
9155 	{
9156 	  rtx vec_pat = PATTERN (table);
9157 	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9158 
9159 	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9160 	    {
9161 	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9162 
9163 	      if (s390_find_pool (pool_list, label)
9164 		  != s390_find_pool (pool_list, insn))
9165 		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9166 	    }
9167 	}
9168       /* If we have a direct jump (conditional or unconditional),
9169 	 check all potential targets.  */
9170       else if (JUMP_P (insn))
9171 	{
9172 	  rtx pat = PATTERN (insn);
9173 
9174 	  if (GET_CODE (pat) == PARALLEL)
9175 	    pat = XVECEXP (pat, 0, 0);
9176 
9177 	  if (GET_CODE (pat) == SET)
9178 	    {
9179 	      rtx label = JUMP_LABEL (insn);
9180 	      if (label && !ANY_RETURN_P (label))
9181 		{
9182 		  if (s390_find_pool (pool_list, label)
9183 		      != s390_find_pool (pool_list, insn))
9184 		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9185 		}
9186 	    }
9187 	}
9188     }
9189 
9190   /* Insert base register reload insns before every pool.  */
9191 
9192   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9193     {
9194       rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9195 					 curr_pool->label);
9196       rtx_insn *insn = curr_pool->first_insn;
9197       INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9198     }
9199 
9200   /* Insert base register reload insns at every far label.  */
9201 
9202   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9203     if (LABEL_P (insn)
9204 	&& bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9205       {
9206 	struct constant_pool *pool = s390_find_pool (pool_list, insn);
9207 	if (pool)
9208 	  {
9209 	    rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9210 					       pool->label);
9211 	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9212 	  }
9213       }
9214 
9215 
9216   BITMAP_FREE (far_labels);
9217 
9218 
9219   /* Recompute insn addresses.  */
9220 
9221   init_insn_lengths ();
9222   shorten_branches (get_insns ());
9223 
9224   return pool_list;
9225 }
9226 
9227 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9228    After we have decided to use this list, finish implementing
9229    all changes to the current function as required.  */
9230 
9231 static void
9232 s390_chunkify_finish (struct constant_pool *pool_list)
9233 {
9234   struct constant_pool *curr_pool = NULL;
9235   rtx_insn *insn;
9236 
9237 
9238   /* Replace all literal pool references.  */
9239 
9240   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9241     {
9242       curr_pool = s390_find_pool (pool_list, insn);
9243       if (!curr_pool)
9244 	continue;
9245 
9246       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9247 	{
9248 	  rtx addr, pool_ref = NULL_RTX;
9249 	  find_constant_pool_ref (insn, &pool_ref);
9250 	  if (pool_ref)
9251 	    {
9252 	      if (s390_execute_label (insn))
9253 		addr = s390_find_execute (curr_pool, insn);
9254 	      else
9255 		addr = s390_find_constant (curr_pool,
9256 					   get_pool_constant (pool_ref),
9257 					   get_pool_mode (pool_ref));
9258 
9259 	      replace_constant_pool_ref (insn, pool_ref, addr);
9260 	      INSN_CODE (insn) = -1;
9261 	    }
9262 	}
9263     }
9264 
9265   /* Dump out all literal pools.  */
9266 
9267   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9268     s390_dump_pool (curr_pool, 0);
9269 
9270   /* Free pool list.  */
9271 
9272   while (pool_list)
9273     {
9274       struct constant_pool *next = pool_list->next;
9275       s390_free_pool (pool_list);
9276       pool_list = next;
9277     }
9278 }
9279 
9280 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */
9281 
9282 void
9283 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9284 {
9285   switch (GET_MODE_CLASS (mode))
9286     {
9287     case MODE_FLOAT:
9288     case MODE_DECIMAL_FLOAT:
9289       gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9290 
9291       assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9292 		     as_a <scalar_float_mode> (mode), align);
9293       break;
9294 
9295     case MODE_INT:
9296       assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9297       mark_symbol_refs_as_used (exp);
9298       break;
9299 
9300     case MODE_VECTOR_INT:
9301     case MODE_VECTOR_FLOAT:
9302       {
9303 	int i;
9304 	machine_mode inner_mode;
9305 	gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9306 
9307 	inner_mode = GET_MODE_INNER (GET_MODE (exp));
9308 	for (i = 0; i < XVECLEN (exp, 0); i++)
9309 	  s390_output_pool_entry (XVECEXP (exp, 0, i),
9310 				  inner_mode,
9311 				  i == 0
9312 				  ? align
9313 				  : GET_MODE_BITSIZE (inner_mode));
9314       }
9315       break;
9316 
9317     default:
9318       gcc_unreachable ();
9319     }
9320 }
9321 
9322 
9323 /* Return an RTL expression representing the value of the return address
9324    for the frame COUNT steps up from the current frame.  FRAME is the
9325    frame pointer of that frame.  */
9326 
9327 rtx
9328 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9329 {
9330   int offset;
9331   rtx addr;
9332 
9333   /* Without backchain, we fail for all but the current frame.  */
9334 
9335   if (!TARGET_BACKCHAIN && count > 0)
9336     return NULL_RTX;
9337 
9338   /* For the current frame, we need to make sure the initial
9339      value of RETURN_REGNUM is actually saved.  */
9340 
9341   if (count == 0)
9342     return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9343 
9344   if (TARGET_PACKED_STACK)
9345     offset = -2 * UNITS_PER_LONG;
9346   else
9347     offset = RETURN_REGNUM * UNITS_PER_LONG;
9348 
9349   addr = plus_constant (Pmode, frame, offset);
9350   addr = memory_address (Pmode, addr);
9351   return gen_rtx_MEM (Pmode, addr);
9352 }
9353 
9354 /* Return an RTL expression representing the back chain stored in
9355    the current stack frame.  */
9356 
9357 rtx
9358 s390_back_chain_rtx (void)
9359 {
9360   rtx chain;
9361 
9362   gcc_assert (TARGET_BACKCHAIN);
9363 
9364   if (TARGET_PACKED_STACK)
9365     chain = plus_constant (Pmode, stack_pointer_rtx,
9366 			   STACK_POINTER_OFFSET - UNITS_PER_LONG);
9367   else
9368     chain = stack_pointer_rtx;
9369 
9370   chain = gen_rtx_MEM (Pmode, chain);
9371   return chain;
9372 }
9373 
9374 /* Find first call clobbered register unused in a function.
9375    This could be used as base register in a leaf function
9376    or for holding the return address before epilogue.  */
9377 
9378 static int
9379 find_unused_clobbered_reg (void)
9380 {
9381   int i;
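  /* Only the call-clobbered GPRs r0..r5 are considered.  */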
9382   for (i = 0; i < 6; i++)
9383     if (!df_regs_ever_live_p (i))
9384       return i;
9385   return 0;
9386 }
9387 
9388 
9389 /* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA for all
9390    clobbered hard regs in SETREG.  */
9391 
9392 static void
9393 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9394 {
9395   char *regs_ever_clobbered = (char *)data;
9396   unsigned int i, regno;
9397   machine_mode mode = GET_MODE (setreg);
9398 
9399   if (GET_CODE (setreg) == SUBREG)
9400     {
9401       rtx inner = SUBREG_REG (setreg);
9402       if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9403 	return;
9404       regno = subreg_regno (setreg);
9405     }
9406   else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9407     regno = REGNO (setreg);
9408   else
9409     return;
9410 
9411   for (i = regno;
9412        i < end_hard_regno (mode, regno);
9413        i++)
9414     regs_ever_clobbered[i] = 1;
9415 }
9416 
9417 /* Walks through all basic blocks of the current function looking
9418    for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
9419    of the passed char array REGS_EVER_CLOBBERED are set to one for
9420    each of those regs.  */
9421 
9422 static void
9423 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9424 {
9425   basic_block cur_bb;
9426   rtx_insn *cur_insn;
9427   unsigned int i;
9428 
9429   memset (regs_ever_clobbered, 0, 32);
9430 
9431   /* For non-leaf functions we have to consider all call clobbered regs to be
9432      clobbered.  */
9433   if (!crtl->is_leaf)
9434     {
9435       for (i = 0; i < 32; i++)
9436 	regs_ever_clobbered[i] = call_used_regs[i];
9437     }
9438 
9439   /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
9440      this work is done by liveness analysis (mark_regs_live_at_end).
9441      Special care is needed for functions containing landing pads.  Landing pads
9442      may use the eh registers, but the code which sets these registers is not
9443      contained in that function.  Hence s390_regs_ever_clobbered is not able to
9444      deal with this automatically.  */
9445   if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9446     for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9447       if (crtl->calls_eh_return
9448 	  || (cfun->machine->has_landing_pad_p
9449 	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9450 	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9451 
9452   /* For nonlocal gotos all call-saved registers have to be saved.
9453      This flag is also set for the unwinding code in libgcc.
9454      See expand_builtin_unwind_init.  For regs_ever_live this is done by
9455      reload.  */
9456   if (crtl->saves_all_registers)
9457     for (i = 0; i < 32; i++)
9458       if (!call_used_regs[i])
9459 	regs_ever_clobbered[i] = 1;
9460 
9461   FOR_EACH_BB_FN (cur_bb, cfun)
9462     {
9463       FOR_BB_INSNS (cur_bb, cur_insn)
9464 	{
9465 	  rtx pat;
9466 
9467 	  if (!INSN_P (cur_insn))
9468 	    continue;
9469 
9470 	  pat = PATTERN (cur_insn);
9471 
9472 	  /* Ignore GPR restore insns.  */
9473 	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9474 	    {
9475 	      if (GET_CODE (pat) == SET
9476 		  && GENERAL_REG_P (SET_DEST (pat)))
9477 		{
9478 		  /* lgdr  */
9479 		  if (GET_MODE (SET_SRC (pat)) == DImode
9480 		      && FP_REG_P (SET_SRC (pat)))
9481 		    continue;
9482 
9483 		  /* l / lg  */
9484 		  if (GET_CODE (SET_SRC (pat)) == MEM)
9485 		    continue;
9486 		}
9487 
9488 	      /* lm / lmg */
9489 	      if (GET_CODE (pat) == PARALLEL
9490 		  && load_multiple_operation (pat, VOIDmode))
9491 		continue;
9492 	    }
9493 
9494 	  note_stores (cur_insn,
9495 		       s390_reg_clobbered_rtx,
9496 		       regs_ever_clobbered);
9497 	}
9498     }
9499 }
9500 
9501 /* Determine the frame area which actually has to be accessed
9502    in the function epilogue. The values are stored at the
9503    given pointers AREA_BOTTOM (address of the lowest used stack
9504    address) and AREA_TOP (address of the first item which does
9505    not belong to the stack frame).  */
9506 
9507 static void
9508 s390_frame_area (int *area_bottom, int *area_top)
9509 {
9510   int b, t;
9511 
9512   b = INT_MAX;
9513   t = INT_MIN;
9514 
9515   if (cfun_frame_layout.first_restore_gpr != -1)
9516     {
9517       b = (cfun_frame_layout.gprs_offset
9518 	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9519       t = b + (cfun_frame_layout.last_restore_gpr
9520 	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9521     }
9522 
9523   if (TARGET_64BIT && cfun_save_high_fprs_p)
9524     {
9525       b = MIN (b, cfun_frame_layout.f8_offset);
9526       t = MAX (t, (cfun_frame_layout.f8_offset
9527 		   + cfun_frame_layout.high_fprs * 8));
9528     }
9529 
9530   if (!TARGET_64BIT)
9531     {
9532       if (cfun_fpr_save_p (FPR4_REGNUM))
9533 	{
9534 	  b = MIN (b, cfun_frame_layout.f4_offset);
9535 	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
9536 	}
9537       if (cfun_fpr_save_p (FPR6_REGNUM))
9538 	{
9539 	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
9540 	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
9541 	}
9542     }
9543   *area_bottom = b;
9544   *area_top = t;
9545 }
9546 /* Update gpr_save_slots in the frame layout trying to make use of
9547    FPRs as GPR save slots.
9548    This is a helper routine of s390_register_info.  */
9549 
9550 static void
9551 s390_register_info_gprtofpr ()
9552 {
9553   int save_reg_slot = FPR0_REGNUM;
9554   int i, j;
9555 
9556   if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9557     return;
9558 
9559   /* builtin_eh_return needs to be able to modify the return address
9560      on the stack.  It could also adjust the FPR save slot instead but
9561      is it worth the trouble?!  */
9562   if (crtl->calls_eh_return)
9563     return;
9564 
9565   for (i = 15; i >= 6; i--)
9566     {
9567       if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9568 	continue;
9569 
9570       /* Advance to the next FP register which can be used as a
9571 	 GPR save slot.  */
9572       while ((!call_used_regs[save_reg_slot]
9573 	      || df_regs_ever_live_p (save_reg_slot)
9574 	      || cfun_fpr_save_p (save_reg_slot))
9575 	     && FP_REGNO_P (save_reg_slot))
9576 	save_reg_slot++;
9577       if (!FP_REGNO_P (save_reg_slot))
9578 	{
9579 	  /* We only want to use ldgr/lgdr if we can get rid of
9580 	     stm/lm entirely.  So undo the gpr slot allocation in
9581 	     case we ran out of FPR save slots.  */
9582 	  for (j = 6; j <= 15; j++)
9583 	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9584 	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9585 	  break;
9586 	}
9587       cfun_gpr_save_slot (i) = save_reg_slot++;
9588     }
9589 }
9590 
9591 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9592    stdarg.
9593    This is a helper routine for s390_register_info.  */
9594 
9595 static void
9596 s390_register_info_stdarg_fpr ()
9597 {
9598   int i;
9599   int min_fpr;
9600   int max_fpr;
9601 
9602   /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9603      f0-f4 for 64 bit.  */
9604   if (!cfun->stdarg
9605       || !TARGET_HARD_FLOAT
9606       || !cfun->va_list_fpr_size
9607       || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9608     return;
9609 
9610   min_fpr = crtl->args.info.fprs;
9611   max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9612   if (max_fpr >= FP_ARG_NUM_REG)
9613     max_fpr = FP_ARG_NUM_REG - 1;
9614 
9615   /* FPR argument regs start at f0.  */
9616   min_fpr += FPR0_REGNUM;
9617   max_fpr += FPR0_REGNUM;
9618 
9619   for (i = min_fpr; i <= max_fpr; i++)
9620     cfun_set_fpr_save (i);
9621 }
9622 
9623 /* Reserve the GPR save slots for GPRs which need to be saved due to
9624    stdarg.
9625    This is a helper routine for s390_register_info.  */
9626 
9627 static void
9628 s390_register_info_stdarg_gpr ()
9629 {
9630   int i;
9631   int min_gpr;
9632   int max_gpr;
9633 
9634   if (!cfun->stdarg
9635       || !cfun->va_list_gpr_size
9636       || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9637     return;
9638 
9639   min_gpr = crtl->args.info.gprs;
9640   max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9641   if (max_gpr >= GP_ARG_NUM_REG)
9642     max_gpr = GP_ARG_NUM_REG - 1;
9643 
9644   /* GPR argument regs start at r2.  */
9645   min_gpr += GPR2_REGNUM;
9646   max_gpr += GPR2_REGNUM;
9647 
9648   /* If r6 was supposed to be saved into an FPR and now needs to go to
9649      the stack for vararg we have to adjust the restore range to make
9650      sure that the restore is done from stack as well.  */
9651   if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9652       && min_gpr <= GPR6_REGNUM
9653       && max_gpr >= GPR6_REGNUM)
9654     {
9655       if (cfun_frame_layout.first_restore_gpr == -1
9656 	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9657 	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9658       if (cfun_frame_layout.last_restore_gpr == -1
9659 	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9660 	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9661     }
9662 
9663   if (cfun_frame_layout.first_save_gpr == -1
9664       || cfun_frame_layout.first_save_gpr > min_gpr)
9665     cfun_frame_layout.first_save_gpr = min_gpr;
9666 
9667   if (cfun_frame_layout.last_save_gpr == -1
9668       || cfun_frame_layout.last_save_gpr < max_gpr)
9669     cfun_frame_layout.last_save_gpr = max_gpr;
9670 
9671   for (i = min_gpr; i <= max_gpr; i++)
9672     cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9673 }
9674 
9675 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9676    prologue and epilogue.  */
9677 
9678 static void
9679 s390_register_info_set_ranges ()
9680 {
9681   int i, j;
9682 
9683   /* Find the first and the last save slot supposed to use the stack
9684      to set the restore range.
9685      Vararg regs might be marked as save to stack but only the
9686      call-saved regs really need restoring (i.e. r6).  This code
9687      assumes that the vararg regs have not yet been recorded in
9688      cfun_gpr_save_slot.  */
9689   for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9690   for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9691   cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9692   cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9693   cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9694   cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9695 }
9696 
9697 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9698    for registers which need to be saved in function prologue.
9699    This function can be used until the insns emitted for save/restore
9700    of the regs are visible in the RTL stream.  */
9701 
9702 static void
9703 s390_register_info ()
9704 {
9705   int i;
9706   char clobbered_regs[32];
9707 
9708   gcc_assert (!epilogue_completed);
9709 
9710   if (reload_completed)
9711     /* After reload we rely on our own routine to determine which
9712        registers need saving.  */
9713     s390_regs_ever_clobbered (clobbered_regs);
9714   else
9715     /* During reload we use regs_ever_live as a base since reload
9716        does changes in there which we otherwise would not be aware
9717        of.  */
9718     for (i = 0; i < 32; i++)
9719       clobbered_regs[i] = df_regs_ever_live_p (i);
9720 
9721   for (i = 0; i < 32; i++)
9722     clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9723 
9724   /* Mark the call-saved FPRs which need to be saved.
9725      This needs to be done before checking the special GPRs since the
9726      stack pointer usage depends on whether high FPRs have to be saved
9727      or not.  */
9728   cfun_frame_layout.fpr_bitmap = 0;
9729   cfun_frame_layout.high_fprs = 0;
9730   for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9731     if (clobbered_regs[i] && !call_used_regs[i])
9732       {
9733 	cfun_set_fpr_save (i);
9734 	if (i >= FPR8_REGNUM)
9735 	  cfun_frame_layout.high_fprs++;
9736       }
9737 
9738   /* Register 12 is used for GOT address, but also as temp in prologue
9739      for split-stack stdarg functions (unless r14 is available).  */
9740   clobbered_regs[12]
9741     |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9742 	|| (flag_split_stack && cfun->stdarg
9743 	    && (crtl->is_leaf || TARGET_TPF_PROFILING
9744 		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9745 
9746   clobbered_regs[BASE_REGNUM]
9747     |= (cfun->machine->base_reg
9748 	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9749 
9750   clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9751     |= !!frame_pointer_needed;
9752 
9753   /* On pre z900 machines this might take until machine dependent
9754      reorg to decide.
9755      save_return_addr_p will only be set on non-zarch machines so
9756      there is no risk that r14 goes into an FPR instead of a stack
9757      slot.  */
9758   clobbered_regs[RETURN_REGNUM]
9759     |= (!crtl->is_leaf
9760 	|| TARGET_TPF_PROFILING
9761 	|| cfun_frame_layout.save_return_addr_p
9762 	|| crtl->calls_eh_return);
9763 
9764   clobbered_regs[STACK_POINTER_REGNUM]
9765     |= (!crtl->is_leaf
9766 	|| TARGET_TPF_PROFILING
9767 	|| cfun_save_high_fprs_p
9768 	|| get_frame_size () > 0
9769 	|| (reload_completed && cfun_frame_layout.frame_size > 0)
9770 	|| cfun->calls_alloca);
9771 
9772   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9773 
9774   for (i = 6; i < 16; i++)
9775     if (clobbered_regs[i])
9776       cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9777 
9778   s390_register_info_stdarg_fpr ();
9779   s390_register_info_gprtofpr ();
9780   s390_register_info_set_ranges ();
9781   /* stdarg functions might need to save GPRs 2 to 6.  This might
9782      override the GPR->FPR save decision made by
9783      s390_register_info_gprtofpr for r6 since vararg regs must go to
9784      the stack.  */
9785   s390_register_info_stdarg_gpr ();
9786 }
9787 
9788 /* Return true if REGNO is a global register, but not one
9789    of the special ones that need to be saved/restored anyway.  */
9790 
9791 static inline bool
9792 global_not_special_regno_p (int regno)
9793 {
9794   return (global_regs[regno]
9795 	  /* These registers are special and need to be
9796 	     restored in any case.  */
9797 	  && !(regno == STACK_POINTER_REGNUM
9798 	       || regno == RETURN_REGNUM
9799 	       || regno == BASE_REGNUM
9800 	       || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9801 }
9802 
9803 /* This function is called by s390_optimize_prologue in order to get
9804    rid of unnecessary GPR save/restore instructions.  The register info
9805    for the GPRs is re-computed and the ranges are re-calculated.  */
9806 
9807 static void
9808 s390_optimize_register_info ()
9809 {
9810   char clobbered_regs[32];
9811   int i;
9812 
9813   gcc_assert (epilogue_completed);
9814 
9815   s390_regs_ever_clobbered (clobbered_regs);
9816 
9817   /* Global registers do not need to be saved and restored unless it
9818      is one of our special regs.  (r12, r13, r14, or r15).  */
9819   for (i = 0; i < 32; i++)
9820     clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
9821 
9822   /* There is still special treatment needed for cases invisible to
9823      s390_regs_ever_clobbered.  */
9824   clobbered_regs[RETURN_REGNUM]
9825     |= (TARGET_TPF_PROFILING
9826 	/* When expanding builtin_return_addr in ESA mode we do not
9827 	   know whether r14 will later be needed as scratch reg when
9828 	   doing branch splitting.  So the builtin always accesses the
9829 	   r14 save slot and we need to stick to the save/restore
9830 	   decision for r14 even if it turns out that it didn't get
9831 	   clobbered.  */
9832 	|| cfun_frame_layout.save_return_addr_p
9833 	|| crtl->calls_eh_return);
9834 
9835   memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9836 
9837   for (i = 6; i < 16; i++)
9838     if (!clobbered_regs[i])
9839       cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9840 
9841   s390_register_info_set_ranges ();
9842   s390_register_info_stdarg_gpr ();
9843 }
9844 
9845 /* Fill cfun->machine with info about frame of current function.  */
9846 
9847 static void
9848 s390_frame_info (void)
9849 {
9850   HOST_WIDE_INT lowest_offset;
9851 
9852   cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9853   cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9854 
9855   /* The va_arg builtin uses a constant distance of 16 *
9856      UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9857      pointer.  So even if we are going to save the stack pointer in an
9858      FPR we need the stack space in order to keep the offsets
9859      correct.  */
9860   if (cfun->stdarg && cfun_save_arg_fprs_p)
9861     {
9862       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9863 
9864       if (cfun_frame_layout.first_save_gpr_slot == -1)
9865 	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9866     }
9867 
9868   cfun_frame_layout.frame_size = get_frame_size ();
9869   if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9870     fatal_error (input_location,
9871 		 "total size of local variables exceeds architecture limit");
9872 
9873   if (!TARGET_PACKED_STACK)
9874     {
9875       /* Fixed stack layout.  */
9876       cfun_frame_layout.backchain_offset = 0;
9877       cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9878       cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9879       cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9880       cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9881 				       * UNITS_PER_LONG);
9882     }
9883   else if (TARGET_BACKCHAIN)
9884     {
9885       /* Kernel stack layout - packed stack, backchain, no float  */
9886       gcc_assert (TARGET_SOFT_FLOAT);
9887       cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9888 					    - UNITS_PER_LONG);
9889 
9890       /* The distance between the backchain and the return address
9891 	 save slot must not change.  So we always need a slot for the
9892 	 stack pointer which resides in between.  */
9893       cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9894 
9895       cfun_frame_layout.gprs_offset
9896 	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9897 
9898       /* FPRs will not be saved.  Nevertheless pick sane values to
9899 	 keep area calculations valid.  */
9900       cfun_frame_layout.f0_offset =
9901 	cfun_frame_layout.f4_offset =
9902 	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9903     }
9904   else
9905     {
9906       int num_fprs;
9907 
9908       /* Packed stack layout without backchain.  */
9909 
9910       /* With stdarg FPRs need their dedicated slots.  */
9911       num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9912 		  : (cfun_fpr_save_p (FPR4_REGNUM) +
9913 		     cfun_fpr_save_p (FPR6_REGNUM)));
9914       cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9915 
9916       num_fprs = (cfun->stdarg ? 2
9917 		  : (cfun_fpr_save_p (FPR0_REGNUM)
9918 		     + cfun_fpr_save_p (FPR2_REGNUM)));
9919       cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9920 
9921       cfun_frame_layout.gprs_offset
9922 	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9923 
9924       cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9925 				     - cfun_frame_layout.high_fprs * 8);
9926     }
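  /* Illustrative numbers for the fixed (non-packed) layout above, assuming
     a 64-bit target (UNITS_PER_LONG == 8) and first_save_gpr_slot == 6:
     backchain_offset == 0, gprs_offset == 6 * 8 == 48,
     f0_offset == 16 * 8 == 128, f4_offset == 128 + 2 * 8 == 144 and
     f8_offset == -high_fprs * 8.  The two packed layouts instead place
     the save areas directly below STACK_POINTER_OFFSET so that unused
     bytes can be reused for normal stack slots.  */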
9927 
9928   if (cfun_save_high_fprs_p)
9929     cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9930 
9931   if (!crtl->is_leaf)
9932     cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9933 
9934   /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9935      sized area at the bottom of the stack.  This is required also for
9936      leaf functions.  When GCC generates a local stack reference it
9937      will always add STACK_POINTER_OFFSET to all these references.  */
9938   if (crtl->is_leaf
9939       && !TARGET_TPF_PROFILING
9940       && cfun_frame_layout.frame_size == 0
9941       && !cfun->calls_alloca)
9942     return;
9943 
9944   /* Calculate the number of bytes we have used in our own register
9945      save area.  With the packed stack layout we can re-use the
9946      remaining bytes for normal stack elements.  */
9947 
9948   if (TARGET_PACKED_STACK)
9949     lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9950 			      cfun_frame_layout.f4_offset),
9951 			 cfun_frame_layout.gprs_offset);
9952   else
9953     lowest_offset = 0;
9954 
9955   if (TARGET_BACKCHAIN)
9956     lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9957 
9958   cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9959 
9960   /* Under 31 bit, if an odd number of GPRs has to be saved, we have
9961      to adjust the frame size to maintain 8-byte alignment of stack
9962      frames.  */
9963   cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9964 				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
9965 				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
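  /* A worked example of the rounding above, assuming STACK_BOUNDARY == 64,
     i.e. 8-byte stack alignment: a raw frame_size of 52 becomes
     (52 + 7) & ~7 == 56, while a frame_size that is already a multiple of
     8 is left unchanged.  */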
9966 }
9967 
9968 /* Generate frame layout.  Fills in register and frame data for the current
9969    function in cfun->machine.  This routine can be called multiple times;
9970    it will re-do the complete frame layout every time.  */
9971 
9972 static void
9973 s390_init_frame_layout (void)
9974 {
9975   HOST_WIDE_INT frame_size;
9976   int base_used;
9977 
9978   /* After LRA the frame layout is supposed to be read-only and should
9979      not be re-computed.  */
9980   if (reload_completed)
9981     return;
9982 
9983   do
9984     {
9985       frame_size = cfun_frame_layout.frame_size;
9986 
9987       /* Try to predict whether we'll need the base register.  */
9988       base_used = crtl->uses_const_pool
9989 		  || (!DISP_IN_RANGE (frame_size)
9990 		      && !CONST_OK_FOR_K (frame_size));
9991 
9992       /* Decide which register to use as literal pool base.  In small
9993 	 leaf functions, try to use an unused call-clobbered register
9994 	 as base register to avoid save/restore overhead.  */
9995       if (!base_used)
9996 	cfun->machine->base_reg = NULL_RTX;
9997       else
9998 	{
9999 	  int br = 0;
10000 
10001 	  if (crtl->is_leaf)
10002 	    /* Prefer r5 (most likely to be free).  */
10003 	    for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10004 	      ;
10005 	  cfun->machine->base_reg =
10006 	    gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10007 	}
10008 
10009       s390_register_info ();
10010       s390_frame_info ();
10011     }
10012   while (frame_size != cfun_frame_layout.frame_size);
10013 }
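/* The do/while loop above iterates to a fixed point: whether a literal
   pool base register is needed depends on the frame size, while the
   register and frame info recomputed after that decision can in turn
   change the frame size.  Looping until frame_size stops changing
   resolves this circular dependency.  */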
10014 
10015 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10016    the TX is nonescaping.  A transaction is considered escaping if
10017    there is at least one path from tbegin returning CC0 to the
10018    function exit block without a tend.
10019 
10020    The check so far has some limitations:
10021    - only single tbegin/tend BBs are supported
10022    - the first cond jump after tbegin must separate the CC0 path from ~CC0
10023    - when CC is copied to a GPR and the CC0 check is done with the GPR
10024      this is not supported
10025 */
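/* Sketch of the condition-code convention assumed below: in CCRAWmode
   comparisons the branch mask is a 4-bit field in which bit 3 denotes
   CC0, so a jump taken exactly when tbegin produced CC0 tests the mask
   value 1 << 3 == 8 for an EQ comparison, or its 4-bit complement 0x7
   for an NE comparison.  */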
10026 
10027 static void
10028 s390_optimize_nonescaping_tx (void)
10029 {
10030   const unsigned int CC0 = 1 << 3;
10031   basic_block tbegin_bb = NULL;
10032   basic_block tend_bb = NULL;
10033   basic_block bb;
10034   rtx_insn *insn;
10035   bool result = true;
10036   int bb_index;
10037   rtx_insn *tbegin_insn = NULL;
10038 
10039   if (!cfun->machine->tbegin_p)
10040     return;
10041 
10042   for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10043     {
10044       bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10045 
10046       if (!bb)
10047 	continue;
10048 
10049       FOR_BB_INSNS (bb, insn)
10050 	{
10051 	  rtx ite, cc, pat, target;
10052 	  unsigned HOST_WIDE_INT mask;
10053 
10054 	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10055 	    continue;
10056 
10057 	  pat = PATTERN (insn);
10058 
10059 	  if (GET_CODE (pat) == PARALLEL)
10060 	    pat = XVECEXP (pat, 0, 0);
10061 
10062 	  if (GET_CODE (pat) != SET
10063 	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10064 	    continue;
10065 
10066 	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10067 	    {
10068 	      rtx_insn *tmp;
10069 
10070 	      tbegin_insn = insn;
10071 
10072 	      /* Just return if the tbegin doesn't have clobbers.  */
10073 	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
10074 		return;
10075 
10076 	      if (tbegin_bb != NULL)
10077 		return;
10078 
10079 	      /* Find the next conditional jump.  */
10080 	      for (tmp = NEXT_INSN (insn);
10081 		   tmp != NULL_RTX;
10082 		   tmp = NEXT_INSN (tmp))
10083 		{
10084 		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10085 		    return;
10086 		  if (!JUMP_P (tmp))
10087 		    continue;
10088 
10089 		  ite = SET_SRC (PATTERN (tmp));
10090 		  if (GET_CODE (ite) != IF_THEN_ELSE)
10091 		    continue;
10092 
10093 		  cc = XEXP (XEXP (ite, 0), 0);
10094 		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10095 		      || GET_MODE (cc) != CCRAWmode
10096 		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10097 		    return;
10098 
10099 		  if (bb->succs->length () != 2)
10100 		    return;
10101 
10102 		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10103 		  if (GET_CODE (XEXP (ite, 0)) == NE)
10104 		    mask ^= 0xf;
10105 
10106 		  if (mask == CC0)
10107 		    target = XEXP (ite, 1);
10108 		  else if (mask == (CC0 ^ 0xf))
10109 		    target = XEXP (ite, 2);
10110 		  else
10111 		    return;
10112 
10113 		  {
10114 		    edge_iterator ei;
10115 		    edge e1, e2;
10116 
10117 		    ei = ei_start (bb->succs);
10118 		    e1 = ei_safe_edge (ei);
10119 		    ei_next (&ei);
10120 		    e2 = ei_safe_edge (ei);
10121 
10122 		    if (e2->flags & EDGE_FALLTHRU)
10123 		      {
10124 			e2 = e1;
10125 			e1 = ei_safe_edge (ei);
10126 		      }
10127 
10128 		    if (!(e1->flags & EDGE_FALLTHRU))
10129 		      return;
10130 
10131 		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10132 		  }
10133 		  if (tmp == BB_END (bb))
10134 		    break;
10135 		}
10136 	    }
10137 
10138 	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10139 	    {
10140 	      if (tend_bb != NULL)
10141 		return;
10142 	      tend_bb = bb;
10143 	    }
10144 	}
10145     }
10146 
10147   /* Either we successfully remove the FPR clobbers here or we are not
10148      able to do anything for this TX.  Neither case qualifies for
10149      another look.  */
10150   cfun->machine->tbegin_p = false;
10151 
10152   if (tbegin_bb == NULL || tend_bb == NULL)
10153     return;
10154 
10155   calculate_dominance_info (CDI_POST_DOMINATORS);
10156   result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10157   free_dominance_info (CDI_POST_DOMINATORS);
10158 
10159   if (!result)
10160     return;
10161 
10162   PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10163 			    gen_rtvec (2,
10164 				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
10165 				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10166   INSN_CODE (tbegin_insn) = -1;
10167   df_insn_rescan (tbegin_insn);
10168 
10169   return;
10170 }
10171 
10172 /* Implement TARGET_HARD_REGNO_NREGS.  Because all registers in a class
10173    have the same size, this is equivalent to CLASS_MAX_NREGS.  */
10174 
10175 static unsigned int
10176 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10177 {
10178   return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10179 }
10180 
10181 /* Implement TARGET_HARD_REGNO_MODE_OK.
10182 
10183    Integer modes <= word size fit into any GPR.
10184    Integer modes > word size fit into successive GPRs, starting with
10185    an even-numbered register.
10186    SImode and DImode fit into FPRs as well.
10187 
10188    Floating point modes <= word size fit into any FPR or GPR.
10189    Floating point modes > word size (i.e. DFmode on 32-bit) fit
10190    into any FPR, or an even-odd GPR pair.
10191    TFmode fits only into an even-odd FPR pair.
10192 
10193    Complex floating point modes fit either into two FPRs, or into
10194    successive GPRs (again starting with an even number).
10195    TCmode fits only into two successive even-odd FPR pairs.
10196 
10197    Condition code modes fit only into the CC register.  */
10198 
10199 static bool
10200 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10201 {
10202   if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10203     return false;
10204 
10205   switch (REGNO_REG_CLASS (regno))
10206     {
10207     case VEC_REGS:
10208       return ((GET_MODE_CLASS (mode) == MODE_INT
10209 	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
10210 	      || mode == DFmode
10211 	      || (TARGET_VXE && mode == SFmode)
10212 	      || s390_vector_mode_supported_p (mode));
10213       break;
10214     case FP_REGS:
10215       if (TARGET_VX
10216 	  && ((GET_MODE_CLASS (mode) == MODE_INT
10217 	       && s390_class_max_nregs (FP_REGS, mode) == 1)
10218 	      || mode == DFmode
10219 	      || s390_vector_mode_supported_p (mode)))
10220 	return true;
10221 
10222       if (REGNO_PAIR_OK (regno, mode))
10223 	{
10224 	  if (mode == SImode || mode == DImode)
10225 	    return true;
10226 
10227 	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10228 	    return true;
10229 	}
10230       break;
10231     case ADDR_REGS:
10232       if (FRAME_REGNO_P (regno) && mode == Pmode)
10233 	return true;
10234 
10235       /* fallthrough */
10236     case GENERAL_REGS:
10237       if (REGNO_PAIR_OK (regno, mode))
10238 	{
10239 	  if (TARGET_ZARCH
10240 	      || (mode != TFmode && mode != TCmode && mode != TDmode))
10241 	    return true;
10242 	}
10243       break;
10244     case CC_REGS:
10245       if (GET_MODE_CLASS (mode) == MODE_CC)
10246 	return true;
10247       break;
10248     case ACCESS_REGS:
10249       if (REGNO_PAIR_OK (regno, mode))
10250 	{
10251 	  if (mode == SImode || mode == Pmode)
10252 	    return true;
10253 	}
10254       break;
10255     default:
10256       return false;
10257     }
10258 
10259   return false;
10260 }
10261 
10262 /* Implement TARGET_MODES_TIEABLE_P.  */
10263 
10264 static bool
10265 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10266 {
10267   return ((mode1 == SFmode || mode1 == DFmode)
10268 	  == (mode2 == SFmode || mode2 == DFmode));
10269 }
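/* Illustrative consequences of the predicate above: SFmode and DFmode
   values may be tied to each other, and likewise SImode and DImode, but
   SImode and SFmode may not, since only one of them belongs to the
   SFmode/DFmode group.  */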
10270 
10271 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */
10272 
10273 bool
10274 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10275 {
10276    /* Once we've decided upon a register to use as base register, it must
10277       no longer be used for any other purpose.  */
10278   if (cfun->machine->base_reg)
10279     if (REGNO (cfun->machine->base_reg) == old_reg
10280 	|| REGNO (cfun->machine->base_reg) == new_reg)
10281       return false;
10282 
10283   /* Prevent regrename from using call-saved regs which haven't
10284      actually been saved.  This is necessary since regrename assumes
10285      the backend save/restore decisions are based on
10286      df_regs_ever_live.  Since we have our own routine we have to tell
10287      regrename manually about it.  */
10288   if (GENERAL_REGNO_P (new_reg)
10289       && !call_used_regs[new_reg]
10290       && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10291     return false;
10292 
10293   return true;
10294 }
10295 
10296 /* Return nonzero if register REGNO can be used as a scratch register
10297    in peephole2.  */
10298 
10299 static bool
10300 s390_hard_regno_scratch_ok (unsigned int regno)
10301 {
10302   /* See s390_hard_regno_rename_ok.  */
10303   if (GENERAL_REGNO_P (regno)
10304       && !call_used_regs[regno]
10305       && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10306     return false;
10307 
10308   return true;
10309 }
10310 
10311 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  When generating
10312    code that runs in z/Architecture mode, but conforms to the 31-bit
10313    ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10314    bytes are saved across calls, however.  */
10315 
10316 static bool
10317 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
10318 				     machine_mode mode)
10319 {
10320   if (!TARGET_64BIT
10321       && TARGET_ZARCH
10322       && GET_MODE_SIZE (mode) > 4
10323       && ((regno >= 6 && regno <= 15) || regno == 32))
10324     return true;
10325 
10326   if (TARGET_VX
10327       && GET_MODE_SIZE (mode) > 8
10328       && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10329 	  || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10330     return true;
10331 
10332   return false;
10333 }
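/* Illustrative cases for the hook above: under -m31 -mzarch a DImode
   value kept in a call-saved GPR such as r6 only has its low 4 bytes
   preserved across calls, and with the vector facility a vector value
   wider than 8 bytes kept in a call-saved FPR only has its low 8 bytes
   (the FPR part) preserved.  */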
10334 
10335 /* Maximum number of registers to represent a value of mode MODE
10336    in a register of class RCLASS.  */
10337 
10338 int
10339 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10340 {
10341   int reg_size;
10342   bool reg_pair_required_p = false;
10343 
10344   switch (rclass)
10345     {
10346     case FP_REGS:
10347     case VEC_REGS:
10348       reg_size = TARGET_VX ? 16 : 8;
10349 
10350       /* TF and TD modes would fit into a VR but we put them into a
10351 	 register pair since we do not have 128bit FP instructions on
10352 	 full VRs.  */
10353       if (TARGET_VX
10354 	  && SCALAR_FLOAT_MODE_P (mode)
10355 	  && GET_MODE_SIZE (mode) >= 16)
10356 	reg_pair_required_p = true;
10357 
10358       /* Even if complex types would fit into a single FPR/VR we force
10359 	 them into a register pair to deal with the parts more easily.
10360 	 (FIXME: What about complex ints?)  */
10361       if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10362 	reg_pair_required_p = true;
10363       break;
10364     case ACCESS_REGS:
10365       reg_size = 4;
10366       break;
10367     default:
10368       reg_size = UNITS_PER_WORD;
10369       break;
10370     }
10371 
10372   if (reg_pair_required_p)
10373     return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10374 
10375   return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10376 }
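/* A few illustrative results of the computation above, assuming TARGET_VX
   (reg_size == 16 for FP_REGS/VEC_REGS): DFmode needs (8 + 15) / 16 == 1
   register, while TFmode, a 16-byte scalar float, is forced into a pair
   and needs 2 * ((8 + 15) / 16) == 2 registers.  In GENERAL_REGS on a
   64-bit target TImode needs (16 + 7) / 8 == 2 GPRs.  */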
10377 
10378 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
10379 
10380 static bool
10381 s390_can_change_mode_class (machine_mode from_mode,
10382 			    machine_mode to_mode,
10383 			    reg_class_t rclass)
10384 {
10385   machine_mode small_mode;
10386   machine_mode big_mode;
10387 
10388   /* V1TF and TF have different representations in vector
10389      registers.  */
10390   if (reg_classes_intersect_p (VEC_REGS, rclass)
10391       && ((from_mode == V1TFmode && to_mode == TFmode)
10392 	  || (from_mode == TFmode && to_mode == V1TFmode)))
10393     return false;
10394 
10395   if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10396     return true;
10397 
10398   if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10399     {
10400       small_mode = from_mode;
10401       big_mode = to_mode;
10402     }
10403   else
10404     {
10405       small_mode = to_mode;
10406       big_mode = from_mode;
10407     }
10408 
10409   /* Values residing in VRs are little-endian style.  All modes are
10410      placed left-aligned in a VR.  This means that we cannot allow
10411      switching between modes with differing sizes.  Also if the vector
10412      facility is available we still place TFmode values in VR register
10413      pairs, since the only instructions we have operating on TFmodes
10414      only deal with register pairs.  Therefore we have to allow DFmode
10415      subregs of TFmodes to enable the TFmode splitters.  */
10416   if (reg_classes_intersect_p (VEC_REGS, rclass)
10417       && (GET_MODE_SIZE (small_mode) < 8
10418 	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10419     return false;
10420 
10421   /* Likewise for access registers, since they have only half the
10422      word size on 64-bit.  */
10423   if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10424     return false;
10425 
10426   return true;
10427 }
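/* Illustrative outcomes of the check above for VEC_REGS: a DFmode subreg
   of a TFmode value is allowed (the smaller mode is 8 bytes and TFmode
   occupies a register pair), which the TFmode splitters rely on, whereas
   an SFmode subreg of a DFmode value is rejected because the smaller
   mode is below 8 bytes.  */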
10428 
10429 /* Return true if we use LRA instead of reload pass.  */
10430 static bool
10431 s390_lra_p (void)
10432 {
10433   return s390_lra_flag;
10434 }
10435 
10436 /* Return true if register FROM can be eliminated via register TO.  */
10437 
10438 static bool
10439 s390_can_eliminate (const int from, const int to)
10440 {
10441   /* We have not marked the base register as fixed.
10442      Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10443      If a function requires the base register, we say here that this
10444      elimination cannot be performed.  This will cause reload to free
10445      up the base register (as if it were fixed).  On the other hand,
10446      if the current function does *not* require the base register, we
10447      say here the elimination succeeds, which in turn allows reload
10448      to allocate the base register for any other purpose.  */
10449   if (from == BASE_REGNUM && to == BASE_REGNUM)
10450     {
10451       s390_init_frame_layout ();
10452       return cfun->machine->base_reg == NULL_RTX;
10453     }
10454 
10455   /* Everything else must point into the stack frame.  */
10456   gcc_assert (to == STACK_POINTER_REGNUM
10457 	      || to == HARD_FRAME_POINTER_REGNUM);
10458 
10459   gcc_assert (from == FRAME_POINTER_REGNUM
10460 	      || from == ARG_POINTER_REGNUM
10461 	      || from == RETURN_ADDRESS_POINTER_REGNUM);
10462 
10463   /* Make sure we actually saved the return address.  */
10464   if (from == RETURN_ADDRESS_POINTER_REGNUM)
10465     if (!crtl->calls_eh_return
10466 	&& !cfun->stdarg
10467 	&& !cfun_frame_layout.save_return_addr_p)
10468       return false;
10469 
10470   return true;
10471 }
10472 
10473 /* Return offset between register FROM and TO initially after prolog.  */
10474 
10475 HOST_WIDE_INT
10476 s390_initial_elimination_offset (int from, int to)
10477 {
10478   HOST_WIDE_INT offset;
10479 
10480   /* ??? Why are we called for non-eliminable pairs?  */
10481   if (!s390_can_eliminate (from, to))
10482     return 0;
10483 
10484   switch (from)
10485     {
10486     case FRAME_POINTER_REGNUM:
10487       offset = (get_frame_size()
10488 		+ STACK_POINTER_OFFSET
10489 		+ crtl->outgoing_args_size);
10490       break;
10491 
10492     case ARG_POINTER_REGNUM:
10493       s390_init_frame_layout ();
10494       offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10495       break;
10496 
10497     case RETURN_ADDRESS_POINTER_REGNUM:
10498       s390_init_frame_layout ();
10499 
10500       if (cfun_frame_layout.first_save_gpr_slot == -1)
10501 	{
10502 	  /* If it turns out that for stdarg nothing went into the reg
10503 	     save area we also do not need the return address
10504 	     pointer.  */
10505 	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
10506 	    return 0;
10507 
10508 	  gcc_unreachable ();
10509 	}
10510 
10511       /* In order to make the following work it is not necessary for
10512 	 r14 to have a save slot.  It is sufficient if one other GPR
10513 	 got one.  Since the GPRs are always stored without gaps we
10514 	 are able to calculate where the r14 save slot would
10515 	 reside.  */
10516       offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10517 		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10518 		UNITS_PER_LONG);
10519       break;
10520 
10521     case BASE_REGNUM:
10522       offset = 0;
10523       break;
10524 
10525     default:
10526       gcc_unreachable ();
10527     }
10528 
10529   return offset;
10530 }
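/* Example for the RETURN_ADDRESS_POINTER_REGNUM case above, assuming a
   64-bit target where r6 is the first GPR with a save slot: the r14 slot
   is located at frame_size + gprs_offset + (14 - 6) * 8, i.e. 64 bytes
   past the start of the GPR save area, even if r14 itself did not get
   saved.  */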
10531 
10532 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10533    to register BASE.  Return generated insn.  */
10534 
10535 static rtx
10536 save_fpr (rtx base, int offset, int regnum)
10537 {
10538   rtx addr;
10539   addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10540 
10541   if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10542     set_mem_alias_set (addr, get_varargs_alias_set ());
10543   else
10544     set_mem_alias_set (addr, get_frame_alias_set ());
10545 
10546   return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10547 }
10548 
10549 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10550    to register BASE.  Return generated insn.  */
10551 
10552 static rtx
10553 restore_fpr (rtx base, int offset, int regnum)
10554 {
10555   rtx addr;
10556   addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10557   set_mem_alias_set (addr, get_frame_alias_set ());
10558 
10559   return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10560 }
10561 
10562 /* Generate insn to save registers FIRST to LAST into
10563    the register save area located at offset OFFSET
10564    relative to register BASE.  */
10565 
10566 static rtx
10567 save_gprs (rtx base, int offset, int first, int last)
10568 {
10569   rtx addr, insn, note;
10570   int i;
10571 
10572   addr = plus_constant (Pmode, base, offset);
10573   addr = gen_rtx_MEM (Pmode, addr);
10574 
10575   set_mem_alias_set (addr, get_frame_alias_set ());
10576 
10577   /* Special-case single register.  */
10578   if (first == last)
10579     {
10580       if (TARGET_64BIT)
10581 	insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10582       else
10583 	insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10584 
10585       if (!global_not_special_regno_p (first))
10586 	RTX_FRAME_RELATED_P (insn) = 1;
10587       return insn;
10588     }
10589 
10590 
10591   insn = gen_store_multiple (addr,
10592 			     gen_rtx_REG (Pmode, first),
10593 			     GEN_INT (last - first + 1));
10594 
10595   if (first <= 6 && cfun->stdarg)
10596     for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10597       {
10598 	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10599 
10600 	if (first + i <= 6)
10601 	  set_mem_alias_set (mem, get_varargs_alias_set ());
10602       }
10603 
10604   /* We need to set the FRAME_RELATED flag on all SETs
10605      inside the store-multiple pattern.
10606 
10607      However, we must not emit DWARF records for registers 2..5
10608      if they are stored for use by variable arguments ...
10609 
10610      ??? Unfortunately, it is not enough to simply not set the
10611      FRAME_RELATED flags for those SETs, because the first SET
10612      of the PARALLEL is always treated as if it had the flag
10613      set, even if it does not.  Therefore we emit a new pattern
10614      without those registers as REG_FRAME_RELATED_EXPR note.  */
10615 
10616   if (first >= 6 && !global_not_special_regno_p (first))
10617     {
10618       rtx pat = PATTERN (insn);
10619 
10620       for (i = 0; i < XVECLEN (pat, 0); i++)
10621 	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10622 	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10623 								     0, i)))))
10624 	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10625 
10626       RTX_FRAME_RELATED_P (insn) = 1;
10627     }
10628   else if (last >= 6)
10629     {
10630       int start;
10631 
10632       for (start = first >= 6 ? first : 6; start <= last; start++)
10633 	if (!global_not_special_regno_p (start))
10634 	  break;
10635 
10636       if (start > last)
10637 	return insn;
10638 
10639       addr = plus_constant (Pmode, base,
10640 			    offset + (start - first) * UNITS_PER_LONG);
10641 
10642       if (start == last)
10643 	{
10644 	  if (TARGET_64BIT)
10645 	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10646 			      gen_rtx_REG (Pmode, start));
10647 	  else
10648 	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10649 			      gen_rtx_REG (Pmode, start));
10650 	  note = PATTERN (note);
10651 
10652 	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10653 	  RTX_FRAME_RELATED_P (insn) = 1;
10654 
10655 	  return insn;
10656 	}
10657 
10658       note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10659 				 gen_rtx_REG (Pmode, start),
10660 				 GEN_INT (last - start + 1));
10661       note = PATTERN (note);
10662 
10663       add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10664 
10665       for (i = 0; i < XVECLEN (note, 0); i++)
10666 	if (GET_CODE (XVECEXP (note, 0, i)) == SET
10667 	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10668 								     0, i)))))
10669 	  RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10670 
10671       RTX_FRAME_RELATED_P (insn) = 1;
10672     }
10673 
10674   return insn;
10675 }
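/* Sketch of the note logic above for a varargs function storing r2..r15
   (first == 2, last == 15): the store-multiple insn covers all fourteen
   GPRs, but the REG_FRAME_RELATED_EXPR note attached to it only describes
   r6..r15, so no CFI is emitted for the argument registers r2..r5 that
   were stored purely for va_arg.  */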
10676 
10677 /* Generate insn to restore registers FIRST to LAST from
10678    the register save area located at offset OFFSET
10679    relative to register BASE.  */
10680 
10681 static rtx
10682 restore_gprs (rtx base, int offset, int first, int last)
10683 {
10684   rtx addr, insn;
10685 
10686   addr = plus_constant (Pmode, base, offset);
10687   addr = gen_rtx_MEM (Pmode, addr);
10688   set_mem_alias_set (addr, get_frame_alias_set ());
10689 
10690   /* Special-case single register.  */
10691   if (first == last)
10692     {
10693       if (TARGET_64BIT)
10694 	insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10695       else
10696 	insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10697 
10698       RTX_FRAME_RELATED_P (insn) = 1;
10699       return insn;
10700     }
10701 
10702   insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10703 			    addr,
10704 			    GEN_INT (last - first + 1));
10705   RTX_FRAME_RELATED_P (insn) = 1;
10706   return insn;
10707 }
10708 
10709 /* Return insn sequence to load the GOT register.  */
10710 
10711 rtx_insn *
10712 s390_load_got (void)
10713 {
10714   rtx_insn *insns;
10715 
10716   /* We cannot use pic_offset_table_rtx here since we use this
10717      function also for non-pic if __tls_get_offset is called and in
10718      that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10719      aren't usable.  */
10720   rtx got_rtx = gen_rtx_REG (Pmode, 12);
10721 
10722   start_sequence ();
10723 
10724   emit_move_insn (got_rtx, s390_got_symbol ());
10725 
10726   insns = get_insns ();
10727   end_sequence ();
10728   return insns;
10729 }
10730 
10731 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10732    and the change to the stack pointer.  */
10733 
10734 static void
10735 s390_emit_stack_tie (void)
10736 {
10737   rtx mem = gen_frame_mem (BLKmode,
10738 			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10739 
10740   emit_insn (gen_stack_tie (mem));
10741 }
10742 
10743 /* Copy GPRS into FPR save slots.  */
10744 
10745 static void
10746 s390_save_gprs_to_fprs (void)
10747 {
10748   int i;
10749 
10750   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10751     return;
10752 
10753   for (i = 6; i < 16; i++)
10754     {
10755       if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10756 	{
10757 	  rtx_insn *insn =
10758 	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10759 			    gen_rtx_REG (DImode, i));
10760 	  RTX_FRAME_RELATED_P (insn) = 1;
10761 	  /* This prevents dwarf2cfi from interpreting the set.  Otherwise
10762 	     it might emit def_cfa_register info making an FPR the new
10763 	     CFA.  */
10764 	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10765 	}
10766     }
10767 }
10768 
10769 /* Restore GPRs from FPR save slots.  */
10770 
10771 static void
10772 s390_restore_gprs_from_fprs (void)
10773 {
10774   int i;
10775 
10776   if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10777     return;
10778 
10779   /* Restore the GPRs starting with the stack pointer.  That way the
10780      stack pointer already has its original value when it comes to
10781      restoring the hard frame pointer.  So we can set the cfa reg back
10782      to the stack pointer.  */
10783   for (i = STACK_POINTER_REGNUM; i >= 6; i--)
10784     {
10785       rtx_insn *insn;
10786 
10787       if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10788 	continue;
10789 
10790       rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10791 
10792       if (i == STACK_POINTER_REGNUM)
10793 	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10794       else
10795 	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10796 
10797       df_set_regs_ever_live (i, true);
10798       add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10799 
10800       /* If either the stack pointer or the frame pointer gets restored,
10801 	 set the CFA value to its value at function start.  Doing this
10802 	 for the frame pointer results in .cfi_def_cfa_register 15,
10803 	 which is ok since, if the stack pointer got modified, it has
10804 	 already been restored.  */
10805       if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
10806 	add_reg_note (insn, REG_CFA_DEF_CFA,
10807 		      plus_constant (Pmode, stack_pointer_rtx,
10808 				     STACK_POINTER_OFFSET));
10809       RTX_FRAME_RELATED_P (insn) = 1;
10810     }
10811 }
10812 
10813 
10814 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10815    generation.  */
10816 
10817 namespace {
10818 
10819 const pass_data pass_data_s390_early_mach =
10820 {
10821   RTL_PASS, /* type */
10822   "early_mach", /* name */
10823   OPTGROUP_NONE, /* optinfo_flags */
10824   TV_MACH_DEP, /* tv_id */
10825   0, /* properties_required */
10826   0, /* properties_provided */
10827   0, /* properties_destroyed */
10828   0, /* todo_flags_start */
10829   ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10830 };
10831 
10832 class pass_s390_early_mach : public rtl_opt_pass
10833 {
10834 public:
10835   pass_s390_early_mach (gcc::context *ctxt)
10836     : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10837   {}
10838 
10839   /* opt_pass methods: */
10840   virtual unsigned int execute (function *);
10841 
10842 }; // class pass_s390_early_mach
10843 
10844 unsigned int
10845 pass_s390_early_mach::execute (function *fun)
10846 {
10847   rtx_insn *insn;
10848 
10849   /* Try to get rid of the FPR clobbers.  */
10850   s390_optimize_nonescaping_tx ();
10851 
10852   /* Re-compute register info.  */
10853   s390_register_info ();
10854 
10855   /* If we're using a base register, ensure that it is always valid for
10856      the first non-prologue instruction.  */
10857   if (fun->machine->base_reg)
10858     emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10859 
10860   /* Annotate all constant pool references to let the scheduler know
10861      they implicitly use the base register.  */
10862   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10863     if (INSN_P (insn))
10864       {
10865 	annotate_constant_pool_refs (insn);
10866 	df_insn_rescan (insn);
10867       }
10868   return 0;
10869 }
10870 
10871 } // anon namespace
10872 
10873 rtl_opt_pass *
10874 make_pass_s390_early_mach (gcc::context *ctxt)
10875 {
10876   return new pass_s390_early_mach (ctxt);
10877 }
10878 
10879 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
10880    - push too big immediates to the literal pool and annotate the refs
10881    - emit frame related notes for stack pointer changes.  */
10882 
10883 static rtx
10884 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
10885 {
10886   rtx_insn *insn;
10887   rtx orig_offset = offset;
10888 
10889   gcc_assert (REG_P (target));
10890   gcc_assert (REG_P (reg));
10891   gcc_assert (CONST_INT_P (offset));
10892 
10893   if (offset == const0_rtx)                               /* lr/lgr */
10894     {
10895       insn = emit_move_insn (target, reg);
10896     }
10897   else if (DISP_IN_RANGE (INTVAL (offset)))               /* la */
10898     {
10899       insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
10900 						   offset));
10901     }
10902   else
10903     {
10904       if (!satisfies_constraint_K (offset)                /* ahi/aghi */
10905 	  && (!TARGET_EXTIMM
10906 	      || (!satisfies_constraint_Op (offset)       /* alfi/algfi */
10907 		  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
10908 	offset = force_const_mem (Pmode, offset);
10909 
10910       if (target != reg)
10911 	{
10912 	  insn = emit_move_insn (target, reg);
10913 	  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10914 	}
10915 
10916       insn = emit_insn (gen_add2_insn (target, offset));
10917 
10918       if (!CONST_INT_P (offset))
10919 	{
10920 	  annotate_constant_pool_refs (insn);
10921 
10922 	  if (frame_related_p)
10923 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10924 			  gen_rtx_SET (target,
10925 				       gen_rtx_PLUS (Pmode, target,
10926 						     orig_offset)));
10927 	}
10928     }
10929 
10930   RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
10931 
10932   /* If this is a stack adjustment and we are generating a stack clash
10933      prologue, then add a REG_STACK_CHECK note to signal that this insn
10934      should be left alone.  */
10935   if (flag_stack_clash_protection && target == stack_pointer_rtx)
10936     add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
10937 
10938   return insn;
10939 }
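/* Roughly, the helper above picks the cheapest way to form REG + OFFSET;
   the mnemonics in the comments are the instructions typically chosen:
   a plain register move (LR/LGR) for a zero offset, LA for displacements
   in range, AHI/AGHI, ALFI/ALGFI or SLFI/SLGFI for immediates matching
   the K/Op/On constraints, and otherwise an add of a literal-pool
   constant, with the frame-related note rewritten to mention the
   original offset.  */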
10940 
10941 /* Emit a compare instruction with a volatile memory access as stack
10942    probe.  It does not waste store tags and does not clobber any
10943    registers apart from the condition code.  */
10944 static void
10945 s390_emit_stack_probe (rtx addr)
10946 {
10947   rtx tmp = gen_rtx_MEM (Pmode, addr);
10948   MEM_VOLATILE_P (tmp) = 1;
10949   s390_emit_compare (EQ, gen_rtx_REG (Pmode, 0), tmp);
10950   emit_insn (gen_blockage ());
10951 }
10952 
10953 /* Use a runtime loop if we have to emit at least this many probes.  */
10954 #define MIN_UNROLL_PROBES 3
10955 
10956 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
10957    if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
10958    probe relative to the stack pointer.
10959 
10960    Note that SIZE is negative.
10961 
10962    The return value is true if TEMP_REG has been clobbered.  */
10963 static bool
10964 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
10965 		      rtx temp_reg)
10966 {
10967   bool temp_reg_clobbered_p = false;
10968   HOST_WIDE_INT probe_interval
10969     = 1 << param_stack_clash_protection_probe_interval;
10970   HOST_WIDE_INT guard_size
10971     = 1 << param_stack_clash_protection_guard_size;
10972 
10973   if (flag_stack_clash_protection)
10974     {
10975       if (last_probe_offset + -INTVAL (size) < guard_size)
10976 	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
10977       else
10978 	{
10979 	  rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
10980 	  HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
10981 	  HOST_WIDE_INT num_probes = rounded_size / probe_interval;
10982 	  HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
10983 
10984 	  if (num_probes < MIN_UNROLL_PROBES)
10985 	    {
10986 	      /* Emit unrolled probe statements.  */
10987 
10988 	      for (unsigned int i = 0; i < num_probes; i++)
10989 		{
10990 		  s390_prologue_plus_offset (stack_pointer_rtx,
10991 					     stack_pointer_rtx,
10992 					     GEN_INT (-probe_interval), true);
10993 		  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
10994 						       stack_pointer_rtx,
10995 						       offset));
10996 		}
10997 	      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
10998 	    }
10999 	  else
11000 	    {
11001 	      /* Emit a loop probing the pages.  */
11002 
11003 	      rtx_code_label *loop_start_label = gen_label_rtx ();
11004 
11005 	      /* From now on temp_reg will be the CFA register.  */
11006 	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11007 					 GEN_INT (-rounded_size), true);
11008 	      emit_label (loop_start_label);
11009 
11010 	      s390_prologue_plus_offset (stack_pointer_rtx,
11011 					 stack_pointer_rtx,
11012 					 GEN_INT (-probe_interval), false);
11013 	      s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11014 						   stack_pointer_rtx,
11015 						   offset));
11016 	      emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11017 				       GT, NULL_RTX,
11018 				       Pmode, 1, loop_start_label);
11019 
11020 	      /* Without this, make_edges ICEs.  */
11021 	      JUMP_LABEL (get_last_insn ()) = loop_start_label;
11022 	      LABEL_NUSES (loop_start_label) = 1;
11023 
11024 	      /* That's going to be a NOP since stack pointer and
11025 		 temp_reg are supposed to be the same here.  We just
11026 		 emit it to set the CFA reg back to r15.  */
11027 	      s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11028 					 const0_rtx, true);
11029 	      temp_reg_clobbered_p = true;
11030 	      dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11031 	    }
11032 
11033 	  /* Handle any residual allocation request.  */
11034 	  s390_prologue_plus_offset (stack_pointer_rtx,
11035 				     stack_pointer_rtx,
11036 				     GEN_INT (-residual), true);
11037 	  last_probe_offset += residual;
11038 	  if (last_probe_offset >= probe_interval)
11039 	    s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11040 						 stack_pointer_rtx,
11041 						 GEN_INT (residual
11042 							  - UNITS_PER_LONG)));
11043 
11044 	  return temp_reg_clobbered_p;
11045 	}
11046     }
11047 
11048   /* Subtract frame size from stack pointer.  */
11049   s390_prologue_plus_offset (stack_pointer_rtx,
11050 			     stack_pointer_rtx,
11051 			     size, true);
11052 
11053   return temp_reg_clobbered_p;
11054 }
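/* Worked example for the probing math above, assuming the default probe
   interval of 4096 bytes (2^12) and a requested allocation of 20000
   bytes: rounded_size == 20000 & ~4095 == 16384, num_probes == 4 and
   residual == 3616.  Since 4 >= MIN_UNROLL_PROBES the loop form is used;
   for a request of 9000 bytes (num_probes == 2) the probes would be
   emitted unrolled instead.  */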
11055 
11056 /* Expand the prologue into a bunch of separate insns.  */
11057 
11058 void
11059 s390_emit_prologue (void)
11060 {
11061   rtx insn, addr;
11062   rtx temp_reg;
11063   int i;
11064   int offset;
11065   int next_fpr = 0;
11066 
11067   /* Choose best register to use for temp use within prologue.
11068      TPF with profiling must avoid the register 14 - the tracing function
11069      needs the original contents of r14 to be preserved.  */
11070 
11071   if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11072       && !crtl->is_leaf
11073       && !TARGET_TPF_PROFILING)
11074     temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11075   else if (flag_split_stack && cfun->stdarg)
11076     temp_reg = gen_rtx_REG (Pmode, 12);
11077   else
11078     temp_reg = gen_rtx_REG (Pmode, 1);
11079 
11080   /* When probing for stack-clash mitigation, we have to track the distance
11081      between the stack pointer and closest known reference.
11082 
11083      Most of the time we have to make a worst case assumption.  The
11084      only exception is when TARGET_BACKCHAIN is active, in which case
11085      we know *sp (offset 0) was written.  */
11086   HOST_WIDE_INT probe_interval
11087     = 1 << param_stack_clash_protection_probe_interval;
11088   HOST_WIDE_INT last_probe_offset
11089     = (TARGET_BACKCHAIN
11090        ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11091        : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11092 
11093   s390_save_gprs_to_fprs ();
11094 
11095   /* Save call saved gprs.  */
11096   if (cfun_frame_layout.first_save_gpr != -1)
11097     {
11098       insn = save_gprs (stack_pointer_rtx,
11099 			cfun_frame_layout.gprs_offset +
11100 			UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11101 					  - cfun_frame_layout.first_save_gpr_slot),
11102 			cfun_frame_layout.first_save_gpr,
11103 			cfun_frame_layout.last_save_gpr);
11104 
11105       /* This is not 100% correct.  If we have more than one register saved,
11106 	 then LAST_PROBE_OFFSET can move even closer to sp.  */
11107       last_probe_offset
11108 	= (cfun_frame_layout.gprs_offset +
11109 	   UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11110 			     - cfun_frame_layout.first_save_gpr_slot));
11111 
11112       emit_insn (insn);
11113     }
11114 
11115   /* Dummy insn to mark literal pool slot.  */
11116 
11117   if (cfun->machine->base_reg)
11118     emit_insn (gen_main_pool (cfun->machine->base_reg));
11119 
11120   offset = cfun_frame_layout.f0_offset;
11121 
11122   /* Save f0 and f2.  */
11123   for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11124     {
11125       if (cfun_fpr_save_p (i))
11126 	{
11127 	  save_fpr (stack_pointer_rtx, offset, i);
11128 	  if (offset < last_probe_offset)
11129 	    last_probe_offset = offset;
11130 	  offset += 8;
11131 	}
11132       else if (!TARGET_PACKED_STACK || cfun->stdarg)
11133 	offset += 8;
11134     }
11135 
11136   /* Save f4 and f6.  */
11137   offset = cfun_frame_layout.f4_offset;
11138   for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11139     {
11140       if (cfun_fpr_save_p (i))
11141 	{
11142 	  insn = save_fpr (stack_pointer_rtx, offset, i);
11143 	  if (offset < last_probe_offset)
11144 	    last_probe_offset = offset;
11145 	  offset += 8;
11146 
11147 	  /* If f4 and f6 are call clobbered they are saved due to
11148 	     stdargs and therefore are not frame related.  */
11149 	  if (!call_used_regs[i])
11150 	    RTX_FRAME_RELATED_P (insn) = 1;
11151 	}
11152       else if (!TARGET_PACKED_STACK || call_used_regs[i])
11153 	offset += 8;
11154     }
11155 
11156   if (TARGET_PACKED_STACK
11157       && cfun_save_high_fprs_p
11158       && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11159     {
11160       offset = (cfun_frame_layout.f8_offset
11161 		+ (cfun_frame_layout.high_fprs - 1) * 8);
11162 
11163       for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11164 	if (cfun_fpr_save_p (i))
11165 	  {
11166 	    insn = save_fpr (stack_pointer_rtx, offset, i);
11167 	    if (offset < last_probe_offset)
11168 	      last_probe_offset = offset;
11169 
11170 	    RTX_FRAME_RELATED_P (insn) = 1;
11171 	    offset -= 8;
11172 	  }
11173       if (offset >= cfun_frame_layout.f8_offset)
11174 	next_fpr = i;
11175     }
11176 
11177   if (!TARGET_PACKED_STACK)
11178     next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11179 
11180   if (flag_stack_usage_info)
11181     current_function_static_stack_size = cfun_frame_layout.frame_size;
11182 
11183   /* Decrement stack pointer.  */
11184 
11185   if (cfun_frame_layout.frame_size > 0)
11186     {
11187       rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11188       rtx_insn *stack_pointer_backup_loc;
11189       bool temp_reg_clobbered_p;
11190 
11191       if (s390_stack_size)
11192 	{
11193 	  HOST_WIDE_INT stack_guard;
11194 
11195 	  if (s390_stack_guard)
11196 	    stack_guard = s390_stack_guard;
11197 	  else
11198 	    {
11199 	      /* If no value for the stack guard is provided, the smallest power
11200 		 of 2 not smaller than the current frame size is chosen.  */
11201 	      stack_guard = 1;
11202 	      while (stack_guard < cfun_frame_layout.frame_size)
11203 		stack_guard <<= 1;
11204 	    }
11205 
11206 	  if (cfun_frame_layout.frame_size >= s390_stack_size)
11207 	    {
11208 	      warning (0, "frame size of function %qs is %wd"
11209 		       " bytes exceeding user provided stack limit of "
11210 		       "%d bytes.  "
11211 		       "An unconditional trap is added.",
11212 		       current_function_name(), cfun_frame_layout.frame_size,
11213 		       s390_stack_size);
11214 	      emit_insn (gen_trap ());
11215 	      emit_barrier ();
11216 	    }
11217 	  else
11218 	    {
11219 	      /* stack_guard has to be smaller than s390_stack_size.
11220 		 Otherwise we would emit an AND with zero which would
11221 		 not match the test under mask pattern.  */
11222 	      if (stack_guard >= s390_stack_size)
11223 		{
11224 		  warning (0, "frame size of function %qs is %wd"
11225 			   " bytes which is more than half the stack size. "
11226 			   "The dynamic check would not be reliable. "
11227 			   "No check emitted for this function.",
11228 			   current_function_name(),
11229 			   cfun_frame_layout.frame_size);
11230 		}
11231 	      else
11232 		{
11233 		  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11234 						    & ~(stack_guard - 1));
11235 
11236 		  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11237 				       GEN_INT (stack_check_mask));
11238 		  if (TARGET_64BIT)
11239 		    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11240 							 t, const0_rtx),
11241 					     t, const0_rtx, const0_rtx));
11242 		  else
11243 		    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11244 							 t, const0_rtx),
11245 					     t, const0_rtx, const0_rtx));
11246 		}
11247 	    }
11248 	}
11249 
11250       if (s390_warn_framesize > 0
11251 	  && cfun_frame_layout.frame_size >= s390_warn_framesize)
11252 	warning (0, "frame size of %qs is %wd bytes",
11253 		 current_function_name (), cfun_frame_layout.frame_size);
11254 
11255       if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11256 	warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11257 
11258       /* Save the location where we could backup the incoming stack
11259 	 pointer.  */
11260       stack_pointer_backup_loc = get_last_insn ();
11261 
11262       temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11263 						   temp_reg);
11264 
11265       if (TARGET_BACKCHAIN || next_fpr)
11266 	{
11267 	  if (temp_reg_clobbered_p)
11268 	    {
11269 	      /* allocate_stack_space had to make use of temp_reg and
11270 		 we need it to hold a backup of the incoming stack
11271 		 pointer.  Calculate back that value from the current
11272 		 stack pointer.  */
11273 	      s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11274 					 GEN_INT (cfun_frame_layout.frame_size),
11275 					 false);
11276 	    }
11277 	  else
11278 	    {
11279 	      /* allocate_stack_space didn't actually require
11280 		 temp_reg.  Insert the stack pointer backup insn
11281 		 before the stack pointer decrement code - knowing now
11282 		 that the value will survive.  */
11283 	      emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11284 			       stack_pointer_backup_loc);
11285 	    }
11286 	}
11287 
11288       /* Set backchain.  */
11289 
11290       if (TARGET_BACKCHAIN)
11291 	{
11292 	  if (cfun_frame_layout.backchain_offset)
11293 	    addr = gen_rtx_MEM (Pmode,
11294 				plus_constant (Pmode, stack_pointer_rtx,
11295 				  cfun_frame_layout.backchain_offset));
11296 	  else
11297 	    addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11298 	  set_mem_alias_set (addr, get_frame_alias_set ());
11299 	  insn = emit_insn (gen_move_insn (addr, temp_reg));
11300 	}
11301 
11302       /* If we support non-call exceptions (e.g. for Java),
11303 	 we need to make sure the backchain pointer is set up
11304 	 before any possibly trapping memory access.  */
11305       if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11306 	{
11307 	  addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11308 	  emit_clobber (addr);
11309 	}
11310     }
11311   else if (flag_stack_clash_protection)
11312     dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11313 
11314   /* Save fprs 8 - 15 (64 bit ABI).  */
11315 
11316   if (cfun_save_high_fprs_p && next_fpr)
11317     {
11318       /* If the stack might be accessed through a different register
11319 	 we have to make sure that the stack pointer decrement is not
11320 	 moved below the use of the stack slots.  */
11321       s390_emit_stack_tie ();
11322 
11323       insn = emit_insn (gen_add2_insn (temp_reg,
11324 				       GEN_INT (cfun_frame_layout.f8_offset)));
11325 
11326       offset = 0;
11327 
11328       for (i = FPR8_REGNUM; i <= next_fpr; i++)
11329 	if (cfun_fpr_save_p (i))
11330 	  {
11331 	    rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11332 				      cfun_frame_layout.frame_size
11333 				      + cfun_frame_layout.f8_offset
11334 				      + offset);
11335 
11336 	    insn = save_fpr (temp_reg, offset, i);
11337 	    offset += 8;
11338 	    RTX_FRAME_RELATED_P (insn) = 1;
11339 	    add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11340 			  gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11341 				       gen_rtx_REG (DFmode, i)));
11342 	  }
11343     }
11344 
11345   /* Set frame pointer, if needed.  */
11346 
11347   if (frame_pointer_needed)
11348     {
11349       insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11350       RTX_FRAME_RELATED_P (insn) = 1;
11351     }
11352 
11353   /* Set up got pointer, if needed.  */
11354 
11355   if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11356     {
11357       rtx_insn *insns = s390_load_got ();
11358 
11359       for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11360 	annotate_constant_pool_refs (insn);
11361 
11362       emit_insn (insns);
11363     }
11364 
11365 #if TARGET_TPF != 0
11366   if (TARGET_TPF_PROFILING)
11367     {
11368       /* Generate a BAS instruction to serve as a function entry
11369 	 intercept to facilitate the use of tracing algorithms located
11370 	 at the branch target.  */
11371       emit_insn (gen_prologue_tpf (
11372 		   GEN_INT (s390_tpf_trace_hook_prologue_check),
11373 		   GEN_INT (s390_tpf_trace_hook_prologue_target)));
11374 
11375       /* Emit a blockage here so that all code lies between the
11376 	 profiling mechanisms.  */
11377       emit_insn (gen_blockage ());
11378     }
11379 #endif
11380 }
11381 
11382 /* Expand the epilogue into a bunch of separate insns.  */
11383 
11384 void
11385 s390_emit_epilogue (bool sibcall)
11386 {
11387   rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11388   int area_bottom, area_top, offset = 0;
11389   int next_offset;
11390   int i;
11391 
11392 #if TARGET_TPF != 0
11393   if (TARGET_TPF_PROFILING)
11394     {
11395       /* Generate a BAS instruction to serve as a function entry
11396 	 intercept to facilitate the use of tracing algorithms located
11397 	 at the branch target.  */
11398 
11399       /* Emit a blockage here so that all code lies between the
11400 	 profiling mechanisms.  */
11401       emit_insn (gen_blockage ());
11402 
11403       emit_insn (gen_epilogue_tpf (
11404 		   GEN_INT (s390_tpf_trace_hook_epilogue_check),
11405 		   GEN_INT (s390_tpf_trace_hook_epilogue_target)));
11406     }
11407 #endif
11408 
11409   /* Check whether to use frame or stack pointer for restore.  */
11410 
11411   frame_pointer = (frame_pointer_needed
11412 		   ? hard_frame_pointer_rtx : stack_pointer_rtx);
11413 
11414   s390_frame_area (&area_bottom, &area_top);
11415 
11416   /* Check whether we can access the register save area.
11417      If not, increment the frame pointer as required.  */
11418 
11419   if (area_top <= area_bottom)
11420     {
11421       /* Nothing to restore.  */
11422     }
11423   else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11424 	   && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11425     {
11426       /* Area is in range.  */
11427       offset = cfun_frame_layout.frame_size;
11428     }
11429   else
11430     {
11431       rtx_insn *insn;
11432       rtx frame_off, cfa;
11433 
11434       offset = area_bottom < 0 ? -area_bottom : 0;
11435       frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11436 
11437       cfa = gen_rtx_SET (frame_pointer,
11438 			 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11439       if (DISP_IN_RANGE (INTVAL (frame_off)))
11440 	{
11441 	  rtx set;
11442 
11443 	  set = gen_rtx_SET (frame_pointer,
11444 			     gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11445 	  insn = emit_insn (set);
11446 	}
11447       else
11448 	{
11449 	  if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11450 	    frame_off = force_const_mem (Pmode, frame_off);
11451 
11452 	  insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11453 	  annotate_constant_pool_refs (insn);
11454 	}
11455       add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11456       RTX_FRAME_RELATED_P (insn) = 1;
11457     }
11458 
11459   /* Restore call saved fprs.  */
11460 
11461   if (TARGET_64BIT)
11462     {
11463       if (cfun_save_high_fprs_p)
11464 	{
11465 	  next_offset = cfun_frame_layout.f8_offset;
11466 	  for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11467 	    {
11468 	      if (cfun_fpr_save_p (i))
11469 		{
11470 		  restore_fpr (frame_pointer,
11471 			       offset + next_offset, i);
11472 		  cfa_restores
11473 		    = alloc_reg_note (REG_CFA_RESTORE,
11474 				      gen_rtx_REG (DFmode, i), cfa_restores);
11475 		  next_offset += 8;
11476 		}
11477 	    }
11478 	}
11479 
11480     }
11481   else
11482     {
11483       next_offset = cfun_frame_layout.f4_offset;
11484       /* f4, f6 */
11485       for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11486 	{
11487 	  if (cfun_fpr_save_p (i))
11488 	    {
11489 	      restore_fpr (frame_pointer,
11490 			   offset + next_offset, i);
11491 	      cfa_restores
11492 		= alloc_reg_note (REG_CFA_RESTORE,
11493 				  gen_rtx_REG (DFmode, i), cfa_restores);
11494 	      next_offset += 8;
11495 	    }
11496 	  else if (!TARGET_PACKED_STACK)
11497 	    next_offset += 8;
11498 	}
11499 
11500     }
11501 
11502   /* Restore call saved gprs.  */
11503 
11504   if (cfun_frame_layout.first_restore_gpr != -1)
11505     {
11506       rtx insn, addr;
11507       int i;
11508 
11509       /* Check for global register and save them
11510 	 to stack location from where they get restored.  */
11511 
11512       for (i = cfun_frame_layout.first_restore_gpr;
11513 	   i <= cfun_frame_layout.last_restore_gpr;
11514 	   i++)
11515 	{
11516 	  if (global_not_special_regno_p (i))
11517 	    {
11518 	      addr = plus_constant (Pmode, frame_pointer,
11519 				    offset + cfun_frame_layout.gprs_offset
11520 				    + (i - cfun_frame_layout.first_save_gpr_slot)
11521 				    * UNITS_PER_LONG);
11522 	      addr = gen_rtx_MEM (Pmode, addr);
11523 	      set_mem_alias_set (addr, get_frame_alias_set ());
11524 	      emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11525 	    }
11526 	  else
11527 	    cfa_restores
11528 	      = alloc_reg_note (REG_CFA_RESTORE,
11529 				gen_rtx_REG (Pmode, i), cfa_restores);
11530 	}
11531 
11532       /* Fetch return address from stack before load multiple,
11533 	 which helps scheduling.
11534 
11535 	 Only do this if we already decided that r14 needs to be
11536 	 saved to a stack slot. (And not just because r14 happens to
11537 	 be in between two GPRs which need saving.)  Otherwise it
11538 	 would be difficult to take that decision back in
11539 	 s390_optimize_prologue.
11540 
11541 	 This optimization is only helpful on in-order machines.  */
11542       if (! sibcall
11543 	  && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11544 	  && s390_tune <= PROCESSOR_2097_Z10)
11545 	{
11546 	  int return_regnum = find_unused_clobbered_reg();
11547 	  if (!return_regnum
11548 	      || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11549 		  && !TARGET_CPU_Z10
11550 		  && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11551 	    {
11552 	      gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11553 	      return_regnum = 4;
11554 	    }
11555 	  return_reg = gen_rtx_REG (Pmode, return_regnum);
11556 
11557 	  addr = plus_constant (Pmode, frame_pointer,
11558 				offset + cfun_frame_layout.gprs_offset
11559 				+ (RETURN_REGNUM
11560 				   - cfun_frame_layout.first_save_gpr_slot)
11561 				* UNITS_PER_LONG);
11562 	  addr = gen_rtx_MEM (Pmode, addr);
11563 	  set_mem_alias_set (addr, get_frame_alias_set ());
11564 	  emit_move_insn (return_reg, addr);
11565 
11566 	  /* Once we did that optimization we have to make sure
11567 	     s390_optimize_prologue does not try to remove the store
11568 	     of r14 since we will not be able to find the load issued
11569 	     here.  */
11570 	  cfun_frame_layout.save_return_addr_p = true;
11571 	}
11572 
11573       insn = restore_gprs (frame_pointer,
11574 			   offset + cfun_frame_layout.gprs_offset
11575 			   + (cfun_frame_layout.first_restore_gpr
11576 			      - cfun_frame_layout.first_save_gpr_slot)
11577 			   * UNITS_PER_LONG,
11578 			   cfun_frame_layout.first_restore_gpr,
11579 			   cfun_frame_layout.last_restore_gpr);
11580       insn = emit_insn (insn);
11581       REG_NOTES (insn) = cfa_restores;
11582       add_reg_note (insn, REG_CFA_DEF_CFA,
11583 		    plus_constant (Pmode, stack_pointer_rtx,
11584 				   STACK_POINTER_OFFSET));
11585       RTX_FRAME_RELATED_P (insn) = 1;
11586     }
11587 
11588   s390_restore_gprs_from_fprs ();
11589 
11590   if (! sibcall)
11591     {
11592       if (!return_reg && !s390_can_use_return_insn ())
11593         /* We planned to emit (return), but we are not allowed to.  */
11594         return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11595 
11596       if (return_reg)
11597         /* Emit (return) and (use).  */
11598         emit_jump_insn (gen_return_use (return_reg));
11599       else
11600         /* The fact that RETURN_REGNUM is used is already reflected by
11601            EPILOGUE_USES.  Emit plain (return).  */
11602         emit_jump_insn (gen_return ());
11603     }
11604 }
11605 
11606 /* Implement TARGET_SET_UP_BY_PROLOGUE.  */
11607 
11608 static void
11609 s300_set_up_by_prologue (hard_reg_set_container *regs)
11610 {
11611   if (cfun->machine->base_reg
11612       && !call_used_regs[REGNO (cfun->machine->base_reg)])
11613     SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11614 }
11615 
11616 /* -fsplit-stack support.  */
11617 
11618 /* A SYMBOL_REF for __morestack.  */
11619 static GTY(()) rtx morestack_ref;
11620 
11621 /* When using -fsplit-stack, the allocation routines set a field in
11622    the TCB to the bottom of the stack plus this much space, measured
11623    in bytes.  */
11624 
11625 #define SPLIT_STACK_AVAILABLE 1024
11626 
11627 /* Emit the parmblock for __morestack into .rodata section.  It
11628    consists of 3 pointer size entries:
11629    - frame size
11630    - size of stack arguments
11631    - offset between parm block and __morestack return label  */
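
/* Illustrative sketch (not part of the original source): on 64-bit the
   block emitted below looks roughly like

	.align	8
   .L<parm_block>:
	.quad	<frame_size>
	.quad	<args_size>
	.quad	.L<call_done>-.L<parm_block>

   with ".align 4" and ".long" used instead in 31-bit mode.  */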
11632 
11633 void
11634 s390_output_split_stack_data (rtx parm_block, rtx call_done,
11635 			      rtx frame_size, rtx args_size)
11636 {
11637   rtx ops[] = { parm_block, call_done };
11638 
11639   switch_to_section (targetm.asm_out.function_rodata_section
11640 		     (current_function_decl));
11641 
11642   if (TARGET_64BIT)
11643     output_asm_insn (".align\t8", NULL);
11644   else
11645     output_asm_insn (".align\t4", NULL);
11646 
11647   (*targetm.asm_out.internal_label) (asm_out_file, "L",
11648 				     CODE_LABEL_NUMBER (parm_block));
11649   if (TARGET_64BIT)
11650     {
11651       output_asm_insn (".quad\t%0", &frame_size);
11652       output_asm_insn (".quad\t%0", &args_size);
11653       output_asm_insn (".quad\t%1-%0", ops);
11654     }
11655   else
11656     {
11657       output_asm_insn (".long\t%0", &frame_size);
11658       output_asm_insn (".long\t%0", &args_size);
11659       output_asm_insn (".long\t%1-%0", ops);
11660     }
11661 
11662   switch_to_section (current_function_section ());
11663 }
11664 
11665 /* Emit -fsplit-stack prologue, which goes before the regular function
11666    prologue.  */
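
/* Sketch of the check emitted below (illustrative pseudo-C, not part of
   the original source):

     guard = tcb->__private_ss;		   /* at offset PSSO from thread ptr */
     if (frame_size > SPLIT_STACK_AVAILABLE)
       guard += frame_size;
     if (sp < guard)
       __morestack (frame_size, args_size); /* parameters found via %r1 */

   If frame_size is too large for the add/compare instructions, __morestack
   is called unconditionally; the fast path falls through to the normal
   prologue.  */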
11667 
11668 void
11669 s390_expand_split_stack_prologue (void)
11670 {
11671   rtx r1, guard, cc = NULL;
11672   rtx_insn *insn;
11673   /* Offset from thread pointer to __private_ss.  */
11674   int psso = TARGET_64BIT ? 0x38 : 0x20;
11675   /* Pointer size in bytes.  */
11676   /* Frame size and argument size - the two parameters to __morestack.  */
11677   HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11678   /* Align argument size to 8 bytes - simplifies __morestack code.  */
11679   HOST_WIDE_INT args_size = crtl->args.size >= 0
11680 			    ? ((crtl->args.size + 7) & ~7)
11681 			    : 0;
11682   /* Label to be called by __morestack.  */
11683   rtx_code_label *call_done = NULL;
11684   rtx_code_label *parm_base = NULL;
11685   rtx tmp;
11686 
11687   gcc_assert (flag_split_stack && reload_completed);
11688 
11689   r1 = gen_rtx_REG (Pmode, 1);
11690 
11691   /* If no stack frame will be allocated, don't do anything.  */
11692   if (!frame_size)
11693     {
11694       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11695 	{
11696 	  /* If va_start is used, just use r15.  */
11697 	  emit_move_insn (r1,
11698 			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11699 				       GEN_INT (STACK_POINTER_OFFSET)));
11700 
11701 	}
11702       return;
11703     }
11704 
11705   if (morestack_ref == NULL_RTX)
11706     {
11707       morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11708       SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11709 					   | SYMBOL_FLAG_FUNCTION);
11710     }
11711 
11712   if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11713     {
11714       /* If frame_size will fit in an add instruction, do a stack space
11715 	 check, and only call __morestack if there's not enough space.  */
11716 
11717       /* Get thread pointer.  r1 is the only register we can always destroy - r0
11718 	 could contain a static chain (and cannot be used to address memory
11719 	 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
11720       emit_insn (gen_get_thread_pointer (Pmode, r1));
11721       /* Aim at __private_ss.  */
11722       guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11723 
11724       /* If less than 1 KiB is used, skip the addition and compare directly with
11725 	 __private_ss.  */
11726       if (frame_size > SPLIT_STACK_AVAILABLE)
11727 	{
11728 	  emit_move_insn (r1, guard);
11729 	  if (TARGET_64BIT)
11730 	    emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11731 	  else
11732 	    emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11733 	  guard = r1;
11734 	}
11735 
11736       /* Compare the (maybe adjusted) guard with the stack pointer.  */
11737       cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11738     }
11739 
11740   call_done = gen_label_rtx ();
11741   parm_base = gen_label_rtx ();
11742   LABEL_NUSES (parm_base)++;
11743   LABEL_NUSES (call_done)++;
11744 
11745   /* %r1 = litbase.  */
11746   insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11747   add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11748   LABEL_NUSES (parm_base)++;
11749 
11750   /* Now, we need to call __morestack.  It has very special calling
11751      conventions: it preserves param/return/static chain registers for
11752      calling the main function body, and looks for its own parameters at %r1. */
11753   if (cc != NULL)
11754     tmp = gen_split_stack_cond_call (Pmode,
11755 				     morestack_ref,
11756 				     parm_base,
11757 				     call_done,
11758 				     GEN_INT (frame_size),
11759 				     GEN_INT (args_size),
11760 				     cc);
11761   else
11762     tmp = gen_split_stack_call (Pmode,
11763 				morestack_ref,
11764 				parm_base,
11765 				call_done,
11766 				GEN_INT (frame_size),
11767 				GEN_INT (args_size));
11768 
11769   insn = emit_jump_insn (tmp);
11770   JUMP_LABEL (insn) = call_done;
11771   add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11772   add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11773 
11774   if (cc != NULL)
11775     {
11776       /* Mark the jump as very unlikely to be taken.  */
11777       add_reg_br_prob_note (insn,
11778 			    profile_probability::very_unlikely ());
11779 
11780       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11781 	{
11782 	  /* If va_start is used, and __morestack was not called, just use
11783 	     r15.  */
11784 	  emit_move_insn (r1,
11785 			 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11786 				       GEN_INT (STACK_POINTER_OFFSET)));
11787 	}
11788     }
11789   else
11790     {
11791       emit_barrier ();
11792     }
11793 
11794   /* __morestack will call us here.  */
11795 
11796   emit_label (call_done);
11797 }
11798 
11799 /* We may have to tell the dataflow pass that the split stack prologue
11800    is initializing a register.  */
11801 
11802 static void
11803 s390_live_on_entry (bitmap regs)
11804 {
11805   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11806     {
11807       gcc_assert (flag_split_stack);
11808       bitmap_set_bit (regs, 1);
11809     }
11810 }
11811 
11812 /* Return true if the function can use simple_return to return outside
11813    of a shrink-wrapped region.  At present shrink-wrapping is supported
11814    in all cases.  */
11815 
11816 bool
11817 s390_can_use_simple_return_insn (void)
11818 {
11819   return true;
11820 }
11821 
11822 /* Return true if the epilogue is guaranteed to contain only a return
11823    instruction and if a direct return can therefore be used instead.
11824    One of the main advantages of using direct return instructions
11825    is that we can then use conditional returns.  */
11826 
11827 bool
11828 s390_can_use_return_insn (void)
11829 {
11830   int i;
11831 
11832   if (!reload_completed)
11833     return false;
11834 
11835   if (crtl->profile)
11836     return false;
11837 
11838   if (TARGET_TPF_PROFILING)
11839     return false;
11840 
11841   for (i = 0; i < 16; i++)
11842     if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11843       return false;
11844 
11845   /* For 31 bit this is not covered by the frame_size check below
11846      since f4, f6 are saved in the register save area without needing
11847      additional stack space.  */
11848   if (!TARGET_64BIT
11849       && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11850     return false;
11851 
11852   if (cfun->machine->base_reg
11853       && !call_used_regs[REGNO (cfun->machine->base_reg)])
11854     return false;
11855 
11856   return cfun_frame_layout.frame_size == 0;
11857 }
11858 
11859 /* The VX ABI differs for vararg functions.  Therefore we need the
11860    prototype of the callee to be available when passing vector type
11861    values.  */
11862 static const char *
11863 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11864 {
11865   return ((TARGET_VX_ABI
11866 	   && typelist == 0
11867 	   && VECTOR_TYPE_P (TREE_TYPE (val))
11868 	   && (funcdecl == NULL_TREE
11869 	       || (TREE_CODE (funcdecl) == FUNCTION_DECL
11870 		   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11871 	  ? N_("vector argument passed to unprototyped function")
11872 	  : NULL);
11873 }
11874 
11875 
11876 /* Return the size in bytes of a function argument of
11877    type TYPE and/or mode MODE.  At least one of TYPE or
11878    MODE must be specified.  */
11879 
11880 static int
11881 s390_function_arg_size (machine_mode mode, const_tree type)
11882 {
11883   if (type)
11884     return int_size_in_bytes (type);
11885 
11886   /* No type info available for some library calls ...  */
11887   if (mode != BLKmode)
11888     return GET_MODE_SIZE (mode);
11889 
11890   /* If we have neither type nor mode, abort.  */
11891   gcc_unreachable ();
11892 }
11893 
11894 /* Return true if a function argument of type TYPE and mode MODE
11895    is to be passed in a vector register, if available.  */
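
/* Illustrative example (not part of the original source): under the VX ABI
   a 16-byte "vector signed int" argument is passed in a vector register,
   and so is "struct S { vector signed int v; }", since a record with a
   single member is treated like that member; a record whose size exceeds
   its single member because of padding is not.  */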
11896 
11897 bool
11898 s390_function_arg_vector (machine_mode mode, const_tree type)
11899 {
11900   if (!TARGET_VX_ABI)
11901     return false;
11902 
11903   if (s390_function_arg_size (mode, type) > 16)
11904     return false;
11905 
11906   /* No type info available for some library calls ...  */
11907   if (!type)
11908     return VECTOR_MODE_P (mode);
11909 
11910   /* The ABI says that record types with a single member are treated
11911      just like that member would be.  */
11912   int empty_base_seen = 0;
11913   const_tree orig_type = type;
11914   while (TREE_CODE (type) == RECORD_TYPE)
11915     {
11916       tree field, single = NULL_TREE;
11917 
11918       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11919 	{
11920 	  if (TREE_CODE (field) != FIELD_DECL)
11921 	    continue;
11922 
11923 	  if (DECL_FIELD_ABI_IGNORED (field))
11924 	    {
11925 	      if (lookup_attribute ("no_unique_address",
11926 				    DECL_ATTRIBUTES (field)))
11927 		empty_base_seen |= 2;
11928 	      else
11929 		empty_base_seen |= 1;
11930 	      continue;
11931 	    }
11932 
11933 	  if (single == NULL_TREE)
11934 	    single = TREE_TYPE (field);
11935 	  else
11936 	    return false;
11937 	}
11938 
11939       if (single == NULL_TREE)
11940 	return false;
11941       else
11942 	{
11943 	  /* If the field declaration adds extra bytes due to
11944 	     e.g. padding, this is not accepted as a vector type.  */
11945 	  if (int_size_in_bytes (single) <= 0
11946 	      || int_size_in_bytes (single) != int_size_in_bytes (type))
11947 	    return false;
11948 	  type = single;
11949 	}
11950     }
11951 
11952   if (!VECTOR_TYPE_P (type))
11953     return false;
11954 
11955   if (warn_psabi && empty_base_seen)
11956     {
11957       static unsigned last_reported_type_uid;
11958       unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
11959       if (uid != last_reported_type_uid)
11960 	{
11961 	  const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
11962 	  last_reported_type_uid = uid;
11963 	  if (empty_base_seen & 1)
11964 	    inform (input_location,
11965 		    "parameter passing for argument of type %qT when C++17 "
11966 		    "is enabled changed to match C++14 %{in GCC 10.1%}",
11967 		    orig_type, url);
11968 	  else
11969 	    inform (input_location,
11970 		    "parameter passing for argument of type %qT with "
11971 		    "%<[[no_unique_address]]%> members changed "
11972 		    "%{in GCC 10.1%}", orig_type, url);
11973 	}
11974     }
11975   return true;
11976 }
11977 
11978 /* Return true if a function argument of type TYPE and mode MODE
11979    is to be passed in a floating-point register, if available.  */
11980 
11981 static bool
11982 s390_function_arg_float (machine_mode mode, const_tree type)
11983 {
11984   if (s390_function_arg_size (mode, type) > 8)
11985     return false;
11986 
11987   /* Soft-float changes the ABI: no floating-point registers are used.  */
11988   if (TARGET_SOFT_FLOAT)
11989     return false;
11990 
11991   /* No type info available for some library calls ...  */
11992   if (!type)
11993     return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11994 
11995   /* The ABI says that record types with a single member are treated
11996      just like that member would be.  */
11997   int empty_base_seen = 0;
11998   const_tree orig_type = type;
11999   while (TREE_CODE (type) == RECORD_TYPE)
12000     {
12001       tree field, single = NULL_TREE;
12002 
12003       for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12004 	{
12005 	  if (TREE_CODE (field) != FIELD_DECL)
12006 	    continue;
12007 	  if (DECL_FIELD_ABI_IGNORED (field))
12008 	    {
12009 	      if (lookup_attribute ("no_unique_address",
12010 				    DECL_ATTRIBUTES (field)))
12011 		empty_base_seen |= 2;
12012 	      else
12013 		empty_base_seen |= 1;
12014 	      continue;
12015 	    }
12016 
12017 	  if (single == NULL_TREE)
12018 	    single = TREE_TYPE (field);
12019 	  else
12020 	    return false;
12021 	}
12022 
12023       if (single == NULL_TREE)
12024 	return false;
12025       else
12026 	type = single;
12027     }
12028 
12029   if (TREE_CODE (type) != REAL_TYPE)
12030     return false;
12031 
12032   if (warn_psabi && empty_base_seen)
12033     {
12034       static unsigned last_reported_type_uid;
12035       unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12036       if (uid != last_reported_type_uid)
12037 	{
12038 	  const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12039 	  last_reported_type_uid = uid;
12040 	  if (empty_base_seen & 1)
12041 	    inform (input_location,
12042 		    "parameter passing for argument of type %qT when C++17 "
12043 		    "is enabled changed to match C++14 %{in GCC 10.1%}",
12044 		    orig_type, url);
12045 	  else
12046 	    inform (input_location,
12047 		    "parameter passing for argument of type %qT with "
12048 		    "%<[[no_unique_address]]%> members changed "
12049 		    "%{in GCC 10.1%}", orig_type, url);
12050 	}
12051     }
12052 
12053   return true;
12054 }
12055 
12056 /* Return true if a function argument of type TYPE and mode MODE
12057    is to be passed in an integer register, or a pair of integer
12058    registers, if available.  */
12059 
12060 static bool
12061 s390_function_arg_integer (machine_mode mode, const_tree type)
12062 {
12063   int size = s390_function_arg_size (mode, type);
12064   if (size > 8)
12065     return false;
12066 
12067   /* No type info available for some library calls ...  */
12068   if (!type)
12069     return GET_MODE_CLASS (mode) == MODE_INT
12070 	   || (TARGET_SOFT_FLOAT &&  SCALAR_FLOAT_MODE_P (mode));
12071 
12072   /* We accept small integral (and similar) types.  */
12073   if (INTEGRAL_TYPE_P (type)
12074       || POINTER_TYPE_P (type)
12075       || TREE_CODE (type) == NULLPTR_TYPE
12076       || TREE_CODE (type) == OFFSET_TYPE
12077       || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12078     return true;
12079 
12080   /* We also accept structs of size 1, 2, 4, 8 that are not
12081      passed in floating-point registers.  */
12082   if (AGGREGATE_TYPE_P (type)
12083       && exact_log2 (size) >= 0
12084       && !s390_function_arg_float (mode, type))
12085     return true;
12086 
12087   return false;
12088 }
12089 
12090 /* Return 1 if a function argument ARG is to be passed by reference.
12091    The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12092    are passed by value, all other structures (and complex numbers) are
12093    passed by reference.  */
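
/* Examples (illustrative, not part of the original source): a struct of
   size 1, 2, 4 or 8 bytes is passed by value; a 3-byte or 12-byte struct,
   a complex number, and any argument larger than 8 bytes are passed by
   reference.  Vector arguments handled by the VX ABI are never passed by
   reference here.  */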
12094 
12095 static bool
12096 s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
12097 {
12098   int size = s390_function_arg_size (arg.mode, arg.type);
12099 
12100   if (s390_function_arg_vector (arg.mode, arg.type))
12101     return false;
12102 
12103   if (size > 8)
12104     return true;
12105 
12106   if (tree type = arg.type)
12107     {
12108       if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12109 	return true;
12110 
12111       if (TREE_CODE (type) == COMPLEX_TYPE
12112 	  || TREE_CODE (type) == VECTOR_TYPE)
12113 	return true;
12114     }
12115 
12116   return false;
12117 }
12118 
12119 /* Update the data in CUM to advance over argument ARG.  */
12120 
12121 static void
12122 s390_function_arg_advance (cumulative_args_t cum_v,
12123 			   const function_arg_info &arg)
12124 {
12125   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12126 
12127   if (s390_function_arg_vector (arg.mode, arg.type))
12128     {
12129       /* We are called for unnamed vector stdarg arguments which are
12130 	 passed on the stack.  In this case this hook does not have to
12131 	 do anything since stack arguments are tracked by common
12132 	 code.  */
12133       if (!arg.named)
12134 	return;
12135       cum->vrs += 1;
12136     }
12137   else if (s390_function_arg_float (arg.mode, arg.type))
12138     {
12139       cum->fprs += 1;
12140     }
12141   else if (s390_function_arg_integer (arg.mode, arg.type))
12142     {
12143       int size = s390_function_arg_size (arg.mode, arg.type);
12144       cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12145     }
12146   else
12147     gcc_unreachable ();
12148 }
12149 
12150 /* Define where to put the arguments to a function.
12151    Value is zero to push the argument on the stack,
12152    or a hard register in which to store the argument.
12153 
12154    CUM is a variable of type CUMULATIVE_ARGS which gives info about
12155     the preceding args and about the function being called.
12156    ARG is a description of the argument.
12157 
12158    On S/390, we use general purpose registers 2 through 6 to
12159    pass integer, pointer, and certain structure arguments, and
12160    floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12161    to pass floating point arguments.  All remaining arguments
12162    are pushed to the stack.  */
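
/* Illustrative example (not part of the original source): for
   "void f (int a, double d, long b)" on 64-bit, a is passed in %r2, d in
   %f0 and b in %r3; once GPRs 2-6 or the available FPRs are exhausted,
   further arguments go on the stack (NULL_RTX is returned below).  */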
12163 
12164 static rtx
12165 s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
12166 {
12167   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12168 
12169   if (!arg.named)
12170     s390_check_type_for_vector_abi (arg.type, true, false);
12171 
12172   if (s390_function_arg_vector (arg.mode, arg.type))
12173     {
12174       /* Vector arguments being part of the ellipsis are passed on the
12175 	 stack.  */
12176       if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12177 	return NULL_RTX;
12178 
12179       return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12180     }
12181   else if (s390_function_arg_float (arg.mode, arg.type))
12182     {
12183       if (cum->fprs + 1 > FP_ARG_NUM_REG)
12184 	return NULL_RTX;
12185       else
12186 	return gen_rtx_REG (arg.mode, cum->fprs + 16);
12187     }
12188   else if (s390_function_arg_integer (arg.mode, arg.type))
12189     {
12190       int size = s390_function_arg_size (arg.mode, arg.type);
12191       int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12192 
12193       if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12194 	return NULL_RTX;
12195       else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12196 	return gen_rtx_REG (arg.mode, cum->gprs + 2);
12197       else if (n_gprs == 2)
12198 	{
12199 	  rtvec p = rtvec_alloc (2);
12200 
12201 	  RTVEC_ELT (p, 0)
12202 	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12203 					 const0_rtx);
12204 	  RTVEC_ELT (p, 1)
12205 	    = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12206 					 GEN_INT (4));
12207 
12208 	  return gen_rtx_PARALLEL (arg.mode, p);
12209 	}
12210     }
12211 
12212   /* After the real arguments, expand_call calls us once again with an
12213      end marker.  Whatever we return here is passed as operand 2 to the
12214      call expanders.
12215 
12216      We don't need this feature ...  */
12217   else if (arg.end_marker_p ())
12218     return const0_rtx;
12219 
12220   gcc_unreachable ();
12221 }
12222 
12223 /* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Vector arguments are
12224    left-justified when placed on the stack during parameter passing.  */
12225 
12226 static pad_direction
12227 s390_function_arg_padding (machine_mode mode, const_tree type)
12228 {
12229   if (s390_function_arg_vector (mode, type))
12230     return PAD_UPWARD;
12231 
12232   return default_function_arg_padding (mode, type);
12233 }
12234 
12235 /* Return true if return values of type TYPE should be returned
12236    in a memory buffer whose address is passed by the caller as
12237    hidden first argument.  */
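
/* Examples (illustrative, not part of the original source): an int, a
   pointer or a double is returned in a register; any struct, union or
   complex value is returned in memory; with the VX ABI a vector of at
   most 16 bytes is returned in a vector register.  */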
12238 
12239 static bool
12240 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12241 {
12242   /* We accept small integral (and similar) types.  */
12243   if (INTEGRAL_TYPE_P (type)
12244       || POINTER_TYPE_P (type)
12245       || TREE_CODE (type) == OFFSET_TYPE
12246       || TREE_CODE (type) == REAL_TYPE)
12247     return int_size_in_bytes (type) > 8;
12248 
12249   /* vector types which fit into a VR.  */
12250   if (TARGET_VX_ABI
12251       && VECTOR_TYPE_P (type)
12252       && int_size_in_bytes (type) <= 16)
12253     return false;
12254 
12255   /* Aggregates and similar constructs are always returned
12256      in memory.  */
12257   if (AGGREGATE_TYPE_P (type)
12258       || TREE_CODE (type) == COMPLEX_TYPE
12259       || VECTOR_TYPE_P (type))
12260     return true;
12261 
12262   /* ??? We get called on all sorts of random stuff from
12263      aggregate_value_p.  We can't abort, but it's not clear
12264      what's safe to return.  Pretend it's a struct I guess.  */
12265   return true;
12266 }
12267 
12268 /* Function arguments and return values are promoted to word size.  */
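
/* For instance (illustrative, not part of the original source), a "short"
   or "int" argument on 64-bit is promoted to Pmode (DImode) below, and a
   narrow pointer is extended according to POINTERS_EXTEND_UNSIGNED.  */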
12269 
12270 static machine_mode
12271 s390_promote_function_mode (const_tree type, machine_mode mode,
12272 			    int *punsignedp,
12273 			    const_tree fntype ATTRIBUTE_UNUSED,
12274 			    int for_return ATTRIBUTE_UNUSED)
12275 {
12276   if (INTEGRAL_MODE_P (mode)
12277       && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12278     {
12279       if (type != NULL_TREE && POINTER_TYPE_P (type))
12280 	*punsignedp = POINTERS_EXTEND_UNSIGNED;
12281       return Pmode;
12282     }
12283 
12284   return mode;
12285 }
12286 
12287 /* Define where to return a (scalar) value of type RET_TYPE.
12288    If RET_TYPE is null, define where to return a (scalar)
12289    value of mode MODE from a libcall.  */
12290 
12291 static rtx
12292 s390_function_and_libcall_value (machine_mode mode,
12293 				 const_tree ret_type,
12294 				 const_tree fntype_or_decl,
12295 				 bool outgoing ATTRIBUTE_UNUSED)
12296 {
12297   /* For vector return types it is important to use the RET_TYPE
12298      argument whenever available since the middle-end might have
12299      changed the mode to a scalar mode.  */
12300   bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12301 			    || (!ret_type && VECTOR_MODE_P (mode)));
12302 
12303   /* For normal functions perform the promotion as
12304      promote_function_mode would do.  */
12305   if (ret_type)
12306     {
12307       int unsignedp = TYPE_UNSIGNED (ret_type);
12308       mode = promote_function_mode (ret_type, mode, &unsignedp,
12309 				    fntype_or_decl, 1);
12310     }
12311 
12312   gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12313 	      || SCALAR_FLOAT_MODE_P (mode)
12314 	      || (TARGET_VX_ABI && vector_ret_type_p));
12315   gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12316 
12317   if (TARGET_VX_ABI && vector_ret_type_p)
12318     return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12319   else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12320     return gen_rtx_REG (mode, 16);
12321   else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12322 	   || UNITS_PER_LONG == UNITS_PER_WORD)
12323     return gen_rtx_REG (mode, 2);
12324   else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12325     {
12326       /* This case is triggered when returning a 64 bit value with
12327 	 -m31 -mzarch.  Although the value would fit into a single
12328 	 register it has to be forced into a 32 bit register pair in
12329 	 order to match the ABI.  */
12330       rtvec p = rtvec_alloc (2);
12331 
12332       RTVEC_ELT (p, 0)
12333 	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12334       RTVEC_ELT (p, 1)
12335 	= gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12336 
12337       return gen_rtx_PARALLEL (mode, p);
12338     }
12339 
12340   gcc_unreachable ();
12341 }
12342 
12343 /* Define where to return a scalar return value of type RET_TYPE.  */
12344 
12345 static rtx
12346 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12347 		     bool outgoing)
12348 {
12349   return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12350 					  fn_decl_or_type, outgoing);
12351 }
12352 
12353 /* Define where to return a scalar libcall return value of mode
12354    MODE.  */
12355 
12356 static rtx
12357 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12358 {
12359   return s390_function_and_libcall_value (mode, NULL_TREE,
12360 					  NULL_TREE, true);
12361 }
12362 
12363 
12364 /* Create and return the va_list datatype.
12365 
12366    On S/390, va_list is an array type equivalent to
12367 
12368       typedef struct __va_list_tag
12369 	{
12370 	    long __gpr;
12371 	    long __fpr;
12372 	    void *__overflow_arg_area;
12373 	    void *__reg_save_area;
12374 	} va_list[1];
12375 
12376    where __gpr and __fpr hold the number of general purpose
12377    or floating point arguments used up to now, respectively,
12378    __overflow_arg_area points to the stack location of the
12379    next argument passed on the stack, and __reg_save_area
12380    always points to the start of the register area in the
12381    call frame of the current function.  The function prologue
12382    saves all registers used for argument passing into this
12383    area if the function uses variable arguments.  */
12384 
12385 static tree
12386 s390_build_builtin_va_list (void)
12387 {
12388   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12389 
12390   record = lang_hooks.types.make_type (RECORD_TYPE);
12391 
12392   type_decl =
12393     build_decl (BUILTINS_LOCATION,
12394 		TYPE_DECL, get_identifier ("__va_list_tag"), record);
12395 
12396   f_gpr = build_decl (BUILTINS_LOCATION,
12397 		      FIELD_DECL, get_identifier ("__gpr"),
12398 		      long_integer_type_node);
12399   f_fpr = build_decl (BUILTINS_LOCATION,
12400 		      FIELD_DECL, get_identifier ("__fpr"),
12401 		      long_integer_type_node);
12402   f_ovf = build_decl (BUILTINS_LOCATION,
12403 		      FIELD_DECL, get_identifier ("__overflow_arg_area"),
12404 		      ptr_type_node);
12405   f_sav = build_decl (BUILTINS_LOCATION,
12406 		      FIELD_DECL, get_identifier ("__reg_save_area"),
12407 		      ptr_type_node);
12408 
12409   va_list_gpr_counter_field = f_gpr;
12410   va_list_fpr_counter_field = f_fpr;
12411 
12412   DECL_FIELD_CONTEXT (f_gpr) = record;
12413   DECL_FIELD_CONTEXT (f_fpr) = record;
12414   DECL_FIELD_CONTEXT (f_ovf) = record;
12415   DECL_FIELD_CONTEXT (f_sav) = record;
12416 
12417   TYPE_STUB_DECL (record) = type_decl;
12418   TYPE_NAME (record) = type_decl;
12419   TYPE_FIELDS (record) = f_gpr;
12420   DECL_CHAIN (f_gpr) = f_fpr;
12421   DECL_CHAIN (f_fpr) = f_ovf;
12422   DECL_CHAIN (f_ovf) = f_sav;
12423 
12424   layout_type (record);
12425 
12426   /* The correct type is an array type of one element.  */
12427   return build_array_type (record, build_index_type (size_zero_node));
12428 }
12429 
12430 /* Implement va_start by filling the va_list structure VALIST.
12431    STDARG_P is always true, and ignored.
12432    NEXTARG points to the first anonymous stack argument.
12433 
12434    The following global variables are used to initialize
12435    the va_list structure:
12436 
12437      crtl->args.info:
12438        holds number of gprs and fprs used for named arguments.
12439      crtl->args.arg_offset_rtx:
12440        holds the offset of the first anonymous stack argument
12441        (relative to the virtual arg pointer).  */
12442 
12443 static void
12444 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12445 {
12446   HOST_WIDE_INT n_gpr, n_fpr;
12447   int off;
12448   tree f_gpr, f_fpr, f_ovf, f_sav;
12449   tree gpr, fpr, ovf, sav, t;
12450 
12451   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12452   f_fpr = DECL_CHAIN (f_gpr);
12453   f_ovf = DECL_CHAIN (f_fpr);
12454   f_sav = DECL_CHAIN (f_ovf);
12455 
12456   valist = build_simple_mem_ref (valist);
12457   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12458   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12459   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12460   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12461 
12462   /* Count number of gp and fp argument registers used.  */
12463 
12464   n_gpr = crtl->args.info.gprs;
12465   n_fpr = crtl->args.info.fprs;
12466 
12467   if (cfun->va_list_gpr_size)
12468     {
12469       t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12470 		  build_int_cst (NULL_TREE, n_gpr));
12471       TREE_SIDE_EFFECTS (t) = 1;
12472       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12473     }
12474 
12475   if (cfun->va_list_fpr_size)
12476     {
12477       t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12478 		  build_int_cst (NULL_TREE, n_fpr));
12479       TREE_SIDE_EFFECTS (t) = 1;
12480       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12481     }
12482 
12483   if (flag_split_stack
12484      && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12485 	 == NULL)
12486      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12487     {
12488       rtx reg;
12489       rtx_insn *seq;
12490 
12491       reg = gen_reg_rtx (Pmode);
12492       cfun->machine->split_stack_varargs_pointer = reg;
12493 
12494       start_sequence ();
12495       emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12496       seq = get_insns ();
12497       end_sequence ();
12498 
12499       push_topmost_sequence ();
12500       emit_insn_after (seq, entry_of_function ());
12501       pop_topmost_sequence ();
12502     }
12503 
12504   /* Find the overflow area.
12505      FIXME: This currently is too pessimistic when the vector ABI is
12506      enabled.  In that case we *always* set up the overflow area
12507      pointer.  */
12508   if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12509       || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12510       || TARGET_VX_ABI)
12511     {
12512       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12513 	t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12514       else
12515 	t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12516 
12517       off = INTVAL (crtl->args.arg_offset_rtx);
12518       off = off < 0 ? 0 : off;
12519       if (TARGET_DEBUG_ARG)
12520 	fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12521 		 (int)n_gpr, (int)n_fpr, off);
12522 
12523       t = fold_build_pointer_plus_hwi (t, off);
12524 
12525       t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12526       TREE_SIDE_EFFECTS (t) = 1;
12527       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12528     }
12529 
12530   /* Find the register save area.  */
12531   if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12532       || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12533     {
12534       t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12535       t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12536 
12537       t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12538       TREE_SIDE_EFFECTS (t) = 1;
12539       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12540     }
12541 }
12542 
12543 /* Implement va_arg by updating the va_list structure
12544    VALIST as required to retrieve an argument of type
12545    TYPE, and returning that argument.
12546 
12547    Generates code equivalent to:
12548 
12549    if (integral value) {
12550      if (size  <= 4 && args.gpr < 5 ||
12551 	 size  > 4 && args.gpr < 4 )
12552        ret = args.reg_save_area[args.gpr+8]
12553      else
12554        ret = *args.overflow_arg_area++;
12555    } else if (vector value) {
12556        ret = *args.overflow_arg_area;
12557        args.overflow_arg_area += size / 8;
12558    } else if (float value) {
12559      if (args.fgpr < 2)
12560        ret = args.reg_save_area[args.fpr+64]
12561      else
12562        ret = *args.overflow_arg_area++;
12563    } else if (aggregate value) {
12564      if (args.gpr < 5)
12565        ret = *args.reg_save_area[args.gpr]
12566      else
12567        ret = **args.overflow_arg_area++;
12568    } */
12569 
12570 static tree
12571 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12572 		      gimple_seq *post_p ATTRIBUTE_UNUSED)
12573 {
12574   tree f_gpr, f_fpr, f_ovf, f_sav;
12575   tree gpr, fpr, ovf, sav, reg, t, u;
12576   int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12577   tree lab_false, lab_over = NULL_TREE;
12578   tree addr = create_tmp_var (ptr_type_node, "addr");
12579   bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12580 			a stack slot.  */
12581 
12582   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12583   f_fpr = DECL_CHAIN (f_gpr);
12584   f_ovf = DECL_CHAIN (f_fpr);
12585   f_sav = DECL_CHAIN (f_ovf);
12586 
12587   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12588   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12589   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12590 
12591   /* The tree for args* cannot be shared between gpr/fpr and ovf since
12592      both appear on a lhs.  */
12593   valist = unshare_expr (valist);
12594   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12595 
12596   size = int_size_in_bytes (type);
12597 
12598   s390_check_type_for_vector_abi (type, true, false);
12599 
12600   if (pass_va_arg_by_reference (type))
12601     {
12602       if (TARGET_DEBUG_ARG)
12603 	{
12604 	  fprintf (stderr, "va_arg: aggregate type");
12605 	  debug_tree (type);
12606 	}
12607 
12608       /* Aggregates are passed by reference.  */
12609       indirect_p = 1;
12610       reg = gpr;
12611       n_reg = 1;
12612 
12613       /* Kernel stack layout on 31 bit: it is assumed here that no padding
12614 	 will be added by s390_frame_info because for va_args an even
12615 	 number of GPRs always has to be saved: r15-r2 = 14 regs.  */
12616       sav_ofs = 2 * UNITS_PER_LONG;
12617       sav_scale = UNITS_PER_LONG;
12618       size = UNITS_PER_LONG;
12619       max_reg = GP_ARG_NUM_REG - n_reg;
12620       left_align_p = false;
12621     }
12622   else if (s390_function_arg_vector (TYPE_MODE (type), type))
12623     {
12624       if (TARGET_DEBUG_ARG)
12625 	{
12626 	  fprintf (stderr, "va_arg: vector type");
12627 	  debug_tree (type);
12628 	}
12629 
12630       indirect_p = 0;
12631       reg = NULL_TREE;
12632       n_reg = 0;
12633       sav_ofs = 0;
12634       sav_scale = 8;
12635       max_reg = 0;
12636       left_align_p = true;
12637     }
12638   else if (s390_function_arg_float (TYPE_MODE (type), type))
12639     {
12640       if (TARGET_DEBUG_ARG)
12641 	{
12642 	  fprintf (stderr, "va_arg: float type");
12643 	  debug_tree (type);
12644 	}
12645 
12646       /* FP args go in FP registers, if present.  */
12647       indirect_p = 0;
12648       reg = fpr;
12649       n_reg = 1;
12650       sav_ofs = 16 * UNITS_PER_LONG;
12651       sav_scale = 8;
12652       max_reg = FP_ARG_NUM_REG - n_reg;
12653       left_align_p = false;
12654     }
12655   else
12656     {
12657       if (TARGET_DEBUG_ARG)
12658 	{
12659 	  fprintf (stderr, "va_arg: other type");
12660 	  debug_tree (type);
12661 	}
12662 
12663       /* Otherwise into GP registers.  */
12664       indirect_p = 0;
12665       reg = gpr;
12666       n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12667 
12668       /* Kernel stack layout on 31 bit: it is assumed here that no padding
12669 	 will be added by s390_frame_info because for va_args an even
12670 	 number of GPRs always has to be saved: r15-r2 = 14 regs.  */
12671       sav_ofs = 2 * UNITS_PER_LONG;
12672 
12673       if (size < UNITS_PER_LONG)
12674 	sav_ofs += UNITS_PER_LONG - size;
12675 
12676       sav_scale = UNITS_PER_LONG;
12677       max_reg = GP_ARG_NUM_REG - n_reg;
12678       left_align_p = false;
12679     }
12680 
12681   /* Pull the value out of the saved registers ...  */
12682 
12683   if (reg != NULL_TREE)
12684     {
12685       /*
12686 	if (reg > ((typeof (reg))max_reg))
12687 	  goto lab_false;
12688 
12689 	addr = sav + sav_ofs + reg * save_scale;
12690 
12691 	goto lab_over;
12692 
12693 	lab_false:
12694       */
12695 
12696       lab_false = create_artificial_label (UNKNOWN_LOCATION);
12697       lab_over = create_artificial_label (UNKNOWN_LOCATION);
12698 
12699       t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12700       t = build2 (GT_EXPR, boolean_type_node, reg, t);
12701       u = build1 (GOTO_EXPR, void_type_node, lab_false);
12702       t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12703       gimplify_and_add (t, pre_p);
12704 
12705       t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12706       u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12707 		  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12708       t = fold_build_pointer_plus (t, u);
12709 
12710       gimplify_assign (addr, t, pre_p);
12711 
12712       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12713 
12714       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12715     }
12716 
12717   /* ... Otherwise out of the overflow area.  */
12718 
12719   t = ovf;
12720   if (size < UNITS_PER_LONG && !left_align_p)
12721     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12722 
12723   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12724 
12725   gimplify_assign (addr, t, pre_p);
12726 
12727   if (size < UNITS_PER_LONG && left_align_p)
12728     t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12729   else
12730     t = fold_build_pointer_plus_hwi (t, size);
12731 
12732   gimplify_assign (ovf, t, pre_p);
12733 
12734   if (reg != NULL_TREE)
12735     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12736 
12737 
12738   /* Increment register save count.  */
12739 
12740   if (n_reg > 0)
12741     {
12742       u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12743 		  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12744       gimplify_and_add (u, pre_p);
12745     }
12746 
12747   if (indirect_p)
12748     {
12749       t = build_pointer_type_for_mode (build_pointer_type (type),
12750 				       ptr_mode, true);
12751       addr = fold_convert (t, addr);
12752       addr = build_va_arg_indirect_ref (addr);
12753     }
12754   else
12755     {
12756       t = build_pointer_type_for_mode (type, ptr_mode, true);
12757       addr = fold_convert (t, addr);
12758     }
12759 
12760   return build_va_arg_indirect_ref (addr);
12761 }
12762 
12763 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12764    expanders.
12765    DEST  - Register location where CC will be stored.
12766    TDB   - Pointer to a 256 byte area in which to store the transaction
12767 	   diagnostic block.  NULL if TDB is not needed.
12768    RETRY - Retry count value.  If non-NULL, a retry loop for CC2
12769 	   is emitted.
12770    CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12771 		    of the tbegin instruction pattern.  */
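
/* At the source level this expander backs builtins such as
   __builtin_tbegin.  Roughly (illustrative sketch, not part of the
   original source):

     if (__builtin_tbegin (NULL) == 0)	/* CC0: transaction started */
       {
	 ... transactional code ...
	 __builtin_tend ();
       }

   With a retry count, CC2 (transient failure) loops back to the tbegin
   after a ppa (tx_assist) hint, as emitted below.  */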
12772 
12773 void
12774 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12775 {
12776   rtx retry_plus_two = gen_reg_rtx (SImode);
12777   rtx retry_reg = gen_reg_rtx (SImode);
12778   rtx_code_label *retry_label = NULL;
12779 
12780   if (retry != NULL_RTX)
12781     {
12782       emit_move_insn (retry_reg, retry);
12783       emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12784       emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12785       retry_label = gen_label_rtx ();
12786       emit_label (retry_label);
12787     }
12788 
12789   if (clobber_fprs_p)
12790     {
12791       if (TARGET_VX)
12792 	emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12793 				     tdb));
12794       else
12795 	emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12796 				 tdb));
12797     }
12798   else
12799     emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12800 				     tdb));
12801 
12802   emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12803 					gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12804 								   CC_REGNUM)),
12805 					UNSPEC_CC_TO_INT));
12806   if (retry != NULL_RTX)
12807     {
12808       const int CC0 = 1 << 3;
12809       const int CC1 = 1 << 2;
12810       const int CC3 = 1 << 0;
12811       rtx jump;
12812       rtx count = gen_reg_rtx (SImode);
12813       rtx_code_label *leave_label = gen_label_rtx ();
12814 
12815       /* Exit for success and permanent failures.  */
12816       jump = s390_emit_jump (leave_label,
12817 			     gen_rtx_EQ (VOIDmode,
12818 			       gen_rtx_REG (CCRAWmode, CC_REGNUM),
12819 			       gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12820       LABEL_NUSES (leave_label) = 1;
12821 
12822       /* CC2 - transient failure. Perform retry with ppa.  */
12823       emit_move_insn (count, retry_plus_two);
12824       emit_insn (gen_subsi3 (count, count, retry_reg));
12825       emit_insn (gen_tx_assist (count));
12826       jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12827 					      retry_reg,
12828 					      retry_reg));
12829       JUMP_LABEL (jump) = retry_label;
12830       LABEL_NUSES (retry_label) = 1;
12831       emit_label (leave_label);
12832     }
12833 }
12834 
12835 
12836 /* Return the decl for the target specific builtin with the function
12837    code FCODE.  */
12838 
12839 static tree
12840 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12841 {
12842   if (fcode >= S390_BUILTIN_MAX)
12843     return error_mark_node;
12844 
12845   return s390_builtin_decls[fcode];
12846 }
12847 
12848 /* We call mcount before the function prologue.  So a profiled leaf
12849    function should stay a leaf function.  */
12850 
12851 static bool
12852 s390_keep_leaf_when_profiled ()
12853 {
12854   return true;
12855 }
12856 
12857 /* Output assembly code for the trampoline template to
12858    stdio stream FILE.
12859 
12860    On S/390, we use gpr 1 internally in the trampoline code;
12861    gpr 0 is used to hold the static chain.  */
12862 
12863 static void
12864 s390_asm_trampoline_template (FILE *file)
12865 {
12866   rtx op[2];
12867   op[0] = gen_rtx_REG (Pmode, 0);
12868   op[1] = gen_rtx_REG (Pmode, 1);
12869 
12870   if (TARGET_64BIT)
12871     {
12872       output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
12873       output_asm_insn ("lmg\t%0,%1,14(%1)", op);  /* 6 byte */
12874       output_asm_insn ("br\t%1", op);             /* 2 byte */
12875       ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12876     }
12877   else
12878     {
12879       output_asm_insn ("basr\t%1,0", op);         /* 2 byte */
12880       output_asm_insn ("lm\t%0,%1,6(%1)", op);    /* 4 byte */
12881       output_asm_insn ("br\t%1", op);             /* 2 byte */
12882       ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12883     }
12884 }
12885 
12886 /* Emit RTL insns to initialize the variable parts of a trampoline.
12887    FNADDR is an RTX for the address of the function's pure code.
12888    CXT is an RTX for the static chain value for the function.  */
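
/* Resulting trampoline layout (illustrative summary, not part of the
   original source), with U = UNITS_PER_LONG:

     bytes 0 .. 2*U-1    code template copied from above
     bytes 2*U .. 3*U-1  static chain value (loaded into %r0)
     bytes 3*U .. 4*U-1  target function address (loaded into %r1)  */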
12889 
12890 static void
12891 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12892 {
12893   rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12894   rtx mem;
12895 
12896   emit_block_move (m_tramp, assemble_trampoline_template (),
12897 		   GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12898 
12899   mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12900   emit_move_insn (mem, cxt);
12901   mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12902   emit_move_insn (mem, fnaddr);
12903 }
12904 
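/* Emit HW halfwords of no-ops for USER, using the longest encodings
   first; e.g. hw = 5 emits "brcl 0,0" (3 halfwords) followed by
   "bc 0,0" (2 halfwords).  */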
12905 static void
12906 output_asm_nops (const char *user, int hw)
12907 {
12908   asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
12909   while (hw > 0)
12910     {
12911       if (hw >= 3)
12912 	{
12913 	  output_asm_insn ("brcl\t0,0", NULL);
12914 	  hw -= 3;
12915 	}
12916       else if (hw >= 2)
12917 	{
12918 	  output_asm_insn ("bc\t0,0", NULL);
12919 	  hw -= 2;
12920 	}
12921       else
12922 	{
12923 	  output_asm_insn ("bcr\t0,0", NULL);
12924 	  hw -= 1;
12925 	}
12926     }
12927 }
12928 
12929 /* Output assembler code to FILE to increment profiler label # LABELNO
12930    for profiling a function entry.  */
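
/* For the common 64-bit, non-fentry case the sequence emitted below is
   roughly (illustrative, not part of the original source):

	stg	%r14,8(%r15)
	larl	%r1,.LP0
	brasl	%r14,_mcount
	lg	%r14,8(%r15)

   i.e. the return address is spilled, %r1 is pointed at the profile
   label, and _mcount (or __fentry__, or a PLT stub when PIC) is
   called.  */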
12931 
12932 void
12933 s390_function_profiler (FILE *file, int labelno)
12934 {
12935   rtx op[8];
12936 
12937   char label[128];
12938   ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12939 
12940   fprintf (file, "# function profiler \n");
12941 
12942   op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12943   op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12944   op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12945   op[7] = GEN_INT (UNITS_PER_LONG);
12946 
12947   op[2] = gen_rtx_REG (Pmode, 1);
12948   op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12949   SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12950 
12951   op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
12952   if (flag_pic)
12953     {
12954       op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12955       op[4] = gen_rtx_CONST (Pmode, op[4]);
12956     }
12957 
12958   if (flag_record_mcount)
12959     fprintf (file, "1:\n");
12960 
12961   if (flag_fentry)
12962     {
12963       if (flag_nop_mcount)
12964 	output_asm_nops ("-mnop-mcount", /* brasl */ 3);
12965       else if (cfun->static_chain_decl)
12966 	warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
12967 		 "with %<-mfentry%> on s390");
12968       else
12969 	output_asm_insn ("brasl\t0,%4", op);
12970     }
12971   else if (TARGET_64BIT)
12972     {
12973       if (flag_nop_mcount)
12974 	output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
12975 			 /* brasl */ 3 + /* lg */ 3);
12976       else
12977 	{
12978 	  output_asm_insn ("stg\t%0,%1", op);
12979 	  if (flag_dwarf2_cfi_asm)
12980 	    output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12981 	  output_asm_insn ("larl\t%2,%3", op);
12982 	  output_asm_insn ("brasl\t%0,%4", op);
12983 	  output_asm_insn ("lg\t%0,%1", op);
12984 	  if (flag_dwarf2_cfi_asm)
12985 	    output_asm_insn (".cfi_restore\t%0", op);
12986 	}
12987     }
12988   else
12989     {
12990       if (flag_nop_mcount)
12991 	output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
12992 			 /* brasl */ 3 + /* l */ 2);
12993       else
12994 	{
12995 	  output_asm_insn ("st\t%0,%1", op);
12996 	  if (flag_dwarf2_cfi_asm)
12997 	    output_asm_insn (".cfi_rel_offset\t%0,%7", op);
12998 	  output_asm_insn ("larl\t%2,%3", op);
12999 	  output_asm_insn ("brasl\t%0,%4", op);
13000 	  output_asm_insn ("l\t%0,%1", op);
13001 	  if (flag_dwarf2_cfi_asm)
13002 	    output_asm_insn (".cfi_restore\t%0", op);
13003 	}
13004     }
13005 
13006   if (flag_record_mcount)
13007     {
13008       fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
13009       fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
13010       fprintf (file, "\t.previous\n");
13011     }
13012 }
13013 
13014 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13015    into its SYMBOL_REF_FLAGS.  */
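
/* Example (illustrative, not part of the original source): a VAR_DECL
   with only 1-byte alignment gets the NOTALIGN2 flag so that larl and the
   load-relative-long instructions, which require suitably aligned
   operands, are not used to address it.  */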
13016 
13017 static void
13018 s390_encode_section_info (tree decl, rtx rtl, int first)
13019 {
13020   default_encode_section_info (decl, rtl, first);
13021 
13022   if (TREE_CODE (decl) == VAR_DECL)
13023     {
13024       /* Store the alignment to be able to check if we can use
13025 	 a larl/load-relative instruction.  We only handle the cases
13026 	 that can go wrong (i.e. no FUNC_DECLs).  */
13027       if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13028 	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13029       else if (DECL_ALIGN (decl) % 32)
13030 	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13031       else if (DECL_ALIGN (decl) % 64)
13032 	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13033     }
13034 
13035   /* Literal pool references don't have a decl so they are handled
13036      differently here.  We rely on the information in the MEM_ALIGN
13037      entry to decide upon the alignment.  */
13038   if (MEM_P (rtl)
13039       && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13040       && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13041     {
13042       if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13043 	SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13044       else if (MEM_ALIGN (rtl) % 32)
13045 	SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13046       else if (MEM_ALIGN (rtl) % 64)
13047 	SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13048     }
13049 }
13050 
13051 /* Output thunk to FILE that implements a C++ virtual function call (with
13052    multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
13053    by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13054    stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13055    relative to the resulting this pointer.  */
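
/* In pseudo-C the adjustment performed by the emitted thunk is
   (illustrative, not part of the original source):

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     tail-call *function;  */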
13056 
13057 static void
13058 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13059 		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13060 		      tree function)
13061 {
13062   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
13063   rtx op[10];
13064   int nonlocal = 0;
13065 
13066   assemble_start_function (thunk, fnname);
13067   /* Make sure unwind info is emitted for the thunk if needed.  */
13068   final_start_function (emit_barrier (), file, 1);
13069 
13070   /* Operand 0 is the target function.  */
13071   op[0] = XEXP (DECL_RTL (function), 0);
13072   if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13073     {
13074       nonlocal = 1;
13075       op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
13076 			      TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
13077       op[0] = gen_rtx_CONST (Pmode, op[0]);
13078     }
13079 
13080   /* Operand 1 is the 'this' pointer.  */
13081   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13082     op[1] = gen_rtx_REG (Pmode, 3);
13083   else
13084     op[1] = gen_rtx_REG (Pmode, 2);
13085 
13086   /* Operand 2 is the delta.  */
13087   op[2] = GEN_INT (delta);
13088 
13089   /* Operand 3 is the vcall_offset.  */
13090   op[3] = GEN_INT (vcall_offset);
13091 
13092   /* Operand 4 is the temporary register.  */
13093   op[4] = gen_rtx_REG (Pmode, 1);
13094 
13095   /* Operands 5 to 8 can be used as labels.  */
13096   op[5] = NULL_RTX;
13097   op[6] = NULL_RTX;
13098   op[7] = NULL_RTX;
13099   op[8] = NULL_RTX;
13100 
13101   /* Operand 9 can be used for temporary register.  */
13102   op[9] = NULL_RTX;
13103 
13104   /* Generate code.  */
13105   if (TARGET_64BIT)
13106     {
13107       /* Setup literal pool pointer if required.  */
13108       if ((!DISP_IN_RANGE (delta)
13109 	   && !CONST_OK_FOR_K (delta)
13110 	   && !CONST_OK_FOR_Os (delta))
13111 	  || (!DISP_IN_RANGE (vcall_offset)
13112 	      && !CONST_OK_FOR_K (vcall_offset)
13113 	      && !CONST_OK_FOR_Os (vcall_offset)))
13114 	{
13115 	  op[5] = gen_label_rtx ();
13116 	  output_asm_insn ("larl\t%4,%5", op);
13117 	}
13118 
13119       /* Add DELTA to this pointer.  */
13120       if (delta)
13121 	{
13122 	  if (CONST_OK_FOR_J (delta))
13123 	    output_asm_insn ("la\t%1,%2(%1)", op);
13124 	  else if (DISP_IN_RANGE (delta))
13125 	    output_asm_insn ("lay\t%1,%2(%1)", op);
13126 	  else if (CONST_OK_FOR_K (delta))
13127 	    output_asm_insn ("aghi\t%1,%2", op);
13128 	  else if (CONST_OK_FOR_Os (delta))
13129 	    output_asm_insn ("agfi\t%1,%2", op);
13130 	  else
13131 	    {
13132 	      op[6] = gen_label_rtx ();
13133 	      output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13134 	    }
13135 	}
13136 
13137       /* Perform vcall adjustment.  */
13138       if (vcall_offset)
13139 	{
13140 	  if (DISP_IN_RANGE (vcall_offset))
13141 	    {
13142 	      output_asm_insn ("lg\t%4,0(%1)", op);
13143 	      output_asm_insn ("ag\t%1,%3(%4)", op);
13144 	    }
13145 	  else if (CONST_OK_FOR_K (vcall_offset))
13146 	    {
13147 	      output_asm_insn ("lghi\t%4,%3", op);
13148 	      output_asm_insn ("ag\t%4,0(%1)", op);
13149 	      output_asm_insn ("ag\t%1,0(%4)", op);
13150 	    }
13151 	  else if (CONST_OK_FOR_Os (vcall_offset))
13152 	    {
13153 	      output_asm_insn ("lgfi\t%4,%3", op);
13154 	      output_asm_insn ("ag\t%4,0(%1)", op);
13155 	      output_asm_insn ("ag\t%1,0(%4)", op);
13156 	    }
13157 	  else
13158 	    {
13159 	      op[7] = gen_label_rtx ();
13160 	      output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13161 	      output_asm_insn ("ag\t%4,0(%1)", op);
13162 	      output_asm_insn ("ag\t%1,0(%4)", op);
13163 	    }
13164 	}
13165 
13166       /* Jump to target.  */
13167       output_asm_insn ("jg\t%0", op);
13168 
13169       /* Output literal pool if required.  */
13170       if (op[5])
13171 	{
13172 	  output_asm_insn (".align\t4", op);
13173 	  targetm.asm_out.internal_label (file, "L",
13174 					  CODE_LABEL_NUMBER (op[5]));
13175 	}
13176       if (op[6])
13177 	{
13178 	  targetm.asm_out.internal_label (file, "L",
13179 					  CODE_LABEL_NUMBER (op[6]));
13180 	  output_asm_insn (".long\t%2", op);
13181 	}
13182       if (op[7])
13183 	{
13184 	  targetm.asm_out.internal_label (file, "L",
13185 					  CODE_LABEL_NUMBER (op[7]));
13186 	  output_asm_insn (".long\t%3", op);
13187 	}
13188     }
13189   else
13190     {
13191       /* Setup base pointer if required.  */
13192       if (!vcall_offset
13193 	  || (!DISP_IN_RANGE (delta)
13194 	      && !CONST_OK_FOR_K (delta)
13195 	      && !CONST_OK_FOR_Os (delta))
13196 	  || (!DISP_IN_RANGE (delta)
13197 	      && !CONST_OK_FOR_K (vcall_offset)
13198 	      && !CONST_OK_FOR_Os (vcall_offset)))
13199 	{
13200 	  op[5] = gen_label_rtx ();
13201 	  output_asm_insn ("basr\t%4,0", op);
13202 	  targetm.asm_out.internal_label (file, "L",
13203 					  CODE_LABEL_NUMBER (op[5]));
13204 	}
13205 
13206       /* Add DELTA to this pointer.  */
13207       if (delta)
13208 	{
13209 	  if (CONST_OK_FOR_J (delta))
13210 	    output_asm_insn ("la\t%1,%2(%1)", op);
13211 	  else if (DISP_IN_RANGE (delta))
13212 	    output_asm_insn ("lay\t%1,%2(%1)", op);
13213 	  else if (CONST_OK_FOR_K (delta))
13214 	    output_asm_insn ("ahi\t%1,%2", op);
13215 	  else if (CONST_OK_FOR_Os (delta))
13216 	    output_asm_insn ("afi\t%1,%2", op);
13217 	  else
13218 	    {
13219 	      op[6] = gen_label_rtx ();
13220 	      output_asm_insn ("a\t%1,%6-%5(%4)", op);
13221 	    }
13222 	}
13223 
13224       /* Perform vcall adjustment.  */
13225       if (vcall_offset)
13226 	{
13227 	  if (CONST_OK_FOR_J (vcall_offset))
13228 	    {
13229 	      output_asm_insn ("l\t%4,0(%1)", op);
13230 	      output_asm_insn ("a\t%1,%3(%4)", op);
13231 	    }
13232 	  else if (DISP_IN_RANGE (vcall_offset))
13233 	    {
13234 	      output_asm_insn ("l\t%4,0(%1)", op);
13235 	      output_asm_insn ("ay\t%1,%3(%4)", op);
13236 	    }
13237 	  else if (CONST_OK_FOR_K (vcall_offset))
13238 	    {
13239 	      output_asm_insn ("lhi\t%4,%3", op);
13240 	      output_asm_insn ("a\t%4,0(%1)", op);
13241 	      output_asm_insn ("a\t%1,0(%4)", op);
13242 	    }
13243 	  else if (CONST_OK_FOR_Os (vcall_offset))
13244 	    {
13245 	      output_asm_insn ("iilf\t%4,%3", op);
13246 	      output_asm_insn ("a\t%4,0(%1)", op);
13247 	      output_asm_insn ("a\t%1,0(%4)", op);
13248 	    }
13249 	  else
13250 	    {
13251 	      op[7] = gen_label_rtx ();
13252 	      output_asm_insn ("l\t%4,%7-%5(%4)", op);
13253 	      output_asm_insn ("a\t%4,0(%1)", op);
13254 	      output_asm_insn ("a\t%1,0(%4)", op);
13255 	    }
13256 
13257 	  /* We had to clobber the base pointer register.
13258 	     Re-setup the base pointer (with a different base).  */
13259 	  op[5] = gen_label_rtx ();
13260 	  output_asm_insn ("basr\t%4,0", op);
13261 	  targetm.asm_out.internal_label (file, "L",
13262 					  CODE_LABEL_NUMBER (op[5]));
13263 	}
13264 
13265       /* Jump to target.  */
13266       op[8] = gen_label_rtx ();
13267 
13268       if (!flag_pic)
13269 	output_asm_insn ("l\t%4,%8-%5(%4)", op);
13270       else if (!nonlocal)
13271 	output_asm_insn ("a\t%4,%8-%5(%4)", op);
13272       /* We cannot call through .plt, since .plt requires %r12 loaded.  */
13273       else if (flag_pic == 1)
13274 	{
13275 	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13276 	  output_asm_insn ("l\t%4,%0(%4)", op);
13277 	}
13278       else if (flag_pic == 2)
13279 	{
13280 	  op[9] = gen_rtx_REG (Pmode, 0);
13281 	  output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13282 	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
13283 	  output_asm_insn ("ar\t%4,%9", op);
13284 	  output_asm_insn ("l\t%4,0(%4)", op);
13285 	}
13286 
13287       output_asm_insn ("br\t%4", op);
13288 
13289       /* Output literal pool.  */
13290       output_asm_insn (".align\t4", op);
13291 
13292       if (nonlocal && flag_pic == 2)
13293 	output_asm_insn (".long\t%0", op);
13294       if (nonlocal)
13295 	{
13296 	  op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13297 	  SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13298 	}
13299 
13300       targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13301       if (!flag_pic)
13302 	output_asm_insn (".long\t%0", op);
13303       else
13304 	output_asm_insn (".long\t%0-%5", op);
13305 
13306       if (op[6])
13307 	{
13308 	  targetm.asm_out.internal_label (file, "L",
13309 					  CODE_LABEL_NUMBER (op[6]));
13310 	  output_asm_insn (".long\t%2", op);
13311 	}
13312       if (op[7])
13313 	{
13314 	  targetm.asm_out.internal_label (file, "L",
13315 					  CODE_LABEL_NUMBER (op[7]));
13316 	  output_asm_insn (".long\t%3", op);
13317 	}
13318     }
13319   final_end_function ();
13320   assemble_end_function (thunk, fnname);
13321 }
13322 
13323 /* Output either an indirect jump or an indirect call
13324    (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13325    using a branch trampoline disabling branch target prediction.  */
13326 
13327 void
13328 s390_indirect_branch_via_thunk (unsigned int regno,
13329 				unsigned int return_addr_regno,
13330 				rtx comparison_operator,
13331 				enum s390_indirect_branch_type type)
13332 {
13333   enum s390_indirect_branch_option option;
13334 
13335   if (type == s390_indirect_branch_type_return)
13336     {
13337       if (s390_return_addr_from_memory ())
13338 	option = s390_opt_function_return_mem;
13339       else
13340 	option = s390_opt_function_return_reg;
13341     }
13342   else if (type == s390_indirect_branch_type_jump)
13343     option = s390_opt_indirect_branch_jump;
13344   else if (type == s390_indirect_branch_type_call)
13345     option = s390_opt_indirect_branch_call;
13346   else
13347     gcc_unreachable ();
13348 
13349   if (TARGET_INDIRECT_BRANCH_TABLE)
13350     {
13351       char label[32];
13352 
13353       ASM_GENERATE_INTERNAL_LABEL (label,
13354 				   indirect_branch_table_label[option],
13355 				   indirect_branch_table_label_no[option]++);
13356       ASM_OUTPUT_LABEL (asm_out_file, label);
13357     }
13358 
13359   if (return_addr_regno != INVALID_REGNUM)
13360     {
13361       gcc_assert (comparison_operator == NULL_RTX);
13362       fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13363     }
13364   else
13365     {
13366       fputs (" \tjg", asm_out_file);
13367       if (comparison_operator != NULL_RTX)
13368 	print_operand (asm_out_file, comparison_operator, 'C');
13369 
13370       fputs ("\t", asm_out_file);
13371     }
13372 
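  /* With an exrl-capable CPU (z10 and later) the thunk only needs the
     branch target register.  Otherwise the ex-based thunk is used, which
     additionally relies on INDIRECT_BRANCH_THUNK_REGNUM (%r1) for its
     larl;ex sequence.  */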
13373   if (TARGET_CPU_Z10)
13374     fprintf (asm_out_file,
13375 	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13376 	     regno);
13377   else
13378     fprintf (asm_out_file,
13379 	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13380 	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
13381 
13382   if ((option == s390_opt_indirect_branch_jump
13383        && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13384       || (option == s390_opt_indirect_branch_call
13385 	  && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13386       || (option == s390_opt_function_return_reg
13387 	  && cfun->machine->function_return_reg == indirect_branch_thunk)
13388       || (option == s390_opt_function_return_mem
13389 	  && cfun->machine->function_return_mem == indirect_branch_thunk))
13390     {
13391       if (TARGET_CPU_Z10)
13392 	indirect_branch_z10thunk_mask |= (1 << regno);
13393       else
13394 	indirect_branch_prez10thunk_mask |= (1 << regno);
13395     }
13396 }
13397 
13398 /* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
13399    either be an address register or a label pointing to the location
13400    of the jump instruction.  */
13401 
13402 void
13403 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13404 {
13405   if (TARGET_INDIRECT_BRANCH_TABLE)
13406     {
13407       char label[32];
13408 
13409       ASM_GENERATE_INTERNAL_LABEL (label,
13410 				   indirect_branch_table_label[s390_opt_indirect_branch_jump],
13411 				   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13412       ASM_OUTPUT_LABEL (asm_out_file, label);
13413     }
13414 
13415   if (!TARGET_ZARCH)
13416     fputs ("\t.machinemode zarch\n", asm_out_file);
13417 
13418   if (REG_P (execute_target))
13419     fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13420   else
13421     output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13422 
13423   if (!TARGET_ZARCH)
13424     fputs ("\t.machinemode esa\n", asm_out_file);
13425 
13426   fputs ("0:\tj\t0b\n", asm_out_file);
13427 }
13428 
13429 static bool
13430 s390_valid_pointer_mode (scalar_int_mode mode)
13431 {
13432   return (mode == SImode || (TARGET_64BIT && mode == DImode));
13433 }
13434 
13435 /* Checks whether the given CALL_EXPR would use a caller
13436    saved register.  This is used to decide whether sibling call
13437    optimization could be performed on the respective function
13438    call.  */
13439 
13440 static bool
13441 s390_call_saved_register_used (tree call_expr)
13442 {
13443   CUMULATIVE_ARGS cum_v;
13444   cumulative_args_t cum;
13445   tree parameter;
13446   rtx parm_rtx;
13447   int reg, i;
13448 
13449   INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13450   cum = pack_cumulative_args (&cum_v);
13451 
13452   for (i = 0; i < call_expr_nargs (call_expr); i++)
13453     {
13454       parameter = CALL_EXPR_ARG (call_expr, i);
13455       gcc_assert (parameter);
13456 
13457       /* For an undeclared variable passed as parameter we will get
13458 	 an ERROR_MARK node here.  */
13459       if (TREE_CODE (parameter) == ERROR_MARK)
13460 	return true;
13461 
13462       /* We assume that in the target function all parameters are
13463 	 named.  This only has an impact on vector argument register
13464 	 usage, none of which is call-saved.  */
13465       function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
13466       apply_pass_by_reference_rules (&cum_v, arg);
13467 
13468       parm_rtx = s390_function_arg (cum, arg);
13469 
13470       s390_function_arg_advance (cum, arg);
13471 
13472       if (!parm_rtx)
13473 	continue;
13474 
13475       if (REG_P (parm_rtx))
13476 	{
13477 	  int size = s390_function_arg_size (arg.mode, arg.type);
13478 	  int nregs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
13479 
13480 	  for (reg = 0; reg < nregs; reg++)
13481 	    if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
13482 	      return true;
13483 	}
13484       else if (GET_CODE (parm_rtx) == PARALLEL)
13485 	{
13486 	  int i;
13487 
13488 	  for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13489 	    {
13490 	      rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13491 
13492 	      gcc_assert (REG_P (r));
13493 	      gcc_assert (REG_NREGS (r) == 1);
13494 
13495 	      if (!call_used_or_fixed_reg_p (REGNO (r)))
13496 		return true;
13497 	    }
13498 	}
13499     }
13500   return false;
13501 }
13502 
13503 /* Return true if the given call expression can be
13504    turned into a sibling call.
13505    DECL holds the declaration of the function to be called whereas
13506    EXP is the call expression itself.  */
13507 
13508 static bool
13509 s390_function_ok_for_sibcall (tree decl, tree exp)
13510 {
13511   /* The TPF epilogue uses register 1.  */
13512   if (TARGET_TPF_PROFILING)
13513     return false;
13514 
13515   /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13516      which would have to be restored before the sibcall.  */
13517   if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13518     return false;
13519 
13520   /* The thunks for indirect branches require r1 if no exrl is
13521      available.  r1 might not be available when doing a sibling
13522      call.  */
13523   if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13524       && !TARGET_CPU_Z10
13525       && !decl)
13526     return false;
13527 
13528   /* Register 6 on s390 is available as an argument register but unfortunately
13529      "caller saved". This makes functions needing this register for arguments
13530      not suitable for sibcalls.  */
13531   return !s390_call_saved_register_used (exp);
13532 }
13533 
13534 /* Return the fixed registers used for condition codes.  */
13535 
13536 static bool
13537 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13538 {
13539   *p1 = CC_REGNUM;
13540   *p2 = INVALID_REGNUM;
13541 
13542   return true;
13543 }
13544 
13545 /* This function is used by the call expanders of the machine description.
13546    It emits the call insn itself together with the necessary operations
13547    to adjust the target address and returns the emitted insn.
13548    ADDR_LOCATION is the target address rtx
13549    TLS_CALL the location of the thread-local symbol
13550    RESULT_REG the register where the result of the call should be stored
13551    RETADDR_REG the register where the return address should be stored
13552 	       If this parameter is NULL_RTX the call is considered
13553 	       to be a sibling call.  */
13554 
13555 rtx_insn *
13556 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13557 		rtx retaddr_reg)
13558 {
13559   bool plt_call = false;
13560   rtx_insn *insn;
13561   rtx vec[4] = { NULL_RTX };
13562   int elts = 0;
13563   rtx *call = &vec[0];
13564   rtx *clobber_ret_reg = &vec[1];
13565   rtx *use = &vec[2];
13566   rtx *clobber_thunk_reg = &vec[3];
13567   int i;
13568 
13569   /* Direct function calls need special treatment.  */
13570   if (GET_CODE (addr_location) == SYMBOL_REF)
13571     {
13572       /* When calling a global routine in PIC mode, we must
13573 	 replace the symbol itself with the PLT stub.  */
13574       if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13575 	{
13576 	  if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13577 	    {
13578 	      addr_location = gen_rtx_UNSPEC (Pmode,
13579 					      gen_rtvec (1, addr_location),
13580 					      UNSPEC_PLT);
13581 	      addr_location = gen_rtx_CONST (Pmode, addr_location);
13582 	      plt_call = true;
13583 	    }
13584 	  else
13585 	    /* For -fpic code the PLT entries might use r12 which is
13586 	       call-saved.  Therefore we cannot do a sibcall when
13587 	       calling directly using a symbol ref.  When reaching
13588 	       this point we decided (in s390_function_ok_for_sibcall)
13589 	       to do a sibcall for a function pointer but one of the
13590 	       optimizers was able to get rid of the function pointer
13591 	       by propagating the symbol ref into the call.  This
13592 	       optimization is illegal for S/390, so we turn the direct
13593 	       call into an indirect call again.  */
13594 	    addr_location = force_reg (Pmode, addr_location);
13595 	}
13596     }
13597 
13598   /* If it is already an indirect call or the code above moved the
13599      SYMBOL_REF to somewhere else, make sure the address can be found in
13600      register 1.  */
13601   if (retaddr_reg == NULL_RTX
13602       && GET_CODE (addr_location) != SYMBOL_REF
13603       && !plt_call)
13604     {
13605       emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13606       addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13607     }
13608 
13609   if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13610       && GET_CODE (addr_location) != SYMBOL_REF
13611       && !plt_call)
13612     {
13613       /* Indirect branch thunks require the target to be a single GPR.  */
13614       addr_location = force_reg (Pmode, addr_location);
13615 
13616       /* Without exrl the indirect branch thunks need an additional
13617 	 register for larl;ex */
13618       if (!TARGET_CPU_Z10)
13619 	{
13620 	  *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13621 	  *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13622 	}
13623     }
13624 
13625   addr_location = gen_rtx_MEM (QImode, addr_location);
13626   *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13627 
13628   if (result_reg != NULL_RTX)
13629     *call = gen_rtx_SET (result_reg, *call);
13630 
13631   if (retaddr_reg != NULL_RTX)
13632     {
13633       *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13634 
13635       if (tls_call != NULL_RTX)
13636 	*use = gen_rtx_USE (VOIDmode, tls_call);
13637     }
13638 
13639 
13640   for (i = 0; i < 4; i++)
13641     if (vec[i] != NULL_RTX)
13642       elts++;
13643 
13644   if (elts > 1)
13645     {
13646       rtvec v;
13647       int e = 0;
13648 
13649       v = rtvec_alloc (elts);
13650       for (i = 0; i < 4; i++)
13651 	if (vec[i] != NULL_RTX)
13652 	  {
13653 	    RTVEC_ELT (v, e) = vec[i];
13654 	    e++;
13655 	  }
13656 
13657       *call = gen_rtx_PARALLEL (VOIDmode, v);
13658     }
13659 
13660   insn = emit_call_insn (*call);
13661 
13662   /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
13663   if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13664     {
13665       /* s390_function_ok_for_sibcall should
13666 	 have denied sibcalls in this case.  */
13667       gcc_assert (retaddr_reg != NULL_RTX);
13668       use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13669     }
13670   return insn;
13671 }
13672 
13673 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
13674 
13675 static void
13676 s390_conditional_register_usage (void)
13677 {
13678   int i;
13679 
13680   if (flag_pic)
13681     fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13682   fixed_regs[BASE_REGNUM] = 0;
13683   fixed_regs[RETURN_REGNUM] = 0;
13684   if (TARGET_64BIT)
13685     {
13686       for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13687 	call_used_regs[i] = 0;
13688     }
13689   else
13690     {
13691       call_used_regs[FPR4_REGNUM] = 0;
13692       call_used_regs[FPR6_REGNUM] = 0;
13693     }
13694 
13695   if (TARGET_SOFT_FLOAT)
13696     {
13697       for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13698 	fixed_regs[i] = 1;
13699     }
13700 
13701   /* Disable v16 - v31 for non-vector target.  */
13702   if (!TARGET_VX)
13703     {
13704       for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13705 	fixed_regs[i] = call_used_regs[i] = 1;
13706     }
13707 }
13708 
13709 /* Corresponding function to eh_return expander.  */
13710 
13711 static GTY(()) rtx s390_tpf_eh_return_symbol;
13712 void
13713 s390_emit_tpf_eh_return (rtx target)
13714 {
13715   rtx_insn *insn;
13716   rtx reg, orig_ra;
13717 
13718   if (!s390_tpf_eh_return_symbol)
13719     s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13720 
13721   reg = gen_rtx_REG (Pmode, 2);
13722   orig_ra = gen_rtx_REG (Pmode, 3);
13723 
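  /* %r2 carries the handler address (and also receives the call's result),
     %r3 the original return address; both are recorded below as explicit
     uses of the call to __tpf_eh_return.  */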
13724   emit_move_insn (reg, target);
13725   emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13726   insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13727 				     gen_rtx_REG (Pmode, RETURN_REGNUM));
13728   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13729   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13730 
13731   emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13732 }
13733 
13734 /* Rework the prologue/epilogue to avoid saving/restoring
13735    registers unnecessarily.  */
13736 
13737 static void
13738 s390_optimize_prologue (void)
13739 {
13740   rtx_insn *insn, *new_insn, *next_insn;
13741 
13742   /* Do a final recompute of the frame-related data.  */
13743   s390_optimize_register_info ();
13744 
13745   /* If all special registers are in fact used, there's nothing we
13746      can do, so no point in walking the insn list.  */
13747 
13748   if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13749       && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
13750     return;
13751 
13752   /* Search for prologue/epilogue insns and replace them.  */
13753   for (insn = get_insns (); insn; insn = next_insn)
13754     {
13755       int first, last, off;
13756       rtx set, base, offset;
13757       rtx pat;
13758 
13759       next_insn = NEXT_INSN (insn);
13760 
13761       if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13762 	continue;
13763 
13764       pat = PATTERN (insn);
13765 
13766       /* Remove ldgr/lgdr instructions used for saving and restoring
13767 	 GPRs if possible.  */
13768       if (TARGET_Z10)
13769 	{
13770 	  rtx tmp_pat = pat;
13771 
13772 	  if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13773 	    tmp_pat = XVECEXP (pat, 0, 0);
13774 
13775 	  if (GET_CODE (tmp_pat) == SET
13776 	      && GET_MODE (SET_SRC (tmp_pat)) == DImode
13777 	      && REG_P (SET_SRC (tmp_pat))
13778 	      && REG_P (SET_DEST (tmp_pat)))
13779 	    {
13780 	      int src_regno = REGNO (SET_SRC (tmp_pat));
13781 	      int dest_regno = REGNO (SET_DEST (tmp_pat));
13782 	      int gpr_regno;
13783 	      int fpr_regno;
13784 
13785 	      if (!((GENERAL_REGNO_P (src_regno)
13786 		     && FP_REGNO_P (dest_regno))
13787 		    || (FP_REGNO_P (src_regno)
13788 			&& GENERAL_REGNO_P (dest_regno))))
13789 		continue;
13790 
13791 	      gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13792 	      fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13793 
13794 	      /* GPR must be call-saved, FPR must be call-clobbered.  */
13795 	      if (!call_used_regs[fpr_regno]
13796 		  || call_used_regs[gpr_regno])
13797 		continue;
13798 
13799 	      /* It must not happen that what we once saved in an FPR now
13800 		 needs a stack slot.  */
13801 	      gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13802 
13803 	      if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13804 		{
13805 		  remove_insn (insn);
13806 		  continue;
13807 		}
13808 	    }
13809 	}
13810 
13811       if (GET_CODE (pat) == PARALLEL
13812 	  && store_multiple_operation (pat, VOIDmode))
13813 	{
13814 	  set = XVECEXP (pat, 0, 0);
13815 	  first = REGNO (SET_SRC (set));
13816 	  last = first + XVECLEN (pat, 0) - 1;
13817 	  offset = const0_rtx;
13818 	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13819 	  off = INTVAL (offset);
13820 
13821 	  if (GET_CODE (base) != REG || off < 0)
13822 	    continue;
13823 	  if (cfun_frame_layout.first_save_gpr != -1
13824 	      && (cfun_frame_layout.first_save_gpr < first
13825 		  || cfun_frame_layout.last_save_gpr > last))
13826 	    continue;
13827 	  if (REGNO (base) != STACK_POINTER_REGNUM
13828 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13829 	    continue;
13830 	  if (first > BASE_REGNUM || last < BASE_REGNUM)
13831 	    continue;
13832 
13833 	  if (cfun_frame_layout.first_save_gpr != -1)
13834 	    {
13835 	      rtx s_pat = save_gprs (base,
13836 				     off + (cfun_frame_layout.first_save_gpr
13837 					    - first) * UNITS_PER_LONG,
13838 				     cfun_frame_layout.first_save_gpr,
13839 				     cfun_frame_layout.last_save_gpr);
13840 	      new_insn = emit_insn_before (s_pat, insn);
13841 	      INSN_ADDRESSES_NEW (new_insn, -1);
13842 	    }
13843 
13844 	  remove_insn (insn);
13845 	  continue;
13846 	}
13847 
13848       if (cfun_frame_layout.first_save_gpr == -1
13849 	  && GET_CODE (pat) == SET
13850 	  && GENERAL_REG_P (SET_SRC (pat))
13851 	  && GET_CODE (SET_DEST (pat)) == MEM)
13852 	{
13853 	  set = pat;
13854 	  first = REGNO (SET_SRC (set));
13855 	  offset = const0_rtx;
13856 	  base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13857 	  off = INTVAL (offset);
13858 
13859 	  if (GET_CODE (base) != REG || off < 0)
13860 	    continue;
13861 	  if (REGNO (base) != STACK_POINTER_REGNUM
13862 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13863 	    continue;
13864 
13865 	  remove_insn (insn);
13866 	  continue;
13867 	}
13868 
13869       if (GET_CODE (pat) == PARALLEL
13870 	  && load_multiple_operation (pat, VOIDmode))
13871 	{
13872 	  set = XVECEXP (pat, 0, 0);
13873 	  first = REGNO (SET_DEST (set));
13874 	  last = first + XVECLEN (pat, 0) - 1;
13875 	  offset = const0_rtx;
13876 	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13877 	  off = INTVAL (offset);
13878 
13879 	  if (GET_CODE (base) != REG || off < 0)
13880 	    continue;
13881 
13882 	  if (cfun_frame_layout.first_restore_gpr != -1
13883 	      && (cfun_frame_layout.first_restore_gpr < first
13884 		  || cfun_frame_layout.last_restore_gpr > last))
13885 	    continue;
13886 	  if (REGNO (base) != STACK_POINTER_REGNUM
13887 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13888 	    continue;
13889 	  if (first > BASE_REGNUM || last < BASE_REGNUM)
13890 	    continue;
13891 
13892 	  if (cfun_frame_layout.first_restore_gpr != -1)
13893 	    {
13894 	      rtx rpat = restore_gprs (base,
13895 				       off + (cfun_frame_layout.first_restore_gpr
13896 					      - first) * UNITS_PER_LONG,
13897 				       cfun_frame_layout.first_restore_gpr,
13898 				       cfun_frame_layout.last_restore_gpr);
13899 
13900 	      /* Remove REG_CFA_RESTOREs for registers that we no
13901 		 longer need to save.  */
13902 	      REG_NOTES (rpat) = REG_NOTES (insn);
13903 	      for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13904 		if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13905 		    && ((int) REGNO (XEXP (*ptr, 0))
13906 			< cfun_frame_layout.first_restore_gpr))
13907 		  *ptr = XEXP (*ptr, 1);
13908 		else
13909 		  ptr = &XEXP (*ptr, 1);
13910 	      new_insn = emit_insn_before (rpat, insn);
13911 	      RTX_FRAME_RELATED_P (new_insn) = 1;
13912 	      INSN_ADDRESSES_NEW (new_insn, -1);
13913 	    }
13914 
13915 	  remove_insn (insn);
13916 	  continue;
13917 	}
13918 
13919       if (cfun_frame_layout.first_restore_gpr == -1
13920 	  && GET_CODE (pat) == SET
13921 	  && GENERAL_REG_P (SET_DEST (pat))
13922 	  && GET_CODE (SET_SRC (pat)) == MEM)
13923 	{
13924 	  set = pat;
13925 	  first = REGNO (SET_DEST (set));
13926 	  offset = const0_rtx;
13927 	  base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13928 	  off = INTVAL (offset);
13929 
13930 	  if (GET_CODE (base) != REG || off < 0)
13931 	    continue;
13932 
13933 	  if (REGNO (base) != STACK_POINTER_REGNUM
13934 	      && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13935 	    continue;
13936 
13937 	  remove_insn (insn);
13938 	  continue;
13939 	}
13940     }
13941 }
13942 
13943 /* On z10 and later the dynamic branch prediction must see the
13944    backward jump within a certain window.  If not, it falls back to
13945    the static prediction.  This function rearranges the loop backward
13946    branch in a way which makes the static prediction always correct.
13947    The function returns true if it added an instruction.  */
13948 static bool
13949 s390_fix_long_loop_prediction (rtx_insn *insn)
13950 {
13951   rtx set = single_set (insn);
13952   rtx code_label, label_ref;
13953   rtx_insn *uncond_jump;
13954   rtx_insn *cur_insn;
13955   rtx tmp;
13956   int distance;
13957 
13958   /* This will exclude branch on count and branch on index patterns
13959      since these are correctly statically predicted.  */
13960   if (!set
13961       || SET_DEST (set) != pc_rtx
13962       || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13963     return false;
13964 
13965   /* Skip conditional returns.  */
13966   if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13967       && XEXP (SET_SRC (set), 2) == pc_rtx)
13968     return false;
13969 
13970   label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13971 	       XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13972 
13973   gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13974 
13975   code_label = XEXP (label_ref, 0);
13976 
13977   if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13978       || INSN_ADDRESSES (INSN_UID (insn)) == -1
13979       || (INSN_ADDRESSES (INSN_UID (insn))
13980 	  - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13981     return false;
13982 
13983   for (distance = 0, cur_insn = PREV_INSN (insn);
13984        distance < PREDICT_DISTANCE - 6;
13985        distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13986     if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13987       return false;
13988 
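  /* Rewrite the branch: emit an unconditional jump to the original target
     right after INSN, invert INSN's condition and redirect it to a new
     label placed behind that jump.  The backward branch thus becomes
     unconditional, and the static prediction is always correct.  */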
13989   rtx_code_label *new_label = gen_label_rtx ();
13990   uncond_jump = emit_jump_insn_after (
13991 		  gen_rtx_SET (pc_rtx,
13992 			       gen_rtx_LABEL_REF (VOIDmode, code_label)),
13993 		  insn);
13994   emit_label_after (new_label, uncond_jump);
13995 
13996   tmp = XEXP (SET_SRC (set), 1);
13997   XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13998   XEXP (SET_SRC (set), 2) = tmp;
13999   INSN_CODE (insn) = -1;
14000 
14001   XEXP (label_ref, 0) = new_label;
14002   JUMP_LABEL (insn) = new_label;
14003   JUMP_LABEL (uncond_jump) = code_label;
14004 
14005   return true;
14006 }
14007 
14008 /* Returns 1 if INSN reads the value of REG for purposes not related
14009    to addressing of memory, and 0 otherwise.  */
14010 static int
14011 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14012 {
14013   return reg_referenced_p (reg, PATTERN (insn))
14014     && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14015 }
14016 
14017 /* Starting from INSN find_cond_jump looks downwards in the insn
14018    stream for a single jump insn which is the last user of the
14019    condition code set in INSN.  */
14020 static rtx_insn *
14021 find_cond_jump (rtx_insn *insn)
14022 {
14023   for (; insn; insn = NEXT_INSN (insn))
14024     {
14025       rtx ite, cc;
14026 
14027       if (LABEL_P (insn))
14028 	break;
14029 
14030       if (!JUMP_P (insn))
14031 	{
14032 	  if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14033 	    break;
14034 	  continue;
14035 	}
14036 
14037       /* This will be triggered by a return.  */
14038       if (GET_CODE (PATTERN (insn)) != SET)
14039 	break;
14040 
14041       gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14042       ite = SET_SRC (PATTERN (insn));
14043 
14044       if (GET_CODE (ite) != IF_THEN_ELSE)
14045 	break;
14046 
14047       cc = XEXP (XEXP (ite, 0), 0);
14048       if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14049 	break;
14050 
14051       if (find_reg_note (insn, REG_DEAD, cc))
14052 	return insn;
14053       break;
14054     }
14055 
14056   return NULL;
14057 }
14058 
14059 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14060    the semantics does not change.  If NULL_RTX is passed as COND the
14061    function tries to find the conditional jump starting with INSN.  */
14062 static void
14063 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14064 {
14065   rtx tmp = *op0;
14066 
14067   if (cond == NULL_RTX)
14068     {
14069       rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14070       rtx set = jump ? single_set (jump) : NULL_RTX;
14071 
14072       if (set == NULL_RTX)
14073 	return;
14074 
14075       cond = XEXP (SET_SRC (set), 0);
14076     }
14077 
14078   *op0 = *op1;
14079   *op1 = tmp;
14080   PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14081 }
14082 
14083 /* On z10, instructions of the compare-and-branch family have the
14084    property to access the register occurring as second operand with
14085    its bits complemented.  If such a compare is grouped with a second
14086    instruction that accesses the same register non-complemented, and
14087    if that register's value is delivered via a bypass, then the
14088    pipeline recycles, thereby causing significant performance decline.
14089    This function locates such situations and exchanges the two
14090    operands of the compare.  The function return true whenever it
14091    operands of the compare.  The function returns true whenever it
14092 static bool
14093 s390_z10_optimize_cmp (rtx_insn *insn)
14094 {
14095   rtx_insn *prev_insn, *next_insn;
14096   bool insn_added_p = false;
14097   rtx cond, *op0, *op1;
14098 
14099   if (GET_CODE (PATTERN (insn)) == PARALLEL)
14100     {
14101       /* Handle compare and branch and branch on count
14102 	 instructions.  */
14103       rtx pattern = single_set (insn);
14104 
14105       if (!pattern
14106 	  || SET_DEST (pattern) != pc_rtx
14107 	  || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14108 	return false;
14109 
14110       cond = XEXP (SET_SRC (pattern), 0);
14111       op0 = &XEXP (cond, 0);
14112       op1 = &XEXP (cond, 1);
14113     }
14114   else if (GET_CODE (PATTERN (insn)) == SET)
14115     {
14116       rtx src, dest;
14117 
14118       /* Handle normal compare instructions.  */
14119       src = SET_SRC (PATTERN (insn));
14120       dest = SET_DEST (PATTERN (insn));
14121 
14122       if (!REG_P (dest)
14123 	  || !CC_REGNO_P (REGNO (dest))
14124 	  || GET_CODE (src) != COMPARE)
14125 	return false;
14126 
14127       /* s390_swap_cmp will try to find the conditional
14128 	 jump when passing NULL_RTX as condition.  */
14129       cond = NULL_RTX;
14130       op0 = &XEXP (src, 0);
14131       op1 = &XEXP (src, 1);
14132     }
14133   else
14134     return false;
14135 
14136   if (!REG_P (*op0) || !REG_P (*op1))
14137     return false;
14138 
14139   if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14140     return false;
14141 
14142   /* Swap the COMPARE arguments and its mask if there is a
14143      conflicting access in the previous insn.  */
14144   prev_insn = prev_active_insn (insn);
14145   if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14146       && reg_referenced_p (*op1, PATTERN (prev_insn)))
14147     s390_swap_cmp (cond, op0, op1, insn);
14148 
14149   /* Check if there is a conflict with the next insn. If there
14150      was no conflict with the previous insn, then swap the
14151      COMPARE arguments and its mask.  If we already swapped
14152      the operands, or if swapping them would cause a conflict
14153      with the previous insn, issue a NOP after the COMPARE in
14154 	 order to separate the two instructions.  */
14155   next_insn = next_active_insn (insn);
14156   if (next_insn != NULL_RTX && INSN_P (next_insn)
14157       && s390_non_addr_reg_read_p (*op1, next_insn))
14158     {
14159       if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14160 	  && s390_non_addr_reg_read_p (*op0, prev_insn))
14161 	{
14162 	  if (REGNO (*op1) == 0)
14163 	    emit_insn_after (gen_nop_lr1 (), insn);
14164 	  else
14165 	    emit_insn_after (gen_nop_lr0 (), insn);
14166 	  insn_added_p = true;
14167 	}
14168       else
14169 	s390_swap_cmp (cond, op0, op1, insn);
14170     }
14171   return insn_added_p;
14172 }
14173 
14174 /* Number of INSNs to be scanned backward in the last BB of the loop
14175    and forward in the first BB of the loop.  This usually should be a
14176    bit more than the number of INSNs which could go into one
14177    group.  */
14178 #define S390_OSC_SCAN_INSN_NUM 5
14179 
14180 /* Scan LOOP for static OSC collisions and return true if an osc_break
14181    should be issued for this loop.  */
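/* OSC is short for operand-store-compare, the hardware mechanism dealing
   with loads that depend on earlier stores to the same address.  The scan
   below looks for a store at the end of the loop body and a matching load
   at its start whose base or index register is modified in between; in that
   case s390_adjust_loops inserts an osc_break before the backward branch.  */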
14182 static bool
14183 s390_adjust_loop_scan_osc (struct loop* loop)
14184 
14185 {
14186   HARD_REG_SET modregs, newregs;
14187   rtx_insn *insn, *store_insn = NULL;
14188   rtx set;
14189   struct s390_address addr_store, addr_load;
14190   subrtx_iterator::array_type array;
14191   int insn_count;
14192 
14193   CLEAR_HARD_REG_SET (modregs);
14194 
14195   insn_count = 0;
14196   FOR_BB_INSNS_REVERSE (loop->latch, insn)
14197     {
14198       if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14199 	continue;
14200 
14201       insn_count++;
14202       if (insn_count > S390_OSC_SCAN_INSN_NUM)
14203 	return false;
14204 
14205       find_all_hard_reg_sets (insn, &newregs, true);
14206       modregs |= newregs;
14207 
14208       set = single_set (insn);
14209       if (!set)
14210 	continue;
14211 
14212       if (MEM_P (SET_DEST (set))
14213 	  && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14214 	{
14215 	  store_insn = insn;
14216 	  break;
14217 	}
14218     }
14219 
14220   if (store_insn == NULL_RTX)
14221     return false;
14222 
14223   insn_count = 0;
14224   FOR_BB_INSNS (loop->header, insn)
14225     {
14226       if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14227 	continue;
14228 
14229       if (insn == store_insn)
14230 	return false;
14231 
14232       insn_count++;
14233       if (insn_count > S390_OSC_SCAN_INSN_NUM)
14234 	return false;
14235 
14236       find_all_hard_reg_sets (insn, &newregs, true);
14237       modregs |= newregs;
14238 
14239       set = single_set (insn);
14240       if (!set)
14241 	continue;
14242 
14243       /* An intermediate store disrupts static OSC checking
14244 	 anyway.  */
14245       if (MEM_P (SET_DEST (set))
14246 	  && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14247 	return false;
14248 
14249       FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14250 	if (MEM_P (*iter)
14251 	    && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14252 	    && rtx_equal_p (addr_load.base, addr_store.base)
14253 	    && rtx_equal_p (addr_load.indx, addr_store.indx)
14254 	    && rtx_equal_p (addr_load.disp, addr_store.disp))
14255 	  {
14256 	    if ((addr_load.base != NULL_RTX
14257 		 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14258 		|| (addr_load.indx != NULL_RTX
14259 		    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14260 	      return true;
14261 	  }
14262     }
14263   return false;
14264 }
14265 
14266 /* Look for adjustments which can be done on simple innermost
14267    loops.  */
14268 static void
14269 s390_adjust_loops ()
14270 {
14271   struct loop *loop = NULL;
14272 
14273   df_analyze ();
14274   compute_bb_for_insn ();
14275 
14276   /* Find the loops.  */
14277   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14278 
14279   FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14280     {
14281       if (dump_file)
14282 	{
14283 	  flow_loop_dump (loop, dump_file, NULL, 0);
14284 	  fprintf (dump_file, ";;  OSC loop scan Loop: ");
14285 	}
14286       if (loop->latch == NULL
14287 	  || pc_set (BB_END (loop->latch)) == NULL_RTX
14288 	  || !s390_adjust_loop_scan_osc (loop))
14289 	{
14290 	  if (dump_file)
14291 	    {
14292 	      if (loop->latch == NULL)
14293 		fprintf (dump_file, " multiple backward jumps\n");
14294 	      else
14295 		{
14296 		  fprintf (dump_file, " header insn: %d latch insn: %d ",
14297 			   INSN_UID (BB_HEAD (loop->header)),
14298 			   INSN_UID (BB_END (loop->latch)));
14299 		  if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14300 		    fprintf (dump_file, " loop does not end with jump\n");
14301 		  else
14302 		    fprintf (dump_file, " not instrumented\n");
14303 		}
14304 	    }
14305 	}
14306       else
14307 	{
14308 	  rtx_insn *new_insn;
14309 
14310 	  if (dump_file)
14311 	    fprintf (dump_file, " adding OSC break insn: ");
14312 	  new_insn = emit_insn_before (gen_osc_break (),
14313 				       BB_END (loop->latch));
14314 	  INSN_ADDRESSES_NEW (new_insn, -1);
14315 	}
14316     }
14317 
14318   loop_optimizer_finalize ();
14319 
14320   df_finish_pass (false);
14321 }
14322 
14323 /* Perform machine-dependent processing.  */
14324 
14325 static void
14326 s390_reorg (void)
14327 {
14328   struct constant_pool *pool;
14329   rtx_insn *insn;
14330   int hw_before, hw_after;
14331 
14332   if (s390_tune == PROCESSOR_2964_Z13)
14333     s390_adjust_loops ();
14334 
14335   /* Make sure all splits have been performed; splits after
14336      machine_dependent_reorg might confuse insn length counts.  */
14337   split_all_insns_noflow ();
14338 
14339   /* Install the main literal pool and the associated base
14340      register load insns.  The literal pool might be > 4096 bytes in
14341      size, so that some of its elements cannot be directly accessed.
14342 
14343      To fix this, we split the single literal pool into multiple
14344      pool chunks, reloading the pool base register at various
14345      points throughout the function to ensure it always points to
14346      the pool chunk the following code expects.  */
14347 
14348   /* Collect the literal pool.  */
14349   pool = s390_mainpool_start ();
14350   if (pool)
14351     {
14352       /* Finish up literal pool related changes.  */
14353       s390_mainpool_finish (pool);
14354     }
14355   else
14356     {
14357       /* If literal pool overflowed, chunkify it.  */
14358       pool = s390_chunkify_start ();
14359       s390_chunkify_finish (pool);
14360     }
14361 
14362   /* Generate out-of-pool execute target insns.  */
14363   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14364     {
14365       rtx label;
14366       rtx_insn *target;
14367 
14368       label = s390_execute_label (insn);
14369       if (!label)
14370 	continue;
14371 
14372       gcc_assert (label != const0_rtx);
14373 
14374       target = emit_label (XEXP (label, 0));
14375       INSN_ADDRESSES_NEW (target, -1);
14376 
14377       if (JUMP_P (insn))
14378 	{
14379 	  target = emit_jump_insn (s390_execute_target (insn));
14380 	  /* This is important in order to keep a table jump
14381 	     pointing at the jump table label.  Only this way is it
14382 	     recognized as a table jump.  */
14383 	  JUMP_LABEL (target) = JUMP_LABEL (insn);
14384 	}
14385       else
14386 	target = emit_insn (s390_execute_target (insn));
14387       INSN_ADDRESSES_NEW (target, -1);
14388     }
14389 
14390   /* Try to optimize prologue and epilogue further.  */
14391   s390_optimize_prologue ();
14392 
14393   /* Walk over the insns and do some >=z10 specific changes.  */
14394   if (s390_tune >= PROCESSOR_2097_Z10)
14395     {
14396       rtx_insn *insn;
14397       bool insn_added_p = false;
14398 
14399       /* The insn lengths and addresses have to be up to date for the
14400 	 following manipulations.  */
14401       shorten_branches (get_insns ());
14402 
14403       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14404 	{
14405 	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14406 	    continue;
14407 
14408 	  if (JUMP_P (insn))
14409 	    insn_added_p |= s390_fix_long_loop_prediction (insn);
14410 
14411 	  if ((GET_CODE (PATTERN (insn)) == PARALLEL
14412 	       || GET_CODE (PATTERN (insn)) == SET)
14413 	      && s390_tune == PROCESSOR_2097_Z10)
14414 	    insn_added_p |= s390_z10_optimize_cmp (insn);
14415 	}
14416 
14417       /* Adjust branches if we added new instructions.  */
14418       if (insn_added_p)
14419 	shorten_branches (get_insns ());
14420     }
14421 
14422   s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14423   if (hw_after > 0)
14424     {
14425       rtx_insn *insn;
14426 
14427       /* Insert NOPs for hotpatching. */
14428       for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14429 	/* Emit NOPs
14430 	    1. inside the area covered by debug information to allow setting
14431 	       breakpoints at the NOPs,
14432 	    2. before any insn which results in an asm instruction,
14433 	    3. before in-function labels to avoid jumping to the NOPs, for
14434 	       example as part of a loop,
14435 	    4. before any barrier in case the function is completely empty
14436 	       (__builtin_unreachable ()) and has neither internal labels nor
14437 	       active insns.
14438 	*/
14439 	if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14440 	  break;
14441       /* Output a series of NOPs before the first active insn.  */
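      /* hw_after counts remaining halfwords; emit the largest NOP
	 (6, 4 or 2 bytes, i.e. 3, 2 or 1 halfwords) that still fits.  */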
14442       while (insn && hw_after > 0)
14443 	{
14444 	  if (hw_after >= 3)
14445 	    {
14446 	      emit_insn_before (gen_nop_6_byte (), insn);
14447 	      hw_after -= 3;
14448 	    }
14449 	  else if (hw_after >= 2)
14450 	    {
14451 	      emit_insn_before (gen_nop_4_byte (), insn);
14452 	      hw_after -= 2;
14453 	    }
14454 	  else
14455 	    {
14456 	      emit_insn_before (gen_nop_2_byte (), insn);
14457 	      hw_after -= 1;
14458 	    }
14459 	}
14460     }
14461 }
14462 
14463 /* Return true if INSN is a fp load insn writing register REGNO.  */
14464 static inline bool
14465 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14466 {
14467   rtx set;
14468   enum attr_type flag = s390_safe_attr_type (insn);
14469 
14470   if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14471     return false;
14472 
14473   set = single_set (insn);
14474 
14475   if (set == NULL_RTX)
14476     return false;
14477 
14478   if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14479     return false;
14480 
14481   if (REGNO (SET_DEST (set)) != regno)
14482     return false;
14483 
14484   return true;
14485 }
14486 
14487 /* This value describes the distance to be avoided between an
14488    arithmetic fp instruction and an fp load writing the same register.
14489    Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
14490    fine, but the exact value has to be avoided.  Otherwise the FP
14491    pipeline will throw an exception causing a major penalty.  */
14492 #define Z10_EARLYLOAD_DISTANCE 7
14493 
14494 /* Rearrange the ready list in order to avoid the situation described
14495    for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
14496    moved to the very end of the ready list.  */
14497 static void
14498 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14499 {
14500   unsigned int regno;
14501   int nready = *nready_p;
14502   rtx_insn *tmp;
14503   int i;
14504   rtx_insn *insn;
14505   rtx set;
14506   enum attr_type flag;
14507   int distance;
14508 
14509   /* Skip DISTANCE - 1 active insns.  */
14510   for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14511        distance > 0 && insn != NULL_RTX;
14512        distance--, insn = prev_active_insn (insn))
14513     if (CALL_P (insn) || JUMP_P (insn))
14514       return;
14515 
14516   if (insn == NULL_RTX)
14517     return;
14518 
14519   set = single_set (insn);
14520 
14521   if (set == NULL_RTX || !REG_P (SET_DEST (set))
14522       || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14523     return;
14524 
14525   flag = s390_safe_attr_type (insn);
14526 
14527   if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14528     return;
14529 
14530   regno = REGNO (SET_DEST (set));
14531   i = nready - 1;
14532 
14533   while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14534     i--;
14535 
14536   if (!i)
14537     return;
14538 
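  /* ready[i] is the conflicting fp load.  Shift the other entries up and
     place it at ready[0]; since insns are issued from the end of the ready
     list, this delays the load as much as possible.  */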
14539   tmp = ready[i];
14540   memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14541   ready[0] = tmp;
14542 }
14543 
14544 /* Returns TRUE if BB is entered via a fallthru edge and all other
14545    incoming edges are less than likely.  */
14546 static bool
14547 s390_bb_fallthru_entry_likely (basic_block bb)
14548 {
14549   edge e, fallthru_edge;
14550   edge_iterator ei;
14551 
14552   if (!bb)
14553     return false;
14554 
14555   fallthru_edge = find_fallthru_edge (bb->preds);
14556   if (!fallthru_edge)
14557     return false;
14558 
14559   FOR_EACH_EDGE (e, ei, bb->preds)
14560     if (e != fallthru_edge
14561 	&& e->probability >= profile_probability::likely ())
14562       return false;
14563 
14564   return true;
14565 }
14566 
14567 struct s390_sched_state
14568 {
14569   /* Number of insns in the group.  */
14570   int group_state;
14571   /* Execution side of the group.  */
14572   int side;
14573   /* Group can only hold two insns.  */
14574   bool group_of_two;
14575 } s390_sched_state;
14576 
14577 static struct s390_sched_state sched_state = {0, 1, false};
14578 
14579 #define S390_SCHED_ATTR_MASK_CRACKED    0x1
14580 #define S390_SCHED_ATTR_MASK_EXPANDED   0x2
14581 #define S390_SCHED_ATTR_MASK_ENDGROUP   0x4
14582 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14583 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14584 
14585 static unsigned int
14586 s390_get_sched_attrmask (rtx_insn *insn)
14587 {
14588   unsigned int mask = 0;
14589 
14590   switch (s390_tune)
14591     {
14592     case PROCESSOR_2827_ZEC12:
14593       if (get_attr_zEC12_cracked (insn))
14594 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14595       if (get_attr_zEC12_expanded (insn))
14596 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14597       if (get_attr_zEC12_endgroup (insn))
14598 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14599       if (get_attr_zEC12_groupalone (insn))
14600 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14601       break;
14602     case PROCESSOR_2964_Z13:
14603       if (get_attr_z13_cracked (insn))
14604 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14605       if (get_attr_z13_expanded (insn))
14606 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14607       if (get_attr_z13_endgroup (insn))
14608 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14609       if (get_attr_z13_groupalone (insn))
14610 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14611       if (get_attr_z13_groupoftwo (insn))
14612 	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14613       break;
14614     case PROCESSOR_3906_Z14:
14615       if (get_attr_z14_cracked (insn))
14616 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14617       if (get_attr_z14_expanded (insn))
14618 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14619       if (get_attr_z14_endgroup (insn))
14620 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14621       if (get_attr_z14_groupalone (insn))
14622 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14623       if (get_attr_z14_groupoftwo (insn))
14624 	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14625       break;
14626     case PROCESSOR_8561_Z15:
14627       if (get_attr_z15_cracked (insn))
14628 	mask |= S390_SCHED_ATTR_MASK_CRACKED;
14629       if (get_attr_z15_expanded (insn))
14630 	mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14631       if (get_attr_z15_endgroup (insn))
14632 	mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14633       if (get_attr_z15_groupalone (insn))
14634 	mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14635       if (get_attr_z15_groupoftwo (insn))
14636 	mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14637       break;
14638     default:
14639       gcc_unreachable ();
14640     }
14641   return mask;
14642 }
14643 
14644 static unsigned int
14645 s390_get_unit_mask (rtx_insn *insn, int *units)
14646 {
14647   unsigned int mask = 0;
14648 
14649   switch (s390_tune)
14650     {
14651     case PROCESSOR_2964_Z13:
14652       *units = 4;
14653       if (get_attr_z13_unit_lsu (insn))
14654 	mask |= 1 << 0;
14655       if (get_attr_z13_unit_fxa (insn))
14656 	mask |= 1 << 1;
14657       if (get_attr_z13_unit_fxb (insn))
14658 	mask |= 1 << 2;
14659       if (get_attr_z13_unit_vfu (insn))
14660 	mask |= 1 << 3;
14661       break;
14662     case PROCESSOR_3906_Z14:
14663       *units = 4;
14664       if (get_attr_z14_unit_lsu (insn))
14665 	mask |= 1 << 0;
14666       if (get_attr_z14_unit_fxa (insn))
14667 	mask |= 1 << 1;
14668       if (get_attr_z14_unit_fxb (insn))
14669 	mask |= 1 << 2;
14670       if (get_attr_z14_unit_vfu (insn))
14671 	mask |= 1 << 3;
14672       break;
14673     case PROCESSOR_8561_Z15:
14674       *units = 4;
14675       if (get_attr_z15_unit_lsu (insn))
14676 	mask |= 1 << 0;
14677       if (get_attr_z15_unit_fxa (insn))
14678 	mask |= 1 << 1;
14679       if (get_attr_z15_unit_fxb (insn))
14680 	mask |= 1 << 2;
14681       if (get_attr_z15_unit_vfu (insn))
14682 	mask |= 1 << 3;
14683       break;
14684     default:
14685       gcc_unreachable ();
14686     }
14687   return mask;
14688 }
14689 
14690 static bool
14691 s390_is_fpd (rtx_insn *insn)
14692 {
14693   if (insn == NULL_RTX)
14694     return false;
14695 
14696   return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
14697     || get_attr_z15_unit_fpd (insn);
14698 }
14699 
14700 static bool
14701 s390_is_fxd (rtx_insn *insn)
14702 {
14703   if (insn == NULL_RTX)
14704     return false;
14705 
14706   return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
14707     || get_attr_z15_unit_fxd (insn);
14708 }
14709 
14710 /* Returns TRUE if INSN is a long-running instruction.  */
14711 static bool
14712 s390_is_longrunning (rtx_insn *insn)
14713 {
14714   if (insn == NULL_RTX)
14715     return false;
14716 
14717   return s390_is_fxd (insn) || s390_is_fpd (insn);
14718 }
14719 
14720 
14721 /* Return the scheduling score for INSN.  The higher the score the
14722    better.  The score is calculated from the OOO scheduling attributes
14723    of INSN and the scheduling state sched_state.  */
14724 static int
14725 s390_sched_score (rtx_insn *insn)
14726 {
14727   unsigned int mask = s390_get_sched_attrmask (insn);
14728   int score = 0;
14729 
14730   switch (sched_state.group_state)
14731     {
14732     case 0:
14733       /* Try to put insns into the first slot which would otherwise
14734 	 break a group.  */
14735       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14736 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14737 	score += 5;
14738       if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14739 	score += 10;
14740       break;
14741     case 1:
14742       /* Prefer not cracked insns while trying to put together a
14743 	 group.  */
14744       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14745 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14746 	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14747 	score += 10;
14748       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14749 	score += 5;
14750       /* If we are in a group of two already, try to schedule another
14751 	 group-of-two insn to avoid shortening another group.  */
14752       if (sched_state.group_of_two
14753 	  && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14754 	score += 15;
14755       break;
14756     case 2:
14757       /* Prefer not cracked insns while trying to put together a
14758 	 group.  */
14759       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14760 	  && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14761 	  && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14762 	score += 10;
14763       /* Prefer endgroup insns in the last slot.  */
14764       if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14765 	score += 10;
14766       /* Try to avoid group-of-two insns in the last slot as they will
14767 	 shorten this group as well as the next one.  */
14768       if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14769 	score = MAX (0, score - 15);
14770       break;
14771     }
14772 
14773   if (s390_tune >= PROCESSOR_2964_Z13)
14774     {
14775       int units, i;
14776       unsigned unit_mask, m = 1;
14777 
14778       unit_mask = s390_get_unit_mask (insn, &units);
14779       gcc_assert (units <= MAX_SCHED_UNITS);
14780 
14781       /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14782 	 ago the last insn of this unit type got scheduled.  This is
14783 	 supposed to help provide a proper instruction mix to the
14784 	 CPU.  */
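      /* For instance, a unit whose recorded distance is 0 (it was used by
	 the most recently issued insn on this side) contributes nothing,
	 while a unit idle for MAX_SCHED_MIX_DISTANCE or more insns
	 contributes the full MAX_SCHED_MIX_SCORE.  */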
14785       for (i = 0; i < units; i++, m <<= 1)
14786 	if (m & unit_mask)
14787 	  score += (last_scheduled_unit_distance[i][sched_state.side]
14788 	      * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
14789 
14790       int other_side = 1 - sched_state.side;
14791 
14792       /* Try to delay long-running insns when side is busy.  */
14793       if (s390_is_longrunning (insn))
14794 	{
14795 	  if (s390_is_fxd (insn))
14796 	    {
14797 	      if (fxd_longrunning[sched_state.side]
14798 		  && fxd_longrunning[other_side]
14799 		  <= fxd_longrunning[sched_state.side])
14800 		score = MAX (0, score - 10);
14801 
14802 	      else if (fxd_longrunning[other_side]
14803 		  >= fxd_longrunning[sched_state.side])
14804 		score += 10;
14805 	    }
14806 
14807 	  if (s390_is_fpd (insn))
14808 	    {
14809 	      if (fpd_longrunning[sched_state.side]
14810 		  && fpd_longrunning[other_side]
14811 		  <= fpd_longrunning[sched_state.side])
14812 		score = MAX (0, score - 10);
14813 
14814 	      else if (fpd_longrunning[other_side]
14815 		  >= fpd_longrunning[sched_state.side])
14816 		score += 10;
14817 	    }
14818 	}
14819     }
14820 
14821   return score;
14822 }
14823 
14824 /* This function is called via hook TARGET_SCHED_REORDER before
14825    issuing one insn from list READY which contains *NREADYP entries.
14826    For target z10 it reorders load instructions to avoid early load
14827    conflicts in the floating point pipeline.  */
14828 static int
14829 s390_sched_reorder (FILE *file, int verbose,
14830 		    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14831 {
14832   if (s390_tune == PROCESSOR_2097_Z10
14833       && reload_completed
14834       && *nreadyp > 1)
14835     s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14836 
14837   if (s390_tune >= PROCESSOR_2827_ZEC12
14838       && reload_completed
14839       && *nreadyp > 1)
14840     {
14841       int i;
14842       int last_index = *nreadyp - 1;
14843       int max_index = -1;
14844       int max_score = -1;
14845       rtx_insn *tmp;
14846 
14847       /* Just move the insn with the highest score to the top (the
14848 	 end) of the list.  A full sort is not needed since a conflict
14849 	 in the hazard recognition cannot happen.  So the top insn in
14850 	 the ready list will always be taken.  */
14851       for (i = last_index; i >= 0; i--)
14852 	{
14853 	  int score;
14854 
14855 	  if (recog_memoized (ready[i]) < 0)
14856 	    continue;
14857 
14858 	  score = s390_sched_score (ready[i]);
14859 	  if (score > max_score)
14860 	    {
14861 	      max_score = score;
14862 	      max_index = i;
14863 	    }
14864 	}
14865 
14866       if (max_index != -1)
14867 	{
14868 	  if (max_index != last_index)
14869 	    {
14870 	      tmp = ready[max_index];
14871 	      ready[max_index] = ready[last_index];
14872 	      ready[last_index] = tmp;
14873 
14874 	      if (verbose > 5)
14875 		fprintf (file,
14876 			 ";;\t\tBACKEND: move insn %d to the top of list\n",
14877 			 INSN_UID (ready[last_index]));
14878 	    }
14879 	  else if (verbose > 5)
14880 	    fprintf (file,
14881 		     ";;\t\tBACKEND: best insn %d already on top\n",
14882 		     INSN_UID (ready[last_index]));
14883 	}
14884 
14885       if (verbose > 5)
14886 	{
14887 	  fprintf (file, "ready list ooo attributes - sched state: %d\n",
14888 		   sched_state.group_state);
14889 
14890 	  for (i = last_index; i >= 0; i--)
14891 	    {
14892 	      unsigned int sched_mask;
14893 	      rtx_insn *insn = ready[i];
14894 
14895 	      if (recog_memoized (insn) < 0)
14896 		continue;
14897 
14898 	      sched_mask = s390_get_sched_attrmask (insn);
14899 	      fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14900 		       INSN_UID (insn),
14901 		       s390_sched_score (insn));
14902 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14903 					   ((M) & sched_mask) ? #ATTR : "");
14904 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14905 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14906 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14907 	      PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14908 #undef PRINT_SCHED_ATTR
14909 	      if (s390_tune >= PROCESSOR_2964_Z13)
14910 		{
14911 		  unsigned int unit_mask, m = 1;
14912 		  int units, j;
14913 
14914 		  unit_mask  = s390_get_unit_mask (insn, &units);
14915 		  fprintf (file, "(units:");
14916 		  for (j = 0; j < units; j++, m <<= 1)
14917 		    if (m & unit_mask)
14918 		      fprintf (file, " u%d", j);
14919 		  fprintf (file, ")");
14920 		}
14921 	      fprintf (file, "\n");
14922 	    }
14923 	}
14924     }
14925 
14926   return s390_issue_rate ();
14927 }
14928 
14929 
14930 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14931    the scheduler has issued INSN.  It stores the last issued insn into
14932    last_scheduled_insn and updates the dispatch-group and issue-side
14933    bookkeeping used by s390_sched_score and s390_sched_reorder.  */
14934 static int
14935 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14936 {
14937   last_scheduled_insn = insn;
14938 
14939   bool ends_group = false;
14940 
14941   if (s390_tune >= PROCESSOR_2827_ZEC12
14942       && reload_completed
14943       && recog_memoized (insn) >= 0)
14944     {
14945       unsigned int mask = s390_get_sched_attrmask (insn);
14946 
14947       if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
14948 	sched_state.group_of_two = true;
14949 
14950       /* If this is a group-of-two insn, we actually ended the last group
14951 	 and this insn is the first one of the new group.  */
14952       if (sched_state.group_state == 2 && sched_state.group_of_two)
14953 	{
14954 	  sched_state.side = sched_state.side ? 0 : 1;
14955 	  sched_state.group_state = 0;
14956 	}
14957 
14958       /* Longrunning and side bookkeeping.  */
14959       for (int i = 0; i < 2; i++)
14960 	{
14961 	  fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
14962 	  fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
14963 	}
14964 
14965       unsigned latency = insn_default_latency (insn);
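      /* Record that a long-running (FXD/FPD) insn now keeps this side busy
	 for roughly its default latency; s390_sched_score uses these
	 counters to delay further long-running insns while the current
	 side is busier than the other.  The counters decay by one per
	 issued insn (see above).  */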
14966       if (s390_is_longrunning (insn))
14967 	{
14968 	  if (s390_is_fxd (insn))
14969 	    fxd_longrunning[sched_state.side] = latency;
14970 	  else
14971 	    fpd_longrunning[sched_state.side] = latency;
14972 	}
14973 
14974       if (s390_tune >= PROCESSOR_2964_Z13)
14975 	{
14976 	  int units, i;
14977 	  unsigned unit_mask, m = 1;
14978 
14979 	  unit_mask = s390_get_unit_mask (insn, &units);
14980 	  gcc_assert (units <= MAX_SCHED_UNITS);
14981 
14982 	  for (i = 0; i < units; i++, m <<= 1)
14983 	    if (m & unit_mask)
14984 	      last_scheduled_unit_distance[i][sched_state.side] = 0;
14985 	    else if (last_scheduled_unit_distance[i][sched_state.side]
14986 		< MAX_SCHED_MIX_DISTANCE)
14987 	      last_scheduled_unit_distance[i][sched_state.side]++;
14988 	}
14989 
14990       if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14991 	  || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
14992 	  || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
14993 	  || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14994 	{
14995 	  sched_state.group_state = 0;
14996 	  ends_group = true;
14997 	}
14998       else
14999 	{
15000 	  switch (sched_state.group_state)
15001 	    {
15002 	    case 0:
15003 	      sched_state.group_state++;
15004 	      break;
15005 	    case 1:
15006 	      sched_state.group_state++;
15007 	      if (sched_state.group_of_two)
15008 		{
15009 		  sched_state.group_state = 0;
15010 		  ends_group = true;
15011 		}
15012 	      break;
15013 	    case 2:
15014 	      sched_state.group_state++;
15015 	      ends_group = true;
15016 	      break;
15017 	    }
15018 	}
15019 
15020       if (verbose > 5)
15021 	{
15022 	  unsigned int sched_mask;
15023 
15024 	  sched_mask = s390_get_sched_attrmask (insn);
15025 
15026 	  fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15027 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15028 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15029 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15030 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15031 	  PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15032 #undef PRINT_SCHED_ATTR
15033 
15034 	  if (s390_tune >= PROCESSOR_2964_Z13)
15035 	    {
15036 	      unsigned int unit_mask, m = 1;
15037 	      int units, j;
15038 
15039 	      unit_mask  = s390_get_unit_mask (insn, &units);
15040 	      fprintf (file, "(units:");
15041 	      for (j = 0; j < units; j++, m <<= 1)
15042 		if (m & unit_mask)
15043 		  fprintf (file, " %d", j);
15044 	      fprintf (file, ")");
15045 	    }
15046 	  fprintf (file, " sched state: %d\n", sched_state.group_state);
15047 
15048 	  if (s390_tune >= PROCESSOR_2964_Z13)
15049 	    {
15050 	      int units, j;
15051 
15052 	      s390_get_unit_mask (insn, &units);
15053 
15054 	      fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
15055 	      for (j = 0; j < units; j++)
15056 		fprintf (file, "%d:%d ", j,
15057 		    last_scheduled_unit_distance[j][sched_state.side]);
15058 	      fprintf (file, "\n");
15059 	    }
15060 	}
15061 
15062       /* If this insn ended a group, the next will be on the other side.  */
15063       if (ends_group)
15064 	{
15065 	  sched_state.group_state = 0;
15066 	  sched_state.side = sched_state.side ? 0 : 1;
15067 	  sched_state.group_of_two = false;
15068 	}
15069     }
15070 
15071   if (GET_CODE (PATTERN (insn)) != USE
15072       && GET_CODE (PATTERN (insn)) != CLOBBER)
15073     return more - 1;
15074   else
15075     return more;
15076 }
15077 
15078 static void
15079 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15080 		 int verbose ATTRIBUTE_UNUSED,
15081 		 int max_ready ATTRIBUTE_UNUSED)
15082 {
15083   /* If the next basic block is most likely entered via a fallthru edge
15084      we keep the last sched state.  Otherwise we start a new group.
15085      The scheduler traverses basic blocks in "instruction stream" ordering
15086      so if we see a fallthru edge here, sched_state will be of its
15087      source block.
15088 
15089      current_sched_info->prev_head is the insn before the first insn of the
15090      block of insns to be scheduled.
15091      */
15092   rtx_insn *insn = current_sched_info->prev_head
15093     ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15094   basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15095   if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15096     {
15097       last_scheduled_insn = NULL;
15098       memset (last_scheduled_unit_distance, 0,
15099 	  MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
15100       sched_state.group_state = 0;
15101       sched_state.group_of_two = false;
15102     }
15103 }
15104 
15105 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15106    the number of times LOOP should be unrolled when tuning for CPUs with
15107    a built-in stride prefetcher.
15108    The loop body is analyzed for memory accesses by walking the RTL of
15109    each insn.  Depending on the loop depth and the number of memory
15110    accesses a value <= NUNROLL is returned to improve the behavior of
15111    the hardware prefetch unit.  */
15112 static unsigned
15113 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15114 {
15115   basic_block *bbs;
15116   rtx_insn *insn;
15117   unsigned i;
15118   unsigned mem_count = 0;
15119 
15120   if (s390_tune < PROCESSOR_2097_Z10)
15121     return nunroll;
15122 
15123   /* Count the number of memory references within the loop body.  */
15124   bbs = get_loop_body (loop);
15125   subrtx_iterator::array_type array;
15126   for (i = 0; i < loop->num_nodes; i++)
15127     FOR_BB_INSNS (bbs[i], insn)
15128       if (INSN_P (insn) && INSN_CODE (insn) != -1)
15129 	{
15130 	  rtx set;
15131 
15132 	  /* The runtime of small loops with memory block operations
15133 	     will be determined by the memory operation.  Doing
15134 	     unrolling doesn't help here.  Measurements to confirm
15135 	     this were only done on recent CPU levels.  So better do
15136 	     not change anything for older CPUs.  */
15137 	  if (s390_tune >= PROCESSOR_2964_Z13
15138 	      && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
15139 	      && ((set = single_set (insn)) != NULL_RTX)
15140 	      && ((GET_MODE (SET_DEST (set)) == BLKmode
15141 		   && (GET_MODE (SET_SRC (set)) == BLKmode
15142 		       || SET_SRC (set) == const0_rtx))
15143 		  || (GET_CODE (SET_SRC (set)) == COMPARE
15144 		      && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
15145 		      && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
15146 	    return 1;
15147 
15148 	  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15149 	    if (MEM_P (*iter))
15150 	      mem_count += 1;
15151 	}
15152   free (bbs);
15153 
15154   /* Prevent division by zero; nunroll needs no adjustment in this case.  */
15155   if (mem_count == 0)
15156     return nunroll;
15157 
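  /* Cap the unroll factor so that the unrolled loop body contains roughly
     no more than 28 (depth 1), 22 (depth 2) or 16 memory accesses; e.g. a
     depth-1 loop with 4 memory references is unrolled at most
     28 / 4 = 7 times.  */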
15158   switch (loop_depth(loop))
15159     {
15160     case 1:
15161       return MIN (nunroll, 28 / mem_count);
15162     case 2:
15163       return MIN (nunroll, 22 / mem_count);
15164     default:
15165       return MIN (nunroll, 16 / mem_count);
15166     }
15167 }
15168 
15169 /* Restore the current options.  This is a hook function and also called
15170    internally.  */
15171 
15172 static void
15173 s390_function_specific_restore (struct gcc_options *opts,
15174 				struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15175 {
15176   opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15177 }
15178 
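/* Choose a default function alignment of 16 bytes when tuning for z13 or
   newer, unless the user specified an alignment explicitly.  */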
15179 static void
15180 s390_default_align (struct gcc_options *opts)
15181 {
15182   /* Set the default function alignment to 16 in order to get rid of
15183      some unwanted performance effects. */
15184   if (opts->x_flag_align_functions && !opts->x_str_align_functions
15185       && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15186     opts->x_str_align_functions = "16";
15187 }
15188 
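/* Re-apply option-dependent defaults (currently only the default function
   alignment) whenever the option set changes.  */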
15189 static void
15190 s390_override_options_after_change (void)
15191 {
15192   s390_default_align (&global_options);
15193 }
15194 
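/* Perform option validation and compute the option-dependent defaults
   (architecture and tune selection, facility-dependent target flags and
   params) for the option set in OPTS/OPTS_SET.  Called from
   s390_option_override and again when target attributes or pragmas
   change the options.  */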
15195 static void
15196 s390_option_override_internal (struct gcc_options *opts,
15197 			       const struct gcc_options *opts_set)
15198 {
15199   /* Architecture mode defaults according to ABI.  */
15200   if (!(opts_set->x_target_flags & MASK_ZARCH))
15201     {
15202       if (TARGET_64BIT)
15203 	opts->x_target_flags |= MASK_ZARCH;
15204       else
15205 	opts->x_target_flags &= ~MASK_ZARCH;
15206     }
15207 
15208   /* Set the march default in case it hasn't been specified on cmdline.  */
15209   if (!opts_set->x_s390_arch)
15210     opts->x_s390_arch = PROCESSOR_2064_Z900;
15211 
15212   opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15213 
15214   /* Determine processor to tune for.  */
15215   if (!opts_set->x_s390_tune)
15216     opts->x_s390_tune = opts->x_s390_arch;
15217 
15218   opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15219 
15220   /* Sanity checks.  */
15221   if (opts->x_s390_arch == PROCESSOR_NATIVE
15222       || opts->x_s390_tune == PROCESSOR_NATIVE)
15223     gcc_unreachable ();
15224   if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15225     error ("64-bit ABI not supported in ESA/390 mode");
15226 
15227   if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15228       || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15229       || opts->x_s390_function_return == indirect_branch_thunk_inline
15230       || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15231       || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15232     error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15233 
15234   if (opts->x_s390_indirect_branch != indirect_branch_keep)
15235     {
15236       if (!opts_set->x_s390_indirect_branch_call)
15237 	opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15238 
15239       if (!opts_set->x_s390_indirect_branch_jump)
15240 	opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15241     }
15242 
15243   if (opts->x_s390_function_return != indirect_branch_keep)
15244     {
15245       if (!opts_set->x_s390_function_return_reg)
15246 	opts->x_s390_function_return_reg = opts->x_s390_function_return;
15247 
15248       if (!opts_set->x_s390_function_return_mem)
15249 	opts->x_s390_function_return_mem = opts->x_s390_function_return;
15250     }
15251 
15252   /* Enable hardware transactions if available and not explicitly
15253      disabled by the user, e.g. with -m31 -march=zEC12 -mzarch.  */
15254   if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15255     {
15256       if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15257 	opts->x_target_flags |= MASK_OPT_HTM;
15258       else
15259 	opts->x_target_flags &= ~MASK_OPT_HTM;
15260     }
15261 
15262   if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15263     {
15264       if (TARGET_OPT_VX_P (opts->x_target_flags))
15265 	{
15266 	  if (!TARGET_CPU_VX_P (opts))
15267 	    error ("hardware vector support not available on %s",
15268 		   processor_table[(int)opts->x_s390_arch].name);
15269 	  if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15270 	    error ("hardware vector support not available with "
15271 		   "%<-msoft-float%>");
15272 	}
15273     }
15274   else
15275     {
15276       if (TARGET_CPU_VX_P (opts))
15277 	/* Enable vector support if available and not explicitly disabled
15278 	   by the user, e.g. with -m31 -march=z13 -mzarch.  */
15279 	opts->x_target_flags |= MASK_OPT_VX;
15280       else
15281 	opts->x_target_flags &= ~MASK_OPT_VX;
15282     }
15283 
15284   /* Use hardware DFP if available and not explicitly disabled by
15285      the user, e.g. with -m31 -march=z10 -mzarch.  */
15286   if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15287     {
15288       if (TARGET_DFP_P (opts))
15289 	opts->x_target_flags |= MASK_HARD_DFP;
15290       else
15291 	opts->x_target_flags &= ~MASK_HARD_DFP;
15292     }
15293 
15294   if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15295     {
15296       if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15297 	{
15298 	  if (!TARGET_CPU_DFP_P (opts))
15299 	    error ("hardware decimal floating point instructions"
15300 		   " not available on %s",
15301 		   processor_table[(int)opts->x_s390_arch].name);
15302 	  if (!TARGET_ZARCH_P (opts->x_target_flags))
15303 	    error ("hardware decimal floating point instructions"
15304 		   " not available in ESA/390 mode");
15305 	}
15306       else
15307 	opts->x_target_flags &= ~MASK_HARD_DFP;
15308     }
15309 
15310   if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15311       && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15312     {
15313       if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15314 	  && TARGET_HARD_DFP_P (opts->x_target_flags))
15315 	error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15316 	       "%<-msoft-float%>");
15317 
15318       opts->x_target_flags &= ~MASK_HARD_DFP;
15319     }
15320 
15321   if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15322       && TARGET_PACKED_STACK_P (opts->x_target_flags)
15323       && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15324     error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15325 	   "supported in combination");
15326 
15327   if (opts->x_s390_stack_size)
15328     {
15329       if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15330 	error ("stack size must be greater than the stack guard value");
15331       else if (opts->x_s390_stack_size > 1 << 16)
15332 	error ("stack size must not be greater than 64k");
15333     }
15334   else if (opts->x_s390_stack_guard)
15335     error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15336 
15337   /* Our implementation of the stack probe requires the probe interval
15338      to be used as displacement in an address operand.  The maximum
15339      probe interval currently is 64k.  This would exceed short
15340      displacements.  Trim that value down to 4k if that happens.  This
15341      might result in too many probes being generated, but that can only
15342      happen on the oldest supported machine level, z900.  */
15343   if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
15344     param_stack_clash_protection_probe_interval = 12;
15345 
15346 #if TARGET_TPF != 0
15347   if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
15348     error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");
15349 
15350   if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
15351     error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");
15352 
15353   if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
15354     error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");
15355 
15356   if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
15357     error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");
15358 
15359   if (s390_tpf_trace_skip)
15360     {
15361       opts->x_s390_tpf_trace_hook_prologue_target = TPF_TRACE_PROLOGUE_SKIP_TARGET;
15362       opts->x_s390_tpf_trace_hook_epilogue_target = TPF_TRACE_EPILOGUE_SKIP_TARGET;
15363     }
15364 #endif
15365 
15366 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15367   if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15368     opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15369 #endif
15370 
15371   if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15372     {
15373       SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
15374 			   100);
15375       SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
15376       SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
15377 			   2000);
15378       SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
15379 			   64);
15380     }
15381 
15382   SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
15383 		       256);
15384   /* values for loop prefetching */
15385   SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
15386   SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
15387   /* s390 has more than 2 levels and the size is much larger.  Since
15388      we are always running virtualized assume that we only get a small
15389      part of the caches above l1.  */
15390   SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
15391   SET_OPTION_IF_UNSET (opts, opts_set,
15392 		       param_prefetch_min_insn_to_mem_ratio, 2);
15393   SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);
15394 
15395   /* Use the alternative scheduling-pressure algorithm by default.  */
15396   SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
15397   SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
15398 
15399   /* Use aggressive inlining parameters.  */
15400   if (opts->x_s390_tune >= PROCESSOR_2964_Z13)
15401     {
15402       SET_OPTION_IF_UNSET (opts, opts_set, param_inline_min_speedup, 2);
15403       SET_OPTION_IF_UNSET (opts, opts_set, param_max_inline_insns_auto, 80);
15404     }
15405 
15406   /* Set the default alignment.  */
15407   s390_default_align (opts);
15408 
15409   /* Call target specific restore function to do post-init work.  At the moment,
15410      this just sets opts->x_s390_cost_pointer.  */
15411   s390_function_specific_restore (opts, NULL);
15412 
15413   /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
15414      because 31-bit PLT stubs assume that %r12 contains the GOT address, which
15415      is not the case when the code runs before the prologue.  */
15416   if (opts->x_flag_fentry && !TARGET_64BIT)
15417     error ("%<-mfentry%> is supported only for 64-bit CPUs");
15418 }
15419 
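/* Process the deferred options (currently only -mhotpatch), set up the
   function hooks, and run the main option override work in
   s390_option_override_internal.  */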
15420 static void
15421 s390_option_override (void)
15422 {
15423   unsigned int i;
15424   cl_deferred_option *opt;
15425   vec<cl_deferred_option> *v =
15426     (vec<cl_deferred_option> *) s390_deferred_options;
15427 
15428   if (v)
15429     FOR_EACH_VEC_ELT (*v, i, opt)
15430       {
15431 	switch (opt->opt_index)
15432 	  {
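	  /* Parse the two comma separated values of -mhotpatch=n,m: the
	     number of halfwords reserved before and after the function
	     label for hotpatching.  */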
15433 	  case OPT_mhotpatch_:
15434 	    {
15435 	      int val1;
15436 	      int val2;
15437 	      char *s = strtok (ASTRDUP (opt->arg), ",");
15438 	      char *t = strtok (NULL, "\0");
15439 
15440 	      if (t != NULL)
15441 		{
15442 		  val1 = integral_argument (s);
15443 		  val2 = integral_argument (t);
15444 		}
15445 	      else
15446 		{
15447 		  val1 = -1;
15448 		  val2 = -1;
15449 		}
15450 	      if (val1 == -1 || val2 == -1)
15451 		{
15452 		  /* argument is not a plain number */
15453 		  error ("arguments to %qs should be non-negative integers",
15454 			 "-mhotpatch=n,m");
15455 		  break;
15456 		}
15457 	      else if (val1 > s390_hotpatch_hw_max
15458 		       || val2 > s390_hotpatch_hw_max)
15459 		{
15460 		  error ("argument to %qs is too large (max. %d)",
15461 			 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15462 		  break;
15463 		}
15464 	      s390_hotpatch_hw_before_label = val1;
15465 	      s390_hotpatch_hw_after_label = val2;
15466 	      break;
15467 	    }
15468 	  default:
15469 	    gcc_unreachable ();
15470 	  }
15471       }
15472 
15473   /* Set up function hooks.  */
15474   init_machine_status = s390_init_machine_status;
15475 
15476   s390_option_override_internal (&global_options, &global_options_set);
15477 
15478   /* Save the initial options in case the user does function specific
15479      options.  */
15480   target_option_default_node = build_target_option_node (&global_options);
15481   target_option_current_node = target_option_default_node;
15482 
15483   /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15484      requires the arch flags to be evaluated already.  Since prefetching
15485      is beneficial on s390, we enable it if available.  */
15486   if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15487     flag_prefetch_loop_arrays = 1;
15488 
15489   if (!s390_pic_data_is_text_relative && !flag_pic)
15490     error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15491 	   "%<-fpic%>/%<-fPIC%>");
15492 
15493   if (TARGET_TPF)
15494     {
15495       /* Don't emit DWARF3/4 unless specifically selected.  The TPF
15496 	 debuggers do not yet support DWARF 3/4.  */
15497       if (!global_options_set.x_dwarf_strict)
15498 	dwarf_strict = 1;
15499       if (!global_options_set.x_dwarf_version)
15500 	dwarf_version = 2;
15501     }
15502 }
15503 
15504 #if S390_USE_TARGET_ATTRIBUTE
15505 /* Inner function to process the attribute((target(...))), take an argument and
15506    set the current options from the argument. If we have a list, recursively go
15507    over the list.  */
15508 
15509 static bool
15510 s390_valid_target_attribute_inner_p (tree args,
15511 				     struct gcc_options *opts,
15512 				     struct gcc_options *new_opts_set,
15513 				     bool force_pragma)
15514 {
15515   char *next_optstr;
15516   bool ret = true;
15517 
15518 #define S390_ATTRIB(S,O,A)  { S, sizeof (S)-1, O, A, 0 }
15519 #define S390_PRAGMA(S,O,A)  { S, sizeof (S)-1, O, A, 1 }
15520   static const struct
15521   {
15522     const char *string;
15523     size_t len;
15524     int opt;
15525     int has_arg;
15526     int only_as_pragma;
15527   } attrs[] = {
15528     /* enum options */
15529     S390_ATTRIB ("arch=", OPT_march_, 1),
15530     S390_ATTRIB ("tune=", OPT_mtune_, 1),
15531     /* uinteger options */
15532     S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15533     S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15534     S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15535     S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15536     /* flag options */
15537     S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15538     S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15539     S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15540     S390_ATTRIB ("htm", OPT_mhtm, 0),
15541     S390_ATTRIB ("vx", OPT_mvx, 0),
15542     S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15543     S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15544     S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15545     S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15546     S390_PRAGMA ("zvector", OPT_mzvector, 0),
15547     /* boolean options */
15548     S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15549   };
15550 #undef S390_ATTRIB
15551 #undef S390_PRAGMA
15552 
15553   /* If this is a list, recurse to get the options.  */
15554   if (TREE_CODE (args) == TREE_LIST)
15555     {
15556       bool ret = true;
15557       int num_pragma_values;
15558       int i;
15559 
15560       /* Note: attribs.c:decl_attributes prepends the values from
15561 	 current_target_pragma to the list of target attributes.  To determine
15562 	 whether we're looking at a value of the attribute or the pragma we
15563 	 assume that the first [list_length (current_target_pragma)] values in
15564 	 the list are the values from the pragma.  */
15565       num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15566 	? list_length (current_target_pragma) : 0;
15567       for (i = 0; args; args = TREE_CHAIN (args), i++)
15568 	{
15569 	  bool is_pragma;
15570 
15571 	  is_pragma = (force_pragma || i < num_pragma_values);
15572 	  if (TREE_VALUE (args)
15573 	      && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15574 						       opts, new_opts_set,
15575 						       is_pragma))
15576 	    {
15577 	      ret = false;
15578 	    }
15579 	}
15580       return ret;
15581     }
15582 
15583   else if (TREE_CODE (args) != STRING_CST)
15584     {
15585       error ("attribute %<target%> argument not a string");
15586       return false;
15587     }
15588 
15589   /* Handle multiple arguments separated by commas.  */
15590   next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15591 
15592   while (next_optstr && *next_optstr != '\0')
15593     {
15594       char *p = next_optstr;
15595       char *orig_p = p;
15596       char *comma = strchr (next_optstr, ',');
15597       size_t len, opt_len;
15598       int opt;
15599       bool opt_set_p;
15600       char ch;
15601       unsigned i;
15602       int mask = 0;
15603       enum cl_var_type var_type;
15604       bool found;
15605 
15606       if (comma)
15607 	{
15608 	  *comma = '\0';
15609 	  len = comma - next_optstr;
15610 	  next_optstr = comma + 1;
15611 	}
15612       else
15613 	{
15614 	  len = strlen (p);
15615 	  next_optstr = NULL;
15616 	}
15617 
15618       /* Recognize no-xxx.  */
15619       if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15620 	{
15621 	  opt_set_p = false;
15622 	  p += 3;
15623 	  len -= 3;
15624 	}
15625       else
15626 	opt_set_p = true;
15627 
15628       /* Find the option.  */
15629       ch = *p;
15630       found = false;
15631       for (i = 0; i < ARRAY_SIZE (attrs); i++)
15632 	{
15633 	  opt_len = attrs[i].len;
15634 	  if (ch == attrs[i].string[0]
15635 	      && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15636 	      && memcmp (p, attrs[i].string, opt_len) == 0)
15637 	    {
15638 	      opt = attrs[i].opt;
15639 	      if (!opt_set_p && cl_options[opt].cl_reject_negative)
15640 		continue;
15641 	      mask = cl_options[opt].var_value;
15642 	      var_type = cl_options[opt].var_type;
15643 	      found = true;
15644 	      break;
15645 	    }
15646 	}
15647 
15648       /* Process the option.  */
15649       if (!found)
15650 	{
15651 	  error ("attribute(target(\"%s\")) is unknown", orig_p);
15652 	  return false;
15653 	}
15654       else if (attrs[i].only_as_pragma && !force_pragma)
15655 	{
15656 	  /* Value is not allowed for the target attribute.  */
15657 	  error ("value %qs is not supported by attribute %<target%>",
15658 		 attrs[i].string);
15659 	  return false;
15660 	}
15661 
15662       else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15663 	{
15664 	  if (var_type == CLVC_BIT_CLEAR)
15665 	    opt_set_p = !opt_set_p;
15666 
15667 	  if (opt_set_p)
15668 	    opts->x_target_flags |= mask;
15669 	  else
15670 	    opts->x_target_flags &= ~mask;
15671 	  new_opts_set->x_target_flags |= mask;
15672 	}
15673 
15674       else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15675 	{
15676 	  int value;
15677 
15678 	  if (cl_options[opt].cl_uinteger)
15679 	    {
15680 	      /* Unsigned integer argument.  Code based on the function
15681 		 decode_cmdline_option () in opts-common.c.  */
15682 	      value = integral_argument (p + opt_len);
15683 	    }
15684 	  else
15685 	    value = (opt_set_p) ? 1 : 0;
15686 
15687 	  if (value != -1)
15688 	    {
15689 	      struct cl_decoded_option decoded;
15690 
15691 	      /* Value range check; only implemented for numeric and boolean
15692 		 options at the moment.  */
15693 	      generate_option (opt, NULL, value, CL_TARGET, &decoded);
15694 	      s390_handle_option (opts, new_opts_set, &decoded, input_location);
15695 	      set_option (opts, new_opts_set, opt, value,
15696 			  p + opt_len, DK_UNSPECIFIED, input_location,
15697 			  global_dc);
15698 	    }
15699 	  else
15700 	    {
15701 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15702 	      ret = false;
15703 	    }
15704 	}
15705 
15706       else if (cl_options[opt].var_type == CLVC_ENUM)
15707 	{
15708 	  bool arg_ok;
15709 	  int value;
15710 
15711 	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15712 	  if (arg_ok)
15713 	    set_option (opts, new_opts_set, opt, value,
15714 			p + opt_len, DK_UNSPECIFIED, input_location,
15715 			global_dc);
15716 	  else
15717 	    {
15718 	      error ("attribute(target(\"%s\")) is unknown", orig_p);
15719 	      ret = false;
15720 	    }
15721 	}
15722 
15723       else
15724 	gcc_unreachable ();
15725     }
15726   return ret;
15727 }
15728 
15729 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
15730 
15731 tree
15732 s390_valid_target_attribute_tree (tree args,
15733 				  struct gcc_options *opts,
15734 				  const struct gcc_options *opts_set,
15735 				  bool force_pragma)
15736 {
15737   tree t = NULL_TREE;
15738   struct gcc_options new_opts_set;
15739 
15740   memset (&new_opts_set, 0, sizeof (new_opts_set));
15741 
15742   /* Process each of the options on the chain.  */
15743   if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15744 					     force_pragma))
15745     return error_mark_node;
15746 
15747   /* If some option was set (even if it has not changed), rerun
15748      s390_option_override_internal, and then save the options away.  */
15749   if (new_opts_set.x_target_flags
15750       || new_opts_set.x_s390_arch
15751       || new_opts_set.x_s390_tune
15752       || new_opts_set.x_s390_stack_guard
15753       || new_opts_set.x_s390_stack_size
15754       || new_opts_set.x_s390_branch_cost
15755       || new_opts_set.x_s390_warn_framesize
15756       || new_opts_set.x_s390_warn_dynamicstack_p)
15757     {
15758       const unsigned char *src = (const unsigned char *)opts_set;
15759       unsigned char *dest = (unsigned char *)&new_opts_set;
15760       unsigned int i;
15761 
15762       /* Merge the original option flags into the new ones.  */
15763       for (i = 0; i < sizeof(*opts_set); i++)
15764 	dest[i] |= src[i];
15765 
15766       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
15767       s390_option_override_internal (opts, &new_opts_set);
15768       /* Save the current options unless we are validating options for
15769 	 #pragma.  */
15770       t = build_target_option_node (opts);
15771     }
15772   return t;
15773 }
15774 
15775 /* Hook to validate attribute((target("string"))).  */
15776 
15777 static bool
15778 s390_valid_target_attribute_p (tree fndecl,
15779 			       tree ARG_UNUSED (name),
15780 			       tree args,
15781 			       int ARG_UNUSED (flags))
15782 {
15783   struct gcc_options func_options;
15784   tree new_target, new_optimize;
15785   bool ret = true;
15786 
15787   /* attribute((target("default"))) does nothing, beyond
15788      affecting multi-versioning.  */
15789   if (TREE_VALUE (args)
15790       && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15791       && TREE_CHAIN (args) == NULL_TREE
15792       && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15793     return true;
15794 
15795   tree old_optimize = build_optimization_node (&global_options);
15796 
15797   /* Get the optimization options of the current function.  */
15798   tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15799 
15800   if (!func_optimize)
15801     func_optimize = old_optimize;
15802 
15803   /* Init func_options.  */
15804   memset (&func_options, 0, sizeof (func_options));
15805   init_options_struct (&func_options, NULL);
15806   lang_hooks.init_options_struct (&func_options);
15807 
15808   cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15809 
15810   /* Initialize func_options to the default before its target options can
15811      be set.  */
15812   cl_target_option_restore (&func_options,
15813 			    TREE_TARGET_OPTION (target_option_default_node));
15814 
15815   new_target = s390_valid_target_attribute_tree (args, &func_options,
15816 						 &global_options_set,
15817 						 (args ==
15818 						  current_target_pragma));
15819   new_optimize = build_optimization_node (&func_options);
15820   if (new_target == error_mark_node)
15821     ret = false;
15822   else if (fndecl && new_target)
15823     {
15824       DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15825       if (old_optimize != new_optimize)
15826 	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15827     }
15828   return ret;
15829 }
15830 
15831 /* Hook to determine if one function can safely inline another.  */
15832 
15833 static bool
15834 s390_can_inline_p (tree caller, tree callee)
15835 {
15836   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15837   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15838 
15839   if (!callee_tree)
15840     callee_tree = target_option_default_node;
15841   if (!caller_tree)
15842     caller_tree = target_option_default_node;
15843   if (callee_tree == caller_tree)
15844     return true;
15845 
15846   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15847   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15848   bool ret = true;
15849 
15850   if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15851       != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15852     ret = false;
15853 
15854   /* Don't inline functions to be compiled for a more recent arch into a
15855      function for an older arch.  */
15856   else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15857     ret = false;
15858 
15859   /* Inlining a hard float function into a soft float function is only
15860      allowed if the hard float function doesn't actually make use of
15861      floating point.
15862 
15863      We are called from FEs for multi-versioning call optimization, so
15864      beware of ipa_fn_summaries not available.  */
15865   else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15866 	     && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15867 	    || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15868 		&& TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15869 	   && (! ipa_fn_summaries
15870 	       || ipa_fn_summaries->get
15871 	       (cgraph_node::get (callee))->fp_expressions))
15872     ret = false;
15873 
15874   return ret;
15875 }
15876 #endif
15877 
15878 /* Set VAL to correct enum value according to the indirect-branch or
15879    function-return attribute in ATTR.  */
15880 
15881 static inline void
15882 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
15883 {
15884   const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
15885   if (strcmp (str, "keep") == 0)
15886     *val = indirect_branch_keep;
15887   else if (strcmp (str, "thunk") == 0)
15888     *val = indirect_branch_thunk;
15889   else if (strcmp (str, "thunk-inline") == 0)
15890     *val = indirect_branch_thunk_inline;
15891   else if (strcmp (str, "thunk-extern") == 0)
15892     *val = indirect_branch_thunk_extern;
15893 }
15894 
15895 /* Memorize the setting for -mindirect-branch* and -mfunction-return*
15896    from either the cmdline or the function attributes in
15897    cfun->machine.  */
15898 
15899 static void
15900 s390_indirect_branch_settings (tree fndecl)
15901 {
15902   tree attr;
15903 
15904   if (!fndecl)
15905     return;
15906 
15907   /* Initialize with the cmdline options and let the attributes
15908      override it.  */
15909   cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
15910   cfun->machine->indirect_branch_call = s390_indirect_branch_call;
15911 
15912   cfun->machine->function_return_reg = s390_function_return_reg;
15913   cfun->machine->function_return_mem = s390_function_return_mem;
15914 
15915   if ((attr = lookup_attribute ("indirect_branch",
15916 				DECL_ATTRIBUTES (fndecl))))
15917     {
15918       s390_indirect_branch_attrvalue (attr,
15919 				      &cfun->machine->indirect_branch_jump);
15920       s390_indirect_branch_attrvalue (attr,
15921 				      &cfun->machine->indirect_branch_call);
15922     }
15923 
15924   if ((attr = lookup_attribute ("indirect_branch_jump",
15925 				DECL_ATTRIBUTES (fndecl))))
15926     s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
15927 
15928   if ((attr = lookup_attribute ("indirect_branch_call",
15929 				DECL_ATTRIBUTES (fndecl))))
15930     s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
15931 
15932   if ((attr = lookup_attribute ("function_return",
15933 				DECL_ATTRIBUTES (fndecl))))
15934     {
15935       s390_indirect_branch_attrvalue (attr,
15936 				      &cfun->machine->function_return_reg);
15937       s390_indirect_branch_attrvalue (attr,
15938 				      &cfun->machine->function_return_mem);
15939     }
15940 
15941   if ((attr = lookup_attribute ("function_return_reg",
15942 				DECL_ATTRIBUTES (fndecl))))
15943     s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
15944 
15945   if ((attr = lookup_attribute ("function_return_mem",
15946 				DECL_ATTRIBUTES (fndecl))))
15947     s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
15948 }
15949 
15950 #if S390_USE_TARGET_ATTRIBUTE
15951 /* Restore target globals from NEW_TREE and invalidate the s390_previous_fndecl
15952    cache.  */
15953 
15954 void
15955 s390_activate_target_options (tree new_tree)
15956 {
15957   cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15958   if (TREE_TARGET_GLOBALS (new_tree))
15959     restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15960   else if (new_tree == target_option_default_node)
15961     restore_target_globals (&default_target_globals);
15962   else
15963     TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15964   s390_previous_fndecl = NULL_TREE;
15965 }
15966 #endif
15967 
15968 /* Establish appropriate back-end context for processing the function
15969    FNDECL.  The argument might be NULL to indicate processing at top
15970    level, outside of any function scope.  */
15971 static void
15972 s390_set_current_function (tree fndecl)
15973 {
15974 #if S390_USE_TARGET_ATTRIBUTE
15975   /* Only change the context if the function changes.  This hook is called
15976      several times in the course of compiling a function, and we don't want to
15977      slow things down too much or call target_reinit when it isn't safe.  */
15978   if (fndecl == s390_previous_fndecl)
15979     {
15980       s390_indirect_branch_settings (fndecl);
15981       return;
15982     }
15983 
15984   tree old_tree;
15985   if (s390_previous_fndecl == NULL_TREE)
15986     old_tree = target_option_current_node;
15987   else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15988     old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15989   else
15990     old_tree = target_option_default_node;
15991 
15992   if (fndecl == NULL_TREE)
15993     {
15994       if (old_tree != target_option_current_node)
15995 	s390_activate_target_options (target_option_current_node);
15996       return;
15997     }
15998 
15999   tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16000   if (new_tree == NULL_TREE)
16001     new_tree = target_option_default_node;
16002 
16003   if (old_tree != new_tree)
16004     s390_activate_target_options (new_tree);
16005   s390_previous_fndecl = fndecl;
16006 #endif
16007   s390_indirect_branch_settings (fndecl);
16008 }
16009 
16010 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
16011 
16012 static bool
16013 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16014 				     unsigned int align ATTRIBUTE_UNUSED,
16015 				     enum by_pieces_operation op ATTRIBUTE_UNUSED,
16016 				     bool speed_p ATTRIBUTE_UNUSED)
16017 {
16018   return (size == 1 || size == 2
16019 	  || size == 4 || (TARGET_ZARCH && size == 8));
16020 }
16021 
16022 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */
16023 
16024 static void
16025 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16026 {
16027   tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16028   tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16029   tree call_efpc = build_call_expr (efpc, 0);
16030   tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16031 
16032 #define FPC_EXCEPTION_MASK	 HOST_WIDE_INT_UC (0xf8000000)
16033 #define FPC_FLAGS_MASK		 HOST_WIDE_INT_UC (0x00f80000)
16034 #define FPC_DXC_MASK		 HOST_WIDE_INT_UC (0x0000ff00)
16035 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16036 #define FPC_FLAGS_SHIFT		 HOST_WIDE_INT_UC (16)
16037 #define FPC_DXC_SHIFT		 HOST_WIDE_INT_UC (8)
16038 
16039   /* Generates the equivalent of feholdexcept (&fenv_var)
16040 
16041      fenv_var = __builtin_s390_efpc ();
16042      __builtin_s390_sfpc (fenv_var & mask) */
16043   tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
16044 			 NULL_TREE, NULL_TREE);
16045   tree new_fpc
16046     = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16047 	      build_int_cst (unsigned_type_node,
16048 			     ~(FPC_DXC_MASK | FPC_FLAGS_MASK
16049 			       | FPC_EXCEPTION_MASK)));
16050   tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16051   *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16052 
16053   /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16054 
16055      __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16056   new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16057 		    build_int_cst (unsigned_type_node,
16058 				   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16059   *clear = build_call_expr (sfpc, 1, new_fpc);
16060 
16061   /* Generates the equivalent of feupdateenv (fenv_var)
16062 
16063   old_fpc = __builtin_s390_efpc ();
16064   __builtin_s390_sfpc (fenv_var);
16065   __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */
16066 
16067   old_fpc = create_tmp_var_raw (unsigned_type_node);
16068   tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
16069 			       NULL_TREE, NULL_TREE);
16070 
16071   set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16072 
16073   tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16074 				  build_int_cst (unsigned_type_node,
16075 						 FPC_FLAGS_MASK));
16076   raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16077 			     build_int_cst (unsigned_type_node,
16078 					    FPC_FLAGS_SHIFT));
16079   tree atomic_feraiseexcept
16080     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16081   raise_old_except = build_call_expr (atomic_feraiseexcept,
16082 				      1, raise_old_except);
16083 
16084   *update = build2 (COMPOUND_EXPR, void_type_node,
16085 		    build2 (COMPOUND_EXPR, void_type_node,
16086 			    store_old_fpc, set_new_fpc),
16087 		    raise_old_except);
16088 
16089 #undef FPC_EXCEPTION_MASK
16090 #undef FPC_FLAGS_MASK
16091 #undef FPC_DXC_MASK
16092 #undef FPC_EXCEPTION_MASK_SHIFT
16093 #undef FPC_FLAGS_SHIFT
16094 #undef FPC_DXC_SHIFT
16095 }
16096 
16097 /* Return the vector mode to be used for inner mode MODE when doing
16098    vectorization, e.g. V4SImode for SImode (16-byte vector registers).  */
16099 static machine_mode
16100 s390_preferred_simd_mode (scalar_mode mode)
16101 {
16102   if (TARGET_VXE)
16103     switch (mode)
16104       {
16105       case E_SFmode:
16106 	return V4SFmode;
16107       default:;
16108       }
16109 
16110   if (TARGET_VX)
16111     switch (mode)
16112       {
16113       case E_DFmode:
16114 	return V2DFmode;
16115       case E_DImode:
16116 	return V2DImode;
16117       case E_SImode:
16118 	return V4SImode;
16119       case E_HImode:
16120 	return V8HImode;
16121       case E_QImode:
16122 	return V16QImode;
16123       default:;
16124       }
16125   return word_mode;
16126 }
16127 
16128 /* Our hardware does not require vectors to be strictly aligned.  */
16129 static bool
16130 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
16131 				  const_tree type ATTRIBUTE_UNUSED,
16132 				  int misalignment ATTRIBUTE_UNUSED,
16133 				  bool is_packed ATTRIBUTE_UNUSED)
16134 {
16135   if (TARGET_VX)
16136     return true;
16137 
16138   return default_builtin_support_vector_misalignment (mode, type, misalignment,
16139 						      is_packed);
16140 }
16141 
16142 /* The vector ABI requires vector types to be aligned on an 8 byte
16143    boundary (our stack alignment).  However, we allow this to be
16144    overridden by the user, although doing so breaks the ABI.  */
16145 static HOST_WIDE_INT
16146 s390_vector_alignment (const_tree type)
16147 {
16148   tree size = TYPE_SIZE (type);
16149 
16150   if (!TARGET_VX_ABI)
16151     return default_vector_alignment (type);
16152 
16153   if (TYPE_USER_ALIGN (type))
16154     return TYPE_ALIGN (type);
16155 
16156   if (tree_fits_uhwi_p (size)
16157       && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
16158     return tree_to_uhwi (size);
16159 
16160   return BIGGEST_ALIGNMENT;
16161 }
16162 
16163 /* Implement TARGET_CONSTANT_ALIGNMENT.  Require at least 2-byte alignment
16164    so constants can be addressed with LARL, which needs even addresses.  */
16165 
16166 static HOST_WIDE_INT
16167 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
16168 {
16169   return MAX (align, 16);
16170 }
16171 
16172 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16173 /* Implement TARGET_ASM_FILE_START.  */
16174 static void
16175 s390_asm_file_start (void)
16176 {
16177   default_file_start ();
16178   s390_asm_output_machine_for_arch (asm_out_file);
16179 }
16180 #endif
16181 
16182 /* Implement TARGET_ASM_FILE_END.  */
16183 static void
16184 s390_asm_file_end (void)
16185 {
16186 #ifdef HAVE_AS_GNU_ATTRIBUTE
16187   varpool_node *vnode;
16188   cgraph_node *cnode;
16189 
16190   FOR_EACH_VARIABLE (vnode)
16191     if (TREE_PUBLIC (vnode->decl))
16192       s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16193 
16194   FOR_EACH_FUNCTION (cnode)
16195     if (TREE_PUBLIC (cnode->decl))
16196       s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16197 
16198 
16199   if (s390_vector_abi != 0)
16200     fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16201 	     s390_vector_abi);
16202 #endif
16203   file_end_indicate_exec_stack ();
16204 
16205   if (flag_split_stack)
16206     file_end_indicate_split_stack ();
16207 }
16208 
16209 /* Return true if TYPE is a vector bool type.  */
16210 static inline bool
16211 s390_vector_bool_type_p (const_tree type)
16212 {
16213   return TYPE_VECTOR_OPAQUE (type);
16214 }
16215 
16216 /* Return the diagnostic message string if the binary operation OP is
16217    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
16218 static const char*
16219 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16220 {
16221   bool bool1_p, bool2_p;
16222   bool plusminus_p;
16223   bool muldiv_p;
16224   bool compare_p;
16225   machine_mode mode1, mode2;
16226 
16227   if (!TARGET_ZVECTOR)
16228     return NULL;
16229 
16230   if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16231     return NULL;
16232 
16233   bool1_p = s390_vector_bool_type_p (type1);
16234   bool2_p = s390_vector_bool_type_p (type2);
16235 
16236   /* Mixing signed and unsigned types is forbidden for all
16237      operators.  */
16238   if (!bool1_p && !bool2_p
16239       && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16240     return N_("types differ in signedness");
16241 
16242   plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16243   muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16244 	      || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16245 	      || op == ROUND_DIV_EXPR);
16246   compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16247 	       || op == EQ_EXPR || op == NE_EXPR);
16248 
16249   if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16250     return N_("binary operator does not support two vector bool operands");
16251 
16252   if (bool1_p != bool2_p && (muldiv_p || compare_p))
16253     return N_("binary operator does not support vector bool operand");
16254 
16255   mode1 = TYPE_MODE (type1);
16256   mode2 = TYPE_MODE (type2);
16257 
16258   if (bool1_p != bool2_p && plusminus_p
16259       && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16260 	  || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16261     return N_("binary operator does not support mixing vector "
16262 	      "bool with floating point vector operands");
16263 
16264   return NULL;
16265 }
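
/* Illustrative sketch, not part of the original sources: user code
   (compiled with -mzvector) that trips the checks above:

     vector signed int si;
     vector unsigned int ui;
     vector bool int b1, b2;
     vector double d;

     si + ui;   "types differ in signedness"
     b1 + b2;   "binary operator does not support two vector bool operands"
     b1 * si;   "binary operator does not support vector bool operand"
     b1 < si;   "binary operator does not support vector bool operand"
     b1 + d;    "binary operator does not support mixing vector bool with
		 floating point vector operands"

   Combinations not rejected here, e.g. si + si or b1 & b2, fall through
   to the generic front-end checks.  */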
16266 
16267 /* Implement TARGET_C_EXCESS_PRECISION.
16268 
16269    FIXME: For historical reasons, float_t and double_t are typedef'ed to
16270    double on s390, causing operations on float_t to operate in a higher
16271    precision than is necessary.  However, it is not the case that SFmode
16272    operations have implicit excess precision, and we generate better
16273    code if we let the compiler know that no implicit extra precision is added.
16274 
16275    That means when we are compiling with -fexcess-precision=fast, the value
16276    we set for FLT_EVAL_METHOD will be out of line with the actual precision of
16277    float_t (though they would be correct for -fexcess-precision=standard).
16278 
16279    A complete fix would modify glibc to remove the unnecessary typedef
16280    of float_t to double.  */
16281 
16282 static enum flt_eval_method
16283 s390_excess_precision (enum excess_precision_type type)
16284 {
16285   switch (type)
16286     {
16287       case EXCESS_PRECISION_TYPE_IMPLICIT:
16288       case EXCESS_PRECISION_TYPE_FAST:
16289 	/* The fastest type to promote to will always be the native type,
16290 	   whether that occurs with implicit excess precision or
16291 	   otherwise.  */
16292 	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16293       case EXCESS_PRECISION_TYPE_STANDARD:
16294 	/* Otherwise, when we are in a standards compliant mode, to
16295 	   ensure consistency with the implementation in glibc, report that
16296 	   float is evaluated to the range and precision of double.  */
16297 	return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16298       default:
16299 	gcc_unreachable ();
16300     }
16301   return FLT_EVAL_METHOD_UNPREDICTABLE;
16302 }
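
/* Illustrative note, not part of the original sources: the user-visible
   effect of the hook above is the value of __FLT_EVAL_METHOD__:

     -fexcess-precision=standard  ->  __FLT_EVAL_METHOD__ == 1
	(float arithmetic reported as evaluating in double, matching
	 glibc's float_t typedef)
     -fexcess-precision=fast      ->  __FLT_EVAL_METHOD__ == 0
	(no promotion, hence the mismatch with float_t described in the
	 FIXME above)  */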
16303 
16304 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
16305 
16306 static unsigned HOST_WIDE_INT
16307 s390_asan_shadow_offset (void)
16308 {
16309   return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
16310 }
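
/* Illustrative note, not part of the original sources: with the usual
   ASAN shadow scale of 3, the sanitizer maps an application address A to
   the shadow byte at (A >> 3) + offset, i.e.

     64-bit:  shadow (A) = (A >> 3) + (1UL << 52)
     31-bit:  shadow (A) = (A >> 3) + 0x20000000  */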
16311 
16312 #ifdef HAVE_GAS_HIDDEN
16313 # define USE_HIDDEN_LINKONCE 1
16314 #else
16315 # define USE_HIDDEN_LINKONCE 0
16316 #endif
16317 
16318 /* Output an indirect branch trampoline for target register REGNO.  */
16319 
16320 static void
16321 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
16322 {
16323   tree decl;
16324   char thunk_label[32];
16325   int i;
16326 
16327   if (z10_p)
16328     sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16329   else
16330     sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16331 	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
16332 
16333   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16334 		     get_identifier (thunk_label),
16335 		     build_function_type_list (void_type_node, NULL_TREE));
16336   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16337 				   NULL_TREE, void_type_node);
16338   TREE_PUBLIC (decl) = 1;
16339   TREE_STATIC (decl) = 1;
16340   DECL_IGNORED_P (decl) = 1;
16341 
16342   if (USE_HIDDEN_LINKONCE)
16343     {
16344       cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16345 
16346       targetm.asm_out.unique_section (decl, 0);
16347       switch_to_section (get_named_section (decl, NULL, 0));
16348 
16349       targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16350       fputs ("\t.hidden\t", asm_out_file);
16351       assemble_name (asm_out_file, thunk_label);
16352       putc ('\n', asm_out_file);
16353       ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16354     }
16355   else
16356     {
16357       switch_to_section (text_section);
16358       ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16359     }
16360 
16361   DECL_INITIAL (decl) = make_node (BLOCK);
16362   current_function_decl = decl;
16363   allocate_struct_function (decl, false);
16364   init_function_start (decl);
16365   cfun->is_thunk = true;
16366   first_function_block_is_cold = false;
16367   final_start_function (emit_barrier (), asm_out_file, 1);
16368 
16369   /* This makes CFI at least usable for indirect jumps.
16370 
16371      Stopping in the thunk: the backtrace will point to the thunk target
16372      as if it was interrupted by a signal.  For a call this means that
16373      the call chain will be: caller->callee->thunk.  */
16374   if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16375     {
16376       fputs ("\t.cfi_signal_frame\n", asm_out_file);
16377       fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16378       for (i = 0; i < FPR15_REGNUM; i++)
16379 	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16380     }
16381 
16382   if (z10_p)
16383     {
16384       /* exrl  0,1f  */
16385 
16386       /* We generate this thunk for z10-compiled code although z10 is
16387 	 not currently enabled.  Tell the assembler to accept the
16388 	 instruction.  */
16389       if (!TARGET_CPU_Z10)
16390 	{
16391 	  fputs ("\t.machine push\n", asm_out_file);
16392 	  fputs ("\t.machine z10\n", asm_out_file);
16393 	}
16394       /* We use exrl even if -mzarch hasn't been specified on the
16395 	 command line so we have to tell the assembler to accept
16396 	 it.  */
16397       if (!TARGET_ZARCH)
16398 	fputs ("\t.machinemode zarch\n", asm_out_file);
16399 
16400       fputs ("\texrl\t0,1f\n", asm_out_file);
16401 
16402       if (!TARGET_ZARCH)
16403 	fputs ("\t.machinemode esa\n", asm_out_file);
16404 
16405       if (!TARGET_CPU_Z10)
16406 	fputs ("\t.machine pop\n", asm_out_file);
16407     }
16408   else
16409     {
16410       /* larl %r1,1f  */
16411       fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16412 	       INDIRECT_BRANCH_THUNK_REGNUM);
16413 
16414       /* ex 0,0(%r1)  */
16415       fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16416 	       INDIRECT_BRANCH_THUNK_REGNUM);
16417     }
16418 
16419   /* 0:    j 0b  */
16420   fputs ("0:\tj\t0b\n", asm_out_file);
16421 
16422   /* 1:    br <regno>  */
16423   fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16424 
16425   final_end_function ();
16426   init_insn_lengths ();
16427   free_after_compilation (cfun);
16428   set_cfun (NULL);
16429   current_function_decl = NULL;
16430 }
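
/* Illustrative sketch, not part of the original sources: for a z10-style
   thunk targeting, say, %r1, the function above emits essentially

	exrl	0,1f
     0:	j	0b
     1:	br	%r1

   while the pre-z10 variant uses an execute via the thunk register

	larl	%r<tmp>,1f	(<tmp> = INDIRECT_BRANCH_THUNK_REGNUM)
	ex	0,0(%r<tmp>)
     0:	j	0b
     1:	br	%r<regno>

   The BR is only ever run out of line through the execute instruction,
   and any speculative fall-through is caught by the "j 0b" self-loop.  */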
16431 
16432 /* Implement the asm.code_end target hook.  */
16433 
16434 static void
16435 s390_code_end (void)
16436 {
16437   int i;
16438 
16439   for (i = 1; i < 16; i++)
16440     {
16441       if (indirect_branch_z10thunk_mask & (1 << i))
16442 	s390_output_indirect_thunk_function (i, true);
16443 
16444       if (indirect_branch_prez10thunk_mask & (1 << i))
16445 	s390_output_indirect_thunk_function (i, false);
16446     }
16447 
16448   if (TARGET_INDIRECT_BRANCH_TABLE)
16449     {
16450       int o;
16451       int i;
16452 
16453       for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16454 	{
16455 	  if (indirect_branch_table_label_no[o] == 0)
16456 	    continue;
16457 
16458 	  switch_to_section (get_section (indirect_branch_table_name[o],
16459 					  0,
16460 					  NULL_TREE));
16461 	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16462 	    {
16463 	      char label_start[32];
16464 
16465 	      ASM_GENERATE_INTERNAL_LABEL (label_start,
16466 					   indirect_branch_table_label[o], i);
16467 
16468 	      fputs ("\t.long\t", asm_out_file);
16469 	      assemble_name_raw (asm_out_file, label_start);
16470 	      fputs ("-.\n", asm_out_file);
16471 	    }
16472 	  switch_to_section (current_function_section ());
16473 	}
16474     }
16475 }
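
/* Illustrative note, not part of the original sources: with
   -mindirect-branch-table the loop above emits, per branch kind, one
   self-relative entry for every recorded branch site into a dedicated
   section, roughly

	.section <indirect_branch_table_name[o]>
	.long	<internal label of the branch site>-.
	.long	...

   which lets post-processing tools locate all branches that were
   converted to thunk calls.  */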
16476 
16477 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */
16478 
16479 unsigned int
16480 s390_case_values_threshold (void)
16481 {
16482   /* Disabling branch prediction for indirect jumps makes jump tables
16483      much more expensive.  */
16484   if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16485     return 20;
16486 
16487   return default_case_values_threshold ();
16488 }
16489 
16490 /* Evaluate the insns between HEAD and TAIL and allow the back-end to
16491    install back-end specific dependencies.
16492 
16493    Establish an ANTI dependency between r11 and r15 restores from FPRs
16494    to prevent the instruction scheduler from reordering them since
16495    this would break CFI.  No further handling in the sched_reorder
16496    hook is required since the r11 and r15 restores will never appear in
16497    the same ready list with that change.  */
16498 void
16499 s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
16500 {
16501   if (!frame_pointer_needed || !epilogue_completed)
16502     return;
16503 
16504   while (head != tail && DEBUG_INSN_P (head))
16505     head = NEXT_INSN (head);
16506 
16507   rtx_insn *r15_restore = NULL, *r11_restore = NULL;
16508 
16509   for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
16510     {
16511       rtx set = single_set (insn);
16512       if (!INSN_P (insn)
16513 	  || !RTX_FRAME_RELATED_P (insn)
16514 	  || set == NULL_RTX
16515 	  || !REG_P (SET_DEST (set))
16516 	  || !FP_REG_P (SET_SRC (set)))
16517 	continue;
16518 
16519       if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
16520 	r11_restore = insn;
16521 
16522       if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
16523 	r15_restore = insn;
16524     }
16525 
16526   if (r11_restore == NULL || r15_restore == NULL)
16527     return;
16528   add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
16529 }
16530 
16531 /* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts.  */
16532 
16533 static unsigned HOST_WIDE_INT
16534 s390_shift_truncation_mask (machine_mode mode)
16535 {
16536   return mode == DImode || mode == SImode ? 63 : 0;
16537 }
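
/* Illustrative note, not part of the original sources: returning 63 for
   SImode and DImode tells the middle end that the shift instructions
   already truncate variable shift counts to 6 bits, so an explicit mask
   of the count is redundant and can be dropped, e.g.

     unsigned long f (unsigned long x, int n) { return x << (n & 63); }

   needs no separate AND for the shift count.  */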
16538 
16539 /* Initialize GCC target structure.  */
16540 
16541 #undef  TARGET_ASM_ALIGNED_HI_OP
16542 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
16543 #undef  TARGET_ASM_ALIGNED_DI_OP
16544 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
16545 #undef  TARGET_ASM_INTEGER
16546 #define TARGET_ASM_INTEGER s390_assemble_integer
16547 
16548 #undef  TARGET_ASM_OPEN_PAREN
16549 #define TARGET_ASM_OPEN_PAREN ""
16550 
16551 #undef  TARGET_ASM_CLOSE_PAREN
16552 #define TARGET_ASM_CLOSE_PAREN ""
16553 
16554 #undef TARGET_OPTION_OVERRIDE
16555 #define TARGET_OPTION_OVERRIDE s390_option_override
16556 
16557 #ifdef TARGET_THREAD_SSP_OFFSET
16558 #undef TARGET_STACK_PROTECT_GUARD
16559 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
16560 #endif
16561 
16562 #undef	TARGET_ENCODE_SECTION_INFO
16563 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
16564 
16565 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16566 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16567 
16568 #ifdef HAVE_AS_TLS
16569 #undef TARGET_HAVE_TLS
16570 #define TARGET_HAVE_TLS true
16571 #endif
16572 #undef TARGET_CANNOT_FORCE_CONST_MEM
16573 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
16574 
16575 #undef TARGET_DELEGITIMIZE_ADDRESS
16576 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
16577 
16578 #undef TARGET_LEGITIMIZE_ADDRESS
16579 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
16580 
16581 #undef TARGET_RETURN_IN_MEMORY
16582 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
16583 
16584 #undef  TARGET_INIT_BUILTINS
16585 #define TARGET_INIT_BUILTINS s390_init_builtins
16586 #undef  TARGET_EXPAND_BUILTIN
16587 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
16588 #undef  TARGET_BUILTIN_DECL
16589 #define TARGET_BUILTIN_DECL s390_builtin_decl
16590 
16591 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
16592 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
16593 
16594 #undef TARGET_ASM_OUTPUT_MI_THUNK
16595 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
16596 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
16597 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
16598 
16599 #undef TARGET_C_EXCESS_PRECISION
16600 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
16601 
16602 #undef  TARGET_SCHED_ADJUST_PRIORITY
16603 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
16604 #undef TARGET_SCHED_ISSUE_RATE
16605 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
16606 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
16607 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
16608 
16609 #undef TARGET_SCHED_VARIABLE_ISSUE
16610 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
16611 #undef TARGET_SCHED_REORDER
16612 #define TARGET_SCHED_REORDER s390_sched_reorder
16613 #undef TARGET_SCHED_INIT
16614 #define TARGET_SCHED_INIT s390_sched_init
16615 
16616 #undef TARGET_CANNOT_COPY_INSN_P
16617 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
16618 #undef TARGET_RTX_COSTS
16619 #define TARGET_RTX_COSTS s390_rtx_costs
16620 #undef TARGET_ADDRESS_COST
16621 #define TARGET_ADDRESS_COST s390_address_cost
16622 #undef TARGET_REGISTER_MOVE_COST
16623 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
16624 #undef TARGET_MEMORY_MOVE_COST
16625 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
16626 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
16627 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
16628   s390_builtin_vectorization_cost
16629 
16630 #undef TARGET_MACHINE_DEPENDENT_REORG
16631 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
16632 
16633 #undef TARGET_VALID_POINTER_MODE
16634 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
16635 
16636 #undef TARGET_BUILD_BUILTIN_VA_LIST
16637 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
16638 #undef TARGET_EXPAND_BUILTIN_VA_START
16639 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
16640 #undef TARGET_ASAN_SHADOW_OFFSET
16641 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
16642 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
16643 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
16644 
16645 #undef TARGET_PROMOTE_FUNCTION_MODE
16646 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
16647 #undef TARGET_PASS_BY_REFERENCE
16648 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
16649 
16650 #undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
16651 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
16652 
16653 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
16654 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
16655 #undef TARGET_FUNCTION_ARG
16656 #define TARGET_FUNCTION_ARG s390_function_arg
16657 #undef TARGET_FUNCTION_ARG_ADVANCE
16658 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
16659 #undef TARGET_FUNCTION_ARG_PADDING
16660 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
16661 #undef TARGET_FUNCTION_VALUE
16662 #define TARGET_FUNCTION_VALUE s390_function_value
16663 #undef TARGET_LIBCALL_VALUE
16664 #define TARGET_LIBCALL_VALUE s390_libcall_value
16665 #undef TARGET_STRICT_ARGUMENT_NAMING
16666 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
16667 
16668 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
16669 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
16670 
16671 #undef TARGET_FIXED_CONDITION_CODE_REGS
16672 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
16673 
16674 #undef TARGET_CC_MODES_COMPATIBLE
16675 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
16676 
16677 #undef TARGET_INVALID_WITHIN_DOLOOP
16678 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
16679 
16680 #ifdef HAVE_AS_TLS
16681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
16682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
16683 #endif
16684 
16685 #undef TARGET_DWARF_FRAME_REG_MODE
16686 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
16687 
16688 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
16689 #undef TARGET_MANGLE_TYPE
16690 #define TARGET_MANGLE_TYPE s390_mangle_type
16691 #endif
16692 
16693 #undef TARGET_SCALAR_MODE_SUPPORTED_P
16694 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
16695 
16696 #undef TARGET_VECTOR_MODE_SUPPORTED_P
16697 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
16698 
16699 #undef  TARGET_PREFERRED_RELOAD_CLASS
16700 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
16701 
16702 #undef TARGET_SECONDARY_RELOAD
16703 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
16704 #undef TARGET_SECONDARY_MEMORY_NEEDED
16705 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
16706 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
16707 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
16708 
16709 #undef TARGET_LIBGCC_CMP_RETURN_MODE
16710 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
16711 
16712 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
16713 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
16714 
16715 #undef TARGET_LEGITIMATE_ADDRESS_P
16716 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
16717 
16718 #undef TARGET_LEGITIMATE_CONSTANT_P
16719 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
16720 
16721 #undef TARGET_LRA_P
16722 #define TARGET_LRA_P s390_lra_p
16723 
16724 #undef TARGET_CAN_ELIMINATE
16725 #define TARGET_CAN_ELIMINATE s390_can_eliminate
16726 
16727 #undef TARGET_CONDITIONAL_REGISTER_USAGE
16728 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
16729 
16730 #undef TARGET_LOOP_UNROLL_ADJUST
16731 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
16732 
16733 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
16734 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
16735 #undef TARGET_TRAMPOLINE_INIT
16736 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
16737 
16738 /* PR 79421 */
16739 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
16740 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
16741 
16742 #undef TARGET_UNWIND_WORD_MODE
16743 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
16744 
16745 #undef TARGET_CANONICALIZE_COMPARISON
16746 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
16747 
16748 #undef TARGET_HARD_REGNO_SCRATCH_OK
16749 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
16750 
16751 #undef TARGET_HARD_REGNO_NREGS
16752 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
16753 #undef TARGET_HARD_REGNO_MODE_OK
16754 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
16755 #undef TARGET_MODES_TIEABLE_P
16756 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
16757 
16758 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
16759 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
16760   s390_hard_regno_call_part_clobbered
16761 
16762 #undef TARGET_ATTRIBUTE_TABLE
16763 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16764 
16765 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16766 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16767 
16768 #undef TARGET_SET_UP_BY_PROLOGUE
16769 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16770 
16771 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16772 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16773 
16774 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16775 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16776   s390_use_by_pieces_infrastructure_p
16777 
16778 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16779 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16780 
16781 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16782 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16783 
16784 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16785 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16786 
16787 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16788 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16789 
16790 #undef TARGET_VECTOR_ALIGNMENT
16791 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16792 
16793 #undef TARGET_INVALID_BINARY_OP
16794 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16795 
16796 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16797 #undef TARGET_ASM_FILE_START
16798 #define TARGET_ASM_FILE_START s390_asm_file_start
16799 #endif
16800 
16801 #undef TARGET_ASM_FILE_END
16802 #define TARGET_ASM_FILE_END s390_asm_file_end
16803 
16804 #undef TARGET_SET_CURRENT_FUNCTION
16805 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16806 
16807 #if S390_USE_TARGET_ATTRIBUTE
16808 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16809 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16810 
16811 #undef TARGET_CAN_INLINE_P
16812 #define TARGET_CAN_INLINE_P s390_can_inline_p
16813 #endif
16814 
16815 #undef TARGET_OPTION_RESTORE
16816 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16817 
16818 #undef TARGET_CAN_CHANGE_MODE_CLASS
16819 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
16820 
16821 #undef TARGET_CONSTANT_ALIGNMENT
16822 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
16823 
16824 #undef TARGET_ASM_CODE_END
16825 #define TARGET_ASM_CODE_END s390_code_end
16826 
16827 #undef TARGET_CASE_VALUES_THRESHOLD
16828 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
16829 
16830 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
16831 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
16832   s390_sched_dependencies_evaluation
16833 
16834 #undef TARGET_SHIFT_TRUNCATION_MASK
16835 #define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask
16836 
16837 /* Use only short displacement, since long displacement is not available for
16838    the floating point instructions.  */
16839 #undef TARGET_MAX_ANCHOR_OFFSET
16840 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
16841 
16842 struct gcc_target targetm = TARGET_INITIALIZER;
16843 
16844 #include "gt-s390.h"
16845