/* Subroutines used for code generation for RISC-V.
   Copyright (C) 2011-2018 Free Software Foundation, Inc.
   Contributed by Andrew Waterman (andrew@sifive.com).
   Based on MIPS target for GNU compiler.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "recog.h"
#include "output.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "varasm.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "function.h"
#include "hashtab.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "memmodel.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "libfuncs.h"
#include "reload.h"
#include "tm_p.h"
#include "ggc.h"
#include "gstab.h"
#include "hash-table.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "predict.h"
#include "basic-block.h"
#include "bitmap.h"
#include "regset.h"
#include "df.h"
#include "sched-int.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "diagnostic.h"
#include "target-globals.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "builtins.h"
#include "rtl-iter.h"

/* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF.  */
#define UNSPEC_ADDRESS_P(X)					\
  (GET_CODE (X) == UNSPEC					\
   && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST			\
   && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES)

/* Extract the symbol or label from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS(X) \
  XVECEXP (X, 0, 0)

/* Extract the symbol type from UNSPEC wrapper X.  */
#define UNSPEC_ADDRESS_TYPE(X) \
  ((enum riscv_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST))

/* True if bit BIT is set in VALUE.  */
#define BITSET_P(VALUE, BIT) (((VALUE) & (1ULL << (BIT))) != 0)
124 
125 /* Classifies an address.
126 
127    ADDRESS_REG
128        A natural register + offset address.  The register satisfies
129        riscv_valid_base_register_p and the offset is a const_arith_operand.
130 
131    ADDRESS_LO_SUM
132        A LO_SUM rtx.  The first operand is a valid base register and
133        the second operand is a symbolic address.
134 
135    ADDRESS_CONST_INT
136        A signed 16-bit constant address.
137 
138    ADDRESS_SYMBOLIC:
139        A constant symbolic address.  */
enum riscv_address_type {
  ADDRESS_REG,
  ADDRESS_LO_SUM,
  ADDRESS_CONST_INT,
  ADDRESS_SYMBOLIC
};

/* Information about a function's frame layout.  */
struct GTY(())  riscv_frame_info {
  /* The size of the frame in bytes.  */
  HOST_WIDE_INT total_size;

  /* Bit X is set if the function saves or restores GPR X.  */
  unsigned int mask;

  /* Likewise FPR X.  */
  unsigned int fmask;

  /* How much the GPR save/restore routines adjust sp (or 0 if unused).  */
  unsigned save_libcall_adjustment;

  /* Offsets of fixed-point and floating-point save areas from frame
     bottom.  */
  HOST_WIDE_INT gp_sp_offset;
  HOST_WIDE_INT fp_sp_offset;

  /* Offset of virtual frame pointer from stack pointer/frame bottom.  */
  HOST_WIDE_INT frame_pointer_offset;

  /* Offset of hard frame pointer from stack pointer/frame bottom.  */
  HOST_WIDE_INT hard_frame_pointer_offset;

  /* The offset of arg_pointer_rtx from the bottom of the frame.  */
  HOST_WIDE_INT arg_pointer_offset;
};

struct GTY(())  machine_function {
  /* The number of extra stack bytes taken up by register varargs.
     This area is allocated by the callee at the very top of the frame.  */
  int varargs_size;

  /* The current frame information, calculated by riscv_compute_frame_info.  */
  struct riscv_frame_info frame;
};

/* Information about a single argument.  */
struct riscv_arg_info {
  /* True if the argument is at least partially passed on the stack.  */
  bool stack_p;

  /* The number of integer registers allocated to this argument.  */
  unsigned int num_gprs;

  /* The offset of the first register used, provided num_gprs is nonzero.
     If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS.  */
  unsigned int gpr_offset;

  /* The number of floating-point registers allocated to this argument.  */
  unsigned int num_fprs;

  /* The offset of the first register used, provided num_fprs is nonzero.  */
  unsigned int fpr_offset;
};

/* Information about an address described by riscv_address_type.

   ADDRESS_CONST_INT
       No fields are used.

   ADDRESS_REG
       REG is the base register and OFFSET is the constant offset.

   ADDRESS_LO_SUM
       REG and OFFSET are the operands to the LO_SUM and SYMBOL_TYPE
       is the type of symbol it references.

   ADDRESS_SYMBOLIC
       SYMBOL_TYPE is the type of symbol that the address references.  */
struct riscv_address_info {
  enum riscv_address_type type;
  rtx reg;
  rtx offset;
  enum riscv_symbol_type symbol_type;
};

/* One stage in a constant building sequence.  These sequences have
   the form:

	A = VALUE[0]
	A = A CODE[1] VALUE[1]
	A = A CODE[2] VALUE[2]
	...

   where A is an accumulator, each CODE[i] is a binary rtl operation
   and each VALUE[i] is a constant integer.  CODE[0] is undefined.  */
struct riscv_integer_op {
  enum rtx_code code;
  unsigned HOST_WIDE_INT value;
};

/* The largest number of operations needed to load an integer constant.
   The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
#define RISCV_MAX_INTEGER_OPS 8
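
/* For example, the 64-bit constant 0x100000001 is loaded with the
   three-operation sequence

	li	a0,1
	slli	a0,a0,32
	addi	a0,a0,1

   which corresponds to codes[] = { {UNKNOWN, 1}, {ASHIFT, 32},
   {PLUS, 1} }.  */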

/* Costs of various operations on the different architectures.  */

struct riscv_tune_info
{
  unsigned short fp_add[2];
  unsigned short fp_mul[2];
  unsigned short fp_div[2];
  unsigned short int_mul[2];
  unsigned short int_div[2];
  unsigned short issue_rate;
  unsigned short branch_cost;
  unsigned short memory_cost;
  bool slow_unaligned_access;
};

/* Information about one CPU we know about.  */
struct riscv_cpu_info {
  /* This CPU's canonical name.  */
  const char *name;

  /* Tuning parameters for this CPU.  */
  const struct riscv_tune_info *tune_info;
};

/* Global variables for machine-dependent things.  */

/* Whether unaligned accesses execute very slowly.  */
bool riscv_slow_unaligned_access;

/* Which tuning parameters to use.  */
static const struct riscv_tune_info *tune_info;

/* Index R is the smallest register class that contains register R.  */
const enum reg_class riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
  GR_REGS,	GR_REGS,	GR_REGS,	GR_REGS,
  GR_REGS,	GR_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  JALR_REGS,	JALR_REGS,	JALR_REGS,	JALR_REGS,
  SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,	SIBCALL_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FP_REGS,	FP_REGS,	FP_REGS,	FP_REGS,
  FRAME_REGS,	FRAME_REGS,
};

/* Costs to use when optimizing for rocket.  */
static const struct riscv_tune_info rocket_tune_info = {
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
  1,						/* issue_rate */
  3,						/* branch_cost */
  5,						/* memory_cost */
  true,						/* slow_unaligned_access */
};

/* Costs to use when optimizing for size.  */
static const struct riscv_tune_info optimize_size_tune_info = {
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_add */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* fp_div */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_mul */
  {COSTS_N_INSNS (1), COSTS_N_INSNS (1)},	/* int_div */
  1,						/* issue_rate */
  1,						/* branch_cost */
  2,						/* memory_cost */
  false,					/* slow_unaligned_access */
};

/* A table describing all the processors GCC knows about.  */
static const struct riscv_cpu_info riscv_cpu_info_table[] = {
  { "rocket", &rocket_tune_info },
  { "size", &optimize_size_tune_info },
};

/* Return the riscv_cpu_info entry for the given name string.  */

static const struct riscv_cpu_info *
riscv_parse_cpu (const char *cpu_string)
{
  for (unsigned i = 0; i < ARRAY_SIZE (riscv_cpu_info_table); i++)
    if (strcmp (riscv_cpu_info_table[i].name, cpu_string) == 0)
      return riscv_cpu_info_table + i;

  error ("unknown cpu %qs for -mtune", cpu_string);
  return riscv_cpu_info_table;
}

/* Helper function for riscv_build_integer; arguments are as for
   riscv_build_integer.  */

static int
riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
		       HOST_WIDE_INT value, enum machine_mode mode)
{
  HOST_WIDE_INT low_part = CONST_LOW_PART (value);
  int cost = RISCV_MAX_INTEGER_OPS + 1, alt_cost;
  struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];

  if (SMALL_OPERAND (value) || LUI_OPERAND (value))
    {
      /* Simply ADDI or LUI.  */
      codes[0].code = UNKNOWN;
      codes[0].value = value;
      return 1;
    }

  /* End with ADDI.  When constructing HImode constants, do not generate any
     intermediate value that is not itself a valid HImode constant.  The
     XORI case below will handle those remaining HImode constants.  */
  if (low_part != 0
      && (mode != HImode
	  || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1)))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value - low_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = PLUS;
	  alt_codes[alt_cost-1].value = low_part;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* End with XORI.  */
  if (cost > 2 && (low_part < 0 || mode == HImode))
    {
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, value ^ low_part, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = XOR;
	  alt_codes[alt_cost-1].value = low_part;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  /* Eliminate trailing zeros and end with SLLI.  */
  if (cost > 2 && (value & 1) == 0)
    {
      int shift = ctz_hwi (value);
      unsigned HOST_WIDE_INT x = value;
      x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);

      /* Don't eliminate the lower 12 bits if LUI might apply.  */
      if (shift > IMM_BITS && !SMALL_OPERAND (x) && LUI_OPERAND (x << IMM_BITS))
	shift -= IMM_BITS, x <<= IMM_BITS;

      alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = ASHIFT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  gcc_assert (cost <= RISCV_MAX_INTEGER_OPS);
  return cost;
}
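
/* For example, the constant 0x7fffffff is loaded by the XORI ending
   above as the two-operation sequence

	lui	a0,0x80000
	xori	a0,a0,-1

   because 0x7fffffff ^ -1 is a valid LUI operand; ending with ADDI
   would instead need three operations.  */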

/* Fill CODES with a sequence of rtl operations to load VALUE.
   Return the number of operations needed.  */

static int
riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
		     enum machine_mode mode)
{
  int cost = riscv_build_integer_1 (codes, value, mode);

  /* Eliminate leading zeros and end with SRLI.  */
  if (value > 0 && cost > 2)
    {
      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
      int alt_cost, shift = clz_hwi (value);
      HOST_WIDE_INT shifted_val;

      /* Try filling trailing bits with 1s.  */
      shifted_val = (value << shift) | ((((HOST_WIDE_INT) 1) << shift) - 1);
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}

      /* Try filling trailing bits with 0s.  */
      shifted_val = value << shift;
      alt_cost = 1 + riscv_build_integer_1 (alt_codes, shifted_val, mode);
      if (alt_cost < cost)
	{
	  alt_codes[alt_cost-1].code = LSHIFTRT;
	  alt_codes[alt_cost-1].value = shift;
	  memcpy (codes, alt_codes, sizeof (alt_codes));
	  cost = alt_cost;
	}
    }

  return cost;
}
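
/* For example, on RV64 the constant 0xffffffff is loaded by filling
   the trailing bits with 1s and shifting the result back right:

	li	a0,-1
	srli	a0,a0,32  */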

/* Return the cost of constructing VAL in the event that a scratch
   register is available.  */

static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
  int cost;
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];

  cost = 2 + riscv_build_integer (codes, loval, VOIDmode);
  if (loval != hival)
    cost += riscv_build_integer (codes, hival, VOIDmode);

  return cost;
}

/* Return the cost of constructing the integer constant VAL.  */

static int
riscv_integer_cost (HOST_WIDE_INT val)
{
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
  return MIN (riscv_build_integer (codes, val, VOIDmode),
	      riscv_split_integer_cost (val));
}

/* Try to split a 64b integer into 32b parts, then reassemble.  */

static rtx
riscv_split_integer (HOST_WIDE_INT val, enum machine_mode mode)
{
  unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
  unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
  rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);

  riscv_move_integer (hi, hi, hival);
  riscv_move_integer (lo, lo, loval);

  hi = gen_rtx_fmt_ee (ASHIFT, mode, hi, GEN_INT (32));
  hi = force_reg (mode, hi);

  return gen_rtx_fmt_ee (PLUS, mode, hi, lo);
}
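
/* Because LOVAL is the sign-extended low half, subtracting it from VAL
   before extracting the high half compensates for the borrow, so
   (HIVAL << 32) + LOVAL reconstructs VAL exactly.  */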

/* Return true if X is a thread-local symbol.  */

static bool
riscv_tls_symbol_p (const_rtx x)
{
  return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0;
}

/* Return true if symbol X binds locally.  */

static bool
riscv_symbol_binds_local_p (const_rtx x)
{
  if (SYMBOL_REF_P (x))
    return (SYMBOL_REF_DECL (x)
	    ? targetm.binds_local_p (SYMBOL_REF_DECL (x))
	    : SYMBOL_REF_LOCAL_P (x));
  else
    return false;
}

/* Return the method that should be used to access SYMBOL_REF or
   LABEL_REF X.  */

static enum riscv_symbol_type
riscv_classify_symbol (const_rtx x)
{
  if (riscv_tls_symbol_p (x))
    return SYMBOL_TLS;

  if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p (x))
    return SYMBOL_GOT_DISP;

  return riscv_cmodel == CM_MEDLOW ? SYMBOL_ABSOLUTE : SYMBOL_PCREL;
}

/* Classify the base of symbolic expression X.  */

enum riscv_symbol_type
riscv_classify_symbolic_expression (rtx x)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    return UNSPEC_ADDRESS_TYPE (x);

  return riscv_classify_symbol (x);
}

/* Return true if X is a symbolic constant.  If it is, store the type of
   the symbol in *SYMBOL_TYPE.  */

bool
riscv_symbolic_constant_p (rtx x, enum riscv_symbol_type *symbol_type)
{
  rtx offset;

  split_const (x, &x, &offset);
  if (UNSPEC_ADDRESS_P (x))
    {
      *symbol_type = UNSPEC_ADDRESS_TYPE (x);
      x = UNSPEC_ADDRESS (x);
    }
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
    *symbol_type = riscv_classify_symbol (x);
  else
    return false;

  if (offset == const0_rtx)
    return true;

  /* Nonzero offsets are only valid for references that don't use the GOT.  */
  switch (*symbol_type)
    {
    case SYMBOL_ABSOLUTE:
    case SYMBOL_PCREL:
    case SYMBOL_TLS_LE:
      /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
      return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);

    default:
      return false;
    }
}
/* Return the number of instructions necessary to reference a symbol
   of type TYPE.  */

static int
riscv_symbol_insns (enum riscv_symbol_type type)
{
  switch (type)
    {
    case SYMBOL_TLS: return 0; /* Depends on the TLS model.  */
    case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference.  */
    case SYMBOL_PCREL: return 2; /* AUIPC + the reference.  */
    case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference.  */
    case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference.  */
    default: gcc_unreachable ();
    }
}
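
/* For example, a SYMBOL_ABSOLUTE access counts as two instructions
   because it expands to an LUI/reference pair such as

	lui	a0,%hi(sym)
	lw	a0,%lo(sym)(a0)

   while SYMBOL_PCREL uses AUIPC with %pcrel_hi/%pcrel_lo instead.  */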

/* Implement TARGET_LEGITIMATE_CONSTANT_P.  */

static bool
riscv_legitimate_constant_p (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  return riscv_const_insns (x) > 0;
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
riscv_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum riscv_symbol_type type;
  rtx base, offset;

  /* There is no assembler syntax for expressing an address-sized
     high part.  */
  if (GET_CODE (x) == HIGH)
    return true;

  split_const (x, &base, &offset);
  if (riscv_symbolic_constant_p (base, &type))
    {
      /* As an optimization, don't spill symbolic constants that are as
	 cheap to rematerialize as to access in the constant pool.  */
      if (SMALL_OPERAND (INTVAL (offset)) && riscv_symbol_insns (type) > 0)
	return true;

      /* As an optimization, avoid needlessly generating dynamic
	 relocations.  */
      if (flag_pic)
	return true;
    }

  /* TLS symbols must be computed by riscv_legitimize_move.  */
  if (tls_referenced_p (x))
    return true;

  return false;
}

/* Return true if register REGNO is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

int
riscv_regno_mode_ok_for_base_p (int regno,
				enum machine_mode mode ATTRIBUTE_UNUSED,
				bool strict_p)
{
  if (!HARD_REGISTER_NUM_P (regno))
    {
      if (!strict_p)
	return true;
      regno = reg_renumber[regno];
    }

  /* These fake registers will be eliminated to either the stack or
     hard frame pointer, both of which are usually valid base registers.
     Reload deals with the cases where the eliminated form isn't valid.  */
  if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM)
    return true;

  return GP_REG_P (regno);
}

/* Return true if X is a valid base register for mode MODE.
   STRICT_P is true if REG_OK_STRICT is in effect.  */

static bool
riscv_valid_base_register_p (rtx x, enum machine_mode mode, bool strict_p)
{
  if (!strict_p && GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  return (REG_P (x)
	  && riscv_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p));
}

/* Return true if, for every base register BASE_REG, (plus BASE_REG X)
   can address a value of mode MODE.  */

static bool
riscv_valid_offset_p (rtx x, enum machine_mode mode)
{
  /* Check that X is a signed 12-bit number.  */
  if (!const_arith_operand (x, Pmode))
    return false;

  /* We may need to split multiword moves, so make sure that every word
     is accessible.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && !SMALL_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
    return false;

  return true;
}

/* Should a symbol of type SYMBOL_TYPE be split in two?  */

bool
riscv_split_symbol_type (enum riscv_symbol_type symbol_type)
{
  if (symbol_type == SYMBOL_TLS_LE)
    return true;

  if (!TARGET_EXPLICIT_RELOCS)
    return false;

  return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL;
}

/* Return true if a LO_SUM can address a value of mode MODE when the
   LO_SUM symbol has type SYM_TYPE.  */

static bool
riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, enum machine_mode mode)
{
  /* Check that symbols of type SYMBOL_TYPE can be used to access values
     of mode MODE.  */
  if (riscv_symbol_insns (sym_type) == 0)
    return false;

  /* Check that there is a known low-part relocation.  */
  if (!riscv_split_symbol_type (sym_type))
    return false;

  /* We may need to split multiword moves, so make sure that each word
     can be accessed without inducing a carry.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && (!TARGET_STRICT_ALIGN
	  || GET_MODE_BITSIZE (mode) > GET_MODE_ALIGNMENT (mode)))
    return false;

  return true;
}

/* Return true if X is a valid address for machine mode MODE.  If it is,
   fill in INFO appropriately.  STRICT_P is true if REG_OK_STRICT is in
   effect.  */

static bool
riscv_classify_address (struct riscv_address_info *info, rtx x,
			enum machine_mode mode, bool strict_p)
{
  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
      info->type = ADDRESS_REG;
      info->reg = x;
      info->offset = const0_rtx;
      return riscv_valid_base_register_p (info->reg, mode, strict_p);

    case PLUS:
      info->type = ADDRESS_REG;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
	      && riscv_valid_offset_p (info->offset, mode));

    case LO_SUM:
      info->type = ADDRESS_LO_SUM;
      info->reg = XEXP (x, 0);
      info->offset = XEXP (x, 1);
      /* We have to trust the creator of the LO_SUM to do something vaguely
	 sane.  Target-independent code that creates a LO_SUM should also
	 create and verify the matching HIGH.  Target-independent code that
	 adds an offset to a LO_SUM must prove that the offset will not
	 induce a carry.  Failure to do either of these things would be
	 a bug, and we are not required to check for it here.  The RISC-V
	 backend itself should only create LO_SUMs for valid symbolic
	 constants, with the high part being either a HIGH or a copy
	 of _gp.  */
      info->symbol_type
	= riscv_classify_symbolic_expression (info->offset);
      return (riscv_valid_base_register_p (info->reg, mode, strict_p)
	      && riscv_valid_lo_sum_p (info->symbol_type, mode));

    case CONST_INT:
      /* Small-integer addresses don't occur very often, but they
	 are legitimate if x0 is a valid base register.  */
      info->type = ADDRESS_CONST_INT;
      return SMALL_OPERAND (INTVAL (x));

    default:
      return false;
    }
}

/* Implement TARGET_LEGITIMATE_ADDRESS_P.  */

static bool
riscv_legitimate_address_p (enum machine_mode mode, rtx x, bool strict_p)
{
  struct riscv_address_info addr;

  return riscv_classify_address (&addr, x, mode, strict_p);
}

/* Return the number of instructions needed to load or store a value
   of mode MODE at address X.  Return 0 if X isn't valid for MODE.
   Assume that multiword moves may need to be split into word moves
   if MIGHT_SPLIT_P, otherwise assume that a single load or store is
   enough.  */

int
riscv_address_insns (rtx x, enum machine_mode mode, bool might_split_p)
{
  struct riscv_address_info addr;
  int n = 1;

  if (!riscv_classify_address (&addr, x, mode, false))
    return 0;

  /* BLKmode is used for single unaligned loads and stores and should
     not count as a multiword mode.  */
  if (mode != BLKmode && might_split_p)
    n += (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  if (addr.type == ADDRESS_LO_SUM)
    n += riscv_symbol_insns (addr.symbol_type) - 1;

  return n;
}

/* Return the number of instructions needed to load constant X.
   Return 0 if X isn't a valid constant.  */

int
riscv_const_insns (rtx x)
{
  enum riscv_symbol_type symbol_type;
  rtx offset;

  switch (GET_CODE (x))
    {
    case HIGH:
      if (!riscv_symbolic_constant_p (XEXP (x, 0), &symbol_type)
	  || !riscv_split_symbol_type (symbol_type))
	return 0;

      /* This is simply an LUI.  */
      return 1;

    case CONST_INT:
      {
	int cost = riscv_integer_cost (INTVAL (x));
	/* Force complicated constants to memory.  */
	return cost < 4 ? cost : 0;
      }

    case CONST_DOUBLE:
    case CONST_VECTOR:
      /* We can use x0 to load floating-point zero.  */
      return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;

    case CONST:
      /* See if we can refer to X directly.  */
      if (riscv_symbolic_constant_p (x, &symbol_type))
	return riscv_symbol_insns (symbol_type);

      /* Otherwise try splitting the constant into a base and offset.  */
      split_const (x, &x, &offset);
      if (offset != 0)
	{
	  int n = riscv_const_insns (x);
	  if (n != 0)
	    return n + riscv_integer_cost (INTVAL (offset));
	}
      return 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return riscv_symbol_insns (riscv_classify_symbol (x));

    default:
      return 0;
    }
}

/* X is a doubleword constant that can be handled by splitting it into
   two words and loading each word separately.  Return the number of
   instructions required to do this.  */

int
riscv_split_const_insns (rtx x)
{
  unsigned int low, high;

  low = riscv_const_insns (riscv_subword (x, false));
  high = riscv_const_insns (riscv_subword (x, true));
  gcc_assert (low > 0 && high > 0);
  return low + high;
}

/* Return the number of instructions needed to implement INSN,
   given that it loads from or stores to MEM.  */

int
riscv_load_store_insns (rtx mem, rtx_insn *insn)
{
  enum machine_mode mode;
  bool might_split_p;
  rtx set;

  gcc_assert (MEM_P (mem));
  mode = GET_MODE (mem);

  /* Try to prove that INSN does not need to be split.  */
  might_split_p = true;
  if (GET_MODE_BITSIZE (mode) <= 32)
    might_split_p = false;
  else if (GET_MODE_BITSIZE (mode) == 64)
    {
      set = single_set (insn);
      if (set && !riscv_split_64bit_move_p (SET_DEST (set), SET_SRC (set)))
	might_split_p = false;
    }

  return riscv_address_insns (XEXP (mem, 0), mode, might_split_p);
}

/* Emit a move from SRC to DEST.  Assume that the move expanders can
   handle all moves if !can_create_pseudo_p ().  The distinction is
   important because, unlike emit_move_insn, the move expanders know
   how to force Pmode objects into the constant pool even when the
   constant pool address is not itself legitimate.  */

rtx
riscv_emit_move (rtx dest, rtx src)
{
  return (can_create_pseudo_p ()
	  ? emit_move_insn (dest, src)
	  : emit_move_insn_1 (dest, src));
}

/* Emit an instruction of the form (set TARGET SRC).  */

static rtx
riscv_emit_set (rtx target, rtx src)
{
  emit_insn (gen_rtx_SET (target, src));
  return target;
}

/* Emit an instruction of the form (set DEST (CODE X Y)).  */

static rtx
riscv_emit_binary (enum rtx_code code, rtx dest, rtx x, rtx y)
{
  return riscv_emit_set (dest, gen_rtx_fmt_ee (code, GET_MODE (dest), x, y));
}

/* Compute (CODE X Y) and store the result in a new register
   of mode MODE.  Return that new register.  */

static rtx
riscv_force_binary (enum machine_mode mode, enum rtx_code code, rtx x, rtx y)
{
  return riscv_emit_binary (code, gen_reg_rtx (mode), x, y);
}

/* Copy VALUE to a register and return that register.  If new pseudos
   are allowed, copy it into a new register, otherwise use DEST.  */

static rtx
riscv_force_temporary (rtx dest, rtx value)
{
  if (can_create_pseudo_p ())
    return force_reg (Pmode, value);
  else
    {
      riscv_emit_move (dest, value);
      return dest;
    }
}

/* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE,
   then add CONST_INT OFFSET to the result.  */

static rtx
riscv_unspec_address_offset (rtx base, rtx offset,
			     enum riscv_symbol_type symbol_type)
{
  base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base),
			 UNSPEC_ADDRESS_FIRST + symbol_type);
  if (offset != const0_rtx)
    base = gen_rtx_PLUS (Pmode, base, offset);
  return gen_rtx_CONST (Pmode, base);
}

/* Return an UNSPEC address with underlying address ADDRESS and symbol
   type SYMBOL_TYPE.  */

rtx
riscv_unspec_address (rtx address, enum riscv_symbol_type symbol_type)
{
  rtx base, offset;

  split_const (address, &base, &offset);
  return riscv_unspec_address_offset (base, offset, symbol_type);
}

/* If OP is an UNSPEC address, return the address to which it refers,
   otherwise return OP itself.  */

static rtx
riscv_strip_unspec_address (rtx op)
{
  rtx base, offset;

  split_const (op, &base, &offset);
  if (UNSPEC_ADDRESS_P (base))
    op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset));
  return op;
}

/* If riscv_unspec_address (ADDR, SYMBOL_TYPE) is a 32-bit value, add the
   high part to BASE and return the result.  Just return BASE otherwise.
   TEMP is as for riscv_force_temporary.

   The returned expression can be used as the first operand to a LO_SUM.  */

static rtx
riscv_unspec_offset_high (rtx temp, rtx addr, enum riscv_symbol_type symbol_type)
{
  addr = gen_rtx_HIGH (Pmode, riscv_unspec_address (addr, symbol_type));
  return riscv_force_temporary (temp, addr);
}

/* Load an entry from the GOT for a TLS GD access.  */

static rtx
riscv_got_load_tls_gd (rtx dest, rtx sym)
{
  if (Pmode == DImode)
    return gen_got_load_tls_gddi (dest, sym);
  else
    return gen_got_load_tls_gdsi (dest, sym);
}

/* Load an entry from the GOT for a TLS IE access.  */

static rtx
riscv_got_load_tls_ie (rtx dest, rtx sym)
{
  if (Pmode == DImode)
    return gen_got_load_tls_iedi (dest, sym);
  else
    return gen_got_load_tls_iesi (dest, sym);
}

/* Add in the thread pointer for a TLS LE access.  */

static rtx
riscv_tls_add_tp_le (rtx dest, rtx base, rtx sym)
{
  rtx tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
  if (Pmode == DImode)
    return gen_tls_add_tp_ledi (dest, base, tp, sym);
  else
    return gen_tls_add_tp_lesi (dest, base, tp, sym);
}

/* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise
   it appears in a MEM of that mode.  Return true if ADDR is a legitimate
   constant in that context and can be split into high and low parts.
   If so, and if LOW_OUT is nonnull, emit the high part and store the
   low part in *LOW_OUT.  Leave *LOW_OUT unchanged otherwise.

   TEMP is as for riscv_force_temporary and is used to load the high
   part into a register.

   When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be
   a legitimate SET_SRC for an .md pattern, otherwise the low part
   is guaranteed to be a legitimate address for mode MODE.  */

bool
riscv_split_symbol (rtx temp, rtx addr, enum machine_mode mode, rtx *low_out)
{
  enum riscv_symbol_type symbol_type;

  if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
      || !riscv_symbolic_constant_p (addr, &symbol_type)
      || riscv_symbol_insns (symbol_type) == 0
      || !riscv_split_symbol_type (symbol_type))
    return false;

  if (low_out)
    switch (symbol_type)
      {
      case SYMBOL_ABSOLUTE:
	{
	  rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
	  high = riscv_force_temporary (temp, high);
	  *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
	}
	break;

      case SYMBOL_PCREL:
	{
	  static unsigned seqno;
	  char buf[32];
	  rtx label;

	  ssize_t bytes = snprintf (buf, sizeof (buf), ".LA%u", seqno);
	  gcc_assert ((size_t) bytes < sizeof (buf));

	  label = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
	  SYMBOL_REF_FLAGS (label) |= SYMBOL_FLAG_LOCAL;

	  if (temp == NULL)
	    temp = gen_reg_rtx (Pmode);

	  if (Pmode == DImode)
	    emit_insn (gen_auipcdi (temp, copy_rtx (addr), GEN_INT (seqno)));
	  else
	    emit_insn (gen_auipcsi (temp, copy_rtx (addr), GEN_INT (seqno)));

	  *low_out = gen_rtx_LO_SUM (Pmode, temp, label);

	  seqno++;
	}
	break;

      default:
	gcc_unreachable ();
      }

  return true;
}

/* Return a legitimate address for REG + OFFSET.  TEMP is as for
   riscv_force_temporary; it is only needed when OFFSET is not a
   SMALL_OPERAND.  */

static rtx
riscv_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset)
{
  if (!SMALL_OPERAND (offset))
    {
      rtx high;

1145 	 The addition inside the macro CONST_HIGH_PART may cause an
	 The addition inside the macro CONST_HIGH_PART may cause an
	 overflow, so we need to force a sign-extension check.  */
      high = gen_int_mode (CONST_HIGH_PART (offset), Pmode);
      offset = CONST_LOW_PART (offset);
      high = riscv_force_temporary (temp, high);
      reg = riscv_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
    }
  return plus_constant (Pmode, reg, offset);
}
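
/* For example, for BASE + 0x8800 the low part is sign-extended to
   -2048, so the high part rounds up to 0x9000 and the address becomes

	lui	t0,0x9
	add	t0,t0,base

   followed by a -2048 offset in the final memory reference (the
   register names here are only illustrative).  */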

/* The __tls_get_addr symbol.  */
static GTY(()) rtx riscv_tls_symbol;

/* Return an instruction sequence that calls __tls_get_addr.  SYM is
   the TLS symbol we are referencing and RESULT is an RTX for the
   return value location.  */

static rtx_insn *
riscv_call_tls_get_addr (rtx sym, rtx result)
{
  rtx a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST), func;
  rtx_insn *insn;

  if (!riscv_tls_symbol)
    riscv_tls_symbol = init_one_libfunc ("__tls_get_addr");
  func = gen_rtx_MEM (FUNCTION_MODE, riscv_tls_symbol);

  start_sequence ();

  emit_insn (riscv_got_load_tls_gd (a0, sym));
  insn = emit_call_insn (gen_call_value (result, func, const0_rtx, NULL));
  RTL_CONST_CALL_P (insn) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0);
  insn = get_insns ();

  end_sequence ();

  return insn;
}

/* Generate the code to access LOC, a thread-local SYMBOL_REF, and return
   its address.  The return value will be both a valid address and a valid
   SET_SRC (either a REG or a LO_SUM).  */

static rtx
riscv_legitimize_tls_address (rtx loc)
{
  rtx dest, tp, tmp;
  enum tls_model model = SYMBOL_REF_TLS_MODEL (loc);

  /* Since we support TLS copy relocs, non-PIC TLS accesses may all use LE.  */
  if (!flag_pic)
    model = TLS_MODEL_LOCAL_EXEC;

  switch (model)
    {
    case TLS_MODEL_LOCAL_DYNAMIC:
      /* Rely on section anchors for the optimization that LDM TLS
	 provides.  The anchor's address is loaded with GD TLS.  */
    case TLS_MODEL_GLOBAL_DYNAMIC:
      tmp = gen_rtx_REG (Pmode, GP_RETURN);
      dest = gen_reg_rtx (Pmode);
      emit_libcall_block (riscv_call_tls_get_addr (loc, tmp), dest, tmp, loc);
      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* la.tls.ie; tp-relative add */
      tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
      tmp = gen_reg_rtx (Pmode);
      emit_insn (riscv_got_load_tls_ie (tmp, loc));
      dest = gen_reg_rtx (Pmode);
      emit_insn (gen_add3_insn (dest, tmp, tp));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      tmp = riscv_unspec_offset_high (NULL, loc, SYMBOL_TLS_LE);
      dest = gen_reg_rtx (Pmode);
      emit_insn (riscv_tls_add_tp_le (dest, tmp, loc));
      dest = gen_rtx_LO_SUM (Pmode, dest,
			     riscv_unspec_address (loc, SYMBOL_TLS_LE));
      break;

    default:
      gcc_unreachable ();
    }
  return dest;
}
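
/* For example, the local-exec case above yields the standard sequence

	lui	a5,%tprel_hi(x)
	add	a5,a5,tp,%tprel_add(x)

   plus a LO_SUM, so that the final access becomes something like
   "lw a0,%tprel_lo(x)(a5)".  */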

/* If X is not a valid address for mode MODE, force it into a register.  */

static rtx
riscv_force_address (rtx x, enum machine_mode mode)
{
  if (!riscv_legitimate_address_p (mode, x, false))
    x = force_reg (Pmode, x);
  return x;
}

/* This function is used to implement LEGITIMIZE_ADDRESS.  If X can
   be legitimized in a way that the generic machinery might not expect,
   return a new address, otherwise return NULL.  MODE is the mode of
   the memory being accessed.  */

static rtx
riscv_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  enum machine_mode mode)
{
  rtx addr;

  if (riscv_tls_symbol_p (x))
    return riscv_legitimize_tls_address (x);

  /* See if the address can split into a high part and a LO_SUM.  */
  if (riscv_split_symbol (NULL, x, mode, &addr))
    return riscv_force_address (addr, mode);

  /* Handle BASE + OFFSET using riscv_add_offset.  */
  if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
      && INTVAL (XEXP (x, 1)) != 0)
    {
      rtx base = XEXP (x, 0);
      HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));

      if (!riscv_valid_base_register_p (base, mode, false))
	base = copy_to_mode_reg (Pmode, base);
      addr = riscv_add_offset (NULL, base, offset);
      return riscv_force_address (addr, mode);
    }

  return x;
}

/* Load VALUE into DEST.  TEMP is as for riscv_force_temporary.  */

void
riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value)
{
  struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
  enum machine_mode mode;
  int i, num_ops;
  rtx x;

  mode = GET_MODE (dest);
  num_ops = riscv_build_integer (codes, value, mode);

  if (can_create_pseudo_p () && num_ops > 2 /* not a simple constant */
      && num_ops >= riscv_split_integer_cost (value))
    x = riscv_split_integer (value, mode);
  else
    {
      /* Apply each binary operation to X.  */
      x = GEN_INT (codes[0].value);

      for (i = 1; i < num_ops; i++)
	{
	  if (!can_create_pseudo_p ())
	    x = riscv_emit_set (temp, x);
	  else
	    x = force_reg (mode, x);

	  x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value));
	}
    }

  riscv_emit_set (dest, x);
}

/* Subroutine of riscv_legitimize_move.  Move constant SRC into register
   DEST given that SRC satisfies immediate_operand but doesn't satisfy
   move_operand.  */

static void
riscv_legitimize_const_move (enum machine_mode mode, rtx dest, rtx src)
{
  rtx base, offset;

  /* Split moves of big integers into smaller pieces.  */
  if (splittable_const_int_operand (src, mode))
    {
      riscv_move_integer (dest, dest, INTVAL (src));
      return;
    }

  /* Split moves of symbolic constants into high/low pairs.  */
  if (riscv_split_symbol (dest, src, MAX_MACHINE_MODE, &src))
    {
      riscv_emit_set (dest, src);
      return;
    }

  /* Generate the appropriate access sequences for TLS symbols.  */
  if (riscv_tls_symbol_p (src))
    {
      riscv_emit_move (dest, riscv_legitimize_tls_address (src));
      return;
    }

  /* If we have (const (plus symbol offset)), and that expression cannot
     be forced into memory, load the symbol first and add in the offset.  Also
     prefer to do this even if the constant _can_ be forced into memory, as it
     usually produces better code.  */
  split_const (src, &base, &offset);
  if (offset != const0_rtx
      && (targetm.cannot_force_const_mem (mode, src) || can_create_pseudo_p ()))
    {
      base = riscv_force_temporary (dest, base);
      riscv_emit_move (dest, riscv_add_offset (NULL, base, INTVAL (offset)));
      return;
    }

  src = force_const_mem (mode, src);

  /* When using explicit relocs, constant pool references are sometimes
     not legitimate addresses.  */
  riscv_split_symbol (dest, XEXP (src, 0), mode, &XEXP (src, 0));
  riscv_emit_move (dest, src);
}

/* If (set DEST SRC) is not a valid move instruction, emit an equivalent
   sequence that is valid.  */

bool
riscv_legitimize_move (enum machine_mode mode, rtx dest, rtx src)
{
  if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode))
    {
      riscv_emit_move (dest, force_reg (mode, src));
      return true;
    }

  /* We need to deal with constants that would be legitimate
     immediate_operands but aren't legitimate move_operands.  */
  if (CONSTANT_P (src) && !move_operand (src, mode))
    {
      riscv_legitimize_const_move (mode, dest, src);
      set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src));
      return true;
    }

  /* The patterns we provide for optimizing access to PIC local symbols
     can lead GCC to create addresses that are not otherwise legitimate,
     making it emit unrecognizable instructions during optimization, so
     legitimize such addresses here.  */

  if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0),
						   reload_completed))
    {
      XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode);
    }

  if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0),
						  reload_completed))
    {
      XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode);
    }

  return false;
}

/* Return true if there is an instruction that implements CODE and accepts
   X as an immediate operand.  */

static int
riscv_immediate_operand_p (int code, HOST_WIDE_INT x)
{
  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      /* All shift counts are truncated to a valid constant.  */
      return true;

    case AND:
    case IOR:
    case XOR:
    case PLUS:
    case LT:
    case LTU:
      /* These instructions take 12-bit signed immediates.  */
      return SMALL_OPERAND (x);

    case LE:
      /* We add 1 to the immediate and use SLT.  */
      return SMALL_OPERAND (x + 1);

    case LEU:
      /* Likewise SLTU, but reject the always-true case.  */
      return SMALL_OPERAND (x + 1) && x + 1 != 0;

    case GE:
    case GEU:
      /* We can emulate an immediate of 1 by using GT/GTU against x0.  */
      return x == 1;

    default:
      /* By default assume that x0 can be used for 0.  */
      return x == 0;
    }
}
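
/* For example, "x <= 7" is treated as "x < 8" (SLTI with immediate 8)
   and "x >= 1" as "x > 0" (SGTZ), so both count as immediate forms
   even though no SLE/SGE instructions exist.  */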

/* Return the cost of binary operation X, given that the instruction
   sequence for a word-sized or smaller operation takes SINGLE_INSNS
   instructions and that the sequence of a double-word operation takes
   DOUBLE_INSNS instructions.  */

static int
riscv_binary_cost (rtx x, int single_insns, int double_insns)
{
  if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2)
    return COSTS_N_INSNS (double_insns);
  return COSTS_N_INSNS (single_insns);
}

/* Return the cost of sign- or zero-extending OP.  */

static int
riscv_extend_cost (rtx op, bool unsigned_p)
{
  if (MEM_P (op))
    return 0;

  if (unsigned_p && GET_MODE (op) == QImode)
    /* We can use ANDI.  */
    return COSTS_N_INSNS (1);

  if (!unsigned_p && GET_MODE (op) == SImode)
    /* We can use SEXT.W.  */
    return COSTS_N_INSNS (1);

  /* We need to use a shift left and a shift right.  */
  return COSTS_N_INSNS (2);
}
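
/* For example, zero-extending a halfword that is already in a register
   costs two instructions because it needs a shift pair, e.g. on RV64

	slli	a0,a0,48
	srli	a0,a0,48  */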

/* Implement TARGET_RTX_COSTS.  */

static bool
riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UNUSED,
		 int *total, bool speed)
{
  bool float_mode_p = FLOAT_MODE_P (mode);
  int cost;

  switch (GET_CODE (x))
    {
    case CONST_INT:
      if (riscv_immediate_operand_p (outer_code, INTVAL (x)))
	{
	  *total = 0;
	  return true;
	}
      /* Fall through.  */

    case SYMBOL_REF:
    case LABEL_REF:
    case CONST_DOUBLE:
    case CONST:
      if ((cost = riscv_const_insns (x)) > 0)
	{
	  /* If the constant is likely to be stored in a GPR, SETs of
	     single-insn constants are as cheap as register sets; we
	     never want to CSE them.  */
	  if (cost == 1 && outer_code == SET)
	    *total = 0;
	  /* When we load a constant more than once, it usually is better
	     to duplicate the last operation in the sequence than to CSE
	     the constant itself.  */
	  else if (outer_code == SET || GET_MODE (x) == VOIDmode)
	    *total = COSTS_N_INSNS (1);
	}
      else /* The constant will be fetched from the constant pool.  */
1516 	*total = COSTS_N_INSNS (riscv_symbol_insns (SYMBOL_ABSOLUTE));
1517       return true;
1518 
1519     case MEM:
1520       /* If the address is legitimate, return the number of
1521 	 instructions it needs.  */
1522       if ((cost = riscv_address_insns (XEXP (x, 0), mode, true)) > 0)
1523 	{
1524 	  *total = COSTS_N_INSNS (cost + tune_info->memory_cost);
1525 	  return true;
1526 	}
1527       /* Otherwise use the default handling.  */
1528       return false;
1529 
1530     case NOT:
1531       *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1);
1532       return false;
1533 
1534     case AND:
1535     case IOR:
1536     case XOR:
1537       /* Double-word operations use two single-word operations.  */
1538       *total = riscv_binary_cost (x, 1, 2);
1539       return false;
1540 
1541     case ASHIFT:
1542     case ASHIFTRT:
1543     case LSHIFTRT:
1544       *total = riscv_binary_cost (x, 1, CONSTANT_P (XEXP (x, 1)) ? 4 : 9);
1545       return false;
1546 
1547     case ABS:
1548       *total = COSTS_N_INSNS (float_mode_p ? 1 : 3);
1549       return false;
1550 
1551     case LO_SUM:
1552       *total = set_src_cost (XEXP (x, 0), mode, speed);
1553       return true;
1554 
1555     case LT:
1556     case LTU:
1557     case LE:
1558     case LEU:
1559     case GT:
1560     case GTU:
1561     case GE:
1562     case GEU:
1563     case EQ:
1564     case NE:
1565       /* Branch comparisons have VOIDmode, so use the first operand's
1566 	 mode instead.  */
1567       mode = GET_MODE (XEXP (x, 0));
1568       if (float_mode_p)
1569 	*total = tune_info->fp_add[mode == DFmode];
1570       else
1571 	*total = riscv_binary_cost (x, 1, 3);
1572       return false;
1573 
1574     case UNORDERED:
1575     case ORDERED:
1576       /* (FEQ(A, A) & FEQ(B, B)) compared against 0.  */
1577       mode = GET_MODE (XEXP (x, 0));
1578       *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (2);
1579       return false;
1580 
1581     case UNEQ:
1582     case LTGT:
1583       /* (FEQ(A, A) & FEQ(B, B)) compared against FEQ(A, B).  */
1584       mode = GET_MODE (XEXP (x, 0));
1585       *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (3);
1586       return false;
1587 
1588     case UNGE:
1589     case UNGT:
1590     case UNLE:
1591     case UNLT:
1592       /* FLT or FLE, but guarded by an FFLAGS read and write.  */
1593       mode = GET_MODE (XEXP (x, 0));
1594       *total = tune_info->fp_add[mode == DFmode] + COSTS_N_INSNS (4);
1595       return false;
1596 
1597     case MINUS:
1598     case PLUS:
1599       if (float_mode_p)
1600 	*total = tune_info->fp_add[mode == DFmode];
1601       else
1602 	*total = riscv_binary_cost (x, 1, 4);
1603       return false;
1604 
1605     case NEG:
1606       {
1607 	rtx op = XEXP (x, 0);
1608 	if (GET_CODE (op) == FMA && !HONOR_SIGNED_ZEROS (mode))
1609 	  {
1610 	    *total = (tune_info->fp_mul[mode == DFmode]
1611 		      + set_src_cost (XEXP (op, 0), mode, speed)
1612 		      + set_src_cost (XEXP (op, 1), mode, speed)
1613 		      + set_src_cost (XEXP (op, 2), mode, speed));
1614 	    return true;
1615 	  }
1616       }
1617 
1618       if (float_mode_p)
1619 	*total = tune_info->fp_add[mode == DFmode];
1620       else
1621 	*total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1);
1622       return false;
1623 
1624     case MULT:
1625       if (float_mode_p)
1626 	*total = tune_info->fp_mul[mode == DFmode];
1627       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
1628 	*total = 3 * tune_info->int_mul[0] + COSTS_N_INSNS (2);
1629       else if (!speed)
1630 	*total = COSTS_N_INSNS (1);
1631       else
1632 	*total = tune_info->int_mul[mode == DImode];
1633       return false;
1634 
1635     case DIV:
1636     case SQRT:
1637     case MOD:
1638       if (float_mode_p)
1639 	{
1640 	  *total = tune_info->fp_div[mode == DFmode];
1641 	  return false;
1642 	}
1643       /* Fall through.  */
1644 
1645     case UDIV:
1646     case UMOD:
1647       if (speed)
1648 	*total = tune_info->int_div[mode == DImode];
1649       else
1650 	*total = COSTS_N_INSNS (1);
1651       return false;
1652 
1653     case SIGN_EXTEND:
1654     case ZERO_EXTEND:
1655       *total = riscv_extend_cost (XEXP (x, 0), GET_CODE (x) == ZERO_EXTEND);
1656       return false;
1657 
1658     case FLOAT:
1659     case UNSIGNED_FLOAT:
1660     case FIX:
1661     case FLOAT_EXTEND:
1662     case FLOAT_TRUNCATE:
1663       *total = tune_info->fp_add[mode == DFmode];
1664       return false;
1665 
1666     case FMA:
1667       *total = (tune_info->fp_mul[mode == DFmode]
1668 		+ set_src_cost (XEXP (x, 0), mode, speed)
1669 		+ set_src_cost (XEXP (x, 1), mode, speed)
1670 		+ set_src_cost (XEXP (x, 2), mode, speed));
1671       return true;
1672 
1673     case UNSPEC:
1674       if (XINT (x, 1) == UNSPEC_AUIPC)
1675 	{
1676 	  /* Make AUIPC cheap to avoid spilling its result to the stack.  */
1677 	  *total = 1;
1678 	  return true;
1679 	}
1680       return false;
1681 
1682     default:
1683       return false;
1684     }
1685 }
1686 
1687 /* Implement TARGET_ADDRESS_COST.  */
1688 
1689 static int
1690 riscv_address_cost (rtx addr, enum machine_mode mode,
1691 		    addr_space_t as ATTRIBUTE_UNUSED,
1692 		    bool speed ATTRIBUTE_UNUSED)
1693 {
1694   return riscv_address_insns (addr, mode, false);
1695 }
1696 
1697 /* Return one word of double-word value OP.  HIGH_P is true to select the
1698    high part or false to select the low part. */
1699 
1700 rtx
1701 riscv_subword (rtx op, bool high_p)
1702 {
1703   unsigned int byte = high_p ? UNITS_PER_WORD : 0;
1704   enum machine_mode mode = GET_MODE (op);
1705 
1706   if (mode == VOIDmode)
1707     mode = TARGET_64BIT ? TImode : DImode;
1708 
1709   if (MEM_P (op))
1710     return adjust_address (op, word_mode, byte);
1711 
1712   if (REG_P (op))
1713     gcc_assert (!FP_REG_RTX_P (op));
1714 
1715   return simplify_gen_subreg (word_mode, op, mode, byte);
1716 }
1717 
1718 /* Return true if a 64-bit move from SRC to DEST should be split into two.  */
1719 
1720 bool
1721 riscv_split_64bit_move_p (rtx dest, rtx src)
1722 {
1723   if (TARGET_64BIT)
1724     return false;
1725 
1726   /* Allow FPR <-> FPR and FPR <-> MEM moves, and permit the special case
1727      of zeroing an FPR with FCVT.D.W.  */
1728   if (TARGET_DOUBLE_FLOAT
1729       && ((FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
1730 	  || (FP_REG_RTX_P (dest) && MEM_P (src))
1731 	  || (FP_REG_RTX_P (src) && MEM_P (dest))
1732 	  || (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))))
1733     return false;
1734 
1735   return true;
1736 }
1737 
1738 /* Split a doubleword move from SRC to DEST.  On 32-bit targets,
1739    this function handles 64-bit moves for which riscv_split_64bit_move_p
1740    holds.  For 64-bit targets, this function handles 128-bit moves.  */
1741 
1742 void
1743 riscv_split_doubleword_move (rtx dest, rtx src)
1744 {
1745   rtx low_dest;
1746 
1747   /* The operation can be split into two normal moves.  Decide in
1748      which order to do them.  */
1749   low_dest = riscv_subword (dest, false);
1750   if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src))
1751     {
1752       riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
1753       riscv_emit_move (low_dest, riscv_subword (src, false));
1754     }
1755   else
1756     {
1757       riscv_emit_move (low_dest, riscv_subword (src, false));
1758       riscv_emit_move (riscv_subword (dest, true), riscv_subword (src, true));
1759     }
1760 }
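
/* Illustration of the ordering choice above: splitting
   (set (reg:DI a1) (reg:DI a0)) on RV32 must move the high word first
   ("mv a2,a1; mv a1,a0"), because the low half of the destination (a1)
   overlaps the source pair; with disjoint register pairs the low word
   moves first.  */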
1761 
1762 /* Return the appropriate instructions to move SRC into DEST.  Assume
1763    that SRC is operand 1 and DEST is operand 0.  */
1764 
1765 const char *
1766 riscv_output_move (rtx dest, rtx src)
1767 {
1768   enum rtx_code dest_code, src_code;
1769   enum machine_mode mode;
1770   bool dbl_p;
1771 
1772   dest_code = GET_CODE (dest);
1773   src_code = GET_CODE (src);
1774   mode = GET_MODE (dest);
1775   dbl_p = (GET_MODE_SIZE (mode) == 8);
1776 
1777   if (dbl_p && riscv_split_64bit_move_p (dest, src))
1778     return "#";
1779 
1780   if (dest_code == REG && GP_REG_P (REGNO (dest)))
1781     {
1782       if (src_code == REG && FP_REG_P (REGNO (src)))
1783 	return dbl_p ? "fmv.x.d\t%0,%1" : "fmv.x.s\t%0,%1";
1784 
1785       if (src_code == MEM)
1786 	switch (GET_MODE_SIZE (mode))
1787 	  {
1788 	  case 1: return "lbu\t%0,%1";
1789 	  case 2: return "lhu\t%0,%1";
1790 	  case 4: return "lw\t%0,%1";
1791 	  case 8: return "ld\t%0,%1";
1792 	  }
1793 
1794       if (src_code == CONST_INT)
1795 	return "li\t%0,%1";
1796 
1797       if (src_code == HIGH)
1798 	return "lui\t%0,%h1";
1799 
1800       if (symbolic_operand (src, VOIDmode))
1801 	switch (riscv_classify_symbolic_expression (src))
1802 	  {
1803 	  case SYMBOL_GOT_DISP: return "la\t%0,%1";
1804 	  case SYMBOL_ABSOLUTE: return "lla\t%0,%1";
1805 	  case SYMBOL_PCREL: return "lla\t%0,%1";
1806 	  default: gcc_unreachable ();
1807 	  }
1808     }
1809   if ((src_code == REG && GP_REG_P (REGNO (src)))
1810       || (src == CONST0_RTX (mode)))
1811     {
1812       if (dest_code == REG)
1813 	{
1814 	  if (GP_REG_P (REGNO (dest)))
1815 	    return "mv\t%0,%z1";
1816 
1817 	  if (FP_REG_P (REGNO (dest)))
1818 	    {
1819 	      if (!dbl_p)
1820 		return "fmv.s.x\t%0,%z1";
1821 	      if (TARGET_64BIT)
1822 		return "fmv.d.x\t%0,%z1";
1823 	      /* In RV32, we can emulate fmv.d.x %0, x0 using fcvt.d.w.  */
1824 	      gcc_assert (src == CONST0_RTX (mode));
1825 	      return "fcvt.d.w\t%0,x0";
1826 	    }
1827 	}
1828       if (dest_code == MEM)
1829 	switch (GET_MODE_SIZE (mode))
1830 	  {
1831 	  case 1: return "sb\t%z1,%0";
1832 	  case 2: return "sh\t%z1,%0";
1833 	  case 4: return "sw\t%z1,%0";
1834 	  case 8: return "sd\t%z1,%0";
1835 	  }
1836     }
1837   if (src_code == REG && FP_REG_P (REGNO (src)))
1838     {
1839       if (dest_code == REG && FP_REG_P (REGNO (dest)))
1840 	return dbl_p ? "fmv.d\t%0,%1" : "fmv.s\t%0,%1";
1841 
1842       if (dest_code == MEM)
1843 	return dbl_p ? "fsd\t%1,%0" : "fsw\t%1,%0";
1844     }
1845   if (dest_code == REG && FP_REG_P (REGNO (dest)))
1846     {
1847       if (src_code == MEM)
1848 	return dbl_p ? "fld\t%0,%1" : "flw\t%0,%1";
1849     }
1850   gcc_unreachable ();
1851 }
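
/* Sample outputs from the above, for illustration: an SImode GPR load
   prints "lw\t%0,%1", a GPR-to-GPR copy "mv\t%0,%z1", a constant
   "li\t%0,%1", and a DFmode FPR store "fsd\t%1,%0".  Doubleword moves
   for which riscv_split_64bit_move_p holds print "#" and are split
   later by riscv_split_doubleword_move.  */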
1852 
1853 /* Return true if CMP1 is a suitable second operand for integer ordering
1854    test CODE.  See also the *sCC patterns in riscv.md.  */
1855 
1856 static bool
1857 riscv_int_order_operand_ok_p (enum rtx_code code, rtx cmp1)
1858 {
1859   switch (code)
1860     {
1861     case GT:
1862     case GTU:
1863       return reg_or_0_operand (cmp1, VOIDmode);
1864 
1865     case GE:
1866     case GEU:
1867       return cmp1 == const1_rtx;
1868 
1869     case LT:
1870     case LTU:
1871       return arith_operand (cmp1, VOIDmode);
1872 
1873     case LE:
1874       return sle_operand (cmp1, VOIDmode);
1875 
1876     case LEU:
1877       return sleu_operand (cmp1, VOIDmode);
1878 
1879     default:
1880       gcc_unreachable ();
1881     }
1882 }
1883 
1884 /* Return true if *CMP1 (of mode MODE) is a valid second operand for
1885    integer ordering test *CODE, or if an equivalent combination can
1886    be formed by adjusting *CODE and *CMP1.  When returning true, update
1887    *CODE and *CMP1 with the chosen code and operand, otherwise leave
1888    them alone.  */
1889 
1890 static bool
1891 riscv_canonicalize_int_order_test (enum rtx_code *code, rtx *cmp1,
1892 				  enum machine_mode mode)
1893 {
1894   HOST_WIDE_INT plus_one;
1895 
1896   if (riscv_int_order_operand_ok_p (*code, *cmp1))
1897     return true;
1898 
1899   if (CONST_INT_P (*cmp1))
1900     switch (*code)
1901       {
1902       case LE:
1903 	plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
1904 	if (INTVAL (*cmp1) < plus_one)
1905 	  {
1906 	    *code = LT;
1907 	    *cmp1 = force_reg (mode, GEN_INT (plus_one));
1908 	    return true;
1909 	  }
1910 	break;
1911 
1912       case LEU:
1913 	plus_one = trunc_int_for_mode (UINTVAL (*cmp1) + 1, mode);
1914 	if (plus_one != 0)
1915 	  {
1916 	    *code = LTU;
1917 	    *cmp1 = force_reg (mode, GEN_INT (plus_one));
1918 	    return true;
1919 	  }
1920 	break;
1921 
1922       default:
1923 	break;
1924       }
1925   return false;
1926 }
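
/* For example, (LE x 2047) has no direct SLTI encoding because 2048 is
   outside the signed 12-bit immediate range, so it is canonicalized to
   (LT x C) with C = 2048 forced into a register.  (LEU x 0xffffffff) on
   RV32 is rejected here instead, since adding 1 wraps the constant to
   zero; the caller then falls back to the inverted test.  */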
1927 
1928 /* Compare CMP0 and CMP1 using ordering test CODE and store the result
1929    in TARGET.  CMP0 and TARGET are register_operands.  If INVERT_PTR
1930    is nonnull, it's OK to set TARGET to the inverse of the result and
1931    flip *INVERT_PTR instead.  */
1932 
1933 static void
1934 riscv_emit_int_order_test (enum rtx_code code, bool *invert_ptr,
1935 			  rtx target, rtx cmp0, rtx cmp1)
1936 {
1937   enum machine_mode mode;
1938 
1939   /* First see if there is a RISC-V instruction that can do this operation.
1940      If not, try doing the same for the inverse operation.  If that also
1941      fails, force CMP1 into a register and try again.  */
1942   mode = GET_MODE (cmp0);
1943   if (riscv_canonicalize_int_order_test (&code, &cmp1, mode))
1944     riscv_emit_binary (code, target, cmp0, cmp1);
1945   else
1946     {
1947       enum rtx_code inv_code = reverse_condition (code);
1948       if (!riscv_canonicalize_int_order_test (&inv_code, &cmp1, mode))
1949 	{
1950 	  cmp1 = force_reg (mode, cmp1);
1951 	  riscv_emit_int_order_test (code, invert_ptr, target, cmp0, cmp1);
1952 	}
1953       else if (invert_ptr == 0)
1954 	{
1955 	  rtx inv_target = riscv_force_binary (GET_MODE (target),
1956 					       inv_code, cmp0, cmp1);
1957 	  riscv_emit_binary (XOR, target, inv_target, const1_rtx);
1958 	}
1959       else
1960 	{
1961 	  *invert_ptr = !*invert_ptr;
1962 	  riscv_emit_binary (inv_code, target, cmp0, cmp1);
1963 	}
1964     }
1965 }
1966 
1967 /* Return a register that is zero iff CMP0 and CMP1 are equal.
1968    The register will have the same mode as CMP0.  */
1969 
1970 static rtx
1971 riscv_zero_if_equal (rtx cmp0, rtx cmp1)
1972 {
1973   if (cmp1 == const0_rtx)
1974     return cmp0;
1975 
1976   return expand_binop (GET_MODE (cmp0), sub_optab,
1977 		       cmp0, cmp1, 0, 0, OPTAB_DIRECT);
1978 }
1979 
1980 /* Sign- or zero-extend OP0 and OP1 for integer comparisons.  */
1981 
1982 static void
1983 riscv_extend_comparands (rtx_code code, rtx *op0, rtx *op1)
1984 {
1985   /* Comparisons consider all XLEN bits, so extend sub-XLEN values.  */
1986   if (GET_MODE_SIZE (word_mode) > GET_MODE_SIZE (GET_MODE (*op0)))
1987     {
1988       /* It is more profitable to zero-extend QImode values.  */
1989       if (unsigned_condition (code) == code && GET_MODE (*op0) == QImode)
1990 	{
1991 	  *op0 = gen_rtx_ZERO_EXTEND (word_mode, *op0);
1992 	  if (CONST_INT_P (*op1))
1993 	    *op1 = GEN_INT ((uint8_t) INTVAL (*op1));
1994 	  else
1995 	    *op1 = gen_rtx_ZERO_EXTEND (word_mode, *op1);
1996 	}
1997       else
1998 	{
1999 	  *op0 = gen_rtx_SIGN_EXTEND (word_mode, *op0);
2000 	  if (*op1 != const0_rtx)
2001 	    *op1 = gen_rtx_SIGN_EXTEND (word_mode, *op1);
2002 	}
2003     }
2004 }
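
/* Concretely, on RV64 (word_mode == DImode) an SImode (LTU a b)
   sign-extends both operands; this is the canonical form for 32-bit
   values and still yields the right unsigned order, since both values
   receive the same bias.  A QImode equality or unsigned test
   zero-extends instead, so a constant such as -1 simply becomes 0xff.  */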
2005 
2006 /* Convert a comparison into something that can be used in a branch.  On
2007    entry, *OP0 and *OP1 are the values being compared and *CODE is the code
2008    used to compare them.  Update them to describe the final comparison.  */
2009 
2010 static void
2011 riscv_emit_int_compare (enum rtx_code *code, rtx *op0, rtx *op1)
2012 {
2013   if (splittable_const_int_operand (*op1, VOIDmode))
2014     {
2015       HOST_WIDE_INT rhs = INTVAL (*op1);
2016 
2017       if (*code == EQ || *code == NE)
2018 	{
2019 	  /* Convert e.g. OP0 == 2048 into OP0 - 2048 == 0.  */
2020 	  if (SMALL_OPERAND (-rhs))
2021 	    {
2022 	      *op0 = riscv_force_binary (GET_MODE (*op0), PLUS, *op0,
2023 					 GEN_INT (-rhs));
2024 	      *op1 = const0_rtx;
2025 	    }
2026 	}
2027       else
2028 	{
2029 	  static const enum rtx_code mag_comparisons[][2] = {
2030 	    {LEU, LTU}, {GTU, GEU}, {LE, LT}, {GT, GE}
2031 	  };
2032 
2033 	  /* Convert e.g. (OP0 <= 0xFFF) into (OP0 < 0x1000).  */
2034 	  for (size_t i = 0; i < ARRAY_SIZE (mag_comparisons); i++)
2035 	    {
2036 	      HOST_WIDE_INT new_rhs;
2037 	      bool increment = *code == mag_comparisons[i][0];
2038 	      bool decrement = *code == mag_comparisons[i][1];
2039 	      if (!increment && !decrement)
2040 		continue;
2041 
2042 	      new_rhs = rhs + (increment ? 1 : -1);
2043 	      if (riscv_integer_cost (new_rhs) < riscv_integer_cost (rhs)
2044 		  && (rhs < 0) == (new_rhs < 0))
2045 		{
2046 		  *op1 = GEN_INT (new_rhs);
2047 		  *code = mag_comparisons[i][increment];
2048 		}
2049 	      break;
2050 	    }
2051 	}
2052     }
2053 
2054   riscv_extend_comparands (*code, op0, op1);
2055 
2056   *op0 = force_reg (word_mode, *op0);
2057   if (*op1 != const0_rtx)
2058     *op1 = force_reg (word_mode, *op1);
2059 }
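
/* Worked example: in "x == 2048" the constant does not fit the signed
   12-bit ADDI immediate, but -2048 does, so the test is emitted as
   "addi t,x,-2048; beq t,zero,...".  Similarly "x <= 0xfff" becomes
   "x < 0x1000": 0xfff needs LUI+ADDI to materialize, whereas 0x1000 is
   a single LUI.  */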
2060 
2061 /* Like riscv_emit_int_compare, but for floating-point comparisons.  */
2062 
2063 static void
2064 riscv_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1)
2065 {
2066   rtx tmp0, tmp1, cmp_op0 = *op0, cmp_op1 = *op1;
2067   enum rtx_code fp_code = *code;
2068   *code = NE;
2069 
2070   switch (fp_code)
2071     {
2072     case UNORDERED:
2073       *code = EQ;
2074       /* Fall through.  */
2075 
2076     case ORDERED:
2077       /* a == a && b == b */
2078       tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
2079       tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
2080       *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
2081       *op1 = const0_rtx;
2082       break;
2083 
2084     case UNEQ:
2085     case LTGT:
2086       /* LTGT is ordered(a, b) > (a == b); UNEQ is ordered(a, b) == (a == b).  */
2087       *code = fp_code == LTGT ? GTU : EQ;
2088       tmp0 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op0);
2089       tmp1 = riscv_force_binary (word_mode, EQ, cmp_op1, cmp_op1);
2090       *op0 = riscv_force_binary (word_mode, AND, tmp0, tmp1);
2091       *op1 = riscv_force_binary (word_mode, EQ, cmp_op0, cmp_op1);
2092       break;
2093 
2094 #define UNORDERED_COMPARISON(CODE, CMP)					\
2095     case CODE:								\
2096       *code = EQ;							\
2097       *op0 = gen_reg_rtx (word_mode);					\
2098       if (GET_MODE (cmp_op0) == SFmode && TARGET_64BIT)			\
2099 	emit_insn (gen_f##CMP##_quietsfdi4 (*op0, cmp_op0, cmp_op1));	\
2100       else if (GET_MODE (cmp_op0) == SFmode)				\
2101 	emit_insn (gen_f##CMP##_quietsfsi4 (*op0, cmp_op0, cmp_op1));	\
2102       else if (GET_MODE (cmp_op0) == DFmode && TARGET_64BIT)		\
2103 	emit_insn (gen_f##CMP##_quietdfdi4 (*op0, cmp_op0, cmp_op1));	\
2104       else if (GET_MODE (cmp_op0) == DFmode)				\
2105 	emit_insn (gen_f##CMP##_quietdfsi4 (*op0, cmp_op0, cmp_op1));	\
2106       else								\
2107 	gcc_unreachable ();						\
2108       *op1 = const0_rtx;						\
2109       break;
2110 
2111     case UNLT:
2112       std::swap (cmp_op0, cmp_op1);
2113       gcc_fallthrough ();
2114 
2115     UNORDERED_COMPARISON(UNGT, le)
2116 
2117     case UNLE:
2118       std::swap (cmp_op0, cmp_op1);
2119       gcc_fallthrough ();
2120 
2121     UNORDERED_COMPARISON(UNGE, lt)
2122 #undef UNORDERED_COMPARISON
2123 
2124     case NE:
2125       fp_code = EQ;
2126       *code = EQ;
2127       /* Fall through.  */
2128 
2129     case EQ:
2130     case LE:
2131     case LT:
2132     case GE:
2133     case GT:
2134       /* We have instructions for these cases.  */
2135       *op0 = riscv_force_binary (word_mode, fp_code, cmp_op0, cmp_op1);
2136       *op1 = const0_rtx;
2137       break;
2138 
2139     default:
2140       gcc_unreachable ();
2141     }
2142 }
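
/* For instance, (UNORDERED a b) expands to quiet FEQ comparisons,
   roughly "feq.d t0,a,a; feq.d t1,b,b; and t0,t0,t1", with *CODE set to
   EQ against zero, so the test holds exactly when either input is NaN.
   FEQ is used because it is a quiet comparison and raises no invalid
   exception on quiet NaNs.  */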
2143 
2144 /* CODE-compare OP0 and OP1.  Store the result in TARGET.  */
2145 
2146 void
2147 riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1)
2148 {
2149   riscv_extend_comparands (code, &op0, &op1);
2150   op0 = force_reg (word_mode, op0);
2151 
2152   if (code == EQ || code == NE)
2153     {
2154       rtx zie = riscv_zero_if_equal (op0, op1);
2155       riscv_emit_binary (code, target, zie, const0_rtx);
2156     }
2157   else
2158     riscv_emit_int_order_test (code, 0, target, op0, op1);
2159 }
2160 
2161 /* Like riscv_expand_int_scc, but for floating-point comparisons.  */
2162 
2163 void
2164 riscv_expand_float_scc (rtx target, enum rtx_code code, rtx op0, rtx op1)
2165 {
2166   riscv_emit_float_compare (&code, &op0, &op1);
2167 
2168   rtx cmp = riscv_force_binary (word_mode, code, op0, op1);
2169   riscv_emit_set (target, lowpart_subreg (SImode, cmp, word_mode));
2170 }
2171 
2172 /* Jump to LABEL if (CODE OP0 OP1) holds.  */
2173 
2174 void
2175 riscv_expand_conditional_branch (rtx label, rtx_code code, rtx op0, rtx op1)
2176 {
2177   if (FLOAT_MODE_P (GET_MODE (op1)))
2178     riscv_emit_float_compare (&code, &op0, &op1);
2179   else
2180     riscv_emit_int_compare (&code, &op0, &op1);
2181 
2182   rtx condition = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
2183   emit_jump_insn (gen_condjump (condition, label));
2184 }
2185 
2186 /* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Every parameter gets at
2187    least PARM_BOUNDARY bits of alignment, but will be given anything up
2188    to STACK_BOUNDARY bits if the type requires it.  */
2189 
2190 static unsigned int
2191 riscv_function_arg_boundary (enum machine_mode mode, const_tree type)
2192 {
2193   unsigned int alignment;
2194 
2195   /* Use natural alignment if the type is not aggregate data.  */
2196   if (type && !AGGREGATE_TYPE_P (type))
2197     alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type));
2198   else
2199     alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
2200 
2201   return MIN (STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment));
2202 }
2203 
2204 /* If MODE represents an argument that can be passed or returned in
2205    floating-point registers, return the number of registers, else 0.  */
2206 
2207 static unsigned
2208 riscv_pass_mode_in_fpr_p (enum machine_mode mode)
2209 {
2210   if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG)
2211     {
2212       if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2213 	return 1;
2214 
2215       if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
2216 	return 2;
2217     }
2218 
2219   return 0;
2220 }
2221 
2222 typedef struct {
2223   const_tree type;
2224   HOST_WIDE_INT offset;
2225 } riscv_aggregate_field;
2226 
2227 /* Identify subfields of aggregates that are candidates for passing in
2228    floating-point registers.  */
2229 
2230 static int
2231 riscv_flatten_aggregate_field (const_tree type,
2232 			       riscv_aggregate_field fields[2],
2233 			       int n, HOST_WIDE_INT offset)
2234 {
2235   switch (TREE_CODE (type))
2236     {
2237     case RECORD_TYPE:
2238       /* Can't handle incomplete types or sizes that are not fixed.  */
2239       if (!COMPLETE_TYPE_P (type)
2240 	  || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
2241 	  || !tree_fits_uhwi_p (TYPE_SIZE (type)))
2242 	return -1;
2243 
2244       for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f))
2245 	if (TREE_CODE (f) == FIELD_DECL)
2246 	  {
2247 	    if (!TYPE_P (TREE_TYPE (f)))
2248 	      return -1;
2249 
2250 	    HOST_WIDE_INT pos = offset + int_byte_position (f);
2251 	    n = riscv_flatten_aggregate_field (TREE_TYPE (f), fields, n, pos);
2252 	    if (n < 0)
2253 	      return -1;
2254 	  }
2255       return n;
2256 
2257     case ARRAY_TYPE:
2258       {
2259 	HOST_WIDE_INT n_elts;
2260 	riscv_aggregate_field subfields[2];
2261 	tree index = TYPE_DOMAIN (type);
2262 	tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type));
2263 	int n_subfields = riscv_flatten_aggregate_field (TREE_TYPE (type),
2264 							 subfields, 0, offset);
2265 
2266 	/* Can't handle incomplete types or sizes that are not fixed.  */
2267 	if (n_subfields <= 0
2268 	    || !COMPLETE_TYPE_P (type)
2269 	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
2270 	    || !index
2271 	    || !TYPE_MAX_VALUE (index)
2272 	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
2273 	    || !TYPE_MIN_VALUE (index)
2274 	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
2275 	    || !tree_fits_uhwi_p (elt_size))
2276 	  return -1;
2277 
2278 	n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
2279 		   - tree_to_uhwi (TYPE_MIN_VALUE (index));
2280 	gcc_assert (n_elts >= 0);
2281 
2282 	for (HOST_WIDE_INT i = 0; i < n_elts; i++)
2283 	  for (int j = 0; j < n_subfields; j++)
2284 	    {
2285 	      if (n >= 2)
2286 		return -1;
2287 
2288 	      fields[n] = subfields[j];
2289 	      fields[n++].offset += i * tree_to_uhwi (elt_size);
2290 	    }
2291 
2292 	return n;
2293       }
2294 
2295     case COMPLEX_TYPE:
2296       {
2297 	/* A complex type consumes two fields, so N must be 0.  */
2298 	if (n != 0)
2299 	  return -1;
2300 
2301 	HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type)));
2302 
2303 	if (elt_size <= UNITS_PER_FP_ARG)
2304 	  {
2305 	    fields[0].type = TREE_TYPE (type);
2306 	    fields[0].offset = offset;
2307 	    fields[1].type = TREE_TYPE (type);
2308 	    fields[1].offset = offset + elt_size;
2309 
2310 	    return 2;
2311 	  }
2312 
2313 	return -1;
2314       }
2315 
2316     default:
2317       if (n < 2
2318 	  && ((SCALAR_FLOAT_TYPE_P (type)
2319 	       && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG)
2320 	      || (INTEGRAL_TYPE_P (type)
2321 		  && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD)))
2322 	{
2323 	  fields[n].type = type;
2324 	  fields[n].offset = offset;
2325 	  return n + 1;
2326 	}
2327       else
2328 	return -1;
2329     }
2330 }
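
/* Examples of the flattening above: "struct { float x; float y; }"
   yields two SFmode fields at offsets 0 and 4; "_Complex double" yields
   two DFmode fields (when UNITS_PER_FP_ARG allows); and
   "struct { float v[3]; }" is rejected, because its three subfields
   exceed the two-field limit.  */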
2331 
2332 /* Identify candidate aggregates for passing in floating-point registers.
2333    Candidates have at most two fields after flattening.  */
2334 
2335 static int
2336 riscv_flatten_aggregate_argument (const_tree type,
2337 				  riscv_aggregate_field fields[2])
2338 {
2339   if (!type || TREE_CODE (type) != RECORD_TYPE)
2340     return -1;
2341 
2342   return riscv_flatten_aggregate_field (type, fields, 0, 0);
2343 }
2344 
2345 /* See whether TYPE is a record whose fields should be returned in one or
2346    two floating-point registers.  If so, populate FIELDS accordingly.  */
2347 
2348 static unsigned
2349 riscv_pass_aggregate_in_fpr_pair_p (const_tree type,
2350 				    riscv_aggregate_field fields[2])
2351 {
2352   int n = riscv_flatten_aggregate_argument (type, fields);
2353 
2354   for (int i = 0; i < n; i++)
2355     if (!SCALAR_FLOAT_TYPE_P (fields[i].type))
2356       return 0;
2357 
2358   return n > 0 ? n : 0;
2359 }
2360 
2361 /* See whether TYPE is a record whose fields should be returned in one
2362    floating-point register and one integer register.  If so, populate
2363    FIELDS accordingly.  */
2364 
2365 static bool
2366 riscv_pass_aggregate_in_fpr_and_gpr_p (const_tree type,
2367 				       riscv_aggregate_field fields[2])
2368 {
2369   unsigned num_int = 0, num_float = 0;
2370   int n = riscv_flatten_aggregate_argument (type, fields);
2371 
2372   for (int i = 0; i < n; i++)
2373     {
2374       num_float += SCALAR_FLOAT_TYPE_P (fields[i].type);
2375       num_int += INTEGRAL_TYPE_P (fields[i].type);
2376     }
2377 
2378   return num_int == 1 && num_float == 1;
2379 }
2380 
2381 /* Return the representation of an argument passed or returned in an FPR
2382    when the value has mode VALUE_MODE and the type has TYPE_MODE.  The
2383    two modes may be different for structures like:
2384 
2385        struct __attribute__((packed)) foo { float f; }
2386 
2387   where the SFmode value "f" is passed in REGNO but the struct itself
2388   has mode BLKmode.  */
2389 
2390 static rtx
2391 riscv_pass_fpr_single (enum machine_mode type_mode, unsigned regno,
2392 		       enum machine_mode value_mode)
2393 {
2394   rtx x = gen_rtx_REG (value_mode, regno);
2395 
2396   if (type_mode != value_mode)
2397     {
2398       x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx);
2399       x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x));
2400     }
2401   return x;
2402 }
2403 
2404 /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1.
2405    MODE is the mode of the composite.  MODE1 and OFFSET1 are the mode and
2406    byte offset for the first value, likewise MODE2 and OFFSET2 for the
2407    second value.  */
2408 
2409 static rtx
2410 riscv_pass_fpr_pair (enum machine_mode mode, unsigned regno1,
2411 		     enum machine_mode mode1, HOST_WIDE_INT offset1,
2412 		     unsigned regno2, enum machine_mode mode2,
2413 		     HOST_WIDE_INT offset2)
2414 {
2415   return gen_rtx_PARALLEL
2416     (mode,
2417      gen_rtvec (2,
2418 		gen_rtx_EXPR_LIST (VOIDmode,
2419 				   gen_rtx_REG (mode1, regno1),
2420 				   GEN_INT (offset1)),
2421 		gen_rtx_EXPR_LIST (VOIDmode,
2422 				   gen_rtx_REG (mode2, regno2),
2423 				   GEN_INT (offset2))));
2424 }
2425 
2426 /* Fill INFO with information about a single argument, and return an
2427    RTL pattern to pass or return the argument.  CUM is the cumulative
2428    state for earlier arguments.  MODE is the mode of this argument and
2429    TYPE is its type (if known).  NAMED is true if this is a named
2430    (fixed) argument rather than a variable one.  RETURN_P is true if
2431    returning the argument, or false if passing the argument.  */
2432 
2433 static rtx
2434 riscv_get_arg_info (struct riscv_arg_info *info, const CUMULATIVE_ARGS *cum,
2435 		    enum machine_mode mode, const_tree type, bool named,
2436 		    bool return_p)
2437 {
2438   unsigned num_bytes, num_words;
2439   unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST;
2440   unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST;
2441   unsigned alignment = riscv_function_arg_boundary (mode, type);
2442 
2443   memset (info, 0, sizeof (*info));
2444   info->gpr_offset = cum->num_gprs;
2445   info->fpr_offset = cum->num_fprs;
2446 
2447   if (named)
2448     {
2449       riscv_aggregate_field fields[2];
2450       unsigned fregno = fpr_base + info->fpr_offset;
2451       unsigned gregno = gpr_base + info->gpr_offset;
2452 
2453       /* Pass one- or two-element floating-point aggregates in FPRs.  */
2454       if ((info->num_fprs = riscv_pass_aggregate_in_fpr_pair_p (type, fields))
2455 	  && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
2456 	switch (info->num_fprs)
2457 	  {
2458 	  case 1:
2459 	    return riscv_pass_fpr_single (mode, fregno,
2460 					  TYPE_MODE (fields[0].type));
2461 
2462 	  case 2:
2463 	    return riscv_pass_fpr_pair (mode, fregno,
2464 					TYPE_MODE (fields[0].type),
2465 					fields[0].offset,
2466 					fregno + 1,
2467 					TYPE_MODE (fields[1].type),
2468 					fields[1].offset);
2469 
2470 	  default:
2471 	    gcc_unreachable ();
2472 	  }
2473 
2474       /* Pass real and complex floating-point numbers in FPRs.  */
2475       if ((info->num_fprs = riscv_pass_mode_in_fpr_p (mode))
2476 	  && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS)
2477 	switch (GET_MODE_CLASS (mode))
2478 	  {
2479 	  case MODE_FLOAT:
2480 	    return gen_rtx_REG (mode, fregno);
2481 
2482 	  case MODE_COMPLEX_FLOAT:
2483 	    return riscv_pass_fpr_pair (mode, fregno, GET_MODE_INNER (mode), 0,
2484 					fregno + 1, GET_MODE_INNER (mode),
2485 					GET_MODE_UNIT_SIZE (mode));
2486 
2487 	  default:
2488 	    gcc_unreachable ();
2489 	  }
2490 
2491       /* Pass structs with one float and one integer in an FPR and a GPR.  */
2492       if (riscv_pass_aggregate_in_fpr_and_gpr_p (type, fields)
2493 	  && info->gpr_offset < MAX_ARGS_IN_REGISTERS
2494 	  && info->fpr_offset < MAX_ARGS_IN_REGISTERS)
2495 	{
2496 	  info->num_gprs = 1;
2497 	  info->num_fprs = 1;
2498 
2499 	  if (!SCALAR_FLOAT_TYPE_P (fields[0].type))
2500 	    std::swap (fregno, gregno);
2501 
2502 	  return riscv_pass_fpr_pair (mode, fregno, TYPE_MODE (fields[0].type),
2503 				      fields[0].offset,
2504 				      gregno, TYPE_MODE (fields[1].type),
2505 				      fields[1].offset);
2506 	}
2507     }
2508 
2509   /* Work out the size of the argument.  */
2510   num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
2511   num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2512 
2513   /* Doubleword-aligned varargs start on an even register boundary.  */
2514   if (!named && num_bytes != 0 && alignment > BITS_PER_WORD)
2515     info->gpr_offset += info->gpr_offset & 1;
2516 
2517   /* Partition the argument between registers and stack.  */
2518   info->num_fprs = 0;
2519   info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset);
2520   info->stack_p = (num_words - info->num_gprs) != 0;
2521 
2522   if (info->num_gprs || return_p)
2523     return gen_rtx_REG (mode, gpr_base + info->gpr_offset);
2524 
2525   return NULL_RTX;
2526 }
2527 
2528 /* Implement TARGET_FUNCTION_ARG.  */
2529 
2530 static rtx
2531 riscv_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
2532 		    const_tree type, bool named)
2533 {
2534   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2535   struct riscv_arg_info info;
2536 
2537   if (mode == VOIDmode)
2538     return NULL;
2539 
2540   return riscv_get_arg_info (&info, cum, mode, type, named, false);
2541 }
2542 
2543 /* Implement TARGET_FUNCTION_ARG_ADVANCE.  */
2544 
2545 static void
2546 riscv_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
2547 			    const_tree type, bool named)
2548 {
2549   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2550   struct riscv_arg_info info;
2551 
2552   riscv_get_arg_info (&info, cum, mode, type, named, false);
2553 
2554   /* Advance the register count.  This has the effect of setting
2555      num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned
2556      argument required us to skip the final GPR and pass the whole
2557      argument on the stack.  */
2558   cum->num_fprs = info.fpr_offset + info.num_fprs;
2559   cum->num_gprs = info.gpr_offset + info.num_gprs;
2560 }
2561 
2562 /* Implement TARGET_ARG_PARTIAL_BYTES.  */
2563 
2564 static int
2565 riscv_arg_partial_bytes (cumulative_args_t cum,
2566 			 enum machine_mode mode, tree type, bool named)
2567 {
2568   struct riscv_arg_info arg;
2569 
2570   riscv_get_arg_info (&arg, get_cumulative_args (cum), mode, type, named, false);
2571   return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
2572 }
2573 
2574 /* Implement FUNCTION_VALUE and LIBCALL_VALUE.  For normal calls,
2575    VALTYPE is the return type and MODE is VOIDmode.  For libcalls,
2576    VALTYPE is null and MODE is the mode of the return value.  */
2577 
2578 rtx
2579 riscv_function_value (const_tree type, const_tree func, enum machine_mode mode)
2580 {
2581   struct riscv_arg_info info;
2582   CUMULATIVE_ARGS args;
2583 
2584   if (type)
2585     {
2586       int unsigned_p = TYPE_UNSIGNED (type);
2587 
2588       mode = TYPE_MODE (type);
2589 
2590       /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
2591 	 return values, promote the mode here too.  */
2592       mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
2593     }
2594 
2595   memset (&args, 0, sizeof args);
2596   return riscv_get_arg_info (&info, &args, mode, type, true, true);
2597 }
2598 
2599 /* Implement TARGET_PASS_BY_REFERENCE. */
2600 
2601 static bool
2602 riscv_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
2603 			 const_tree type, bool named)
2604 {
2605   HOST_WIDE_INT size = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
2606   struct riscv_arg_info info;
2607   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
2608 
2609   /* ??? std_gimplify_va_arg_expr passes NULL for cum.  Fortunately, we
2610      never pass variadic arguments in floating-point registers, so we can
2611      avoid the call to riscv_get_arg_info in this case.  */
2612   if (cum != NULL)
2613     {
2614       /* Don't pass by reference if we can use a floating-point register.  */
2615       riscv_get_arg_info (&info, cum, mode, type, named, false);
2616       if (info.num_fprs)
2617 	return false;
2618     }
2619 
2620   /* Pass by reference if the data do not fit in two integer registers.  */
2621   return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD);
2622 }
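
/* Consequently, on RV64 (UNITS_PER_WORD == 8) a 16-byte aggregate with
   no FPR-eligible fields is passed by value in up to two GPRs, while a
   24-byte aggregate is passed by reference, per the ABI rule that
   values larger than 2*XLEN bits go by reference.  */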
2623 
2624 /* Implement TARGET_RETURN_IN_MEMORY.  */
2625 
2626 static bool
2627 riscv_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
2628 {
2629   CUMULATIVE_ARGS args;
2630   cumulative_args_t cum = pack_cumulative_args (&args);
2631 
2632   /* The rules for returning in memory are the same as for passing the
2633      first named argument by reference.  */
2634   memset (&args, 0, sizeof args);
2635   return riscv_pass_by_reference (cum, TYPE_MODE (type), type, true);
2636 }
2637 
2638 /* Implement TARGET_SETUP_INCOMING_VARARGS.  */
2639 
2640 static void
2641 riscv_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
2642 			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
2643 			     int no_rtl)
2644 {
2645   CUMULATIVE_ARGS local_cum;
2646   int gp_saved;
2647 
2648   /* The caller has advanced CUM up to, but not beyond, the last named
2649      argument.  Advance a local copy of CUM past the last "real" named
2650      argument, to find out how many registers are left over.  */
2651   local_cum = *get_cumulative_args (cum);
2652   riscv_function_arg_advance (pack_cumulative_args (&local_cum), mode, type, 1);
2653 
2654   /* Find out how many registers we need to save.  */
2655   gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs;
2656 
2657   if (!no_rtl && gp_saved > 0)
2658     {
2659       rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
2660 			       REG_PARM_STACK_SPACE (cfun->decl)
2661 			       - gp_saved * UNITS_PER_WORD);
2662       rtx mem = gen_frame_mem (BLKmode, ptr);
2663       set_mem_alias_set (mem, get_varargs_alias_set ());
2664 
2665       move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST,
2666 			   mem, gp_saved);
2667     }
2668   if (REG_PARM_STACK_SPACE (cfun->decl) == 0)
2669     cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD;
2670 }
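
/* For illustration: on RV64, a function such as "int f (char *fmt, ...)"
   has one named GPR argument, so GP_SAVED is 7 and the prologue dumps
   a1 through a7 (56 bytes) immediately below the incoming stack
   arguments, letting va_arg walk all anonymous arguments contiguously.  */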
2671 
2672 /* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
2673 
2674 static void
2675 riscv_va_start (tree valist, rtx nextarg)
2676 {
2677   nextarg = plus_constant (Pmode, nextarg, -cfun->machine->varargs_size);
2678   std_expand_builtin_va_start (valist, nextarg);
2679 }
2680 
2681 /* Make ADDR suitable for use as a call or sibcall target.  */
2682 
2683 rtx
2684 riscv_legitimize_call_address (rtx addr)
2685 {
2686   if (!call_insn_operand (addr, VOIDmode))
2687     {
2688       rtx reg = RISCV_PROLOGUE_TEMP (Pmode);
2689       riscv_emit_move (reg, addr);
2690       return reg;
2691     }
2692   return addr;
2693 }
2694 
2695 /* Print symbolic operand OP, which is part of a HIGH or LO_SUM
2696    in context CONTEXT.  HI_RELOC indicates a high-part reloc.  */
2697 
2698 static void
2699 riscv_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
2700 {
2701   const char *reloc;
2702 
2703   switch (riscv_classify_symbolic_expression (op))
2704     {
2705       case SYMBOL_ABSOLUTE:
2706 	reloc = hi_reloc ? "%hi" : "%lo";
2707 	break;
2708 
2709       case SYMBOL_PCREL:
2710 	reloc = hi_reloc ? "%pcrel_hi" : "%pcrel_lo";
2711 	break;
2712 
2713       case SYMBOL_TLS_LE:
2714 	reloc = hi_reloc ? "%tprel_hi" : "%tprel_lo";
2715 	break;
2716 
2717       default:
2718 	gcc_unreachable ();
2719     }
2720 
2721   fprintf (file, "%s(", reloc);
2722   output_addr_const (file, riscv_strip_unspec_address (op));
2723   fputc (')', file);
2724 }
2725 
2726 /* Return true if the .AQ suffix should be added to an AMO to implement the
2727    acquire portion of memory model MODEL.  */
2728 
2729 static bool
2730 riscv_memmodel_needs_amo_acquire (enum memmodel model)
2731 {
2732   switch (model)
2733     {
2734       case MEMMODEL_ACQ_REL:
2735       case MEMMODEL_SEQ_CST:
2736       case MEMMODEL_SYNC_SEQ_CST:
2737       case MEMMODEL_ACQUIRE:
2738       case MEMMODEL_CONSUME:
2739       case MEMMODEL_SYNC_ACQUIRE:
2740 	return true;
2741 
2742       case MEMMODEL_RELEASE:
2743       case MEMMODEL_SYNC_RELEASE:
2744       case MEMMODEL_RELAXED:
2745 	return false;
2746 
2747       default:
2748 	gcc_unreachable ();
2749     }
2750 }
2751 
2752 /* Return true if a FENCE should be emitted before a memory access to
2753    implement the release portion of memory model MODEL.  */
2754 
2755 static bool
2756 riscv_memmodel_needs_release_fence (enum memmodel model)
2757 {
2758   switch (model)
2759     {
2760       case MEMMODEL_ACQ_REL:
2761       case MEMMODEL_SEQ_CST:
2762       case MEMMODEL_SYNC_SEQ_CST:
2763       case MEMMODEL_RELEASE:
2764       case MEMMODEL_SYNC_RELEASE:
2765 	return true;
2766 
2767       case MEMMODEL_ACQUIRE:
2768       case MEMMODEL_CONSUME:
2769       case MEMMODEL_SYNC_ACQUIRE:
2770       case MEMMODEL_RELAXED:
2771 	return false;
2772 
2773       default:
2774 	gcc_unreachable ();
2775     }
2776 }
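
/* Taken together: a SEQ_CST atomic operation both takes the .aq suffix
   and is preceded by a release fence, so an AMO prints roughly as
   "fence iorw,ow; amoadd.w.aq ..." via the %F and %A operand codes
   documented below.  */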
2777 
2778 /* Implement TARGET_PRINT_OPERAND.  The RISC-V-specific operand codes are:
2779 
2780    'h'	Print the high-part relocation associated with OP, after stripping
2781 	  any outermost HIGH.
2782    'R'	Print the low-part relocation associated with OP.
2783    'C'	Print the integer branch condition for comparison OP.
2784    'A'	Print the atomic operation suffix for memory model OP.
2785    'F'	Print a FENCE if the memory model requires a release.
2786    'z'	Print x0 if OP is zero, otherwise print OP normally.  */
2787 
2788 static void
2789 riscv_print_operand (FILE *file, rtx op, int letter)
2790 {
2791   enum machine_mode mode = GET_MODE (op);
2792   enum rtx_code code = GET_CODE (op);
2793 
2794   switch (letter)
2795     {
2796     case 'h':
2797       if (code == HIGH)
2798 	op = XEXP (op, 0);
2799       riscv_print_operand_reloc (file, op, true);
2800       break;
2801 
2802     case 'R':
2803       riscv_print_operand_reloc (file, op, false);
2804       break;
2805 
2806     case 'C':
2807       /* The RTL names match the instruction names. */
2808       fputs (GET_RTX_NAME (code), file);
2809       break;
2810 
2811     case 'A':
2812       if (riscv_memmodel_needs_amo_acquire ((enum memmodel) INTVAL (op)))
2813 	fputs (".aq", file);
2814       break;
2815 
2816     case 'F':
2817       if (riscv_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
2818 	fputs ("fence iorw,ow; ", file);
2819       break;
2820 
2821     default:
2822       switch (code)
2823 	{
2824 	case REG:
2825 	  if (letter && letter != 'z')
2826 	    output_operand_lossage ("invalid use of '%%%c'", letter);
2827 	  fprintf (file, "%s", reg_names[REGNO (op)]);
2828 	  break;
2829 
2830 	case MEM:
2831 	  if (letter && letter != 'z')
2832 	    output_operand_lossage ("invalid use of '%%%c'", letter);
2833 	  else
2834 	    output_address (mode, XEXP (op, 0));
2835 	  break;
2836 
2837 	default:
2838 	  if (letter == 'z' && op == CONST0_RTX (GET_MODE (op)))
2839 	    fputs (reg_names[GP_REG_FIRST], file);
2840 	  else if (letter && letter != 'z')
2841 	    output_operand_lossage ("invalid use of '%%%c'", letter);
2842 	  else
2843 	    output_addr_const (file, riscv_strip_unspec_address (op));
2844 	  break;
2845 	}
2846     }
2847 }
2848 
2849 /* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
2850 
2851 static void
2852 riscv_print_operand_address (FILE *file, machine_mode mode ATTRIBUTE_UNUSED, rtx x)
2853 {
2854   struct riscv_address_info addr;
2855 
2856   if (riscv_classify_address (&addr, x, word_mode, true))
2857     switch (addr.type)
2858       {
2859       case ADDRESS_REG:
2860 	riscv_print_operand (file, addr.offset, 0);
2861 	fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
2862 	return;
2863 
2864       case ADDRESS_LO_SUM:
2865 	riscv_print_operand_reloc (file, addr.offset, false);
2866 	fprintf (file, "(%s)", reg_names[REGNO (addr.reg)]);
2867 	return;
2868 
2869       case ADDRESS_CONST_INT:
2870 	output_addr_const (file, x);
2871 	fprintf (file, "(%s)", reg_names[GP_REG_FIRST]);
2872 	return;
2873 
2874       case ADDRESS_SYMBOLIC:
2875 	output_addr_const (file, riscv_strip_unspec_address (x));
2876 	return;
2877       }
2878   gcc_unreachable ();
2879 }
2880 
2881 static bool
2882 riscv_size_ok_for_small_data_p (int size)
2883 {
2884   return g_switch_value && IN_RANGE (size, 1, g_switch_value);
2885 }
2886 
2887 /* Return true if X should be placed in the small data section.  */
2888 
2889 static bool
2890 riscv_in_small_data_p (const_tree x)
2891 {
2892   if (TREE_CODE (x) == STRING_CST || TREE_CODE (x) == FUNCTION_DECL)
2893     return false;
2894 
2895   if (TREE_CODE (x) == VAR_DECL && DECL_SECTION_NAME (x))
2896     {
2897       const char *sec = DECL_SECTION_NAME (x);
2898       return strcmp (sec, ".sdata") == 0 || strcmp (sec, ".sbss") == 0;
2899     }
2900 
2901   return riscv_size_ok_for_small_data_p (int_size_in_bytes (TREE_TYPE (x)));
2902 }
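
/* For example, with -msmall-data-limit=8 (typically the default), a
   global "long counter;" lands in .sbss and is reachable with a single
   gp-relative access, while a 16-byte array falls back to the ordinary
   sections unless its section attribute names .sdata or .sbss.  */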
2903 
2904 /* Return a section for X, handling small data. */
2905 
2906 static section *
2907 riscv_elf_select_rtx_section (enum machine_mode mode, rtx x,
2908 			      unsigned HOST_WIDE_INT align)
2909 {
2910   section *s = default_elf_select_rtx_section (mode, x, align);
2911 
2912   if (riscv_size_ok_for_small_data_p (GET_MODE_SIZE (mode)))
2913     {
2914       if (strncmp (s->named.name, ".rodata.cst", strlen (".rodata.cst")) == 0)
2915 	{
2916 	  /* Rename .rodata.cst* to .srodata.cst*. */
2917 	  char *name = (char *) alloca (strlen (s->named.name) + 2);
2918 	  sprintf (name, ".s%s", s->named.name + 1);
2919 	  return get_section (name, s->named.common.flags, NULL);
2920 	}
2921 
2922       if (s == data_section)
2923 	return sdata_section;
2924     }
2925 
2926   return s;
2927 }
2928 
2929 /* Make the last instruction frame-related and note that it performs
2930    the operation described by FRAME_PATTERN.  */
2931 
2932 static void
2933 riscv_set_frame_expr (rtx frame_pattern)
2934 {
2935   rtx insn;
2936 
2937   insn = get_last_insn ();
2938   RTX_FRAME_RELATED_P (insn) = 1;
2939   REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR,
2940 				      frame_pattern,
2941 				      REG_NOTES (insn));
2942 }
2943 
2944 /* Return a frame-related rtx that stores REG at MEM.
2945    REG must be a single register.  */
2946 
2947 static rtx
2948 riscv_frame_set (rtx mem, rtx reg)
2949 {
2950   rtx set = gen_rtx_SET (mem, reg);
2951   RTX_FRAME_RELATED_P (set) = 1;
2952   return set;
2953 }
2954 
2955 /* Return true if the current function must save register REGNO.  */
2956 
2957 static bool
2958 riscv_save_reg_p (unsigned int regno)
2959 {
2960   bool call_saved = !global_regs[regno] && !call_used_regs[regno];
2961   bool might_clobber = crtl->saves_all_registers
2962 		       || df_regs_ever_live_p (regno);
2963 
2964   if (call_saved && might_clobber)
2965     return true;
2966 
2967   if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
2968     return true;
2969 
2970   if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return)
2971     return true;
2972 
2973   return false;
2974 }
2975 
2976 /* Determine whether to call GPR save/restore routines.  */
2977 static bool
2978 riscv_use_save_libcall (const struct riscv_frame_info *frame)
2979 {
2980   if (!TARGET_SAVE_RESTORE || crtl->calls_eh_return || frame_pointer_needed)
2981     return false;
2982 
2983   return frame->save_libcall_adjustment != 0;
2984 }
2985 
2986 /* Determine which GPR save/restore routine to call.  */
2987 
2988 static unsigned
2989 riscv_save_libcall_count (unsigned mask)
2990 {
2991   for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--)
2992     if (BITSET_P (mask, n))
2993       return CALLEE_SAVED_REG_NUMBER (n) + 1;
2994   abort ();
2995 }
2996 
2997 /* Populate the current function's riscv_frame_info structure.
2998 
2999    RISC-V stack frames grow downward.  High addresses are at the top.
3000 
3001 	+-------------------------------+
3002 	|                               |
3003 	|  incoming stack arguments     |
3004 	|                               |
3005 	+-------------------------------+ <-- incoming stack pointer
3006 	|                               |
3007 	|  callee-allocated save area   |
3008 	|  for arguments that are       |
3009 	|  split between registers and  |
3010 	|  the stack                    |
3011 	|                               |
3012 	+-------------------------------+ <-- arg_pointer_rtx
3013 	|                               |
3014 	|  callee-allocated save area   |
3015 	|  for register varargs         |
3016 	|                               |
3017 	+-------------------------------+ <-- hard_frame_pointer_rtx;
3018 	|                               |     stack_pointer_rtx + gp_sp_offset
3019 	|  GPR save area                |       + UNITS_PER_WORD
3020 	|                               |
3021 	+-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset
3022 	|                               |       + UNITS_PER_HWVALUE
3023 	|  FPR save area                |
3024 	|                               |
3025 	+-------------------------------+ <-- frame_pointer_rtx (virtual)
3026 	|                               |
3027 	|  local variables              |
3028 	|                               |
3029       P +-------------------------------+
3030 	|                               |
3031 	|  outgoing stack arguments     |
3032 	|                               |
3033 	+-------------------------------+ <-- stack_pointer_rtx
3034 
3035    Dynamic stack allocations such as alloca insert data at point P.
3036    They decrease stack_pointer_rtx but leave frame_pointer_rtx and
3037    hard_frame_pointer_rtx unchanged.  */
3038 
3039 static void
3040 riscv_compute_frame_info (void)
3041 {
3042   struct riscv_frame_info *frame;
3043   HOST_WIDE_INT offset;
3044   unsigned int regno, i, num_x_saved = 0, num_f_saved = 0;
3045 
3046   frame = &cfun->machine->frame;
3047   memset (frame, 0, sizeof (*frame));
3048 
3049   /* Find out which GPRs we need to save.  */
3050   for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
3051     if (riscv_save_reg_p (regno))
3052       frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
3053 
3054   /* If this function calls eh_return, we must also save and restore the
3055      EH data registers.  */
3056   if (crtl->calls_eh_return)
3057     for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
3058       frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++;
3059 
3060   /* Find out which FPRs we need to save.  This loop must iterate over
3061      the same space as its companion in riscv_for_each_saved_reg.  */
3062   if (TARGET_HARD_FLOAT)
3063     for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
3064       if (riscv_save_reg_p (regno))
3065 	frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++;
3066 
3067   /* At the bottom of the frame are any outgoing stack arguments. */
3068   offset = crtl->outgoing_args_size;
3069   /* Next are local stack variables. */
3070   offset += RISCV_STACK_ALIGN (get_frame_size ());
3071   /* The virtual frame pointer points above the local variables. */
3072   frame->frame_pointer_offset = offset;
3073   /* Next are the callee-saved FPRs. */
3074   if (frame->fmask)
3075     offset += RISCV_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG);
3076   frame->fp_sp_offset = offset - UNITS_PER_FP_REG;
3077   /* Next are the callee-saved GPRs. */
3078   if (frame->mask)
3079     {
3080       unsigned x_save_size = RISCV_STACK_ALIGN (num_x_saved * UNITS_PER_WORD);
3081       unsigned num_save_restore = 1 + riscv_save_libcall_count (frame->mask);
3082 
3083       /* Only use save/restore routines if they don't alter the stack size.  */
3084       if (RISCV_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size)
3085 	frame->save_libcall_adjustment = x_save_size;
3086 
3087       offset += x_save_size;
3088     }
3089   frame->gp_sp_offset = offset - UNITS_PER_WORD;
3090   /* The hard frame pointer points above the callee-saved GPRs. */
3091   frame->hard_frame_pointer_offset = offset;
3092   /* Above the hard frame pointer is the callee-allocated varargs save area. */
3093   offset += RISCV_STACK_ALIGN (cfun->machine->varargs_size);
3094   frame->arg_pointer_offset = offset;
3095   /* Next is the callee-allocated area for pretend stack arguments.  */
3096   offset += crtl->args.pretend_args_size;
3097   frame->total_size = offset;
3098   /* Above this point are the incoming stack pointer and any incoming arguments.  */
3099 
3100   /* Only use save/restore routines when the GPRs are atop the frame.  */
3101   if (frame->hard_frame_pointer_offset != frame->total_size)
3102     frame->save_libcall_adjustment = 0;
3103 }
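
/* Worked example (RV64, 16-byte stack alignment): a function with no
   locals or outgoing arguments that saves only ra and s0 gets
   x_save_size == 16, hence total_size == 16 with s0 at sp+0 and ra at
   sp+8, gp_sp_offset == 8 and hard_frame_pointer_offset == 16.  The
   GPRs sit atop the frame, so the save/restore libcalls stay usable.  */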
3104 
3105 /* Make sure that we're not trying to eliminate to the wrong hard frame
3106    pointer.  */
3107 
3108 static bool
3109 riscv_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
3110 {
3111   return (to == HARD_FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM);
3112 }
3113 
3114 /* Implement INITIAL_ELIMINATION_OFFSET.  FROM is either the frame pointer
3115    or argument pointer.  TO is either the stack pointer or hard frame
3116    pointer.  */
3117 
3118 HOST_WIDE_INT
3119 riscv_initial_elimination_offset (int from, int to)
3120 {
3121   HOST_WIDE_INT src, dest;
3122 
3123   riscv_compute_frame_info ();
3124 
3125   if (to == HARD_FRAME_POINTER_REGNUM)
3126     dest = cfun->machine->frame.hard_frame_pointer_offset;
3127   else if (to == STACK_POINTER_REGNUM)
3128     dest = 0; /* The stack pointer is the base of all offsets, hence 0.  */
3129   else
3130     gcc_unreachable ();
3131 
3132   if (from == FRAME_POINTER_REGNUM)
3133     src = cfun->machine->frame.frame_pointer_offset;
3134   else if (from == ARG_POINTER_REGNUM)
3135     src = cfun->machine->frame.arg_pointer_offset;
3136   else
3137     gcc_unreachable ();
3138 
3139   return src - dest;
3140 }
3141 
3142 /* Implement RETURN_ADDR_RTX.  We do not support moving back to a
3143    previous frame.  */
3144 
3145 rtx
3146 riscv_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
3147 {
3148   if (count != 0)
3149     return const0_rtx;
3150 
3151   return get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
3152 }
3153 
3154 /* Emit code to change the current function's return address to
3155    ADDRESS.  SCRATCH is available as a scratch register, if needed.
3156    ADDRESS and SCRATCH are both word-mode GPRs.  */
3157 
3158 void
3159 riscv_set_return_address (rtx address, rtx scratch)
3160 {
3161   rtx slot_address;
3162 
3163   gcc_assert (BITSET_P (cfun->machine->frame.mask, RETURN_ADDR_REGNUM));
3164   slot_address = riscv_add_offset (scratch, stack_pointer_rtx,
3165 				  cfun->machine->frame.gp_sp_offset);
3166   riscv_emit_move (gen_frame_mem (GET_MODE (address), slot_address), address);
3167 }
3168 
3169 /* A function to save or store a register.  The first argument is the
3170    register and the second is the stack slot.  */
3171 typedef void (*riscv_save_restore_fn) (rtx, rtx);
3172 
3173 /* Use FN to save or restore register REGNO.  MODE is the register's
3174    mode and OFFSET is the offset of its save slot from the current
3175    stack pointer.  */
3176 
3177 static void
3178 riscv_save_restore_reg (enum machine_mode mode, int regno,
3179 		       HOST_WIDE_INT offset, riscv_save_restore_fn fn)
3180 {
3181   rtx mem;
3182 
3183   mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset));
3184   fn (gen_rtx_REG (mode, regno), mem);
3185 }
3186 
3187 /* Call FN for each register that is saved by the current function.
3188    SP_OFFSET is the offset of the current stack pointer from the start
3189    of the frame.  */
3190 
3191 static void
3192 riscv_for_each_saved_reg (HOST_WIDE_INT sp_offset, riscv_save_restore_fn fn)
3193 {
3194   HOST_WIDE_INT offset;
3195 
3196   /* Save the link register and s-registers. */
3197   offset = cfun->machine->frame.gp_sp_offset - sp_offset;
3198   for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST-1; regno++)
3199     if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
3200       {
3201 	riscv_save_restore_reg (word_mode, regno, offset, fn);
3202 	offset -= UNITS_PER_WORD;
3203       }
3204 
3205   /* This loop must iterate over the same space as its companion in
3206      riscv_compute_frame_info.  */
3207   offset = cfun->machine->frame.fp_sp_offset - sp_offset;
3208   for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
3209     if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST))
3210       {
3211 	enum machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode;
3212 
3213 	riscv_save_restore_reg (mode, regno, offset, fn);
3214 	offset -= GET_MODE_SIZE (mode);
3215       }
3216 }
3217 
3218 /* Save register REG to MEM.  Make the instruction frame-related.  */
3219 
3220 static void
3221 riscv_save_reg (rtx reg, rtx mem)
3222 {
3223   riscv_emit_move (mem, reg);
3224   riscv_set_frame_expr (riscv_frame_set (mem, reg));
3225 }
3226 
3227 /* Restore register REG from MEM.  */
3228 
3229 static void
3230 riscv_restore_reg (rtx reg, rtx mem)
3231 {
3232   rtx insn = riscv_emit_move (reg, mem);
3233   rtx dwarf = NULL_RTX;
3234   dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
3235   REG_NOTES (insn) = dwarf;
3236 
3237   RTX_FRAME_RELATED_P (insn) = 1;
3238 }
3239 
3240 /* Return the code to invoke the GPR save routine.  */
3241 
3242 const char *
3243 riscv_output_gpr_save (unsigned mask)
3244 {
3245   static char s[32];
3246   unsigned n = riscv_save_libcall_count (mask);
3247 
3248   ssize_t bytes = snprintf (s, sizeof (s), "call\tt0,__riscv_save_%u", n);
3249   gcc_assert ((size_t) bytes < sizeof (s));
3250 
3251   return s;
3252 }
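
/* For example, if MASK covers ra, s0 and s1 this returns
   "call\tt0,__riscv_save_2"; by the libgcc convention assumed here,
   __riscv_save_N saves ra together with s0 through s(N-1).  */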
3253 
3254 /* For stack frames that can't be allocated with a single ADDI instruction,
3255    compute the best value to initially allocate.  It must at a minimum
3256    allocate enough space to spill the callee-saved registers.  */
3257 
3258 static HOST_WIDE_INT
3259 riscv_first_stack_step (struct riscv_frame_info *frame)
3260 {
3261   HOST_WIDE_INT min_first_step = frame->total_size - frame->fp_sp_offset;
3262   HOST_WIDE_INT max_first_step = IMM_REACH / 2 - STACK_BOUNDARY / 8;
3263 
3264   if (SMALL_OPERAND (frame->total_size))
3265     return frame->total_size;
3266 
3267   /* As an optimization, use the least-significant bits of the total frame
3268      size, so that the second adjustment step is just LUI + ADD.  */
3269   if (!SMALL_OPERAND (frame->total_size - max_first_step)
3270       && frame->total_size % IMM_REACH < IMM_REACH / 2
3271       && frame->total_size % IMM_REACH >= min_first_step)
3272     return frame->total_size % IMM_REACH;
3273 
3274   gcc_assert (min_first_step <= max_first_step);
3275   return max_first_step;
3276 }
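
/* Numeric example: with IMM_REACH == 0x1000, a 9024-byte frame is out of
   ADDI range, and 9024 % 0x1000 == 832 lies below IMM_REACH / 2, so the
   first step allocates 832 bytes (enough for the register saves,
   assuming they fit) and the remaining 8192 takes a single LUI + ADD.  */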
3277 
3278 static rtx
3279 riscv_adjust_libcall_cfi_prologue ()
3280 {
3281   rtx dwarf = NULL_RTX;
3282   rtx adjust_sp_rtx, reg, mem, insn;
3283   int saved_size = cfun->machine->frame.save_libcall_adjustment;
3284   int offset;
3285 
3286   for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST-1; regno++)
3287     if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
3288       {
3289 	/* The save order is ra, s0, s1, s2 to s11.  */
3290 	if (regno == RETURN_ADDR_REGNUM)
3291 	  offset = saved_size - UNITS_PER_WORD;
3292 	else if (regno == S0_REGNUM)
3293 	  offset = saved_size - UNITS_PER_WORD * 2;
3294 	else if (regno == S1_REGNUM)
3295 	  offset = saved_size - UNITS_PER_WORD * 3;
3296 	else
3297 	  offset = saved_size - ((regno - S2_REGNUM + 4) * UNITS_PER_WORD);
3298 
3299 	reg = gen_rtx_REG (SImode, regno);
3300 	mem = gen_frame_mem (SImode, plus_constant (Pmode,
3301 						    stack_pointer_rtx,
3302 						    offset));
3303 
3304 	insn = gen_rtx_SET (mem, reg);
3305 	dwarf = alloc_reg_note (REG_CFA_OFFSET, insn, dwarf);
3306       }
3307 
3308   /* Debug info for the stack pointer adjustment.  */
3309   adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
3310 				 stack_pointer_rtx, GEN_INT (-saved_size));
3311   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
3312 			  dwarf);
3313   return dwarf;
3314 }
3315 
3316 static void
3317 riscv_emit_stack_tie (void)
3318 {
3319   if (Pmode == SImode)
3320     emit_insn (gen_stack_tiesi (stack_pointer_rtx, hard_frame_pointer_rtx));
3321   else
3322     emit_insn (gen_stack_tiedi (stack_pointer_rtx, hard_frame_pointer_rtx));
3323 }
3324 
3325 /* Expand the "prologue" pattern.  */
3326 
3327 void
3328 riscv_expand_prologue (void)
3329 {
3330   struct riscv_frame_info *frame = &cfun->machine->frame;
3331   HOST_WIDE_INT size = frame->total_size;
3332   unsigned mask = frame->mask;
3333   rtx insn;
3334 
3335   if (flag_stack_usage_info)
3336     current_function_static_stack_size = size;
3337 
3338   /* When optimizing for size, call a subroutine to save the registers.  */
3339   if (riscv_use_save_libcall (frame))
3340     {
3341       rtx dwarf = NULL_RTX;
3342       dwarf = riscv_adjust_libcall_cfi_prologue ();
3343 
3344       frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */
3345       size -= frame->save_libcall_adjustment;
3346       insn = emit_insn (gen_gpr_save (GEN_INT (mask)));
3347 
3348       RTX_FRAME_RELATED_P (insn) = 1;
3349       REG_NOTES (insn) = dwarf;
3350     }
3351 
3352   /* Save the registers.  */
3353   if ((frame->mask | frame->fmask) != 0)
3354     {
3355       HOST_WIDE_INT step1 = MIN (size, riscv_first_stack_step (frame));
3356 
3357       insn = gen_add3_insn (stack_pointer_rtx,
3358 			    stack_pointer_rtx,
3359 			    GEN_INT (-step1));
3360       RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
3361       size -= step1;
3362       riscv_for_each_saved_reg (size, riscv_save_reg);
3363     }
3364 
3365   frame->mask = mask; /* Undo the above fib.  */
3366 
3367   /* Set up the frame pointer, if we're using one.  */
3368   if (frame_pointer_needed)
3369     {
3370       insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
3371 			    GEN_INT (frame->hard_frame_pointer_offset - size));
3372       RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
3373 
3374       riscv_emit_stack_tie ();
3375     }
3376 
3377   /* Allocate the rest of the frame.  */
3378   if (size > 0)
3379     {
3380       if (SMALL_OPERAND (-size))
3381 	{
3382 	  insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
3383 				GEN_INT (-size));
3384 	  RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
3385 	}
3386       else
3387 	{
3388 	  riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), GEN_INT (-size));
3389 	  emit_insn (gen_add3_insn (stack_pointer_rtx,
3390 				    stack_pointer_rtx,
3391 				    RISCV_PROLOGUE_TEMP (Pmode)));
3392 
3393 	  /* Describe the effect of the previous instructions.  */
3394 	  insn = plus_constant (Pmode, stack_pointer_rtx, -size);
3395 	  insn = gen_rtx_SET (stack_pointer_rtx, insn);
3396 	  riscv_set_frame_expr (insn);
3397 	}
3398     }
3399 }
3400 
3401 static rtx
3402 riscv_adjust_libcall_cfi_epilogue ()
3403 {
3404   rtx dwarf = NULL_RTX;
3405   rtx adjust_sp_rtx, reg;
3406   int saved_size = cfun->machine->frame.save_libcall_adjustment;
3407 
3408   /* Debug info for the stack pointer adjustment.  */
3409   adjust_sp_rtx = gen_add3_insn (stack_pointer_rtx,
3410 				 stack_pointer_rtx, GEN_INT (saved_size));
3411   dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, adjust_sp_rtx,
3412 			  dwarf);
3413 
3414   for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST-1; regno++)
3415     if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
3416       {
3417 	reg = gen_rtx_REG (SImode, regno);
3418 	dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
3419       }
3420 
3421   return dwarf;
3422 }
3423 
3424 /* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
3425    says which.  */
3426 
3427 void
3428 riscv_expand_epilogue (bool sibcall_p)
3429 {
3430   /* Split the frame into two.  STEP1 is the amount of stack we should
3431      deallocate before restoring the registers.  STEP2 is the amount we
3432      should deallocate afterwards.
3433 
3434      Start off by assuming that no registers need to be restored.  */
3435   struct riscv_frame_info *frame = &cfun->machine->frame;
3436   unsigned mask = frame->mask;
3437   HOST_WIDE_INT step1 = frame->total_size;
3438   HOST_WIDE_INT step2 = 0;
3439   bool use_restore_libcall = !sibcall_p && riscv_use_save_libcall (frame);
3440   rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
3441   rtx insn;
3442 
3443   /* We need a memory barrier to prevent reads from the deallocated stack.  */
3444   bool need_barrier_p = (get_frame_size ()
3445 			 + cfun->machine->frame.arg_pointer_offset) != 0;
3446 
3447   if (!sibcall_p && riscv_can_use_return_insn ())
3448     {
3449       emit_jump_insn (gen_return ());
3450       return;
3451     }
3452 
3453   /* Move past any dynamic stack allocations.  */
3454   if (cfun->calls_alloca)
3455     {
3456       /* Emit a barrier to prevent loads from a deallocated stack.  */
3457       riscv_emit_stack_tie ();
3458       need_barrier_p = false;
3459 
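      /* Recover sp from the frame pointer: conceptually
	 sp = hard_fp - hard_frame_pointer_offset, since alloca may have
	 left sp pointing somewhere below the register-save area.  */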
3460       rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset);
3461       if (!SMALL_OPERAND (INTVAL (adjust)))
3462 	{
3463 	  riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
3464 	  adjust = RISCV_PROLOGUE_TEMP (Pmode);
3465 	}
3466 
3467       insn = emit_insn (
3468 	       gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx,
3469 			      adjust));
3470 
3471       rtx dwarf = NULL_RTX;
3472       rtx cfa_adjust_value = gen_rtx_PLUS (
3473 			       Pmode, hard_frame_pointer_rtx,
3474 			       GEN_INT (-frame->hard_frame_pointer_offset));
3475       rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value);
3476       dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf);
3477       RTX_FRAME_RELATED_P (insn) = 1;
3478 
3479       REG_NOTES (insn) = dwarf;
3480     }
3481 
3482   /* If we need to restore registers, deallocate as much stack as
3483      possible in the second step without going out of range.  */
3484   if ((frame->mask | frame->fmask) != 0)
3485     {
3486       step2 = riscv_first_stack_step (frame);
3487       step1 -= step2;
3488     }
3489 
3490   /* Deallocate the first STEP1 bytes of the frame: sp = sp + STEP1.  */
3491   if (step1 > 0)
3492     {
3493       /* Emit a barrier to prevent loads from a deallocated stack.  */
3494       riscv_emit_stack_tie ();
3495       need_barrier_p = false;
3496 
3497       /* Get an rtx for STEP1 that we can add to BASE.  */
3498       rtx adjust = GEN_INT (step1);
3499       if (!SMALL_OPERAND (step1))
3500 	{
3501 	  riscv_emit_move (RISCV_PROLOGUE_TEMP (Pmode), adjust);
3502 	  adjust = RISCV_PROLOGUE_TEMP (Pmode);
3503 	}
3504 
3505       insn = emit_insn (
3506 	       gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, adjust));
3507 
3508       rtx dwarf = NULL_RTX;
3509       rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
3510 					 GEN_INT (step2));
3511 
3512       dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
3513       RTX_FRAME_RELATED_P (insn) = 1;
3514 
3515       REG_NOTES (insn) = dwarf;
3516     }
3517 
3518   if (use_restore_libcall)
3519     frame->mask = 0; /* Temporarily fib that we need not save GPRs.  */
3520 
3521   /* Restore the registers.  */
3522   riscv_for_each_saved_reg (frame->total_size - step2, riscv_restore_reg);
3523 
3524   if (use_restore_libcall)
3525     {
3526       frame->mask = mask; /* Undo the above fib.  */
3527       gcc_assert (step2 >= frame->save_libcall_adjustment);
3528       step2 -= frame->save_libcall_adjustment;
3529     }
3530 
3531   if (need_barrier_p)
3532     riscv_emit_stack_tie ();
3533 
3534   /* Deallocate the final bit of the frame.  */
3535   if (step2 > 0)
3536     {
3537       insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
3538 				       GEN_INT (step2)));
3539 
3540       rtx dwarf = NULL_RTX;
3541       rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
3542 					 const0_rtx);
3543       dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf);
3544       RTX_FRAME_RELATED_P (insn) = 1;
3545 
3546       REG_NOTES (insn) = dwarf;
3547     }
3548 
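  /* With -msave-restore the registers are reloaded by the matching
     __riscv_restore_<N> routine in libgcc, which also pops the
     register-save area and returns to the original caller.  */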
3549   if (use_restore_libcall)
3550     {
3551       rtx dwarf = riscv_adjust_libcall_cfi_epilogue ();
3552       insn = emit_insn (gen_gpr_restore (GEN_INT (riscv_save_libcall_count (mask))));
3553       RTX_FRAME_RELATED_P (insn) = 1;
3554       REG_NOTES (insn) = dwarf;
3555 
3556       emit_jump_insn (gen_gpr_restore_return (ra));
3557       return;
3558     }
3559 
3560   /* Add in the __builtin_eh_return stack adjustment.  */
3561   if (crtl->calls_eh_return)
3562     emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
3563 			      EH_RETURN_STACKADJ_RTX));
3564 
3565   if (!sibcall_p)
3566     emit_jump_insn (gen_simple_return_internal (ra));
3567 }
3568 
3569 /* Return nonzero if this function is known to have a null epilogue.
3570    This allows the optimizer to omit jumps to jumps if no stack
3571    was created.  */
3572 
3573 bool
3574 riscv_can_use_return_insn (void)
3575 {
3576   return reload_completed && cfun->machine->frame.total_size == 0;
3577 }
3578 
3579 /* Implement TARGET_REGISTER_MOVE_COST.  */
3580 
3581 static int
3582 riscv_register_move_cost (enum machine_mode mode,
3583 			  reg_class_t from, reg_class_t to)
3584 {
3585   return SECONDARY_MEMORY_NEEDED (from, to, mode) ? 8 : 2;
3586 }
3587 
3588 /* Return true if register REGNO can store a value of mode MODE.  */
3589 
3590 bool
3591 riscv_hard_regno_mode_ok_p (unsigned int regno, enum machine_mode mode)
3592 {
3593   unsigned int nregs = riscv_hard_regno_nregs (regno, mode);
3594 
3595   if (GP_REG_P (regno))
3596     {
3597       if (!GP_REG_P (regno + nregs - 1))
3598 	return false;
3599     }
3600   else if (FP_REG_P (regno))
3601     {
3602       if (!FP_REG_P (regno + nregs - 1))
3603 	return false;
3604 
3605       if (GET_MODE_CLASS (mode) != MODE_FLOAT
3606 	  && GET_MODE_CLASS (mode) != MODE_COMPLEX_FLOAT)
3607 	return false;
3608 
3609       /* Only use callee-saved registers if a potential callee is guaranteed
3610 	 to spill the requisite width.  */
3611       if (GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_REG
3612 	  || (!call_used_regs[regno]
3613 	      && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_FP_ARG))
3614 	return false;
3615     }
3616   else
3617     return false;
3618 
3619   /* Require same callee-savedness for all registers.  */
3620   for (unsigned i = 1; i < nregs; i++)
3621     if (call_used_regs[regno] != call_used_regs[regno + i])
3622       return false;
3623 
3624   return true;
3625 }
3626 
3627 /* Implement HARD_REGNO_NREGS.  */
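/* For example, with UNITS_PER_WORD == 8 (RV64) a TImode value occupies two
   GPRs, while DImode and narrower modes occupy one.  */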
3628 
3629 unsigned int
3630 riscv_hard_regno_nregs (int regno, enum machine_mode mode)
3631 {
3632   if (FP_REG_P (regno))
3633     return (GET_MODE_SIZE (mode) + UNITS_PER_FP_REG - 1) / UNITS_PER_FP_REG;
3634 
3635   /* All other registers are word-sized.  */
3636   return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3637 }
3638 
3639 /* Implement CLASS_MAX_NREGS.  */
3640 
3641 static unsigned char
3642 riscv_class_max_nregs (reg_class_t rclass, enum machine_mode mode)
3643 {
3644   if (reg_class_subset_p (FP_REGS, rclass))
3645     return riscv_hard_regno_nregs (FP_REG_FIRST, mode);
3646 
3647   if (reg_class_subset_p (GR_REGS, rclass))
3648     return riscv_hard_regno_nregs (GP_REG_FIRST, mode);
3649 
3650   return 0;
3651 }
3652 
3653 /* Implement TARGET_MEMORY_MOVE_COST.  */
3654 
3655 static int
3656 riscv_memory_move_cost (enum machine_mode mode, reg_class_t rclass, bool in)
3657 {
3658   return (tune_info->memory_cost
3659 	  + memory_move_secondary_cost (mode, rclass, in));
3660 }
3661 
3662 /* Return the number of instructions that can be issued per cycle.  */
3663 
3664 static int
3665 riscv_issue_rate (void)
3666 {
3667   return tune_info->issue_rate;
3668 }
3669 
3670 /* Implement TARGET_ASM_FILE_START.  */
3671 
3672 static void
3673 riscv_file_start (void)
3674 {
3675   default_file_start ();
3676 
3677   /* Instruct GAS to generate position-[in]dependent code.  */
3678   fprintf (asm_out_file, "\t.option %spic\n", (flag_pic ? "" : "no"));
3679 }
3680 
3681 /* Implement TARGET_ASM_OUTPUT_MI_THUNK.  Generate rtl rather than asm text
3682    in order to avoid duplicating too much logic from elsewhere.  */
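/* Illustrative only: a thunk with a small DELTA and no VCALL_OFFSET reduces
   to

	addi	a0,a0,DELTA
	tail	FUNCTION

   where a0 holds the "this" pointer (a1 instead when the return value is
   passed by invisible reference).  */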
3683 
3684 static void
3685 riscv_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
3686 		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
3687 		      tree function)
3688 {
3689   rtx this_rtx, temp1, temp2, fnaddr;
3690   rtx_insn *insn;
3691 
3692   /* Pretend to be a post-reload pass while generating rtl.  */
3693   reload_completed = 1;
3694 
3695   /* Mark the end of the (empty) prologue.  */
3696   emit_note (NOTE_INSN_PROLOGUE_END);
3697 
3698   /* Build the address of FUNCTION so we can sibcall it directly.  */
3699   fnaddr = gen_rtx_MEM (FUNCTION_MODE, XEXP (DECL_RTL (function), 0));
3700 
3701   /* We need two temporary registers in some cases.  */
3702   temp1 = gen_rtx_REG (Pmode, RISCV_PROLOGUE_TEMP_REGNUM);
3703   temp2 = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
3704 
3705   /* Find out which register contains the "this" pointer.  */
3706   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
3707     this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST + 1);
3708   else
3709     this_rtx = gen_rtx_REG (Pmode, GP_ARG_FIRST);
3710 
3711   /* Add DELTA to THIS_RTX.  */
3712   if (delta != 0)
3713     {
3714       rtx offset = GEN_INT (delta);
3715       if (!SMALL_OPERAND (delta))
3716 	{
3717 	  riscv_emit_move (temp1, offset);
3718 	  offset = temp1;
3719 	}
3720       emit_insn (gen_add3_insn (this_rtx, this_rtx, offset));
3721     }
3722 
3723   /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
3724   if (vcall_offset != 0)
3725     {
3726       rtx addr;
3727 
3728       /* Set TEMP1 to *THIS_RTX.  */
3729       riscv_emit_move (temp1, gen_rtx_MEM (Pmode, this_rtx));
3730 
3731       /* Set ADDR to a legitimate address for *THIS_RTX + VCALL_OFFSET.  */
3732       addr = riscv_add_offset (temp2, temp1, vcall_offset);
3733 
3734       /* Load the offset and add it to THIS_RTX.  */
3735       riscv_emit_move (temp1, gen_rtx_MEM (Pmode, addr));
3736       emit_insn (gen_add3_insn (this_rtx, this_rtx, temp1));
3737     }
3738 
3739   /* Jump to the target function.  */
3740   insn = emit_call_insn (gen_sibcall (fnaddr, const0_rtx, NULL, const0_rtx));
3741   SIBLING_CALL_P (insn) = 1;
3742 
3743   /* Run just enough of rest_of_compilation.  This sequence was
3744      "borrowed" from alpha.c.  */
3745   insn = get_insns ();
3746   split_all_insns_noflow ();
3747   shorten_branches (insn);
3748   final_start_function (insn, file, 1);
3749   final (insn, file, 1);
3750   final_end_function ();
3751 
3752   /* Clean up the vars set above.  Note that final_end_function resets
3753      the global pointer for us.  */
3754   reload_completed = 0;
3755 }
3756 
3757 /* Allocate a chunk of memory for per-function machine-dependent data.  */
3758 
3759 static struct machine_function *
3760 riscv_init_machine_status (void)
3761 {
3762   return ggc_cleared_alloc<machine_function> ();
3763 }
3764 
3765 /* Implement TARGET_OPTION_OVERRIDE.  */
3766 
3767 static void
3768 riscv_option_override (void)
3769 {
3770   const struct riscv_cpu_info *cpu;
3771 
3772 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3773   SUBTARGET_OVERRIDE_OPTIONS;
3774 #endif
3775 
3776   flag_pcc_struct_return = 0;
3777 
3778   if (flag_pic)
3779     g_switch_value = 0;
3780 
3781   /* The presence of the M extension implies that division instructions
3782      are present, so include them unless explicitly disabled.  */
3783   if (TARGET_MUL && (target_flags_explicit & MASK_DIV) == 0)
3784     target_flags |= MASK_DIV;
3785   else if (!TARGET_MUL && TARGET_DIV)
3786     error ("-mdiv requires -march to subsume the %<M%> extension");
3787 
3788   /* Likewise floating-point division and square root.  */
3789   if (TARGET_HARD_FLOAT && (target_flags_explicit & MASK_FDIV) == 0)
3790     target_flags |= MASK_FDIV;
3791 
3792   /* Handle -mtune.  */
3793   cpu = riscv_parse_cpu (riscv_tune_string ? riscv_tune_string :
3794 			 RISCV_TUNE_STRING_DEFAULT);
3795   tune_info = optimize_size ? &optimize_size_tune_info : cpu->tune_info;
3796 
3797   /* Use -mtune's setting for slow_unaligned_access, even when optimizing
3798      for size.  For architectures that trap and emulate unaligned accesses,
3799      the performance cost is too great, even for -Os.  Similarly, if
3800      -m[no-]strict-align is left unspecified, heed -mtune's advice.  */
3801   riscv_slow_unaligned_access = (cpu->tune_info->slow_unaligned_access
3802 				 || TARGET_STRICT_ALIGN);
3803   if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0
3804       && cpu->tune_info->slow_unaligned_access)
3805     target_flags |= MASK_STRICT_ALIGN;
3806 
3807   /* If the user hasn't specified a branch cost, use the processor's
3808      default.  */
3809   if (riscv_branch_cost == 0)
3810     riscv_branch_cost = tune_info->branch_cost;
3811 
3812   /* Function to allocate machine-dependent function status.  */
3813   init_machine_status = &riscv_init_machine_status;
3814 
3815   if (flag_pic)
3816     riscv_cmodel = CM_PIC;
3817 
3818   /* We get better code with explicit relocs for CM_MEDLOW, but
3819      worse code for the others (for now).  Pick the best default.  */
3820   if ((target_flags_explicit & MASK_EXPLICIT_RELOCS) == 0)
3821     if (riscv_cmodel == CM_MEDLOW)
3822       target_flags |= MASK_EXPLICIT_RELOCS;
3823 
3824   /* Require that the ISA supports the requested floating-point ABI.  */
3825   if (UNITS_PER_FP_ARG > (TARGET_HARD_FLOAT ? UNITS_PER_FP_REG : 0))
3826     error ("requested ABI requires -march to subsume the %qc extension",
3827 	   UNITS_PER_FP_ARG > 8 ? 'Q' : (UNITS_PER_FP_ARG > 4 ? 'D' : 'F'));
3828 
3829   /* We do not yet support ILP32 on RV64.  */
3830   if (BITS_PER_WORD != POINTER_SIZE)
3831     error ("ABI requires -march=rv%d", POINTER_SIZE);
3832 }
3833 
3834 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
3835 
3836 static void
3837 riscv_conditional_register_usage (void)
3838 {
3839   if (!TARGET_HARD_FLOAT)
3840     {
3841       for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++)
3842 	fixed_regs[regno] = call_used_regs[regno] = 1;
3843     }
3844 }
3845 
3846 /* Return a register priority for hard reg REGNO.  */
3847 
3848 static int
3849 riscv_register_priority (int regno)
3850 {
3851   /* Favor x8-x15/f8-f15 to improve the odds of RVC instruction selection.  */
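  /* (Most compressed instruction formats have 3-bit register fields, which
     can encode only x8-x15 and f8-f15.)  */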
3852   if (TARGET_RVC && (IN_RANGE (regno, GP_REG_FIRST + 8, GP_REG_FIRST + 15)
3853 		     || IN_RANGE (regno, FP_REG_FIRST + 8, FP_REG_FIRST + 15)))
3854     return 1;
3855 
3856   return 0;
3857 }
3858 
3859 /* Implement TARGET_TRAMPOLINE_INIT.  */
3860 
3861 static void
3862 riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3863 {
3864   rtx addr, end_addr, mem;
3865   uint32_t trampoline[4];
3866   unsigned int i;
3867   HOST_WIDE_INT static_chain_offset, target_function_offset;
3868 
3869   /* Work out the offsets of the pointers from the start of the
3870      trampoline code.  */
3871   gcc_assert (ARRAY_SIZE (trampoline) * 4 == TRAMPOLINE_CODE_SIZE);
3872 
3873   /* Get pointers to the beginning and end of the code block.  */
3874   addr = force_reg (Pmode, XEXP (m_tramp, 0));
3875   end_addr = riscv_force_binary (Pmode, PLUS, addr,
3876 				 GEN_INT (TRAMPOLINE_CODE_SIZE));
3877 
3878 
3879   if (Pmode == SImode)
3880     {
3881       chain_value = force_reg (Pmode, chain_value);
3882 
3883       rtx target_function = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
3884       /* lui     t2, hi(chain)
3885 	 lui     t1, hi(func)
3886 	 addi    t2, t2, lo(chain)
3887 	 jr      t1, lo(func)
3888       */
3889       unsigned HOST_WIDE_INT lui_hi_chain_code, lui_hi_func_code;
3890       unsigned HOST_WIDE_INT lo_chain_code, lo_func_code;
3891 
3892       rtx uimm_mask = force_reg (SImode, gen_int_mode (-IMM_REACH, SImode));
3893 
3894       /* 0xfff.  */
3895       rtx imm12_mask = gen_reg_rtx (SImode);
3896       emit_insn (gen_one_cmplsi2 (imm12_mask, uimm_mask));
3897 
3898       rtx fixup_value = force_reg (SImode, gen_int_mode (IMM_REACH/2, SImode));
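      /* Adding IMM_REACH/2 (0x800) before masking compensates for the sign
	 extension of the low 12 bits: hi(x) = (x + 0x800) & ~0xfff, so that
	 hi(x) + sext(lo(x)) == x.  */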
3899 
3900       /* Gen lui t2, hi(chain).  */
3901       rtx hi_chain = riscv_force_binary (SImode, PLUS, chain_value,
3902 					 fixup_value);
3903       hi_chain = riscv_force_binary (SImode, AND, hi_chain,
3904 				     uimm_mask);
3905       lui_hi_chain_code = OPCODE_LUI | (STATIC_CHAIN_REGNUM << SHIFT_RD);
3906       rtx lui_hi_chain = riscv_force_binary (SImode, IOR, hi_chain,
3907 					     gen_int_mode (lui_hi_chain_code, SImode));
3908 
3909       mem = adjust_address (m_tramp, SImode, 0);
3910       riscv_emit_move (mem, lui_hi_chain);
3911 
3912       /* Gen lui t1, hi(func).  */
3913       rtx hi_func = riscv_force_binary (SImode, PLUS, target_function,
3914 					fixup_value);
3915       hi_func = riscv_force_binary (SImode, AND, hi_func,
3916 				    uimm_mask);
3917       lui_hi_func_code = OPCODE_LUI | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD);
3918       rtx lui_hi_func = riscv_force_binary (SImode, IOR, hi_func,
3919 					    gen_int_mode (lui_hi_func_code, SImode));
3920 
3921       mem = adjust_address (m_tramp, SImode, 1 * GET_MODE_SIZE (SImode));
3922       riscv_emit_move (mem, lui_hi_func);
3923 
3924       /* Gen addi t2, t2, lo(chain).  */
3925       rtx lo_chain = riscv_force_binary (SImode, AND, chain_value,
3926 					 imm12_mask);
3927       lo_chain = riscv_force_binary (SImode, ASHIFT, lo_chain, GEN_INT (20));
3928 
3929       lo_chain_code = OPCODE_ADDI
3930 		      | (STATIC_CHAIN_REGNUM << SHIFT_RD)
3931 		      | (STATIC_CHAIN_REGNUM << SHIFT_RS1);
3932 
3933       rtx addi_lo_chain = riscv_force_binary (SImode, IOR, lo_chain,
3934 					      force_reg (SImode, GEN_INT (lo_chain_code)));
3935 
3936       mem = adjust_address (m_tramp, SImode, 2 * GET_MODE_SIZE (SImode));
3937       riscv_emit_move (mem, addi_lo_chain);
3938 
3939       /* Gen jr r1, lo(func).  */
3940       rtx lo_func = riscv_force_binary (SImode, AND, target_function,
3941 					imm12_mask);
3942       lo_func = riscv_force_binary (SImode, ASHIFT, lo_func, GEN_INT (20));
3943 
3944       lo_func_code = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
3945 
3946       rtx jr_lo_func = riscv_force_binary (SImode, IOR, lo_func,
3947 					   force_reg (SImode, GEN_INT (lo_func_code)));
3948 
3949       mem = adjust_address (m_tramp, SImode, 3 * GET_MODE_SIZE (SImode));
3950       riscv_emit_move (mem, jr_lo_func);
3951     }
3952   else
3953     {
3954       static_chain_offset = TRAMPOLINE_CODE_SIZE;
3955       target_function_offset = static_chain_offset + GET_MODE_SIZE (ptr_mode);
3956 
3957       /* auipc   t2, 0
3958 	 l[wd]   t1, target_function_offset(t2)
3959 	 l[wd]   t2, static_chain_offset(t2)
3960 	 jr      t1
3961       */
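      /* Illustrative layout of the finished trampoline (RV64, with the
	 register names used in the comment above):

	    0:  auipc t2,0
	    4:  ld    t1,24(t2)
	    8:  ld    t2,16(t2)
	   12:  jr    t1
	   16:  <static chain pointer>
	   24:  <target function address>  */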
3962       trampoline[0] = OPCODE_AUIPC | (STATIC_CHAIN_REGNUM << SHIFT_RD);
3963       trampoline[1] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
3964 		      | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RD)
3965 		      | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
3966 		      | (target_function_offset << SHIFT_IMM);
3967       trampoline[2] = (Pmode == DImode ? OPCODE_LD : OPCODE_LW)
3968 		      | (STATIC_CHAIN_REGNUM << SHIFT_RD)
3969 		      | (STATIC_CHAIN_REGNUM << SHIFT_RS1)
3970 		      | (static_chain_offset << SHIFT_IMM);
3971       trampoline[3] = OPCODE_JALR | (RISCV_PROLOGUE_TEMP_REGNUM << SHIFT_RS1);
3972 
3973       /* Copy the trampoline code.  */
3974       for (i = 0; i < ARRAY_SIZE (trampoline); i++)
3975 	{
3976 	  mem = adjust_address (m_tramp, SImode, i * GET_MODE_SIZE (SImode));
3977 	  riscv_emit_move (mem, gen_int_mode (trampoline[i], SImode));
3978 	}
3979 
3980       /* Set up the static chain pointer field.  */
3981       mem = adjust_address (m_tramp, ptr_mode, static_chain_offset);
3982       riscv_emit_move (mem, chain_value);
3983 
3984       /* Set up the target function field.  */
3985       mem = adjust_address (m_tramp, ptr_mode, target_function_offset);
3986       riscv_emit_move (mem, XEXP (DECL_RTL (fndecl), 0));
3987     }
3988 
3989   /* Flush the code part of the trampoline.  */
3990   emit_insn (gen_add3_insn (end_addr, addr, GEN_INT (TRAMPOLINE_SIZE)));
3991   emit_insn (gen_clear_cache (addr, end_addr));
3992 }
3993 
3994 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */
3995 
3996 static bool
3997 riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
3998 			       tree exp ATTRIBUTE_UNUSED)
3999 {
4000   /* Don't use sibcalls when using the save-restore routines.  */
4001   if (TARGET_SAVE_RESTORE)
4002     return false;
4003 
4004   return true;
4005 }
4006 
4007 /* Implement TARGET_CANNOT_COPY_INSN_P.  */
4008 
4009 static bool
4010 riscv_cannot_copy_insn_p (rtx_insn *insn)
4011 {
4012   return recog_memoized (insn) >= 0 && get_attr_cannot_copy (insn);
4013 }
4014 
4015 /* Initialize the GCC target structure.  */
4016 #undef TARGET_ASM_ALIGNED_HI_OP
4017 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
4018 #undef TARGET_ASM_ALIGNED_SI_OP
4019 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
4020 #undef TARGET_ASM_ALIGNED_DI_OP
4021 #define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
4022 
4023 #undef TARGET_OPTION_OVERRIDE
4024 #define TARGET_OPTION_OVERRIDE riscv_option_override
4025 
4026 #undef TARGET_LEGITIMIZE_ADDRESS
4027 #define TARGET_LEGITIMIZE_ADDRESS riscv_legitimize_address
4028 
4029 #undef TARGET_SCHED_ISSUE_RATE
4030 #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
4031 
4032 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
4033 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
4034 
4035 #undef TARGET_REGISTER_MOVE_COST
4036 #define TARGET_REGISTER_MOVE_COST riscv_register_move_cost
4037 #undef TARGET_MEMORY_MOVE_COST
4038 #define TARGET_MEMORY_MOVE_COST riscv_memory_move_cost
4039 #undef TARGET_RTX_COSTS
4040 #define TARGET_RTX_COSTS riscv_rtx_costs
4041 #undef TARGET_ADDRESS_COST
4042 #define TARGET_ADDRESS_COST riscv_address_cost
4043 
4044 #undef TARGET_ASM_FILE_START
4045 #define TARGET_ASM_FILE_START riscv_file_start
4046 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
4047 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
4048 
4049 #undef TARGET_EXPAND_BUILTIN_VA_START
4050 #define TARGET_EXPAND_BUILTIN_VA_START riscv_va_start
4051 
4052 #undef  TARGET_PROMOTE_FUNCTION_MODE
4053 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
4054 
4055 #undef TARGET_RETURN_IN_MEMORY
4056 #define TARGET_RETURN_IN_MEMORY riscv_return_in_memory
4057 
4058 #undef TARGET_ASM_OUTPUT_MI_THUNK
4059 #define TARGET_ASM_OUTPUT_MI_THUNK riscv_output_mi_thunk
4060 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
4061 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
4062 
4063 #undef TARGET_PRINT_OPERAND
4064 #define TARGET_PRINT_OPERAND riscv_print_operand
4065 #undef TARGET_PRINT_OPERAND_ADDRESS
4066 #define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address
4067 
4068 #undef TARGET_SETUP_INCOMING_VARARGS
4069 #define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs
4070 #undef TARGET_STRICT_ARGUMENT_NAMING
4071 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
4072 #undef TARGET_MUST_PASS_IN_STACK
4073 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
4074 #undef TARGET_PASS_BY_REFERENCE
4075 #define TARGET_PASS_BY_REFERENCE riscv_pass_by_reference
4076 #undef TARGET_ARG_PARTIAL_BYTES
4077 #define TARGET_ARG_PARTIAL_BYTES riscv_arg_partial_bytes
4078 #undef TARGET_FUNCTION_ARG
4079 #define TARGET_FUNCTION_ARG riscv_function_arg
4080 #undef TARGET_FUNCTION_ARG_ADVANCE
4081 #define TARGET_FUNCTION_ARG_ADVANCE riscv_function_arg_advance
4082 #undef TARGET_FUNCTION_ARG_BOUNDARY
4083 #define TARGET_FUNCTION_ARG_BOUNDARY riscv_function_arg_boundary
4084 
4085 /* The generic ELF target does not always have TLS support.  */
4086 #ifdef HAVE_AS_TLS
4087 #undef TARGET_HAVE_TLS
4088 #define TARGET_HAVE_TLS true
4089 #endif
4090 
4091 #undef TARGET_CANNOT_FORCE_CONST_MEM
4092 #define TARGET_CANNOT_FORCE_CONST_MEM riscv_cannot_force_const_mem
4093 
4094 #undef TARGET_LEGITIMATE_CONSTANT_P
4095 #define TARGET_LEGITIMATE_CONSTANT_P riscv_legitimate_constant_p
4096 
4097 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
4098 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
4099 
4100 #undef TARGET_LEGITIMATE_ADDRESS_P
4101 #define TARGET_LEGITIMATE_ADDRESS_P	riscv_legitimate_address_p
4102 
4103 #undef TARGET_CAN_ELIMINATE
4104 #define TARGET_CAN_ELIMINATE riscv_can_eliminate
4105 
4106 #undef TARGET_CONDITIONAL_REGISTER_USAGE
4107 #define TARGET_CONDITIONAL_REGISTER_USAGE riscv_conditional_register_usage
4108 
4109 #undef TARGET_CLASS_MAX_NREGS
4110 #define TARGET_CLASS_MAX_NREGS riscv_class_max_nregs
4111 
4112 #undef TARGET_TRAMPOLINE_INIT
4113 #define TARGET_TRAMPOLINE_INIT riscv_trampoline_init
4114 
4115 #undef TARGET_IN_SMALL_DATA_P
4116 #define TARGET_IN_SMALL_DATA_P riscv_in_small_data_p
4117 
4118 #undef TARGET_ASM_SELECT_RTX_SECTION
4119 #define TARGET_ASM_SELECT_RTX_SECTION  riscv_elf_select_rtx_section
4120 
4121 #undef TARGET_MIN_ANCHOR_OFFSET
4122 #define TARGET_MIN_ANCHOR_OFFSET (-IMM_REACH/2)
4123 
4124 #undef TARGET_MAX_ANCHOR_OFFSET
4125 #define TARGET_MAX_ANCHOR_OFFSET (IMM_REACH/2-1)
4126 
4127 #undef TARGET_REGISTER_PRIORITY
4128 #define TARGET_REGISTER_PRIORITY riscv_register_priority
4129 
4130 #undef TARGET_CANNOT_COPY_INSN_P
4131 #define TARGET_CANNOT_COPY_INSN_P riscv_cannot_copy_insn_p
4132 
4133 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
4134 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV riscv_atomic_assign_expand_fenv
4135 
4136 #undef TARGET_INIT_BUILTINS
4137 #define TARGET_INIT_BUILTINS riscv_init_builtins
4138 
4139 #undef TARGET_BUILTIN_DECL
4140 #define TARGET_BUILTIN_DECL riscv_builtin_decl
4141 
4142 #undef TARGET_EXPAND_BUILTIN
4143 #define TARGET_EXPAND_BUILTIN riscv_expand_builtin
4144 
4145 struct gcc_target targetm = TARGET_INITIALIZER;
4146 
4147 #include "gt-riscv.h"
4148