1 /* Induction variable optimizations.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does so in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only in constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated.
68
69 For targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will probably be transformed by the RTL doloop optimization,
71 to try to make the selected IV candidate set optimal. The process of doloop
72 support includes:
73
74 1) Analyze whether the current loop will be transformed into a doloop, find
75 and mark its compare type IV use as a doloop use (iv_group field doloop_p),
76 and set flag doloop_use_p of ivopts_data to notify subsequent processing on
77 doloop. See analyze_and_mark_doloop_use and its callees for the details.
78 The target hook predict_doloop_p can be used for target specific checks.
79
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 set flag doloop_p of the iv_cand, set its step cost to zero and add no
82 extra cost, as for a biv. For cost determination between the doloop IV
83 cand and an IV use, the target hooks doloop_cost_for_generic and
84 doloop_cost_for_address are provided to add extra costs for generic type
85 and address type IV uses. Zero cost is assigned to the pair of the doloop
86 IV cand and the doloop IV use, and bound zero is set for IV elimination.
87
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm will proceed as usual, picking the doloop dedicated
90 IV if profitable. */
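
/* An editorial illustration of the overall effect (an assumption for
   exposition, not taken from the original sources): on a typical target,
   a loop such as

       for (i = 0; i < n; i++)
	 sum += a[i];

   contains two related ivs, the counter "i" and the implicit address
   "&a[i]".  If the address iv is selected as the cheaper candidate, the
   multiplication hidden in "a[i]" is strength-reduced to a pointer
   increment and "i" is eliminated from the exit test, so the loop may end
   up roughly as

       for (p = a; p != a + n; p++)
	 sum += *p;

   Whether this particular shape is chosen depends on the target costs.  */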
91
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 #include "cfganal.h"
135
136 /* For lang_hooks.types.type_for_mode. */
137 #include "langhooks.h"
138
139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
140 cost of different addressing modes. This should be moved to a TBD
141 interface between the GIMPLE and RTL worlds. */
142
143 /* The infinite cost. */
144 #define INFTY 1000000000
145
146 /* Returns the expected number of loop iterations for LOOP.
147 The average trip count is computed from profile data if it
148 exists. */
149
150 static inline HOST_WIDE_INT
151 avg_loop_niter (class loop *loop)
152 {
153 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154 if (niter == -1)
155 {
156 niter = likely_max_stmt_executions_int (loop);
157
158 if (niter == -1 || niter > param_avg_loop_niter)
159 return param_avg_loop_niter;
160 }
161
162 return niter;
163 }
164
165 struct iv_use;
166
167 /* Representation of the induction variable. */
168 struct iv
169 {
170 tree base; /* Initial value of the iv. */
171 tree base_object; /* The memory object that the induction variable points to. */
172 tree step; /* Step of the iv (constant only). */
173 tree ssa_name; /* The ssa name with the value. */
174 struct iv_use *nonlin_use; /* The nonlinear use of this iv, if there is one. */
175 bool biv_p; /* Is it a biv? */
176 bool no_overflow; /* True if the iv doesn't overflow. */
177 bool have_address_use;/* For biv, indicate if it's used in any address
178 type use. */
179 };
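
/* An editorial example (for illustration only): in
   "for (i = 0; i < n; i++) sum += a[i];" the iv describing "i" would have
   base 0, step 1, base_object NULL and biv_p set, while the iv describing
   the address "&a[i]" would have base "&a", step "sizeof (*a)" and
   base_object "&a".  */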
180
181 /* Per-ssa version information (induction variable descriptions, etc.). */
182 struct version_info
183 {
184 tree name; /* The ssa name. */
185 struct iv *iv; /* Induction variable description. */
186 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
187 an expression that is not an induction variable. */
188 bool preserve_biv; /* For the original biv, whether to preserve it. */
189 unsigned inv_id; /* Id of an invariant. */
190 };
191
192 /* Types of uses. */
193 enum use_type
194 {
195 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
196 USE_REF_ADDRESS, /* Use is an address for an explicit memory
197 reference. */
198 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
199 cases where the expansion of the function
200 will turn the argument into a normal address. */
201 USE_COMPARE /* Use is a compare. */
202 };
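
/* An editorial illustration of the categories above: in
   "for (i = 0; i < n; i++) a[i] = b[i] + i;" the address operands of
   "a[i]" and "b[i]" give USE_REF_ADDRESS uses, the plain value of "i" on
   the right-hand side gives a USE_NONLINEAR_EXPR use, and the exit test
   "i < n" gives a USE_COMPARE use.  USE_PTR_ADDRESS covers pointer
   arguments to calls (for instance certain internal functions) whose
   expansion turns the argument into a normal address.  */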
203
204 /* Cost of a computation. */
205 class comp_cost
206 {
207 public:
208 comp_cost (): cost (0), complexity (0), scratch (0)
209 {}
210
211 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
212 : cost (cost), complexity (complexity), scratch (scratch)
213 {}
214
215 /* Returns true if COST is infinite. */
216 bool infinite_cost_p ();
217
218 /* Adds costs COST1 and COST2. */
219 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
220
221 /* Adds COST to the comp_cost. */
222 comp_cost operator+= (comp_cost cost);
223
224 /* Adds constant C to this comp_cost. */
225 comp_cost operator+= (HOST_WIDE_INT c);
226
227 /* Subtracts constant C from this comp_cost. */
228 comp_cost operator-= (HOST_WIDE_INT c);
229
230 /* Divide the comp_cost by constant C. */
231 comp_cost operator/= (HOST_WIDE_INT c);
232
233 /* Multiply the comp_cost by constant C. */
234 comp_cost operator*= (HOST_WIDE_INT c);
235
236 /* Subtracts cost COST2 from COST1. */
237 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
238
239 /* Subtracts COST from this comp_cost. */
240 comp_cost operator-= (comp_cost cost);
241
242 /* Returns true if COST1 is smaller than COST2. */
243 friend bool operator< (comp_cost cost1, comp_cost cost2);
244
245 /* Returns true if COST1 and COST2 are equal. */
246 friend bool operator== (comp_cost cost1, comp_cost cost2);
247
248 /* Returns true if COST1 is smaller than or equal to COST2. */
249 friend bool operator<= (comp_cost cost1, comp_cost cost2);
250
251 int64_t cost; /* The runtime cost. */
252 unsigned complexity; /* The estimate of the complexity of the code for
253 the computation (in no concrete units --
254 complexity field should be larger for more
255 complex expressions and addressing modes). */
256 int64_t scratch; /* Scratch used during cost computation. */
257 };
258
259 static const comp_cost no_cost;
260 static const comp_cost infinite_cost (INFTY, 0, INFTY);
261
262 bool
263 comp_cost::infinite_cost_p ()
264 {
265 return cost == INFTY;
266 }
267
268 comp_cost
269 operator+ (comp_cost cost1, comp_cost cost2)
270 {
271 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
272 return infinite_cost;
273
274 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275 cost1.cost += cost2.cost;
276 cost1.complexity += cost2.complexity;
277
278 return cost1;
279 }
280
281 comp_cost
282 operator- (comp_cost cost1, comp_cost cost2)
283 {
284 if (cost1.infinite_cost_p ())
285 return infinite_cost;
286
287 gcc_assert (!cost2.infinite_cost_p ());
288 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
289
290 cost1.cost -= cost2.cost;
291 cost1.complexity -= cost2.complexity;
292
293 return cost1;
294 }
295
296 comp_cost
297 comp_cost::operator+= (comp_cost cost)
298 {
299 *this = *this + cost;
300 return *this;
301 }
302
303 comp_cost
304 comp_cost::operator+= (HOST_WIDE_INT c)
305 {
306 if (c >= INFTY)
307 this->cost = INFTY;
308
309 if (infinite_cost_p ())
310 return *this;
311
312 gcc_assert (this->cost + c < infinite_cost.cost);
313 this->cost += c;
314
315 return *this;
316 }
317
318 comp_cost
319 comp_cost::operator-= (HOST_WIDE_INT c)
320 {
321 if (infinite_cost_p ())
322 return *this;
323
324 gcc_assert (this->cost - c < infinite_cost.cost);
325 this->cost -= c;
326
327 return *this;
328 }
329
330 comp_cost
331 comp_cost::operator/= (HOST_WIDE_INT c)
332 {
333 gcc_assert (c != 0);
334 if (infinite_cost_p ())
335 return *this;
336
337 this->cost /= c;
338
339 return *this;
340 }
341
342 comp_cost
343 comp_cost::operator*= (HOST_WIDE_INT c)
344 {
345 if (infinite_cost_p ())
346 return *this;
347
348 gcc_assert (this->cost * c < infinite_cost.cost);
349 this->cost *= c;
350
351 return *this;
352 }
353
354 comp_cost
355 comp_cost::operator-= (comp_cost cost)
356 {
357 *this = *this - cost;
358 return *this;
359 }
360
361 bool
362 operator< (comp_cost cost1, comp_cost cost2)
363 {
364 if (cost1.cost == cost2.cost)
365 return cost1.complexity < cost2.complexity;
366
367 return cost1.cost < cost2.cost;
368 }
369
370 bool
371 operator== (comp_cost cost1, comp_cost cost2)
372 {
373 return cost1.cost == cost2.cost
374 && cost1.complexity == cost2.complexity;
375 }
376
377 bool
378 operator<= (comp_cost cost1, comp_cost cost2)
379 {
380 return cost1 < cost2 || cost1 == cost2;
381 }
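
/* An editorial note on the ordering above: the comparison is lexicographic
   on (cost, complexity), so e.g. comp_cost (4, 1) < comp_cost (4, 2) and
   comp_cost (3, 9) < comp_cost (4, 0), while infinite_cost compares
   greater than any finite cost.  */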
382
383 struct iv_inv_expr_ent;
384
385 /* The candidate - cost pair. */
386 class cost_pair
387 {
388 public:
389 struct iv_cand *cand; /* The candidate. */
390 comp_cost cost; /* The cost. */
391 enum tree_code comp; /* For iv elimination, the comparison. */
392 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
393 preserved when representing iv_use with iv_cand. */
394 bitmap inv_exprs; /* The list of newly created invariant expressions
395 when representing iv_use with iv_cand. */
396 tree value; /* For final value elimination, the expression for
397 the final value of the iv. For iv elimination,
398 the new bound to compare with. */
399 };
400
401 /* Use. */
402 struct iv_use
403 {
404 unsigned id; /* The id of the use. */
405 unsigned group_id; /* The group id the use belongs to. */
406 enum use_type type; /* Type of the use. */
407 tree mem_type; /* The memory type to use when testing whether an
408 address is legitimate, and what the address's
409 cost is. */
410 struct iv *iv; /* The induction variable it is based on. */
411 gimple *stmt; /* Statement in that it occurs. */
412 tree *op_p; /* The place where it occurs. */
413
414 tree addr_base; /* Base address with const offset stripped. */
415 poly_uint64_pod addr_offset;
416 /* Const offset stripped from base address. */
417 };
418
419 /* Group of uses. */
420 struct iv_group
421 {
422 /* The id of the group. */
423 unsigned id;
424 /* Uses of the group are of the same type. */
425 enum use_type type;
426 /* The set of "related" IV candidates, plus the important ones. */
427 bitmap related_cands;
428 /* Number of IV candidates in the cost_map. */
429 unsigned n_map_members;
430 /* The costs with respect to the iv candidates. */
431 class cost_pair *cost_map;
432 /* The selected candidate for the group. */
433 struct iv_cand *selected;
434 /* To indicate this is a doloop use group. */
435 bool doloop_p;
436 /* Uses in the group. */
437 vec<struct iv_use *> vuses;
438 };
439
440 /* The position where the iv is computed. */
441 enum iv_position
442 {
443 IP_NORMAL, /* At the end, just before the exit condition. */
444 IP_END, /* At the end of the latch block. */
445 IP_BEFORE_USE, /* Immediately before a specific use. */
446 IP_AFTER_USE, /* Immediately after a specific use. */
447 IP_ORIGINAL /* The original biv. */
448 };
449
450 /* The induction variable candidate. */
451 struct iv_cand
452 {
453 unsigned id; /* The number of the candidate. */
454 bool important; /* Whether this is an "important" candidate, i.e. such
455 that it should be considered by all uses. */
456 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
457 gimple *incremented_at;/* For original biv, the statement where it is
458 incremented. */
459 tree var_before; /* The variable used for it before increment. */
460 tree var_after; /* The variable used for it after increment. */
461 struct iv *iv; /* The value of the candidate. NULL for
462 "pseudocandidate" used to indicate the possibility
463 to replace the final value of an iv by direct
464 computation of the value. */
465 unsigned cost; /* Cost of the candidate. */
466 unsigned cost_step; /* Cost of the candidate's increment operation. */
467 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
468 where it is incremented. */
469 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
470 iv_cand. */
471 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
472 handle it as a new invariant expression which will
473 be hoisted out of the loop. */
474 struct iv *orig_iv; /* The original iv if this cand is added from biv with
475 smaller type. */
476 bool doloop_p; /* Whether this is a doloop candidate. */
477 };
478
479 /* Hashtable entry for common candidate derived from iv uses. */
480 class iv_common_cand
481 {
482 public:
483 tree base;
484 tree step;
485 /* IV uses from which this common candidate is derived. */
486 auto_vec<struct iv_use *> uses;
487 hashval_t hash;
488 };
489
490 /* Hashtable helpers. */
491
492 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
493 {
494 static inline hashval_t hash (const iv_common_cand *);
495 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
496 };
497
498 /* Hash function for possible common candidates. */
499
500 inline hashval_t
501 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
502 {
503 return ccand->hash;
504 }
505
506 /* Hash table equality function for common candidates. */
507
508 inline bool
509 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
510 const iv_common_cand *ccand2)
511 {
512 return (ccand1->hash == ccand2->hash
513 && operand_equal_p (ccand1->base, ccand2->base, 0)
514 && operand_equal_p (ccand1->step, ccand2->step, 0)
515 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
516 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
517 }
518
519 /* Loop invariant expression hashtable entry. */
520
521 struct iv_inv_expr_ent
522 {
523 /* Tree expression of the entry. */
524 tree expr;
525 /* Unique identifier. */
526 int id;
527 /* Hash value. */
528 hashval_t hash;
529 };
530
531 /* Sort iv_inv_expr_ent pair A and B by id field. */
532
533 static int
534 sort_iv_inv_expr_ent (const void *a, const void *b)
535 {
536 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
537 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
538
539 unsigned id1 = (*e1)->id;
540 unsigned id2 = (*e2)->id;
541
542 if (id1 < id2)
543 return -1;
544 else if (id1 > id2)
545 return 1;
546 else
547 return 0;
548 }
549
550 /* Hashtable helpers. */
551
552 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
553 {
554 static inline hashval_t hash (const iv_inv_expr_ent *);
555 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
556 };
557
558 /* Return true if uses of type TYPE represent some form of address. */
559
560 inline bool
561 address_p (use_type type)
562 {
563 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
564 }
565
566 /* Hash function for loop invariant expressions. */
567
568 inline hashval_t
569 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
570 {
571 return expr->hash;
572 }
573
574 /* Hash table equality function for expressions. */
575
576 inline bool
577 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
578 const iv_inv_expr_ent *expr2)
579 {
580 return expr1->hash == expr2->hash
581 && operand_equal_p (expr1->expr, expr2->expr, 0);
582 }
583
584 struct ivopts_data
585 {
586 /* The currently optimized loop. */
587 class loop *current_loop;
588 location_t loop_loc;
589
590 /* Numbers of iterations for all exits of the current loop. */
591 hash_map<edge, tree_niter_desc *> *niters;
592
593 /* Number of registers used in it. */
594 unsigned regs_used;
595
596 /* The size of version_info array allocated. */
597 unsigned version_info_size;
598
599 /* The array of information for the ssa names. */
600 struct version_info *version_info;
601
602 /* The hashtable of loop invariant expressions created
603 by ivopt. */
604 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
605
606 /* The bitmap of indices in version_info whose value was changed. */
607 bitmap relevant;
608
609 /* The uses of induction variables. */
610 vec<iv_group *> vgroups;
611
612 /* The candidates. */
613 vec<iv_cand *> vcands;
614
615 /* A bitmap of important candidates. */
616 bitmap important_candidates;
617
618 /* Cache used by tree_to_aff_combination_expand. */
619 hash_map<tree, name_expansion *> *name_expansion_cache;
620
621 /* The hashtable of common candidates derived from iv uses. */
622 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
623
624 /* The common candidates. */
625 vec<iv_common_cand *> iv_common_cands;
626
627 /* Hash map recording base object information of tree exp. */
628 hash_map<tree, tree> *base_object_map;
629
630 /* The maximum invariant variable id. */
631 unsigned max_inv_var_id;
632
633 /* The maximum invariant expression id. */
634 unsigned max_inv_expr_id;
635
636 /* Number of no_overflow BIVs which are not used in memory address. */
637 unsigned bivs_not_used_in_addr;
638
639 /* Obstack for iv structure. */
640 struct obstack iv_obstack;
641
642 /* Whether to consider just related and important candidates when replacing a
643 use. */
644 bool consider_all_candidates;
645
646 /* Are we optimizing for speed? */
647 bool speed;
648
649 /* Whether the loop body includes any function calls. */
650 bool body_includes_call;
651
652 /* Whether the loop body can only be exited via single exit. */
653 bool loop_single_exit_p;
654
655 /* Whether the loop has doloop comparison use. */
656 bool doloop_use_p;
657 };
658
659 /* An assignment of iv candidates to uses. */
660
661 class iv_ca
662 {
663 public:
664 /* The number of uses covered by the assignment. */
665 unsigned upto;
666
667 /* Number of uses that cannot be expressed by the candidates in the set. */
668 unsigned bad_groups;
669
670 /* Candidate assigned to a use, together with the related costs. */
671 class cost_pair **cand_for_group;
672
673 /* Number of times each candidate is used. */
674 unsigned *n_cand_uses;
675
676 /* The candidates used. */
677 bitmap cands;
678
679 /* The number of candidates in the set. */
680 unsigned n_cands;
681
682 /* The number of invariants needed, including both invariant variables and
683 invariant expressions. */
684 unsigned n_invs;
685
686 /* Total cost of expressing uses. */
687 comp_cost cand_use_cost;
688
689 /* Total cost of candidates. */
690 int64_t cand_cost;
691
692 /* Number of times each invariant variable is used. */
693 unsigned *n_inv_var_uses;
694
695 /* Number of times each invariant expression is used. */
696 unsigned *n_inv_expr_uses;
697
698 /* Total cost of the assignment. */
699 comp_cost cost;
700 };
701
702 /* Difference of two iv candidate assignments. */
703
704 struct iv_ca_delta
705 {
706 /* Changed group. */
707 struct iv_group *group;
708
709 /* An old assignment (for rollback purposes). */
710 class cost_pair *old_cp;
711
712 /* A new assignment. */
713 class cost_pair *new_cp;
714
715 /* Next change in the list. */
716 struct iv_ca_delta *next;
717 };
718
719 /* Bound on number of candidates below which all candidates are considered. */
720
721 #define CONSIDER_ALL_CANDIDATES_BOUND \
722 ((unsigned) param_iv_consider_all_candidates_bound)
723
724 /* If there are more iv occurrences, we just give up (it is quite unlikely that
725 optimizing such a loop would help, and it would take ages). */
726
727 #define MAX_CONSIDERED_GROUPS \
728 ((unsigned) param_iv_max_considered_uses)
729
730 /* If there are at most this number of ivs in the set, always try removing
731 unnecessary ivs from the set. */
732
733 #define ALWAYS_PRUNE_CAND_SET_BOUND \
734 ((unsigned) param_iv_always_prune_cand_set_bound)
735
736 /* The list of trees for which the decl_rtl field must be reset is stored
737 here. */
738
739 static vec<tree> decl_rtl_to_reset;
740
741 static comp_cost force_expr_to_var_cost (tree, bool);
742
743 /* The single loop exit if it dominates the latch, NULL otherwise. */
744
745 edge
746 single_dom_exit (class loop *loop)
747 {
748 edge exit = single_exit (loop);
749
750 if (!exit)
751 return NULL;
752
753 if (!just_once_each_iteration_p (loop, exit->src))
754 return NULL;
755
756 return exit;
757 }
758
759 /* Dumps information about the induction variable IV to FILE. Don't dump
760 the variable's name if DUMP_NAME is FALSE. The information is dumped with
761 preceding spaces indicated by INDENT_LEVEL. */
762
763 void
764 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
765 {
766 const char *p;
767 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
768
769 if (indent_level > 4)
770 indent_level = 4;
771 p = spaces + 8 - (indent_level << 1);
772
773 fprintf (file, "%sIV struct:\n", p);
774 if (iv->ssa_name && dump_name)
775 {
776 fprintf (file, "%s SSA_NAME:\t", p);
777 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
778 fprintf (file, "\n");
779 }
780
781 fprintf (file, "%s Type:\t", p);
782 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
783 fprintf (file, "\n");
784
785 fprintf (file, "%s Base:\t", p);
786 print_generic_expr (file, iv->base, TDF_SLIM);
787 fprintf (file, "\n");
788
789 fprintf (file, "%s Step:\t", p);
790 print_generic_expr (file, iv->step, TDF_SLIM);
791 fprintf (file, "\n");
792
793 if (iv->base_object)
794 {
795 fprintf (file, "%s Object:\t", p);
796 print_generic_expr (file, iv->base_object, TDF_SLIM);
797 fprintf (file, "\n");
798 }
799
800 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
801
802 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
803 p, iv->no_overflow ? "No-overflow" : "Overflow");
804 }
805
806 /* Dumps information about the USE to FILE. */
807
808 void
809 dump_use (FILE *file, struct iv_use *use)
810 {
811 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
812 fprintf (file, " At stmt:\t");
813 print_gimple_stmt (file, use->stmt, 0);
814 fprintf (file, " At pos:\t");
815 if (use->op_p)
816 print_generic_expr (file, *use->op_p, TDF_SLIM);
817 fprintf (file, "\n");
818 dump_iv (file, use->iv, false, 2);
819 }
820
821 /* Dumps information about the uses to FILE. */
822
823 void
824 dump_groups (FILE *file, struct ivopts_data *data)
825 {
826 unsigned i, j;
827 struct iv_group *group;
828
829 for (i = 0; i < data->vgroups.length (); i++)
830 {
831 group = data->vgroups[i];
832 fprintf (file, "Group %d:\n", group->id);
833 if (group->type == USE_NONLINEAR_EXPR)
834 fprintf (file, " Type:\tGENERIC\n");
835 else if (group->type == USE_REF_ADDRESS)
836 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
837 else if (group->type == USE_PTR_ADDRESS)
838 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
839 else
840 {
841 gcc_assert (group->type == USE_COMPARE);
842 fprintf (file, " Type:\tCOMPARE\n");
843 }
844 for (j = 0; j < group->vuses.length (); j++)
845 dump_use (file, group->vuses[j]);
846 }
847 }
848
849 /* Dumps information about induction variable candidate CAND to FILE. */
850
851 void
852 dump_cand (FILE *file, struct iv_cand *cand)
853 {
854 struct iv *iv = cand->iv;
855
856 fprintf (file, "Candidate %d:\n", cand->id);
857 if (cand->inv_vars)
858 {
859 fprintf (file, " Depend on inv.vars: ");
860 dump_bitmap (file, cand->inv_vars);
861 }
862 if (cand->inv_exprs)
863 {
864 fprintf (file, " Depend on inv.exprs: ");
865 dump_bitmap (file, cand->inv_exprs);
866 }
867
868 if (cand->var_before)
869 {
870 fprintf (file, " Var befor: ");
871 print_generic_expr (file, cand->var_before, TDF_SLIM);
872 fprintf (file, "\n");
873 }
874 if (cand->var_after)
875 {
876 fprintf (file, " Var after: ");
877 print_generic_expr (file, cand->var_after, TDF_SLIM);
878 fprintf (file, "\n");
879 }
880
881 switch (cand->pos)
882 {
883 case IP_NORMAL:
884 fprintf (file, " Incr POS: before exit test\n");
885 break;
886
887 case IP_BEFORE_USE:
888 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
889 break;
890
891 case IP_AFTER_USE:
892 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
893 break;
894
895 case IP_END:
896 fprintf (file, " Incr POS: at end\n");
897 break;
898
899 case IP_ORIGINAL:
900 fprintf (file, " Incr POS: orig biv\n");
901 break;
902 }
903
904 dump_iv (file, iv, false, 1);
905 }
906
907 /* Returns the info for ssa version VER. */
908
909 static inline struct version_info *
910 ver_info (struct ivopts_data *data, unsigned ver)
911 {
912 return data->version_info + ver;
913 }
914
915 /* Returns the info for ssa name NAME. */
916
917 static inline struct version_info *
918 name_info (struct ivopts_data *data, tree name)
919 {
920 return ver_info (data, SSA_NAME_VERSION (name));
921 }
922
923 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
924 emitted in LOOP. */
925
926 static bool
927 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
928 {
929 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
930
931 gcc_assert (bb);
932
933 if (sbb == loop->latch)
934 return true;
935
936 if (sbb != bb)
937 return false;
938
939 return stmt == last_stmt (bb);
940 }
941
942 /* Returns true if STMT is after the place where the original induction
943 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
944 if the positions are identical. */
945
946 static bool
947 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
948 {
949 basic_block cand_bb = gimple_bb (cand->incremented_at);
950 basic_block stmt_bb = gimple_bb (stmt);
951
952 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
953 return false;
954
955 if (stmt_bb != cand_bb)
956 return true;
957
958 if (true_if_equal
959 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
960 return true;
961 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
962 }
963
964 /* Returns true if STMT is after the place where the induction variable
965 CAND is incremented in LOOP. */
966
967 static bool
968 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
969 {
970 switch (cand->pos)
971 {
972 case IP_END:
973 return false;
974
975 case IP_NORMAL:
976 return stmt_after_ip_normal_pos (loop, stmt);
977
978 case IP_ORIGINAL:
979 case IP_AFTER_USE:
980 return stmt_after_inc_pos (cand, stmt, false);
981
982 case IP_BEFORE_USE:
983 return stmt_after_inc_pos (cand, stmt, true);
984
985 default:
986 gcc_unreachable ();
987 }
988 }
989
990 /* walk_tree callback for contains_abnormal_ssa_name_p. */
991
992 static tree
993 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
994 {
995 if (TREE_CODE (*tp) == SSA_NAME
996 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
997 return *tp;
998
999 if (!EXPR_P (*tp))
1000 *walk_subtrees = 0;
1001
1002 return NULL_TREE;
1003 }
1004
1005 /* Returns true if EXPR contains a ssa name that occurs in an
1006 abnormal phi node. */
1007
1008 bool
1009 contains_abnormal_ssa_name_p (tree expr)
1010 {
1011 return walk_tree_without_duplicates
1012 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1013 }
1014
1015 /* Returns the structure describing number of iterations determined from
1016 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1017
1018 static class tree_niter_desc *
1019 niter_for_exit (struct ivopts_data *data, edge exit)
1020 {
1021 class tree_niter_desc *desc;
1022 tree_niter_desc **slot;
1023
1024 if (!data->niters)
1025 {
1026 data->niters = new hash_map<edge, tree_niter_desc *>;
1027 slot = NULL;
1028 }
1029 else
1030 slot = data->niters->get (exit);
1031
1032 if (!slot)
1033 {
1034 /* Try to determine the number of iterations. We cannot safely work with
1035 ssa names that appear in phi nodes on abnormal edges, as we would otherwise
1036 create overlapping life ranges for them (PR 27283). */
1037 desc = XNEW (class tree_niter_desc);
1038 if (!number_of_iterations_exit (data->current_loop,
1039 exit, desc, true)
1040 || contains_abnormal_ssa_name_p (desc->niter))
1041 {
1042 XDELETE (desc);
1043 desc = NULL;
1044 }
1045 data->niters->put (exit, desc);
1046 }
1047 else
1048 desc = *slot;
1049
1050 return desc;
1051 }
1052
1053 /* Returns the structure describing number of iterations determined from
1054 single dominating exit of DATA->current_loop, or NULL if something
1055 goes wrong. */
1056
1057 static class tree_niter_desc *
1058 niter_for_single_dom_exit (struct ivopts_data *data)
1059 {
1060 edge exit = single_dom_exit (data->current_loop);
1061
1062 if (!exit)
1063 return NULL;
1064
1065 return niter_for_exit (data, exit);
1066 }
1067
1068 /* Initializes data structures used by the iv optimization pass, stored
1069 in DATA. */
1070
1071 static void
1072 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1073 {
1074 data->version_info_size = 2 * num_ssa_names;
1075 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1076 data->relevant = BITMAP_ALLOC (NULL);
1077 data->important_candidates = BITMAP_ALLOC (NULL);
1078 data->max_inv_var_id = 0;
1079 data->max_inv_expr_id = 0;
1080 data->niters = NULL;
1081 data->vgroups.create (20);
1082 data->vcands.create (20);
1083 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1084 data->name_expansion_cache = NULL;
1085 data->base_object_map = NULL;
1086 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1087 data->iv_common_cands.create (20);
1088 decl_rtl_to_reset.create (20);
1089 gcc_obstack_init (&data->iv_obstack);
1090 }
1091
1092 /* walk_tree callback for determine_base_object. */
1093
1094 static tree
1095 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1096 {
1097 tree_code code = TREE_CODE (*tp);
1098 tree obj = NULL_TREE;
1099 if (code == ADDR_EXPR)
1100 {
1101 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1102 if (!base)
1103 obj = *tp;
1104 else if (TREE_CODE (base) != MEM_REF)
1105 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1106 }
1107 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1108 obj = fold_convert (ptr_type_node, *tp);
1109
1110 if (!obj)
1111 {
1112 if (!EXPR_P (*tp))
1113 *walk_subtrees = 0;
1114
1115 return NULL_TREE;
1116 }
1117 /* Record special node for multiple base objects and stop. */
1118 if (*static_cast<tree *> (wdata))
1119 {
1120 *static_cast<tree *> (wdata) = integer_zero_node;
1121 return integer_zero_node;
1122 }
1123 /* Record the base object and continue looking. */
1124 *static_cast<tree *> (wdata) = obj;
1125 return NULL_TREE;
1126 }
1127
1128 /* Returns the memory object that EXPR points to, with caching. Returns NULL
1129 if we are able to determine that it does not point to any such object; as a
1130 special case, returns integer_zero_node if EXPR contains multiple base objects. */
1131
1132 static tree
1133 determine_base_object (struct ivopts_data *data, tree expr)
1134 {
1135 tree *slot, obj = NULL_TREE;
1136 if (data->base_object_map)
1137 {
1138 if ((slot = data->base_object_map->get(expr)) != NULL)
1139 return *slot;
1140 }
1141 else
1142 data->base_object_map = new hash_map<tree, tree>;
1143
1144 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1145 data->base_object_map->put (expr, obj);
1146 return obj;
1147 }
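
/* An editorial illustration of the above: for EXPR "&a[i]" the recorded
   base object is "&a"; for a pointer ssa name "p_1" it is "p_1" converted
   to ptr_type_node; if the walk finds more than one base object, the
   special integer_zero_node is recorded; and for a plain integer
   expression no base object is found, so NULL_TREE is recorded.  */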
1148
1149 /* Return true if an address expression with a non-DECL_P operand appears
1150 in EXPR. */
1151
1152 static bool
1153 contain_complex_addr_expr (tree expr)
1154 {
1155 bool res = false;
1156
1157 STRIP_NOPS (expr);
1158 switch (TREE_CODE (expr))
1159 {
1160 case POINTER_PLUS_EXPR:
1161 case PLUS_EXPR:
1162 case MINUS_EXPR:
1163 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1164 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1165 break;
1166
1167 case ADDR_EXPR:
1168 return (!DECL_P (TREE_OPERAND (expr, 0)));
1169
1170 default:
1171 return false;
1172 }
1173
1174 return res;
1175 }
1176
1177 /* Allocates an induction variable with given initial value BASE and step STEP
1178 for DATA->current_loop. NO_OVERFLOW implies the iv doesn't overflow. */
1179
1180 static struct iv *
1181 alloc_iv (struct ivopts_data *data, tree base, tree step,
1182 bool no_overflow = false)
1183 {
1184 tree expr = base;
1185 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1186 sizeof (struct iv));
1187 gcc_assert (step != NULL_TREE);
1188
1189 /* Lower address expression in base except ones with DECL_P as operand.
1190 By doing this:
1191 1) More accurate cost can be computed for address expressions;
1192 2) Duplicate candidates won't be created for bases in different
1193 forms, like &a[0] and &a. */
1194 STRIP_NOPS (expr);
1195 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1196 || contain_complex_addr_expr (expr))
1197 {
1198 aff_tree comb;
1199 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1200 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1201 }
1202
1203 iv->base = base;
1204 iv->base_object = determine_base_object (data, base);
1205 iv->step = step;
1206 iv->biv_p = false;
1207 iv->nonlin_use = NULL;
1208 iv->ssa_name = NULL_TREE;
1209 if (!no_overflow
1210 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1211 base, step))
1212 no_overflow = true;
1213 iv->no_overflow = no_overflow;
1214 iv->have_address_use = false;
1215
1216 return iv;
1217 }
1218
1219 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1220 doesn't overflow. */
1221
1222 static void
1223 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1224 bool no_overflow)
1225 {
1226 struct version_info *info = name_info (data, iv);
1227
1228 gcc_assert (!info->iv);
1229
1230 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1231 info->iv = alloc_iv (data, base, step, no_overflow);
1232 info->iv->ssa_name = iv;
1233 }
1234
1235 /* Finds induction variable declaration for VAR. */
1236
1237 static struct iv *
1238 get_iv (struct ivopts_data *data, tree var)
1239 {
1240 basic_block bb;
1241 tree type = TREE_TYPE (var);
1242
1243 if (!POINTER_TYPE_P (type)
1244 && !INTEGRAL_TYPE_P (type))
1245 return NULL;
1246
1247 if (!name_info (data, var)->iv)
1248 {
1249 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1250
1251 if (!bb
1252 || !flow_bb_inside_loop_p (data->current_loop, bb))
1253 {
1254 if (POINTER_TYPE_P (type))
1255 type = sizetype;
1256 set_iv (data, var, var, build_int_cst (type, 0), true);
1257 }
1258 }
1259
1260 return name_info (data, var)->iv;
1261 }
1262
1263 /* Return the first non-invariant ssa var found in EXPR. */
1264
1265 static tree
1266 extract_single_var_from_expr (tree expr)
1267 {
1268 int i, n;
1269 tree tmp;
1270 enum tree_code code;
1271
1272 if (!expr || is_gimple_min_invariant (expr))
1273 return NULL;
1274
1275 code = TREE_CODE (expr);
1276 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1277 {
1278 n = TREE_OPERAND_LENGTH (expr);
1279 for (i = 0; i < n; i++)
1280 {
1281 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1282
1283 if (tmp)
1284 return tmp;
1285 }
1286 }
1287 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1288 }
1289
1290 /* Finds basic ivs. */
1291
1292 static bool
1293 find_bivs (struct ivopts_data *data)
1294 {
1295 gphi *phi;
1296 affine_iv iv;
1297 tree step, type, base, stop;
1298 bool found = false;
1299 class loop *loop = data->current_loop;
1300 gphi_iterator psi;
1301
1302 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1303 {
1304 phi = psi.phi ();
1305
1306 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1307 continue;
1308
1309 if (virtual_operand_p (PHI_RESULT (phi)))
1310 continue;
1311
1312 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1313 continue;
1314
1315 if (integer_zerop (iv.step))
1316 continue;
1317
1318 step = iv.step;
1319 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1320 /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1321 Ideally we should stop at any ssa var, but since that's expensive and
1322 such cases rarely happen, we just do it for the first one.
1323
1324 See PR64705 for the rationale. */
1325 stop = extract_single_var_from_expr (step);
1326 base = expand_simple_operations (base, stop);
1327 if (contains_abnormal_ssa_name_p (base)
1328 || contains_abnormal_ssa_name_p (step))
1329 continue;
1330
1331 type = TREE_TYPE (PHI_RESULT (phi));
1332 base = fold_convert (type, base);
1333 if (step)
1334 {
1335 if (POINTER_TYPE_P (type))
1336 step = convert_to_ptrofftype (step);
1337 else
1338 step = fold_convert (type, step);
1339 }
1340
1341 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1342 found = true;
1343 }
1344
1345 return found;
1346 }
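
/* An editorial illustration (GIMPLE shown only for exposition): a biv
   found above typically looks like

       # i_1 = PHI <0(preheader), i_2(latch)>
       ...
       i_2 = i_1 + 1;

   for which set_iv records base 0 and step 1 on the phi result i_1.  */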
1347
1348 /* Marks basic ivs. */
1349
1350 static void
1351 mark_bivs (struct ivopts_data *data)
1352 {
1353 gphi *phi;
1354 gimple *def;
1355 tree var;
1356 struct iv *iv, *incr_iv;
1357 class loop *loop = data->current_loop;
1358 basic_block incr_bb;
1359 gphi_iterator psi;
1360
1361 data->bivs_not_used_in_addr = 0;
1362 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1363 {
1364 phi = psi.phi ();
1365
1366 iv = get_iv (data, PHI_RESULT (phi));
1367 if (!iv)
1368 continue;
1369
1370 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1371 def = SSA_NAME_DEF_STMT (var);
1372 /* Don't mark an iv peeled from another one as a biv. */
1373 if (def
1374 && gimple_code (def) == GIMPLE_PHI
1375 && gimple_bb (def) == loop->header)
1376 continue;
1377
1378 incr_iv = get_iv (data, var);
1379 if (!incr_iv)
1380 continue;
1381
1382 /* If the increment is in the subloop, ignore it. */
1383 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1384 if (incr_bb->loop_father != data->current_loop
1385 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1386 continue;
1387
1388 iv->biv_p = true;
1389 incr_iv->biv_p = true;
1390 if (iv->no_overflow)
1391 data->bivs_not_used_in_addr++;
1392 if (incr_iv->no_overflow)
1393 data->bivs_not_used_in_addr++;
1394 }
1395 }
1396
1397 /* Checks whether STMT defines a linear induction variable and stores its
1398 parameters to IV. */
1399
1400 static bool
1401 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1402 {
1403 tree lhs, stop;
1404 class loop *loop = data->current_loop;
1405
1406 iv->base = NULL_TREE;
1407 iv->step = NULL_TREE;
1408
1409 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1410 return false;
1411
1412 lhs = gimple_assign_lhs (stmt);
1413 if (TREE_CODE (lhs) != SSA_NAME)
1414 return false;
1415
1416 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1417 return false;
1418
1419 /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1420 Ideally we should stop at any ssa var, but since that's expensive and
1421 such cases rarely happen, we just do it for the first one.
1422
1423 See PR64705 for the rationale. */
1424 stop = extract_single_var_from_expr (iv->step);
1425 iv->base = expand_simple_operations (iv->base, stop);
1426 if (contains_abnormal_ssa_name_p (iv->base)
1427 || contains_abnormal_ssa_name_p (iv->step))
1428 return false;
1429
1430 /* If STMT could throw, then do not consider STMT as defining a GIV.
1431 While this will suppress optimizations, we cannot safely delete this
1432 GIV and associated statements, even if it appears it is not used. */
1433 if (stmt_could_throw_p (cfun, stmt))
1434 return false;
1435
1436 return true;
1437 }
1438
1439 /* Finds general ivs in statement STMT. */
1440
1441 static void
1442 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1443 {
1444 affine_iv iv;
1445
1446 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1447 return;
1448
1449 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1450 }
1451
1452 /* Finds general ivs in basic block BB. */
1453
1454 static void
1455 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1456 {
1457 gimple_stmt_iterator bsi;
1458
1459 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1460 find_givs_in_stmt (data, gsi_stmt (bsi));
1461 }
1462
1463 /* Finds general ivs. */
1464
1465 static void
1466 find_givs (struct ivopts_data *data)
1467 {
1468 class loop *loop = data->current_loop;
1469 basic_block *body = get_loop_body_in_dom_order (loop);
1470 unsigned i;
1471
1472 for (i = 0; i < loop->num_nodes; i++)
1473 find_givs_in_bb (data, body[i]);
1474 free (body);
1475 }
1476
1477 /* For each ssa name defined in LOOP determines whether it is an induction
1478 variable and if so, its initial value and step. */
1479
1480 static bool
1481 find_induction_variables (struct ivopts_data *data)
1482 {
1483 unsigned i;
1484 bitmap_iterator bi;
1485
1486 if (!find_bivs (data))
1487 return false;
1488
1489 find_givs (data);
1490 mark_bivs (data);
1491
1492 if (dump_file && (dump_flags & TDF_DETAILS))
1493 {
1494 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1495
1496 if (niter)
1497 {
1498 fprintf (dump_file, " number of iterations ");
1499 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1500 if (!integer_zerop (niter->may_be_zero))
1501 {
1502 fprintf (dump_file, "; zero if ");
1503 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1504 }
1505 fprintf (dump_file, "\n");
1506 };
1507
1508 fprintf (dump_file, "\n<Induction Vars>:\n");
1509 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1510 {
1511 struct version_info *info = ver_info (data, i);
1512 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1513 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1514 }
1515 }
1516
1517 return true;
1518 }
1519
1520 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1521 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1522 is the const offset stripped from IV base and MEM_TYPE is the type
1523 of the memory being addressed. For uses of other types, ADDR_BASE
1524 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1525
1526 static struct iv_use *
1527 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1528 gimple *stmt, enum use_type type, tree mem_type,
1529 tree addr_base, poly_uint64 addr_offset)
1530 {
1531 struct iv_use *use = XCNEW (struct iv_use);
1532
1533 use->id = group->vuses.length ();
1534 use->group_id = group->id;
1535 use->type = type;
1536 use->mem_type = mem_type;
1537 use->iv = iv;
1538 use->stmt = stmt;
1539 use->op_p = use_p;
1540 use->addr_base = addr_base;
1541 use->addr_offset = addr_offset;
1542
1543 group->vuses.safe_push (use);
1544 return use;
1545 }
1546
1547 /* Checks whether OP is a loop-level invariant and if so, records it.
1548 NONLINEAR_USE is true if the invariant is used in a way we do not
1549 handle specially. */
1550
1551 static void
1552 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1553 {
1554 basic_block bb;
1555 struct version_info *info;
1556
1557 if (TREE_CODE (op) != SSA_NAME
1558 || virtual_operand_p (op))
1559 return;
1560
1561 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1562 if (bb
1563 && flow_bb_inside_loop_p (data->current_loop, bb))
1564 return;
1565
1566 info = name_info (data, op);
1567 info->name = op;
1568 info->has_nonlin_use |= nonlinear_use;
1569 if (!info->inv_id)
1570 info->inv_id = ++data->max_inv_var_id;
1571 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1572 }
1573
1574 /* Record a group of TYPE. */
1575
1576 static struct iv_group *
1577 record_group (struct ivopts_data *data, enum use_type type)
1578 {
1579 struct iv_group *group = XCNEW (struct iv_group);
1580
1581 group->id = data->vgroups.length ();
1582 group->type = type;
1583 group->related_cands = BITMAP_ALLOC (NULL);
1584 group->vuses.create (1);
1585 group->doloop_p = false;
1586
1587 data->vgroups.safe_push (group);
1588 return group;
1589 }
1590
1591 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1592 A new group will be created if there is no existing group for the use.
1593 MEM_TYPE is the type of memory being addressed, or NULL if this
1594 isn't an address reference. */
1595
1596 static struct iv_use *
1597 record_group_use (struct ivopts_data *data, tree *use_p,
1598 struct iv *iv, gimple *stmt, enum use_type type,
1599 tree mem_type)
1600 {
1601 tree addr_base = NULL;
1602 struct iv_group *group = NULL;
1603 poly_uint64 addr_offset = 0;
1604
1605 /* Record non address type use in a new group. */
1606 if (address_p (type))
1607 {
1608 unsigned int i;
1609
1610 addr_base = strip_offset (iv->base, &addr_offset);
1611 for (i = 0; i < data->vgroups.length (); i++)
1612 {
1613 struct iv_use *use;
1614
1615 group = data->vgroups[i];
1616 use = group->vuses[0];
1617 if (!address_p (use->type))
1618 continue;
1619
1620 /* Check if it has the same stripped base and step. */
1621 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1622 && operand_equal_p (iv->step, use->iv->step, 0)
1623 && operand_equal_p (addr_base, use->addr_base, 0))
1624 break;
1625 }
1626 if (i == data->vgroups.length ())
1627 group = NULL;
1628 }
1629
1630 if (!group)
1631 group = record_group (data, type);
1632
1633 return record_use (group, use_p, iv, stmt, type, mem_type,
1634 addr_base, addr_offset);
1635 }
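
/* An editorial illustration of the grouping above: for the accesses
   "a[i]" and "a[i + 1]" in one loop, both address ivs share the same base
   object, step and stripped base, so the second use is added to the group
   created for the first one, with addr_offset recording the extra
   constant (here sizeof (*a)).  */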
1636
1637 /* Checks whether the use OP is interesting and if so, records it. */
1638
1639 static struct iv_use *
1640 find_interesting_uses_op (struct ivopts_data *data, tree op)
1641 {
1642 struct iv *iv;
1643 gimple *stmt;
1644 struct iv_use *use;
1645
1646 if (TREE_CODE (op) != SSA_NAME)
1647 return NULL;
1648
1649 iv = get_iv (data, op);
1650 if (!iv)
1651 return NULL;
1652
1653 if (iv->nonlin_use)
1654 {
1655 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1656 return iv->nonlin_use;
1657 }
1658
1659 if (integer_zerop (iv->step))
1660 {
1661 record_invariant (data, op, true);
1662 return NULL;
1663 }
1664
1665 stmt = SSA_NAME_DEF_STMT (op);
1666 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1667
1668 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1669 iv->nonlin_use = use;
1670 return use;
1671 }
1672
1673 /* Indicate how compare type iv_use can be handled. */
1674 enum comp_iv_rewrite
1675 {
1676 COMP_IV_NA,
1677 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1678 COMP_IV_EXPR,
1679 /* We may rewrite compare type iv_uses on both sides of comparison by
1680 expressing value of each iv_use. */
1681 COMP_IV_EXPR_2,
1682 /* We may rewrite compare type iv_use by expressing value of the iv_use
1683 or by eliminating it with other iv_cand. */
1684 COMP_IV_ELIM
1685 };
1686
1687 /* Given a condition in statement STMT, checks whether it is a compare
1688 of an induction variable and an invariant. If this is the case,
1689 CONTROL_VAR is set to the location of the iv, BOUND to the location of
1690 the invariant, IV_VAR and IV_BOUND are set to the corresponding iv
1691 descriptions, and a comp_iv_rewrite value describing how the compare
1692 may be rewritten is returned. If this is not the case, CONTROL_VAR and
1693 BOUND are set to the arguments of the condition and COMP_IV_NA is returned. */
1694
1695 static enum comp_iv_rewrite
1696 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1697 tree **control_var, tree **bound,
1698 struct iv **iv_var, struct iv **iv_bound)
1699 {
1700 /* The objects returned when COND has constant operands. */
1701 static struct iv const_iv;
1702 static tree zero;
1703 tree *op0 = &zero, *op1 = &zero;
1704 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1705 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1706
1707 if (gimple_code (stmt) == GIMPLE_COND)
1708 {
1709 gcond *cond_stmt = as_a <gcond *> (stmt);
1710 op0 = gimple_cond_lhs_ptr (cond_stmt);
1711 op1 = gimple_cond_rhs_ptr (cond_stmt);
1712 }
1713 else
1714 {
1715 op0 = gimple_assign_rhs1_ptr (stmt);
1716 op1 = gimple_assign_rhs2_ptr (stmt);
1717 }
1718
1719 zero = integer_zero_node;
1720 const_iv.step = integer_zero_node;
1721
1722 if (TREE_CODE (*op0) == SSA_NAME)
1723 iv0 = get_iv (data, *op0);
1724 if (TREE_CODE (*op1) == SSA_NAME)
1725 iv1 = get_iv (data, *op1);
1726
1727 /* If both sides of the comparison are IVs, we can express ivs on both ends. */
1728 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1729 {
1730 rewrite_type = COMP_IV_EXPR_2;
1731 goto end;
1732 }
1733
1734 /* If neither side of the comparison is an IV. */
1735 if ((!iv0 || integer_zerop (iv0->step))
1736 && (!iv1 || integer_zerop (iv1->step)))
1737 goto end;
1738
1739 /* Control variable may be on the other side. */
1740 if (!iv0 || integer_zerop (iv0->step))
1741 {
1742 std::swap (op0, op1);
1743 std::swap (iv0, iv1);
1744 }
1745 /* If one side is IV and the other side isn't loop invariant. */
1746 if (!iv1)
1747 rewrite_type = COMP_IV_EXPR;
1748 /* If one side is IV and the other side is loop invariant. */
1749 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1750 rewrite_type = COMP_IV_ELIM;
1751
1752 end:
1753 if (control_var)
1754 *control_var = op0;
1755 if (iv_var)
1756 *iv_var = iv0;
1757 if (bound)
1758 *bound = op1;
1759 if (iv_bound)
1760 *iv_bound = iv1;
1761
1762 return rewrite_type;
1763 }
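
/* An editorial illustration: for an exit test "i_1 < n_2" where i_1 is an
   iv and n_2 is loop invariant, the function returns COMP_IV_ELIM with
   *CONTROL_VAR pointing at i_1 and *BOUND at n_2; for "i_1 < j_3" with ivs
   on both sides it returns COMP_IV_EXPR_2; and for a compare of two
   invariants it returns COMP_IV_NA.  */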
1764
1765 /* Checks whether the condition in STMT is interesting and if so,
1766 records it. */
1767
1768 static void
1769 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1770 {
1771 tree *var_p, *bound_p;
1772 struct iv *var_iv, *bound_iv;
1773 enum comp_iv_rewrite ret;
1774
1775 ret = extract_cond_operands (data, stmt,
1776 &var_p, &bound_p, &var_iv, &bound_iv);
1777 if (ret == COMP_IV_NA)
1778 {
1779 find_interesting_uses_op (data, *var_p);
1780 find_interesting_uses_op (data, *bound_p);
1781 return;
1782 }
1783
1784 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1785 /* Record compare type iv_use for iv on the other side of comparison. */
1786 if (ret == COMP_IV_EXPR_2)
1787 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1788 }
1789
1790 /* Returns the outermost loop that EXPR is obviously invariant in,
1791 relative to the loop LOOP, i.e. such that all its operands are defined
1792 outside of the returned loop. Returns NULL if EXPR is not
1793 even obviously invariant in LOOP. */
1794
1795 class loop *
1796 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1797 {
1798 basic_block def_bb;
1799 unsigned i, len;
1800
1801 if (is_gimple_min_invariant (expr))
1802 return current_loops->tree_root;
1803
1804 if (TREE_CODE (expr) == SSA_NAME)
1805 {
1806 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1807 if (def_bb)
1808 {
1809 if (flow_bb_inside_loop_p (loop, def_bb))
1810 return NULL;
1811 return superloop_at_depth (loop,
1812 loop_depth (def_bb->loop_father) + 1);
1813 }
1814
1815 return current_loops->tree_root;
1816 }
1817
1818 if (!EXPR_P (expr))
1819 return NULL;
1820
1821 unsigned maxdepth = 0;
1822 len = TREE_OPERAND_LENGTH (expr);
1823 for (i = 0; i < len; i++)
1824 {
1825 class loop *ivloop;
1826 if (!TREE_OPERAND (expr, i))
1827 continue;
1828
1829 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1830 if (!ivloop)
1831 return NULL;
1832 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1833 }
1834
1835 return superloop_at_depth (loop, maxdepth);
1836 }
1837
1838 /* Returns true if expression EXPR is obviously invariant in LOOP,
1839 i.e. if all its operands are defined outside of the LOOP. LOOP
1840 should not be the function body. */
1841
1842 bool
1843 expr_invariant_in_loop_p (class loop *loop, tree expr)
1844 {
1845 basic_block def_bb;
1846 unsigned i, len;
1847
1848 gcc_assert (loop_depth (loop) > 0);
1849
1850 if (is_gimple_min_invariant (expr))
1851 return true;
1852
1853 if (TREE_CODE (expr) == SSA_NAME)
1854 {
1855 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1856 if (def_bb
1857 && flow_bb_inside_loop_p (loop, def_bb))
1858 return false;
1859
1860 return true;
1861 }
1862
1863 if (!EXPR_P (expr))
1864 return false;
1865
1866 len = TREE_OPERAND_LENGTH (expr);
1867 for (i = 0; i < len; i++)
1868 if (TREE_OPERAND (expr, i)
1869 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1870 return false;
1871
1872 return true;
1873 }
1874
1875 /* Given expression EXPR which computes inductive values with respect
1876 to the loop recorded in DATA, this function returns the biv from which EXPR
1877 is derived by tracing definition chains of ssa variables in EXPR. */
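/* For illustration (hypothetical names): given the biv i_1 = PHI <0, i_7>
   with latch increment i_7 = i_1 + 1, and EXPR being the derived iv
   p_3 = p_base_2 + (sizetype) i_1 * 4, tracing the definition of p_3 and
   its operands eventually reaches i_1, whose iv is returned.  */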
1878
1879 static struct iv*
1880 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1881 {
1882 struct iv *iv;
1883 unsigned i, n;
1884 tree e2, e1;
1885 enum tree_code code;
1886 gimple *stmt;
1887
1888 if (expr == NULL_TREE)
1889 return NULL;
1890
1891 if (is_gimple_min_invariant (expr))
1892 return NULL;
1893
1894 code = TREE_CODE (expr);
1895 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1896 {
1897 n = TREE_OPERAND_LENGTH (expr);
1898 for (i = 0; i < n; i++)
1899 {
1900 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1901 if (iv)
1902 return iv;
1903 }
1904 }
1905
1906 /* Stop if it's not an SSA name. */
1907 if (code != SSA_NAME)
1908 return NULL;
1909
1910 iv = get_iv (data, expr);
1911 if (!iv || integer_zerop (iv->step))
1912 return NULL;
1913 else if (iv->biv_p)
1914 return iv;
1915
1916 stmt = SSA_NAME_DEF_STMT (expr);
1917 if (gphi *phi = dyn_cast <gphi *> (stmt))
1918 {
1919 ssa_op_iter iter;
1920 use_operand_p use_p;
1921 basic_block phi_bb = gimple_bb (phi);
1922
1923 /* Skip loop header PHI that doesn't define biv. */
1924 if (phi_bb->loop_father == data->current_loop)
1925 return NULL;
1926
1927 if (virtual_operand_p (gimple_phi_result (phi)))
1928 return NULL;
1929
1930 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1931 {
1932 tree use = USE_FROM_PTR (use_p);
1933 iv = find_deriving_biv_for_expr (data, use);
1934 if (iv)
1935 return iv;
1936 }
1937 return NULL;
1938 }
1939 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1940 return NULL;
1941
1942 e1 = gimple_assign_rhs1 (stmt);
1943 code = gimple_assign_rhs_code (stmt);
1944 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1945 return find_deriving_biv_for_expr (data, e1);
1946
1947 switch (code)
1948 {
1949 case MULT_EXPR:
1950 case PLUS_EXPR:
1951 case MINUS_EXPR:
1952 case POINTER_PLUS_EXPR:
1953 /* Increments, decrements and multiplications by a constant
1954 are simple. */
1955 e2 = gimple_assign_rhs2 (stmt);
1956 iv = find_deriving_biv_for_expr (data, e2);
1957 if (iv)
1958 return iv;
1959 gcc_fallthrough ();
1960
1961 CASE_CONVERT:
1962 /* Casts are simple. */
1963 return find_deriving_biv_for_expr (data, e1);
1964
1965 default:
1966 break;
1967 }
1968
1969 return NULL;
1970 }
1971
1972 /* Record that BIV, as well as any biv that is its predecessor or
1973 successor, is used in address type uses. */
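/* For illustration (hypothetical bivs): if BIV is {base_1, +, 4} and the
   loop has another biv {base_1 + 4, +, 4}, the latter is the value of the
   former after one increment, so have_address_use is set on both.  */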
1974
1975 static void
1976 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1977 {
1978 unsigned i;
1979 tree type, base_1, base_2;
1980 bitmap_iterator bi;
1981
1982 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1983 || biv->have_address_use || !biv->no_overflow)
1984 return;
1985
1986 type = TREE_TYPE (biv->base);
1987 if (!INTEGRAL_TYPE_P (type))
1988 return;
1989
1990 biv->have_address_use = true;
1991 data->bivs_not_used_in_addr--;
1992 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1993 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1994 {
1995 struct iv *iv = ver_info (data, i)->iv;
1996
1997 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1998 || iv->have_address_use || !iv->no_overflow)
1999 continue;
2000
2001 if (type != TREE_TYPE (iv->base)
2002 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2003 continue;
2004
2005 if (!operand_equal_p (biv->step, iv->step, 0))
2006 continue;
2007
2008 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2009 if (operand_equal_p (base_1, iv->base, 0)
2010 || operand_equal_p (base_2, biv->base, 0))
2011 {
2012 iv->have_address_use = true;
2013 data->bivs_not_used_in_addr--;
2014 }
2015 }
2016 }
2017
2018 /* Accumulates the steps of indices into DATA and replaces their values with the
2019 initial ones. Returns false when the value of the index cannot be determined.
2020 Callback for for_each_index. */
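/* For illustration (hypothetical source): for a reference a[i_1] where
   i_1 is the iv {0, +, 1} and the elements of a are 4 bytes wide, the
   callback replaces the index by 0 and adds 1 * 4 to DATA->step, so the
   reference is treated as a[0] advanced by 4 bytes per iteration.  */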
2021
2022 struct ifs_ivopts_data
2023 {
2024 struct ivopts_data *ivopts_data;
2025 gimple *stmt;
2026 tree step;
2027 };
2028
2029 static bool
2030 idx_find_step (tree base, tree *idx, void *data)
2031 {
2032 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2033 struct iv *iv;
2034 bool use_overflow_semantics = false;
2035 tree step, iv_base, iv_step, lbound, off;
2036 class loop *loop = dta->ivopts_data->current_loop;
2037
2038 /* If base is a component ref, require that the offset of the reference
2039 be invariant. */
2040 if (TREE_CODE (base) == COMPONENT_REF)
2041 {
2042 off = component_ref_field_offset (base);
2043 return expr_invariant_in_loop_p (loop, off);
2044 }
2045
2046 /* If base is array, first check whether we will be able to move the
2047 reference out of the loop (in order to take its address in strength
2048 reduction). In order for this to work we need both lower bound
2049 and step to be loop invariants. */
2050 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2051 {
2052 /* Moreover, for a range, the size needs to be invariant as well. */
2053 if (TREE_CODE (base) == ARRAY_RANGE_REF
2054 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2055 return false;
2056
2057 step = array_ref_element_size (base);
2058 lbound = array_ref_low_bound (base);
2059
2060 if (!expr_invariant_in_loop_p (loop, step)
2061 || !expr_invariant_in_loop_p (loop, lbound))
2062 return false;
2063 }
2064
2065 if (TREE_CODE (*idx) != SSA_NAME)
2066 return true;
2067
2068 iv = get_iv (dta->ivopts_data, *idx);
2069 if (!iv)
2070 return false;
2071
2072 /* XXX For a base of *D42 with iv->base being &x[0], we produce
2073 *&x[0], which is not folded and does not trigger the
2074 ARRAY_REF path below. */
2075 *idx = iv->base;
2076
2077 if (integer_zerop (iv->step))
2078 return true;
2079
2080 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2081 {
2082 step = array_ref_element_size (base);
2083
2084 /* We only handle addresses whose step is an integer constant. */
2085 if (TREE_CODE (step) != INTEGER_CST)
2086 return false;
2087 }
2088 else
2089 /* The step for pointer arithmetic is already 1 byte. */
2090 step = size_one_node;
2091
2092 iv_base = iv->base;
2093 iv_step = iv->step;
2094 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2095 use_overflow_semantics = true;
2096
2097 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2098 sizetype, &iv_base, &iv_step, dta->stmt,
2099 use_overflow_semantics))
2100 {
2101 /* The index might wrap. */
2102 return false;
2103 }
2104
2105 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2106 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2107
2108 if (dta->ivopts_data->bivs_not_used_in_addr)
2109 {
2110 if (!iv->biv_p)
2111 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2112
2113 record_biv_for_address_use (dta->ivopts_data, iv);
2114 }
2115 return true;
2116 }
2117
2118 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2119 object is passed to it in DATA. */
2120
2121 static bool
2122 idx_record_use (tree base, tree *idx,
2123 void *vdata)
2124 {
2125 struct ivopts_data *data = (struct ivopts_data *) vdata;
2126 find_interesting_uses_op (data, *idx);
2127 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2128 {
2129 find_interesting_uses_op (data, array_ref_element_size (base));
2130 find_interesting_uses_op (data, array_ref_low_bound (base));
2131 }
2132 return true;
2133 }
2134
2135 /* If we can prove that TOP = cst * BOT for some constant cst,
2136 store cst to MUL and return true. Otherwise return false.
2137 The returned value is always sign-extended, regardless of the
2138 signedness of TOP and BOT. */
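/* For illustration (hypothetical operands): with TOP = b_1 * 4 and
   BOT = b_1 the function stores 4 to MUL; with TOP = 12 and BOT = 4 it
   stores 3; with TOP = b_1 * 3 and BOT = b_2 it returns false, since no
   constant multiple can be proven.  */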
2139
2140 static bool
2141 constant_multiple_of (tree top, tree bot, widest_int *mul)
2142 {
2143 tree mby;
2144 enum tree_code code;
2145 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2146 widest_int res, p0, p1;
2147
2148 STRIP_NOPS (top);
2149 STRIP_NOPS (bot);
2150
2151 if (operand_equal_p (top, bot, 0))
2152 {
2153 *mul = 1;
2154 return true;
2155 }
2156
2157 code = TREE_CODE (top);
2158 switch (code)
2159 {
2160 case MULT_EXPR:
2161 mby = TREE_OPERAND (top, 1);
2162 if (TREE_CODE (mby) != INTEGER_CST)
2163 return false;
2164
2165 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2166 return false;
2167
2168 *mul = wi::sext (res * wi::to_widest (mby), precision);
2169 return true;
2170
2171 case PLUS_EXPR:
2172 case MINUS_EXPR:
2173 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2174 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2175 return false;
2176
2177 if (code == MINUS_EXPR)
2178 p1 = -p1;
2179 *mul = wi::sext (p0 + p1, precision);
2180 return true;
2181
2182 case INTEGER_CST:
2183 if (TREE_CODE (bot) != INTEGER_CST)
2184 return false;
2185
2186 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2187 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2188 if (p1 == 0)
2189 return false;
2190 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2191 return res == 0;
2192
2193 default:
2194 if (POLY_INT_CST_P (top)
2195 && POLY_INT_CST_P (bot)
2196 && constant_multiple_p (wi::to_poly_widest (top),
2197 wi::to_poly_widest (bot), mul))
2198 return true;
2199
2200 return false;
2201 }
2202 }
2203
2204 /* Return true if memory reference REF with step STEP may be unaligned. */
2205
2206 static bool
2207 may_be_unaligned_p (tree ref, tree step)
2208 {
2209 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2210 thus they are not misaligned. */
2211 if (TREE_CODE (ref) == TARGET_MEM_REF)
2212 return false;
2213
2214 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2215 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2216 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2217
2218 unsigned HOST_WIDE_INT bitpos;
2219 unsigned int ref_align;
2220 get_object_alignment_1 (ref, &ref_align, &bitpos);
2221 if (ref_align < align
2222 || (bitpos % align) != 0
2223 || (bitpos % BITS_PER_UNIT) != 0)
2224 return true;
2225
2226 unsigned int trailing_zeros = tree_ctz (step);
2227 if (trailing_zeros < HOST_BITS_PER_INT
2228 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2229 return true;
2230
2231 return false;
2232 }
2233
2234 /* Return true if EXPR may be non-addressable. */
2235
2236 bool
2237 may_be_nonaddressable_p (tree expr)
2238 {
2239 switch (TREE_CODE (expr))
2240 {
2241 case VAR_DECL:
2242 /* Check if it's a register variable. */
2243 return DECL_HARD_REGISTER (expr);
2244
2245 case TARGET_MEM_REF:
2246 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2247 target, thus they are always addressable. */
2248 return false;
2249
2250 case MEM_REF:
2251 /* Likewise for MEM_REFs, modulo the storage order. */
2252 return REF_REVERSE_STORAGE_ORDER (expr);
2253
2254 case BIT_FIELD_REF:
2255 if (REF_REVERSE_STORAGE_ORDER (expr))
2256 return true;
2257 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2258
2259 case COMPONENT_REF:
2260 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2261 return true;
2262 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2263 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2264
2265 case ARRAY_REF:
2266 case ARRAY_RANGE_REF:
2267 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2268 return true;
2269 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2270
2271 case VIEW_CONVERT_EXPR:
2272 /* This kind of view-conversions may wrap non-addressable objects
2273 and make them look addressable. After some processing the
2274 non-addressability may be uncovered again, causing ADDR_EXPRs
2275 of inappropriate objects to be built. */
2276 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2277 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2278 return true;
2279 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2280
2281 CASE_CONVERT:
2282 return true;
2283
2284 default:
2285 break;
2286 }
2287
2288 return false;
2289 }
2290
2291 /* Finds addresses in *OP_P inside STMT. */
2292
2293 static void
2294 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2295 tree *op_p)
2296 {
2297 tree base = *op_p, step = size_zero_node;
2298 struct iv *civ;
2299 struct ifs_ivopts_data ifs_ivopts_data;
2300
2301 /* Do not play with volatile memory references. A bit too conservative,
2302 perhaps, but safe. */
2303 if (gimple_has_volatile_ops (stmt))
2304 goto fail;
2305
2306 /* Ignore bitfields for now. Not really something terribly complicated
2307 to handle. TODO. */
2308 if (TREE_CODE (base) == BIT_FIELD_REF)
2309 goto fail;
2310
2311 base = unshare_expr (base);
2312
2313 if (TREE_CODE (base) == TARGET_MEM_REF)
2314 {
2315 tree type = build_pointer_type (TREE_TYPE (base));
2316 tree astep;
2317
2318 if (TMR_BASE (base)
2319 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2320 {
2321 civ = get_iv (data, TMR_BASE (base));
2322 if (!civ)
2323 goto fail;
2324
2325 TMR_BASE (base) = civ->base;
2326 step = civ->step;
2327 }
2328 if (TMR_INDEX2 (base)
2329 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2330 {
2331 civ = get_iv (data, TMR_INDEX2 (base));
2332 if (!civ)
2333 goto fail;
2334
2335 TMR_INDEX2 (base) = civ->base;
2336 step = civ->step;
2337 }
2338 if (TMR_INDEX (base)
2339 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2340 {
2341 civ = get_iv (data, TMR_INDEX (base));
2342 if (!civ)
2343 goto fail;
2344
2345 TMR_INDEX (base) = civ->base;
2346 astep = civ->step;
2347
2348 if (astep)
2349 {
2350 if (TMR_STEP (base))
2351 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2352
2353 step = fold_build2 (PLUS_EXPR, type, step, astep);
2354 }
2355 }
2356
2357 if (integer_zerop (step))
2358 goto fail;
2359 base = tree_mem_ref_addr (type, base);
2360 }
2361 else
2362 {
2363 ifs_ivopts_data.ivopts_data = data;
2364 ifs_ivopts_data.stmt = stmt;
2365 ifs_ivopts_data.step = size_zero_node;
2366 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2367 || integer_zerop (ifs_ivopts_data.step))
2368 goto fail;
2369 step = ifs_ivopts_data.step;
2370
2371 /* Check that the base expression is addressable. This needs
2372 to be done after substituting bases of IVs into it. */
2373 if (may_be_nonaddressable_p (base))
2374 goto fail;
2375
2376 /* Moreover, on strict alignment platforms, check that it is
2377 sufficiently aligned. */
2378 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2379 goto fail;
2380
2381 base = build_fold_addr_expr (base);
2382
2383 /* Substituting bases of IVs into the base expression might
2384 have caused folding opportunities. */
2385 if (TREE_CODE (base) == ADDR_EXPR)
2386 {
2387 tree *ref = &TREE_OPERAND (base, 0);
2388 while (handled_component_p (*ref))
2389 ref = &TREE_OPERAND (*ref, 0);
2390 if (TREE_CODE (*ref) == MEM_REF)
2391 {
2392 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2393 TREE_OPERAND (*ref, 0),
2394 TREE_OPERAND (*ref, 1));
2395 if (tem)
2396 *ref = tem;
2397 }
2398 }
2399 }
2400
2401 civ = alloc_iv (data, base, step);
2402 /* Fail if base object of this memory reference is unknown. */
2403 if (civ->base_object == NULL_TREE)
2404 goto fail;
2405
2406 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2407 return;
2408
2409 fail:
2410 for_each_index (op_p, idx_record_use, data);
2411 }
2412
2413 /* Finds and records invariants used in STMT. */
2414
2415 static void
2416 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2417 {
2418 ssa_op_iter iter;
2419 use_operand_p use_p;
2420 tree op;
2421
2422 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2423 {
2424 op = USE_FROM_PTR (use_p);
2425 record_invariant (data, op, false);
2426 }
2427 }
2428
2429 /* CALL calls an internal function. If operand *OP_P will become an
2430 address when the call is expanded, return the type of the memory
2431 being addressed, otherwise return null. */
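/* For illustration (hypothetical SSA names): for the call
   lhs_1 = .MASK_LOAD (ptr_2, align_3, mask_4), passing the address of the
   first argument returns the type of lhs_1, since ptr_2 becomes the load
   address when the call is expanded; for any other operand the result is
   NULL_TREE.  */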
2432
2433 static tree
2434 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2435 {
2436 switch (gimple_call_internal_fn (call))
2437 {
2438 case IFN_MASK_LOAD:
2439 case IFN_MASK_LOAD_LANES:
2440 if (op_p == gimple_call_arg_ptr (call, 0))
2441 return TREE_TYPE (gimple_call_lhs (call));
2442 return NULL_TREE;
2443
2444 case IFN_MASK_STORE:
2445 case IFN_MASK_STORE_LANES:
2446 if (op_p == gimple_call_arg_ptr (call, 0))
2447 return TREE_TYPE (gimple_call_arg (call, 3));
2448 return NULL_TREE;
2449
2450 default:
2451 return NULL_TREE;
2452 }
2453 }
2454
2455 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2456 Return true if the operand will become an address when STMT
2457 is expanded and record the associated address use if so. */
2458
2459 static bool
2460 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2461 struct iv *iv)
2462 {
2463 /* Fail if base object of this memory reference is unknown. */
2464 if (iv->base_object == NULL_TREE)
2465 return false;
2466
2467 tree mem_type = NULL_TREE;
2468 if (gcall *call = dyn_cast <gcall *> (stmt))
2469 if (gimple_call_internal_p (call))
2470 mem_type = get_mem_type_for_internal_fn (call, op_p);
2471 if (mem_type)
2472 {
2473 iv = alloc_iv (data, iv->base, iv->step);
2474 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2475 return true;
2476 }
2477 return false;
2478 }
2479
2480 /* Finds interesting uses of induction variables in the statement STMT. */
2481
2482 static void
2483 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2484 {
2485 struct iv *iv;
2486 tree op, *lhs, *rhs;
2487 ssa_op_iter iter;
2488 use_operand_p use_p;
2489 enum tree_code code;
2490
2491 find_invariants_stmt (data, stmt);
2492
2493 if (gimple_code (stmt) == GIMPLE_COND)
2494 {
2495 find_interesting_uses_cond (data, stmt);
2496 return;
2497 }
2498
2499 if (is_gimple_assign (stmt))
2500 {
2501 lhs = gimple_assign_lhs_ptr (stmt);
2502 rhs = gimple_assign_rhs1_ptr (stmt);
2503
2504 if (TREE_CODE (*lhs) == SSA_NAME)
2505 {
2506 /* If the statement defines an induction variable, the uses are not
2507 interesting by themselves. */
2508
2509 iv = get_iv (data, *lhs);
2510
2511 if (iv && !integer_zerop (iv->step))
2512 return;
2513 }
2514
2515 code = gimple_assign_rhs_code (stmt);
2516 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2517 && (REFERENCE_CLASS_P (*rhs)
2518 || is_gimple_val (*rhs)))
2519 {
2520 if (REFERENCE_CLASS_P (*rhs))
2521 find_interesting_uses_address (data, stmt, rhs);
2522 else
2523 find_interesting_uses_op (data, *rhs);
2524
2525 if (REFERENCE_CLASS_P (*lhs))
2526 find_interesting_uses_address (data, stmt, lhs);
2527 return;
2528 }
2529 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2530 {
2531 find_interesting_uses_cond (data, stmt);
2532 return;
2533 }
2534
2535 /* TODO -- we should also handle address uses of type
2536
2537 memory = call (whatever);
2538
2539 and
2540
2541 call (memory). */
2542 }
2543
2544 if (gimple_code (stmt) == GIMPLE_PHI
2545 && gimple_bb (stmt) == data->current_loop->header)
2546 {
2547 iv = get_iv (data, PHI_RESULT (stmt));
2548
2549 if (iv && !integer_zerop (iv->step))
2550 return;
2551 }
2552
2553 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2554 {
2555 op = USE_FROM_PTR (use_p);
2556
2557 if (TREE_CODE (op) != SSA_NAME)
2558 continue;
2559
2560 iv = get_iv (data, op);
2561 if (!iv)
2562 continue;
2563
2564 if (!find_address_like_use (data, stmt, use_p->use, iv))
2565 find_interesting_uses_op (data, op);
2566 }
2567 }
2568
2569 /* Finds interesting uses of induction variables outside of loops
2570 on loop exit edge EXIT. */
2571
2572 static void
2573 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2574 {
2575 gphi *phi;
2576 gphi_iterator psi;
2577 tree def;
2578
2579 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2580 {
2581 phi = psi.phi ();
2582 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2583 if (!virtual_operand_p (def))
2584 find_interesting_uses_op (data, def);
2585 }
2586 }
2587
2588 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2589 mode for memory reference represented by USE. */
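/* For illustration (target dependent): on a target whose [base + offset]
   addresses accept only 16-bit signed offsets, an OFFSET of 32768 makes
   the constructed PLUS address fail memory_address_addr_space_p, so the
   function returns false, while an OFFSET of 8 succeeds.  */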
2590
2591 static GTY (()) vec<rtx, va_gc> *addr_list;
2592
2593 static bool
2594 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2595 {
2596 rtx reg, addr;
2597 unsigned list_index;
2598 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2599 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2600
2601 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2602 if (list_index >= vec_safe_length (addr_list))
2603 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2604
2605 addr = (*addr_list)[list_index];
2606 if (!addr)
2607 {
2608 addr_mode = targetm.addr_space.address_mode (as);
2609 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2610 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2611 (*addr_list)[list_index] = addr;
2612 }
2613 else
2614 addr_mode = GET_MODE (addr);
2615
2616 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2617 return (memory_address_addr_space_p (mem_mode, addr, as));
2618 }
2619
2620 /* Comparison function to sort group in ascending order of addr_offset. */
2621
2622 static int
2623 group_compare_offset (const void *a, const void *b)
2624 {
2625 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2626 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2627
2628 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2629 }
2630
2631 /* Check if small groups should be split. Return true if no group
2632 contains more than two uses with distinct addr_offsets. Return
2633 false otherwise. We want to split such groups because:
2634
2635 1) Small groups don't have much benefit and may interfere with
2636 general candidate selection.
2637 2) The problem size with only small groups is usually small and the
2638 general algorithm can handle it well.
2639
2640 TODO -- The above claim may not hold when we want to merge memory
2641 accesses with consecutive addresses. */
2642
2643 static bool
2644 split_small_address_groups_p (struct ivopts_data *data)
2645 {
2646 unsigned int i, j, distinct = 1;
2647 struct iv_use *pre;
2648 struct iv_group *group;
2649
2650 for (i = 0; i < data->vgroups.length (); i++)
2651 {
2652 group = data->vgroups[i];
2653 if (group->vuses.length () == 1)
2654 continue;
2655
2656 gcc_assert (address_p (group->type));
2657 if (group->vuses.length () == 2)
2658 {
2659 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2660 group->vuses[1]->addr_offset) > 0)
2661 std::swap (group->vuses[0], group->vuses[1]);
2662 }
2663 else
2664 group->vuses.qsort (group_compare_offset);
2665
2666 if (distinct > 2)
2667 continue;
2668
2669 distinct = 1;
2670 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2671 {
2672 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2673 {
2674 pre = group->vuses[j];
2675 distinct++;
2676 }
2677
2678 if (distinct > 2)
2679 break;
2680 }
2681 }
2682
2683 return (distinct <= 2);
2684 }
2685
2686 /* For each group of address type uses, this function further groups
2687 these uses according to the maximum offset supported by target's
2688 [base + offset] addressing mode. */
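/* For illustration (hypothetical offsets): a group with uses at offsets
   0, 8 and 65536 from the same base is split when 65536 does not fit in
   the offset field of the addressing mode: the first two uses stay in the
   original group and the third is moved to a new one.  */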
2689
2690 static void
2691 split_address_groups (struct ivopts_data *data)
2692 {
2693 unsigned int i, j;
2694 /* Whether to split groups unconditionally. */
2695 bool split_p = split_small_address_groups_p (data);
2696
2697 for (i = 0; i < data->vgroups.length (); i++)
2698 {
2699 struct iv_group *new_group = NULL;
2700 struct iv_group *group = data->vgroups[i];
2701 struct iv_use *use = group->vuses[0];
2702
2703 use->id = 0;
2704 use->group_id = group->id;
2705 if (group->vuses.length () == 1)
2706 continue;
2707
2708 gcc_assert (address_p (use->type));
2709
2710 for (j = 1; j < group->vuses.length ();)
2711 {
2712 struct iv_use *next = group->vuses[j];
2713 poly_int64 offset = next->addr_offset - use->addr_offset;
2714
2715 /* Split the group if asked to, or if the offset against the first
2716 use can't fit in the offset part of the addressing mode. IV uses
2717 having the same offset are still kept in one group. */
2718 if (maybe_ne (offset, 0)
2719 && (split_p || !addr_offset_valid_p (use, offset)))
2720 {
2721 if (!new_group)
2722 new_group = record_group (data, group->type);
2723 group->vuses.ordered_remove (j);
2724 new_group->vuses.safe_push (next);
2725 continue;
2726 }
2727
2728 next->id = j;
2729 next->group_id = group->id;
2730 j++;
2731 }
2732 }
2733 }
2734
2735 /* Finds uses of the induction variables that are interesting. */
2736
2737 static void
2738 find_interesting_uses (struct ivopts_data *data)
2739 {
2740 basic_block bb;
2741 gimple_stmt_iterator bsi;
2742 basic_block *body = get_loop_body (data->current_loop);
2743 unsigned i;
2744 edge e;
2745
2746 for (i = 0; i < data->current_loop->num_nodes; i++)
2747 {
2748 edge_iterator ei;
2749 bb = body[i];
2750
2751 FOR_EACH_EDGE (e, ei, bb->succs)
2752 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2753 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2754 find_interesting_uses_outside (data, e);
2755
2756 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2757 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2758 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2759 if (!is_gimple_debug (gsi_stmt (bsi)))
2760 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2761 }
2762 free (body);
2763
2764 split_address_groups (data);
2765
2766 if (dump_file && (dump_flags & TDF_DETAILS))
2767 {
2768 fprintf (dump_file, "\n<IV Groups>:\n");
2769 dump_groups (dump_file, data);
2770 fprintf (dump_file, "\n");
2771 }
2772 }
2773
2774 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2775 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2776 we are at the top-level of the processed address. */
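/* For illustration (hypothetical source): for &a[i_1 + 3] with 4-byte
   elements, the constant part 12 is stored to *OFFSET and the stripped
   core &a[i_1] is returned; for p_1 + 16 the result is p_1 with an
   offset of 16.  */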
2777
2778 static tree
2779 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2780 poly_int64 *offset)
2781 {
2782 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2783 enum tree_code code;
2784 tree type, orig_type = TREE_TYPE (expr);
2785 poly_int64 off0, off1;
2786 HOST_WIDE_INT st;
2787 tree orig_expr = expr;
2788
2789 STRIP_NOPS (expr);
2790
2791 type = TREE_TYPE (expr);
2792 code = TREE_CODE (expr);
2793 *offset = 0;
2794
2795 switch (code)
2796 {
2797 case POINTER_PLUS_EXPR:
2798 case PLUS_EXPR:
2799 case MINUS_EXPR:
2800 op0 = TREE_OPERAND (expr, 0);
2801 op1 = TREE_OPERAND (expr, 1);
2802
2803 op0 = strip_offset_1 (op0, false, false, &off0);
2804 op1 = strip_offset_1 (op1, false, false, &off1);
2805
2806 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2807 if (op0 == TREE_OPERAND (expr, 0)
2808 && op1 == TREE_OPERAND (expr, 1))
2809 return orig_expr;
2810
2811 if (integer_zerop (op1))
2812 expr = op0;
2813 else if (integer_zerop (op0))
2814 {
2815 if (code == MINUS_EXPR)
2816 expr = fold_build1 (NEGATE_EXPR, type, op1);
2817 else
2818 expr = op1;
2819 }
2820 else
2821 expr = fold_build2 (code, type, op0, op1);
2822
2823 return fold_convert (orig_type, expr);
2824
2825 case MULT_EXPR:
2826 op1 = TREE_OPERAND (expr, 1);
2827 if (!cst_and_fits_in_hwi (op1))
2828 return orig_expr;
2829
2830 op0 = TREE_OPERAND (expr, 0);
2831 op0 = strip_offset_1 (op0, false, false, &off0);
2832 if (op0 == TREE_OPERAND (expr, 0))
2833 return orig_expr;
2834
2835 *offset = off0 * int_cst_value (op1);
2836 if (integer_zerop (op0))
2837 expr = op0;
2838 else
2839 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2840
2841 return fold_convert (orig_type, expr);
2842
2843 case ARRAY_REF:
2844 case ARRAY_RANGE_REF:
2845 if (!inside_addr)
2846 return orig_expr;
2847
2848 step = array_ref_element_size (expr);
2849 if (!cst_and_fits_in_hwi (step))
2850 break;
2851
2852 st = int_cst_value (step);
2853 op1 = TREE_OPERAND (expr, 1);
2854 op1 = strip_offset_1 (op1, false, false, &off1);
2855 *offset = off1 * st;
2856
2857 if (top_compref
2858 && integer_zerop (op1))
2859 {
2860 /* Strip the component reference completely. */
2861 op0 = TREE_OPERAND (expr, 0);
2862 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2863 *offset += off0;
2864 return op0;
2865 }
2866 break;
2867
2868 case COMPONENT_REF:
2869 {
2870 tree field;
2871
2872 if (!inside_addr)
2873 return orig_expr;
2874
2875 tmp = component_ref_field_offset (expr);
2876 field = TREE_OPERAND (expr, 1);
2877 if (top_compref
2878 && cst_and_fits_in_hwi (tmp)
2879 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2880 {
2881 HOST_WIDE_INT boffset, abs_off;
2882
2883 /* Strip the component reference completely. */
2884 op0 = TREE_OPERAND (expr, 0);
2885 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2886 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2887 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2888 if (boffset < 0)
2889 abs_off = -abs_off;
2890
2891 *offset = off0 + int_cst_value (tmp) + abs_off;
2892 return op0;
2893 }
2894 }
2895 break;
2896
2897 case ADDR_EXPR:
2898 op0 = TREE_OPERAND (expr, 0);
2899 op0 = strip_offset_1 (op0, true, true, &off0);
2900 *offset += off0;
2901
2902 if (op0 == TREE_OPERAND (expr, 0))
2903 return orig_expr;
2904
2905 expr = build_fold_addr_expr (op0);
2906 return fold_convert (orig_type, expr);
2907
2908 case MEM_REF:
2909 /* ??? Offset operand? */
2910 inside_addr = false;
2911 break;
2912
2913 default:
2914 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2915 return build_int_cst (orig_type, 0);
2916 return orig_expr;
2917 }
2918
2919 /* Default handling of expressions for which we want to recurse into
2920 the first operand. */
2921 op0 = TREE_OPERAND (expr, 0);
2922 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2923 *offset += off0;
2924
2925 if (op0 == TREE_OPERAND (expr, 0)
2926 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2927 return orig_expr;
2928
2929 expr = copy_node (expr);
2930 TREE_OPERAND (expr, 0) = op0;
2931 if (op1)
2932 TREE_OPERAND (expr, 1) = op1;
2933
2934 /* Inside address, we might strip the top level component references,
2935 thus changing type of the expression. Handling of ADDR_EXPR
2936 will fix that. */
2937 expr = fold_convert (orig_type, expr);
2938
2939 return expr;
2940 }
2941
2942 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2943
2944 tree
2945 strip_offset (tree expr, poly_uint64_pod *offset)
2946 {
2947 poly_int64 off;
2948 tree core = strip_offset_1 (expr, false, false, &off);
2949 *offset = off;
2950 return core;
2951 }
2952
2953 /* Returns variant of TYPE that can be used as base for different uses.
2954 We return unsigned type with the same precision, which avoids problems
2955 with overflows. */
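/* For illustration: for "int *" this returns an unsigned integer type of
   pointer precision, for "int" it returns "unsigned int", and an already
   unsigned type is returned unchanged.  */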
2956
2957 static tree
2958 generic_type_for (tree type)
2959 {
2960 if (POINTER_TYPE_P (type))
2961 return unsigned_type_for (type);
2962
2963 if (TYPE_UNSIGNED (type))
2964 return type;
2965
2966 return unsigned_type_for (type);
2967 }
2968
2969 /* Private data for walk_tree. */
2970
2971 struct walk_tree_data
2972 {
2973 bitmap *inv_vars;
2974 struct ivopts_data *idata;
2975 };
2976
2977 /* Callback function for walk_tree, it records invariants and symbol
2978 reference in *EXPR_P. DATA is the structure storing result info. */
2979
2980 static tree
2981 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2982 {
2983 tree op = *expr_p;
2984 struct version_info *info;
2985 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2986
2987 if (TREE_CODE (op) != SSA_NAME)
2988 return NULL_TREE;
2989
2990 info = name_info (wdata->idata, op);
2991 /* Because we expand simple operations when finding IVs, a loop invariant
2992 variable that isn't referenced by the original loop could be used now.
2993 Record such invariant variables here. */
2994 if (!info->iv)
2995 {
2996 struct ivopts_data *idata = wdata->idata;
2997 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2998
2999 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3000 {
3001 tree steptype = TREE_TYPE (op);
3002 if (POINTER_TYPE_P (steptype))
3003 steptype = sizetype;
3004 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3005 record_invariant (idata, op, false);
3006 }
3007 }
3008 if (!info->inv_id || info->has_nonlin_use)
3009 return NULL_TREE;
3010
3011 if (!*wdata->inv_vars)
3012 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3013 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3014
3015 return NULL_TREE;
3016 }
3017
3018 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3019 store them. */
3020
3021 static inline void
3022 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3023 {
3024 struct walk_tree_data wdata;
3025
3026 if (!inv_vars)
3027 return;
3028
3029 wdata.idata = data;
3030 wdata.inv_vars = inv_vars;
3031 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3032 }
3033
3034 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3035 will be recorded if it doesn't exist yet. Given below two exprs:
3036 inv_expr + cst1, inv_expr + cst2
3037 It's hard to decide whether the constant part should be stripped
3038 or not. We choose not to strip it, based on the facts below:
3039 1) We need to count the ADD cost for the constant part if it's stripped,
3040 which isn't always trivial where this function is called.
3041 2) Stripping the constant away may conflict with the subsequent loop
3042 invariant hoisting pass.
3043 3) Not stripping the constant away results in more invariant exprs,
3044 which usually leads to decisions preferring lower register pressure. */
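/* For illustration (hypothetical names): the expressions inv_1 + 4 and
   inv_1 + 8 are therefore recorded as two distinct hash table entries,
   while a plain SSA name or a constant is never recorded at all.  */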
3045
3046 static iv_inv_expr_ent *
3047 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3048 {
3049 STRIP_NOPS (inv_expr);
3050
3051 if (poly_int_tree_p (inv_expr)
3052 || TREE_CODE (inv_expr) == SSA_NAME)
3053 return NULL;
3054
3055 /* Don't strip constant part away as we used to. */
3056
3057 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3058 struct iv_inv_expr_ent ent;
3059 ent.expr = inv_expr;
3060 ent.hash = iterative_hash_expr (inv_expr, 0);
3061 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3062
3063 if (!*slot)
3064 {
3065 *slot = XNEW (struct iv_inv_expr_ent);
3066 (*slot)->expr = inv_expr;
3067 (*slot)->hash = ent.hash;
3068 (*slot)->id = ++data->max_inv_expr_id;
3069 }
3070
3071 return *slot;
3072 }
3073
3074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3075 position to POS. If USE is not NULL, the candidate is set as related to
3076 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3077 replacement of the final value of the iv by a direct computation. */
3078
3079 static struct iv_cand *
3080 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3081 enum iv_position pos, struct iv_use *use,
3082 gimple *incremented_at, struct iv *orig_iv = NULL,
3083 bool doloop = false)
3084 {
3085 unsigned i;
3086 struct iv_cand *cand = NULL;
3087 tree type, orig_type;
3088
3089 gcc_assert (base && step);
3090
3091 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3092 live, but the ivopts code may replace a real pointer with one
3093 pointing before or after the memory block that is then adjusted
3094 into the memory block during the loop. FIXME: It would likely be
3095 better to actually force the pointer live and still use ivopts;
3096 for example, it would be enough to write the pointer into memory
3097 and keep it there until after the loop. */
3098 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3099 return NULL;
3100
3101 /* For non-original variables, make sure their values are computed in a type
3102 that does not invoke undefined behavior on overflows (since in general,
3103 we cannot prove that these induction variables are non-wrapping). */
3104 if (pos != IP_ORIGINAL)
3105 {
3106 orig_type = TREE_TYPE (base);
3107 type = generic_type_for (orig_type);
3108 if (type != orig_type)
3109 {
3110 base = fold_convert (type, base);
3111 step = fold_convert (type, step);
3112 }
3113 }
3114
3115 for (i = 0; i < data->vcands.length (); i++)
3116 {
3117 cand = data->vcands[i];
3118
3119 if (cand->pos != pos)
3120 continue;
3121
3122 if (cand->incremented_at != incremented_at
3123 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3124 && cand->ainc_use != use))
3125 continue;
3126
3127 if (operand_equal_p (base, cand->iv->base, 0)
3128 && operand_equal_p (step, cand->iv->step, 0)
3129 && (TYPE_PRECISION (TREE_TYPE (base))
3130 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3131 break;
3132 }
3133
3134 if (i == data->vcands.length ())
3135 {
3136 cand = XCNEW (struct iv_cand);
3137 cand->id = i;
3138 cand->iv = alloc_iv (data, base, step);
3139 cand->pos = pos;
3140 if (pos != IP_ORIGINAL)
3141 {
3142 if (doloop)
3143 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3144 else
3145 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3146 cand->var_after = cand->var_before;
3147 }
3148 cand->important = important;
3149 cand->incremented_at = incremented_at;
3150 cand->doloop_p = doloop;
3151 data->vcands.safe_push (cand);
3152
3153 if (!poly_int_tree_p (step))
3154 {
3155 find_inv_vars (data, &step, &cand->inv_vars);
3156
3157 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3158 /* Share bitmap between inv_vars and inv_exprs for cand. */
3159 if (inv_expr != NULL)
3160 {
3161 cand->inv_exprs = cand->inv_vars;
3162 cand->inv_vars = NULL;
3163 if (cand->inv_exprs)
3164 bitmap_clear (cand->inv_exprs);
3165 else
3166 cand->inv_exprs = BITMAP_ALLOC (NULL);
3167
3168 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3169 }
3170 }
3171
3172 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3173 cand->ainc_use = use;
3174 else
3175 cand->ainc_use = NULL;
3176
3177 cand->orig_iv = orig_iv;
3178 if (dump_file && (dump_flags & TDF_DETAILS))
3179 dump_cand (dump_file, cand);
3180 }
3181
3182 cand->important |= important;
3183 cand->doloop_p |= doloop;
3184
3185 /* Relate candidate to the group for which it is added. */
3186 if (use)
3187 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3188
3189 return cand;
3190 }
3191
3192 /* Returns true if incrementing the induction variable at the end of the LOOP
3193 is allowed.
3194
3195 The purpose is to avoid splitting latch edge with a biv increment, thus
3196 creating a jump, possibly confusing other optimization passes and leaving
3197 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3198 available (so we do not have a better alternative), or if the latch edge
3199 is already nonempty. */
3200
3201 static bool
3202 allow_ip_end_pos_p (class loop *loop)
3203 {
3204 if (!ip_normal_pos (loop))
3205 return true;
3206
3207 if (!empty_block_p (ip_end_pos (loop)))
3208 return true;
3209
3210 return false;
3211 }
3212
3213 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3214 Important field is set to IMPORTANT. */
3215
3216 static void
3217 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3218 bool important, struct iv_use *use)
3219 {
3220 basic_block use_bb = gimple_bb (use->stmt);
3221 machine_mode mem_mode;
3222 unsigned HOST_WIDE_INT cstepi;
3223
3224 /* If we insert the increment in any position other than the standard
3225 ones, we must ensure that it is incremented once per iteration.
3226 It must not be in an inner nested loop, or one side of an if
3227 statement. */
3228 if (use_bb->loop_father != data->current_loop
3229 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3230 || stmt_can_throw_internal (cfun, use->stmt)
3231 || !cst_and_fits_in_hwi (step))
3232 return;
3233
3234 cstepi = int_cst_value (step);
3235
3236 mem_mode = TYPE_MODE (use->mem_type);
3237 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3238 || USE_STORE_PRE_INCREMENT (mem_mode))
3239 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3240 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3241 || USE_STORE_PRE_DECREMENT (mem_mode))
3242 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3243 {
3244 enum tree_code code = MINUS_EXPR;
3245 tree new_base;
3246 tree new_step = step;
3247
3248 if (POINTER_TYPE_P (TREE_TYPE (base)))
3249 {
3250 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3251 code = POINTER_PLUS_EXPR;
3252 }
3253 else
3254 new_step = fold_convert (TREE_TYPE (base), new_step);
3255 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3256 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3257 use->stmt);
3258 }
3259 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3260 || USE_STORE_POST_INCREMENT (mem_mode))
3261 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3262 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3263 || USE_STORE_POST_DECREMENT (mem_mode))
3264 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3265 {
3266 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3267 use->stmt);
3268 }
3269 }
3270
3271 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3272 position to POS. If USE is not NULL, the candidate is set as related to
3273 it. The candidate computation is scheduled before exit condition and at
3274 the end of loop. */
3275
3276 static void
3277 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3278 struct iv_use *use, struct iv *orig_iv = NULL,
3279 bool doloop = false)
3280 {
3281 if (ip_normal_pos (data->current_loop))
3282 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3283 doloop);
3284 /* Exclude doloop candidate here since it requires decrement then comparison
3285 and jump, the IP_END position doesn't match. */
3286 if (!doloop && ip_end_pos (data->current_loop)
3287 && allow_ip_end_pos_p (data->current_loop))
3288 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3289 }
3290
3291 /* Adds standard iv candidates. */
3292
3293 static void
3294 add_standard_iv_candidates (struct ivopts_data *data)
3295 {
3296 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3297
3298 /* The same for a double-integer type if it is still fast enough. */
3299 if (TYPE_PRECISION
3300 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3301 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3302 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3303 build_int_cst (long_integer_type_node, 1), true, NULL);
3304
3305 /* The same for a double-integer type if it is still fast enough. */
3306 if (TYPE_PRECISION
3307 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3308 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3309 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3310 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3311 }
3312
3313
3314 /* Adds candidates based on the old induction variable IV. */
3315
3316 static void
3317 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3318 {
3319 gimple *phi;
3320 tree def;
3321 struct iv_cand *cand;
3322
3323 /* Check if this biv is used in address type use. */
3324 if (iv->no_overflow && iv->have_address_use
3325 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3326 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3327 {
3328 tree base = fold_convert (sizetype, iv->base);
3329 tree step = fold_convert (sizetype, iv->step);
3330
3331 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3332 add_candidate (data, base, step, true, NULL, iv);
3333 /* Add iv cand of the original type only if it has nonlinear use. */
3334 if (iv->nonlin_use)
3335 add_candidate (data, iv->base, iv->step, true, NULL);
3336 }
3337 else
3338 add_candidate (data, iv->base, iv->step, true, NULL);
3339
3340 /* The same, but with initial value zero. */
3341 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3342 add_candidate (data, size_int (0), iv->step, true, NULL);
3343 else
3344 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3345 iv->step, true, NULL);
3346
3347 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3348 if (gimple_code (phi) == GIMPLE_PHI)
3349 {
3350 /* Additionally record the possibility of leaving the original iv
3351 untouched. */
3352 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3353 /* Don't add candidate if it's from another PHI node because
3354 it's an affine iv appearing in the form of PEELED_CHREC. */
3355 phi = SSA_NAME_DEF_STMT (def);
3356 if (gimple_code (phi) != GIMPLE_PHI)
3357 {
3358 cand = add_candidate_1 (data,
3359 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3360 SSA_NAME_DEF_STMT (def));
3361 if (cand)
3362 {
3363 cand->var_before = iv->ssa_name;
3364 cand->var_after = def;
3365 }
3366 }
3367 else
3368 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3369 }
3370 }
3371
3372 /* Adds candidates based on the old induction variables. */
3373
3374 static void
3375 add_iv_candidate_for_bivs (struct ivopts_data *data)
3376 {
3377 unsigned i;
3378 struct iv *iv;
3379 bitmap_iterator bi;
3380
3381 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3382 {
3383 iv = ver_info (data, i)->iv;
3384 if (iv && iv->biv_p && !integer_zerop (iv->step))
3385 add_iv_candidate_for_biv (data, iv);
3386 }
3387 }
3388
3389 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3390
3391 static void
3392 record_common_cand (struct ivopts_data *data, tree base,
3393 tree step, struct iv_use *use)
3394 {
3395 class iv_common_cand ent;
3396 class iv_common_cand **slot;
3397
3398 ent.base = base;
3399 ent.step = step;
3400 ent.hash = iterative_hash_expr (base, 0);
3401 ent.hash = iterative_hash_expr (step, ent.hash);
3402
3403 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3404 if (*slot == NULL)
3405 {
3406 *slot = new iv_common_cand ();
3407 (*slot)->base = base;
3408 (*slot)->step = step;
3409 (*slot)->uses.create (8);
3410 (*slot)->hash = ent.hash;
3411 data->iv_common_cands.safe_push ((*slot));
3412 }
3413
3414 gcc_assert (use != NULL);
3415 (*slot)->uses.safe_push (use);
3416 return;
3417 }
3418
3419 /* Comparison function used to sort common candidates. */
3420
3421 static int
3422 common_cand_cmp (const void *p1, const void *p2)
3423 {
3424 unsigned n1, n2;
3425 const class iv_common_cand *const *const ccand1
3426 = (const class iv_common_cand *const *)p1;
3427 const class iv_common_cand *const *const ccand2
3428 = (const class iv_common_cand *const *)p2;
3429
3430 n1 = (*ccand1)->uses.length ();
3431 n2 = (*ccand2)->uses.length ();
3432 return n2 - n1;
3433 }
3434
3435 /* Adds IV candidates based on the common candidates recorded. */
3436
3437 static void
3438 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3439 {
3440 unsigned i, j;
3441 struct iv_cand *cand_1, *cand_2;
3442
3443 data->iv_common_cands.qsort (common_cand_cmp);
3444 for (i = 0; i < data->iv_common_cands.length (); i++)
3445 {
3446 class iv_common_cand *ptr = data->iv_common_cands[i];
3447
3448 /* Only add IV candidate if it's derived from multiple uses. */
3449 if (ptr->uses.length () <= 1)
3450 break;
3451
3452 cand_1 = NULL;
3453 cand_2 = NULL;
3454 if (ip_normal_pos (data->current_loop))
3455 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3456 false, IP_NORMAL, NULL, NULL);
3457
3458 if (ip_end_pos (data->current_loop)
3459 && allow_ip_end_pos_p (data->current_loop))
3460 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3461 false, IP_END, NULL, NULL);
3462
3463 /* Bind deriving uses and the new candidates. */
3464 for (j = 0; j < ptr->uses.length (); j++)
3465 {
3466 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3467 if (cand_1)
3468 bitmap_set_bit (group->related_cands, cand_1->id);
3469 if (cand_2)
3470 bitmap_set_bit (group->related_cands, cand_2->id);
3471 }
3472 }
3473
3474 /* Release data since it is useless from this point. */
3475 data->iv_common_cand_tab->empty ();
3476 data->iv_common_cands.truncate (0);
3477 }
3478
3479 /* Adds candidates based on the value of USE's iv. */
3480
3481 static void
3482 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3483 {
3484 poly_uint64 offset;
3485 tree base;
3486 struct iv *iv = use->iv;
3487 tree basetype = TREE_TYPE (iv->base);
3488
3489 /* Don't add a candidate for an iv_use whose base type is neither integer
3490 nor pointer, or lacks mode precision; instead, add a candidate for the
3491 corresponding scev in an unsigned type with the same precision. See PR93674 for more info. */
3492 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3493 || !type_has_mode_precision_p (basetype))
3494 {
3495 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3496 TYPE_UNSIGNED (basetype));
3497 add_candidate (data, fold_convert (basetype, iv->base),
3498 fold_convert (basetype, iv->step), false, NULL);
3499 return;
3500 }
3501
3502 add_candidate (data, iv->base, iv->step, false, use);
3503
3504 /* Record common candidate for use in case it can be shared by others. */
3505 record_common_cand (data, iv->base, iv->step, use);
3506
3507 /* Record common candidate with initial value zero. */
3508 basetype = TREE_TYPE (iv->base);
3509 if (POINTER_TYPE_P (basetype))
3510 basetype = sizetype;
3511 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3512
3513 /* Compare the cost of an address with an unscaled index with the cost of
3514 an address with a scaled index and add candidate if useful. */
3515 poly_int64 step;
3516 if (use != NULL
3517 && poly_int_tree_p (iv->step, &step)
3518 && address_p (use->type))
3519 {
3520 poly_int64 new_step;
3521 unsigned int fact = preferred_mem_scale_factor
3522 (use->iv->base,
3523 TYPE_MODE (use->mem_type),
3524 optimize_loop_for_speed_p (data->current_loop));
3525
3526 if (fact != 1
3527 && multiple_p (step, fact, &new_step))
3528 add_candidate (data, size_int (0),
3529 wide_int_to_tree (sizetype, new_step),
3530 true, NULL);
3531 }
3532
3533 /* Record common candidate with constant offset stripped in base.
3534 Like the use itself, we also add a candidate directly for it. */
3535 base = strip_offset (iv->base, &offset);
3536 if (maybe_ne (offset, 0U) || base != iv->base)
3537 {
3538 record_common_cand (data, base, iv->step, use);
3539 add_candidate (data, base, iv->step, false, use);
3540 }
3541
3542 /* Record common candidate with base_object removed in base. */
3543 base = iv->base;
3544 STRIP_NOPS (base);
3545 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3546 {
3547 tree step = iv->step;
3548
3549 STRIP_NOPS (step);
3550 base = TREE_OPERAND (base, 1);
3551 step = fold_convert (sizetype, step);
3552 record_common_cand (data, base, step, use);
3553 /* Also record common candidate with offset stripped. */
3554 base = strip_offset (base, &offset);
3555 if (maybe_ne (offset, 0U))
3556 record_common_cand (data, base, step, use);
3557 }
3558
3559 /* Finally, add auto-increment candidates. Make such variables
3560 important since other iv uses with the same base object may be based
3561 on them. */
3562 if (use != NULL && address_p (use->type))
3563 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3564 }
3565
3566 /* Adds candidates based on the uses. */
3567
3568 static void
3569 add_iv_candidate_for_groups (struct ivopts_data *data)
3570 {
3571 unsigned i;
3572
3573 /* Only add candidate for the first use in group. */
3574 for (i = 0; i < data->vgroups.length (); i++)
3575 {
3576 struct iv_group *group = data->vgroups[i];
3577
3578 gcc_assert (group->vuses[0] != NULL);
3579 add_iv_candidate_for_use (data, group->vuses[0]);
3580 }
3581 add_iv_candidate_derived_from_uses (data);
3582 }
3583
3584 /* Record important candidates and add them to related_cands bitmaps. */
3585
3586 static void
3587 record_important_candidates (struct ivopts_data *data)
3588 {
3589 unsigned i;
3590 struct iv_group *group;
3591
3592 for (i = 0; i < data->vcands.length (); i++)
3593 {
3594 struct iv_cand *cand = data->vcands[i];
3595
3596 if (cand->important)
3597 bitmap_set_bit (data->important_candidates, i);
3598 }
3599
3600 data->consider_all_candidates = (data->vcands.length ()
3601 <= CONSIDER_ALL_CANDIDATES_BOUND);
3602
3603 /* Add important candidates to groups' related_cands bitmaps. */
3604 for (i = 0; i < data->vgroups.length (); i++)
3605 {
3606 group = data->vgroups[i];
3607 bitmap_ior_into (group->related_cands, data->important_candidates);
3608 }
3609 }
3610
3611 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3612 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3613 we allocate a simple list to every use. */
3614
3615 static void
3616 alloc_use_cost_map (struct ivopts_data *data)
3617 {
3618 unsigned i, size, s;
3619
3620 for (i = 0; i < data->vgroups.length (); i++)
3621 {
3622 struct iv_group *group = data->vgroups[i];
3623
3624 if (data->consider_all_candidates)
3625 size = data->vcands.length ();
3626 else
3627 {
3628 s = bitmap_count_bits (group->related_cands);
3629
3630 /* Round up to a power of two, so that taking the modulo by it is fast. */
3631 size = s ? (1 << ceil_log2 (s)) : 1;
3632 }
3633
3634 group->n_map_members = size;
3635 group->cost_map = XCNEWVEC (class cost_pair, size);
3636 }
3637 }
3638
3639 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3640 on invariants INV_VARS and that the value used in expressing it is
3641 VALUE, and in case of iv elimination the comparison operator is COMP. */
3642
3643 static void
3644 set_group_iv_cost (struct ivopts_data *data,
3645 struct iv_group *group, struct iv_cand *cand,
3646 comp_cost cost, bitmap inv_vars, tree value,
3647 enum tree_code comp, bitmap inv_exprs)
3648 {
3649 unsigned i, s;
3650
3651 if (cost.infinite_cost_p ())
3652 {
3653 BITMAP_FREE (inv_vars);
3654 BITMAP_FREE (inv_exprs);
3655 return;
3656 }
3657
3658 if (data->consider_all_candidates)
3659 {
3660 group->cost_map[cand->id].cand = cand;
3661 group->cost_map[cand->id].cost = cost;
3662 group->cost_map[cand->id].inv_vars = inv_vars;
3663 group->cost_map[cand->id].inv_exprs = inv_exprs;
3664 group->cost_map[cand->id].value = value;
3665 group->cost_map[cand->id].comp = comp;
3666 return;
3667 }
3668
3669 /* n_map_members is a power of two, so this computes modulo. */
3670 s = cand->id & (group->n_map_members - 1);
3671 for (i = s; i < group->n_map_members; i++)
3672 if (!group->cost_map[i].cand)
3673 goto found;
3674 for (i = 0; i < s; i++)
3675 if (!group->cost_map[i].cand)
3676 goto found;
3677
3678 gcc_unreachable ();
3679
3680 found:
3681 group->cost_map[i].cand = cand;
3682 group->cost_map[i].cost = cost;
3683 group->cost_map[i].inv_vars = inv_vars;
3684 group->cost_map[i].inv_exprs = inv_exprs;
3685 group->cost_map[i].value = value;
3686 group->cost_map[i].comp = comp;
3687 }
3688
3689 /* Gets cost of (GROUP, CAND) pair. */
3690
3691 static class cost_pair *
3692 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3693 struct iv_cand *cand)
3694 {
3695 unsigned i, s;
3696 class cost_pair *ret;
3697
3698 if (!cand)
3699 return NULL;
3700
3701 if (data->consider_all_candidates)
3702 {
3703 ret = group->cost_map + cand->id;
3704 if (!ret->cand)
3705 return NULL;
3706
3707 return ret;
3708 }
3709
3710 /* n_map_members is a power of two, so this computes modulo. */
3711 s = cand->id & (group->n_map_members - 1);
3712 for (i = s; i < group->n_map_members; i++)
3713 if (group->cost_map[i].cand == cand)
3714 return group->cost_map + i;
3715 else if (group->cost_map[i].cand == NULL)
3716 return NULL;
3717 for (i = 0; i < s; i++)
3718 if (group->cost_map[i].cand == cand)
3719 return group->cost_map + i;
3720 else if (group->cost_map[i].cand == NULL)
3721 return NULL;
3722
3723 return NULL;
3724 }
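
/* Illustrative sketch (not part of GCC): set_group_iv_cost and
   get_group_iv_cost above amount to a tiny open-addressed hash table keyed
   by candidate id.  Because n_map_members is a power of two, the starting
   slot is found with a mask rather than a division, and probing then wraps
   around linearly.  A standalone version, with hypothetical types, might
   look like the following; the real code asserts instead of returning NULL
   when the table is full.

     struct pair { int id; int cost; int used; };

     static struct pair *
     probe (struct pair *map, unsigned nmemb, int id)
     {
       unsigned start = id & (nmemb - 1), i;
       for (i = start; i < nmemb; i++)
	 if (!map[i].used || map[i].id == id)
	   return &map[i];
       for (i = 0; i < start; i++)
	 if (!map[i].used || map[i].id == id)
	   return &map[i];
       return NULL;
     }
*/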
3725
3726 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3727 static rtx
3728 produce_memory_decl_rtl (tree obj, int *regno)
3729 {
3730 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3731 machine_mode address_mode = targetm.addr_space.address_mode (as);
3732 rtx x;
3733
3734 gcc_assert (obj);
3735 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3736 {
3737 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3738 x = gen_rtx_SYMBOL_REF (address_mode, name);
3739 SET_SYMBOL_REF_DECL (x, obj);
3740 x = gen_rtx_MEM (DECL_MODE (obj), x);
3741 set_mem_addr_space (x, as);
3742 targetm.encode_section_info (obj, x, true);
3743 }
3744 else
3745 {
3746 x = gen_raw_REG (address_mode, (*regno)++);
3747 x = gen_rtx_MEM (DECL_MODE (obj), x);
3748 set_mem_addr_space (x, as);
3749 }
3750
3751 return x;
3752 }
3753
3754 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3755 walk_tree. DATA contains the actual fake register number. */
3756
3757 static tree
3758 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3759 {
3760 tree obj = NULL_TREE;
3761 rtx x = NULL_RTX;
3762 int *regno = (int *) data;
3763
3764 switch (TREE_CODE (*expr_p))
3765 {
3766 case ADDR_EXPR:
3767 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3768 handled_component_p (*expr_p);
3769 expr_p = &TREE_OPERAND (*expr_p, 0))
3770 continue;
3771 obj = *expr_p;
3772 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3773 x = produce_memory_decl_rtl (obj, regno);
3774 break;
3775
3776 case SSA_NAME:
3777 *ws = 0;
3778 obj = SSA_NAME_VAR (*expr_p);
3779 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3780 if (!obj)
3781 return NULL_TREE;
3782 if (!DECL_RTL_SET_P (obj))
3783 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3784 break;
3785
3786 case VAR_DECL:
3787 case PARM_DECL:
3788 case RESULT_DECL:
3789 *ws = 0;
3790 obj = *expr_p;
3791
3792 if (DECL_RTL_SET_P (obj))
3793 break;
3794
3795 if (DECL_MODE (obj) == BLKmode)
3796 x = produce_memory_decl_rtl (obj, regno);
3797 else
3798 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3799
3800 break;
3801
3802 default:
3803 break;
3804 }
3805
3806 if (x)
3807 {
3808 decl_rtl_to_reset.safe_push (obj);
3809 SET_DECL_RTL (obj, x);
3810 }
3811
3812 return NULL_TREE;
3813 }
3814
3815 /* Predict whether the given loop will be transformed in the RTL
3816 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3817 This is only for target independent checks, see targetm.predict_doloop_p
3818 for the target dependent ones.
3819
3820    Note that according to some initial investigation, checks such as the costly
3821    niter check and invalid stmt scanning do not bring much gain in general
3822    cases, so keep this as simple as possible for now.
3823
3824    Some RTL-specific checks seem impossible to perform on gimple; if any new
3825    checks or easy checks are missing here, please add them.  */
3826
3827 static bool
3828 generic_predict_doloop_p (struct ivopts_data *data)
3829 {
3830 class loop *loop = data->current_loop;
3831
3832 /* Call target hook for target dependent checks. */
3833 if (!targetm.predict_doloop_p (loop))
3834 {
3835 if (dump_file && (dump_flags & TDF_DETAILS))
3836 fprintf (dump_file, "Predict doloop failure due to"
3837 " target specific checks.\n");
3838 return false;
3839 }
3840
3841   /* Similar to doloop_optimize, check the iteration description to see whether
3842      it is suitable.  Keep it as simple as possible; feel free to extend it
3843      if any multiple-exit cases turn out to matter.  */
3844 edge exit = single_dom_exit (loop);
3845 class tree_niter_desc *niter_desc;
3846 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3847 {
3848 if (dump_file && (dump_flags & TDF_DETAILS))
3849 fprintf (dump_file, "Predict doloop failure due to"
3850 " unexpected niters.\n");
3851 return false;
3852 }
3853
3854   /* Similar to doloop_optimize, check whether the iteration count is too
3855      small to be profitable.  */
3856 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3857 if (est_niter == -1)
3858 est_niter = get_likely_max_loop_iterations_int (loop);
3859 if (est_niter >= 0 && est_niter < 3)
3860 {
3861 if (dump_file && (dump_flags & TDF_DETAILS))
3862 fprintf (dump_file,
3863 "Predict doloop failure due to"
3864 " too few iterations (%u).\n",
3865 (unsigned int) est_niter);
3866 return false;
3867 }
3868
3869 return true;
3870 }
3871
3872 /* Determines cost of the computation of EXPR. */
3873
3874 static unsigned
3875 computation_cost (tree expr, bool speed)
3876 {
3877 rtx_insn *seq;
3878 rtx rslt;
3879 tree type = TREE_TYPE (expr);
3880 unsigned cost;
3881 /* Avoid using hard regs in ways which may be unsupported. */
3882 int regno = LAST_VIRTUAL_REGISTER + 1;
3883 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3884 enum node_frequency real_frequency = node->frequency;
3885
3886 node->frequency = NODE_FREQUENCY_NORMAL;
3887 crtl->maybe_hot_insn_p = speed;
3888   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3889 start_sequence ();
3890 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3891 seq = get_insns ();
3892 end_sequence ();
3893 default_rtl_profile ();
3894 node->frequency = real_frequency;
3895
3896 cost = seq_cost (seq, speed);
3897 if (MEM_P (rslt))
3898 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3899 TYPE_ADDR_SPACE (type), speed);
3900 else if (!REG_P (rslt))
3901 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3902
3903 return cost;
3904 }
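
/* Editorial note (illustrative summary, not from the original source): the
   cost above is obtained by actually expanding EXPR to an RTL sequence.
   prepare_decl_rtl first gives every referenced decl a fake virtual register
   or a MEM, the insn costs of the generated sequence are summed with
   seq_cost, and if the result is itself a memory reference or a
   non-register expression, its address or set_src cost is added on top.  */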
3905
3906 /* Returns variable containing the value of candidate CAND at statement AT. */
3907
3908 static tree
3909 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3910 {
3911 if (stmt_after_increment (loop, cand, stmt))
3912 return cand->var_after;
3913 else
3914 return cand->var_before;
3915 }
3916
3917 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3918 same precision that is at least as wide as the precision of TYPE, stores
3919 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3920 type of A and B. */
3921
3922 static tree
3923 determine_common_wider_type (tree *a, tree *b)
3924 {
3925 tree wider_type = NULL;
3926 tree suba, subb;
3927 tree atype = TREE_TYPE (*a);
3928
3929 if (CONVERT_EXPR_P (*a))
3930 {
3931 suba = TREE_OPERAND (*a, 0);
3932 wider_type = TREE_TYPE (suba);
3933 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3934 return atype;
3935 }
3936 else
3937 return atype;
3938
3939 if (CONVERT_EXPR_P (*b))
3940 {
3941 subb = TREE_OPERAND (*b, 0);
3942 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3943 return atype;
3944 }
3945 else
3946 return atype;
3947
3948 *a = suba;
3949 *b = subb;
3950 return wider_type;
3951 }
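
/* For example (illustrative only; a_l and b_l are hypothetical names): if
   *A is (unsigned int) a_l and *B is (unsigned int) b_l, where a_l and b_l
   are 64-bit values and unsigned int is 32-bit, the function stores a_l to
   *A, b_l to *B and returns the 64-bit type, so the caller can fold their
   difference in the wider type before truncating the result back to
   32 bits.  */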
3952
3953 /* Determines the expression by which USE is expressed from induction variable
3954    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3955    decomposed form: the invariant part is stored in AFF_INV, the variant
3956    part in AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if it's
3957    non-null.  Returns false if USE cannot be expressed using CAND.  */
3958
3959 static bool
3960 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3961 struct iv_cand *cand, class aff_tree *aff_inv,
3962 class aff_tree *aff_var, widest_int *prat = NULL)
3963 {
3964 tree ubase = use->iv->base, ustep = use->iv->step;
3965 tree cbase = cand->iv->base, cstep = cand->iv->step;
3966 tree common_type, uutype, var, cstep_common;
3967 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3968 aff_tree aff_cbase;
3969 widest_int rat;
3970
3971 /* We must have a precision to express the values of use. */
3972 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3973 return false;
3974
3975 var = var_at_stmt (loop, cand, at);
3976 uutype = unsigned_type_for (utype);
3977
3978 /* If the conversion is not noop, perform it. */
3979 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3980 {
3981 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3982 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3983 {
3984 tree inner_base, inner_step, inner_type;
3985 inner_base = TREE_OPERAND (cbase, 0);
3986 if (CONVERT_EXPR_P (cstep))
3987 inner_step = TREE_OPERAND (cstep, 0);
3988 else
3989 inner_step = cstep;
3990
3991 inner_type = TREE_TYPE (inner_base);
3992 	  /* If the candidate is added from a biv whose type is smaller than
3993 	     ctype, we know neither the candidate nor the biv will overflow.
3994 	     In this case, it's safe to skip the conversion in the candidate.
3995 	     As an example, (unsigned short)((unsigned long)A) equals
3996 	     (unsigned short)A, if A has a type no larger than short.  */
3997 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3998 {
3999 cbase = inner_base;
4000 cstep = inner_step;
4001 }
4002 }
4003 cbase = fold_convert (uutype, cbase);
4004 cstep = fold_convert (uutype, cstep);
4005 var = fold_convert (uutype, var);
4006 }
4007
4008 /* Ratio is 1 when computing the value of biv cand by itself.
4009 We can't rely on constant_multiple_of in this case because the
4010 use is created after the original biv is selected. The call
4011 could fail because of inconsistent fold behavior. See PR68021
4012 for more information. */
4013 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4014 {
4015 gcc_assert (is_gimple_assign (use->stmt));
4016 gcc_assert (use->iv->ssa_name == cand->var_after);
4017 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4018 rat = 1;
4019 }
4020 else if (!constant_multiple_of (ustep, cstep, &rat))
4021 return false;
4022
4023 if (prat)
4024 *prat = rat;
4025
4026 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4027 type, we achieve better folding by computing their difference in this
4028 wider type, and cast the result to UUTYPE. We do not need to worry about
4029      overflows, as all the arithmetic will in the end be performed in UUTYPE
4030 anyway. */
4031 common_type = determine_common_wider_type (&ubase, &cbase);
4032
4033 /* use = ubase - ratio * cbase + ratio * var. */
4034 tree_to_aff_combination (ubase, common_type, aff_inv);
4035 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4036 tree_to_aff_combination (var, uutype, aff_var);
4037
4038 /* We need to shift the value if we are after the increment. */
4039 if (stmt_after_increment (loop, cand, at))
4040 {
4041 aff_tree cstep_aff;
4042
4043 if (common_type != uutype)
4044 cstep_common = fold_convert (common_type, cstep);
4045 else
4046 cstep_common = cstep;
4047
4048 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4049 aff_combination_add (&aff_cbase, &cstep_aff);
4050 }
4051
4052 aff_combination_scale (&aff_cbase, -rat);
4053 aff_combination_add (aff_inv, &aff_cbase);
4054 if (common_type != uutype)
4055 aff_combination_convert (aff_inv, uutype);
4056
4057 aff_combination_scale (aff_var, rat);
4058 return true;
4059 }
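
/* A concrete (purely illustrative) instance of the decomposition above:
   for a use with base &a[3] and step 4 (an int array access) and a
   candidate i with base 0 and step 1, the ratio is ustep / cstep = 4, so
   the use is expressed as &a[3] - 4 * 0 + 4 * i, i.e. AFF_INV holds &a[3]
   and AFF_VAR holds 4 * i.  */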
4060
4061 /* Determines the expression by which USE is expressed from induction variable
4062    CAND at statement AT in LOOP.  The expression is stored in a decomposed
4063    form in AFF.  Returns false if USE cannot be expressed using CAND.  */
4064
4065 static bool
4066 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4067 struct iv_cand *cand, class aff_tree *aff)
4068 {
4069 aff_tree aff_var;
4070
4071 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4072 return false;
4073
4074 aff_combination_add (aff, &aff_var);
4075 return true;
4076 }
4077
4078 /* Return the type of USE. */
4079
4080 static tree
4081 get_use_type (struct iv_use *use)
4082 {
4083 tree base_type = TREE_TYPE (use->iv->base);
4084 tree type;
4085
4086 if (use->type == USE_REF_ADDRESS)
4087 {
4088 /* The base_type may be a void pointer. Create a pointer type based on
4089 the mem_ref instead. */
4090 type = build_pointer_type (TREE_TYPE (*use->op_p));
4091 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4092 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4093 }
4094 else
4095 type = base_type;
4096
4097 return type;
4098 }
4099
4100 /* Determines the expression by which USE is expressed from induction variable
4101 CAND at statement AT in LOOP. The computation is unshared. */
4102
4103 static tree
4104 get_computation_at (class loop *loop, gimple *at,
4105 struct iv_use *use, struct iv_cand *cand)
4106 {
4107 aff_tree aff;
4108 tree type = get_use_type (use);
4109
4110 if (!get_computation_aff (loop, at, use, cand, &aff))
4111 return NULL_TREE;
4112 unshare_aff_combination (&aff);
4113 return fold_convert (type, aff_combination_to_tree (&aff));
4114 }
4115
4116 /* Like get_computation_at, but try harder, even if the computation
4117 is more expensive. Intended for debug stmts. */
4118
4119 static tree
4120 get_debug_computation_at (class loop *loop, gimple *at,
4121 struct iv_use *use, struct iv_cand *cand)
4122 {
4123 if (tree ret = get_computation_at (loop, at, use, cand))
4124 return ret;
4125
4126 tree ubase = use->iv->base, ustep = use->iv->step;
4127 tree cbase = cand->iv->base, cstep = cand->iv->step;
4128 tree var;
4129 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4130 widest_int rat;
4131
4132 /* We must have a precision to express the values of use. */
4133 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4134 return NULL_TREE;
4135
4136   /* Try to handle the case that get_computation_at doesn't:
4137 try to express
4138 use = ubase + (var - cbase) / ratio. */
4139 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4140 &rat))
4141 return NULL_TREE;
4142
4143 bool neg_p = false;
4144 if (wi::neg_p (rat))
4145 {
4146 if (TYPE_UNSIGNED (ctype))
4147 return NULL_TREE;
4148 neg_p = true;
4149 rat = wi::neg (rat);
4150 }
4151
4152 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4153 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4154 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4155 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4156 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4157 if (!use->iv->no_overflow
4158 && !cand->iv->no_overflow
4159 && !integer_pow2p (cstep))
4160 return NULL_TREE;
4161
4162 int bits = wi::exact_log2 (rat);
4163 if (bits == -1)
4164 bits = wi::floor_log2 (rat) + 1;
4165 if (!cand->iv->no_overflow
4166 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4167 return NULL_TREE;
4168
4169 var = var_at_stmt (loop, cand, at);
4170
4171 if (POINTER_TYPE_P (ctype))
4172 {
4173 ctype = unsigned_type_for (ctype);
4174 cbase = fold_convert (ctype, cbase);
4175 cstep = fold_convert (ctype, cstep);
4176 var = fold_convert (ctype, var);
4177 }
4178
4179 if (stmt_after_increment (loop, cand, at))
4180 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4181 unshare_expr (cstep));
4182
4183 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4184 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4185 wide_int_to_tree (TREE_TYPE (var), rat));
4186 if (POINTER_TYPE_P (utype))
4187 {
4188 var = fold_convert (sizetype, var);
4189 if (neg_p)
4190 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4191 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4192 }
4193 else
4194 {
4195 var = fold_convert (utype, var);
4196 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4197 ubase, var);
4198 }
4199 return var;
4200 }
4201
4202 /* Adjust the cost COST for being in loop setup rather than loop body.
4203 If we're optimizing for space, the loop setup overhead is constant;
4204    if we're optimizing for speed, amortize it over the expected number of
4205    iterations.  If ROUND_UP_P is true, the result is rounded up rather than
4206    truncated toward zero when optimizing for speed.  */
4207 static int64_t
4208 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4209 bool round_up_p = false)
4210 {
4211 if (cost == INFTY)
4212 return cost;
4213 else if (optimize_loop_for_speed_p (data->current_loop))
4214 {
4215 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4216 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4217 }
4218 else
4219 return cost;
4220 }
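
/* Worked example (illustrative): with an average of 10 iterations, a setup
   cost of 25 becomes 25 / 10 = 2 when optimizing for speed, or
   (25 + 9) / 10 = 3 with ROUND_UP_P; when optimizing for size the full 25
   is kept.  */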
4221
4222 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4223 EXPR operand holding the shift. COST0 and COST1 are the costs for
4224 calculating the operands of EXPR. Returns true if successful, and returns
4225 the cost in COST. */
4226
4227 static bool
4228 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4229 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4230 {
4231 comp_cost res;
4232 tree op1 = TREE_OPERAND (expr, 1);
4233 tree cst = TREE_OPERAND (mult, 1);
4234 tree multop = TREE_OPERAND (mult, 0);
4235 int m = exact_log2 (int_cst_value (cst));
4236 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4237 int as_cost, sa_cost;
4238 bool mult_in_op1;
4239
4240 if (!(m >= 0 && m < maxm))
4241 return false;
4242
4243 STRIP_NOPS (op1);
4244 mult_in_op1 = operand_equal_p (op1, mult, 0);
4245
4246 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4247
4248 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4249 use that in preference to a shift insn followed by an add insn. */
4250 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4251 ? shiftadd_cost (speed, mode, m)
4252 : (mult_in_op1
4253 ? shiftsub1_cost (speed, mode, m)
4254 : shiftsub0_cost (speed, mode, m)));
4255
4256 res = comp_cost (MIN (as_cost, sa_cost), 0);
4257 res += (mult_in_op1 ? cost0 : cost1);
4258
4259 STRIP_NOPS (multop);
4260 if (!is_gimple_val (multop))
4261 res += force_expr_to_var_cost (multop, speed);
4262
4263 *cost = res;
4264 return true;
4265 }
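
/* Example (illustrative): for EXPR a + b * 8, MULT is b * 8 and m is 3,
   so the returned cost is MIN (add + shift-by-3, shiftadd) plus the cost
   of computing a.  For a - b * 8 the shiftsub1 cost is used instead, and
   for b * 8 - a the shiftsub0 cost, depending on which operand holds the
   multiplication.  */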
4266
4267 /* Estimates cost of forcing expression EXPR into a variable. */
4268
4269 static comp_cost
4270 force_expr_to_var_cost (tree expr, bool speed)
4271 {
4272 static bool costs_initialized = false;
4273 static unsigned integer_cost [2];
4274 static unsigned symbol_cost [2];
4275 static unsigned address_cost [2];
4276 tree op0, op1;
4277 comp_cost cost0, cost1, cost;
4278 machine_mode mode;
4279 scalar_int_mode int_mode;
4280
4281 if (!costs_initialized)
4282 {
4283 tree type = build_pointer_type (integer_type_node);
4284 tree var, addr;
4285 rtx x;
4286 int i;
4287
4288 var = create_tmp_var_raw (integer_type_node, "test_var");
4289 TREE_STATIC (var) = 1;
4290 x = produce_memory_decl_rtl (var, NULL);
4291 SET_DECL_RTL (var, x);
4292
4293 addr = build1 (ADDR_EXPR, type, var);
4294
4295
4296 for (i = 0; i < 2; i++)
4297 {
4298 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4299 2000), i);
4300
4301 symbol_cost[i] = computation_cost (addr, i) + 1;
4302
4303 address_cost[i]
4304 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4305 if (dump_file && (dump_flags & TDF_DETAILS))
4306 {
4307 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4308 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4309 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4310 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4311 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4312 fprintf (dump_file, "\n");
4313 }
4314 }
4315
4316 costs_initialized = true;
4317 }
4318
4319 STRIP_NOPS (expr);
4320
4321 if (SSA_VAR_P (expr))
4322 return no_cost;
4323
4324 if (is_gimple_min_invariant (expr))
4325 {
4326 if (poly_int_tree_p (expr))
4327 return comp_cost (integer_cost [speed], 0);
4328
4329 if (TREE_CODE (expr) == ADDR_EXPR)
4330 {
4331 tree obj = TREE_OPERAND (expr, 0);
4332
4333 if (VAR_P (obj)
4334 || TREE_CODE (obj) == PARM_DECL
4335 || TREE_CODE (obj) == RESULT_DECL)
4336 return comp_cost (symbol_cost [speed], 0);
4337 }
4338
4339 return comp_cost (address_cost [speed], 0);
4340 }
4341
4342 switch (TREE_CODE (expr))
4343 {
4344 case POINTER_PLUS_EXPR:
4345 case PLUS_EXPR:
4346 case MINUS_EXPR:
4347 case MULT_EXPR:
4348 case TRUNC_DIV_EXPR:
4349 case BIT_AND_EXPR:
4350 case BIT_IOR_EXPR:
4351 case LSHIFT_EXPR:
4352 case RSHIFT_EXPR:
4353 op0 = TREE_OPERAND (expr, 0);
4354 op1 = TREE_OPERAND (expr, 1);
4355 STRIP_NOPS (op0);
4356 STRIP_NOPS (op1);
4357 break;
4358
4359 CASE_CONVERT:
4360 case NEGATE_EXPR:
4361 case BIT_NOT_EXPR:
4362 op0 = TREE_OPERAND (expr, 0);
4363 STRIP_NOPS (op0);
4364 op1 = NULL_TREE;
4365 break;
4366     /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case, we
4367        introduce a COND_EXPR for the IV base, so we need to support better cost
4368        estimation for this COND_EXPR and tcc_comparison.  */
4369 case COND_EXPR:
4370 op0 = TREE_OPERAND (expr, 1);
4371 STRIP_NOPS (op0);
4372 op1 = TREE_OPERAND (expr, 2);
4373 STRIP_NOPS (op1);
4374 break;
4375 case LT_EXPR:
4376 case LE_EXPR:
4377 case GT_EXPR:
4378 case GE_EXPR:
4379 case EQ_EXPR:
4380 case NE_EXPR:
4381 case UNORDERED_EXPR:
4382 case ORDERED_EXPR:
4383 case UNLT_EXPR:
4384 case UNLE_EXPR:
4385 case UNGT_EXPR:
4386 case UNGE_EXPR:
4387 case UNEQ_EXPR:
4388 case LTGT_EXPR:
4389 case MAX_EXPR:
4390 case MIN_EXPR:
4391 op0 = TREE_OPERAND (expr, 0);
4392 STRIP_NOPS (op0);
4393 op1 = TREE_OPERAND (expr, 1);
4394 STRIP_NOPS (op1);
4395 break;
4396
4397 default:
4398 /* Just an arbitrary value, FIXME. */
4399 return comp_cost (target_spill_cost[speed], 0);
4400 }
4401
4402 if (op0 == NULL_TREE
4403 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4404 cost0 = no_cost;
4405 else
4406 cost0 = force_expr_to_var_cost (op0, speed);
4407
4408 if (op1 == NULL_TREE
4409 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4410 cost1 = no_cost;
4411 else
4412 cost1 = force_expr_to_var_cost (op1, speed);
4413
4414 mode = TYPE_MODE (TREE_TYPE (expr));
4415 switch (TREE_CODE (expr))
4416 {
4417 case POINTER_PLUS_EXPR:
4418 case PLUS_EXPR:
4419 case MINUS_EXPR:
4420 case NEGATE_EXPR:
4421 cost = comp_cost (add_cost (speed, mode), 0);
4422 if (TREE_CODE (expr) != NEGATE_EXPR)
4423 {
4424 tree mult = NULL_TREE;
4425 comp_cost sa_cost;
4426 if (TREE_CODE (op1) == MULT_EXPR)
4427 mult = op1;
4428 else if (TREE_CODE (op0) == MULT_EXPR)
4429 mult = op0;
4430
4431 if (mult != NULL_TREE
4432 && is_a <scalar_int_mode> (mode, &int_mode)
4433 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4434 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4435 speed, &sa_cost))
4436 return sa_cost;
4437 }
4438 break;
4439
4440 CASE_CONVERT:
4441 {
4442 tree inner_mode, outer_mode;
4443 outer_mode = TREE_TYPE (expr);
4444 inner_mode = TREE_TYPE (op0);
4445 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4446 TYPE_MODE (inner_mode), speed), 0);
4447 }
4448 break;
4449
4450 case MULT_EXPR:
4451 if (cst_and_fits_in_hwi (op0))
4452 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4453 mode, speed), 0);
4454 else if (cst_and_fits_in_hwi (op1))
4455 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4456 mode, speed), 0);
4457 else
4458 return comp_cost (target_spill_cost [speed], 0);
4459 break;
4460
4461 case TRUNC_DIV_EXPR:
4462 /* Division by power of two is usually cheap, so we allow it. Forbid
4463 anything else. */
4464 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4465 cost = comp_cost (add_cost (speed, mode), 0);
4466 else
4467 cost = comp_cost (target_spill_cost[speed], 0);
4468 break;
4469
4470 case BIT_AND_EXPR:
4471 case BIT_IOR_EXPR:
4472 case BIT_NOT_EXPR:
4473 case LSHIFT_EXPR:
4474 case RSHIFT_EXPR:
4475 cost = comp_cost (add_cost (speed, mode), 0);
4476 break;
4477 case COND_EXPR:
4478 op0 = TREE_OPERAND (expr, 0);
4479 STRIP_NOPS (op0);
4480 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4481 || CONSTANT_CLASS_P (op0))
4482 cost = no_cost;
4483 else
4484 cost = force_expr_to_var_cost (op0, speed);
4485 break;
4486 case LT_EXPR:
4487 case LE_EXPR:
4488 case GT_EXPR:
4489 case GE_EXPR:
4490 case EQ_EXPR:
4491 case NE_EXPR:
4492 case UNORDERED_EXPR:
4493 case ORDERED_EXPR:
4494 case UNLT_EXPR:
4495 case UNLE_EXPR:
4496 case UNGT_EXPR:
4497 case UNGE_EXPR:
4498 case UNEQ_EXPR:
4499 case LTGT_EXPR:
4500 case MAX_EXPR:
4501 case MIN_EXPR:
4502       /* Simply use the add cost for now; FIXME if there is a more accurate way
4503 	 to evaluate the cost.  */
4504 cost = comp_cost (add_cost (speed, mode), 0);
4505 break;
4506
4507 default:
4508 gcc_unreachable ();
4509 }
4510
4511 cost += cost0;
4512 cost += cost1;
4513 return cost;
4514 }
4515
4516 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4517 invariants the computation depends on. */
4518
4519 static comp_cost
4520 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4521 {
4522 if (!expr)
4523 return no_cost;
4524
4525 find_inv_vars (data, &expr, inv_vars);
4526 return force_expr_to_var_cost (expr, data->speed);
4527 }
4528
4529 /* Returns cost of auto-modifying address expression in shape base + offset.
4530 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4531 address expression. The address expression has ADDR_MODE in addr space
4532 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4533 speed or size. */
4534
4535 enum ainc_type
4536 {
4537 AINC_PRE_INC, /* Pre increment. */
4538 AINC_PRE_DEC, /* Pre decrement. */
4539 AINC_POST_INC, /* Post increment. */
4540 AINC_POST_DEC, /* Post decrement. */
4541 AINC_NONE /* Also the number of auto increment types. */
4542 };
4543
4544 struct ainc_cost_data
4545 {
4546 int64_t costs[AINC_NONE];
4547 };
4548
4549 static comp_cost
4550 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4551 machine_mode addr_mode, machine_mode mem_mode,
4552 addr_space_t as, bool speed)
4553 {
4554 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4555 && !USE_STORE_PRE_DECREMENT (mem_mode)
4556 && !USE_LOAD_POST_DECREMENT (mem_mode)
4557 && !USE_STORE_POST_DECREMENT (mem_mode)
4558 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4559 && !USE_STORE_PRE_INCREMENT (mem_mode)
4560 && !USE_LOAD_POST_INCREMENT (mem_mode)
4561 && !USE_STORE_POST_INCREMENT (mem_mode))
4562 return infinite_cost;
4563
4564 static vec<ainc_cost_data *> ainc_cost_data_list;
4565 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4566 if (idx >= ainc_cost_data_list.length ())
4567 {
4568       unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4569
4570 gcc_assert (nsize > idx);
4571 ainc_cost_data_list.safe_grow_cleared (nsize);
4572 }
4573
4574 ainc_cost_data *data = ainc_cost_data_list[idx];
4575 if (data == NULL)
4576 {
4577 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4578
4579 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4580 data->costs[AINC_PRE_DEC] = INFTY;
4581 data->costs[AINC_POST_DEC] = INFTY;
4582 data->costs[AINC_PRE_INC] = INFTY;
4583 data->costs[AINC_POST_INC] = INFTY;
4584 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4585 || USE_STORE_PRE_DECREMENT (mem_mode))
4586 {
4587 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4588
4589 if (memory_address_addr_space_p (mem_mode, addr, as))
4590 data->costs[AINC_PRE_DEC]
4591 = address_cost (addr, mem_mode, as, speed);
4592 }
4593 if (USE_LOAD_POST_DECREMENT (mem_mode)
4594 || USE_STORE_POST_DECREMENT (mem_mode))
4595 {
4596 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4597
4598 if (memory_address_addr_space_p (mem_mode, addr, as))
4599 data->costs[AINC_POST_DEC]
4600 = address_cost (addr, mem_mode, as, speed);
4601 }
4602 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4603 || USE_STORE_PRE_INCREMENT (mem_mode))
4604 {
4605 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4606
4607 if (memory_address_addr_space_p (mem_mode, addr, as))
4608 data->costs[AINC_PRE_INC]
4609 = address_cost (addr, mem_mode, as, speed);
4610 }
4611 if (USE_LOAD_POST_INCREMENT (mem_mode)
4612 || USE_STORE_POST_INCREMENT (mem_mode))
4613 {
4614 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4615
4616 if (memory_address_addr_space_p (mem_mode, addr, as))
4617 data->costs[AINC_POST_INC]
4618 = address_cost (addr, mem_mode, as, speed);
4619 }
4620 ainc_cost_data_list[idx] = data;
4621 }
4622
4623 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4624 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4625 return comp_cost (data->costs[AINC_POST_INC], 0);
4626 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4627 return comp_cost (data->costs[AINC_POST_DEC], 0);
4628 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4629 return comp_cost (data->costs[AINC_PRE_INC], 0);
4630 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4631 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4632
4633 return infinite_cost;
4634 }
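
/* The final comparisons above map (offset, step) pairs to addressing
   modes.  For instance (illustrative), with a 4-byte access and step 4,
   offset 0 selects AINC_POST_INC (*p++) and offset 4 selects AINC_PRE_INC
   (*++p); with step -4, offsets 0 and -4 select AINC_POST_DEC and
   AINC_PRE_DEC respectively.  */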
4635
4636 /* Return cost of computing USE's address expression by using CAND.
4637    AFF_INV and AFF_VAR represent the invariant and variant parts of the
4638    address expression, respectively.  If AFF_INV is simple, store
4639    the loop-invariant variables it depends on in INV_VARS;
4640    if AFF_INV is complicated, handle it as a new invariant expression
4641    and record it in INV_EXPR.  RATIO is the multiple between the
4642    steps of USE and CAND.  If CAN_AUTOINC is non-NULL, store a boolean
4643    value in it indicating whether this is an auto-increment address.  */
4644
4645 static comp_cost
4646 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4647 struct iv_cand *cand, aff_tree *aff_inv,
4648 aff_tree *aff_var, HOST_WIDE_INT ratio,
4649 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4650 bool *can_autoinc, bool speed)
4651 {
4652 rtx addr;
4653 bool simple_inv = true;
4654 tree comp_inv = NULL_TREE, type = aff_var->type;
4655 comp_cost var_cost = no_cost, cost = no_cost;
4656 struct mem_address parts = {NULL_TREE, integer_one_node,
4657 NULL_TREE, NULL_TREE, NULL_TREE};
4658 machine_mode addr_mode = TYPE_MODE (type);
4659 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4660 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4661 /* Only true if ratio != 1. */
4662 bool ok_with_ratio_p = false;
4663 bool ok_without_ratio_p = false;
4664
4665 if (!aff_combination_const_p (aff_inv))
4666 {
4667 parts.index = integer_one_node;
4668 /* Addressing mode "base + index". */
4669 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4670 if (ratio != 1)
4671 {
4672 parts.step = wide_int_to_tree (type, ratio);
4673 /* Addressing mode "base + index << scale". */
4674 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4675 if (!ok_with_ratio_p)
4676 parts.step = NULL_TREE;
4677 }
4678 if (ok_with_ratio_p || ok_without_ratio_p)
4679 {
4680 if (maybe_ne (aff_inv->offset, 0))
4681 {
4682 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4683 /* Addressing mode "base + index [<< scale] + offset". */
4684 if (!valid_mem_ref_p (mem_mode, as, &parts))
4685 parts.offset = NULL_TREE;
4686 else
4687 aff_inv->offset = 0;
4688 }
4689
4690 move_fixed_address_to_symbol (&parts, aff_inv);
4691 /* Base is fixed address and is moved to symbol part. */
4692 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4693 parts.base = NULL_TREE;
4694
4695 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4696 if (parts.symbol != NULL_TREE
4697 && !valid_mem_ref_p (mem_mode, as, &parts))
4698 {
4699 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4700 parts.symbol = NULL_TREE;
4701 /* Reset SIMPLE_INV since symbol address needs to be computed
4702 outside of address expression in this case. */
4703 simple_inv = false;
4704 /* Symbol part is moved back to base part, it can't be NULL. */
4705 parts.base = integer_one_node;
4706 }
4707 }
4708 else
4709 parts.index = NULL_TREE;
4710 }
4711 else
4712 {
4713 poly_int64 ainc_step;
4714 if (can_autoinc
4715 && ratio == 1
4716 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4717 {
4718 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4719
4720 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4721 ainc_offset += ainc_step;
4722 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4723 addr_mode, mem_mode, as, speed);
4724 if (!cost.infinite_cost_p ())
4725 {
4726 *can_autoinc = true;
4727 return cost;
4728 }
4729 cost = no_cost;
4730 }
4731 if (!aff_combination_zero_p (aff_inv))
4732 {
4733 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4734 /* Addressing mode "base + offset". */
4735 if (!valid_mem_ref_p (mem_mode, as, &parts))
4736 parts.offset = NULL_TREE;
4737 else
4738 aff_inv->offset = 0;
4739 }
4740 }
4741
4742 if (simple_inv)
4743 simple_inv = (aff_inv == NULL
4744 || aff_combination_const_p (aff_inv)
4745 || aff_combination_singleton_var_p (aff_inv));
4746 if (!aff_combination_zero_p (aff_inv))
4747 comp_inv = aff_combination_to_tree (aff_inv);
4748 if (comp_inv != NULL_TREE)
4749 cost = force_var_cost (data, comp_inv, inv_vars);
4750 if (ratio != 1 && parts.step == NULL_TREE)
4751 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4752 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4753 var_cost += add_cost (speed, addr_mode);
4754
4755 if (comp_inv && inv_expr && !simple_inv)
4756 {
4757 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4758 /* Clear depends on. */
4759 if (*inv_expr != NULL && inv_vars && *inv_vars)
4760 bitmap_clear (*inv_vars);
4761
4762       /* The cost of a small invariant expression adjusted against loop niters
4763 	 is usually zero, which makes it hard to distinguish from a
4764 	 candidate based on loop-invariant variables.  Secondly, the
4765 	 generated invariant expression may not be hoisted out of the loop by
4766 	 a following pass.  We penalize the cost by rounding up in order to
4767 	 neutralize such effects.  */
4768 cost.cost = adjust_setup_cost (data, cost.cost, true);
4769 cost.scratch = cost.cost;
4770 }
4771
4772 cost += var_cost;
4773 addr = addr_for_mem_ref (&parts, as, false);
4774 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4775 cost += address_cost (addr, mem_mode, as, speed);
4776
4777 if (parts.symbol != NULL_TREE)
4778 cost.complexity += 1;
4779 /* Don't increase the complexity of adding a scaled index if it's
4780 the only kind of index that the target allows. */
4781 if (parts.step != NULL_TREE && ok_without_ratio_p)
4782 cost.complexity += 1;
4783 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4784 cost.complexity += 1;
4785 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4786 cost.complexity += 1;
4787
4788 return cost;
4789 }
4790
4791 /* Scale (multiply) the computed COST (except the scratch part, which should be
4792    hoisted out of the loop) by header->frequency / AT->frequency, which makes
4793    the expected cost more accurate.  */
4794
4795 static comp_cost
4796 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4797 {
4798 if (data->speed
4799 && data->current_loop->header->count.to_frequency (cfun) > 0)
4800 {
4801 basic_block bb = gimple_bb (at);
4802 gcc_assert (cost.scratch <= cost.cost);
4803 int scale_factor = (int)(intptr_t) bb->aux;
4804 if (scale_factor == 1)
4805 return cost;
4806
4807 int64_t scaled_cost
4808 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4809
4810 if (dump_file && (dump_flags & TDF_DETAILS))
4811 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4812 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4813 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4814
4815 cost.cost = scaled_cost;
4816 }
4817
4818 return cost;
4819 }
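
/* For illustration: if the use sits in a block executed four times per
   loop iteration (scale_factor 4), a cost of 10 with scratch 2 becomes
   2 + (10 - 2) * 4 = 34; the scratch part is left unscaled because it is
   expected to be hoisted out of the loop.  */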
4820
4821 /* Determines the cost of the computation by which USE is expressed
4822    from induction variable CAND.  If ADDRESS_P is true, we just need
4823    to create an address from it, otherwise we want to get it into a
4824    register.  A set of invariants we depend on is stored in INV_VARS.
4825    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4826    addressing is likely.  If INV_EXPR is nonnull, record the invariant
4827    expr entry in it.  */
4828
4829 static comp_cost
4830 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4831 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4832 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4833 {
4834 gimple *at = use->stmt;
4835 tree ubase = use->iv->base, cbase = cand->iv->base;
4836 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4837 tree comp_inv = NULL_TREE;
4838 HOST_WIDE_INT ratio, aratio;
4839 comp_cost cost;
4840 widest_int rat;
4841 aff_tree aff_inv, aff_var;
4842 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4843
4844 if (inv_vars)
4845 *inv_vars = NULL;
4846 if (can_autoinc)
4847 *can_autoinc = false;
4848 if (inv_expr)
4849 *inv_expr = NULL;
4850
4851 /* Check if we have enough precision to express the values of use. */
4852 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4853 return infinite_cost;
4854
4855 if (address_p
4856 || (use->iv->base_object
4857 && cand->iv->base_object
4858 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4859 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4860 {
4861       /* Do not try to express the address of an object with a computation based
4862 	 on the address of a different object.  This may cause problems in RTL-level
4863 	 alias analysis (which does not expect this to happen,
4864 	 as this is invalid in C), and would be unlikely to be useful
4865 	 anyway.  */
4866 if (use->iv->base_object
4867 && cand->iv->base_object
4868 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4869 return infinite_cost;
4870 }
4871
4872 if (!get_computation_aff_1 (data->current_loop, at, use,
4873 cand, &aff_inv, &aff_var, &rat)
4874 || !wi::fits_shwi_p (rat))
4875 return infinite_cost;
4876
4877 ratio = rat.to_shwi ();
4878 if (address_p)
4879 {
4880 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4881 inv_vars, inv_expr, can_autoinc, speed);
4882 cost = get_scaled_computation_cost_at (data, at, cost);
4883 /* For doloop IV cand, add on the extra cost. */
4884 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4885 return cost;
4886 }
4887
4888 bool simple_inv = (aff_combination_const_p (&aff_inv)
4889 || aff_combination_singleton_var_p (&aff_inv));
4890 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4891 aff_combination_convert (&aff_inv, signed_type);
4892 if (!aff_combination_zero_p (&aff_inv))
4893 comp_inv = aff_combination_to_tree (&aff_inv);
4894
4895 cost = force_var_cost (data, comp_inv, inv_vars);
4896 if (comp_inv && inv_expr && !simple_inv)
4897 {
4898 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4899 /* Clear depends on. */
4900 if (*inv_expr != NULL && inv_vars && *inv_vars)
4901 bitmap_clear (*inv_vars);
4902
4903 cost.cost = adjust_setup_cost (data, cost.cost);
4904 /* Record setup cost in scratch field. */
4905 cost.scratch = cost.cost;
4906 }
4907   /* The cost of a constant integer can be covered when adding the invariant
4908      part to the variant part.  */
4909 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4910 cost = no_cost;
4911
4912 /* Need type narrowing to represent use with cand. */
4913 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4914 {
4915 machine_mode outer_mode = TYPE_MODE (utype);
4916 machine_mode inner_mode = TYPE_MODE (ctype);
4917 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4918 }
4919
4920 /* Turn a + i * (-c) into a - i * c. */
4921 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4922 aratio = -ratio;
4923 else
4924 aratio = ratio;
4925
4926 if (ratio != 1)
4927 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4928
4929 /* TODO: We may also need to check if we can compute a + i * 4 in one
4930 instruction. */
4931 /* Need to add up the invariant and variant parts. */
4932 if (comp_inv && !integer_zerop (comp_inv))
4933 cost += add_cost (speed, TYPE_MODE (utype));
4934
4935 cost = get_scaled_computation_cost_at (data, at, cost);
4936
4937 /* For doloop IV cand, add on the extra cost. */
4938 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4939 cost += targetm.doloop_cost_for_generic;
4940
4941 return cost;
4942 }
4943
4944 /* Determines cost of computing the use in GROUP with CAND in a generic
4945 expression. */
4946
4947 static bool
4948 determine_group_iv_cost_generic (struct ivopts_data *data,
4949 struct iv_group *group, struct iv_cand *cand)
4950 {
4951 comp_cost cost;
4952 iv_inv_expr_ent *inv_expr = NULL;
4953 bitmap inv_vars = NULL, inv_exprs = NULL;
4954 struct iv_use *use = group->vuses[0];
4955
4956 /* The simple case first -- if we need to express value of the preserved
4957 original biv, the cost is 0. This also prevents us from counting the
4958 cost of increment twice -- once at this use and once in the cost of
4959 the candidate. */
4960 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4961 cost = no_cost;
4962 else
4963 cost = get_computation_cost (data, use, cand, false,
4964 &inv_vars, NULL, &inv_expr);
4965
4966 if (inv_expr)
4967 {
4968 inv_exprs = BITMAP_ALLOC (NULL);
4969 bitmap_set_bit (inv_exprs, inv_expr->id);
4970 }
4971 set_group_iv_cost (data, group, cand, cost, inv_vars,
4972 NULL_TREE, ERROR_MARK, inv_exprs);
4973 return !cost.infinite_cost_p ();
4974 }
4975
4976 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4977
4978 static bool
4979 determine_group_iv_cost_address (struct ivopts_data *data,
4980 struct iv_group *group, struct iv_cand *cand)
4981 {
4982 unsigned i;
4983 bitmap inv_vars = NULL, inv_exprs = NULL;
4984 bool can_autoinc;
4985 iv_inv_expr_ent *inv_expr = NULL;
4986 struct iv_use *use = group->vuses[0];
4987 comp_cost sum_cost = no_cost, cost;
4988
4989 cost = get_computation_cost (data, use, cand, true,
4990 &inv_vars, &can_autoinc, &inv_expr);
4991
4992 if (inv_expr)
4993 {
4994 inv_exprs = BITMAP_ALLOC (NULL);
4995 bitmap_set_bit (inv_exprs, inv_expr->id);
4996 }
4997 sum_cost = cost;
4998 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4999 {
5000 if (can_autoinc)
5001 sum_cost -= cand->cost_step;
5002 /* If we generated the candidate solely for exploiting autoincrement
5003 opportunities, and it turns out it can't be used, set the cost to
5004 infinity to make sure we ignore it. */
5005 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5006 sum_cost = infinite_cost;
5007 }
5008
5009 /* Uses in a group can share setup code, so only add setup cost once. */
5010 cost -= cost.scratch;
5011   /* Compute and add costs for the remaining uses of this group.  */
5012 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5013 {
5014 struct iv_use *next = group->vuses[i];
5015
5016       /* TODO: We could skip computing the cost for a subsequent iv_use when it
5017 	 has the same cost as the first iv_use, but the cost really depends on
5018 	 the offset and where the iv_use is.  */
5019 cost = get_computation_cost (data, next, cand, true,
5020 NULL, &can_autoinc, &inv_expr);
5021 if (inv_expr)
5022 {
5023 if (!inv_exprs)
5024 inv_exprs = BITMAP_ALLOC (NULL);
5025
5026 bitmap_set_bit (inv_exprs, inv_expr->id);
5027 }
5028 sum_cost += cost;
5029 }
5030 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5031 NULL_TREE, ERROR_MARK, inv_exprs);
5032
5033 return !sum_cost.infinite_cost_p ();
5034 }
5035
5036 /* Computes value of candidate CAND at position AT in iteration NITER, and
5037 stores it to VAL. */
5038
5039 static void
5040 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5041 aff_tree *val)
5042 {
5043 aff_tree step, delta, nit;
5044 struct iv *iv = cand->iv;
5045 tree type = TREE_TYPE (iv->base);
5046 tree steptype;
5047 if (POINTER_TYPE_P (type))
5048 steptype = sizetype;
5049 else
5050 steptype = unsigned_type_for (type);
5051
5052 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5053 aff_combination_convert (&step, steptype);
5054 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5055 aff_combination_convert (&nit, steptype);
5056 aff_combination_mult (&nit, &step, &delta);
5057 if (stmt_after_increment (loop, cand, at))
5058 aff_combination_add (&delta, &step);
5059
5060 tree_to_aff_combination (iv->base, type, val);
5061 if (!POINTER_TYPE_P (type))
5062 aff_combination_convert (val, steptype);
5063 aff_combination_add (val, &delta);
5064 }
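
/* Illustrative example: for a candidate with base 16 and step 4, the value
   after NITER = n iterations is 16 + 4 * n; if AT is after the increment,
   one more step is added, giving 16 + 4 * (n + 1).  The computation is done
   in the unsigned STEPTYPE (or sizetype for pointers) so that wrap-around
   is well defined.  */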
5065
5066 /* Returns period of induction variable iv. */
5067
5068 static tree
5069 iv_period (struct iv *iv)
5070 {
5071 tree step = iv->step, period, type;
5072 tree pow2div;
5073
5074 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5075
5076 type = unsigned_type_for (TREE_TYPE (step));
5077   /* The period of the iv is lcm (step, type_range) / step - 1,
5078      i.e., N * type_range / step - 1.  Since the type range is a power
5079      of two, N == step >> num_of_ending_zeros_binary (step),
5080      so the final result is
5081
5082        (type_range >> num_of_ending_zeros_binary (step)) - 1
5083
5084    */
5085 pow2div = num_ending_zeros (step);
5086
5087 period = build_low_bits_mask (type,
5088 (TYPE_PRECISION (type)
5089 - tree_to_uhwi (pow2div)));
5090
5091 return period;
5092 }
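
/* Worked example (illustrative): for a 32-bit unsigned iv with step
   12 = 4 * 3, num_ending_zeros is 2, so the returned mask has 30 low bits
   set: 2^30 - 1 = 0x3fffffff.  That is the largest number of increments
   for which the iv is guaranteed not to revisit a value it has already
   taken.  */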
5093
5094 /* Returns the comparison operator used when eliminating the iv USE. */
5095
5096 static enum tree_code
5097 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5098 {
5099 class loop *loop = data->current_loop;
5100 basic_block ex_bb;
5101 edge exit;
5102
5103 ex_bb = gimple_bb (use->stmt);
5104 exit = EDGE_SUCC (ex_bb, 0);
5105 if (flow_bb_inside_loop_p (loop, exit->dest))
5106 exit = EDGE_SUCC (ex_bb, 1);
5107
5108 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5109 }
5110
5111 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5112 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5113 calculation is performed in non-wrapping type.
5114
5115 TODO: More generally, we could test for the situation that
5116 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5117 This would require knowing the sign of OFFSET. */
5118
5119 static bool
5120 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5121 {
5122 enum tree_code code;
5123 tree e1, e2;
5124 aff_tree aff_e1, aff_e2, aff_offset;
5125
5126 if (!nowrap_type_p (TREE_TYPE (base)))
5127 return false;
5128
5129 base = expand_simple_operations (base);
5130
5131 if (TREE_CODE (base) == SSA_NAME)
5132 {
5133 gimple *stmt = SSA_NAME_DEF_STMT (base);
5134
5135 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5136 return false;
5137
5138 code = gimple_assign_rhs_code (stmt);
5139 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5140 return false;
5141
5142 e1 = gimple_assign_rhs1 (stmt);
5143 e2 = gimple_assign_rhs2 (stmt);
5144 }
5145 else
5146 {
5147 code = TREE_CODE (base);
5148 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5149 return false;
5150 e1 = TREE_OPERAND (base, 0);
5151 e2 = TREE_OPERAND (base, 1);
5152 }
5153
5154 /* Use affine expansion as deeper inspection to prove the equality. */
5155 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5156 &aff_e2, &data->name_expansion_cache);
5157 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5158 &aff_offset, &data->name_expansion_cache);
5159 aff_combination_scale (&aff_offset, -1);
5160 switch (code)
5161 {
5162 case PLUS_EXPR:
5163 aff_combination_add (&aff_e2, &aff_offset);
5164 if (aff_combination_zero_p (&aff_e2))
5165 return true;
5166
5167 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5168 &aff_e1, &data->name_expansion_cache);
5169 aff_combination_add (&aff_e1, &aff_offset);
5170 return aff_combination_zero_p (&aff_e1);
5171
5172 case POINTER_PLUS_EXPR:
5173 aff_combination_add (&aff_e2, &aff_offset);
5174 return aff_combination_zero_p (&aff_e2);
5175
5176 default:
5177 return false;
5178 }
5179 }
5180
5181 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5182 comparison with CAND. NITER describes the number of iterations of
5183 the loops. If successful, the comparison in COMP_P is altered accordingly.
5184
5185 We aim to handle the following situation:
5186
5187 sometype *base, *p;
5188 int a, b, i;
5189
5190 i = a;
5191 p = p_0 = base + a;
5192
5193 do
5194 {
5195 bla (*p);
5196 p++;
5197 i++;
5198 }
5199 while (i < b);
5200
5201 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5202 We aim to optimize this to
5203
5204 p = p_0 = base + a;
5205 do
5206 {
5207 bla (*p);
5208 p++;
5209 }
5210 while (p < p_0 - a + b);
5211
5212 This preserves the correctness, since the pointer arithmetics does not
5213 overflow. More precisely:
5214
5215 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5216 overflow in computing it or the values of p.
5217 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5218 overflow. To prove this, we use the fact that p_0 = base + a. */
5219
5220 static bool
5221 iv_elimination_compare_lt (struct ivopts_data *data,
5222 struct iv_cand *cand, enum tree_code *comp_p,
5223 class tree_niter_desc *niter)
5224 {
5225 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5226 class aff_tree nit, tmpa, tmpb;
5227 enum tree_code comp;
5228 HOST_WIDE_INT step;
5229
5230 /* We need to know that the candidate induction variable does not overflow.
5231 While more complex analysis may be used to prove this, for now just
5232 check that the variable appears in the original program and that it
5233 is computed in a type that guarantees no overflows. */
5234 cand_type = TREE_TYPE (cand->iv->base);
5235 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5236 return false;
5237
5238 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5239 the calculation of the BOUND could overflow, making the comparison
5240 invalid. */
5241 if (!data->loop_single_exit_p)
5242 return false;
5243
5244 /* We need to be able to decide whether candidate is increasing or decreasing
5245 in order to choose the right comparison operator. */
5246 if (!cst_and_fits_in_hwi (cand->iv->step))
5247 return false;
5248 step = int_cst_value (cand->iv->step);
5249
5250 /* Check that the number of iterations matches the expected pattern:
5251 a + 1 > b ? 0 : b - a - 1. */
5252 mbz = niter->may_be_zero;
5253 if (TREE_CODE (mbz) == GT_EXPR)
5254 {
5255 /* Handle a + 1 > b. */
5256 tree op0 = TREE_OPERAND (mbz, 0);
5257 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5258 {
5259 a = TREE_OPERAND (op0, 0);
5260 b = TREE_OPERAND (mbz, 1);
5261 }
5262 else
5263 return false;
5264 }
5265 else if (TREE_CODE (mbz) == LT_EXPR)
5266 {
5267 tree op1 = TREE_OPERAND (mbz, 1);
5268
5269 /* Handle b < a + 1. */
5270 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5271 {
5272 a = TREE_OPERAND (op1, 0);
5273 b = TREE_OPERAND (mbz, 0);
5274 }
5275 else
5276 return false;
5277 }
5278 else
5279 return false;
5280
5281 /* Expected number of iterations is B - A - 1. Check that it matches
5282 the actual number, i.e., that B - A - NITER = 1. */
5283 tree_to_aff_combination (niter->niter, nit_type, &nit);
5284 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5285 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5286 aff_combination_scale (&nit, -1);
5287 aff_combination_scale (&tmpa, -1);
5288 aff_combination_add (&tmpb, &tmpa);
5289 aff_combination_add (&tmpb, &nit);
5290 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5291 return false;
5292
5293 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5294 overflow. */
5295 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5296 cand->iv->step,
5297 fold_convert (TREE_TYPE (cand->iv->step), a));
5298 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5299 return false;
5300
5301 /* Determine the new comparison operator. */
5302 comp = step < 0 ? GT_EXPR : LT_EXPR;
5303 if (*comp_p == NE_EXPR)
5304 *comp_p = comp;
5305 else if (*comp_p == EQ_EXPR)
5306 *comp_p = invert_tree_comparison (comp, false);
5307 else
5308 gcc_unreachable ();
5309
5310 return true;
5311 }
5312
5313 /* Check whether it is possible to express the condition in USE by comparison
5314 of candidate CAND. If so, store the value compared with to BOUND, and the
5315 comparison operator to COMP. */
5316
5317 static bool
5318 may_eliminate_iv (struct ivopts_data *data,
5319 struct iv_use *use, struct iv_cand *cand, tree *bound,
5320 enum tree_code *comp)
5321 {
5322 basic_block ex_bb;
5323 edge exit;
5324 tree period;
5325 class loop *loop = data->current_loop;
5326 aff_tree bnd;
5327 class tree_niter_desc *desc = NULL;
5328
5329 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5330 return false;
5331
5332   /* For now this works only for exits that dominate the loop latch.
5333      TODO: extend to other conditions inside the loop body.  */
5334 ex_bb = gimple_bb (use->stmt);
5335 if (use->stmt != last_stmt (ex_bb)
5336 || gimple_code (use->stmt) != GIMPLE_COND
5337 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5338 return false;
5339
5340 exit = EDGE_SUCC (ex_bb, 0);
5341 if (flow_bb_inside_loop_p (loop, exit->dest))
5342 exit = EDGE_SUCC (ex_bb, 1);
5343 if (flow_bb_inside_loop_p (loop, exit->dest))
5344 return false;
5345
5346 desc = niter_for_exit (data, exit);
5347 if (!desc)
5348 return false;
5349
5350 /* Determine whether we can use the variable to test the exit condition.
5351 This is the case iff the period of the induction variable is greater
5352 than the number of iterations for which the exit condition is true. */
5353 period = iv_period (cand->iv);
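/* Illustrative example (not taken from this pass): for an 8-bit unsigned
   candidate with step 1 the period computed here is 255, i.e. the
   candidate's values stay distinct for at most that many iterations.  The
   exit test may only be rewritten in terms of such a candidate if the exit
   condition holds for no more iterations than that, otherwise the value
   would wrap and the rewritten comparison could fire too early.  The checks
   below compare the period against NITER in exactly this spirit.  */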
5354
5355 /* If the number of iterations is constant, compare against it directly. */
5356 if (TREE_CODE (desc->niter) == INTEGER_CST)
5357 {
5358 /* See cand_value_at. */
5359 if (stmt_after_increment (loop, cand, use->stmt))
5360 {
5361 if (!tree_int_cst_lt (desc->niter, period))
5362 return false;
5363 }
5364 else
5365 {
5366 if (tree_int_cst_lt (period, desc->niter))
5367 return false;
5368 }
5369 }
5370
5371 /* If not, and if this is the only possible exit of the loop, see whether
5372 we can get a conservative estimate on the number of iterations of the
5373 entire loop and compare against that instead. */
5374 else
5375 {
5376 widest_int period_value, max_niter;
5377
5378 max_niter = desc->max;
5379 if (stmt_after_increment (loop, cand, use->stmt))
5380 max_niter += 1;
5381 period_value = wi::to_widest (period);
5382 if (wi::gtu_p (max_niter, period_value))
5383 {
5384 /* See if we can take advantage of inferred loop bound
5385 information. */
5386 if (data->loop_single_exit_p)
5387 {
5388 if (!max_loop_iterations (loop, &max_niter))
5389 return false;
5390 /* The loop bound is already adjusted by adding 1. */
5391 if (wi::gtu_p (max_niter, period_value))
5392 return false;
5393 }
5394 else
5395 return false;
5396 }
5397 }
5398
5399 /* For a doloop IV candidate, the bound is zero. This is safe whether
5400 may_be_zero is set or not. */

5401 if (cand->doloop_p)
5402 {
5403 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5404 *comp = iv_elimination_compare (data, use);
5405 return true;
5406 }
5407
5408 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5409
5410 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5411 aff_combination_to_tree (&bnd));
5412 *comp = iv_elimination_compare (data, use);
5413
5414 /* It is unlikely that computing the number of iterations using division
5415 would be more profitable than keeping the original induction variable. */
5416 if (expression_expensive_p (*bound))
5417 return false;
5418
5419 /* Sometimes it is possible to handle the situation where the number of
5420 iterations may be zero unless additional assumptions hold, by using <
5421 instead of != in the exit condition.
5422
5423 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5424 base the exit condition on it. However, that is often too
5425 expensive. */
5426 if (!integer_zerop (desc->may_be_zero))
5427 return iv_elimination_compare_lt (data, cand, comp, desc);
5428
5429 return true;
5430 }
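/* A hypothetical end-to-end illustration of the above (names invented for
   the example): for a loop

     for (i = 0; i != n; i++)
       *p++ = 0;

   with candidate p starting at BASE and stepping by 1, cand_value_at yields
   the candidate's value after NITER iterations, so the exit test on i can be
   replaced by something like "p != BASE + n"; *BOUND is set to that value
   and *COMP is chosen by iv_elimination_compare.  Whether the rewrite is
   actually profitable is decided later by the cost computation.  */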
5431
5432 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5433 be copied, if it is used in the loop body and DATA->body_includes_call. */
5434
5435 static int
5436 parm_decl_cost (struct ivopts_data *data, tree bound)
5437 {
5438 tree sbound = bound;
5439 STRIP_NOPS (sbound);
5440
5441 if (TREE_CODE (sbound) == SSA_NAME
5442 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5443 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5444 && data->body_includes_call)
5445 return COSTS_N_INSNS (1);
5446
5447 return 0;
5448 }
5449
5450 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5451
5452 static bool
5453 determine_group_iv_cost_cond (struct ivopts_data *data,
5454 struct iv_group *group, struct iv_cand *cand)
5455 {
5456 tree bound = NULL_TREE;
5457 struct iv *cmp_iv;
5458 bitmap inv_exprs = NULL;
5459 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5460 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5461 enum comp_iv_rewrite rewrite_type;
5462 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5463 tree *control_var, *bound_cst;
5464 enum tree_code comp = ERROR_MARK;
5465 struct iv_use *use = group->vuses[0];
5466
5467 /* Extract condition operands. */
5468 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5469 &bound_cst, NULL, &cmp_iv);
5470 gcc_assert (rewrite_type != COMP_IV_NA);
5471
5472 /* Try iv elimination. */
5473 if (rewrite_type == COMP_IV_ELIM
5474 && may_eliminate_iv (data, use, cand, &bound, &comp))
5475 {
5476 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5477 if (elim_cost.cost == 0)
5478 elim_cost.cost = parm_decl_cost (data, bound);
5479 else if (TREE_CODE (bound) == INTEGER_CST)
5480 elim_cost.cost = 0;
5481 /* If we replace a loop condition 'i < n' with 'p < base + n',
5482 inv_vars_elim will have 'base' and 'n' set, which implies that both
5483 'base' and 'n' will be live during the loop. More likely,
5484 'base + n' will be loop invariant, resulting in only one live value
5485 during the loop. So in that case we clear inv_vars_elim and set
5486 inv_expr_elim instead. */
5487 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5488 {
5489 inv_expr_elim = get_loop_invariant_expr (data, bound);
5490 bitmap_clear (inv_vars_elim);
5491 }
5492 /* The bound is a loop invariant, so it will be only computed
5493 once. */
5494 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5495 }
5496
5497 /* When the condition is a comparison of the candidate IV against
5498 zero, prefer this IV.
5499
5500 TODO: The constant that we're subtracting from the cost should
5501 be target-dependent. This information should be added to the
5502 target costs for each backend. */
5503 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5504 && integer_zerop (*bound_cst)
5505 && (operand_equal_p (*control_var, cand->var_after, 0)
5506 || operand_equal_p (*control_var, cand->var_before, 0)))
5507 elim_cost -= 1;
5508
5509 express_cost = get_computation_cost (data, use, cand, false,
5510 &inv_vars_express, NULL,
5511 &inv_expr_express);
5512 if (cmp_iv != NULL)
5513 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5514
5515 /* Count the cost of the original bound as well. */
5516 bound_cost = force_var_cost (data, *bound_cst, NULL);
5517 if (bound_cost.cost == 0)
5518 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5519 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5520 bound_cost.cost = 0;
5521 express_cost += bound_cost;
5522
5523 /* Choose the better approach, preferring the eliminated IV. */
5524 if (elim_cost <= express_cost)
5525 {
5526 cost = elim_cost;
5527 inv_vars = inv_vars_elim;
5528 inv_vars_elim = NULL;
5529 inv_expr = inv_expr_elim;
5530 /* For doloop candidate/use pair, adjust to zero cost. */
5531 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5532 cost = no_cost;
5533 }
5534 else
5535 {
5536 cost = express_cost;
5537 inv_vars = inv_vars_express;
5538 inv_vars_express = NULL;
5539 bound = NULL_TREE;
5540 comp = ERROR_MARK;
5541 inv_expr = inv_expr_express;
5542 }
5543
5544 if (inv_expr)
5545 {
5546 inv_exprs = BITMAP_ALLOC (NULL);
5547 bitmap_set_bit (inv_exprs, inv_expr->id);
5548 }
5549 set_group_iv_cost (data, group, cand, cost,
5550 inv_vars, bound, comp, inv_exprs);
5551
5552 if (inv_vars_elim)
5553 BITMAP_FREE (inv_vars_elim);
5554 if (inv_vars_express)
5555 BITMAP_FREE (inv_vars_express);
5556
5557 return !cost.infinite_cost_p ();
5558 }
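/* Rough illustration of the choice made above (hypothetical costs): if the
   eliminated form "p != BASE + n" only needs BASE + n forced into a register
   once outside the loop, while the expressed form needs an extra computation
   of the compared value from CAND on every iteration plus the cost of the
   original bound, elim_cost will usually be the smaller one and the group is
   recorded with the rewritten bound and comparison; otherwise bound/comp are
   dropped and the plain expression cost is kept.  */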
5559
5560 /* Determines cost of computing uses in GROUP with CAND. Returns false
5561 if the group cannot be represented with CAND. */
5562
5563 static bool
5564 determine_group_iv_cost (struct ivopts_data *data,
5565 struct iv_group *group, struct iv_cand *cand)
5566 {
5567 switch (group->type)
5568 {
5569 case USE_NONLINEAR_EXPR:
5570 return determine_group_iv_cost_generic (data, group, cand);
5571
5572 case USE_REF_ADDRESS:
5573 case USE_PTR_ADDRESS:
5574 return determine_group_iv_cost_address (data, group, cand);
5575
5576 case USE_COMPARE:
5577 return determine_group_iv_cost_cond (data, group, cand);
5578
5579 default:
5580 gcc_unreachable ();
5581 }
5582 }
5583
5584 /* Return true if get_computation_cost indicates that autoincrement is
5585 a possibility for the pair of USE and CAND, false otherwise. */
5586
5587 static bool
5588 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5589 struct iv_cand *cand)
5590 {
5591 if (!address_p (use->type))
5592 return false;
5593
5594 bool can_autoinc = false;
5595 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5596 return can_autoinc;
5597 }
5598
5599 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5600 use that allows autoincrement, and set their AINC_USE if possible. */
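/* For illustration (target-dependent, so only a sketch): on a target with
   post-increment addressing, a candidate incremented right next to a nearby
   memory access can be folded into the access itself -- a load from *p
   followed by p = p + 4 may become a single *p++ style access -- which is
   why the closest use before/after the increment is searched for below.  */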
5601
5602 static void
5603 set_autoinc_for_original_candidates (struct ivopts_data *data)
5604 {
5605 unsigned i, j;
5606
5607 for (i = 0; i < data->vcands.length (); i++)
5608 {
5609 struct iv_cand *cand = data->vcands[i];
5610 struct iv_use *closest_before = NULL;
5611 struct iv_use *closest_after = NULL;
5612 if (cand->pos != IP_ORIGINAL)
5613 continue;
5614
5615 for (j = 0; j < data->vgroups.length (); j++)
5616 {
5617 struct iv_group *group = data->vgroups[j];
5618 struct iv_use *use = group->vuses[0];
5619 unsigned uid = gimple_uid (use->stmt);
5620
5621 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5622 continue;
5623
5624 if (uid < gimple_uid (cand->incremented_at)
5625 && (closest_before == NULL
5626 || uid > gimple_uid (closest_before->stmt)))
5627 closest_before = use;
5628
5629 if (uid > gimple_uid (cand->incremented_at)
5630 && (closest_after == NULL
5631 || uid < gimple_uid (closest_after->stmt)))
5632 closest_after = use;
5633 }
5634
5635 if (closest_before != NULL
5636 && autoinc_possible_for_pair (data, closest_before, cand))
5637 cand->ainc_use = closest_before;
5638 else if (closest_after != NULL
5639 && autoinc_possible_for_pair (data, closest_after, cand))
5640 cand->ainc_use = closest_after;
5641 }
5642 }
5643
5644 /* Relate compare use with all candidates. */
5645
5646 static void
5647 relate_compare_use_with_all_cands (struct ivopts_data *data)
5648 {
5649 unsigned i, count = data->vcands.length ();
5650 for (i = 0; i < data->vgroups.length (); i++)
5651 {
5652 struct iv_group *group = data->vgroups[i];
5653
5654 if (group->type == USE_COMPARE)
5655 bitmap_set_range (group->related_cands, 0, count);
5656 }
5657 }
5658
5659 /* Add one doloop dedicated IV candidate:
5660 - Base is (may_be_zero ? 1 : (niter + 1)).
5661 - Step is -1. */
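/* Simple illustration of the shape added below: if NITER is 99 (and cannot
   be zero), the candidate starts at 100 and decreases by one each iteration,
   counting 100, 99, ... down towards zero -- the form expected by RTL
   doloop transforms that decrement and test a count register.  */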
5662
5663 static void
5664 add_iv_candidate_for_doloop (struct ivopts_data *data)
5665 {
5666 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5667 gcc_assert (niter_desc && niter_desc->assumptions);
5668
5669 tree niter = niter_desc->niter;
5670 tree ntype = TREE_TYPE (niter);
5671 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5672
5673 tree may_be_zero = niter_desc->may_be_zero;
5674 if (may_be_zero && integer_zerop (may_be_zero))
5675 may_be_zero = NULL_TREE;
5676 if (may_be_zero)
5677 {
5678 if (COMPARISON_CLASS_P (may_be_zero))
5679 {
5680 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5681 build_int_cst (ntype, 0),
5682 rewrite_to_non_trapping_overflow (niter));
5683 }
5684 /* Don't try to obtain the iteration count expression when may_be_zero is
5685 integer_nonzerop (the iteration count is then one) or any other form. */
5686 else
5687 return;
5688 }
5689
5690 tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5691 build_int_cst (ntype, 1));
5692 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5693 }
5694
5695 /* Finds the candidates for the induction variables. */
5696
5697 static void
5698 find_iv_candidates (struct ivopts_data *data)
5699 {
5700 /* Add commonly used ivs. */
5701 add_standard_iv_candidates (data);
5702
5703 /* Add doloop dedicated ivs. */
5704 if (data->doloop_use_p)
5705 add_iv_candidate_for_doloop (data);
5706
5707 /* Add old induction variables. */
5708 add_iv_candidate_for_bivs (data);
5709
5710 /* Add induction variables derived from uses. */
5711 add_iv_candidate_for_groups (data);
5712
5713 set_autoinc_for_original_candidates (data);
5714
5715 /* Record the important candidates. */
5716 record_important_candidates (data);
5717
5718 /* Relate compare iv_use with all candidates. */
5719 if (!data->consider_all_candidates)
5720 relate_compare_use_with_all_cands (data);
5721
5722 if (dump_file && (dump_flags & TDF_DETAILS))
5723 {
5724 unsigned i;
5725
5726 fprintf (dump_file, "\n<Important Candidates>:\t");
5727 for (i = 0; i < data->vcands.length (); i++)
5728 if (data->vcands[i]->important)
5729 fprintf (dump_file, " %d,", data->vcands[i]->id);
5730 fprintf (dump_file, "\n");
5731
5732 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5733 for (i = 0; i < data->vgroups.length (); i++)
5734 {
5735 struct iv_group *group = data->vgroups[i];
5736
5737 if (group->related_cands)
5738 {
5739 fprintf (dump_file, " Group %d:\t", group->id);
5740 dump_bitmap (dump_file, group->related_cands);
5741 }
5742 }
5743 fprintf (dump_file, "\n");
5744 }
5745 }
5746
5747 /* Determines costs of computing use of iv with an iv candidate. */
5748
5749 static void
5750 determine_group_iv_costs (struct ivopts_data *data)
5751 {
5752 unsigned i, j;
5753 struct iv_cand *cand;
5754 struct iv_group *group;
5755 bitmap to_clear = BITMAP_ALLOC (NULL);
5756
5757 alloc_use_cost_map (data);
5758
5759 for (i = 0; i < data->vgroups.length (); i++)
5760 {
5761 group = data->vgroups[i];
5762
5763 if (data->consider_all_candidates)
5764 {
5765 for (j = 0; j < data->vcands.length (); j++)
5766 {
5767 cand = data->vcands[j];
5768 determine_group_iv_cost (data, group, cand);
5769 }
5770 }
5771 else
5772 {
5773 bitmap_iterator bi;
5774
5775 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5776 {
5777 cand = data->vcands[j];
5778 if (!determine_group_iv_cost (data, group, cand))
5779 bitmap_set_bit (to_clear, j);
5780 }
5781
5782 /* Remove the candidates for which the cost is infinite from
5783 the list of related candidates. */
5784 bitmap_and_compl_into (group->related_cands, to_clear);
5785 bitmap_clear (to_clear);
5786 }
5787 }
5788
5789 BITMAP_FREE (to_clear);
5790
5791 if (dump_file && (dump_flags & TDF_DETAILS))
5792 {
5793 bitmap_iterator bi;
5794
5795 /* Dump invariant variables. */
5796 fprintf (dump_file, "\n<Invariant Vars>:\n");
5797 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5798 {
5799 struct version_info *info = ver_info (data, i);
5800 if (info->inv_id)
5801 {
5802 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5803 print_generic_expr (dump_file, info->name, TDF_SLIM);
5804 fprintf (dump_file, "%s\n",
5805 info->has_nonlin_use ? "" : "\t(eliminable)");
5806 }
5807 }
5808
5809 /* Dump invariant expressions. */
5810 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5811 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5812
5813 for (hash_table<iv_inv_expr_hasher>::iterator it
5814 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5815 ++it)
5816 list.safe_push (*it);
5817
5818 list.qsort (sort_iv_inv_expr_ent);
5819
5820 for (i = 0; i < list.length (); ++i)
5821 {
5822 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5823 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5824 fprintf (dump_file, "\n");
5825 }
5826
5827 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5828
5829 for (i = 0; i < data->vgroups.length (); i++)
5830 {
5831 group = data->vgroups[i];
5832
5833 fprintf (dump_file, "Group %d:\n", i);
5834 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5835 for (j = 0; j < group->n_map_members; j++)
5836 {
5837 if (!group->cost_map[j].cand
5838 || group->cost_map[j].cost.infinite_cost_p ())
5839 continue;
5840
5841 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5842 group->cost_map[j].cand->id,
5843 group->cost_map[j].cost.cost,
5844 group->cost_map[j].cost.complexity);
5845 if (!group->cost_map[j].inv_exprs
5846 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5847 fprintf (dump_file, "NIL;\t");
5848 else
5849 bitmap_print (dump_file,
5850 group->cost_map[j].inv_exprs, "", ";\t");
5851 if (!group->cost_map[j].inv_vars
5852 || bitmap_empty_p (group->cost_map[j].inv_vars))
5853 fprintf (dump_file, "NIL;\n");
5854 else
5855 bitmap_print (dump_file,
5856 group->cost_map[j].inv_vars, "", "\n");
5857 }
5858
5859 fprintf (dump_file, "\n");
5860 }
5861 fprintf (dump_file, "\n");
5862 }
5863 }
5864
5865 /* Determines cost of the candidate CAND. */
5866
5867 static void
5868 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5869 {
5870 comp_cost cost_base;
5871 int64_t cost, cost_step;
5872 tree base;
5873
5874 gcc_assert (cand->iv != NULL);
5875
5876 /* There are two costs associated with the candidate -- its increment
5877 and its initialization. The second is almost negligible for any loop
5878 that rolls enough, so we give it only a small weight. */
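/* A small sketch of how this plays out below: the setup cost of computing
   the base is scaled down by adjust_setup_cost (it is paid once per loop,
   not per iteration), the per-iteration add_cost of the step is added, and
   finally small "+1" nudges steer the choice towards original ivs and
   doloop candidates and away from inserting code into an empty latch.  */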
5879
5880 base = cand->iv->base;
5881 cost_base = force_var_cost (data, base, NULL);
5882 /* It will be exceptional that the iv register happens to be initialized with
5883 the proper value at no cost. In general, there will at least be a regcopy
5884 or a const set. */
5885 if (cost_base.cost == 0)
5886 cost_base.cost = COSTS_N_INSNS (1);
5887 /* Doloop decrement should be considered as zero cost. */
5888 if (cand->doloop_p)
5889 cost_step = 0;
5890 else
5891 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5892 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5893
5894 /* Prefer the original ivs unless we may gain something by replacing them.
5895 The reason is to make debugging simpler; this is not relevant for
5896 artificial ivs created by other optimization passes. */
5897 if ((cand->pos != IP_ORIGINAL
5898 || !SSA_NAME_VAR (cand->var_before)
5899 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5900 /* Prefer doloop as well. */
5901 && !cand->doloop_p)
5902 cost++;
5903
5904 /* Prefer not to insert statements into latch unless there are some
5905 already (so that we do not create unnecessary jumps). */
5906 if (cand->pos == IP_END
5907 && empty_block_p (ip_end_pos (data->current_loop)))
5908 cost++;
5909
5910 cand->cost = cost;
5911 cand->cost_step = cost_step;
5912 }
5913
5914 /* Determines costs of computation of the candidates. */
5915
5916 static void
5917 determine_iv_costs (struct ivopts_data *data)
5918 {
5919 unsigned i;
5920
5921 if (dump_file && (dump_flags & TDF_DETAILS))
5922 {
5923 fprintf (dump_file, "<Candidate Costs>:\n");
5924 fprintf (dump_file, " cand\tcost\n");
5925 }
5926
5927 for (i = 0; i < data->vcands.length (); i++)
5928 {
5929 struct iv_cand *cand = data->vcands[i];
5930
5931 determine_iv_cost (data, cand);
5932
5933 if (dump_file && (dump_flags & TDF_DETAILS))
5934 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5935 }
5936
5937 if (dump_file && (dump_flags & TDF_DETAILS))
5938 fprintf (dump_file, "\n");
5939 }
5940
5941 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5942 induction variables. Note N_INVS includes both invariant variables and
5943 invariant expressions. */
5944
5945 static unsigned
5946 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5947 unsigned n_cands)
5948 {
5949 unsigned cost;
5950 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5951 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5952 bool speed = data->speed;
5953
5954 /* If there is a call in the loop body, the call-clobbered registers
5955 are not available for loop invariants. */
5956 if (data->body_includes_call)
5957 available_regs = available_regs - target_clobbered_regs;
5958
5959 /* If we have enough registers. */
5960 if (regs_needed + target_res_regs < available_regs)
5961 cost = n_new;
5962 /* If close to running out of registers, try to preserve them. */
5963 else if (regs_needed <= available_regs)
5964 cost = target_reg_cost [speed] * regs_needed;
5965 /* If we run out of available registers but the candidates alone still
5966 fit, we penalize the extra registers using target_spill_cost. */
5967 else if (n_cands <= available_regs)
5968 cost = target_reg_cost [speed] * available_regs
5969 + target_spill_cost [speed] * (regs_needed - available_regs);
5970 /* If even the number of candidates exceeds the available registers, we
5971 penalize the extra candidate registers using target_spill_cost * 2,
5972 because it is more expensive to spill an induction variable than an invariant. */
5973 else
5974 cost = target_reg_cost [speed] * available_regs
5975 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5976 + target_spill_cost [speed] * (regs_needed - n_cands);
5977
5978 /* Finally, add the number of candidates, so that we prefer eliminating
5979 induction variables if possible. */
5980 return cost + n_cands;
5981 }
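/* Purely illustrative numbers for the formula above: with regs_used = 2,
   n_invs = 1 and n_cands = 2 we get n_new = 3 and regs_needed = 5; on a
   target with, say, 14 available registers and a small target_res_regs the
   first branch applies and the function returns n_new + n_cands = 5.  Once
   regs_needed exceeds the available registers, the spill-cost branches take
   over and increasingly penalize large candidate sets.  */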
5982
5983 /* For each size of the induction variable set determine the penalty. */
5984
5985 static void
5986 determine_set_costs (struct ivopts_data *data)
5987 {
5988 unsigned j, n;
5989 gphi *phi;
5990 gphi_iterator psi;
5991 tree op;
5992 class loop *loop = data->current_loop;
5993 bitmap_iterator bi;
5994
5995 if (dump_file && (dump_flags & TDF_DETAILS))
5996 {
5997 fprintf (dump_file, "<Global Costs>:\n");
5998 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5999 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6000 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6001 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6002 }
6003
6004 n = 0;
6005 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6006 {
6007 phi = psi.phi ();
6008 op = PHI_RESULT (phi);
6009
6010 if (virtual_operand_p (op))
6011 continue;
6012
6013 if (get_iv (data, op))
6014 continue;
6015
6016 if (!POINTER_TYPE_P (TREE_TYPE (op))
6017 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6018 continue;
6019
6020 n++;
6021 }
6022
6023 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6024 {
6025 struct version_info *info = ver_info (data, j);
6026
6027 if (info->inv_id && info->has_nonlin_use)
6028 n++;
6029 }
6030
6031 data->regs_used = n;
6032 if (dump_file && (dump_flags & TDF_DETAILS))
6033 fprintf (dump_file, " regs_used %d\n", n);
6034
6035 if (dump_file && (dump_flags & TDF_DETAILS))
6036 {
6037 fprintf (dump_file, " cost for size:\n");
6038 fprintf (dump_file, " ivs\tcost\n");
6039 for (j = 0; j <= 2 * target_avail_regs; j++)
6040 fprintf (dump_file, " %d\t%d\n", j,
6041 ivopts_estimate_reg_pressure (data, 0, j));
6042 fprintf (dump_file, "\n");
6043 }
6044 }
6045
6046 /* Returns true if A is a cheaper cost pair than B. */
6047
6048 static bool
6049 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6050 {
6051 if (!a)
6052 return false;
6053
6054 if (!b)
6055 return true;
6056
6057 if (a->cost < b->cost)
6058 return true;
6059
6060 if (b->cost < a->cost)
6061 return false;
6062
6063 /* In case the costs are the same, prefer the cheaper candidate. */
6064 if (a->cand->cost < b->cand->cost)
6065 return true;
6066
6067 return false;
6068 }
6069
6070 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6071 for more expensive, equal and cheaper respectively. */
6072
6073 static int
6074 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6075 {
6076 if (cheaper_cost_pair (a, b))
6077 return -1;
6078 if (cheaper_cost_pair (b, a))
6079 return 1;
6080
6081 return 0;
6082 }
6083
6084 /* Returns the cost pair through which GROUP is expressed in IVS. */
6085
6086 static class cost_pair *
6087 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6088 {
6089 return ivs->cand_for_group[group->id];
6090 }
6091
6092 /* Computes the cost field of IVS structure. */
6093
6094 static void
6095 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6096 {
6097 comp_cost cost = ivs->cand_use_cost;
6098
6099 cost += ivs->cand_cost;
6100 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6101 ivs->cost = cost;
6102 }
6103
6104 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6105 and IVS. */
6106
6107 static void
6108 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6109 {
6110 bitmap_iterator bi;
6111 unsigned iid;
6112
6113 if (!invs)
6114 return;
6115
6116 gcc_assert (n_inv_uses != NULL);
6117 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6118 {
6119 n_inv_uses[iid]--;
6120 if (n_inv_uses[iid] == 0)
6121 ivs->n_invs--;
6122 }
6123 }
6124
6125 /* Set GROUP not to be expressed by any candidate in IVS. */
6126
6127 static void
6128 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6129 struct iv_group *group)
6130 {
6131 unsigned gid = group->id, cid;
6132 class cost_pair *cp;
6133
6134 cp = ivs->cand_for_group[gid];
6135 if (!cp)
6136 return;
6137 cid = cp->cand->id;
6138
6139 ivs->bad_groups++;
6140 ivs->cand_for_group[gid] = NULL;
6141 ivs->n_cand_uses[cid]--;
6142
6143 if (ivs->n_cand_uses[cid] == 0)
6144 {
6145 bitmap_clear_bit (ivs->cands, cid);
6146 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6147 ivs->n_cands--;
6148 ivs->cand_cost -= cp->cand->cost;
6149 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6150 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6151 }
6152
6153 ivs->cand_use_cost -= cp->cost;
6154 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6155 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6156 iv_ca_recount_cost (data, ivs);
6157 }
6158
6159 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6160 IVS. */
6161
6162 static void
6163 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6164 {
6165 bitmap_iterator bi;
6166 unsigned iid;
6167
6168 if (!invs)
6169 return;
6170
6171 gcc_assert (n_inv_uses != NULL);
6172 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6173 {
6174 n_inv_uses[iid]++;
6175 if (n_inv_uses[iid] == 1)
6176 ivs->n_invs++;
6177 }
6178 }
6179
6180 /* Set cost pair for GROUP in set IVS to CP. */
6181
6182 static void
6183 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6184 struct iv_group *group, class cost_pair *cp)
6185 {
6186 unsigned gid = group->id, cid;
6187
6188 if (ivs->cand_for_group[gid] == cp)
6189 return;
6190
6191 if (ivs->cand_for_group[gid])
6192 iv_ca_set_no_cp (data, ivs, group);
6193
6194 if (cp)
6195 {
6196 cid = cp->cand->id;
6197
6198 ivs->bad_groups--;
6199 ivs->cand_for_group[gid] = cp;
6200 ivs->n_cand_uses[cid]++;
6201 if (ivs->n_cand_uses[cid] == 1)
6202 {
6203 bitmap_set_bit (ivs->cands, cid);
6204 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6205 ivs->n_cands++;
6206 ivs->cand_cost += cp->cand->cost;
6207 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6208 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6209 }
6210
6211 ivs->cand_use_cost += cp->cost;
6212 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6213 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6214 iv_ca_recount_cost (data, ivs);
6215 }
6216 }
6217
6218 /* Extend set IVS by expressing GROUP by some of the candidates in it
6219 if possible. Consider all important candidates if the candidates
6220 already in IVS don't give any result. */
6221
6222 static void
6223 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6224 struct iv_group *group)
6225 {
6226 class cost_pair *best_cp = NULL, *cp;
6227 bitmap_iterator bi;
6228 unsigned i;
6229 struct iv_cand *cand;
6230
6231 gcc_assert (ivs->upto >= group->id);
6232 ivs->upto++;
6233 ivs->bad_groups++;
6234
6235 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6236 {
6237 cand = data->vcands[i];
6238 cp = get_group_iv_cost (data, group, cand);
6239 if (cheaper_cost_pair (cp, best_cp))
6240 best_cp = cp;
6241 }
6242
6243 if (best_cp == NULL)
6244 {
6245 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6246 {
6247 cand = data->vcands[i];
6248 cp = get_group_iv_cost (data, group, cand);
6249 if (cheaper_cost_pair (cp, best_cp))
6250 best_cp = cp;
6251 }
6252 }
6253
6254 iv_ca_set_cp (data, ivs, group, best_cp);
6255 }
6256
6257 /* Get cost for assignment IVS. */
6258
6259 static comp_cost
6260 iv_ca_cost (class iv_ca *ivs)
6261 {
6262 /* This was a conditional expression but it triggered a bug in
6263 Sun C 5.5. */
6264 if (ivs->bad_groups)
6265 return infinite_cost;
6266 else
6267 return ivs->cost;
6268 }
6269
6270 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6271 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6272 respectively. */
6273
6274 static int
6275 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6276 struct iv_group *group, class cost_pair *old_cp,
6277 class cost_pair *new_cp)
6278 {
6279 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6280 unsigned old_n_invs = ivs->n_invs;
6281 iv_ca_set_cp (data, ivs, group, new_cp);
6282 unsigned new_n_invs = ivs->n_invs;
6283 iv_ca_set_cp (data, ivs, group, old_cp);
6284
6285 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6286 }
6287
6288 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6289 it before NEXT. */
6290
6291 static struct iv_ca_delta *
6292 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6293 class cost_pair *new_cp, struct iv_ca_delta *next)
6294 {
6295 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6296
6297 change->group = group;
6298 change->old_cp = old_cp;
6299 change->new_cp = new_cp;
6300 change->next = next;
6301
6302 return change;
6303 }
6304
6305 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6306 are rewritten. */
6307
6308 static struct iv_ca_delta *
6309 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6310 {
6311 struct iv_ca_delta *last;
6312
6313 if (!l2)
6314 return l1;
6315
6316 if (!l1)
6317 return l2;
6318
6319 for (last = l1; last->next; last = last->next)
6320 continue;
6321 last->next = l2;
6322
6323 return l1;
6324 }
6325
6326 /* Reverse the list of changes DELTA, forming the inverse to it. */
6327
6328 static struct iv_ca_delta *
6329 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6330 {
6331 struct iv_ca_delta *act, *next, *prev = NULL;
6332
6333 for (act = delta; act; act = next)
6334 {
6335 next = act->next;
6336 act->next = prev;
6337 prev = act;
6338
6339 std::swap (act->old_cp, act->new_cp);
6340 }
6341
6342 return prev;
6343 }
6344
6345 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6346 reverted instead. */
6347
6348 static void
6349 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6350 struct iv_ca_delta *delta, bool forward)
6351 {
6352 class cost_pair *from, *to;
6353 struct iv_ca_delta *act;
6354
6355 if (!forward)
6356 delta = iv_ca_delta_reverse (delta);
6357
6358 for (act = delta; act; act = act->next)
6359 {
6360 from = act->old_cp;
6361 to = act->new_cp;
6362 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6363 iv_ca_set_cp (data, ivs, act->group, to);
6364 }
6365
6366 if (!forward)
6367 iv_ca_delta_reverse (delta);
6368 }
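/* The delta machinery above is typically used speculatively; for instance
   iv_ca_extend further below does roughly:

     iv_ca_delta_commit (data, ivs, *delta, true);    (apply tentatively)
     cost = iv_ca_cost (ivs);                         (measure)
     iv_ca_delta_commit (data, ivs, *delta, false);   (revert)

   so a change can be evaluated without permanently mutating the assignment
   until the caller decides to keep the cheapest delta.  */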
6369
6370 /* Returns true if CAND is used in IVS. */
6371
6372 static bool
6373 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6374 {
6375 return ivs->n_cand_uses[cand->id] > 0;
6376 }
6377
6378 /* Returns number of induction variable candidates in the set IVS. */
6379
6380 static unsigned
6381 iv_ca_n_cands (class iv_ca *ivs)
6382 {
6383 return ivs->n_cands;
6384 }
6385
6386 /* Free the list of changes DELTA. */
6387
6388 static void
6389 iv_ca_delta_free (struct iv_ca_delta **delta)
6390 {
6391 struct iv_ca_delta *act, *next;
6392
6393 for (act = *delta; act; act = next)
6394 {
6395 next = act->next;
6396 free (act);
6397 }
6398
6399 *delta = NULL;
6400 }
6401
6402 /* Allocates new iv candidates assignment. */
6403
6404 static class iv_ca *
6405 iv_ca_new (struct ivopts_data *data)
6406 {
6407 class iv_ca *nw = XNEW (class iv_ca);
6408
6409 nw->upto = 0;
6410 nw->bad_groups = 0;
6411 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6412 data->vgroups.length ());
6413 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6414 nw->cands = BITMAP_ALLOC (NULL);
6415 nw->n_cands = 0;
6416 nw->n_invs = 0;
6417 nw->cand_use_cost = no_cost;
6418 nw->cand_cost = 0;
6419 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6420 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6421 nw->cost = no_cost;
6422
6423 return nw;
6424 }
6425
6426 /* Free memory occupied by the set IVS. */
6427
6428 static void
6429 iv_ca_free (class iv_ca **ivs)
6430 {
6431 free ((*ivs)->cand_for_group);
6432 free ((*ivs)->n_cand_uses);
6433 BITMAP_FREE ((*ivs)->cands);
6434 free ((*ivs)->n_inv_var_uses);
6435 free ((*ivs)->n_inv_expr_uses);
6436 free (*ivs);
6437 *ivs = NULL;
6438 }
6439
6440 /* Dumps IVS to FILE. */
6441
6442 static void
6443 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6444 {
6445 unsigned i;
6446 comp_cost cost = iv_ca_cost (ivs);
6447
6448 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6449 cost.complexity);
6450 fprintf (file, " reg_cost: %d\n",
6451 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6452 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6453 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6454 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6455 bitmap_print (file, ivs->cands, " candidates: ","\n");
6456
6457 for (i = 0; i < ivs->upto; i++)
6458 {
6459 struct iv_group *group = data->vgroups[i];
6460 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6461 if (cp)
6462 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6463 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6464 cp->cost.cost, cp->cost.complexity);
6465 else
6466 fprintf (file, " group:%d --> ??\n", group->id);
6467 }
6468
6469 const char *pref = "";
6470 fprintf (file, " invariant variables: ");
6471 for (i = 1; i <= data->max_inv_var_id; i++)
6472 if (ivs->n_inv_var_uses[i])
6473 {
6474 fprintf (file, "%s%d", pref, i);
6475 pref = ", ";
6476 }
6477
6478 pref = "";
6479 fprintf (file, "\n invariant expressions: ");
6480 for (i = 1; i <= data->max_inv_expr_id; i++)
6481 if (ivs->n_inv_expr_uses[i])
6482 {
6483 fprintf (file, "%s%d", pref, i);
6484 pref = ", ";
6485 }
6486
6487 fprintf (file, "\n\n");
6488 }
6489
6490 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6491 new set, and store differences in DELTA. Number of induction variables
6492 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
6493 the function tries to find a solution with a minimal number of iv candidates. */
6494
6495 static comp_cost
6496 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6497 struct iv_cand *cand, struct iv_ca_delta **delta,
6498 unsigned *n_ivs, bool min_ncand)
6499 {
6500 unsigned i;
6501 comp_cost cost;
6502 struct iv_group *group;
6503 class cost_pair *old_cp, *new_cp;
6504
6505 *delta = NULL;
6506 for (i = 0; i < ivs->upto; i++)
6507 {
6508 group = data->vgroups[i];
6509 old_cp = iv_ca_cand_for_group (ivs, group);
6510
6511 if (old_cp
6512 && old_cp->cand == cand)
6513 continue;
6514
6515 new_cp = get_group_iv_cost (data, group, cand);
6516 if (!new_cp)
6517 continue;
6518
6519 if (!min_ncand)
6520 {
6521 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6522 /* Skip if new_cp depends on more invariants. */
6523 if (cmp_invs > 0)
6524 continue;
6525
6526 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6527 /* Skip if new_cp is not cheaper. */
6528 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6529 continue;
6530 }
6531
6532 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6533 }
6534
6535 iv_ca_delta_commit (data, ivs, *delta, true);
6536 cost = iv_ca_cost (ivs);
6537 if (n_ivs)
6538 *n_ivs = iv_ca_n_cands (ivs);
6539 iv_ca_delta_commit (data, ivs, *delta, false);
6540
6541 return cost;
6542 }
6543
6544 /* Try narrowing set IVS by removing CAND. Return the cost of
6545 the new set and store the differences in DELTA. START is
6546 the candidate with which we start narrowing. */
6547
6548 static comp_cost
6549 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6550 struct iv_cand *cand, struct iv_cand *start,
6551 struct iv_ca_delta **delta)
6552 {
6553 unsigned i, ci;
6554 struct iv_group *group;
6555 class cost_pair *old_cp, *new_cp, *cp;
6556 bitmap_iterator bi;
6557 struct iv_cand *cnd;
6558 comp_cost cost, best_cost, acost;
6559
6560 *delta = NULL;
6561 for (i = 0; i < data->vgroups.length (); i++)
6562 {
6563 group = data->vgroups[i];
6564
6565 old_cp = iv_ca_cand_for_group (ivs, group);
6566 if (old_cp->cand != cand)
6567 continue;
6568
6569 best_cost = iv_ca_cost (ivs);
6570 /* Start narrowing with START. */
6571 new_cp = get_group_iv_cost (data, group, start);
6572
6573 if (data->consider_all_candidates)
6574 {
6575 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6576 {
6577 if (ci == cand->id || (start && ci == start->id))
6578 continue;
6579
6580 cnd = data->vcands[ci];
6581
6582 cp = get_group_iv_cost (data, group, cnd);
6583 if (!cp)
6584 continue;
6585
6586 iv_ca_set_cp (data, ivs, group, cp);
6587 acost = iv_ca_cost (ivs);
6588
6589 if (acost < best_cost)
6590 {
6591 best_cost = acost;
6592 new_cp = cp;
6593 }
6594 }
6595 }
6596 else
6597 {
6598 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6599 {
6600 if (ci == cand->id || (start && ci == start->id))
6601 continue;
6602
6603 cnd = data->vcands[ci];
6604
6605 cp = get_group_iv_cost (data, group, cnd);
6606 if (!cp)
6607 continue;
6608
6609 iv_ca_set_cp (data, ivs, group, cp);
6610 acost = iv_ca_cost (ivs);
6611
6612 if (acost < best_cost)
6613 {
6614 best_cost = acost;
6615 new_cp = cp;
6616 }
6617 }
6618 }
6619 /* Restore to old cp for use. */
6620 iv_ca_set_cp (data, ivs, group, old_cp);
6621
6622 if (!new_cp)
6623 {
6624 iv_ca_delta_free (delta);
6625 return infinite_cost;
6626 }
6627
6628 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6629 }
6630
6631 iv_ca_delta_commit (data, ivs, *delta, true);
6632 cost = iv_ca_cost (ivs);
6633 iv_ca_delta_commit (data, ivs, *delta, false);
6634
6635 return cost;
6636 }
6637
6638 /* Try optimizing the set of candidates IVS by removing candidates other
6639 than EXCEPT_CAND from it. Return cost of the new set, and store
6640 differences in DELTA. */
6641
6642 static comp_cost
6643 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6644 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6645 {
6646 bitmap_iterator bi;
6647 struct iv_ca_delta *act_delta, *best_delta;
6648 unsigned i;
6649 comp_cost best_cost, acost;
6650 struct iv_cand *cand;
6651
6652 best_delta = NULL;
6653 best_cost = iv_ca_cost (ivs);
6654
6655 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6656 {
6657 cand = data->vcands[i];
6658
6659 if (cand == except_cand)
6660 continue;
6661
6662 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6663
6664 if (acost < best_cost)
6665 {
6666 best_cost = acost;
6667 iv_ca_delta_free (&best_delta);
6668 best_delta = act_delta;
6669 }
6670 else
6671 iv_ca_delta_free (&act_delta);
6672 }
6673
6674 if (!best_delta)
6675 {
6676 *delta = NULL;
6677 return best_cost;
6678 }
6679
6680 /* Recurse to possibly remove other unnecessary ivs. */
6681 iv_ca_delta_commit (data, ivs, best_delta, true);
6682 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6683 iv_ca_delta_commit (data, ivs, best_delta, false);
6684 *delta = iv_ca_delta_join (best_delta, *delta);
6685 return best_cost;
6686 }
6687
6688 /* Check if CAND_IDX is a candidate other than OLD_CAND with a
6689 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6690 the corresponding cost_pair; otherwise just return BEST_CP. */
6691
6692 static class cost_pair*
6693 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6694 unsigned int cand_idx, struct iv_cand *old_cand,
6695 class cost_pair *best_cp)
6696 {
6697 struct iv_cand *cand;
6698 class cost_pair *cp;
6699
6700 gcc_assert (old_cand != NULL && best_cp != NULL);
6701 if (cand_idx == old_cand->id)
6702 return best_cp;
6703
6704 cand = data->vcands[cand_idx];
6705 cp = get_group_iv_cost (data, group, cand);
6706 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6707 return cp;
6708
6709 return best_cp;
6710 }
6711
6712 /* Try breaking the locally optimal fixed-point for IVS by replacing
6713 candidates which are used by more than one iv use. For each of those
6714 candidates, this function tries to represent the iv uses under that
6715 candidate using other ones with lower local cost, then tries to prune
6716 the new set. If the new set has lower cost, it returns the new cost
6717 after recording the candidate replacement in list DELTA. */
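/* Sketch of the idea (hypothetical candidates): if candidate C currently
   serves groups G1 and G2, but C1 is locally cheaper for G1 and C2 for G2,
   the delta re-points G1 to C1 and G2 to C2; the subsequent prune may then
   drop C altogether.  The change is only kept when the pruned set really is
   cheaper than the original one.  */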
6718
6719 static comp_cost
6720 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6721 struct iv_ca_delta **delta)
6722 {
6723 bitmap_iterator bi, bj;
6724 unsigned int i, j, k;
6725 struct iv_cand *cand;
6726 comp_cost orig_cost, acost;
6727 struct iv_ca_delta *act_delta, *tmp_delta;
6728 class cost_pair *old_cp, *best_cp = NULL;
6729
6730 *delta = NULL;
6731 orig_cost = iv_ca_cost (ivs);
6732
6733 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6734 {
6735 if (ivs->n_cand_uses[i] == 1
6736 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6737 continue;
6738
6739 cand = data->vcands[i];
6740
6741 act_delta = NULL;
6742 /* Represent uses under current candidate using other ones with
6743 lower local cost. */
6744 for (j = 0; j < ivs->upto; j++)
6745 {
6746 struct iv_group *group = data->vgroups[j];
6747 old_cp = iv_ca_cand_for_group (ivs, group);
6748
6749 if (old_cp->cand != cand)
6750 continue;
6751
6752 best_cp = old_cp;
6753 if (data->consider_all_candidates)
6754 for (k = 0; k < data->vcands.length (); k++)
6755 best_cp = cheaper_cost_with_cand (data, group, k,
6756 old_cp->cand, best_cp);
6757 else
6758 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6759 best_cp = cheaper_cost_with_cand (data, group, k,
6760 old_cp->cand, best_cp);
6761
6762 if (best_cp == old_cp)
6763 continue;
6764
6765 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6766 }
6767 /* No need for further pruning. */
6768 if (!act_delta)
6769 continue;
6770
6771 /* Prune the new candidate set. */
6772 iv_ca_delta_commit (data, ivs, act_delta, true);
6773 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6774 iv_ca_delta_commit (data, ivs, act_delta, false);
6775 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6776
6777 if (acost < orig_cost)
6778 {
6779 *delta = act_delta;
6780 return acost;
6781 }
6782 else
6783 iv_ca_delta_free (&act_delta);
6784 }
6785
6786 return orig_cost;
6787 }
6788
6789 /* Tries to extend the sets IVS in the best possible way in order to
6790 express the GROUP. If ORIGINALP is true, prefer candidates from
6791 the original set of IVs, otherwise favor important candidates not
6792 based on any memory object. */
6793
6794 static bool
6795 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6796 struct iv_group *group, bool originalp)
6797 {
6798 comp_cost best_cost, act_cost;
6799 unsigned i;
6800 bitmap_iterator bi;
6801 struct iv_cand *cand;
6802 struct iv_ca_delta *best_delta = NULL, *act_delta;
6803 class cost_pair *cp;
6804
6805 iv_ca_add_group (data, ivs, group);
6806 best_cost = iv_ca_cost (ivs);
6807 cp = iv_ca_cand_for_group (ivs, group);
6808 if (cp)
6809 {
6810 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6811 iv_ca_set_no_cp (data, ivs, group);
6812 }
6813
6814 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6815 first try important candidates not based on any memory object. Only if
6816 this fails, try the specific ones. Rationale -- in loops with many
6817 variables the best choice often is to use just one generic biv. If we
6818 added here many ivs specific to the uses, the optimization algorithm later
6819 would be likely to get stuck in a local minimum, thus causing us to create
6820 too many ivs. The approach from few ivs to more seems more likely to be
6821 successful -- starting from few ivs, replacing an expensive use by a
6822 specific iv should always be a win. */
6823 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6824 {
6825 cand = data->vcands[i];
6826
6827 if (originalp && cand->pos != IP_ORIGINAL)
6828 continue;
6829
6830 if (!originalp && cand->iv->base_object != NULL_TREE)
6831 continue;
6832
6833 if (iv_ca_cand_used_p (ivs, cand))
6834 continue;
6835
6836 cp = get_group_iv_cost (data, group, cand);
6837 if (!cp)
6838 continue;
6839
6840 iv_ca_set_cp (data, ivs, group, cp);
6841 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6842 true);
6843 iv_ca_set_no_cp (data, ivs, group);
6844 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6845
6846 if (act_cost < best_cost)
6847 {
6848 best_cost = act_cost;
6849
6850 iv_ca_delta_free (&best_delta);
6851 best_delta = act_delta;
6852 }
6853 else
6854 iv_ca_delta_free (&act_delta);
6855 }
6856
6857 if (best_cost.infinite_cost_p ())
6858 {
6859 for (i = 0; i < group->n_map_members; i++)
6860 {
6861 cp = group->cost_map + i;
6862 cand = cp->cand;
6863 if (!cand)
6864 continue;
6865
6866 /* Already tried this. */
6867 if (cand->important)
6868 {
6869 if (originalp && cand->pos == IP_ORIGINAL)
6870 continue;
6871 if (!originalp && cand->iv->base_object == NULL_TREE)
6872 continue;
6873 }
6874
6875 if (iv_ca_cand_used_p (ivs, cand))
6876 continue;
6877
6878 act_delta = NULL;
6879 iv_ca_set_cp (data, ivs, group, cp);
6880 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6881 iv_ca_set_no_cp (data, ivs, group);
6882 act_delta = iv_ca_delta_add (group,
6883 iv_ca_cand_for_group (ivs, group),
6884 cp, act_delta);
6885
6886 if (act_cost < best_cost)
6887 {
6888 best_cost = act_cost;
6889
6890 if (best_delta)
6891 iv_ca_delta_free (&best_delta);
6892 best_delta = act_delta;
6893 }
6894 else
6895 iv_ca_delta_free (&act_delta);
6896 }
6897 }
6898
6899 iv_ca_delta_commit (data, ivs, best_delta, true);
6900 iv_ca_delta_free (&best_delta);
6901
6902 return !best_cost.infinite_cost_p ();
6903 }
6904
6905 /* Finds an initial assignment of candidates to uses. */
6906
6907 static class iv_ca *
6908 get_initial_solution (struct ivopts_data *data, bool originalp)
6909 {
6910 unsigned i;
6911 class iv_ca *ivs = iv_ca_new (data);
6912
6913 for (i = 0; i < data->vgroups.length (); i++)
6914 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6915 {
6916 iv_ca_free (&ivs);
6917 return NULL;
6918 }
6919
6920 return ivs;
6921 }
6922
6923 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6924 points to a bool variable; if it is true, this function tries to break
6925 a locally optimal fixed-point by replacing candidates in IVS. */
6926
6927 static bool
6928 try_improve_iv_set (struct ivopts_data *data,
6929 class iv_ca *ivs, bool *try_replace_p)
6930 {
6931 unsigned i, n_ivs;
6932 comp_cost acost, best_cost = iv_ca_cost (ivs);
6933 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6934 struct iv_cand *cand;
6935
6936 /* Try extending the set of induction variables by one. */
6937 for (i = 0; i < data->vcands.length (); i++)
6938 {
6939 cand = data->vcands[i];
6940
6941 if (iv_ca_cand_used_p (ivs, cand))
6942 continue;
6943
6944 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6945 if (!act_delta)
6946 continue;
6947
6948 /* If we successfully added the candidate and the set is small enough,
6949 try optimizing it by removing other candidates. */
6950 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6951 {
6952 iv_ca_delta_commit (data, ivs, act_delta, true);
6953 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6954 iv_ca_delta_commit (data, ivs, act_delta, false);
6955 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6956 }
6957
6958 if (acost < best_cost)
6959 {
6960 best_cost = acost;
6961 iv_ca_delta_free (&best_delta);
6962 best_delta = act_delta;
6963 }
6964 else
6965 iv_ca_delta_free (&act_delta);
6966 }
6967
6968 if (!best_delta)
6969 {
6970 /* Try removing the candidates from the set instead. */
6971 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6972
6973 if (!best_delta && *try_replace_p)
6974 {
6975 *try_replace_p = false;
6976 /* So far the candidate-selecting algorithm tends to choose fewer IVs
6977 so that it can handle cases in which loops have many variables
6978 but the best choice is often to use only one general biv. One
6979 weakness is that it can't handle the opposite case, in which different
6980 candidates should be chosen for different uses. To solve
6981 the problem, we replace candidates in the manner described in the
6982 comments of iv_ca_replace, thus giving the general algorithm a chance
6983 to break the locally optimal fixed-point in these cases. */
6984 best_cost = iv_ca_replace (data, ivs, &best_delta);
6985 }
6986
6987 if (!best_delta)
6988 return false;
6989 }
6990
6991 iv_ca_delta_commit (data, ivs, best_delta, true);
6992 iv_ca_delta_free (&best_delta);
6993 return best_cost == iv_ca_cost (ivs);
6994 }
6995
6996 /* Attempts to find the optimal set of induction variables. We use a simple
6997 greedy heuristic -- we try to replace at most one candidate in the selected
6998 solution and remove the unused ivs while this improves the cost. */
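/* In outline, the search below is a simple hill climb:

     set = get_initial_solution (data, originalp);
     while (try_improve_iv_set (data, set, &try_replace_p))
       ;

   each successful step either adds one candidate (and prunes), prunes
   alone, or -- once -- replaces candidates, and the loop stops at the
   first cost fixed-point.  */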
6999
7000 static class iv_ca *
7001 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7002 {
7003 class iv_ca *set;
7004 bool try_replace_p = true;
7005
7006 /* Get the initial solution. */
7007 set = get_initial_solution (data, originalp);
7008 if (!set)
7009 {
7010 if (dump_file && (dump_flags & TDF_DETAILS))
7011 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7012 return NULL;
7013 }
7014
7015 if (dump_file && (dump_flags & TDF_DETAILS))
7016 {
7017 fprintf (dump_file, "Initial set of candidates:\n");
7018 iv_ca_dump (data, dump_file, set);
7019 }
7020
7021 while (try_improve_iv_set (data, set, &try_replace_p))
7022 {
7023 if (dump_file && (dump_flags & TDF_DETAILS))
7024 {
7025 fprintf (dump_file, "Improved to:\n");
7026 iv_ca_dump (data, dump_file, set);
7027 }
7028 }
7029
7030 /* If the set has infinite_cost, it can't be optimal. */
7031 if (iv_ca_cost (set).infinite_cost_p ())
7032 {
7033 if (dump_file && (dump_flags & TDF_DETAILS))
7034 fprintf (dump_file,
7035 "Overflow to infinite cost in try_improve_iv_set.\n");
7036 iv_ca_free (&set);
7037 }
7038 return set;
7039 }
7040
7041 static class iv_ca *
7042 find_optimal_iv_set (struct ivopts_data *data)
7043 {
7044 unsigned i;
7045 comp_cost cost, origcost;
7046 class iv_ca *set, *origset;
7047
7048 /* Determine the cost based on a strategy that starts with original IVs,
7049 and try again using a strategy that prefers candidates not based
7050 on any IVs. */
7051 origset = find_optimal_iv_set_1 (data, true);
7052 set = find_optimal_iv_set_1 (data, false);
7053
7054 if (!origset && !set)
7055 return NULL;
7056
7057 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7058 cost = set ? iv_ca_cost (set) : infinite_cost;
7059
7060 if (dump_file && (dump_flags & TDF_DETAILS))
7061 {
7062 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7063 origcost.cost, origcost.complexity);
7064 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7065 cost.cost, cost.complexity);
7066 }
7067
7068 /* Choose the one with the best cost. */
7069 if (origcost <= cost)
7070 {
7071 if (set)
7072 iv_ca_free (&set);
7073 set = origset;
7074 }
7075 else if (origset)
7076 iv_ca_free (&origset);
7077
7078 for (i = 0; i < data->vgroups.length (); i++)
7079 {
7080 struct iv_group *group = data->vgroups[i];
7081 group->selected = iv_ca_cand_for_group (set, group)->cand;
7082 }
7083
7084 return set;
7085 }
7086
7087 /* Creates a new induction variable corresponding to CAND. */
7088
7089 static void
7090 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7091 {
7092 gimple_stmt_iterator incr_pos;
7093 tree base;
7094 struct iv_use *use;
7095 struct iv_group *group;
7096 bool after = false;
7097
7098 gcc_assert (cand->iv != NULL);
7099
7100 switch (cand->pos)
7101 {
7102 case IP_NORMAL:
7103 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7104 break;
7105
7106 case IP_END:
7107 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7108 after = true;
7109 if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7110 {
7111 edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7112 incr_pos = gsi_after_labels (split_edge (e));
7113 after = false;
7114 }
7115 break;
7116
7117 case IP_AFTER_USE:
7118 after = true;
7119 /* fall through */
7120 case IP_BEFORE_USE:
7121 incr_pos = gsi_for_stmt (cand->incremented_at);
7122 break;
7123
7124 case IP_ORIGINAL:
7125 /* Mark that the iv is preserved. */
7126 name_info (data, cand->var_before)->preserve_biv = true;
7127 name_info (data, cand->var_after)->preserve_biv = true;
7128
7129 /* Rewrite the increment so that it uses var_before directly. */
7130 use = find_interesting_uses_op (data, cand->var_after);
7131 group = data->vgroups[use->group_id];
7132 group->selected = cand;
7133 return;
7134 }
7135
7136 gimple_add_tmp_var (cand->var_before);
7137
7138 base = unshare_expr (cand->iv->base);
7139
7140 create_iv (base, unshare_expr (cand->iv->step),
7141 cand->var_before, data->current_loop,
7142 &incr_pos, after, &cand->var_before, &cand->var_after);
7143 }
7144
7145 /* Creates new induction variables described in SET. */
7146
7147 static void
7148 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7149 {
7150 unsigned i;
7151 struct iv_cand *cand;
7152 bitmap_iterator bi;
7153
7154 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7155 {
7156 cand = data->vcands[i];
7157 create_new_iv (data, cand);
7158 }
7159
7160 if (dump_file && (dump_flags & TDF_DETAILS))
7161 {
7162 fprintf (dump_file, "Selected IV set for loop %d",
7163 data->current_loop->num);
7164 if (data->loop_loc != UNKNOWN_LOCATION)
7165 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7166 LOCATION_LINE (data->loop_loc));
7167 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7168 avg_loop_niter (data->current_loop));
7169 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7170 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7171 {
7172 cand = data->vcands[i];
7173 dump_cand (dump_file, cand);
7174 }
7175 fprintf (dump_file, "\n");
7176 }
7177 }
7178
7179 /* Rewrites USE (definition of iv used in a nonlinear expression)
7180 using candidate CAND. */
7181
7182 static void
7183 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7184 struct iv_use *use, struct iv_cand *cand)
7185 {
7186 gassign *ass;
7187 gimple_stmt_iterator bsi;
7188 tree comp, type = get_use_type (use), tgt;
7189
7190 /* An important special case -- if we are asked to express value of
7191 the original iv by itself, just exit; there is no need to
7192 introduce a new computation (that might also need casting the
7193 variable to unsigned and back). */
7194 if (cand->pos == IP_ORIGINAL
7195 && cand->incremented_at == use->stmt)
7196 {
7197 tree op = NULL_TREE;
7198 enum tree_code stmt_code;
7199
7200 gcc_assert (is_gimple_assign (use->stmt));
7201 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7202
7203 /* Check whether we may leave the computation unchanged.
7204 This is the case only if it does not rely on other
7205 computations in the loop -- otherwise, the computation
7206 we rely upon may be removed in remove_unused_ivs,
7207 thus leading to ICE. */
7208 stmt_code = gimple_assign_rhs_code (use->stmt);
7209 if (stmt_code == PLUS_EXPR
7210 || stmt_code == MINUS_EXPR
7211 || stmt_code == POINTER_PLUS_EXPR)
7212 {
7213 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7214 op = gimple_assign_rhs2 (use->stmt);
7215 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7216 op = gimple_assign_rhs1 (use->stmt);
7217 }
7218
7219 if (op != NULL_TREE)
7220 {
7221 if (expr_invariant_in_loop_p (data->current_loop, op))
7222 return;
7223 if (TREE_CODE (op) == SSA_NAME)
7224 {
7225 struct iv *iv = get_iv (data, op);
7226 if (iv != NULL && integer_zerop (iv->step))
7227 return;
7228 }
7229 }
7230 }
7231
7232 switch (gimple_code (use->stmt))
7233 {
7234 case GIMPLE_PHI:
7235 tgt = PHI_RESULT (use->stmt);
7236
7237 /* If we should keep the biv, do not replace it. */
7238 if (name_info (data, tgt)->preserve_biv)
7239 return;
7240
7241 bsi = gsi_after_labels (gimple_bb (use->stmt));
7242 break;
7243
7244 case GIMPLE_ASSIGN:
7245 tgt = gimple_assign_lhs (use->stmt);
7246 bsi = gsi_for_stmt (use->stmt);
7247 break;
7248
7249 default:
7250 gcc_unreachable ();
7251 }
7252
7253 aff_tree aff_inv, aff_var;
7254 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7255 use, cand, &aff_inv, &aff_var))
7256 gcc_unreachable ();
7257
7258 unshare_aff_combination (&aff_inv);
7259 unshare_aff_combination (&aff_var);
7260 /* Prefer CSE opportunities over hoisting a loop invariant: add the offset
7261 last, so that iv_uses that differ only in their constant offset can be CSEd. */
7262 poly_widest_int offset = aff_inv.offset;
7263 aff_inv.offset = 0;
7264
7265 gimple_seq stmt_list = NULL, seq = NULL;
7266 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7267 tree comp_op2 = aff_combination_to_tree (&aff_var);
7268 gcc_assert (comp_op1 && comp_op2);
7269
7270 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7271 gimple_seq_add_seq (&stmt_list, seq);
7272 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7273 gimple_seq_add_seq (&stmt_list, seq);
7274
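/* If one of the operands is a pointer, make it the first operand so that
   the sum below is built with POINTER_PLUS_EXPR.  */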
7275 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7276 std::swap (comp_op1, comp_op2);
7277
7278 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7279 {
7280 comp = fold_build_pointer_plus (comp_op1,
7281 fold_convert (sizetype, comp_op2));
7282 comp = fold_build_pointer_plus (comp,
7283 wide_int_to_tree (sizetype, offset));
7284 }
7285 else
7286 {
7287 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7288 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7289 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7290 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7291 }
7292
7293 comp = fold_convert (type, comp);
7294 if (!valid_gimple_rhs_p (comp)
7295 || (gimple_code (use->stmt) != GIMPLE_PHI
7296 /* We can't allow re-allocating the stmt as it might be pointed
7297 to still. */
7298 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7299 >= gimple_num_ops (gsi_stmt (bsi)))))
7300 {
7301 comp = force_gimple_operand (comp, &seq, true, NULL);
7302 gimple_seq_add_seq (&stmt_list, seq);
7303 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7304 {
7305 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7306 /* As this isn't a plain copy we have to reset alignment
7307 information. */
7308 if (SSA_NAME_PTR_INFO (comp))
7309 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7310 }
7311 }
7312
7313 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7314 if (gimple_code (use->stmt) == GIMPLE_PHI)
7315 {
7316 ass = gimple_build_assign (tgt, comp);
7317 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7318
7319 bsi = gsi_for_stmt (use->stmt);
7320 remove_phi_node (&bsi, false);
7321 }
7322 else
7323 {
7324 gimple_assign_set_rhs_from_tree (&bsi, comp);
7325 use->stmt = gsi_stmt (bsi);
7326 }
7327 }
7328
7329 /* Performs a peephole optimization to reorder the iv update statement with
7330 a mem ref to enable instruction combining in later phases. The mem ref uses
7331 the iv value before the update, so the reordering transformation requires
7332 adjustment of the offset. CAND is the selected IV_CAND.
7333
7334 Example:
7335
7336 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7337 iv2 = iv1 + 1;
7338
7339 if (t < val) (1)
7340 goto L;
7341 goto Head;
7342
7343
7344 Directly propagating t over to (1) will introduce an overlapping live range
7345 and thus increase register pressure. This peephole transforms the code into:
7346
7347
7348 iv2 = iv1 + 1;
7349 t = MEM_REF (base, iv2, 8, 8);
7350 if (t < val)
7351 goto L;
7352 goto Head;
7353 */
7354
7355 static void
7356 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7357 {
7358 tree var_after;
7359 gimple *iv_update, *stmt;
7360 basic_block bb;
7361 gimple_stmt_iterator gsi, gsi_iv;
7362
7363 if (cand->pos != IP_NORMAL)
7364 return;
7365
7366 var_after = cand->var_after;
7367 iv_update = SSA_NAME_DEF_STMT (var_after);
7368
7369 bb = gimple_bb (iv_update);
7370 gsi = gsi_last_nondebug_bb (bb);
7371 stmt = gsi_stmt (gsi);
7372
7373 /* Only handle conditional statements for now. */
7374 if (gimple_code (stmt) != GIMPLE_COND)
7375 return;
7376
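/* The statement immediately preceding the exit condition (ignoring debug
   statements) must be the IV update.  */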
7377 gsi_prev_nondebug (&gsi);
7378 stmt = gsi_stmt (gsi);
7379 if (stmt != iv_update)
7380 return;
7381
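/* And the statement before the IV update must be the memory-reference use
   itself, an assignment to an SSA name.  */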
7382 gsi_prev_nondebug (&gsi);
7383 if (gsi_end_p (gsi))
7384 return;
7385
7386 stmt = gsi_stmt (gsi);
7387 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7388 return;
7389
7390 if (stmt != use->stmt)
7391 return;
7392
7393 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7394 return;
7395
7396 if (dump_file && (dump_flags & TDF_DETAILS))
7397 {
7398 fprintf (dump_file, "Reordering \n");
7399 print_gimple_stmt (dump_file, iv_update, 0);
7400 print_gimple_stmt (dump_file, use->stmt, 0);
7401 fprintf (dump_file, "\n");
7402 }
7403
7404 gsi = gsi_for_stmt (use->stmt);
7405 gsi_iv = gsi_for_stmt (iv_update);
7406 gsi_move_before (&gsi_iv, &gsi);
7407
7408 cand->pos = IP_BEFORE_USE;
7409 cand->incremented_at = use->stmt;
7410 }
7411
7412 /* Return the alias pointer type that should be used for a MEM_REF
7413 associated with USE, which has type USE_PTR_ADDRESS. */
7414
7415 static tree
7416 get_alias_ptr_type_for_ptr_address (iv_use *use)
7417 {
7418 gcall *call = as_a <gcall *> (use->stmt);
7419 switch (gimple_call_internal_fn (call))
7420 {
7421 case IFN_MASK_LOAD:
7422 case IFN_MASK_STORE:
7423 case IFN_MASK_LOAD_LANES:
7424 case IFN_MASK_STORE_LANES:
7425 /* The second argument contains the correct alias type. */
7426 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7427 return TREE_TYPE (gimple_call_arg (call, 1));
7428
7429 default:
7430 gcc_unreachable ();
7431 }
7432 }
7433
7434
7435 /* Rewrites USE (address that is an iv) using candidate CAND. */
7436
7437 static void
7438 rewrite_use_address (struct ivopts_data *data,
7439 struct iv_use *use, struct iv_cand *cand)
7440 {
7441 aff_tree aff;
7442 bool ok;
7443
7444 adjust_iv_update_pos (cand, use);
7445 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7446 gcc_assert (ok);
7447 unshare_aff_combination (&aff);
7448
7449 /* To avoid undefined overflow problems, all IV candidates use unsigned
7450 integer types. The drawback is that this makes it impossible for
7451 create_mem_ref to distinguish an IV that is based on a memory object
7452 from one that represents simply an offset.
7453
7454 To work around this problem, we pass a hint to create_mem_ref that
7455 indicates which variable (if any) in aff is an IV based on a memory
7456 object. Note that we only consider the candidate. If this is not
7457 based on an object, the base of the reference is in some subexpression
7458 of the use -- but these will use pointer types, so they are recognized
7459 by the create_mem_ref heuristics anyway. */
7460 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7461 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7462 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7463 tree type = use->mem_type;
7464 tree alias_ptr_type;
7465 if (use->type == USE_PTR_ADDRESS)
7466 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7467 else
7468 {
7469 gcc_assert (type == TREE_TYPE (*use->op_p));
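/* Use the actual alignment of the original reference: if it differs from
   the type's default alignment, build a correspondingly aligned variant
   of the type for the new memory reference.  */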
7470 unsigned int align = get_object_alignment (*use->op_p);
7471 if (align != TYPE_ALIGN (type))
7472 type = build_aligned_type (type, align);
7473 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7474 }
7475 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7476 iv, base_hint, data->speed);
7477
7478 if (use->type == USE_PTR_ADDRESS)
7479 {
7480 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7481 ref = fold_convert (get_use_type (use), ref);
7482 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7483 true, GSI_SAME_STMT);
7484 }
7485 else
7486 copy_ref_info (ref, *use->op_p);
7487
7488 *use->op_p = ref;
7489 }
7490
7491 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7492 candidate CAND. */
7493
7494 static void
7495 rewrite_use_compare (struct ivopts_data *data,
7496 struct iv_use *use, struct iv_cand *cand)
7497 {
7498 tree comp, op, bound;
7499 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7500 enum tree_code compare;
7501 struct iv_group *group = data->vgroups[use->group_id];
7502 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7503
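/* A non-NULL cp->value means the exit test could be eliminated, i.e.
   replaced by a comparison of the candidate IV against this bound.  */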
7504 bound = cp->value;
7505 if (bound)
7506 {
7507 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7508 tree var_type = TREE_TYPE (var);
7509 gimple_seq stmts;
7510
7511 if (dump_file && (dump_flags & TDF_DETAILS))
7512 {
7513 fprintf (dump_file, "Replacing exit test: ");
7514 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7515 }
7516 compare = cp->comp;
7517 bound = unshare_expr (fold_convert (var_type, bound));
7518 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7519 if (stmts)
7520 gsi_insert_seq_on_edge_immediate (
7521 loop_preheader_edge (data->current_loop),
7522 stmts);
7523
7524 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7525 gimple_cond_set_lhs (cond_stmt, var);
7526 gimple_cond_set_code (cond_stmt, compare);
7527 gimple_cond_set_rhs (cond_stmt, op);
7528 return;
7529 }
7530
7531 /* The induction variable elimination failed; just express the original
7532 giv. */
7533 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7534 gcc_assert (comp != NULL_TREE);
7535 gcc_assert (use->op_p != NULL);
7536 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7537 SSA_NAME_VAR (*use->op_p),
7538 true, GSI_SAME_STMT);
7539 }
7540
7541 /* Rewrite the groups using the selected induction variables. */
7542
7543 static void
7544 rewrite_groups (struct ivopts_data *data)
7545 {
7546 unsigned i, j;
7547
7548 for (i = 0; i < data->vgroups.length (); i++)
7549 {
7550 struct iv_group *group = data->vgroups[i];
7551 struct iv_cand *cand = group->selected;
7552
7553 gcc_assert (cand);
7554
7555 if (group->type == USE_NONLINEAR_EXPR)
7556 {
7557 for (j = 0; j < group->vuses.length (); j++)
7558 {
7559 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7560 update_stmt (group->vuses[j]->stmt);
7561 }
7562 }
7563 else if (address_p (group->type))
7564 {
7565 for (j = 0; j < group->vuses.length (); j++)
7566 {
7567 rewrite_use_address (data, group->vuses[j], cand);
7568 update_stmt (group->vuses[j]->stmt);
7569 }
7570 }
7571 else
7572 {
7573 gcc_assert (group->type == USE_COMPARE);
7574
7575 for (j = 0; j < group->vuses.length (); j++)
7576 {
7577 rewrite_use_compare (data, group->vuses[j], cand);
7578 update_stmt (group->vuses[j]->stmt);
7579 }
7580 }
7581 }
7582 }
7583
7584 /* Removes the ivs that are not used after rewriting. */
7585
7586 static void
7587 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7588 {
7589 unsigned j;
7590 bitmap_iterator bi;
7591
7592 /* Figure out an order in which to release SSA DEFs so that we don't
7593 release something that we'd have to propagate into a debug stmt
7594 afterwards. */
7595 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7596 {
7597 struct version_info *info;
7598
7599 info = ver_info (data, j);
7600 if (info->iv
7601 && !integer_zerop (info->iv->step)
7602 && !info->inv_id
7603 && !info->iv->nonlin_use
7604 && !info->preserve_biv)
7605 {
7606 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7607
7608 tree def = info->iv->ssa_name;
7609
7610 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7611 {
7612 imm_use_iterator imm_iter;
7613 use_operand_p use_p;
7614 gimple *stmt;
7615 int count = 0;
7616
7617 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7618 {
7619 if (!gimple_debug_bind_p (stmt))
7620 continue;
7621
7622 /* We just want to determine whether to do nothing
7623 (count == 0), to substitute the computed
7624 expression into a single use of the SSA DEF by
7625 itself (count == 1), or to use a debug temp
7626 because the SSA DEF is used multiple times or as
7627 part of a larger expression (count > 1). */
7628 count++;
7629 if (gimple_debug_bind_get_value (stmt) != def)
7630 count++;
7631
7632 if (count > 1)
7633 BREAK_FROM_IMM_USE_STMT (imm_iter);
7634 }
7635
7636 if (!count)
7637 continue;
7638
7639 struct iv_use dummy_use;
7640 struct iv_cand *best_cand = NULL, *cand;
7641 unsigned i, best_pref = 0, cand_pref;
7642 tree comp = NULL_TREE;
7643
7644 memset (&dummy_use, 0, sizeof (dummy_use));
7645 dummy_use.iv = info->iv;
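/* Pick the candidate most likely to give a simple debug expression:
   prefer an equal step (weight 4), an equal mode (weight 2) and a
   constant base (weight 1).  Only the first 64 groups are scanned to
   bound compile time.  */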
7646 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7647 {
7648 cand = data->vgroups[i]->selected;
7649 if (cand == best_cand)
7650 continue;
7651 cand_pref = operand_equal_p (cand->iv->step,
7652 info->iv->step, 0)
7653 ? 4 : 0;
7654 cand_pref
7655 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7656 == TYPE_MODE (TREE_TYPE (info->iv->base))
7657 ? 2 : 0;
7658 cand_pref
7659 += TREE_CODE (cand->iv->base) == INTEGER_CST
7660 ? 1 : 0;
7661 if (best_cand == NULL || best_pref < cand_pref)
7662 {
7663 tree this_comp
7664 = get_debug_computation_at (data->current_loop,
7665 SSA_NAME_DEF_STMT (def),
7666 &dummy_use, cand);
7667 if (this_comp)
7668 {
7669 best_cand = cand;
7670 best_pref = cand_pref;
7671 comp = this_comp;
7672 }
7673 }
7674 }
7675
7676 if (!best_cand)
7677 continue;
7678
7679 comp = unshare_expr (comp);
7680 if (count > 1)
7681 {
7682 tree vexpr = make_node (DEBUG_EXPR_DECL);
7683 DECL_ARTIFICIAL (vexpr) = 1;
7684 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7685 if (SSA_NAME_VAR (def))
7686 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7687 else
7688 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7689 gdebug *def_temp
7690 = gimple_build_debug_bind (vexpr, comp, NULL);
7691 gimple_stmt_iterator gsi;
7692
7693 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7694 gsi = gsi_after_labels (gimple_bb
7695 (SSA_NAME_DEF_STMT (def)));
7696 else
7697 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7698
7699 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7700 comp = vexpr;
7701 }
7702
7703 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7704 {
7705 if (!gimple_debug_bind_p (stmt))
7706 continue;
7707
7708 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7709 SET_USE (use_p, comp);
7710
7711 update_stmt (stmt);
7712 }
7713 }
7714 }
7715 }
7716 }
7717
7718 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7719 for hash_map::traverse. */
7720
7721 bool
7722 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7723 {
7724 free (value);
7725 return true;
7726 }
7727
7728 /* Frees data allocated by the optimization of a single loop. */
7729
7730 static void
7731 free_loop_data (struct ivopts_data *data)
7732 {
7733 unsigned i, j;
7734 bitmap_iterator bi;
7735 tree obj;
7736
7737 if (data->niters)
7738 {
7739 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7740 delete data->niters;
7741 data->niters = NULL;
7742 }
7743
7744 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7745 {
7746 struct version_info *info;
7747
7748 info = ver_info (data, i);
7749 info->iv = NULL;
7750 info->has_nonlin_use = false;
7751 info->preserve_biv = false;
7752 info->inv_id = 0;
7753 }
7754 bitmap_clear (data->relevant);
7755 bitmap_clear (data->important_candidates);
7756
7757 for (i = 0; i < data->vgroups.length (); i++)
7758 {
7759 struct iv_group *group = data->vgroups[i];
7760
7761 for (j = 0; j < group->vuses.length (); j++)
7762 free (group->vuses[j]);
7763 group->vuses.release ();
7764
7765 BITMAP_FREE (group->related_cands);
7766 for (j = 0; j < group->n_map_members; j++)
7767 {
7768 if (group->cost_map[j].inv_vars)
7769 BITMAP_FREE (group->cost_map[j].inv_vars);
7770 if (group->cost_map[j].inv_exprs)
7771 BITMAP_FREE (group->cost_map[j].inv_exprs);
7772 }
7773
7774 free (group->cost_map);
7775 free (group);
7776 }
7777 data->vgroups.truncate (0);
7778
7779 for (i = 0; i < data->vcands.length (); i++)
7780 {
7781 struct iv_cand *cand = data->vcands[i];
7782
7783 if (cand->inv_vars)
7784 BITMAP_FREE (cand->inv_vars);
7785 if (cand->inv_exprs)
7786 BITMAP_FREE (cand->inv_exprs);
7787 free (cand);
7788 }
7789 data->vcands.truncate (0);
7790
7791 if (data->version_info_size < num_ssa_names)
7792 {
7793 data->version_info_size = 2 * num_ssa_names;
7794 free (data->version_info);
7795 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7796 }
7797
7798 data->max_inv_var_id = 0;
7799 data->max_inv_expr_id = 0;
7800
7801 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7802 SET_DECL_RTL (obj, NULL_RTX);
7803
7804 decl_rtl_to_reset.truncate (0);
7805
7806 data->inv_expr_tab->empty ();
7807
7808 data->iv_common_cand_tab->empty ();
7809 data->iv_common_cands.truncate (0);
7810 }
7811
7812 /* Finalizes the data structures used by the iv optimization pass. */
7814
7815 static void
7816 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7817 {
7818 free_loop_data (data);
7819 free (data->version_info);
7820 BITMAP_FREE (data->relevant);
7821 BITMAP_FREE (data->important_candidates);
7822
7823 decl_rtl_to_reset.release ();
7824 data->vgroups.release ();
7825 data->vcands.release ();
7826 delete data->inv_expr_tab;
7827 data->inv_expr_tab = NULL;
7828 free_affine_expand_cache (&data->name_expansion_cache);
7829 if (data->base_object_map)
7830 delete data->base_object_map;
7831 delete data->iv_common_cand_tab;
7832 data->iv_common_cand_tab = NULL;
7833 data->iv_common_cands.release ();
7834 obstack_free (&data->iv_obstack, NULL);
7835 }
7836
7837 /* Returns true if the loop body BODY includes any function calls. */
7838
7839 static bool
7840 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7841 {
7842 gimple_stmt_iterator gsi;
7843 unsigned i;
7844
7845 for (i = 0; i < num_nodes; i++)
7846 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7847 {
7848 gimple *stmt = gsi_stmt (gsi);
7849 if (is_gimple_call (stmt)
7850 && !gimple_call_internal_p (stmt)
7851 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7852 return true;
7853 }
7854 return false;
7855 }
7856
7857 /* Determine cost scaling factor for basic blocks in loop. */
7858 #define COST_SCALING_FACTOR_BOUND (20)
7859
7860 static void
7861 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7862 {
7863 int lfreq = data->current_loop->header->count.to_frequency (cfun);
7864 if (!data->speed || lfreq <= 0)
7865 return;
7866
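/* Record the default scale of 1 for every block in the loop body and find
   the maximum block frequency.  */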
7867 int max_freq = lfreq;
7868 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7869 {
7870 body[i]->aux = (void *)(intptr_t) 1;
7871 if (max_freq < body[i]->count.to_frequency (cfun))
7872 max_freq = body[i]->count.to_frequency (cfun);
7873 }
7874 if (max_freq > lfreq)
7875 {
7876 int divisor, factor;
7877 /* Check if scaling factor itself needs to be scaled by the bound. This
7878 is to avoid overflow when scaling cost according to profile info. */
7879 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7880 {
7881 divisor = max_freq;
7882 factor = COST_SCALING_FACTOR_BOUND;
7883 }
7884 else
7885 {
7886 divisor = lfreq;
7887 factor = 1;
7888 }
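/* Store the per-block scale in the aux field; blocks that are not executed
   more often than the loop header keep the default scale of 1.  */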
7889 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7890 {
7891 int bfreq = body[i]->count.to_frequency (cfun);
7892 if (bfreq <= lfreq)
7893 continue;
7894
7895 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7896 }
7897 }
7898 }
7899
7900 /* Find the doloop comparison IV use and, if found, set its doloop_p flag. */
7901
7902 static bool
7903 find_doloop_use (struct ivopts_data *data)
7904 {
7905 struct loop *loop = data->current_loop;
7906
7907 for (unsigned i = 0; i < data->vgroups.length (); i++)
7908 {
7909 struct iv_group *group = data->vgroups[i];
7910 if (group->type == USE_COMPARE)
7911 {
7912 gcc_assert (group->vuses.length () == 1);
7913 struct iv_use *use = group->vuses[0];
7914 gimple *stmt = use->stmt;
7915 if (gimple_code (stmt) == GIMPLE_COND)
7916 {
7917 basic_block bb = gimple_bb (stmt);
7918 edge true_edge, false_edge;
7919 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
7920 /* This comparison controls the loop latch. For now, require that the
7921 latch is empty. */
7922 if ((loop->latch == true_edge->dest
7923 || loop->latch == false_edge->dest)
7924 && empty_block_p (loop->latch))
7925 {
7926 group->doloop_p = true;
7927 if (dump_file && (dump_flags & TDF_DETAILS))
7928 {
7929 fprintf (dump_file, "Doloop cmp iv use: ");
7930 print_gimple_stmt (dump_file, stmt, 0, TDF_DETAILS);
7931 }
7932 return true;
7933 }
7934 }
7935 }
7936 }
7937
7938 return false;
7939 }
7940
7941 /* For targets that support doloop, predict whether the later RTL doloop
7942 transformation will be applied to this loop; if so, detect the doloop
7943 use and set the doloop_use_p flag. */
7944
7945 void
7946 analyze_and_mark_doloop_use (struct ivopts_data *data)
7947 {
7948 data->doloop_use_p = false;
7949
7950 if (!flag_branch_on_count_reg)
7951 return;
7952
7953 if (!generic_predict_doloop_p (data))
7954 return;
7955
7956 if (find_doloop_use (data))
7957 {
7958 data->doloop_use_p = true;
7959 if (dump_file && (dump_flags & TDF_DETAILS))
7960 {
7961 struct loop *loop = data->current_loop;
7962 fprintf (dump_file,
7963 "Predict loop %d can perform"
7964 " doloop optimization later.\n",
7965 loop->num);
7966 flow_loop_dump (loop, dump_file, NULL, 1);
7967 }
7968 }
7969 }
7970
7971 /* Optimizes the LOOP. Returns true if anything changed. */
7972
7973 static bool
7974 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
7975 bitmap toremove)
7976 {
7977 bool changed = false;
7978 class iv_ca *iv_ca;
7979 edge exit = single_dom_exit (loop);
7980 basic_block *body;
7981
7982 gcc_assert (!data->niters);
7983 data->current_loop = loop;
7984 data->loop_loc = find_loop_location (loop).get_location_t ();
7985 data->speed = optimize_loop_for_speed_p (loop);
7986
7987 if (dump_file && (dump_flags & TDF_DETAILS))
7988 {
7989 fprintf (dump_file, "Processing loop %d", loop->num);
7990 if (data->loop_loc != UNKNOWN_LOCATION)
7991 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7992 LOCATION_LINE (data->loop_loc));
7993 fprintf (dump_file, "\n");
7994
7995 if (exit)
7996 {
7997 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7998 exit->src->index, exit->dest->index);
7999 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8000 fprintf (dump_file, "\n");
8001 }
8002
8003 fprintf (dump_file, "\n");
8004 }
8005
8006 body = get_loop_body (loop);
8007 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8008 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8009
8010 data->loop_single_exit_p
8011 = exit != NULL && loop_only_exit_p (loop, body, exit);
8012
8013 /* For each ssa name, determine whether it behaves as an induction variable
8014 in some loop. */
8015 if (!find_induction_variables (data))
8016 goto finish;
8017
8018 /* Finds interesting uses (item 1). */
8019 find_interesting_uses (data);
8020 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8021 goto finish;
8022
8023 /* Determine cost scaling factor for basic blocks in loop. */
8024 determine_scaling_factor (data, body);
8025
8026 /* Analyze doloop possibility and mark the doloop use if predicted. */
8027 analyze_and_mark_doloop_use (data);
8028
8029 /* Finds candidates for the induction variables (item 2). */
8030 find_iv_candidates (data);
8031
8032 /* Calculates the costs (item 3, part 1). */
8033 determine_iv_costs (data);
8034 determine_group_iv_costs (data);
8035 determine_set_costs (data);
8036
8037 /* Find the optimal set of induction variables (item 3, part 2). */
8038 iv_ca = find_optimal_iv_set (data);
8039 /* Cleanup basic block aux field. */
8040 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8041 body[i]->aux = NULL;
8042 if (!iv_ca)
8043 goto finish;
8044 changed = true;
8045
8046 /* Create the new induction variables (item 4, part 1). */
8047 create_new_ivs (data, iv_ca);
8048 iv_ca_free (&iv_ca);
8049
8050 /* Rewrite the uses (item 4, part 2). */
8051 rewrite_groups (data);
8052
8053 /* Remove the ivs that are unused after rewriting. */
8054 remove_unused_ivs (data, toremove);
8055
8056 finish:
8057 free (body);
8058 free_loop_data (data);
8059
8060 return changed;
8061 }
8062
8063 /* Main entry point. Optimizes induction variables in loops. */
8064
8065 void
8066 tree_ssa_iv_optimize (void)
8067 {
8068 class loop *loop;
8069 struct ivopts_data data;
8070 auto_bitmap toremove;
8071
8072 tree_ssa_iv_optimize_init (&data);
8073
8074 /* Optimize the loops starting with the innermost ones. */
8075 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
8076 {
8077 if (!dbg_cnt (ivopts_loop))
8078 continue;
8079
8080 if (dump_file && (dump_flags & TDF_DETAILS))
8081 flow_loop_dump (loop, dump_file, NULL, 1);
8082
8083 tree_ssa_iv_optimize_loop (&data, loop, toremove);
8084 }
8085
8086 /* Remove eliminated IV defs. */
8087 release_defs_bitset (toremove);
8088
8089 /* We have changed the structure of induction variables; it might happen
8090 that definitions in the scev database refer to some of them that were
8091 eliminated. */
8092 scev_reset_htab ();
8093 /* Likewise niter and control-IV information. */
8094 free_numbers_of_iterations_estimates (cfun);
8095
8096 tree_ssa_iv_optimize_finalize (&data);
8097 }
8098
8099 #include "gt-tree-ssa-loop-ivopts.h"
8100