xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/tree-parloops.c (revision b7b7574d3bf8eeb51a1fa3977b59142ec6434a55)
1 /* Loop autoparallelization.
2    Copyright (C) 2006, 2007, 2008, 2009, 2010
3    Free Software Foundation, Inc.
4    Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
5    Zdenek Dvorak <dvorakz@suse.cz>.
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "tree.h"
28 #include "rtl.h"
29 #include "tree-flow.h"
30 #include "cfgloop.h"
31 #include "ggc.h"
32 #include "tree-data-ref.h"
33 #include "diagnostic.h"
34 #include "tree-pass.h"
35 #include "tree-scalar-evolution.h"
36 #include "hashtab.h"
37 #include "langhooks.h"
38 #include "tree-vectorizer.h"
39 
40 /* This pass tries to distribute iterations of loops into several threads.
41    The implementation is straightforward -- for each loop we test whether its
42    iterations are independent, and if it is the case (and some additional
43    conditions regarding profitability and correctness are satisfied), we
44    add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
45    machinery do its job.
46 
   Most of the complexity is in bringing the code into shape expected
   by the omp expanders:
49    -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
50       variable and that the exit test is at the start of the loop body
51    -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
52       variables by accesses through pointers, and breaking up ssa chains
53       by storing the values incoming to the parallelized loop to a structure
54       passed to the new function as an argument (something similar is done
55       in omp gimplification, unfortunately only a small part of the code
56       can be shared).
57 
58    TODO:
59    -- if there are several parallelizable loops in a function, it may be
60       possible to generate the threads just once (using synchronization to
61       ensure that cross-loop dependences are obeyed).
62    -- handling of common scalar dependence patterns (accumulation, ...)
63    -- handling of non-innermost loops  */
64 
65 /*
66   Reduction handling:
67   currently we use vect_is_simple_reduction() to detect reduction patterns.
68   The code transformation will be introduced by an example.
69 
70 
71 parloop
72 {
73   int sum=1;
74 
75   for (i = 0; i < N; i++)
76    {
77     x[i] = i + 3;
78     sum+=x[i];
79    }
80 }
81 
82 gimple-like code:
83 header_bb:
84 
85   # sum_29 = PHI <sum_11(5), 1(3)>
86   # i_28 = PHI <i_12(5), 0(3)>
87   D.1795_8 = i_28 + 3;
88   x[i_28] = D.1795_8;
89   sum_11 = D.1795_8 + sum_29;
90   i_12 = i_28 + 1;
91   if (N_6(D) > i_12)
92     goto header_bb;
93 
94 
95 exit_bb:
96 
97   # sum_21 = PHI <sum_11(4)>
98   printf (&"%d"[0], sum_21);
99 
100 
101 after reduction transformation (only relevant parts):
102 
103 parloop
104 {
105 
106 ....
107 
108 
109   # Storing the initial value given by the user.  #
110 
111   .paral_data_store.32.sum.27 = 1;
112 
113   #pragma omp parallel num_threads(4)
114 
115   #pragma omp for schedule(static)
116 
117   # The neutral element corresponding to the particular
118   reduction's operation, e.g. 0 for PLUS_EXPR,
119   1 for MULT_EXPR, etc. replaces the user's initial value.  #
120 
121   # sum.27_29 = PHI <sum.27_11, 0>
122 
123   sum.27_11 = D.1827_8 + sum.27_29;
124 
125   GIMPLE_OMP_CONTINUE
126 
127   # Adding this reduction phi is done at create_phi_for_local_result() #
128   # sum.27_56 = PHI <sum.27_11, 0>
129   GIMPLE_OMP_RETURN
130 
131   # Creating the atomic operation is done at
132   create_call_for_reduction_1()  #
133 
134   #pragma omp atomic_load
135   D.1839_59 = *&.paral_data_load.33_51->reduction.23;
136   D.1840_60 = sum.27_56 + D.1839_59;
137   #pragma omp atomic_store (D.1840_60);
138 
139   GIMPLE_OMP_RETURN
140 
141  # collecting the result after the join of the threads is done at
142   create_loads_for_reductions().
143   The value computed by the threads is loaded from the
144   shared struct.  #
145 
146 
147   .paral_data_load.33_52 = &.paral_data_store.32;
148   sum_37 =  .paral_data_load.33_52->sum.27;
149   sum_43 = D.1795_41 + sum_37;
150 
151   exit bb:
152   # sum_21 = PHI <sum_43, sum_26>
153   printf (&"%d"[0], sum_21);
154 
155 ...
156 
157 }
158 
159 */
160 
161 /* Minimal number of iterations of a loop that should be executed in each
162    thread.  */
163 #define MIN_PER_THREAD 100
164 
/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value (the neutral
				   element of the operation).  */
  gimple new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
182 
183 /* Equality and hash functions for hashtab code.  */
184 
185 static int
186 reduction_info_eq (const void *aa, const void *bb)
187 {
188   const struct reduction_info *a = (const struct reduction_info *) aa;
189   const struct reduction_info *b = (const struct reduction_info *) bb;
190 
191   return (a->reduc_phi == b->reduc_phi);
192 }
193 
194 static hashval_t
195 reduction_info_hash (const void *aa)
196 {
197   const struct reduction_info *a = (const struct reduction_info *) aa;
198 
199   return htab_hash_pointer (a->reduc_phi);
200 }
201 
202 static struct reduction_info *
203 reduction_phi (htab_t reduction_list, gimple phi)
204 {
205   struct reduction_info tmpred, *red;
206 
207   if (htab_elements (reduction_list) == 0)
208     return NULL;
209 
210   tmpred.reduc_phi = phi;
211   red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
212 
213   return red;
214 }
215 
/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy (SSA_NAME_VERSION,
			   also used as the hash key).  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
225 
226 /* Equality and hash functions for hashtab code.  */
227 
228 static int
229 name_to_copy_elt_eq (const void *aa, const void *bb)
230 {
231   const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
232   const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
233 
234   return a->version == b->version;
235 }
236 
237 static hashval_t
238 name_to_copy_elt_hash (const void *aa)
239 {
240   const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
241 
242   return (hashval_t) a->version;
243 }
244 
245 
246 /* Data dependency analysis. Returns true if the iterations of LOOP
247    are independent on each other (that is, if we can execute them
248    in parallel).  */
249 
250 static bool
251 loop_parallel_p (struct loop *loop)
252 {
253   VEC (ddr_p, heap) * dependence_relations;
254   VEC (data_reference_p, heap) *datarefs;
255   lambda_trans_matrix trans;
256   bool ret = false;
257 
258   if (dump_file && (dump_flags & TDF_DETAILS))
259   {
260     fprintf (dump_file, "Considering loop %d\n", loop->num);
261     if (!loop->inner)
262       fprintf (dump_file, "loop is innermost\n");
263     else
264       fprintf (dump_file, "loop NOT innermost\n");
265    }
266 
267   /* Check for problems with dependences.  If the loop can be reversed,
268      the iterations are independent.  */
269   datarefs = VEC_alloc (data_reference_p, heap, 10);
270   dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
271   compute_data_dependences_for_loop (loop, true, &datarefs,
272 				     &dependence_relations);
273   if (dump_file && (dump_flags & TDF_DETAILS))
274     dump_data_dependence_relations (dump_file, dependence_relations);
275 
276   trans = lambda_trans_matrix_new (1, 1);
277   LTM_MATRIX (trans)[0][0] = -1;
278 
279   if (lambda_transform_legal_p (trans, 1, dependence_relations))
280     {
281       ret = true;
282       if (dump_file && (dump_flags & TDF_DETAILS))
283 	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
284     }
285   else if (dump_file && (dump_flags & TDF_DETAILS))
286     fprintf (dump_file,
287 	     "  FAILED: data dependencies exist across iterations\n");
288 
289   free_dependence_relations (dependence_relations);
290   free_data_refs (datarefs);
291 
292   return ret;
293 }
294 
295 /* Return true when LOOP contains basic blocks marked with the
296    BB_IRREDUCIBLE_LOOP flag.  */
297 
298 static inline bool
299 loop_has_blocks_with_irreducible_flag (struct loop *loop)
300 {
301   unsigned i;
302   basic_block *bbs = get_loop_body_in_dom_order (loop);
303   bool res = true;
304 
305   for (i = 0; i < loop->num_nodes; i++)
306     if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
307       goto end;
308 
309   res = false;
310  end:
311   free (bbs);
312   return res;
313 }
314 
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  GSI may be NULL, in which case no new statements are
   emitted; if the address is not already cached in DECL_ADDRESS, NULL is
   returned instead.  */

static tree
take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
		 gimple_stmt_iterator *gsi)
{
  int uid;
  void **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, bvar, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Strip handled components so VAR_P points at the operand slot holding
     the base declaration.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;
  uid = DECL_UID (*var_p);

  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
  if (!*dslot)
    {
      /* No cached address for this decl yet.  With GSI == NULL we must
	 not generate code, so report failure to the caller.  */
      if (gsi == NULL)
	return NULL;
      /* Materialize name = &base once, on the region entry edge; this is
	 valid because the address is function-invariant.  */
      addr = build_addr (*var_p, current_function_decl);
      bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
      add_referenced_var (bvar);
      stmt = gimple_build_assign (bvar, addr);
      name = make_ssa_name (bvar, stmt);
      gimple_assign_set_lhs (stmt, name);
      gsi_insert_on_edge_immediate (entry, stmt);

      /* Cache the name so later calls for the same decl reuse it.  */
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = ((struct int_tree_map *) *dslot)->to;

  /* GSI == NULL: build the address expression without emitting any
     statements (used when rewriting debug bind values).  */
  if (gsi == NULL)
    {
      if (var_p != &obj)
	{
	  *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
	  name = build_fold_addr_expr_with_type (obj, type);
	}
      return fold_convert (type, name);
    }
  if (var_p != &obj)
    {
      /* OBJ is a component reference: rewrite its base through the cached
	 pointer, then gimplify the resulting address before GSI.  */
      *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
      name = force_gimple_operand (build_addr (obj, current_function_decl),
				   &stmts, true, NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  if (TREE_TYPE (name) != type)
    {
      /* Convert to the requested pointer type, emitting any statements
	 this requires before GSI.  */
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
391 
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  Always returns 1 so the traversal
   continues.  */

static int
initialize_reductions (void **slot, void *data)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct loop *loop = (struct loop *) data;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");
  add_referenced_var (bvar);

  /* Build a throw-away OMP_CLAUSE_REDUCTION clause so omp_reduction_init
     can compute the neutral element for this reduction's operation.  */
  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken in consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  /* Remember the user's original initial value; it is merged back into
     the final result after the threads join.  */
  reduc->initial_value = arg;
  return 1;
}
442 
/* Data passed to the eliminate_local_variables_1 callback.  */

struct elv_data
{
  struct walk_stmt_info info;	/* Must stay the first member: the callback
				   receives a pointer to INFO and casts it
				   back to the enclosing struct.  */
  edge entry;			/* Entry edge of the SESE region.  */
  htab_t decl_address;		/* Map from decl uids to cached address
				   ssa names (see take_address_of).  */
  gimple_stmt_iterator *gsi;	/* Insertion point for new statements;
				   NULL when processing a debug bind value
				   (no code may be generated then).  */
  bool changed;			/* Set when the walked expression was
				   modified.  */
  bool reset;			/* Set when a debug bind value could not be
				   rewritten and must be reset.  */
};
452 
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      /* Only local (non-external) variables need rewriting.  */
      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      /* Replace the direct reference T by *(&T), with the address
	 hoisted out of the region via take_address_of.  */
      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      /* take_address_of fails only in the no-codegen (GSI == NULL, debug
	 bind) mode; ask the caller to reset the bind value.  */
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  /* Do not descend into non-expression nodes.  */
  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
529 
530 /* Moves the references to local variables in STMT at *GSI out of the single
531    entry single exit region starting at ENTRY.  DECL_ADDRESS contains
532    addresses of the references that had their address taken
533    already.  */
534 
535 static void
536 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
537 				htab_t decl_address)
538 {
539   struct elv_data dta;
540   gimple stmt = gsi_stmt (*gsi);
541 
542   memset (&dta.info, '\0', sizeof (dta.info));
543   dta.entry = entry;
544   dta.decl_address = decl_address;
545   dta.changed = false;
546   dta.reset = false;
547 
548   if (gimple_debug_bind_p (stmt))
549     {
550       dta.gsi = NULL;
551       walk_tree (gimple_debug_bind_get_value_ptr (stmt),
552 		 eliminate_local_variables_1, &dta.info, NULL);
553       if (dta.reset)
554 	{
555 	  gimple_debug_bind_reset_value (stmt);
556 	  dta.changed = true;
557 	}
558     }
559   else
560     {
561       dta.gsi = gsi;
562       walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
563     }
564 
565   if (dta.changed)
566     update_stmt (stmt);
567 }
568 
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				     free);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: process all real statements, populating DECL_ADDRESS
     with the hoisted addresses.  Debug binds are deferred so they can
     reuse -- but never create -- DECL_ADDRESS entries.  */
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_debug_bind_p (gsi_stmt (gsi)))
	  has_debug_stmt = true;
	else
	  eliminate_local_variables_stmt (entry, &gsi, decl_address);

  /* Second pass: rewrite the debug binds, if any were seen.  */
  if (has_debug_stmt)
    for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, decl_address);

  htab_delete (decl_address);
  VEC_free (basic_block, heap, body);
}
613 
614 /* Returns true if expression EXPR is not defined between ENTRY and
615    EXIT, i.e. if all its operands are defined outside of the region.  */
616 
617 static bool
618 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
619 {
620   basic_block entry_bb = entry->src;
621   basic_block exit_bb = exit->dest;
622   basic_block def_bb;
623 
624   if (is_gimple_min_invariant (expr))
625     return true;
626 
627   if (TREE_CODE (expr) == SSA_NAME)
628     {
629       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
630       if (def_bb
631 	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
632 	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
633 	return false;
634 
635       return true;
636     }
637 
638   return false;
639 }
640 
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name,
			       htab_t name_copies, htab_t decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  void **slot, **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  /* Only allocate a hash slot when we actually intend to create a copy;
     otherwise just look the name up.  */
  slot = htab_find_slot_with_hash (name_copies, &elt, idx,
				   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return ((struct name_to_copy_elt *) *slot)->new_name;

  /* Duplicate the base decl of NAME, or reuse an earlier duplicate.  */
  var = SSA_NAME_VAR (name);
  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      add_referenced_var (var_copy);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
         it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      /* With NO_INSERT a slot only exists for a previously entered name,
	 and that case returned above.  */
      gcc_assert (!slot);
      copy = name;
    }

  /* Rebase the (possibly original) name onto the duplicated decl.  */
  SSA_NAME_VAR (copy) = var_copy;
  return copy;
}
715 
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  mark_virtual_ops_for_renaming (stmt);

  /* Names defined by STMT itself live inside the region; only their base
     decl is duplicated, never the name (hence copy == name).  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
  {
    name = DEF_FROM_PTR (def);
    gcc_assert (TREE_CODE (name) == SSA_NAME);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  false);
    gcc_assert (copy == name);
  }

  /* Used names get a fresh duplicate exactly when they are invariant in
     the region, i.e. defined outside of it.  */
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    copy_name_p = expr_invariant_in_region_p (entry, exit, name);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  copy_name_p);
    SET_USE (use, copy);
  }
}
756 
/* Rewrites the debug bind STMT to refer to the duplicates created by
   separate_decls_in_region_*: the bound variable is replaced by its
   copy from DECL_COPIES and every ssa name used in the bind value by
   its copy from NAME_COPIES.  Returns true when the bound variable is
   a DEBUG_EXPR_DECL or has no duplicate in DECL_COPIES, false
   otherwise.  If some used ssa name has no copy, the bind value is
   reset instead of being rewritten.  */

static bool
separate_decls_in_region_debug_bind (gimple stmt,
				     htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  void **slot, **dslot;

  var = gimple_debug_bind_get_var (stmt);
  if (TREE_CODE (var) == DEBUG_EXPR_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  /* Bind the duplicated decl instead of the original one.  */
  gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    elt.version = SSA_NAME_VERSION (name);
    slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
    /* A used name without a copy cannot be expressed after the split;
       drop the bind value rather than emit a wrong one.  */
    if (!slot)
      {
	gimple_debug_bind_reset_value (stmt);
	update_stmt (stmt);
	break;
      }

    SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
  }

  return false;
}
805 
806 /* Callback for htab_traverse.  Adds a field corresponding to the reduction
807    specified in SLOT. The type is passed in DATA.  */
808 
809 static int
810 add_field_for_reduction (void **slot, void *data)
811 {
812 
813   struct reduction_info *const red = (struct reduction_info *) *slot;
814   tree const type = (tree) data;
815   tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
816   tree field = build_decl (gimple_location (red->reduc_stmt),
817 			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
818 
819   insert_field_into_struct (type, field);
820 
821   red->field = field;
822 
823   return 1;
824 }
825 
826 /* Callback for htab_traverse.  Adds a field corresponding to a ssa name
827    described in SLOT. The type is passed in DATA.  */
828 
829 static int
830 add_field_for_name (void **slot, void *data)
831 {
832   struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
833   tree type = (tree) data;
834   tree name = ssa_name (elt->version);
835   tree var = SSA_NAME_VAR (name);
836   tree field = build_decl (DECL_SOURCE_LOCATION (var),
837 			   FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
838 
839   insert_field_into_struct (type, field);
840   elt->field = field;
841 
842   return 1;
843 }
844 
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  Always returns 1 so the traversal
   continues.  */

static int
create_phi_for_local_result (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  const struct loop *const loop = (const struct loop *) data;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from  the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  /* Base the new ssa name on the reduction statement's result
     variable.  */
  local_res
    = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
		     NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  SSA_NAME_DEF_STMT (local_res) = new_phi;
  /* The neutral init value on the no-iteration edge, the computed value
     on the loop-exit edge.  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}
890 
/* Describes the shared structure used to communicate values between the
   threads and the joining code (cf. .paral_data_store/.paral_data_load
   in the file-level example).  */

struct clsn_data
{
  tree store;			/* Decl of the shared data structure that
				   values are stored into.  */
  tree load;			/* Pointer decl through which the shared
				   data is loaded.  */

  basic_block store_bb;		/* Block in which stores are generated.  */
  basic_block load_bb;		/* Block in which loads are generated.  */
};
899 
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  Always returns 1
   so the traversal continues.  */

static int
create_call_for_reduction_1 (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* The memory location the atomic update operates on: the reduction's
     field in the shared structure.  */
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  /* Emit "tmp_load = *addr" as an OMP atomic load in the new block.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  add_referenced_var (tmp_load);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* Combine the loaded value with this thread's local result (the phi
     created by create_phi_for_local_result) ...  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* ... and store the combined value back atomically.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
954 
955 /* Create the atomic operation at the join point of the threads.
956    REDUCTION_LIST describes the reductions in the LOOP.
957    LD_ST_DATA describes the shared data structure where
958    shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop, htab_t reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First give every thread a phi carrying its local partial result;
     this must happen before the atomic combination code is emitted.  */
  htab_traverse (reduction_list, create_phi_for_local_result, loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  /* Emit the atomic load/op/store sequence for each reduction there.  */
  htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
}
968 
969 /* Callback for htab_traverse.  Loads the final reduction value at the
970    join point of all threads, and inserts it in the right place.  */
971 
static int
create_loads_for_reductions (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;
  tree name;
  tree x;

  /* Build "name = load->field", loading the final reduction value out of
     the shared data structure.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  /* Reuse the SSA name of the kept exit phi as the lhs, so existing uses
     of the reduction result pick up the loaded value.  */
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The phi is now redundant (NAME is defined by the load above); find
     and remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	/* Nonzero return continues the htab_traverse walk.  */
	return 1;
      }
  /* keep_res must be a phi in its own basic block.  */
  gcc_unreachable ();
}
1006 
1007 /* Load the reduction result that was stored in LD_ST_DATA.
1008    REDUCTION_LIST describes the list of reductions that the
1009    loads should be generated for.  */
1010 static void
1011 create_final_loads_for_reduction (htab_t reduction_list,
1012 				  struct clsn_data *ld_st_data)
1013 {
1014   gimple_stmt_iterator gsi;
1015   tree t;
1016   gimple stmt;
1017 
1018   gsi = gsi_after_labels (ld_st_data->load_bb);
1019   t = build_fold_addr_expr (ld_st_data->store);
1020   stmt = gimple_build_assign (ld_st_data->load, t);
1021 
1022   gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1023   SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
1024 
1025   htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1026 
1027 }
1028 
1029 /* Callback for htab_traverse.  Store the neutral value for the
1030   particular reduction's operation, e.g. 0 for PLUS_EXPR,
1031   1 for MULT_EXPR, etc. into the reduction field.
1032   The reduction is specified in SLOT. The store information is
1033   passed in DATA.  */
1034 
1035 static int
1036 create_stores_for_reduction (void **slot, void *data)
1037 {
1038   struct reduction_info *const red = (struct reduction_info *) *slot;
1039   struct clsn_data *const clsn_data = (struct clsn_data *) data;
1040   tree t;
1041   gimple stmt;
1042   gimple_stmt_iterator gsi;
1043   tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1044 
1045   gsi = gsi_last_bb (clsn_data->store_bb);
1046   t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1047   stmt = gimple_build_assign (t, red->initial_value);
1048   mark_virtual_ops_for_renaming (stmt);
1049   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1050 
1051   return 1;
1052 }
1053 
1054 /* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
1055    store to a field of STORE in STORE_BB for the ssa name and its duplicate
1056    specified in SLOT.  */
1057 
1058 static int
1059 create_loads_and_stores_for_name (void **slot, void *data)
1060 {
1061   struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1062   struct clsn_data *const clsn_data = (struct clsn_data *) data;
1063   tree t;
1064   gimple stmt;
1065   gimple_stmt_iterator gsi;
1066   tree type = TREE_TYPE (elt->new_name);
1067   tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
1068   tree load_struct;
1069 
1070   gsi = gsi_last_bb (clsn_data->store_bb);
1071   t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1072   stmt = gimple_build_assign (t, ssa_name (elt->version));
1073   mark_virtual_ops_for_renaming (stmt);
1074   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1075 
1076   gsi = gsi_last_bb (clsn_data->load_bb);
1077   load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
1078   t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1079   stmt = gimple_build_assign (elt->new_name, t);
1080   SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1081   gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1082 
1083   return 1;
1084 }
1085 
1086 /* Moves all the variables used in LOOP and defined outside of it (including
1087    the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1088    name) to a structure created for this purpose.  The code
1089 
1090    while (1)
1091      {
1092        use (a);
1093        use (b);
1094      }
1095 
1096    is transformed this way:
1097 
1098    bb0:
1099    old.a = a;
1100    old.b = b;
1101 
1102    bb1:
1103    a' = new->a;
1104    b' = new->b;
1105    while (1)
1106      {
1107        use (a');
1108        use (b');
1109      }
1110 
1111    `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
1112    pointer `new' is intentionally not initialized (the loop will be split to a
1113    separate function later, and `new' will be initialized from its arguments).
1114    LD_ST_DATA holds information about the shared data structure used to pass
1115    information among the threads.  It is initialized here, and
1116    gen_parallel_loop will pass it to create_call_for_reduction that
1117    needs this information.  REDUCTION_LIST describes the reductions
1118    in LOOP.  */
1119 
static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
				    name_to_copy_elt_eq, free);
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				    free);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: record every name/decl used in the region (phis and
     non-debug statements) in NAME_COPIES and DECL_COPIES.  Debug binds
     are only noted here and handled in the second pass below.  */
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (gimple_debug_bind_p (stmt))
		{
		  /* Drop debug binds that refer to values we cannot map
		     into the new function; keep the rest.  */
		  if (separate_decls_in_region_debug_bind (stmt,
							   name_copies,
							   decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  VEC_free (basic_block, heap, body);

  if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
         loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (BUILTINS_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      /* One field per copied name, plus one per reduction.  */
      htab_traverse (name_copies, add_field_for_name, type);
      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  /* Create the fields for reductions.  */
	  htab_traverse (reduction_list, add_field_for_reduction,
                         type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      add_referenced_var (*arg_struct);
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      add_referenced_var (nvar);
      *new_arg_struct = make_ssa_name (nvar, NULL);

      /* Stores go in BB0 (before the region), loads in BB1 (the region
         entry); see the function comment's bb0/bb1 picture.  */
      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      htab_traverse (name_copies, create_loads_and_stores_for_name,
		     ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  htab_traverse (reduction_list, create_stores_for_reduction,
                        ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  /* The hash tables own their elements (freed via the `free' callback
     given to htab_create).  */
  htab_delete (decl_copies);
  htab_delete (name_copies);
}
1253 
1254 /* Bitmap containing uids of functions created by parallelization.  We cannot
1255    allocate it from the default obstack, as it must live across compilation
1256    of several functions; we make it gc allocated instead.  */
1257 
1258 static GTY(()) bitmap parallelized_functions;
1259 
1260 /* Returns true if FN was created by create_loop_fn.  */
1261 
1262 static bool
1263 parallelized_function_p (tree fn)
1264 {
1265   if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1266     return false;
1267 
1268   return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1269 }
1270 
1271 /* Creates and returns an empty function that will receive the body of
1272    a parallelized loop.  */
1273 
static tree
create_loop_fn (void)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  /* Counter making each generated function name unique within this
     translation unit.  */
  static unsigned loopfn_num;

  /* Derive a private assembler name like "<caller>.$loopfn.<N>".  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* The outlined body has signature "void fn (void *)"; the single
     pointer carries the shared data structure.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (BUILTINS_LOCATION,
		     FUNCTION_DECL, name, type);
  /* Record the new function so parallelized_function_p can recognize it
     later (the bitmap is GC-allocated, see its declaration).  */
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  /* Local, artificial, never-inlined function with a definition.  */
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* Void result.  */
  t = build_decl (BUILTINS_LOCATION,
		  RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The single parameter, ".paral_data_param", receives the pointer to
     the shared data structure.  */
  t = build_decl (BUILTINS_LOCATION,
		  PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
1328 
1329 /* Moves the exit condition of LOOP to the beginning of its header, and
1330    duplicates the part of the last iteration that gets disabled to the
1331    exit of the loop.  NIT is the number of iterations of the loop
1332    (used to initialize the variables in the duplicated part).
1333 
1334    TODO: the common case is that latch of the loop is empty and immediately
1335    follows the loop exit.  In this case, it would be better not to copy the
1336    body of the loop, but only move the entry of the loop directly before the
1337    exit check and increase the number of iterations of the loop by one.
1338    This may need some additional preconditioning in case NIT = ~0.
1339    REDUCTION_LIST describes the reductions in LOOP.  */
1340 
static void
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  /* Split the header after its labels so the original header body can be
     duplicated separately below.  */
  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  /* The exit test compares the control IV against NIT (guaranteed by
     canonicalize_loop_ivs, called before us).  */
  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh name and re-create RES as a phi in
	 ORIG_HEADER fed by the fresh name, keeping all uses of RES
	 valid.  */
      t = make_ssa_name (SSA_NAME_VAR (res), phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      SSA_NAME_DEF_STMT (res) = nphi;
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* Keep the exit condition testing the renamed control IV.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }
  bbs = get_loop_body_in_dom_order (loop);

  /* N = index of the latch in dominator order; BBS[0] is the header,
     which is excluded from the duplicated tail (hence BBS + 1).  */
  for (n = 0; bbs[n] != loop->latch; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */

  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (!is_gimple_reg (res))
	{
	  /* Virtual operands stay untouched.  */
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
         keep the phi at the reduction's keep_res field.  The
         PHI_RESULT of this phi is the resulting value of the reduction
         variable when exiting the loop.  */

      exit = single_dom_exit (loop);

      if (htab_elements (reduction_list) > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Anything else must be (a copy of) the control IV; it gets an
	 explicit assignment below instead of a phi.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 =  gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				  fold_convert (TREE_TYPE (control_name), nit_1),
				  false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
1445 
1446 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1447    LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1448    NEW_DATA is the variable that should be initialized from the argument
1449    of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
1450    basic block containing GIMPLE_OMP_PARALLEL tree.  */
1451 
static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  /* num_threads (N_THREADS) clause.  */
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      /* Emit "param = &data; new_data = (type) param" at the start of
	 the parallel region, so the outlined body can find the shared
	 structure through its pointer argument.  */
      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  /* CVAR is the canonical IV; its phi gives the initial and the
     per-iteration next value.  */
  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = make_ssa_name (cvar_base, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* Drop the IV increment from the latch; GIMPLE_OMP_CONTINUE will take
     over that role below.  */
  gsi = gsi_last_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  /* GUARD models "this thread gets no iterations"; END is the normal
     path from the latch to the exit block.  */
  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      /* Mirror the header phi's preheader argument on GUARD and its
	 latch argument on END.  */
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT);

  return paral_bb;
}
1572 
1573 /* Generates code to execute the iterations of LOOP in N_THREADS
1574    threads in parallel.
1575 
1576    NITER describes number of iterations of LOOP.
1577    REDUCTION_LIST describes the reductions existent in the LOOP.  */
1578 
static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
     || NITER < MIN_PER_THREAD * N_THREADS)
     goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* Guard: !MAY_BE_ZERO && NITER >= MIN_PER_THREAD * N_THREADS.  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      /* loop_version needs a bare condition expression; gimplify once
	 more if folding produced something more complex.  */
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
					new_arg_struct, n_threads);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
1725 
1726 /* Returns true when LOOP contains vector phi nodes.  */
1727 
1728 static bool
1729 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1730 {
1731   unsigned i;
1732   basic_block *bbs = get_loop_body_in_dom_order (loop);
1733   gimple_stmt_iterator gsi;
1734   bool res = true;
1735 
1736   for (i = 0; i < loop->num_nodes; i++)
1737     for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1738       if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1739 	goto end;
1740 
1741   res = false;
1742  end:
1743   free (bbs);
1744   return res;
1745 }
1746 
1747 /* Create a reduction_info struct, initialize it with REDUC_STMT
1748    and PHI, insert it to the REDUCTION_LIST.  */
1749 
1750 static void
1751 build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1752 {
1753   PTR *slot;
1754   struct reduction_info *new_reduction;
1755 
1756   gcc_assert (reduc_stmt);
1757 
1758   if (dump_file && (dump_flags & TDF_DETAILS))
1759     {
1760       fprintf (dump_file,
1761 	       "Detected reduction. reduction stmt is: \n");
1762       print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1763       fprintf (dump_file, "\n");
1764     }
1765 
1766   new_reduction = XCNEW (struct reduction_info);
1767 
1768   new_reduction->reduc_stmt = reduc_stmt;
1769   new_reduction->reduc_phi = phi;
1770   new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1771   slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1772   *slot = new_reduction;
1773 }
1774 
1775 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */
1776 
static void
gather_scalar_reductions (loop_p loop, htab_t reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  /* Silence the vectorizer's dumping while borrowing its analysis.  */
  vect_dump = NULL;
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      /* Skip virtual operands.  */
      if (!is_gimple_reg (res))
	continue;

      /* A header phi that is not a simple IV is a reduction candidate,
	 but only when the loop form was analyzable.  */
      if (!simple_iv (loop, loop, res, &iv, true)
	&& simple_loop_info)
	{
           gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
	   if (reduc_stmt && !double_reduc)
              build_new_reduction (reduction_list, reduc_stmt, phi);
        }
    }
    /* NOTE(review): SIMPLE_LOOP_INFO may be NULL when the loop form was
       rejected; this relies on destroy_loop_vec_info tolerating NULL --
       confirm against tree-vect-loop.c.  */
    destroy_loop_vec_info (simple_loop_info, true);
}
1806 
1807 /* Try to initialize NITER for code generation part.  */
1808 
1809 static bool
1810 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1811 {
1812   edge exit = single_dom_exit (loop);
1813 
1814   gcc_assert (exit);
1815 
1816   /* We need to know # of iterations, and there should be no uses of values
1817      defined inside loop outside of it, unless the values are invariants of
1818      the loop.  */
1819   if (!number_of_iterations_exit (loop, exit, niter, false))
1820     {
1821       if (dump_file && (dump_flags & TDF_DETAILS))
1822 	fprintf (dump_file, "  FAILED: number of iterations not known\n");
1823       return false;
1824     }
1825 
1826   return true;
1827 }
1828 
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.
   Returns true iff every value live after the loop, and every loop-header
   PHI cycle that is not a simple induction variable, belongs to a detected
   reduction; returns false (vetoing parallelization) otherwise.  */

static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  /* Populate REDUCTION_LIST with the reductions found in the loop.  */
  gather_scalar_reductions (loop, reduction_list);


  /* First pass: every SSA value flowing out of the loop along the exit edge
     must be the result of a detected reduction.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      /* Only register values escaping the loop matter; memory PHIs etc.
	 are not checked here.  */
      if (is_gimple_reg (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it a part of reduction pattern:  \n");
	    }
	  /* No reductions at all were found, so the escaping value cannot
	     be part of one.  */
	  if (htab_elements (reduction_list) == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Locate a use of VAL inside the loop; for a reduction this is
	     presumably the loop-header PHI closing the reduction cycle.
	     NOTE(review): reduc_phi stays NULL when VAL has no use inside
	     the loop -- this relies on reduction_phi coping with a NULL
	     argument; verify.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      /* A header PHI that is neither a simple induction variable nor a
	 detected reduction is a loop-carried scalar dependency we cannot
	 parallelize around.  */
      if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }


  return true;
}
1924 
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  /* Thread count requested via -ftree-parallelize-loops=N.  */
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  htab_t reduction_list;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  /* One hash table of reduction_info entries, emptied and reused for each
     candidate loop; the table owns its entries (freed via `free').  */
  reduction_list = htab_create (10, reduction_info_hash,
				     reduction_info_eq, free);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      htab_empty (reduction_list);
      if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
	if (loop->inner)
	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
	else
	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
      }

      /* If we use autopar in graphite pass, we use its marked dependency
      checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
	   fprintf (dump_file, "loop is not parallel according to graphite\n");
	continue;
      }

      /* Code generation requires a single exit that dominates the latch.  */
      if (!single_dom_exit (loop))
      {

        if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "loop is !single_dom_exit\n");

	continue;
      }

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;
      estimated = estimated_loop_iterations_int (loop, false);
      /* FIXME: Bypass this check as graphite doesn't update the
      count and frequency correctly now.  */
      /* Skip loops with too few iterations to pay for the threading
	 overhead (fewer than MIN_PER_THREAD iterations per thread), and
	 loops in cold code.  estimated == -1 means "unknown".  */
      if (!flag_loop_parallelize_all
	  && ((estimated !=-1
	     && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, reduction_list))
	continue;

      /* Under graphite (-floop-parallelize-all) the dependency analysis
	 was already done above; otherwise run our own test.  */
      if (!flag_loop_parallelize_all && !loop_parallel_p (loop))
	continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
      {
	if (loop->inner)
	  fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
	else
	  fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
	loop_loc = find_loop_location (loop);
	if (loop_loc != UNKNOWN_LOC)
	  fprintf (dump_file, "\nloop at %s:%d: ",
		   LOC_FILE (loop_loc), LOC_LINE (loop_loc));
      }
      /* Rewrite the loop into GIMPLE_OMP_PARALLEL/GIMPLE_OMP_FOR form.  */
      gen_parallel_loop (loop, reduction_list,
			 n_threads, &niter_desc);
      /* Sanity-check the CFG, dominators, loop structure and loop-closed
	 SSA form after the transformation.  */
      verify_flow_info ();
      verify_dominators (CDI_DOMINATORS);
      verify_loop_structure ();
      verify_loop_closed_ssa ();
    }

  free_stmt_vec_info_vec ();
  htab_delete (reduction_list);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solutions
     for ESCAPED and CALLUSED.  */
  if (changed)
    {
      pt_solution_reset (&cfun->gimple_df->escaped);
      pt_solution_reset (&cfun->gimple_df->callused);
    }

  return changed;
}
2041 
2042 #include "gt-tree-parloops.h"
2043