1*e4b17023SJohn Marino /* Matrix layout transformations.
2*e4b17023SJohn Marino Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3*e4b17023SJohn Marino Contributed by Razya Ladelsky <razya@il.ibm.com>
4*e4b17023SJohn Marino Originally written by Revital Eres and Mustafa Hagog.
5*e4b17023SJohn Marino
6*e4b17023SJohn Marino This file is part of GCC.
7*e4b17023SJohn Marino
8*e4b17023SJohn Marino GCC is free software; you can redistribute it and/or modify it under
9*e4b17023SJohn Marino the terms of the GNU General Public License as published by the Free
10*e4b17023SJohn Marino Software Foundation; either version 3, or (at your option) any later
11*e4b17023SJohn Marino version.
12*e4b17023SJohn Marino
13*e4b17023SJohn Marino GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14*e4b17023SJohn Marino WARRANTY; without even the implied warranty of MERCHANTABILITY or
15*e4b17023SJohn Marino FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16*e4b17023SJohn Marino for more details.
17*e4b17023SJohn Marino
18*e4b17023SJohn Marino You should have received a copy of the GNU General Public License
19*e4b17023SJohn Marino along with GCC; see the file COPYING3. If not see
20*e4b17023SJohn Marino <http://www.gnu.org/licenses/>. */
21*e4b17023SJohn Marino
22*e4b17023SJohn Marino /*
23*e4b17023SJohn Marino Matrix flattening optimization tries to replace a N-dimensional
24*e4b17023SJohn Marino matrix with its equivalent M-dimensional matrix, where M < N.
25*e4b17023SJohn Marino This first implementation focuses on global matrices defined dynamically.
26*e4b17023SJohn Marino
27*e4b17023SJohn Marino When N==1, we actually flatten the whole matrix.
28*e4b17023SJohn Marino For instance consider a two-dimensional array a [dim1] [dim2].
29*e4b17023SJohn Marino The code for allocating space for it usually looks like:
30*e4b17023SJohn Marino
31*e4b17023SJohn Marino a = (int **) malloc(dim1 * sizeof(int *));
32*e4b17023SJohn Marino for (i=0; i<dim1; i++)
33*e4b17023SJohn Marino a[i] = (int *) malloc (dim2 * sizeof(int));
34*e4b17023SJohn Marino
35*e4b17023SJohn Marino If the array "a" is found suitable for this optimization,
36*e4b17023SJohn Marino its allocation is replaced by:
37*e4b17023SJohn Marino
38*e4b17023SJohn Marino a = (int *) malloc (dim1 * dim2 *sizeof(int));
39*e4b17023SJohn Marino
40*e4b17023SJohn Marino and all the references to a[i][j] are replaced by a[i * dim2 + j].
41*e4b17023SJohn Marino
42*e4b17023SJohn Marino The two main phases of the optimization are the analysis
43*e4b17023SJohn Marino and transformation.
44*e4b17023SJohn Marino The driver of the optimization is matrix_reorg ().
45*e4b17023SJohn Marino
46*e4b17023SJohn Marino
47*e4b17023SJohn Marino
48*e4b17023SJohn Marino Analysis phase:
49*e4b17023SJohn Marino ===============
50*e4b17023SJohn Marino
51*e4b17023SJohn Marino We'll number the dimensions outside-in, meaning the most external
52*e4b17023SJohn Marino is 0, then 1, and so on.
53*e4b17023SJohn Marino The analysis part of the optimization determines K, the escape
54*e4b17023SJohn Marino level of a N-dimensional matrix (K <= N), that allows flattening of
55*e4b17023SJohn Marino the external dimensions 0,1,..., K-1. Escape level 0 means that the
56*e4b17023SJohn Marino whole matrix escapes and no flattening is possible.
57*e4b17023SJohn Marino
58*e4b17023SJohn Marino The analysis part is implemented in analyze_matrix_allocation_site()
59*e4b17023SJohn Marino and analyze_matrix_accesses().
60*e4b17023SJohn Marino
61*e4b17023SJohn Marino Transformation phase:
62*e4b17023SJohn Marino =====================
63*e4b17023SJohn Marino In this phase we define the new flattened matrices that replace the
64*e4b17023SJohn Marino original matrices in the code.
65*e4b17023SJohn Marino Implemented in transform_allocation_sites(),
66*e4b17023SJohn Marino transform_access_sites().
67*e4b17023SJohn Marino
68*e4b17023SJohn Marino Matrix Transposing
69*e4b17023SJohn Marino ==================
70*e4b17023SJohn Marino The idea of Matrix Transposing is organizing the matrix in a different
71*e4b17023SJohn Marino layout such that the dimensions are reordered.
72*e4b17023SJohn Marino This could produce better cache behavior in some cases.
73*e4b17023SJohn Marino
74*e4b17023SJohn Marino For example, lets look at the matrix accesses in the following loop:
75*e4b17023SJohn Marino
76*e4b17023SJohn Marino for (i=0; i<N; i++)
77*e4b17023SJohn Marino for (j=0; j<M; j++)
78*e4b17023SJohn Marino access to a[i][j]
79*e4b17023SJohn Marino
80*e4b17023SJohn Marino This loop can produce good cache behavior because the elements of
81*e4b17023SJohn Marino the inner dimension are accessed sequentially.
82*e4b17023SJohn Marino
83*e4b17023SJohn Marino However, if the accesses of the matrix were of the following form:
84*e4b17023SJohn Marino
85*e4b17023SJohn Marino for (i=0; i<N; i++)
86*e4b17023SJohn Marino for (j=0; j<M; j++)
87*e4b17023SJohn Marino access to a[j][i]
88*e4b17023SJohn Marino
89*e4b17023SJohn Marino In this loop we iterate the columns and not the rows.
90*e4b17023SJohn Marino Therefore, replacing the rows and columns
91*e4b17023SJohn Marino would have had an organization with better (cache) locality.
92*e4b17023SJohn Marino Replacing the dimensions of the matrix is called matrix transposing.
93*e4b17023SJohn Marino
94*e4b17023SJohn Marino This example, of course, could be enhanced to multiple dimensions matrices
95*e4b17023SJohn Marino as well.
96*e4b17023SJohn Marino
97*e4b17023SJohn Marino Since a program could include all kind of accesses, there is a decision
98*e4b17023SJohn Marino mechanism, implemented in analyze_transpose(), which implements a
99*e4b17023SJohn Marino heuristic that tries to determine whether to transpose the matrix or not,
100*e4b17023SJohn Marino according to the form of the more dominant accesses.
101*e4b17023SJohn Marino This decision is transferred to the flattening mechanism, and whether
102*e4b17023SJohn Marino the matrix was transposed or not, the matrix is flattened (if possible).
103*e4b17023SJohn Marino
104*e4b17023SJohn Marino This decision making is based on profiling information and loop information.
105*e4b17023SJohn Marino If profiling information is available, decision making mechanism will be
106*e4b17023SJohn Marino operated, otherwise the matrix will only be flattened (if possible).
107*e4b17023SJohn Marino
108*e4b17023SJohn Marino Both optimizations are described in the paper "Matrix flattening and
109*e4b17023SJohn Marino transposing in GCC" which was presented in GCC summit 2006.
110*e4b17023SJohn Marino http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf. */
111*e4b17023SJohn Marino
112*e4b17023SJohn Marino #include "config.h"
113*e4b17023SJohn Marino #include "system.h"
114*e4b17023SJohn Marino #include "coretypes.h"
115*e4b17023SJohn Marino #include "tm.h"
116*e4b17023SJohn Marino #include "tree.h"
117*e4b17023SJohn Marino #include "rtl.h"
118*e4b17023SJohn Marino #include "tree-inline.h"
119*e4b17023SJohn Marino #include "tree-flow.h"
120*e4b17023SJohn Marino #include "tree-flow-inline.h"
121*e4b17023SJohn Marino #include "langhooks.h"
122*e4b17023SJohn Marino #include "hashtab.h"
123*e4b17023SJohn Marino #include "flags.h"
124*e4b17023SJohn Marino #include "ggc.h"
125*e4b17023SJohn Marino #include "debug.h"
126*e4b17023SJohn Marino #include "target.h"
127*e4b17023SJohn Marino #include "cgraph.h"
128*e4b17023SJohn Marino #include "diagnostic-core.h"
129*e4b17023SJohn Marino #include "timevar.h"
130*e4b17023SJohn Marino #include "params.h"
131*e4b17023SJohn Marino #include "fibheap.h"
132*e4b17023SJohn Marino #include "intl.h"
133*e4b17023SJohn Marino #include "function.h"
134*e4b17023SJohn Marino #include "basic-block.h"
135*e4b17023SJohn Marino #include "cfgloop.h"
136*e4b17023SJohn Marino #include "tree-iterator.h"
137*e4b17023SJohn Marino #include "tree-pass.h"
138*e4b17023SJohn Marino #include "opts.h"
139*e4b17023SJohn Marino #include "tree-data-ref.h"
140*e4b17023SJohn Marino #include "tree-chrec.h"
141*e4b17023SJohn Marino #include "tree-scalar-evolution.h"
142*e4b17023SJohn Marino #include "tree-ssa-sccvn.h"
143*e4b17023SJohn Marino
144*e4b17023SJohn Marino /* We need to collect a lot of data from the original malloc,
145*e4b17023SJohn Marino particularly as the gimplifier has converted:
146*e4b17023SJohn Marino
147*e4b17023SJohn Marino orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
148*e4b17023SJohn Marino
149*e4b17023SJohn Marino into
150*e4b17023SJohn Marino
151*e4b17023SJohn Marino T3 = <constant> ; ** <constant> is amount to malloc; precomputed **
152*e4b17023SJohn Marino T4 = malloc (T3);
153*e4b17023SJohn Marino T5 = (struct_type *) T4;
154*e4b17023SJohn Marino orig_var = T5;
155*e4b17023SJohn Marino
156*e4b17023SJohn Marino The following struct fields allow us to collect all the necessary data from
157*e4b17023SJohn Marino the gimplified program. The comments in the struct below are all based
158*e4b17023SJohn Marino on the gimple example above. */
159*e4b17023SJohn Marino
160*e4b17023SJohn Marino struct malloc_call_data
161*e4b17023SJohn Marino {
162*e4b17023SJohn Marino gimple call_stmt; /* Tree for "T4 = malloc (T3);" */
163*e4b17023SJohn Marino tree size_var; /* Var decl for T3. */
164*e4b17023SJohn Marino tree malloc_size; /* Tree for "<constant>", the rhs assigned to T3. */
165*e4b17023SJohn Marino };
166*e4b17023SJohn Marino
167*e4b17023SJohn Marino static tree can_calculate_expr_before_stmt (tree, sbitmap);
168*e4b17023SJohn Marino static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
169*e4b17023SJohn Marino
170*e4b17023SJohn Marino /* The front end of the compiler, when parsing statements of the form:
171*e4b17023SJohn Marino
172*e4b17023SJohn Marino var = (type_cast) malloc (sizeof (type));
173*e4b17023SJohn Marino
174*e4b17023SJohn Marino always converts this single statement into the following statements
175*e4b17023SJohn Marino (GIMPLE form):
176*e4b17023SJohn Marino
177*e4b17023SJohn Marino T.1 = sizeof (type);
178*e4b17023SJohn Marino T.2 = malloc (T.1);
179*e4b17023SJohn Marino T.3 = (type_cast) T.2;
180*e4b17023SJohn Marino var = T.3;
181*e4b17023SJohn Marino
182*e4b17023SJohn Marino Since we need to create new malloc statements and modify the original
183*e4b17023SJohn Marino statements somewhat, we need to find all four of the above statements.
184*e4b17023SJohn Marino Currently record_call_1 (called for building cgraph edges) finds and
185*e4b17023SJohn Marino records the statements containing the actual call to malloc, but we
186*e4b17023SJohn Marino need to find the rest of the variables/statements on our own. That
187*e4b17023SJohn Marino is what the following function does. */
188*e4b17023SJohn Marino static void
collect_data_for_malloc_call(gimple stmt,struct malloc_call_data * m_data)189*e4b17023SJohn Marino collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
190*e4b17023SJohn Marino {
191*e4b17023SJohn Marino tree size_var = NULL;
192*e4b17023SJohn Marino tree malloc_fn_decl;
193*e4b17023SJohn Marino tree arg1;
194*e4b17023SJohn Marino
195*e4b17023SJohn Marino gcc_assert (is_gimple_call (stmt));
196*e4b17023SJohn Marino
197*e4b17023SJohn Marino malloc_fn_decl = gimple_call_fndecl (stmt);
198*e4b17023SJohn Marino if (malloc_fn_decl == NULL
199*e4b17023SJohn Marino || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
200*e4b17023SJohn Marino return;
201*e4b17023SJohn Marino
202*e4b17023SJohn Marino arg1 = gimple_call_arg (stmt, 0);
203*e4b17023SJohn Marino size_var = arg1;
204*e4b17023SJohn Marino
205*e4b17023SJohn Marino m_data->call_stmt = stmt;
206*e4b17023SJohn Marino m_data->size_var = size_var;
207*e4b17023SJohn Marino if (TREE_CODE (size_var) != VAR_DECL)
208*e4b17023SJohn Marino m_data->malloc_size = size_var;
209*e4b17023SJohn Marino else
210*e4b17023SJohn Marino m_data->malloc_size = NULL_TREE;
211*e4b17023SJohn Marino }
212*e4b17023SJohn Marino
213*e4b17023SJohn Marino /* Information about matrix access site.
214*e4b17023SJohn Marino For example: if an access site of matrix arr is arr[i][j]
215*e4b17023SJohn Marino the ACCESS_SITE_INFO structure will have the address
216*e4b17023SJohn Marino of arr as its stmt. The INDEX_INFO will hold information about the
217*e4b17023SJohn Marino initial address and index of each dimension. */
218*e4b17023SJohn Marino struct access_site_info
219*e4b17023SJohn Marino {
220*e4b17023SJohn Marino /* The statement (MEM_REF or POINTER_PLUS_EXPR). */
221*e4b17023SJohn Marino gimple stmt;
222*e4b17023SJohn Marino
223*e4b17023SJohn Marino /* In case of POINTER_PLUS_EXPR, what is the offset. */
224*e4b17023SJohn Marino tree offset;
225*e4b17023SJohn Marino
226*e4b17023SJohn Marino /* The index which created the offset. */
227*e4b17023SJohn Marino tree index;
228*e4b17023SJohn Marino
229*e4b17023SJohn Marino /* The indirection level of this statement. */
230*e4b17023SJohn Marino int level;
231*e4b17023SJohn Marino
232*e4b17023SJohn Marino /* TRUE for allocation site FALSE for access site. */
233*e4b17023SJohn Marino bool is_alloc;
234*e4b17023SJohn Marino
235*e4b17023SJohn Marino /* The function containing the access site. */
236*e4b17023SJohn Marino tree function_decl;
237*e4b17023SJohn Marino
238*e4b17023SJohn Marino /* This access is iterated in the inner most loop */
239*e4b17023SJohn Marino bool iterated_by_inner_most_loop_p;
240*e4b17023SJohn Marino };
241*e4b17023SJohn Marino
242*e4b17023SJohn Marino typedef struct access_site_info *access_site_info_p;
243*e4b17023SJohn Marino DEF_VEC_P (access_site_info_p);
244*e4b17023SJohn Marino DEF_VEC_ALLOC_P (access_site_info_p, heap);
245*e4b17023SJohn Marino
246*e4b17023SJohn Marino /* Calls to free when flattening a matrix. */
247*e4b17023SJohn Marino
248*e4b17023SJohn Marino struct free_info
249*e4b17023SJohn Marino {
250*e4b17023SJohn Marino gimple stmt;
251*e4b17023SJohn Marino tree func;
252*e4b17023SJohn Marino };
253*e4b17023SJohn Marino
254*e4b17023SJohn Marino /* Information about matrix to flatten. */
255*e4b17023SJohn Marino struct matrix_info
256*e4b17023SJohn Marino {
257*e4b17023SJohn Marino /* Decl tree of this matrix. */
258*e4b17023SJohn Marino tree decl;
259*e4b17023SJohn Marino /* Number of dimensions; number
260*e4b17023SJohn Marino of "*" in the type declaration. */
261*e4b17023SJohn Marino int num_dims;
262*e4b17023SJohn Marino
263*e4b17023SJohn Marino /* Minimum indirection level that escapes, 0 means that
264*e4b17023SJohn Marino the whole matrix escapes, k means that dimensions
265*e4b17023SJohn Marino 0 to ACTUAL_DIM - k escapes. */
266*e4b17023SJohn Marino int min_indirect_level_escape;
267*e4b17023SJohn Marino
268*e4b17023SJohn Marino gimple min_indirect_level_escape_stmt;
269*e4b17023SJohn Marino
270*e4b17023SJohn Marino /* Hold the allocation site for each level (dimension).
271*e4b17023SJohn Marino We can use NUM_DIMS as the upper bound and allocate the array
272*e4b17023SJohn Marino once with this number of elements and no need to use realloc and
273*e4b17023SJohn Marino MAX_MALLOCED_LEVEL. */
274*e4b17023SJohn Marino gimple *malloc_for_level;
275*e4b17023SJohn Marino
276*e4b17023SJohn Marino int max_malloced_level;
277*e4b17023SJohn Marino
278*e4b17023SJohn Marino /* Is the matrix transposed. */
279*e4b17023SJohn Marino bool is_transposed_p;
280*e4b17023SJohn Marino
281*e4b17023SJohn Marino /* The location of the allocation sites (they must be in one
282*e4b17023SJohn Marino function). */
283*e4b17023SJohn Marino tree allocation_function_decl;
284*e4b17023SJohn Marino
285*e4b17023SJohn Marino /* The calls to free for each level of indirection. */
286*e4b17023SJohn Marino struct free_info *free_stmts;
287*e4b17023SJohn Marino
288*e4b17023SJohn Marino /* An array which holds for each dimension its size. where
289*e4b17023SJohn Marino dimension 0 is the outer most (one that contains all the others).
290*e4b17023SJohn Marino */
291*e4b17023SJohn Marino tree *dimension_size;
292*e4b17023SJohn Marino
293*e4b17023SJohn Marino /* An array which holds for each dimension it's original size
294*e4b17023SJohn Marino (before transposing and flattening take place). */
295*e4b17023SJohn Marino tree *dimension_size_orig;
296*e4b17023SJohn Marino
297*e4b17023SJohn Marino /* An array which holds for each dimension the size of the type of
298*e4b17023SJohn Marino of elements accessed in that level (in bytes). */
299*e4b17023SJohn Marino HOST_WIDE_INT *dimension_type_size;
300*e4b17023SJohn Marino
301*e4b17023SJohn Marino int dimension_type_size_len;
302*e4b17023SJohn Marino
303*e4b17023SJohn Marino /* An array collecting the count of accesses for each dimension. */
304*e4b17023SJohn Marino gcov_type *dim_hot_level;
305*e4b17023SJohn Marino
306*e4b17023SJohn Marino /* An array of the accesses to be flattened.
307*e4b17023SJohn Marino elements are of type "struct access_site_info *". */
308*e4b17023SJohn Marino VEC (access_site_info_p, heap) * access_l;
309*e4b17023SJohn Marino
310*e4b17023SJohn Marino /* A map of how the dimensions will be organized at the end of
311*e4b17023SJohn Marino the analyses. */
312*e4b17023SJohn Marino int *dim_map;
313*e4b17023SJohn Marino };
314*e4b17023SJohn Marino
315*e4b17023SJohn Marino /* In each phi node we want to record the indirection level we have when we
316*e4b17023SJohn Marino get to the phi node. Usually we will have phi nodes with more than two
317*e4b17023SJohn Marino arguments, then we must assure that all of them get to the phi node with
318*e4b17023SJohn Marino the same indirection level, otherwise it's not safe to do the flattening.
319*e4b17023SJohn Marino So we record the information regarding the indirection level each time we
320*e4b17023SJohn Marino get to the phi node in this hash table. */
321*e4b17023SJohn Marino
322*e4b17023SJohn Marino struct matrix_access_phi_node
323*e4b17023SJohn Marino {
324*e4b17023SJohn Marino gimple phi;
325*e4b17023SJohn Marino int indirection_level;
326*e4b17023SJohn Marino };
327*e4b17023SJohn Marino
328*e4b17023SJohn Marino /* We use this structure to find if the SSA variable is accessed inside the
329*e4b17023SJohn Marino tree and record the tree containing it. */
330*e4b17023SJohn Marino
331*e4b17023SJohn Marino struct ssa_acc_in_tree
332*e4b17023SJohn Marino {
333*e4b17023SJohn Marino /* The variable whose accesses in the tree we are looking for. */
334*e4b17023SJohn Marino tree ssa_var;
335*e4b17023SJohn Marino /* The tree and code inside it the ssa_var is accessed, currently
336*e4b17023SJohn Marino it could be an MEM_REF or CALL_EXPR. */
337*e4b17023SJohn Marino enum tree_code t_code;
338*e4b17023SJohn Marino tree t_tree;
339*e4b17023SJohn Marino /* The place in the containing tree. */
340*e4b17023SJohn Marino tree *tp;
341*e4b17023SJohn Marino tree second_op;
342*e4b17023SJohn Marino bool var_found;
343*e4b17023SJohn Marino };
344*e4b17023SJohn Marino
345*e4b17023SJohn Marino static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
346*e4b17023SJohn Marino sbitmap, bool);
347*e4b17023SJohn Marino static int transform_allocation_sites (void **, void *);
348*e4b17023SJohn Marino static int transform_access_sites (void **, void *);
349*e4b17023SJohn Marino static int analyze_transpose (void **, void *);
350*e4b17023SJohn Marino static int dump_matrix_reorg_analysis (void **, void *);
351*e4b17023SJohn Marino
352*e4b17023SJohn Marino static bool check_transpose_p;
353*e4b17023SJohn Marino
354*e4b17023SJohn Marino /* Hash function used for the phi nodes. */
355*e4b17023SJohn Marino
356*e4b17023SJohn Marino static hashval_t
mat_acc_phi_hash(const void * p)357*e4b17023SJohn Marino mat_acc_phi_hash (const void *p)
358*e4b17023SJohn Marino {
359*e4b17023SJohn Marino const struct matrix_access_phi_node *const ma_phi =
360*e4b17023SJohn Marino (const struct matrix_access_phi_node *) p;
361*e4b17023SJohn Marino
362*e4b17023SJohn Marino return htab_hash_pointer (ma_phi->phi);
363*e4b17023SJohn Marino }
364*e4b17023SJohn Marino
365*e4b17023SJohn Marino /* Equality means phi node pointers are the same. */
366*e4b17023SJohn Marino
367*e4b17023SJohn Marino static int
mat_acc_phi_eq(const void * p1,const void * p2)368*e4b17023SJohn Marino mat_acc_phi_eq (const void *p1, const void *p2)
369*e4b17023SJohn Marino {
370*e4b17023SJohn Marino const struct matrix_access_phi_node *const phi1 =
371*e4b17023SJohn Marino (const struct matrix_access_phi_node *) p1;
372*e4b17023SJohn Marino const struct matrix_access_phi_node *const phi2 =
373*e4b17023SJohn Marino (const struct matrix_access_phi_node *) p2;
374*e4b17023SJohn Marino
375*e4b17023SJohn Marino if (phi1->phi == phi2->phi)
376*e4b17023SJohn Marino return 1;
377*e4b17023SJohn Marino
378*e4b17023SJohn Marino return 0;
379*e4b17023SJohn Marino }
380*e4b17023SJohn Marino
381*e4b17023SJohn Marino /* Hold the PHI nodes we visit during the traversal for escaping
382*e4b17023SJohn Marino analysis. */
383*e4b17023SJohn Marino static htab_t htab_mat_acc_phi_nodes = NULL;
384*e4b17023SJohn Marino
385*e4b17023SJohn Marino /* This hash-table holds the information about the matrices we are
386*e4b17023SJohn Marino going to handle. */
387*e4b17023SJohn Marino static htab_t matrices_to_reorg = NULL;
388*e4b17023SJohn Marino
389*e4b17023SJohn Marino /* Return a hash for MTT, which is really a "matrix_info *". */
390*e4b17023SJohn Marino static hashval_t
mtt_info_hash(const void * mtt)391*e4b17023SJohn Marino mtt_info_hash (const void *mtt)
392*e4b17023SJohn Marino {
393*e4b17023SJohn Marino return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
394*e4b17023SJohn Marino }
395*e4b17023SJohn Marino
396*e4b17023SJohn Marino /* Return true if MTT1 and MTT2 (which are really both of type
397*e4b17023SJohn Marino "matrix_info *") refer to the same decl. */
398*e4b17023SJohn Marino static int
mtt_info_eq(const void * mtt1,const void * mtt2)399*e4b17023SJohn Marino mtt_info_eq (const void *mtt1, const void *mtt2)
400*e4b17023SJohn Marino {
401*e4b17023SJohn Marino const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
402*e4b17023SJohn Marino const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;
403*e4b17023SJohn Marino
404*e4b17023SJohn Marino if (i1->decl == i2->decl)
405*e4b17023SJohn Marino return true;
406*e4b17023SJohn Marino
407*e4b17023SJohn Marino return false;
408*e4b17023SJohn Marino }
409*e4b17023SJohn Marino
410*e4b17023SJohn Marino /* Return false if STMT may contain a vector expression.
411*e4b17023SJohn Marino In this situation, all matrices should not be flattened. */
412*e4b17023SJohn Marino static bool
may_flatten_matrices_1(gimple stmt)413*e4b17023SJohn Marino may_flatten_matrices_1 (gimple stmt)
414*e4b17023SJohn Marino {
415*e4b17023SJohn Marino switch (gimple_code (stmt))
416*e4b17023SJohn Marino {
417*e4b17023SJohn Marino case GIMPLE_ASSIGN:
418*e4b17023SJohn Marino case GIMPLE_CALL:
419*e4b17023SJohn Marino if (!gimple_has_lhs (stmt))
420*e4b17023SJohn Marino return true;
421*e4b17023SJohn Marino if (TREE_CODE (TREE_TYPE (gimple_get_lhs (stmt))) == VECTOR_TYPE)
422*e4b17023SJohn Marino {
423*e4b17023SJohn Marino if (dump_file)
424*e4b17023SJohn Marino fprintf (dump_file,
425*e4b17023SJohn Marino "Found vector type, don't flatten matrix\n");
426*e4b17023SJohn Marino return false;
427*e4b17023SJohn Marino }
428*e4b17023SJohn Marino break;
429*e4b17023SJohn Marino case GIMPLE_ASM:
430*e4b17023SJohn Marino /* Asm code could contain vector operations. */
431*e4b17023SJohn Marino return false;
432*e4b17023SJohn Marino break;
433*e4b17023SJohn Marino default:
434*e4b17023SJohn Marino break;
435*e4b17023SJohn Marino }
436*e4b17023SJohn Marino return true;
437*e4b17023SJohn Marino }
438*e4b17023SJohn Marino
439*e4b17023SJohn Marino /* Return false if there are hand-written vectors in the program.
440*e4b17023SJohn Marino We disable the flattening in such a case. */
441*e4b17023SJohn Marino static bool
may_flatten_matrices(struct cgraph_node * node)442*e4b17023SJohn Marino may_flatten_matrices (struct cgraph_node *node)
443*e4b17023SJohn Marino {
444*e4b17023SJohn Marino tree decl;
445*e4b17023SJohn Marino struct function *func;
446*e4b17023SJohn Marino basic_block bb;
447*e4b17023SJohn Marino gimple_stmt_iterator gsi;
448*e4b17023SJohn Marino
449*e4b17023SJohn Marino decl = node->decl;
450*e4b17023SJohn Marino if (node->analyzed)
451*e4b17023SJohn Marino {
452*e4b17023SJohn Marino func = DECL_STRUCT_FUNCTION (decl);
453*e4b17023SJohn Marino FOR_EACH_BB_FN (bb, func)
454*e4b17023SJohn Marino for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
455*e4b17023SJohn Marino if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
456*e4b17023SJohn Marino return false;
457*e4b17023SJohn Marino }
458*e4b17023SJohn Marino return true;
459*e4b17023SJohn Marino }
460*e4b17023SJohn Marino
461*e4b17023SJohn Marino /* Given a VAR_DECL, check its type to determine whether it is
462*e4b17023SJohn Marino a definition of a dynamic allocated matrix and therefore is
463*e4b17023SJohn Marino a suitable candidate for the matrix flattening optimization.
464*e4b17023SJohn Marino Return NULL if VAR_DECL is not such decl. Otherwise, allocate
465*e4b17023SJohn Marino a MATRIX_INFO structure, fill it with the relevant information
466*e4b17023SJohn Marino and return a pointer to it.
467*e4b17023SJohn Marino TODO: handle also statically defined arrays. */
468*e4b17023SJohn Marino static struct matrix_info *
analyze_matrix_decl(tree var_decl)469*e4b17023SJohn Marino analyze_matrix_decl (tree var_decl)
470*e4b17023SJohn Marino {
471*e4b17023SJohn Marino struct matrix_info *m_node, tmpmi, *mi;
472*e4b17023SJohn Marino tree var_type;
473*e4b17023SJohn Marino int dim_num = 0;
474*e4b17023SJohn Marino
475*e4b17023SJohn Marino gcc_assert (matrices_to_reorg);
476*e4b17023SJohn Marino
477*e4b17023SJohn Marino if (TREE_CODE (var_decl) == PARM_DECL)
478*e4b17023SJohn Marino var_type = DECL_ARG_TYPE (var_decl);
479*e4b17023SJohn Marino else if (TREE_CODE (var_decl) == VAR_DECL)
480*e4b17023SJohn Marino var_type = TREE_TYPE (var_decl);
481*e4b17023SJohn Marino else
482*e4b17023SJohn Marino return NULL;
483*e4b17023SJohn Marino
484*e4b17023SJohn Marino if (!POINTER_TYPE_P (var_type))
485*e4b17023SJohn Marino return NULL;
486*e4b17023SJohn Marino
487*e4b17023SJohn Marino while (POINTER_TYPE_P (var_type))
488*e4b17023SJohn Marino {
489*e4b17023SJohn Marino var_type = TREE_TYPE (var_type);
490*e4b17023SJohn Marino dim_num++;
491*e4b17023SJohn Marino }
492*e4b17023SJohn Marino
493*e4b17023SJohn Marino if (dim_num <= 1)
494*e4b17023SJohn Marino return NULL;
495*e4b17023SJohn Marino
496*e4b17023SJohn Marino if (!COMPLETE_TYPE_P (var_type)
497*e4b17023SJohn Marino || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
498*e4b17023SJohn Marino return NULL;
499*e4b17023SJohn Marino
500*e4b17023SJohn Marino /* Check to see if this pointer is already in there. */
501*e4b17023SJohn Marino tmpmi.decl = var_decl;
502*e4b17023SJohn Marino mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);
503*e4b17023SJohn Marino
504*e4b17023SJohn Marino if (mi)
505*e4b17023SJohn Marino return NULL;
506*e4b17023SJohn Marino
507*e4b17023SJohn Marino /* Record the matrix. */
508*e4b17023SJohn Marino
509*e4b17023SJohn Marino m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
510*e4b17023SJohn Marino m_node->decl = var_decl;
511*e4b17023SJohn Marino m_node->num_dims = dim_num;
512*e4b17023SJohn Marino m_node->free_stmts
513*e4b17023SJohn Marino = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));
514*e4b17023SJohn Marino
515*e4b17023SJohn Marino /* Init min_indirect_level_escape to -1 to indicate that no escape
516*e4b17023SJohn Marino analysis has been done yet. */
517*e4b17023SJohn Marino m_node->min_indirect_level_escape = -1;
518*e4b17023SJohn Marino m_node->is_transposed_p = false;
519*e4b17023SJohn Marino
520*e4b17023SJohn Marino return m_node;
521*e4b17023SJohn Marino }
522*e4b17023SJohn Marino
523*e4b17023SJohn Marino /* Free matrix E. */
524*e4b17023SJohn Marino static void
mat_free(void * e)525*e4b17023SJohn Marino mat_free (void *e)
526*e4b17023SJohn Marino {
527*e4b17023SJohn Marino struct matrix_info *mat = (struct matrix_info *) e;
528*e4b17023SJohn Marino
529*e4b17023SJohn Marino if (!mat)
530*e4b17023SJohn Marino return;
531*e4b17023SJohn Marino
532*e4b17023SJohn Marino free (mat->free_stmts);
533*e4b17023SJohn Marino free (mat->dim_hot_level);
534*e4b17023SJohn Marino free (mat->malloc_for_level);
535*e4b17023SJohn Marino }
536*e4b17023SJohn Marino
537*e4b17023SJohn Marino /* Find all potential matrices.
538*e4b17023SJohn Marino TODO: currently we handle only multidimensional
539*e4b17023SJohn Marino dynamically allocated arrays. */
540*e4b17023SJohn Marino static void
find_matrices_decl(void)541*e4b17023SJohn Marino find_matrices_decl (void)
542*e4b17023SJohn Marino {
543*e4b17023SJohn Marino struct matrix_info *tmp;
544*e4b17023SJohn Marino PTR *slot;
545*e4b17023SJohn Marino struct varpool_node *vnode;
546*e4b17023SJohn Marino
547*e4b17023SJohn Marino gcc_assert (matrices_to_reorg);
548*e4b17023SJohn Marino
549*e4b17023SJohn Marino /* For every global variable in the program:
550*e4b17023SJohn Marino Check to see if it's of a candidate type and record it. */
551*e4b17023SJohn Marino for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
552*e4b17023SJohn Marino {
553*e4b17023SJohn Marino tree var_decl = vnode->decl;
554*e4b17023SJohn Marino
555*e4b17023SJohn Marino if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
556*e4b17023SJohn Marino continue;
557*e4b17023SJohn Marino
558*e4b17023SJohn Marino if (matrices_to_reorg)
559*e4b17023SJohn Marino if ((tmp = analyze_matrix_decl (var_decl)))
560*e4b17023SJohn Marino {
561*e4b17023SJohn Marino if (!TREE_ADDRESSABLE (var_decl))
562*e4b17023SJohn Marino {
563*e4b17023SJohn Marino slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
564*e4b17023SJohn Marino *slot = tmp;
565*e4b17023SJohn Marino }
566*e4b17023SJohn Marino }
567*e4b17023SJohn Marino }
568*e4b17023SJohn Marino return;
569*e4b17023SJohn Marino }
570*e4b17023SJohn Marino
571*e4b17023SJohn Marino /* Mark that the matrix MI escapes at level L. */
572*e4b17023SJohn Marino static void
mark_min_matrix_escape_level(struct matrix_info * mi,int l,gimple s)573*e4b17023SJohn Marino mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
574*e4b17023SJohn Marino {
575*e4b17023SJohn Marino if (mi->min_indirect_level_escape == -1
576*e4b17023SJohn Marino || (mi->min_indirect_level_escape > l))
577*e4b17023SJohn Marino {
578*e4b17023SJohn Marino mi->min_indirect_level_escape = l;
579*e4b17023SJohn Marino mi->min_indirect_level_escape_stmt = s;
580*e4b17023SJohn Marino }
581*e4b17023SJohn Marino }
582*e4b17023SJohn Marino
583*e4b17023SJohn Marino /* Find if the SSA variable is accessed inside the
584*e4b17023SJohn Marino tree and record the tree containing it.
585*e4b17023SJohn Marino The only relevant uses are the case of SSA_NAME, or SSA inside
586*e4b17023SJohn Marino MEM_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR. */
587*e4b17023SJohn Marino static void
ssa_accessed_in_tree(tree t,struct ssa_acc_in_tree * a)588*e4b17023SJohn Marino ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
589*e4b17023SJohn Marino {
590*e4b17023SJohn Marino a->t_code = TREE_CODE (t);
591*e4b17023SJohn Marino switch (a->t_code)
592*e4b17023SJohn Marino {
593*e4b17023SJohn Marino case SSA_NAME:
594*e4b17023SJohn Marino if (t == a->ssa_var)
595*e4b17023SJohn Marino a->var_found = true;
596*e4b17023SJohn Marino break;
597*e4b17023SJohn Marino case MEM_REF:
598*e4b17023SJohn Marino if (SSA_VAR_P (TREE_OPERAND (t, 0))
599*e4b17023SJohn Marino && TREE_OPERAND (t, 0) == a->ssa_var)
600*e4b17023SJohn Marino a->var_found = true;
601*e4b17023SJohn Marino break;
602*e4b17023SJohn Marino default:
603*e4b17023SJohn Marino break;
604*e4b17023SJohn Marino }
605*e4b17023SJohn Marino }
606*e4b17023SJohn Marino
607*e4b17023SJohn Marino /* Find if the SSA variable is accessed on the right hand side of
608*e4b17023SJohn Marino gimple call STMT. */
609*e4b17023SJohn Marino
610*e4b17023SJohn Marino static void
ssa_accessed_in_call_rhs(gimple stmt,struct ssa_acc_in_tree * a)611*e4b17023SJohn Marino ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
612*e4b17023SJohn Marino {
613*e4b17023SJohn Marino tree decl;
614*e4b17023SJohn Marino tree arg;
615*e4b17023SJohn Marino size_t i;
616*e4b17023SJohn Marino
617*e4b17023SJohn Marino a->t_code = CALL_EXPR;
618*e4b17023SJohn Marino for (i = 0; i < gimple_call_num_args (stmt); i++)
619*e4b17023SJohn Marino {
620*e4b17023SJohn Marino arg = gimple_call_arg (stmt, i);
621*e4b17023SJohn Marino if (arg == a->ssa_var)
622*e4b17023SJohn Marino {
623*e4b17023SJohn Marino a->var_found = true;
624*e4b17023SJohn Marino decl = gimple_call_fndecl (stmt);
625*e4b17023SJohn Marino a->t_tree = decl;
626*e4b17023SJohn Marino break;
627*e4b17023SJohn Marino }
628*e4b17023SJohn Marino }
629*e4b17023SJohn Marino }
630*e4b17023SJohn Marino
631*e4b17023SJohn Marino /* Find if the SSA variable is accessed on the right hand side of
632*e4b17023SJohn Marino gimple assign STMT. */
633*e4b17023SJohn Marino
634*e4b17023SJohn Marino static void
ssa_accessed_in_assign_rhs(gimple stmt,struct ssa_acc_in_tree * a)635*e4b17023SJohn Marino ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
636*e4b17023SJohn Marino {
637*e4b17023SJohn Marino
638*e4b17023SJohn Marino a->t_code = gimple_assign_rhs_code (stmt);
639*e4b17023SJohn Marino switch (a->t_code)
640*e4b17023SJohn Marino {
641*e4b17023SJohn Marino tree op1, op2;
642*e4b17023SJohn Marino
643*e4b17023SJohn Marino case SSA_NAME:
644*e4b17023SJohn Marino case MEM_REF:
645*e4b17023SJohn Marino CASE_CONVERT:
646*e4b17023SJohn Marino case VIEW_CONVERT_EXPR:
647*e4b17023SJohn Marino ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
648*e4b17023SJohn Marino break;
649*e4b17023SJohn Marino case POINTER_PLUS_EXPR:
650*e4b17023SJohn Marino case PLUS_EXPR:
651*e4b17023SJohn Marino case MULT_EXPR:
652*e4b17023SJohn Marino op1 = gimple_assign_rhs1 (stmt);
653*e4b17023SJohn Marino op2 = gimple_assign_rhs2 (stmt);
654*e4b17023SJohn Marino
655*e4b17023SJohn Marino if (op1 == a->ssa_var)
656*e4b17023SJohn Marino {
657*e4b17023SJohn Marino a->var_found = true;
658*e4b17023SJohn Marino a->second_op = op2;
659*e4b17023SJohn Marino }
660*e4b17023SJohn Marino else if (op2 == a->ssa_var)
661*e4b17023SJohn Marino {
662*e4b17023SJohn Marino a->var_found = true;
663*e4b17023SJohn Marino a->second_op = op1;
664*e4b17023SJohn Marino }
665*e4b17023SJohn Marino break;
666*e4b17023SJohn Marino default:
667*e4b17023SJohn Marino break;
668*e4b17023SJohn Marino }
669*e4b17023SJohn Marino }
670*e4b17023SJohn Marino
671*e4b17023SJohn Marino /* Record the access/allocation site information for matrix MI so we can
672*e4b17023SJohn Marino handle it later in transformation. */
673*e4b17023SJohn Marino static void
record_access_alloc_site_info(struct matrix_info * mi,gimple stmt,tree offset,tree index,int level,bool is_alloc)674*e4b17023SJohn Marino record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
675*e4b17023SJohn Marino tree index, int level, bool is_alloc)
676*e4b17023SJohn Marino {
677*e4b17023SJohn Marino struct access_site_info *acc_info;
678*e4b17023SJohn Marino
679*e4b17023SJohn Marino if (!mi->access_l)
680*e4b17023SJohn Marino mi->access_l = VEC_alloc (access_site_info_p, heap, 100);
681*e4b17023SJohn Marino
682*e4b17023SJohn Marino acc_info
683*e4b17023SJohn Marino = (struct access_site_info *)
684*e4b17023SJohn Marino xcalloc (1, sizeof (struct access_site_info));
685*e4b17023SJohn Marino acc_info->stmt = stmt;
686*e4b17023SJohn Marino acc_info->offset = offset;
687*e4b17023SJohn Marino acc_info->index = index;
688*e4b17023SJohn Marino acc_info->function_decl = current_function_decl;
689*e4b17023SJohn Marino acc_info->level = level;
690*e4b17023SJohn Marino acc_info->is_alloc = is_alloc;
691*e4b17023SJohn Marino
692*e4b17023SJohn Marino VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
693*e4b17023SJohn Marino
694*e4b17023SJohn Marino }
695*e4b17023SJohn Marino
696*e4b17023SJohn Marino /* Record the malloc as the allocation site of the given LEVEL. But
697*e4b17023SJohn Marino first we Make sure that all the size parameters passed to malloc in
698*e4b17023SJohn Marino all the allocation sites could be pre-calculated before the call to
699*e4b17023SJohn Marino the malloc of level 0 (the main malloc call). */
700*e4b17023SJohn Marino static void
add_allocation_site(struct matrix_info * mi,gimple stmt,int level)701*e4b17023SJohn Marino add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
702*e4b17023SJohn Marino {
703*e4b17023SJohn Marino struct malloc_call_data mcd;
704*e4b17023SJohn Marino
705*e4b17023SJohn Marino /* Make sure that the allocation sites are in the same function. */
706*e4b17023SJohn Marino if (!mi->allocation_function_decl)
707*e4b17023SJohn Marino mi->allocation_function_decl = current_function_decl;
708*e4b17023SJohn Marino else if (mi->allocation_function_decl != current_function_decl)
709*e4b17023SJohn Marino {
710*e4b17023SJohn Marino int min_malloc_level;
711*e4b17023SJohn Marino
712*e4b17023SJohn Marino gcc_assert (mi->malloc_for_level);
713*e4b17023SJohn Marino
714*e4b17023SJohn Marino /* Find the minimum malloc level that already has been seen;
715*e4b17023SJohn Marino we known its allocation function must be
716*e4b17023SJohn Marino MI->allocation_function_decl since it's different than
717*e4b17023SJohn Marino CURRENT_FUNCTION_DECL then the escaping level should be
718*e4b17023SJohn Marino MIN (LEVEL, MIN_MALLOC_LEVEL) - 1 , and the allocation function
719*e4b17023SJohn Marino must be set accordingly. */
720*e4b17023SJohn Marino for (min_malloc_level = 0;
721*e4b17023SJohn Marino min_malloc_level < mi->max_malloced_level
722*e4b17023SJohn Marino && mi->malloc_for_level[min_malloc_level]; min_malloc_level++)
723*e4b17023SJohn Marino ;
724*e4b17023SJohn Marino if (level < min_malloc_level)
725*e4b17023SJohn Marino {
726*e4b17023SJohn Marino mi->allocation_function_decl = current_function_decl;
727*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
728*e4b17023SJohn Marino }
729*e4b17023SJohn Marino else
730*e4b17023SJohn Marino {
731*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, level, stmt);
732*e4b17023SJohn Marino /* cannot be that (level == min_malloc_level)
733*e4b17023SJohn Marino we would have returned earlier. */
734*e4b17023SJohn Marino return;
735*e4b17023SJohn Marino }
736*e4b17023SJohn Marino }
737*e4b17023SJohn Marino
738*e4b17023SJohn Marino /* Find the correct malloc information. */
739*e4b17023SJohn Marino collect_data_for_malloc_call (stmt, &mcd);
740*e4b17023SJohn Marino
741*e4b17023SJohn Marino /* We accept only calls to malloc function; we do not accept
742*e4b17023SJohn Marino calls like calloc and realloc. */
743*e4b17023SJohn Marino if (!mi->malloc_for_level)
744*e4b17023SJohn Marino {
745*e4b17023SJohn Marino mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
746*e4b17023SJohn Marino mi->max_malloced_level = level + 1;
747*e4b17023SJohn Marino }
748*e4b17023SJohn Marino else if (mi->max_malloced_level <= level)
749*e4b17023SJohn Marino {
750*e4b17023SJohn Marino mi->malloc_for_level
751*e4b17023SJohn Marino = XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);
752*e4b17023SJohn Marino
753*e4b17023SJohn Marino /* Zero the newly allocated items. */
754*e4b17023SJohn Marino memset (&(mi->malloc_for_level[mi->max_malloced_level + 1]),
755*e4b17023SJohn Marino 0, (level - mi->max_malloced_level) * sizeof (tree));
756*e4b17023SJohn Marino
757*e4b17023SJohn Marino mi->max_malloced_level = level + 1;
758*e4b17023SJohn Marino }
759*e4b17023SJohn Marino mi->malloc_for_level[level] = stmt;
760*e4b17023SJohn Marino }
761*e4b17023SJohn Marino
762*e4b17023SJohn Marino /* Given an assignment statement STMT that we know that its
763*e4b17023SJohn Marino left-hand-side is the matrix MI variable, we traverse the immediate
764*e4b17023SJohn Marino uses backwards until we get to a malloc site. We make sure that
765*e4b17023SJohn Marino there is one and only one malloc site that sets this variable. When
766*e4b17023SJohn Marino we are performing the flattening we generate a new variable that
767*e4b17023SJohn Marino will hold the size for each dimension; each malloc that allocates a
768*e4b17023SJohn Marino dimension has the size parameter; we use that parameter to
769*e4b17023SJohn Marino initialize the dimension size variable so we can use it later in
770*e4b17023SJohn Marino the address calculations. LEVEL is the dimension we're inspecting.
771*e4b17023SJohn Marino Return if STMT is related to an allocation site. */
772*e4b17023SJohn Marino
773*e4b17023SJohn Marino static void
analyze_matrix_allocation_site(struct matrix_info * mi,gimple stmt,int level,sbitmap visited)774*e4b17023SJohn Marino analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
775*e4b17023SJohn Marino int level, sbitmap visited)
776*e4b17023SJohn Marino {
777*e4b17023SJohn Marino if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
778*e4b17023SJohn Marino {
779*e4b17023SJohn Marino tree rhs = gimple_assign_rhs1 (stmt);
780*e4b17023SJohn Marino
781*e4b17023SJohn Marino if (TREE_CODE (rhs) == SSA_NAME)
782*e4b17023SJohn Marino {
783*e4b17023SJohn Marino gimple def = SSA_NAME_DEF_STMT (rhs);
784*e4b17023SJohn Marino
785*e4b17023SJohn Marino analyze_matrix_allocation_site (mi, def, level, visited);
786*e4b17023SJohn Marino return;
787*e4b17023SJohn Marino }
788*e4b17023SJohn Marino /* If we are back to the original matrix variable then we
789*e4b17023SJohn Marino are sure that this is analyzed as an access site. */
790*e4b17023SJohn Marino else if (rhs == mi->decl)
791*e4b17023SJohn Marino return;
792*e4b17023SJohn Marino }
793*e4b17023SJohn Marino /* A result of call to malloc. */
794*e4b17023SJohn Marino else if (is_gimple_call (stmt))
795*e4b17023SJohn Marino {
796*e4b17023SJohn Marino int call_flags = gimple_call_flags (stmt);
797*e4b17023SJohn Marino
798*e4b17023SJohn Marino if (!(call_flags & ECF_MALLOC))
799*e4b17023SJohn Marino {
800*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, level, stmt);
801*e4b17023SJohn Marino return;
802*e4b17023SJohn Marino }
803*e4b17023SJohn Marino else
804*e4b17023SJohn Marino {
805*e4b17023SJohn Marino tree malloc_fn_decl;
806*e4b17023SJohn Marino
807*e4b17023SJohn Marino malloc_fn_decl = gimple_call_fndecl (stmt);
808*e4b17023SJohn Marino if (malloc_fn_decl == NULL_TREE)
809*e4b17023SJohn Marino {
810*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, level, stmt);
811*e4b17023SJohn Marino return;
812*e4b17023SJohn Marino }
813*e4b17023SJohn Marino if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
814*e4b17023SJohn Marino {
815*e4b17023SJohn Marino if (dump_file)
816*e4b17023SJohn Marino fprintf (dump_file,
817*e4b17023SJohn Marino "Matrix %s is an argument to function %s\n",
818*e4b17023SJohn Marino get_name (mi->decl), get_name (malloc_fn_decl));
819*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, level, stmt);
820*e4b17023SJohn Marino return;
821*e4b17023SJohn Marino }
822*e4b17023SJohn Marino }
823*e4b17023SJohn Marino /* This is a call to malloc of level 'level'.
824*e4b17023SJohn Marino mi->max_malloced_level-1 == level means that we've
825*e4b17023SJohn Marino seen a malloc statement of level 'level' before.
826*e4b17023SJohn Marino If the statement is not the same one that we've
827*e4b17023SJohn Marino seen before, then there's another malloc statement
828*e4b17023SJohn Marino for the same level, which means that we need to mark
829*e4b17023SJohn Marino it escaping. */
830*e4b17023SJohn Marino if (mi->malloc_for_level
831*e4b17023SJohn Marino && mi->max_malloced_level-1 == level
832*e4b17023SJohn Marino && mi->malloc_for_level[level] != stmt)
833*e4b17023SJohn Marino {
834*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, level, stmt);
835*e4b17023SJohn Marino return;
836*e4b17023SJohn Marino }
837*e4b17023SJohn Marino else
838*e4b17023SJohn Marino add_allocation_site (mi, stmt, level);
839*e4b17023SJohn Marino return;
840*e4b17023SJohn Marino }
841*e4b17023SJohn Marino /* Looks like we don't know what is happening in this
842*e4b17023SJohn Marino statement so be in the safe side and mark it as escaping. */
843*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, level, stmt);
844*e4b17023SJohn Marino }
845*e4b17023SJohn Marino
846*e4b17023SJohn Marino /* The transposing decision making.
847*e4b17023SJohn Marino In order to calculate the profitability of transposing, we collect two
848*e4b17023SJohn Marino types of information regarding the accesses:
849*e4b17023SJohn Marino 1. profiling information used to express the hotness of an access, that
850*e4b17023SJohn Marino is how often the matrix is accessed by this access site (count of the
851*e4b17023SJohn Marino access site).
852*e4b17023SJohn Marino 2. which dimension in the access site is iterated by the inner
853*e4b17023SJohn Marino most loop containing this access.
854*e4b17023SJohn Marino
855*e4b17023SJohn Marino The matrix will have a calculated value of weighted hotness for each
856*e4b17023SJohn Marino dimension.
857*e4b17023SJohn Marino Intuitively the hotness level of a dimension is a function of how
858*e4b17023SJohn Marino many times it was the most frequently accessed dimension in the
859*e4b17023SJohn Marino highly executed access sites of this matrix.
860*e4b17023SJohn Marino
861*e4b17023SJohn Marino As computed by following equation:
862*e4b17023SJohn Marino m n
863*e4b17023SJohn Marino __ __
864*e4b17023SJohn Marino \ \ dim_hot_level[i] +=
865*e4b17023SJohn Marino /_ /_
866*e4b17023SJohn Marino j i
867*e4b17023SJohn Marino acc[j]->dim[i]->iter_by_inner_loop * count(j)
868*e4b17023SJohn Marino
869*e4b17023SJohn Marino Where n is the number of dims and m is the number of the matrix
870*e4b17023SJohn Marino access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
871*e4b17023SJohn Marino iterates over dim[i] in innermost loop, and is 0 otherwise.
872*e4b17023SJohn Marino
873*e4b17023SJohn Marino The organization of the new matrix should be according to the
874*e4b17023SJohn Marino hotness of each dimension. The hotness of the dimension implies
875*e4b17023SJohn Marino the locality of the elements.*/
876*e4b17023SJohn Marino static int
analyze_transpose(void ** slot,void * data ATTRIBUTE_UNUSED)877*e4b17023SJohn Marino analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
878*e4b17023SJohn Marino {
879*e4b17023SJohn Marino struct matrix_info *mi = (struct matrix_info *) *slot;
880*e4b17023SJohn Marino int min_escape_l = mi->min_indirect_level_escape;
881*e4b17023SJohn Marino struct loop *loop;
882*e4b17023SJohn Marino affine_iv iv;
883*e4b17023SJohn Marino struct access_site_info *acc_info;
884*e4b17023SJohn Marino int i;
885*e4b17023SJohn Marino
886*e4b17023SJohn Marino if (min_escape_l < 2 || !mi->access_l)
887*e4b17023SJohn Marino {
888*e4b17023SJohn Marino if (mi->access_l)
889*e4b17023SJohn Marino {
890*e4b17023SJohn Marino FOR_EACH_VEC_ELT (access_site_info_p, mi->access_l, i, acc_info)
891*e4b17023SJohn Marino free (acc_info);
892*e4b17023SJohn Marino VEC_free (access_site_info_p, heap, mi->access_l);
893*e4b17023SJohn Marino
894*e4b17023SJohn Marino }
895*e4b17023SJohn Marino return 1;
896*e4b17023SJohn Marino }
897*e4b17023SJohn Marino if (!mi->dim_hot_level)
898*e4b17023SJohn Marino mi->dim_hot_level =
899*e4b17023SJohn Marino (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));
900*e4b17023SJohn Marino
901*e4b17023SJohn Marino
902*e4b17023SJohn Marino for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
903*e4b17023SJohn Marino i++)
904*e4b17023SJohn Marino {
905*e4b17023SJohn Marino if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
906*e4b17023SJohn Marino && acc_info->level < min_escape_l)
907*e4b17023SJohn Marino {
908*e4b17023SJohn Marino loop = loop_containing_stmt (acc_info->stmt);
909*e4b17023SJohn Marino if (!loop || loop->inner)
910*e4b17023SJohn Marino {
911*e4b17023SJohn Marino free (acc_info);
912*e4b17023SJohn Marino continue;
913*e4b17023SJohn Marino }
914*e4b17023SJohn Marino if (simple_iv (loop, loop, acc_info->offset, &iv, true))
915*e4b17023SJohn Marino {
916*e4b17023SJohn Marino if (iv.step != NULL)
917*e4b17023SJohn Marino {
918*e4b17023SJohn Marino HOST_WIDE_INT istep;
919*e4b17023SJohn Marino
920*e4b17023SJohn Marino istep = int_cst_value (iv.step);
921*e4b17023SJohn Marino if (istep != 0)
922*e4b17023SJohn Marino {
923*e4b17023SJohn Marino acc_info->iterated_by_inner_most_loop_p = 1;
924*e4b17023SJohn Marino mi->dim_hot_level[acc_info->level] +=
925*e4b17023SJohn Marino gimple_bb (acc_info->stmt)->count;
926*e4b17023SJohn Marino }
927*e4b17023SJohn Marino
928*e4b17023SJohn Marino }
929*e4b17023SJohn Marino }
930*e4b17023SJohn Marino }
931*e4b17023SJohn Marino free (acc_info);
932*e4b17023SJohn Marino }
933*e4b17023SJohn Marino VEC_free (access_site_info_p, heap, mi->access_l);
934*e4b17023SJohn Marino
935*e4b17023SJohn Marino return 1;
936*e4b17023SJohn Marino }
937*e4b17023SJohn Marino
938*e4b17023SJohn Marino /* Find the index which defines the OFFSET from base.
939*e4b17023SJohn Marino We walk from use to def until we find how the offset was defined. */
940*e4b17023SJohn Marino static tree
get_index_from_offset(tree offset,gimple def_stmt)941*e4b17023SJohn Marino get_index_from_offset (tree offset, gimple def_stmt)
942*e4b17023SJohn Marino {
943*e4b17023SJohn Marino tree op1, op2, index;
944*e4b17023SJohn Marino
945*e4b17023SJohn Marino if (gimple_code (def_stmt) == GIMPLE_PHI)
946*e4b17023SJohn Marino return NULL;
947*e4b17023SJohn Marino if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
948*e4b17023SJohn Marino && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
949*e4b17023SJohn Marino return get_index_from_offset (offset,
950*e4b17023SJohn Marino SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
951*e4b17023SJohn Marino else if (is_gimple_assign (def_stmt)
952*e4b17023SJohn Marino && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
953*e4b17023SJohn Marino {
954*e4b17023SJohn Marino op1 = gimple_assign_rhs1 (def_stmt);
955*e4b17023SJohn Marino op2 = gimple_assign_rhs2 (def_stmt);
956*e4b17023SJohn Marino if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
957*e4b17023SJohn Marino return NULL;
958*e4b17023SJohn Marino index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
959*e4b17023SJohn Marino return index;
960*e4b17023SJohn Marino }
961*e4b17023SJohn Marino else
962*e4b17023SJohn Marino return NULL_TREE;
963*e4b17023SJohn Marino }
964*e4b17023SJohn Marino
965*e4b17023SJohn Marino /* update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
966*e4b17023SJohn Marino of the type related to the SSA_VAR, or the type related to the
967*e4b17023SJohn Marino lhs of STMT, in the case that it is an MEM_REF. */
968*e4b17023SJohn Marino static void
update_type_size(struct matrix_info * mi,gimple stmt,tree ssa_var,int current_indirect_level)969*e4b17023SJohn Marino update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
970*e4b17023SJohn Marino int current_indirect_level)
971*e4b17023SJohn Marino {
972*e4b17023SJohn Marino tree lhs;
973*e4b17023SJohn Marino HOST_WIDE_INT type_size;
974*e4b17023SJohn Marino
975*e4b17023SJohn Marino /* Update type according to the type of the MEM_REF expr. */
976*e4b17023SJohn Marino if (is_gimple_assign (stmt)
977*e4b17023SJohn Marino && TREE_CODE (gimple_assign_lhs (stmt)) == MEM_REF)
978*e4b17023SJohn Marino {
979*e4b17023SJohn Marino lhs = gimple_assign_lhs (stmt);
980*e4b17023SJohn Marino gcc_assert (POINTER_TYPE_P
981*e4b17023SJohn Marino (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
982*e4b17023SJohn Marino type_size =
983*e4b17023SJohn Marino int_size_in_bytes (TREE_TYPE
984*e4b17023SJohn Marino (TREE_TYPE
985*e4b17023SJohn Marino (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
986*e4b17023SJohn Marino }
987*e4b17023SJohn Marino else
988*e4b17023SJohn Marino type_size = int_size_in_bytes (TREE_TYPE (ssa_var));
989*e4b17023SJohn Marino
990*e4b17023SJohn Marino /* Record the size of elements accessed (as a whole)
991*e4b17023SJohn Marino in the current indirection level (dimension). If the size of
992*e4b17023SJohn Marino elements is not known at compile time, mark it as escaping. */
993*e4b17023SJohn Marino if (type_size <= 0)
994*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
995*e4b17023SJohn Marino else
996*e4b17023SJohn Marino {
997*e4b17023SJohn Marino int l = current_indirect_level;
998*e4b17023SJohn Marino
999*e4b17023SJohn Marino if (!mi->dimension_type_size)
1000*e4b17023SJohn Marino {
1001*e4b17023SJohn Marino mi->dimension_type_size
1002*e4b17023SJohn Marino = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
1003*e4b17023SJohn Marino mi->dimension_type_size_len = l + 1;
1004*e4b17023SJohn Marino }
1005*e4b17023SJohn Marino else if (mi->dimension_type_size_len < l + 1)
1006*e4b17023SJohn Marino {
1007*e4b17023SJohn Marino mi->dimension_type_size
1008*e4b17023SJohn Marino = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
1009*e4b17023SJohn Marino (l + 1) * sizeof (HOST_WIDE_INT));
1010*e4b17023SJohn Marino memset (&mi->dimension_type_size[mi->dimension_type_size_len],
1011*e4b17023SJohn Marino 0, (l + 1 - mi->dimension_type_size_len)
1012*e4b17023SJohn Marino * sizeof (HOST_WIDE_INT));
1013*e4b17023SJohn Marino mi->dimension_type_size_len = l + 1;
1014*e4b17023SJohn Marino }
1015*e4b17023SJohn Marino /* Make sure all the accesses in the same level have the same size
1016*e4b17023SJohn Marino of the type. */
1017*e4b17023SJohn Marino if (!mi->dimension_type_size[l])
1018*e4b17023SJohn Marino mi->dimension_type_size[l] = type_size;
1019*e4b17023SJohn Marino else if (mi->dimension_type_size[l] != type_size)
1020*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, l, stmt);
1021*e4b17023SJohn Marino }
1022*e4b17023SJohn Marino }
1023*e4b17023SJohn Marino
1024*e4b17023SJohn Marino /* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
1025*e4b17023SJohn Marino ssa var that we want to check because it came from some use of matrix
1026*e4b17023SJohn Marino MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1027*e4b17023SJohn Marino far. */
1028*e4b17023SJohn Marino
1029*e4b17023SJohn Marino static int
analyze_accesses_for_call_stmt(struct matrix_info * mi,tree ssa_var,gimple use_stmt,int current_indirect_level)1030*e4b17023SJohn Marino analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
1031*e4b17023SJohn Marino gimple use_stmt, int current_indirect_level)
1032*e4b17023SJohn Marino {
1033*e4b17023SJohn Marino tree fndecl = gimple_call_fndecl (use_stmt);
1034*e4b17023SJohn Marino
1035*e4b17023SJohn Marino if (gimple_call_lhs (use_stmt))
1036*e4b17023SJohn Marino {
1037*e4b17023SJohn Marino tree lhs = gimple_call_lhs (use_stmt);
1038*e4b17023SJohn Marino struct ssa_acc_in_tree lhs_acc, rhs_acc;
1039*e4b17023SJohn Marino
1040*e4b17023SJohn Marino memset (&lhs_acc, 0, sizeof (lhs_acc));
1041*e4b17023SJohn Marino memset (&rhs_acc, 0, sizeof (rhs_acc));
1042*e4b17023SJohn Marino
1043*e4b17023SJohn Marino lhs_acc.ssa_var = ssa_var;
1044*e4b17023SJohn Marino lhs_acc.t_code = ERROR_MARK;
1045*e4b17023SJohn Marino ssa_accessed_in_tree (lhs, &lhs_acc);
1046*e4b17023SJohn Marino rhs_acc.ssa_var = ssa_var;
1047*e4b17023SJohn Marino rhs_acc.t_code = ERROR_MARK;
1048*e4b17023SJohn Marino ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);
1049*e4b17023SJohn Marino
1050*e4b17023SJohn Marino /* The SSA must be either in the left side or in the right side,
1051*e4b17023SJohn Marino to understand what is happening.
1052*e4b17023SJohn Marino In case the SSA_NAME is found in both sides we should be escaping
1053*e4b17023SJohn Marino at this level because in this case we cannot calculate the
1054*e4b17023SJohn Marino address correctly. */
1055*e4b17023SJohn Marino if ((lhs_acc.var_found && rhs_acc.var_found
1056*e4b17023SJohn Marino && lhs_acc.t_code == MEM_REF)
1057*e4b17023SJohn Marino || (!rhs_acc.var_found && !lhs_acc.var_found))
1058*e4b17023SJohn Marino {
1059*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1060*e4b17023SJohn Marino return current_indirect_level;
1061*e4b17023SJohn Marino }
1062*e4b17023SJohn Marino gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1063*e4b17023SJohn Marino
1064*e4b17023SJohn Marino /* If we are storing to the matrix at some level, then mark it as
1065*e4b17023SJohn Marino escaping at that level. */
1066*e4b17023SJohn Marino if (lhs_acc.var_found)
1067*e4b17023SJohn Marino {
1068*e4b17023SJohn Marino int l = current_indirect_level + 1;
1069*e4b17023SJohn Marino
1070*e4b17023SJohn Marino gcc_assert (lhs_acc.t_code == MEM_REF);
1071*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, l, use_stmt);
1072*e4b17023SJohn Marino return current_indirect_level;
1073*e4b17023SJohn Marino }
1074*e4b17023SJohn Marino }
1075*e4b17023SJohn Marino
1076*e4b17023SJohn Marino if (fndecl)
1077*e4b17023SJohn Marino {
1078*e4b17023SJohn Marino if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
1079*e4b17023SJohn Marino {
1080*e4b17023SJohn Marino if (dump_file)
1081*e4b17023SJohn Marino fprintf (dump_file,
1082*e4b17023SJohn Marino "Matrix %s: Function call %s, level %d escapes.\n",
1083*e4b17023SJohn Marino get_name (mi->decl), get_name (fndecl),
1084*e4b17023SJohn Marino current_indirect_level);
1085*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1086*e4b17023SJohn Marino }
1087*e4b17023SJohn Marino else if (mi->free_stmts[current_indirect_level].stmt != NULL
1088*e4b17023SJohn Marino && mi->free_stmts[current_indirect_level].stmt != use_stmt)
1089*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1090*e4b17023SJohn Marino else
1091*e4b17023SJohn Marino {
1092*e4b17023SJohn Marino /*Record the free statements so we can delete them
1093*e4b17023SJohn Marino later. */
1094*e4b17023SJohn Marino int l = current_indirect_level;
1095*e4b17023SJohn Marino
1096*e4b17023SJohn Marino mi->free_stmts[l].stmt = use_stmt;
1097*e4b17023SJohn Marino mi->free_stmts[l].func = current_function_decl;
1098*e4b17023SJohn Marino }
1099*e4b17023SJohn Marino }
1100*e4b17023SJohn Marino return current_indirect_level;
1101*e4b17023SJohn Marino }
1102*e4b17023SJohn Marino
1103*e4b17023SJohn Marino /* USE_STMT represents a phi node of the ssa var that we want to
1104*e4b17023SJohn Marino check because it came from some use of matrix
1105*e4b17023SJohn Marino MI.
1106*e4b17023SJohn Marino We check all the escaping levels that get to the PHI node
1107*e4b17023SJohn Marino and make sure they are all the same escaping;
1108*e4b17023SJohn Marino if not (which is rare) we let the escaping level be the
1109*e4b17023SJohn Marino minimum level that gets into that PHI because starting from
1110*e4b17023SJohn Marino that level we cannot expect the behavior of the indirections.
1111*e4b17023SJohn Marino CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1112*e4b17023SJohn Marino
1113*e4b17023SJohn Marino static void
analyze_accesses_for_phi_node(struct matrix_info * mi,gimple use_stmt,int current_indirect_level,sbitmap visited,bool record_accesses)1114*e4b17023SJohn Marino analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
1115*e4b17023SJohn Marino int current_indirect_level, sbitmap visited,
1116*e4b17023SJohn Marino bool record_accesses)
1117*e4b17023SJohn Marino {
1118*e4b17023SJohn Marino
1119*e4b17023SJohn Marino struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;
1120*e4b17023SJohn Marino
1121*e4b17023SJohn Marino tmp_maphi.phi = use_stmt;
1122*e4b17023SJohn Marino if ((maphi = (struct matrix_access_phi_node *)
1123*e4b17023SJohn Marino htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
1124*e4b17023SJohn Marino {
1125*e4b17023SJohn Marino if (maphi->indirection_level == current_indirect_level)
1126*e4b17023SJohn Marino return;
1127*e4b17023SJohn Marino else
1128*e4b17023SJohn Marino {
1129*e4b17023SJohn Marino int level = MIN (maphi->indirection_level,
1130*e4b17023SJohn Marino current_indirect_level);
1131*e4b17023SJohn Marino size_t j;
1132*e4b17023SJohn Marino gimple stmt = NULL;
1133*e4b17023SJohn Marino
1134*e4b17023SJohn Marino maphi->indirection_level = level;
1135*e4b17023SJohn Marino for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
1136*e4b17023SJohn Marino {
1137*e4b17023SJohn Marino tree def = PHI_ARG_DEF (use_stmt, j);
1138*e4b17023SJohn Marino
1139*e4b17023SJohn Marino if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
1140*e4b17023SJohn Marino stmt = SSA_NAME_DEF_STMT (def);
1141*e4b17023SJohn Marino }
1142*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, level, stmt);
1143*e4b17023SJohn Marino }
1144*e4b17023SJohn Marino return;
1145*e4b17023SJohn Marino }
1146*e4b17023SJohn Marino maphi = (struct matrix_access_phi_node *)
1147*e4b17023SJohn Marino xcalloc (1, sizeof (struct matrix_access_phi_node));
1148*e4b17023SJohn Marino maphi->phi = use_stmt;
1149*e4b17023SJohn Marino maphi->indirection_level = current_indirect_level;
1150*e4b17023SJohn Marino
1151*e4b17023SJohn Marino /* Insert to hash table. */
1152*e4b17023SJohn Marino pmaphi = (struct matrix_access_phi_node **)
1153*e4b17023SJohn Marino htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
1154*e4b17023SJohn Marino gcc_assert (pmaphi);
1155*e4b17023SJohn Marino *pmaphi = maphi;
1156*e4b17023SJohn Marino
1157*e4b17023SJohn Marino if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
1158*e4b17023SJohn Marino {
1159*e4b17023SJohn Marino SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1160*e4b17023SJohn Marino analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
1161*e4b17023SJohn Marino current_indirect_level, false, visited,
1162*e4b17023SJohn Marino record_accesses);
1163*e4b17023SJohn Marino RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1164*e4b17023SJohn Marino }
1165*e4b17023SJohn Marino }
1166*e4b17023SJohn Marino
1167*e4b17023SJohn Marino /* USE_STMT represents an assign statement (the rhs or lhs include
1168*e4b17023SJohn Marino the ssa var that we want to check because it came from some use of matrix
1169*e4b17023SJohn Marino MI. CURRENT_INDIRECT_LEVEL is the indirection level we reached so far. */
1170*e4b17023SJohn Marino
1171*e4b17023SJohn Marino static int
analyze_accesses_for_assign_stmt(struct matrix_info * mi,tree ssa_var,gimple use_stmt,int current_indirect_level,bool last_op,sbitmap visited,bool record_accesses)1172*e4b17023SJohn Marino analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
1173*e4b17023SJohn Marino gimple use_stmt, int current_indirect_level,
1174*e4b17023SJohn Marino bool last_op, sbitmap visited,
1175*e4b17023SJohn Marino bool record_accesses)
1176*e4b17023SJohn Marino {
1177*e4b17023SJohn Marino tree lhs = gimple_get_lhs (use_stmt);
1178*e4b17023SJohn Marino struct ssa_acc_in_tree lhs_acc, rhs_acc;
1179*e4b17023SJohn Marino
1180*e4b17023SJohn Marino memset (&lhs_acc, 0, sizeof (lhs_acc));
1181*e4b17023SJohn Marino memset (&rhs_acc, 0, sizeof (rhs_acc));
1182*e4b17023SJohn Marino
1183*e4b17023SJohn Marino lhs_acc.ssa_var = ssa_var;
1184*e4b17023SJohn Marino lhs_acc.t_code = ERROR_MARK;
1185*e4b17023SJohn Marino ssa_accessed_in_tree (lhs, &lhs_acc);
1186*e4b17023SJohn Marino rhs_acc.ssa_var = ssa_var;
1187*e4b17023SJohn Marino rhs_acc.t_code = ERROR_MARK;
1188*e4b17023SJohn Marino ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);
1189*e4b17023SJohn Marino
1190*e4b17023SJohn Marino /* The SSA must be either in the left side or in the right side,
1191*e4b17023SJohn Marino to understand what is happening.
1192*e4b17023SJohn Marino In case the SSA_NAME is found in both sides we should be escaping
1193*e4b17023SJohn Marino at this level because in this case we cannot calculate the
1194*e4b17023SJohn Marino address correctly. */
1195*e4b17023SJohn Marino if ((lhs_acc.var_found && rhs_acc.var_found
1196*e4b17023SJohn Marino && lhs_acc.t_code == MEM_REF)
1197*e4b17023SJohn Marino || (!rhs_acc.var_found && !lhs_acc.var_found))
1198*e4b17023SJohn Marino {
1199*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1200*e4b17023SJohn Marino return current_indirect_level;
1201*e4b17023SJohn Marino }
1202*e4b17023SJohn Marino gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1203*e4b17023SJohn Marino
1204*e4b17023SJohn Marino /* If we are storing to the matrix at some level, then mark it as
1205*e4b17023SJohn Marino escaping at that level. */
1206*e4b17023SJohn Marino if (lhs_acc.var_found)
1207*e4b17023SJohn Marino {
1208*e4b17023SJohn Marino int l = current_indirect_level + 1;
1209*e4b17023SJohn Marino
1210*e4b17023SJohn Marino gcc_assert (lhs_acc.t_code == MEM_REF);
1211*e4b17023SJohn Marino
1212*e4b17023SJohn Marino if (!(gimple_assign_copy_p (use_stmt)
1213*e4b17023SJohn Marino || gimple_assign_cast_p (use_stmt))
1214*e4b17023SJohn Marino || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
1215*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, l, use_stmt);
1216*e4b17023SJohn Marino else
1217*e4b17023SJohn Marino {
1218*e4b17023SJohn Marino gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
1219*e4b17023SJohn Marino analyze_matrix_allocation_site (mi, def_stmt, l, visited);
1220*e4b17023SJohn Marino if (record_accesses)
1221*e4b17023SJohn Marino record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1222*e4b17023SJohn Marino NULL_TREE, l, true);
1223*e4b17023SJohn Marino update_type_size (mi, use_stmt, NULL, l);
1224*e4b17023SJohn Marino }
1225*e4b17023SJohn Marino return current_indirect_level;
1226*e4b17023SJohn Marino }
1227*e4b17023SJohn Marino /* Now, check the right-hand-side, to see how the SSA variable
1228*e4b17023SJohn Marino is used. */
1229*e4b17023SJohn Marino if (rhs_acc.var_found)
1230*e4b17023SJohn Marino {
1231*e4b17023SJohn Marino if (rhs_acc.t_code != MEM_REF
1232*e4b17023SJohn Marino && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
1233*e4b17023SJohn Marino {
1234*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1235*e4b17023SJohn Marino return current_indirect_level;
1236*e4b17023SJohn Marino }
1237*e4b17023SJohn Marino /* If the access in the RHS has an indirection increase the
1238*e4b17023SJohn Marino indirection level. */
1239*e4b17023SJohn Marino if (rhs_acc.t_code == MEM_REF)
1240*e4b17023SJohn Marino {
1241*e4b17023SJohn Marino if (record_accesses)
1242*e4b17023SJohn Marino record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1243*e4b17023SJohn Marino NULL_TREE,
1244*e4b17023SJohn Marino current_indirect_level, true);
1245*e4b17023SJohn Marino current_indirect_level += 1;
1246*e4b17023SJohn Marino }
1247*e4b17023SJohn Marino else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
1248*e4b17023SJohn Marino {
1249*e4b17023SJohn Marino gcc_assert (rhs_acc.second_op);
1250*e4b17023SJohn Marino if (last_op)
1251*e4b17023SJohn Marino /* Currently we support only one PLUS expression on the
1252*e4b17023SJohn Marino SSA_NAME that holds the base address of the current
1253*e4b17023SJohn Marino indirection level; to support more general case there
1254*e4b17023SJohn Marino is a need to hold a stack of expressions and regenerate
1255*e4b17023SJohn Marino the calculation later. */
1256*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, current_indirect_level,
1257*e4b17023SJohn Marino use_stmt);
1258*e4b17023SJohn Marino else
1259*e4b17023SJohn Marino {
1260*e4b17023SJohn Marino tree index;
1261*e4b17023SJohn Marino tree op1, op2;
1262*e4b17023SJohn Marino
1263*e4b17023SJohn Marino op1 = gimple_assign_rhs1 (use_stmt);
1264*e4b17023SJohn Marino op2 = gimple_assign_rhs2 (use_stmt);
1265*e4b17023SJohn Marino
1266*e4b17023SJohn Marino op2 = (op1 == ssa_var) ? op2 : op1;
1267*e4b17023SJohn Marino if (TREE_CODE (op2) == INTEGER_CST)
1268*e4b17023SJohn Marino index =
1269*e4b17023SJohn Marino build_int_cst (TREE_TYPE (op1),
1270*e4b17023SJohn Marino TREE_INT_CST_LOW (op2) /
1271*e4b17023SJohn Marino int_size_in_bytes (TREE_TYPE (op1)));
1272*e4b17023SJohn Marino else
1273*e4b17023SJohn Marino {
1274*e4b17023SJohn Marino index =
1275*e4b17023SJohn Marino get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
1276*e4b17023SJohn Marino if (index == NULL_TREE)
1277*e4b17023SJohn Marino {
1278*e4b17023SJohn Marino mark_min_matrix_escape_level (mi,
1279*e4b17023SJohn Marino current_indirect_level,
1280*e4b17023SJohn Marino use_stmt);
1281*e4b17023SJohn Marino return current_indirect_level;
1282*e4b17023SJohn Marino }
1283*e4b17023SJohn Marino }
1284*e4b17023SJohn Marino if (record_accesses)
1285*e4b17023SJohn Marino record_access_alloc_site_info (mi, use_stmt, op2,
1286*e4b17023SJohn Marino index,
1287*e4b17023SJohn Marino current_indirect_level, false);
1288*e4b17023SJohn Marino }
1289*e4b17023SJohn Marino }
1290*e4b17023SJohn Marino /* If we are storing this level of indirection mark it as
1291*e4b17023SJohn Marino escaping. */
1292*e4b17023SJohn Marino if (lhs_acc.t_code == MEM_REF || TREE_CODE (lhs) != SSA_NAME)
1293*e4b17023SJohn Marino {
1294*e4b17023SJohn Marino int l = current_indirect_level;
1295*e4b17023SJohn Marino
1296*e4b17023SJohn Marino /* One exception is when we are storing to the matrix
1297*e4b17023SJohn Marino variable itself; this is the case of malloc, we must make
1298*e4b17023SJohn Marino sure that it's the one and only one call to malloc so
1299*e4b17023SJohn Marino we call analyze_matrix_allocation_site to check
1300*e4b17023SJohn Marino this out. */
1301*e4b17023SJohn Marino if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
1302*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, current_indirect_level,
1303*e4b17023SJohn Marino use_stmt);
1304*e4b17023SJohn Marino else
1305*e4b17023SJohn Marino {
1306*e4b17023SJohn Marino /* Also update the escaping level. */
1307*e4b17023SJohn Marino analyze_matrix_allocation_site (mi, use_stmt, l, visited);
1308*e4b17023SJohn Marino if (record_accesses)
1309*e4b17023SJohn Marino record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1310*e4b17023SJohn Marino NULL_TREE, l, true);
1311*e4b17023SJohn Marino }
1312*e4b17023SJohn Marino }
1313*e4b17023SJohn Marino else
1314*e4b17023SJohn Marino {
1315*e4b17023SJohn Marino /* We are placing it in an SSA, follow that SSA. */
1316*e4b17023SJohn Marino analyze_matrix_accesses (mi, lhs,
1317*e4b17023SJohn Marino current_indirect_level,
1318*e4b17023SJohn Marino rhs_acc.t_code == POINTER_PLUS_EXPR,
1319*e4b17023SJohn Marino visited, record_accesses);
1320*e4b17023SJohn Marino }
1321*e4b17023SJohn Marino }
1322*e4b17023SJohn Marino return current_indirect_level;
1323*e4b17023SJohn Marino }
1324*e4b17023SJohn Marino
1325*e4b17023SJohn Marino /* Given a SSA_VAR (coming from a use statement of the matrix MI),
1326*e4b17023SJohn Marino follow its uses and level of indirection and find out the minimum
1327*e4b17023SJohn Marino indirection level it escapes in (the highest dimension) and the maximum
1328*e4b17023SJohn Marino level it is accessed in (this will be the actual dimension of the
1329*e4b17023SJohn Marino matrix). The information is accumulated in MI.
1330*e4b17023SJohn Marino We look at the immediate uses, if one escapes we finish; if not,
1331*e4b17023SJohn Marino we make a recursive call for each one of the immediate uses of the
1332*e4b17023SJohn Marino resulting SSA name. */
1333*e4b17023SJohn Marino static void
analyze_matrix_accesses(struct matrix_info * mi,tree ssa_var,int current_indirect_level,bool last_op,sbitmap visited,bool record_accesses)1334*e4b17023SJohn Marino analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
1335*e4b17023SJohn Marino int current_indirect_level, bool last_op,
1336*e4b17023SJohn Marino sbitmap visited, bool record_accesses)
1337*e4b17023SJohn Marino {
1338*e4b17023SJohn Marino imm_use_iterator imm_iter;
1339*e4b17023SJohn Marino use_operand_p use_p;
1340*e4b17023SJohn Marino
1341*e4b17023SJohn Marino update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
1342*e4b17023SJohn Marino current_indirect_level);
1343*e4b17023SJohn Marino
1344*e4b17023SJohn Marino /* We don't go beyond the escaping level when we are performing the
1345*e4b17023SJohn Marino flattening. NOTE: we keep the last indirection level that doesn't
1346*e4b17023SJohn Marino escape. */
1347*e4b17023SJohn Marino if (mi->min_indirect_level_escape > -1
1348*e4b17023SJohn Marino && mi->min_indirect_level_escape <= current_indirect_level)
1349*e4b17023SJohn Marino return;
1350*e4b17023SJohn Marino
1351*e4b17023SJohn Marino /* Now go over the uses of the SSA_NAME and check how it is used in
1352*e4b17023SJohn Marino each one of them. We are mainly looking for the pattern MEM_REF,
1353*e4b17023SJohn Marino then a POINTER_PLUS_EXPR, then MEM_REF etc. while in between there could
1354*e4b17023SJohn Marino be any number of copies and casts. */
1355*e4b17023SJohn Marino gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);
1356*e4b17023SJohn Marino
1357*e4b17023SJohn Marino FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
1358*e4b17023SJohn Marino {
1359*e4b17023SJohn Marino gimple use_stmt = USE_STMT (use_p);
1360*e4b17023SJohn Marino if (gimple_code (use_stmt) == GIMPLE_PHI)
1361*e4b17023SJohn Marino /* We check all the escaping levels that get to the PHI node
1362*e4b17023SJohn Marino and make sure they are all the same escaping;
1363*e4b17023SJohn Marino if not (which is rare) we let the escaping level be the
1364*e4b17023SJohn Marino minimum level that gets into that PHI because starting from
1365*e4b17023SJohn Marino that level we cannot expect the behavior of the indirections. */
1366*e4b17023SJohn Marino
1367*e4b17023SJohn Marino analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
1368*e4b17023SJohn Marino visited, record_accesses);
1369*e4b17023SJohn Marino
1370*e4b17023SJohn Marino else if (is_gimple_call (use_stmt))
1371*e4b17023SJohn Marino analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
1372*e4b17023SJohn Marino current_indirect_level);
1373*e4b17023SJohn Marino else if (is_gimple_assign (use_stmt))
1374*e4b17023SJohn Marino current_indirect_level =
1375*e4b17023SJohn Marino analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
1376*e4b17023SJohn Marino current_indirect_level, last_op,
1377*e4b17023SJohn Marino visited, record_accesses);
1378*e4b17023SJohn Marino }
1379*e4b17023SJohn Marino }
1380*e4b17023SJohn Marino
1381*e4b17023SJohn Marino typedef struct
1382*e4b17023SJohn Marino {
1383*e4b17023SJohn Marino tree fn;
1384*e4b17023SJohn Marino gimple stmt;
1385*e4b17023SJohn Marino } check_var_data;
1386*e4b17023SJohn Marino
1387*e4b17023SJohn Marino /* A walk_tree function to go over the VAR_DECL, PARM_DECL nodes of
1388*e4b17023SJohn Marino the malloc size expression and check that those aren't changed
1389*e4b17023SJohn Marino over the function. */
1390*e4b17023SJohn Marino static tree
check_var_notmodified_p(tree * tp,int * walk_subtrees,void * data)1391*e4b17023SJohn Marino check_var_notmodified_p (tree * tp, int *walk_subtrees, void *data)
1392*e4b17023SJohn Marino {
1393*e4b17023SJohn Marino basic_block bb;
1394*e4b17023SJohn Marino tree t = *tp;
1395*e4b17023SJohn Marino check_var_data *callback_data = (check_var_data*) data;
1396*e4b17023SJohn Marino tree fn = callback_data->fn;
1397*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1398*e4b17023SJohn Marino gimple stmt;
1399*e4b17023SJohn Marino
1400*e4b17023SJohn Marino if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
1401*e4b17023SJohn Marino return NULL_TREE;
1402*e4b17023SJohn Marino
1403*e4b17023SJohn Marino FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
1404*e4b17023SJohn Marino {
1405*e4b17023SJohn Marino for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1406*e4b17023SJohn Marino {
1407*e4b17023SJohn Marino stmt = gsi_stmt (gsi);
1408*e4b17023SJohn Marino if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1409*e4b17023SJohn Marino continue;
1410*e4b17023SJohn Marino if (gimple_get_lhs (stmt) == t)
1411*e4b17023SJohn Marino {
1412*e4b17023SJohn Marino callback_data->stmt = stmt;
1413*e4b17023SJohn Marino return t;
1414*e4b17023SJohn Marino }
1415*e4b17023SJohn Marino }
1416*e4b17023SJohn Marino }
1417*e4b17023SJohn Marino *walk_subtrees = 1;
1418*e4b17023SJohn Marino return NULL_TREE;
1419*e4b17023SJohn Marino }
1420*e4b17023SJohn Marino
1421*e4b17023SJohn Marino /* Go backwards in the use-def chains and find out the expression
1422*e4b17023SJohn Marino represented by the possible SSA name in STMT, until it is composed
1423*e4b17023SJohn Marino of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1424*e4b17023SJohn Marino we make sure that all the arguments represent the same subexpression,
1425*e4b17023SJohn Marino otherwise we fail. */
1426*e4b17023SJohn Marino
1427*e4b17023SJohn Marino static tree
can_calculate_stmt_before_stmt(gimple stmt,sbitmap visited)1428*e4b17023SJohn Marino can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
1429*e4b17023SJohn Marino {
1430*e4b17023SJohn Marino tree op1, op2, res;
1431*e4b17023SJohn Marino enum tree_code code;
1432*e4b17023SJohn Marino
1433*e4b17023SJohn Marino switch (gimple_code (stmt))
1434*e4b17023SJohn Marino {
1435*e4b17023SJohn Marino case GIMPLE_ASSIGN:
1436*e4b17023SJohn Marino code = gimple_assign_rhs_code (stmt);
1437*e4b17023SJohn Marino op1 = gimple_assign_rhs1 (stmt);
1438*e4b17023SJohn Marino
1439*e4b17023SJohn Marino switch (code)
1440*e4b17023SJohn Marino {
1441*e4b17023SJohn Marino case POINTER_PLUS_EXPR:
1442*e4b17023SJohn Marino case PLUS_EXPR:
1443*e4b17023SJohn Marino case MINUS_EXPR:
1444*e4b17023SJohn Marino case MULT_EXPR:
1445*e4b17023SJohn Marino
1446*e4b17023SJohn Marino op2 = gimple_assign_rhs2 (stmt);
1447*e4b17023SJohn Marino op1 = can_calculate_expr_before_stmt (op1, visited);
1448*e4b17023SJohn Marino if (!op1)
1449*e4b17023SJohn Marino return NULL_TREE;
1450*e4b17023SJohn Marino op2 = can_calculate_expr_before_stmt (op2, visited);
1451*e4b17023SJohn Marino if (op2)
1452*e4b17023SJohn Marino return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
1453*e4b17023SJohn Marino return NULL_TREE;
1454*e4b17023SJohn Marino
1455*e4b17023SJohn Marino CASE_CONVERT:
1456*e4b17023SJohn Marino res = can_calculate_expr_before_stmt (op1, visited);
1457*e4b17023SJohn Marino if (res != NULL_TREE)
1458*e4b17023SJohn Marino return build1 (code, gimple_expr_type (stmt), res);
1459*e4b17023SJohn Marino else
1460*e4b17023SJohn Marino return NULL_TREE;
1461*e4b17023SJohn Marino
1462*e4b17023SJohn Marino default:
1463*e4b17023SJohn Marino if (gimple_assign_single_p (stmt))
1464*e4b17023SJohn Marino return can_calculate_expr_before_stmt (op1, visited);
1465*e4b17023SJohn Marino else
1466*e4b17023SJohn Marino return NULL_TREE;
1467*e4b17023SJohn Marino }
1468*e4b17023SJohn Marino
1469*e4b17023SJohn Marino case GIMPLE_PHI:
1470*e4b17023SJohn Marino {
1471*e4b17023SJohn Marino size_t j;
1472*e4b17023SJohn Marino
1473*e4b17023SJohn Marino res = NULL_TREE;
1474*e4b17023SJohn Marino /* Make sure all the arguments represent the same value. */
1475*e4b17023SJohn Marino for (j = 0; j < gimple_phi_num_args (stmt); j++)
1476*e4b17023SJohn Marino {
1477*e4b17023SJohn Marino tree new_res;
1478*e4b17023SJohn Marino tree def = PHI_ARG_DEF (stmt, j);
1479*e4b17023SJohn Marino
1480*e4b17023SJohn Marino new_res = can_calculate_expr_before_stmt (def, visited);
1481*e4b17023SJohn Marino if (res == NULL_TREE)
1482*e4b17023SJohn Marino res = new_res;
1483*e4b17023SJohn Marino else if (!new_res || !expressions_equal_p (res, new_res))
1484*e4b17023SJohn Marino return NULL_TREE;
1485*e4b17023SJohn Marino }
1486*e4b17023SJohn Marino return res;
1487*e4b17023SJohn Marino }
1488*e4b17023SJohn Marino
1489*e4b17023SJohn Marino default:
1490*e4b17023SJohn Marino return NULL_TREE;
1491*e4b17023SJohn Marino }
1492*e4b17023SJohn Marino }
1493*e4b17023SJohn Marino
1494*e4b17023SJohn Marino /* Go backwards in the use-def chains and find out the expression
1495*e4b17023SJohn Marino represented by the possible SSA name in EXPR, until it is composed
1496*e4b17023SJohn Marino of only VAR_DECL, PARM_DECL and INT_CST. In case of phi nodes
1497*e4b17023SJohn Marino we make sure that all the arguments represent the same subexpression,
1498*e4b17023SJohn Marino otherwise we fail. */
1499*e4b17023SJohn Marino static tree
can_calculate_expr_before_stmt(tree expr,sbitmap visited)1500*e4b17023SJohn Marino can_calculate_expr_before_stmt (tree expr, sbitmap visited)
1501*e4b17023SJohn Marino {
1502*e4b17023SJohn Marino gimple def_stmt;
1503*e4b17023SJohn Marino tree res;
1504*e4b17023SJohn Marino
1505*e4b17023SJohn Marino switch (TREE_CODE (expr))
1506*e4b17023SJohn Marino {
1507*e4b17023SJohn Marino case SSA_NAME:
1508*e4b17023SJohn Marino /* Case of loop, we don't know to represent this expression. */
1509*e4b17023SJohn Marino if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
1510*e4b17023SJohn Marino return NULL_TREE;
1511*e4b17023SJohn Marino
1512*e4b17023SJohn Marino SET_BIT (visited, SSA_NAME_VERSION (expr));
1513*e4b17023SJohn Marino def_stmt = SSA_NAME_DEF_STMT (expr);
1514*e4b17023SJohn Marino res = can_calculate_stmt_before_stmt (def_stmt, visited);
1515*e4b17023SJohn Marino RESET_BIT (visited, SSA_NAME_VERSION (expr));
1516*e4b17023SJohn Marino return res;
1517*e4b17023SJohn Marino case VAR_DECL:
1518*e4b17023SJohn Marino case PARM_DECL:
1519*e4b17023SJohn Marino case INTEGER_CST:
1520*e4b17023SJohn Marino return expr;
1521*e4b17023SJohn Marino
1522*e4b17023SJohn Marino default:
1523*e4b17023SJohn Marino return NULL_TREE;
1524*e4b17023SJohn Marino }
1525*e4b17023SJohn Marino }
1526*e4b17023SJohn Marino
1527*e4b17023SJohn Marino /* There should be only one allocation function for the dimensions
1528*e4b17023SJohn Marino that don't escape. Here we check the allocation sites in this
1529*e4b17023SJohn Marino function. We must make sure that all the dimensions are allocated
1530*e4b17023SJohn Marino using malloc and that the malloc size parameter expression could be
1531*e4b17023SJohn Marino pre-calculated before the call to the malloc of dimension 0.
1532*e4b17023SJohn Marino
1533*e4b17023SJohn Marino Given a candidate matrix for flattening -- MI -- check if it's
1534*e4b17023SJohn Marino appropriate for flattening -- we analyze the allocation
1535*e4b17023SJohn Marino sites that we recorded in the previous analysis. The result of the
1536*e4b17023SJohn Marino analysis is a level of indirection (matrix dimension) in which the
1537*e4b17023SJohn Marino flattening is safe. We check the following conditions:
1538*e4b17023SJohn Marino 1. There is only one allocation site for each dimension.
1539*e4b17023SJohn Marino 2. The allocation sites of all the dimensions are in the same
1540*e4b17023SJohn Marino function.
1541*e4b17023SJohn Marino (The above two are being taken care of during the analysis when
1542*e4b17023SJohn Marino we check the allocation site).
1543*e4b17023SJohn Marino 3. All the dimensions that we flatten are allocated at once; thus
1544*e4b17023SJohn Marino the total size must be known before the allocation of the
1545*e4b17023SJohn Marino dimension 0 (top level) -- we must make sure we represent the
1546*e4b17023SJohn Marino size of the allocation as an expression of global parameters or
1547*e4b17023SJohn Marino constants and that those doesn't change over the function. */
1548*e4b17023SJohn Marino
1549*e4b17023SJohn Marino static int
check_allocation_function(void ** slot,void * data ATTRIBUTE_UNUSED)1550*e4b17023SJohn Marino check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
1551*e4b17023SJohn Marino {
1552*e4b17023SJohn Marino int level;
1553*e4b17023SJohn Marino struct matrix_info *mi = (struct matrix_info *) *slot;
1554*e4b17023SJohn Marino sbitmap visited;
1555*e4b17023SJohn Marino
1556*e4b17023SJohn Marino if (!mi->malloc_for_level)
1557*e4b17023SJohn Marino return 1;
1558*e4b17023SJohn Marino
1559*e4b17023SJohn Marino visited = sbitmap_alloc (num_ssa_names);
1560*e4b17023SJohn Marino
1561*e4b17023SJohn Marino /* Do nothing if the current function is not the allocation
1562*e4b17023SJohn Marino function of MI. */
1563*e4b17023SJohn Marino if (mi->allocation_function_decl != current_function_decl
1564*e4b17023SJohn Marino /* We aren't in the main allocation function yet. */
1565*e4b17023SJohn Marino || !mi->malloc_for_level[0])
1566*e4b17023SJohn Marino return 1;
1567*e4b17023SJohn Marino
1568*e4b17023SJohn Marino for (level = 1; level < mi->max_malloced_level; level++)
1569*e4b17023SJohn Marino if (!mi->malloc_for_level[level])
1570*e4b17023SJohn Marino break;
1571*e4b17023SJohn Marino
1572*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, level, NULL);
1573*e4b17023SJohn Marino
1574*e4b17023SJohn Marino /* Check if the expression of the size passed to malloc could be
1575*e4b17023SJohn Marino pre-calculated before the malloc of level 0. */
1576*e4b17023SJohn Marino for (level = 1; level < mi->min_indirect_level_escape; level++)
1577*e4b17023SJohn Marino {
1578*e4b17023SJohn Marino gimple call_stmt;
1579*e4b17023SJohn Marino tree size;
1580*e4b17023SJohn Marino struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
1581*e4b17023SJohn Marino
1582*e4b17023SJohn Marino call_stmt = mi->malloc_for_level[level];
1583*e4b17023SJohn Marino
1584*e4b17023SJohn Marino /* Find the correct malloc information. */
1585*e4b17023SJohn Marino collect_data_for_malloc_call (call_stmt, &mcd);
1586*e4b17023SJohn Marino
1587*e4b17023SJohn Marino /* No need to check anticipation for constants. */
1588*e4b17023SJohn Marino if (TREE_CODE (mcd.size_var) == INTEGER_CST)
1589*e4b17023SJohn Marino {
1590*e4b17023SJohn Marino if (!mi->dimension_size)
1591*e4b17023SJohn Marino {
1592*e4b17023SJohn Marino mi->dimension_size =
1593*e4b17023SJohn Marino (tree *) xcalloc (mi->min_indirect_level_escape,
1594*e4b17023SJohn Marino sizeof (tree));
1595*e4b17023SJohn Marino mi->dimension_size_orig =
1596*e4b17023SJohn Marino (tree *) xcalloc (mi->min_indirect_level_escape,
1597*e4b17023SJohn Marino sizeof (tree));
1598*e4b17023SJohn Marino }
1599*e4b17023SJohn Marino mi->dimension_size[level] = mcd.size_var;
1600*e4b17023SJohn Marino mi->dimension_size_orig[level] = mcd.size_var;
1601*e4b17023SJohn Marino continue;
1602*e4b17023SJohn Marino }
1603*e4b17023SJohn Marino /* ??? Here we should also add the way to calculate the size
1604*e4b17023SJohn Marino expression not only know that it is anticipated. */
1605*e4b17023SJohn Marino sbitmap_zero (visited);
1606*e4b17023SJohn Marino size = can_calculate_expr_before_stmt (mcd.size_var, visited);
1607*e4b17023SJohn Marino if (size == NULL_TREE)
1608*e4b17023SJohn Marino {
1609*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, level, call_stmt);
1610*e4b17023SJohn Marino if (dump_file)
1611*e4b17023SJohn Marino fprintf (dump_file,
1612*e4b17023SJohn Marino "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
1613*e4b17023SJohn Marino get_name (mi->decl), level);
1614*e4b17023SJohn Marino break;
1615*e4b17023SJohn Marino }
1616*e4b17023SJohn Marino if (!mi->dimension_size)
1617*e4b17023SJohn Marino {
1618*e4b17023SJohn Marino mi->dimension_size =
1619*e4b17023SJohn Marino (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1620*e4b17023SJohn Marino mi->dimension_size_orig =
1621*e4b17023SJohn Marino (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1622*e4b17023SJohn Marino }
1623*e4b17023SJohn Marino mi->dimension_size[level] = size;
1624*e4b17023SJohn Marino mi->dimension_size_orig[level] = size;
1625*e4b17023SJohn Marino }
1626*e4b17023SJohn Marino
1627*e4b17023SJohn Marino /* We don't need those anymore. */
1628*e4b17023SJohn Marino for (level = mi->min_indirect_level_escape;
1629*e4b17023SJohn Marino level < mi->max_malloced_level; level++)
1630*e4b17023SJohn Marino mi->malloc_for_level[level] = NULL;
1631*e4b17023SJohn Marino return 1;
1632*e4b17023SJohn Marino }
1633*e4b17023SJohn Marino
1634*e4b17023SJohn Marino /* Track all access and allocation sites. */
1635*e4b17023SJohn Marino static void
find_sites_in_func(bool record)1636*e4b17023SJohn Marino find_sites_in_func (bool record)
1637*e4b17023SJohn Marino {
1638*e4b17023SJohn Marino sbitmap visited_stmts_1;
1639*e4b17023SJohn Marino
1640*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1641*e4b17023SJohn Marino gimple stmt;
1642*e4b17023SJohn Marino basic_block bb;
1643*e4b17023SJohn Marino struct matrix_info tmpmi, *mi;
1644*e4b17023SJohn Marino
1645*e4b17023SJohn Marino visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1646*e4b17023SJohn Marino
1647*e4b17023SJohn Marino FOR_EACH_BB (bb)
1648*e4b17023SJohn Marino {
1649*e4b17023SJohn Marino for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1650*e4b17023SJohn Marino {
1651*e4b17023SJohn Marino tree lhs;
1652*e4b17023SJohn Marino
1653*e4b17023SJohn Marino stmt = gsi_stmt (gsi);
1654*e4b17023SJohn Marino lhs = gimple_get_lhs (stmt);
1655*e4b17023SJohn Marino if (lhs != NULL_TREE
1656*e4b17023SJohn Marino && TREE_CODE (lhs) == VAR_DECL)
1657*e4b17023SJohn Marino {
1658*e4b17023SJohn Marino tmpmi.decl = lhs;
1659*e4b17023SJohn Marino if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1660*e4b17023SJohn Marino &tmpmi)))
1661*e4b17023SJohn Marino {
1662*e4b17023SJohn Marino sbitmap_zero (visited_stmts_1);
1663*e4b17023SJohn Marino analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
1664*e4b17023SJohn Marino }
1665*e4b17023SJohn Marino }
1666*e4b17023SJohn Marino if (is_gimple_assign (stmt)
1667*e4b17023SJohn Marino && gimple_assign_single_p (stmt)
1668*e4b17023SJohn Marino && TREE_CODE (lhs) == SSA_NAME
1669*e4b17023SJohn Marino && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
1670*e4b17023SJohn Marino {
1671*e4b17023SJohn Marino tmpmi.decl = gimple_assign_rhs1 (stmt);
1672*e4b17023SJohn Marino if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1673*e4b17023SJohn Marino &tmpmi)))
1674*e4b17023SJohn Marino {
1675*e4b17023SJohn Marino sbitmap_zero (visited_stmts_1);
1676*e4b17023SJohn Marino analyze_matrix_accesses (mi, lhs, 0,
1677*e4b17023SJohn Marino false, visited_stmts_1, record);
1678*e4b17023SJohn Marino }
1679*e4b17023SJohn Marino }
1680*e4b17023SJohn Marino }
1681*e4b17023SJohn Marino }
1682*e4b17023SJohn Marino sbitmap_free (visited_stmts_1);
1683*e4b17023SJohn Marino }
1684*e4b17023SJohn Marino
1685*e4b17023SJohn Marino /* Traverse the use-def chains to see if there are matrices that
1686*e4b17023SJohn Marino are passed through pointers and we cannot know how they are accessed.
1687*e4b17023SJohn Marino For each SSA-name defined by a global variable of our interest,
1688*e4b17023SJohn Marino we traverse the use-def chains of the SSA and follow the indirections,
1689*e4b17023SJohn Marino and record in what level of indirection the use of the variable
1690*e4b17023SJohn Marino escapes. A use of a pointer escapes when it is passed to a function,
1691*e4b17023SJohn Marino stored into memory or assigned (except in malloc and free calls). */
1692*e4b17023SJohn Marino
1693*e4b17023SJohn Marino static void
record_all_accesses_in_func(void)1694*e4b17023SJohn Marino record_all_accesses_in_func (void)
1695*e4b17023SJohn Marino {
1696*e4b17023SJohn Marino unsigned i;
1697*e4b17023SJohn Marino sbitmap visited_stmts_1;
1698*e4b17023SJohn Marino
1699*e4b17023SJohn Marino visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1700*e4b17023SJohn Marino
1701*e4b17023SJohn Marino for (i = 0; i < num_ssa_names; i++)
1702*e4b17023SJohn Marino {
1703*e4b17023SJohn Marino struct matrix_info tmpmi, *mi;
1704*e4b17023SJohn Marino tree ssa_var = ssa_name (i);
1705*e4b17023SJohn Marino tree rhs, lhs;
1706*e4b17023SJohn Marino
1707*e4b17023SJohn Marino if (!ssa_var
1708*e4b17023SJohn Marino || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
1709*e4b17023SJohn Marino || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
1710*e4b17023SJohn Marino continue;
1711*e4b17023SJohn Marino rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
1712*e4b17023SJohn Marino lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
1713*e4b17023SJohn Marino if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
1714*e4b17023SJohn Marino continue;
1715*e4b17023SJohn Marino
1716*e4b17023SJohn Marino /* If the RHS is a matrix that we want to analyze, follow the def-use
1717*e4b17023SJohn Marino chain for this SSA_VAR and check for escapes or apply the
1718*e4b17023SJohn Marino flattening. */
1719*e4b17023SJohn Marino tmpmi.decl = rhs;
1720*e4b17023SJohn Marino if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
1721*e4b17023SJohn Marino {
1722*e4b17023SJohn Marino /* This variable will track the visited PHI nodes, so we can limit
1723*e4b17023SJohn Marino its size to the maximum number of SSA names. */
1724*e4b17023SJohn Marino sbitmap_zero (visited_stmts_1);
1725*e4b17023SJohn Marino analyze_matrix_accesses (mi, ssa_var,
1726*e4b17023SJohn Marino 0, false, visited_stmts_1, true);
1727*e4b17023SJohn Marino
1728*e4b17023SJohn Marino }
1729*e4b17023SJohn Marino }
1730*e4b17023SJohn Marino sbitmap_free (visited_stmts_1);
1731*e4b17023SJohn Marino }
1732*e4b17023SJohn Marino
1733*e4b17023SJohn Marino /* Used when we want to convert the expression: RESULT = something *
1734*e4b17023SJohn Marino ORIG to RESULT = something * NEW_VAL. If ORIG and NEW_VAL are power
1735*e4b17023SJohn Marino of 2, shift operations can be done, else division and
1736*e4b17023SJohn Marino multiplication. */
1737*e4b17023SJohn Marino
1738*e4b17023SJohn Marino static tree
compute_offset(HOST_WIDE_INT orig,HOST_WIDE_INT new_val,tree result)1739*e4b17023SJohn Marino compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
1740*e4b17023SJohn Marino {
1741*e4b17023SJohn Marino
1742*e4b17023SJohn Marino int x, y;
1743*e4b17023SJohn Marino tree result1, ratio, log, orig_tree, new_tree;
1744*e4b17023SJohn Marino
1745*e4b17023SJohn Marino x = exact_log2 (orig);
1746*e4b17023SJohn Marino y = exact_log2 (new_val);
1747*e4b17023SJohn Marino
1748*e4b17023SJohn Marino if (x != -1 && y != -1)
1749*e4b17023SJohn Marino {
1750*e4b17023SJohn Marino if (x == y)
1751*e4b17023SJohn Marino return result;
1752*e4b17023SJohn Marino else if (x > y)
1753*e4b17023SJohn Marino {
1754*e4b17023SJohn Marino log = build_int_cst (TREE_TYPE (result), x - y);
1755*e4b17023SJohn Marino result1 =
1756*e4b17023SJohn Marino fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
1757*e4b17023SJohn Marino return result1;
1758*e4b17023SJohn Marino }
1759*e4b17023SJohn Marino log = build_int_cst (TREE_TYPE (result), y - x);
1760*e4b17023SJohn Marino result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
1761*e4b17023SJohn Marino
1762*e4b17023SJohn Marino return result1;
1763*e4b17023SJohn Marino }
1764*e4b17023SJohn Marino orig_tree = build_int_cst (TREE_TYPE (result), orig);
1765*e4b17023SJohn Marino new_tree = build_int_cst (TREE_TYPE (result), new_val);
1766*e4b17023SJohn Marino ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, orig_tree);
1767*e4b17023SJohn Marino result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, new_tree);
1768*e4b17023SJohn Marino
1769*e4b17023SJohn Marino return result1;
1770*e4b17023SJohn Marino }
1771*e4b17023SJohn Marino
1772*e4b17023SJohn Marino
1773*e4b17023SJohn Marino /* We know that we are allowed to perform matrix flattening (according to the
1774*e4b17023SJohn Marino escape analysis), so we traverse the use-def chains of the SSA vars
1775*e4b17023SJohn Marino defined by the global variables pointing to the matrices of our interest.
1776*e4b17023SJohn Marino in each use of the SSA we calculate the offset from the base address
1777*e4b17023SJohn Marino according to the following equation:
1778*e4b17023SJohn Marino
1779*e4b17023SJohn Marino a[I1][I2]...[Ik] , where D1..Dk is the length of each dimension and the
1780*e4b17023SJohn Marino escaping level is m <= k, and a' is the new allocated matrix,
1781*e4b17023SJohn Marino will be translated to :
1782*e4b17023SJohn Marino
1783*e4b17023SJohn Marino b[I(m+1)]...[Ik]
1784*e4b17023SJohn Marino
1785*e4b17023SJohn Marino where
1786*e4b17023SJohn Marino b = a' + I1*D2...*Dm + I2*D3...Dm + ... + Im
1787*e4b17023SJohn Marino */
1788*e4b17023SJohn Marino
1789*e4b17023SJohn Marino static int
transform_access_sites(void ** slot,void * data ATTRIBUTE_UNUSED)1790*e4b17023SJohn Marino transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1791*e4b17023SJohn Marino {
1792*e4b17023SJohn Marino gimple_stmt_iterator gsi;
1793*e4b17023SJohn Marino struct matrix_info *mi = (struct matrix_info *) *slot;
1794*e4b17023SJohn Marino int min_escape_l = mi->min_indirect_level_escape;
1795*e4b17023SJohn Marino struct access_site_info *acc_info;
1796*e4b17023SJohn Marino enum tree_code code;
1797*e4b17023SJohn Marino int i;
1798*e4b17023SJohn Marino
1799*e4b17023SJohn Marino if (min_escape_l < 2 || !mi->access_l)
1800*e4b17023SJohn Marino return 1;
1801*e4b17023SJohn Marino for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1802*e4b17023SJohn Marino i++)
1803*e4b17023SJohn Marino {
1804*e4b17023SJohn Marino /* This is possible because we collect the access sites before
1805*e4b17023SJohn Marino we determine the final minimum indirection level. */
1806*e4b17023SJohn Marino if (acc_info->level >= min_escape_l)
1807*e4b17023SJohn Marino {
1808*e4b17023SJohn Marino free (acc_info);
1809*e4b17023SJohn Marino continue;
1810*e4b17023SJohn Marino }
1811*e4b17023SJohn Marino if (acc_info->is_alloc)
1812*e4b17023SJohn Marino {
1813*e4b17023SJohn Marino if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
1814*e4b17023SJohn Marino {
1815*e4b17023SJohn Marino ssa_op_iter iter;
1816*e4b17023SJohn Marino tree def;
1817*e4b17023SJohn Marino gimple stmt = acc_info->stmt;
1818*e4b17023SJohn Marino tree lhs;
1819*e4b17023SJohn Marino
1820*e4b17023SJohn Marino FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1821*e4b17023SJohn Marino mark_sym_for_renaming (SSA_NAME_VAR (def));
1822*e4b17023SJohn Marino gsi = gsi_for_stmt (stmt);
1823*e4b17023SJohn Marino gcc_assert (is_gimple_assign (acc_info->stmt));
1824*e4b17023SJohn Marino lhs = gimple_assign_lhs (acc_info->stmt);
1825*e4b17023SJohn Marino if (TREE_CODE (lhs) == SSA_NAME
1826*e4b17023SJohn Marino && acc_info->level < min_escape_l - 1)
1827*e4b17023SJohn Marino {
1828*e4b17023SJohn Marino imm_use_iterator imm_iter;
1829*e4b17023SJohn Marino use_operand_p use_p;
1830*e4b17023SJohn Marino gimple use_stmt;
1831*e4b17023SJohn Marino
1832*e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
1833*e4b17023SJohn Marino FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1834*e4b17023SJohn Marino {
1835*e4b17023SJohn Marino tree rhs, tmp;
1836*e4b17023SJohn Marino gimple new_stmt;
1837*e4b17023SJohn Marino
1838*e4b17023SJohn Marino gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
1839*e4b17023SJohn Marino == MEM_REF);
1840*e4b17023SJohn Marino /* Emit convert statement to convert to type of use. */
1841*e4b17023SJohn Marino tmp = create_tmp_var (TREE_TYPE (lhs), "new");
1842*e4b17023SJohn Marino add_referenced_var (tmp);
1843*e4b17023SJohn Marino rhs = gimple_assign_rhs1 (acc_info->stmt);
1844*e4b17023SJohn Marino rhs = fold_convert (TREE_TYPE (tmp),
1845*e4b17023SJohn Marino TREE_OPERAND (rhs, 0));
1846*e4b17023SJohn Marino new_stmt = gimple_build_assign (tmp, rhs);
1847*e4b17023SJohn Marino tmp = make_ssa_name (tmp, new_stmt);
1848*e4b17023SJohn Marino gimple_assign_set_lhs (new_stmt, tmp);
1849*e4b17023SJohn Marino gsi = gsi_for_stmt (acc_info->stmt);
1850*e4b17023SJohn Marino gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
1851*e4b17023SJohn Marino SET_USE (use_p, tmp);
1852*e4b17023SJohn Marino }
1853*e4b17023SJohn Marino }
1854*e4b17023SJohn Marino if (acc_info->level < min_escape_l - 1)
1855*e4b17023SJohn Marino gsi_remove (&gsi, true);
1856*e4b17023SJohn Marino }
1857*e4b17023SJohn Marino free (acc_info);
1858*e4b17023SJohn Marino continue;
1859*e4b17023SJohn Marino }
1860*e4b17023SJohn Marino code = gimple_assign_rhs_code (acc_info->stmt);
1861*e4b17023SJohn Marino if (code == MEM_REF
1862*e4b17023SJohn Marino && acc_info->level < min_escape_l - 1)
1863*e4b17023SJohn Marino {
1864*e4b17023SJohn Marino /* Replace the MEM_REF with NOP (cast) usually we are casting
1865*e4b17023SJohn Marino from "pointer to type" to "type". */
1866*e4b17023SJohn Marino tree t =
1867*e4b17023SJohn Marino build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
1868*e4b17023SJohn Marino TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
1869*e4b17023SJohn Marino gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
1870*e4b17023SJohn Marino gimple_assign_set_rhs1 (acc_info->stmt, t);
1871*e4b17023SJohn Marino }
1872*e4b17023SJohn Marino else if (code == POINTER_PLUS_EXPR
1873*e4b17023SJohn Marino && acc_info->level < (min_escape_l))
1874*e4b17023SJohn Marino {
1875*e4b17023SJohn Marino imm_use_iterator imm_iter;
1876*e4b17023SJohn Marino use_operand_p use_p;
1877*e4b17023SJohn Marino
1878*e4b17023SJohn Marino tree offset;
1879*e4b17023SJohn Marino int k = acc_info->level;
1880*e4b17023SJohn Marino tree num_elements, total_elements;
1881*e4b17023SJohn Marino tree tmp1;
1882*e4b17023SJohn Marino tree d_size = mi->dimension_size[k];
1883*e4b17023SJohn Marino
1884*e4b17023SJohn Marino /* We already make sure in the analysis that the first operand
1885*e4b17023SJohn Marino is the base and the second is the offset. */
1886*e4b17023SJohn Marino offset = acc_info->offset;
1887*e4b17023SJohn Marino if (mi->dim_map[k] == min_escape_l - 1)
1888*e4b17023SJohn Marino {
1889*e4b17023SJohn Marino if (!check_transpose_p || mi->is_transposed_p == false)
1890*e4b17023SJohn Marino tmp1 = offset;
1891*e4b17023SJohn Marino else
1892*e4b17023SJohn Marino {
1893*e4b17023SJohn Marino tree new_offset;
1894*e4b17023SJohn Marino
1895*e4b17023SJohn Marino new_offset =
1896*e4b17023SJohn Marino compute_offset (mi->dimension_type_size[min_escape_l],
1897*e4b17023SJohn Marino mi->dimension_type_size[k + 1], offset);
1898*e4b17023SJohn Marino
1899*e4b17023SJohn Marino total_elements = new_offset;
1900*e4b17023SJohn Marino if (new_offset != offset)
1901*e4b17023SJohn Marino {
1902*e4b17023SJohn Marino gsi = gsi_for_stmt (acc_info->stmt);
1903*e4b17023SJohn Marino tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
1904*e4b17023SJohn Marino true, NULL,
1905*e4b17023SJohn Marino true, GSI_SAME_STMT);
1906*e4b17023SJohn Marino }
1907*e4b17023SJohn Marino else
1908*e4b17023SJohn Marino tmp1 = offset;
1909*e4b17023SJohn Marino }
1910*e4b17023SJohn Marino }
1911*e4b17023SJohn Marino else
1912*e4b17023SJohn Marino {
1913*e4b17023SJohn Marino d_size = mi->dimension_size[mi->dim_map[k] + 1];
1914*e4b17023SJohn Marino num_elements =
1915*e4b17023SJohn Marino fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, acc_info->index),
1916*e4b17023SJohn Marino fold_convert (sizetype, d_size));
1917*e4b17023SJohn Marino add_referenced_var (d_size);
1918*e4b17023SJohn Marino gsi = gsi_for_stmt (acc_info->stmt);
1919*e4b17023SJohn Marino tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
1920*e4b17023SJohn Marino NULL, true, GSI_SAME_STMT);
1921*e4b17023SJohn Marino }
1922*e4b17023SJohn Marino /* Replace the offset if needed. */
1923*e4b17023SJohn Marino if (tmp1 != offset)
1924*e4b17023SJohn Marino {
1925*e4b17023SJohn Marino if (TREE_CODE (offset) == SSA_NAME)
1926*e4b17023SJohn Marino {
1927*e4b17023SJohn Marino gimple use_stmt;
1928*e4b17023SJohn Marino
1929*e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1930*e4b17023SJohn Marino FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1931*e4b17023SJohn Marino if (use_stmt == acc_info->stmt)
1932*e4b17023SJohn Marino SET_USE (use_p, tmp1);
1933*e4b17023SJohn Marino }
1934*e4b17023SJohn Marino else
1935*e4b17023SJohn Marino {
1936*e4b17023SJohn Marino gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1937*e4b17023SJohn Marino gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
1938*e4b17023SJohn Marino update_stmt (acc_info->stmt);
1939*e4b17023SJohn Marino }
1940*e4b17023SJohn Marino }
1941*e4b17023SJohn Marino }
1942*e4b17023SJohn Marino /* ??? meanwhile this happens because we record the same access
1943*e4b17023SJohn Marino site more than once; we should be using a hash table to
1944*e4b17023SJohn Marino avoid this and insert the STMT of the access site only
1945*e4b17023SJohn Marino once.
1946*e4b17023SJohn Marino else
1947*e4b17023SJohn Marino gcc_unreachable (); */
1948*e4b17023SJohn Marino free (acc_info);
1949*e4b17023SJohn Marino }
1950*e4b17023SJohn Marino VEC_free (access_site_info_p, heap, mi->access_l);
1951*e4b17023SJohn Marino
1952*e4b17023SJohn Marino update_ssa (TODO_update_ssa);
1953*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
1954*e4b17023SJohn Marino verify_ssa (true);
1955*e4b17023SJohn Marino #endif
1956*e4b17023SJohn Marino return 1;
1957*e4b17023SJohn Marino }
1958*e4b17023SJohn Marino
1959*e4b17023SJohn Marino /* Sort A array of counts. Arrange DIM_MAP to reflect the new order. */
1960*e4b17023SJohn Marino
1961*e4b17023SJohn Marino static void
sort_dim_hot_level(gcov_type * a,int * dim_map,int n)1962*e4b17023SJohn Marino sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1963*e4b17023SJohn Marino {
1964*e4b17023SJohn Marino int i, j, tmp1;
1965*e4b17023SJohn Marino gcov_type tmp;
1966*e4b17023SJohn Marino
1967*e4b17023SJohn Marino for (i = 0; i < n - 1; i++)
1968*e4b17023SJohn Marino {
1969*e4b17023SJohn Marino for (j = 0; j < n - 1 - i; j++)
1970*e4b17023SJohn Marino {
1971*e4b17023SJohn Marino if (a[j + 1] < a[j])
1972*e4b17023SJohn Marino {
1973*e4b17023SJohn Marino tmp = a[j]; /* swap a[j] and a[j+1] */
1974*e4b17023SJohn Marino a[j] = a[j + 1];
1975*e4b17023SJohn Marino a[j + 1] = tmp;
1976*e4b17023SJohn Marino tmp1 = dim_map[j];
1977*e4b17023SJohn Marino dim_map[j] = dim_map[j + 1];
1978*e4b17023SJohn Marino dim_map[j + 1] = tmp1;
1979*e4b17023SJohn Marino }
1980*e4b17023SJohn Marino }
1981*e4b17023SJohn Marino }
1982*e4b17023SJohn Marino }
1983*e4b17023SJohn Marino
1984*e4b17023SJohn Marino /* Replace multiple mallocs (one for each dimension) to one malloc
1985*e4b17023SJohn Marino with the size of DIM1*DIM2*...*DIMN*size_of_element
1986*e4b17023SJohn Marino Make sure that we hold the size in the malloc site inside a
1987*e4b17023SJohn Marino new global variable; this way we ensure that the size doesn't
1988*e4b17023SJohn Marino change and it is accessible from all the other functions that
1989*e4b17023SJohn Marino uses the matrix. Also, the original calls to free are deleted,
1990*e4b17023SJohn Marino and replaced by a new call to free the flattened matrix. */
1991*e4b17023SJohn Marino
1992*e4b17023SJohn Marino static int
transform_allocation_sites(void ** slot,void * data ATTRIBUTE_UNUSED)1993*e4b17023SJohn Marino transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1994*e4b17023SJohn Marino {
1995*e4b17023SJohn Marino int i;
1996*e4b17023SJohn Marino struct matrix_info *mi;
1997*e4b17023SJohn Marino tree type, oldfn, prev_dim_size;
1998*e4b17023SJohn Marino gimple call_stmt_0, use_stmt;
1999*e4b17023SJohn Marino struct cgraph_node *c_node;
2000*e4b17023SJohn Marino struct cgraph_edge *e;
2001*e4b17023SJohn Marino gimple_stmt_iterator gsi;
2002*e4b17023SJohn Marino struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
2003*e4b17023SJohn Marino HOST_WIDE_INT element_size;
2004*e4b17023SJohn Marino
2005*e4b17023SJohn Marino imm_use_iterator imm_iter;
2006*e4b17023SJohn Marino use_operand_p use_p;
2007*e4b17023SJohn Marino tree old_size_0, tmp;
2008*e4b17023SJohn Marino int min_escape_l;
2009*e4b17023SJohn Marino int id;
2010*e4b17023SJohn Marino
2011*e4b17023SJohn Marino mi = (struct matrix_info *) *slot;
2012*e4b17023SJohn Marino
2013*e4b17023SJohn Marino min_escape_l = mi->min_indirect_level_escape;
2014*e4b17023SJohn Marino
2015*e4b17023SJohn Marino if (!mi->malloc_for_level)
2016*e4b17023SJohn Marino mi->min_indirect_level_escape = 0;
2017*e4b17023SJohn Marino
2018*e4b17023SJohn Marino if (mi->min_indirect_level_escape < 2)
2019*e4b17023SJohn Marino return 1;
2020*e4b17023SJohn Marino
2021*e4b17023SJohn Marino mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
2022*e4b17023SJohn Marino for (i = 0; i < mi->min_indirect_level_escape; i++)
2023*e4b17023SJohn Marino mi->dim_map[i] = i;
2024*e4b17023SJohn Marino if (check_transpose_p)
2025*e4b17023SJohn Marino {
2026*e4b17023SJohn Marino int i;
2027*e4b17023SJohn Marino
2028*e4b17023SJohn Marino if (dump_file)
2029*e4b17023SJohn Marino {
2030*e4b17023SJohn Marino fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
2031*e4b17023SJohn Marino for (i = 0; i < min_escape_l; i++)
2032*e4b17023SJohn Marino {
2033*e4b17023SJohn Marino fprintf (dump_file, "dim %d before sort ", i);
2034*e4b17023SJohn Marino if (mi->dim_hot_level)
2035*e4b17023SJohn Marino fprintf (dump_file,
2036*e4b17023SJohn Marino "count is " HOST_WIDEST_INT_PRINT_DEC " \n",
2037*e4b17023SJohn Marino mi->dim_hot_level[i]);
2038*e4b17023SJohn Marino }
2039*e4b17023SJohn Marino }
2040*e4b17023SJohn Marino sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
2041*e4b17023SJohn Marino mi->min_indirect_level_escape);
2042*e4b17023SJohn Marino if (dump_file)
2043*e4b17023SJohn Marino for (i = 0; i < min_escape_l; i++)
2044*e4b17023SJohn Marino {
2045*e4b17023SJohn Marino fprintf (dump_file, "dim %d after sort\n", i);
2046*e4b17023SJohn Marino if (mi->dim_hot_level)
2047*e4b17023SJohn Marino fprintf (dump_file, "count is " HOST_WIDE_INT_PRINT_DEC
2048*e4b17023SJohn Marino " \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
2049*e4b17023SJohn Marino }
2050*e4b17023SJohn Marino for (i = 0; i < mi->min_indirect_level_escape; i++)
2051*e4b17023SJohn Marino {
2052*e4b17023SJohn Marino if (dump_file)
2053*e4b17023SJohn Marino fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
2054*e4b17023SJohn Marino mi->dim_map[i]);
2055*e4b17023SJohn Marino if (mi->dim_map[i] != i)
2056*e4b17023SJohn Marino {
2057*e4b17023SJohn Marino if (dump_file)
2058*e4b17023SJohn Marino fprintf (dump_file,
2059*e4b17023SJohn Marino "Transposed dimensions: dim %d is now dim %d\n",
2060*e4b17023SJohn Marino mi->dim_map[i], i);
2061*e4b17023SJohn Marino mi->is_transposed_p = true;
2062*e4b17023SJohn Marino }
2063*e4b17023SJohn Marino }
2064*e4b17023SJohn Marino }
2065*e4b17023SJohn Marino else
2066*e4b17023SJohn Marino {
2067*e4b17023SJohn Marino for (i = 0; i < mi->min_indirect_level_escape; i++)
2068*e4b17023SJohn Marino mi->dim_map[i] = i;
2069*e4b17023SJohn Marino }
2070*e4b17023SJohn Marino /* Call statement of allocation site of level 0. */
2071*e4b17023SJohn Marino call_stmt_0 = mi->malloc_for_level[0];
2072*e4b17023SJohn Marino
2073*e4b17023SJohn Marino /* Finds the correct malloc information. */
2074*e4b17023SJohn Marino collect_data_for_malloc_call (call_stmt_0, &mcd);
2075*e4b17023SJohn Marino
2076*e4b17023SJohn Marino mi->dimension_size[0] = mcd.size_var;
2077*e4b17023SJohn Marino mi->dimension_size_orig[0] = mcd.size_var;
2078*e4b17023SJohn Marino /* Make sure that the variables in the size expression for
2079*e4b17023SJohn Marino all the dimensions (above level 0) aren't modified in
2080*e4b17023SJohn Marino the allocation function. */
2081*e4b17023SJohn Marino for (i = 1; i < mi->min_indirect_level_escape; i++)
2082*e4b17023SJohn Marino {
2083*e4b17023SJohn Marino tree t;
2084*e4b17023SJohn Marino check_var_data data;
2085*e4b17023SJohn Marino
2086*e4b17023SJohn Marino /* mi->dimension_size must contain the expression of the size calculated
2087*e4b17023SJohn Marino in check_allocation_function. */
2088*e4b17023SJohn Marino gcc_assert (mi->dimension_size[i]);
2089*e4b17023SJohn Marino
2090*e4b17023SJohn Marino data.fn = mi->allocation_function_decl;
2091*e4b17023SJohn Marino data.stmt = NULL;
2092*e4b17023SJohn Marino t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2093*e4b17023SJohn Marino check_var_notmodified_p,
2094*e4b17023SJohn Marino &data);
2095*e4b17023SJohn Marino if (t != NULL_TREE)
2096*e4b17023SJohn Marino {
2097*e4b17023SJohn Marino mark_min_matrix_escape_level (mi, i, data.stmt);
2098*e4b17023SJohn Marino break;
2099*e4b17023SJohn Marino }
2100*e4b17023SJohn Marino }
2101*e4b17023SJohn Marino
2102*e4b17023SJohn Marino if (mi->min_indirect_level_escape < 2)
2103*e4b17023SJohn Marino return 1;
2104*e4b17023SJohn Marino
2105*e4b17023SJohn Marino /* Since we should make sure that the size expression is available
2106*e4b17023SJohn Marino before the call to malloc of level 0. */
2107*e4b17023SJohn Marino gsi = gsi_for_stmt (call_stmt_0);
2108*e4b17023SJohn Marino
2109*e4b17023SJohn Marino /* Find out the size of each dimension by looking at the malloc
2110*e4b17023SJohn Marino sites and create a global variable to hold it.
2111*e4b17023SJohn Marino We add the assignment to the global before the malloc of level 0. */
2112*e4b17023SJohn Marino
2113*e4b17023SJohn Marino /* To be able to produce gimple temporaries. */
2114*e4b17023SJohn Marino oldfn = current_function_decl;
2115*e4b17023SJohn Marino current_function_decl = mi->allocation_function_decl;
2116*e4b17023SJohn Marino push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));
2117*e4b17023SJohn Marino
2118*e4b17023SJohn Marino /* Set the dimension sizes as follows:
2119*e4b17023SJohn Marino DIM_SIZE[i] = DIM_SIZE[n] * ... * DIM_SIZE[i]
2120*e4b17023SJohn Marino where n is the maximum non escaping level. */
2121*e4b17023SJohn Marino element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2122*e4b17023SJohn Marino prev_dim_size = NULL_TREE;
2123*e4b17023SJohn Marino
2124*e4b17023SJohn Marino for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2125*e4b17023SJohn Marino {
2126*e4b17023SJohn Marino tree dim_size, dim_var;
2127*e4b17023SJohn Marino gimple stmt;
2128*e4b17023SJohn Marino tree d_type_size;
2129*e4b17023SJohn Marino
2130*e4b17023SJohn Marino /* Now put the size expression in a global variable and initialize it to
2131*e4b17023SJohn Marino the size expression before the malloc of level 0. */
2132*e4b17023SJohn Marino dim_var =
2133*e4b17023SJohn Marino add_new_static_var (TREE_TYPE
2134*e4b17023SJohn Marino (mi->dimension_size_orig[mi->dim_map[i]]));
2135*e4b17023SJohn Marino type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2136*e4b17023SJohn Marino
2137*e4b17023SJohn Marino /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE. */
2138*e4b17023SJohn Marino /* Find which dim ID becomes dim I. */
2139*e4b17023SJohn Marino for (id = 0; id < mi->min_indirect_level_escape; id++)
2140*e4b17023SJohn Marino if (mi->dim_map[id] == i)
2141*e4b17023SJohn Marino break;
2142*e4b17023SJohn Marino d_type_size =
2143*e4b17023SJohn Marino build_int_cst (type, mi->dimension_type_size[id + 1]);
2144*e4b17023SJohn Marino if (!prev_dim_size)
2145*e4b17023SJohn Marino prev_dim_size = build_int_cst (type, element_size);
2146*e4b17023SJohn Marino if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2147*e4b17023SJohn Marino {
2148*e4b17023SJohn Marino dim_size = mi->dimension_size_orig[id];
2149*e4b17023SJohn Marino }
2150*e4b17023SJohn Marino else
2151*e4b17023SJohn Marino {
2152*e4b17023SJohn Marino dim_size =
2153*e4b17023SJohn Marino fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2154*e4b17023SJohn Marino d_type_size);
2155*e4b17023SJohn Marino
2156*e4b17023SJohn Marino dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2157*e4b17023SJohn Marino }
2158*e4b17023SJohn Marino dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
2159*e4b17023SJohn Marino true, GSI_SAME_STMT);
2160*e4b17023SJohn Marino /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE. */
2161*e4b17023SJohn Marino stmt = gimple_build_assign (dim_var, dim_size);
2162*e4b17023SJohn Marino mark_symbols_for_renaming (stmt);
2163*e4b17023SJohn Marino gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2164*e4b17023SJohn Marino
2165*e4b17023SJohn Marino prev_dim_size = mi->dimension_size[i] = dim_var;
2166*e4b17023SJohn Marino }
2167*e4b17023SJohn Marino update_ssa (TODO_update_ssa);
2168*e4b17023SJohn Marino /* Replace the malloc size argument in the malloc of level 0 to be
2169*e4b17023SJohn Marino the size of all the dimensions. */
2170*e4b17023SJohn Marino c_node = cgraph_get_node (mi->allocation_function_decl);
2171*e4b17023SJohn Marino gcc_checking_assert (c_node);
2172*e4b17023SJohn Marino old_size_0 = gimple_call_arg (call_stmt_0, 0);
2173*e4b17023SJohn Marino tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
2174*e4b17023SJohn Marino NULL, true, GSI_SAME_STMT);
2175*e4b17023SJohn Marino if (TREE_CODE (old_size_0) == SSA_NAME)
2176*e4b17023SJohn Marino {
2177*e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2178*e4b17023SJohn Marino FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2179*e4b17023SJohn Marino if (use_stmt == call_stmt_0)
2180*e4b17023SJohn Marino SET_USE (use_p, tmp);
2181*e4b17023SJohn Marino }
2182*e4b17023SJohn Marino /* When deleting the calls to malloc we need also to remove the edge from
2183*e4b17023SJohn Marino the call graph to keep it consistent. Notice that cgraph_edge may
2184*e4b17023SJohn Marino create a new node in the call graph if there is no node for the given
2185*e4b17023SJohn Marino declaration; this shouldn't be the case but currently there is no way to
2186*e4b17023SJohn Marino check this outside of "cgraph.c". */
2187*e4b17023SJohn Marino for (i = 1; i < mi->min_indirect_level_escape; i++)
2188*e4b17023SJohn Marino {
2189*e4b17023SJohn Marino gimple_stmt_iterator gsi;
2190*e4b17023SJohn Marino
2191*e4b17023SJohn Marino gimple call_stmt = mi->malloc_for_level[i];
2192*e4b17023SJohn Marino gcc_assert (is_gimple_call (call_stmt));
2193*e4b17023SJohn Marino e = cgraph_edge (c_node, call_stmt);
2194*e4b17023SJohn Marino gcc_assert (e);
2195*e4b17023SJohn Marino cgraph_remove_edge (e);
2196*e4b17023SJohn Marino gsi = gsi_for_stmt (call_stmt);
2197*e4b17023SJohn Marino /* Remove the call stmt. */
2198*e4b17023SJohn Marino gsi_remove (&gsi, true);
2199*e4b17023SJohn Marino /* Remove the assignment of the allocated area. */
2200*e4b17023SJohn Marino FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2201*e4b17023SJohn Marino gimple_call_lhs (call_stmt))
2202*e4b17023SJohn Marino {
2203*e4b17023SJohn Marino gsi = gsi_for_stmt (use_stmt);
2204*e4b17023SJohn Marino gsi_remove (&gsi, true);
2205*e4b17023SJohn Marino }
2206*e4b17023SJohn Marino }
2207*e4b17023SJohn Marino update_ssa (TODO_update_ssa);
2208*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
2209*e4b17023SJohn Marino verify_ssa (true);
2210*e4b17023SJohn Marino #endif
2211*e4b17023SJohn Marino /* Delete the calls to free. */
2212*e4b17023SJohn Marino for (i = 1; i < mi->min_indirect_level_escape; i++)
2213*e4b17023SJohn Marino {
2214*e4b17023SJohn Marino gimple_stmt_iterator gsi;
2215*e4b17023SJohn Marino
2216*e4b17023SJohn Marino /* ??? wonder why this case is possible but we failed on it once. */
2217*e4b17023SJohn Marino if (!mi->free_stmts[i].stmt)
2218*e4b17023SJohn Marino continue;
2219*e4b17023SJohn Marino
2220*e4b17023SJohn Marino c_node = cgraph_get_node (mi->free_stmts[i].func);
2221*e4b17023SJohn Marino gcc_checking_assert (c_node);
2222*e4b17023SJohn Marino gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
2223*e4b17023SJohn Marino e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2224*e4b17023SJohn Marino gcc_assert (e);
2225*e4b17023SJohn Marino cgraph_remove_edge (e);
2226*e4b17023SJohn Marino current_function_decl = mi->free_stmts[i].func;
2227*e4b17023SJohn Marino set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
2228*e4b17023SJohn Marino gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
2229*e4b17023SJohn Marino gsi_remove (&gsi, true);
2230*e4b17023SJohn Marino }
2231*e4b17023SJohn Marino /* Return to the previous situation. */
2232*e4b17023SJohn Marino current_function_decl = oldfn;
2233*e4b17023SJohn Marino pop_cfun ();
2234*e4b17023SJohn Marino return 1;
2235*e4b17023SJohn Marino
2236*e4b17023SJohn Marino }
2237*e4b17023SJohn Marino
2238*e4b17023SJohn Marino
2239*e4b17023SJohn Marino /* Print out the results of the escape analysis. */
2240*e4b17023SJohn Marino static int
dump_matrix_reorg_analysis(void ** slot,void * data ATTRIBUTE_UNUSED)2241*e4b17023SJohn Marino dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2242*e4b17023SJohn Marino {
2243*e4b17023SJohn Marino struct matrix_info *mi = (struct matrix_info *) *slot;
2244*e4b17023SJohn Marino
2245*e4b17023SJohn Marino if (!dump_file)
2246*e4b17023SJohn Marino return 1;
2247*e4b17023SJohn Marino fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2248*e4b17023SJohn Marino get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2249*e4b17023SJohn Marino fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2250*e4b17023SJohn Marino fprintf (dump_file, "\n");
2251*e4b17023SJohn Marino if (mi->min_indirect_level_escape >= 2)
2252*e4b17023SJohn Marino fprintf (dump_file, "Flattened %d dimensions \n",
2253*e4b17023SJohn Marino mi->min_indirect_level_escape);
2254*e4b17023SJohn Marino return 1;
2255*e4b17023SJohn Marino }
2256*e4b17023SJohn Marino
2257*e4b17023SJohn Marino /* Perform matrix flattening. */
2258*e4b17023SJohn Marino
2259*e4b17023SJohn Marino static unsigned int
matrix_reorg(void)2260*e4b17023SJohn Marino matrix_reorg (void)
2261*e4b17023SJohn Marino {
2262*e4b17023SJohn Marino struct cgraph_node *node;
2263*e4b17023SJohn Marino
2264*e4b17023SJohn Marino if (profile_info)
2265*e4b17023SJohn Marino check_transpose_p = true;
2266*e4b17023SJohn Marino else
2267*e4b17023SJohn Marino check_transpose_p = false;
2268*e4b17023SJohn Marino /* If there are hand written vectors, we skip this optimization. */
2269*e4b17023SJohn Marino for (node = cgraph_nodes; node; node = node->next)
2270*e4b17023SJohn Marino if (!may_flatten_matrices (node))
2271*e4b17023SJohn Marino return 0;
2272*e4b17023SJohn Marino matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2273*e4b17023SJohn Marino /* Find and record all potential matrices in the program. */
2274*e4b17023SJohn Marino find_matrices_decl ();
2275*e4b17023SJohn Marino /* Analyze the accesses of the matrices (escaping analysis). */
2276*e4b17023SJohn Marino for (node = cgraph_nodes; node; node = node->next)
2277*e4b17023SJohn Marino if (node->analyzed)
2278*e4b17023SJohn Marino {
2279*e4b17023SJohn Marino tree temp_fn;
2280*e4b17023SJohn Marino
2281*e4b17023SJohn Marino temp_fn = current_function_decl;
2282*e4b17023SJohn Marino current_function_decl = node->decl;
2283*e4b17023SJohn Marino push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2284*e4b17023SJohn Marino bitmap_obstack_initialize (NULL);
2285*e4b17023SJohn Marino gimple_register_cfg_hooks ();
2286*e4b17023SJohn Marino
2287*e4b17023SJohn Marino if (!gimple_in_ssa_p (cfun))
2288*e4b17023SJohn Marino {
2289*e4b17023SJohn Marino free_dominance_info (CDI_DOMINATORS);
2290*e4b17023SJohn Marino free_dominance_info (CDI_POST_DOMINATORS);
2291*e4b17023SJohn Marino pop_cfun ();
2292*e4b17023SJohn Marino current_function_decl = temp_fn;
2293*e4b17023SJohn Marino bitmap_obstack_release (NULL);
2294*e4b17023SJohn Marino
2295*e4b17023SJohn Marino return 0;
2296*e4b17023SJohn Marino }
2297*e4b17023SJohn Marino
2298*e4b17023SJohn Marino #ifdef ENABLE_CHECKING
2299*e4b17023SJohn Marino verify_flow_info ();
2300*e4b17023SJohn Marino #endif
2301*e4b17023SJohn Marino
2302*e4b17023SJohn Marino if (!matrices_to_reorg)
2303*e4b17023SJohn Marino {
2304*e4b17023SJohn Marino free_dominance_info (CDI_DOMINATORS);
2305*e4b17023SJohn Marino free_dominance_info (CDI_POST_DOMINATORS);
2306*e4b17023SJohn Marino pop_cfun ();
2307*e4b17023SJohn Marino current_function_decl = temp_fn;
2308*e4b17023SJohn Marino bitmap_obstack_release (NULL);
2309*e4b17023SJohn Marino
2310*e4b17023SJohn Marino return 0;
2311*e4b17023SJohn Marino }
2312*e4b17023SJohn Marino
2313*e4b17023SJohn Marino /* Create htap for phi nodes. */
2314*e4b17023SJohn Marino htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2315*e4b17023SJohn Marino mat_acc_phi_eq, free);
2316*e4b17023SJohn Marino if (!check_transpose_p)
2317*e4b17023SJohn Marino find_sites_in_func (false);
2318*e4b17023SJohn Marino else
2319*e4b17023SJohn Marino {
2320*e4b17023SJohn Marino find_sites_in_func (true);
2321*e4b17023SJohn Marino loop_optimizer_init (LOOPS_NORMAL);
2322*e4b17023SJohn Marino if (current_loops)
2323*e4b17023SJohn Marino scev_initialize ();
2324*e4b17023SJohn Marino htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2325*e4b17023SJohn Marino if (current_loops)
2326*e4b17023SJohn Marino {
2327*e4b17023SJohn Marino scev_finalize ();
2328*e4b17023SJohn Marino loop_optimizer_finalize ();
2329*e4b17023SJohn Marino current_loops = NULL;
2330*e4b17023SJohn Marino }
2331*e4b17023SJohn Marino }
2332*e4b17023SJohn Marino /* If the current function is the allocation function for any of
2333*e4b17023SJohn Marino the matrices we check its allocation and the escaping level. */
2334*e4b17023SJohn Marino htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2335*e4b17023SJohn Marino free_dominance_info (CDI_DOMINATORS);
2336*e4b17023SJohn Marino free_dominance_info (CDI_POST_DOMINATORS);
2337*e4b17023SJohn Marino pop_cfun ();
2338*e4b17023SJohn Marino current_function_decl = temp_fn;
2339*e4b17023SJohn Marino bitmap_obstack_release (NULL);
2340*e4b17023SJohn Marino }
2341*e4b17023SJohn Marino htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2342*e4b17023SJohn Marino /* Now transform the accesses. */
2343*e4b17023SJohn Marino for (node = cgraph_nodes; node; node = node->next)
2344*e4b17023SJohn Marino if (node->analyzed)
2345*e4b17023SJohn Marino {
2346*e4b17023SJohn Marino /* Remember that allocation sites have been handled. */
2347*e4b17023SJohn Marino tree temp_fn;
2348*e4b17023SJohn Marino
2349*e4b17023SJohn Marino temp_fn = current_function_decl;
2350*e4b17023SJohn Marino current_function_decl = node->decl;
2351*e4b17023SJohn Marino push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2352*e4b17023SJohn Marino bitmap_obstack_initialize (NULL);
2353*e4b17023SJohn Marino gimple_register_cfg_hooks ();
2354*e4b17023SJohn Marino record_all_accesses_in_func ();
2355*e4b17023SJohn Marino htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2356*e4b17023SJohn Marino cgraph_rebuild_references ();
2357*e4b17023SJohn Marino free_dominance_info (CDI_DOMINATORS);
2358*e4b17023SJohn Marino free_dominance_info (CDI_POST_DOMINATORS);
2359*e4b17023SJohn Marino pop_cfun ();
2360*e4b17023SJohn Marino current_function_decl = temp_fn;
2361*e4b17023SJohn Marino bitmap_obstack_release (NULL);
2362*e4b17023SJohn Marino }
2363*e4b17023SJohn Marino htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2364*e4b17023SJohn Marino
2365*e4b17023SJohn Marino current_function_decl = NULL;
2366*e4b17023SJohn Marino set_cfun (NULL);
2367*e4b17023SJohn Marino matrices_to_reorg = NULL;
2368*e4b17023SJohn Marino return 0;
2369*e4b17023SJohn Marino }
2370*e4b17023SJohn Marino
2371*e4b17023SJohn Marino
2372*e4b17023SJohn Marino /* The condition for matrix flattening to be performed. */
2373*e4b17023SJohn Marino static bool
gate_matrix_reorg(void)2374*e4b17023SJohn Marino gate_matrix_reorg (void)
2375*e4b17023SJohn Marino {
2376*e4b17023SJohn Marino return flag_ipa_matrix_reorg && flag_whole_program;
2377*e4b17023SJohn Marino }
2378*e4b17023SJohn Marino
2379*e4b17023SJohn Marino struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
2380*e4b17023SJohn Marino {
2381*e4b17023SJohn Marino {
2382*e4b17023SJohn Marino SIMPLE_IPA_PASS,
2383*e4b17023SJohn Marino "matrix-reorg", /* name */
2384*e4b17023SJohn Marino gate_matrix_reorg, /* gate */
2385*e4b17023SJohn Marino matrix_reorg, /* execute */
2386*e4b17023SJohn Marino NULL, /* sub */
2387*e4b17023SJohn Marino NULL, /* next */
2388*e4b17023SJohn Marino 0, /* static_pass_number */
2389*e4b17023SJohn Marino TV_NONE, /* tv_id */
2390*e4b17023SJohn Marino 0, /* properties_required */
2391*e4b17023SJohn Marino 0, /* properties_provided */
2392*e4b17023SJohn Marino 0, /* properties_destroyed */
2393*e4b17023SJohn Marino 0, /* todo_flags_start */
2394*e4b17023SJohn Marino TODO_dump_cgraph /* todo_flags_finish */
2395*e4b17023SJohn Marino }
2396*e4b17023SJohn Marino };
2397