xref: /netbsd-src/external/gpl3/gcc/dist/gcc/tree-vect-slp-patterns.cc (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1 /* SLP - Pattern matcher on SLP trees
2    Copyright (C) 2020-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "target.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "tree-pass.h"
29 #include "ssa.h"
30 #include "optabs-tree.h"
31 #include "insn-config.h"
32 #include "recog.h"		/* FIXME: for insn_data */
33 #include "fold-const.h"
34 #include "stor-layout.h"
35 #include "gimple-iterator.h"
36 #include "cfgloop.h"
37 #include "tree-vectorizer.h"
38 #include "langhooks.h"
39 #include "gimple-walk.h"
40 #include "dbgcnt.h"
41 #include "tree-vector-builder.h"
42 #include "vec-perm-indices.h"
43 #include "gimple-fold.h"
44 #include "internal-fn.h"
45 
46 /* SLP Pattern matching mechanism.
47 
48   This extension to the SLP vectorizer allows one to transform the generated SLP
49   tree based on any pattern.  The difference between this and the normal vect
50   pattern matcher is that unlike the former, this matcher allows you to match
51   with instructions that do not belong to the same SSA dominator graph.
52 
53   The only requirement that this pattern matcher has is that you are only
54   only allowed to either match an entire group or none.
55 
56   The pattern matcher currently only allows you to perform replacements to
57   internal functions.
58 
59   Once the patterns are matched it is one way, these cannot be undone.  It is
60   currently not supported to match patterns recursively.
61 
62   To add a new pattern, implement the vect_pattern class and add the type to
63   slp_patterns.
64 
65 */
66 
67 /*******************************************************************************
68  * vect_pattern class
69  ******************************************************************************/
70 
71 /* Default implementation of recognize that performs matching, validation and
72    replacement of nodes but that can be overriden if required.  */
73 
74 static bool
vect_pattern_validate_optab(internal_fn ifn,slp_tree node)75 vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
76 {
77   tree vectype = SLP_TREE_VECTYPE (node);
78   if (ifn == IFN_LAST || !vectype)
79     return false;
80 
81   if (dump_enabled_p ())
82     dump_printf_loc (MSG_NOTE, vect_location,
83 		     "Found %s pattern in SLP tree\n",
84 		     internal_fn_name (ifn));
85 
86   if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
87     {
88       if (dump_enabled_p ())
89 	dump_printf_loc (MSG_NOTE, vect_location,
90 			 "Target supports %s vectorization with mode %T\n",
91 			 internal_fn_name (ifn), vectype);
92     }
93   else
94     {
95       if (dump_enabled_p ())
96         {
97 	  if (!vectype)
98 	    dump_printf_loc (MSG_NOTE, vect_location,
99 			     "Target does not support vector type for %G\n",
100 			     STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node)));
101 	  else
102 	    dump_printf_loc (MSG_NOTE, vect_location,
103 			     "Target does not support %s for vector type "
104 			     "%T\n", internal_fn_name (ifn), vectype);
105 	}
106       return false;
107     }
108   return true;
109 }
110 
111 /*******************************************************************************
112  * General helper types
113  ******************************************************************************/
114 
115 /* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
116    be matched when looking for expressions that we are interested matching for
117    complex numbers addition and mla.  */
118 
119 typedef enum _complex_operation : unsigned {
120   PLUS_PLUS,
121   MINUS_PLUS,
122   PLUS_MINUS,
123   MULT_MULT,
124   CMPLX_NONE
125 } complex_operation_t;
126 
127 /*******************************************************************************
128  * General helper functions
129  ******************************************************************************/
130 
131 /* Helper function of linear_loads_p that checks to see if the load permutation
132    is sequential and in monotonically increasing order of loads with no gaps.
133 */
134 
135 static inline complex_perm_kinds_t
is_linear_load_p(load_permutation_t loads)136 is_linear_load_p (load_permutation_t loads)
137 {
138   if (loads.length() == 0)
139     return PERM_UNKNOWN;
140 
141   unsigned load, i;
142   complex_perm_kinds_t candidates[4]
143     = { PERM_ODDODD
144       , PERM_EVENEVEN
145       , PERM_EVENODD
146       , PERM_ODDEVEN
147       };
148 
149   int valid_patterns = 4;
150   FOR_EACH_VEC_ELT (loads, i, load)
151     {
152       unsigned adj_load = load % 2;
153       if (candidates[0] != PERM_UNKNOWN && adj_load != 1)
154 	{
155 	  candidates[0] = PERM_UNKNOWN;
156 	  valid_patterns--;
157 	}
158       if (candidates[1] != PERM_UNKNOWN && adj_load != 0)
159 	{
160 	  candidates[1] = PERM_UNKNOWN;
161 	  valid_patterns--;
162 	}
163       if (candidates[2] != PERM_UNKNOWN && load != i)
164 	{
165 	  candidates[2] = PERM_UNKNOWN;
166 	  valid_patterns--;
167 	}
168       if (candidates[3] != PERM_UNKNOWN
169 	  && load != (i % 2 == 0 ? i + 1 : i - 1))
170 	{
171 	  candidates[3] = PERM_UNKNOWN;
172 	  valid_patterns--;
173 	}
174 
175       if (valid_patterns == 0)
176 	return PERM_UNKNOWN;
177     }
178 
179   for (i = 0; i < sizeof(candidates); i++)
180     if (candidates[i] != PERM_UNKNOWN)
181       return candidates[i];
182 
183   return PERM_UNKNOWN;
184 }
185 
186 /* Combine complex_perm_kinds A and B into a new permute kind that describes the
187    resulting operation.  */
188 
189 static inline complex_perm_kinds_t
vect_merge_perms(complex_perm_kinds_t a,complex_perm_kinds_t b)190 vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
191 {
192   if (a == b)
193     return a;
194 
195   if (a == PERM_TOP)
196     return b;
197 
198   if (b == PERM_TOP)
199     return a;
200 
201   return PERM_UNKNOWN;
202 }
203 
204 /* Check to see if all loads rooted in ROOT are linear.  Linearity is
205    defined as having no gaps between values loaded.  */
206 
207 static complex_perm_kinds_t
linear_loads_p(slp_tree_to_load_perm_map_t * perm_cache,slp_tree root)208 linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
209 {
210   if (!root)
211     return PERM_UNKNOWN;
212 
213   unsigned i;
214   complex_perm_kinds_t *tmp;
215 
216   if ((tmp = perm_cache->get (root)) != NULL)
217     return *tmp;
218 
219   complex_perm_kinds_t retval = PERM_UNKNOWN;
220   perm_cache->put (root, retval);
221 
222   /* If it's a load node, then just read the load permute.  */
223   if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
224     {
225       retval = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
226       perm_cache->put (root, retval);
227       return retval;
228     }
229   else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
230     {
231       retval = PERM_TOP;
232       perm_cache->put (root, retval);
233       return retval;
234     }
235 
236   complex_perm_kinds_t kind = PERM_TOP;
237 
238   slp_tree child;
239   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
240     {
241       complex_perm_kinds_t res = linear_loads_p (perm_cache, child);
242       kind = vect_merge_perms (kind, res);
243       /* Unknown and Top are not valid on blends as they produce no permute.  */
244       retval = kind;
245       if (kind == PERM_UNKNOWN || kind == PERM_TOP)
246 	return retval;
247     }
248 
249   retval = kind;
250 
251   perm_cache->put (root, retval);
252   return retval;
253 }
254 
255 
256 /* This function attempts to make a node rooted in NODE is linear.  If the node
257    if already linear than the node itself is returned in RESULT.
258 
259    If the node is not linear then a new VEC_PERM_EXPR node is created with a
260    lane permute that when applied will make the node linear.   If such a
261    permute cannot be created then FALSE is returned from the function.
262 
263    Here linearity is defined as having a sequential, monotically increasing
264    load position inside the load permute generated by the loads reachable from
265    NODE.  */
266 
267 static slp_tree
vect_build_swap_evenodd_node(slp_tree node)268 vect_build_swap_evenodd_node (slp_tree node)
269 {
270   /* Attempt to linearise the permute.  */
271   vec<std::pair<unsigned, unsigned> > zipped;
272   zipped.create (SLP_TREE_LANES (node));
273 
274   for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
275     {
276       zipped.quick_push (std::make_pair (0, x+1));
277       zipped.quick_push (std::make_pair (0, x));
278     }
279 
280   /* Create the new permute node and store it instead.  */
281   slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
282   SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
283   SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
284   SLP_TREE_CHILDREN (vnode).quick_push (node);
285   SLP_TREE_REF_COUNT (vnode) = 1;
286   SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
287   SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
288   SLP_TREE_REF_COUNT (node)++;
289   return vnode;
290 }
291 
292 /* Checks to see of the expression represented by NODE is a gimple assign with
293    code CODE.  */
294 
295 static inline bool
vect_match_expression_p(slp_tree node,tree_code code)296 vect_match_expression_p (slp_tree node, tree_code code)
297 {
298   if (!node
299       || !SLP_TREE_REPRESENTATIVE (node))
300     return false;
301 
302   gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
303   if (!is_gimple_assign (expr)
304       || gimple_assign_rhs_code (expr) != code)
305     return false;
306 
307   return true;
308 }
309 
310 /* Check if the given lane permute in PERMUTES matches an alternating sequence
311    of {even odd even odd ...}.  This to account for unrolled loops.  Further
312    mode there resulting permute must be linear.   */
313 
314 static inline bool
vect_check_evenodd_blend(lane_permutation_t & permutes,unsigned even,unsigned odd)315 vect_check_evenodd_blend (lane_permutation_t &permutes,
316 			 unsigned even, unsigned odd)
317 {
318   if (permutes.length () == 0
319       || permutes.length () % 2 != 0)
320     return false;
321 
322   unsigned val[2] = {even, odd};
323   unsigned seed = 0;
324   for (unsigned i = 0; i < permutes.length (); i++)
325     if (permutes[i].first != val[i % 2]
326 	|| permutes[i].second != seed++)
327       return false;
328 
329   return true;
330 }
331 
332 /* This function will match the two gimple expressions representing NODE1 and
333    NODE2 in parallel and returns the pair operation that represents the two
334    expressions in the two statements.
335 
336    If match is successful then the corresponding complex_operation is
337    returned and the arguments to the two matched operations are returned in OPS.
338 
339    If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
340    from the two nodes alternatingly.
341 
342    If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
343 
344    e.g. the following gimple statements
345 
346    stmt 0 _39 = _37 + _12;
347    stmt 1 _6 = _38 - _36;
348 
349    will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
350 */
351 
352 static complex_operation_t
vect_detect_pair_op(slp_tree node1,slp_tree node2,lane_permutation_t & lanes,bool two_operands=true,vec<slp_tree> * ops=NULL)353 vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
354 		     bool two_operands = true, vec<slp_tree> *ops = NULL)
355 {
356   complex_operation_t result = CMPLX_NONE;
357 
358   if (vect_match_expression_p (node1, MINUS_EXPR)
359       && vect_match_expression_p (node2, PLUS_EXPR)
360       && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
361     result = MINUS_PLUS;
362   else if (vect_match_expression_p (node1, PLUS_EXPR)
363 	   && vect_match_expression_p (node2, MINUS_EXPR)
364 	   && (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
365     result = PLUS_MINUS;
366   else if (vect_match_expression_p (node1, PLUS_EXPR)
367 	   && vect_match_expression_p (node2, PLUS_EXPR))
368     result = PLUS_PLUS;
369   else if (vect_match_expression_p (node1, MULT_EXPR)
370 	   && vect_match_expression_p (node2, MULT_EXPR))
371     result = MULT_MULT;
372 
373   if (result != CMPLX_NONE && ops != NULL)
374     {
375       if (two_operands)
376 	{
377 	  auto l0node = SLP_TREE_CHILDREN (node1);
378 	  auto l1node = SLP_TREE_CHILDREN (node2);
379 
380 	  /* Check if the tree is connected as we expect it.  */
381 	  if (!((l0node[0] == l1node[0] && l0node[1] == l1node[1])
382 	      || (l0node[0] == l1node[1] && l0node[1] == l1node[0])))
383 	    return CMPLX_NONE;
384 	}
385       ops->safe_push (node1);
386       ops->safe_push (node2);
387     }
388   return result;
389 }
390 
391 /* Overload of vect_detect_pair_op that matches against the representative
392    statements in the children of NODE.  It is expected that NODE has exactly
393    two children and when TWO_OPERANDS then NODE must be a VEC_PERM.  */
394 
395 static complex_operation_t
vect_detect_pair_op(slp_tree node,bool two_operands=true,vec<slp_tree> * ops=NULL)396 vect_detect_pair_op (slp_tree node, bool two_operands = true,
397 		     vec<slp_tree> *ops = NULL)
398 {
399   if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
400     return CMPLX_NONE;
401 
402   if (SLP_TREE_CHILDREN (node).length () != 2)
403     return CMPLX_NONE;
404 
405   vec<slp_tree> children = SLP_TREE_CHILDREN (node);
406   lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
407 
408   return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
409 			      ops);
410 }
411 
412 /*******************************************************************************
413  * complex_pattern class
414  ******************************************************************************/
415 
416 /* SLP Complex Numbers pattern matching.
417 
418   As an example, the following simple loop:
419 
420     double a[restrict N]; double b[restrict N]; double c[restrict N];
421 
422     for (int i=0; i < N; i+=2)
423     {
424       c[i] = a[i] - b[i+1];
425       c[i+1] = a[i+1] + b[i];
426     }
427 
428   which represents a complex addition on with a rotation of 90* around the
429   argand plane. i.e. if `a` and `b` were complex numbers then this would be the
430   same as `a + (b * I)`.
431 
432   Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
433   both recognized in order for the pattern to work.  As an SLP tree this is
434   represented as
435 
436                 +--------------------------------+
437                 |       stmt 0 *_9 = _10;        |
438                 |       stmt 1 *_15 = _16;       |
439                 +--------------------------------+
440                                 |
441                                 |
442                                 v
443                 +--------------------------------+
444                 |     stmt 0 _10 = _4 - _8;      |
445                 |    stmt 1 _16 = _12 + _14;     |
446                 | lane permutation { 0[0] 1[1] } |
447                 +--------------------------------+
448                             |        |
449                             |        |
450                             |        |
451                +-----+      |        |      +-----+
452                |     |      |        |      |     |
453          +-----| { } |<-----+        +----->| { } --------+
454          |     |     |   +------------------|     |       |
455          |     +-----+   |                  +-----+       |
456          |        |      |                                |
457          |        |      |                                |
458          |        +------|------------------+             |
459          |               |                  |             |
460          v               v                  v             v
461      +--------------------------+     +--------------------------------+
462      |     stmt 0 _8 = *_7;     |     |        stmt 0 _4 = *_3;        |
463      |    stmt 1 _14 = *_13;    |     |       stmt 1 _12 = *_11;       |
464      | load permutation { 1 0 } |     |    load permutation { 0 1 }    |
465      +--------------------------+     +--------------------------------+
466 
467   The pattern matcher allows you to replace both statements 0 and 1 or none at
468   all.  Because this operation is a two operands operation the actual nodes
469   being replaced are those in the { } nodes.  The actual scalar statements
470   themselves are not replaced or used during the matching but instead the
471   SLP_TREE_REPRESENTATIVE statements are inspected.  You are also allowed to
472   replace and match on any number of nodes.
473 
474   Because the pattern matcher matches on the representative statement for the
475   SLP node the case of two_operators it allows you to match the children of the
476   node.  This is done using the method `recognize ()`.
477 
478 */
479 
480 /* The complex_pattern class contains common code for pattern matchers that work
481    on complex numbers.  These provide functionality to allow de-construction and
482    validation of sequences depicting/transforming REAL and IMAG pairs.  */
483 
484 class complex_pattern : public vect_pattern
485 {
486   protected:
487     auto_vec<slp_tree> m_workset;
complex_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)488     complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
489       : vect_pattern (node, m_ops, ifn)
490     {
491       this->m_workset.safe_push (*node);
492     }
493 
494   public:
495     void build (vec_info *);
496 
497     static internal_fn
498     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *, slp_tree *,
499 	     vec<slp_tree> *);
500 };
501 
502 /* Create a replacement pattern statement for each node in m_node and inserts
503    the new statement into m_node as the new representative statement.  The old
504    statement is marked as being in a pattern defined by the new statement.  The
505    statement is created as call to internal function IFN with m_num_args
506    arguments.
507 
508    Futhermore the new pattern is also added to the vectorization information
509    structure VINFO and the old statement STMT_INFO is marked as unused while
510    the new statement is marked as used and the number of SLP uses of the new
511    statement is incremented.
512 
513    The newly created SLP nodes are marked as SLP only and will be dissolved
514    if SLP is aborted.
515 
516    The newly created gimple call is returned and the BB remains unchanged.
517 
518    This default method is designed to only match against simple operands where
519    all the input and output types are the same.
520 */
521 
522 void
build(vec_info * vinfo)523 complex_pattern::build (vec_info *vinfo)
524 {
525   stmt_vec_info stmt_info;
526 
527   auto_vec<tree> args;
528   args.create (this->m_num_args);
529   args.quick_grow_cleared (this->m_num_args);
530   slp_tree node;
531   unsigned ix;
532   stmt_vec_info call_stmt_info;
533   gcall *call_stmt = NULL;
534 
535   /* Now modify the nodes themselves.  */
536   FOR_EACH_VEC_ELT (this->m_workset, ix, node)
537     {
538       /* Calculate the location of the statement in NODE to replace.  */
539       stmt_info = SLP_TREE_REPRESENTATIVE (node);
540       stmt_vec_info reduc_def
541 	= STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info));
542       gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
543       tree lhs_old_stmt = gimple_get_lhs (old_stmt);
544       tree type = TREE_TYPE (lhs_old_stmt);
545 
546       /* Create the argument set for use by gimple_build_call_internal_vec.  */
547       for (unsigned i = 0; i < this->m_num_args; i++)
548 	args[i] = lhs_old_stmt;
549 
550       /* Create the new pattern statements.  */
551       call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
552       tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
553       gimple_call_set_lhs (call_stmt, var);
554       gimple_set_location (call_stmt, gimple_location (old_stmt));
555       gimple_call_set_nothrow (call_stmt, true);
556 
557       /* Adjust the book-keeping for the new and old statements for use during
558 	 SLP.  This is required to get the right VF and statement during SLP
559 	 analysis.  These changes are created after relevancy has been set for
560 	 the nodes as such we need to manually update them.  Any changes will be
561 	 undone if SLP is cancelled.  */
562       call_stmt_info
563 	= vinfo->add_pattern_stmt (call_stmt, stmt_info);
564 
565       /* Make sure to mark the representative statement pure_slp and
566 	 relevant and transfer reduction info. */
567       STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
568       STMT_SLP_TYPE (call_stmt_info) = pure_slp;
569       STMT_VINFO_REDUC_DEF (call_stmt_info) = reduc_def;
570 
571       gimple_set_bb (call_stmt, gimple_bb (stmt_info->stmt));
572       STMT_VINFO_VECTYPE (call_stmt_info) = SLP_TREE_VECTYPE (node);
573       STMT_VINFO_SLP_VECT_ONLY_PATTERN (call_stmt_info) = true;
574 
575       /* Since we are replacing all the statements in the group with the same
576 	 thing it doesn't really matter.  So just set it every time a new stmt
577 	 is created.  */
578       SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
579       SLP_TREE_LANE_PERMUTATION (node).release ();
580       SLP_TREE_CODE (node) = CALL_EXPR;
581     }
582 }
583 
584 /*******************************************************************************
585  * complex_add_pattern class
586  ******************************************************************************/
587 
588 class complex_add_pattern : public complex_pattern
589 {
590   protected:
complex_add_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)591     complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
592       : complex_pattern (node, m_ops, ifn)
593     {
594       this->m_num_args = 2;
595     }
596 
597   public:
598     void build (vec_info *);
599     static internal_fn
600     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
601 	     slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
602 
603     static vect_pattern*
604     recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
605 	       slp_tree *);
606 
607     static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)608     mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
609     {
610       return new complex_add_pattern (node, m_ops, ifn);
611     }
612 };
613 
614 /* Perform a replacement of the detected complex add pattern with the new
615    instruction sequences.  */
616 
617 void
build(vec_info * vinfo)618 complex_add_pattern::build (vec_info *vinfo)
619 {
620   SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
621 
622   slp_tree node = this->m_ops[0];
623   vec<slp_tree> children = SLP_TREE_CHILDREN (node);
624 
625   /* First re-arrange the children.  */
626   SLP_TREE_CHILDREN (*this->m_node)[0] = children[0];
627   SLP_TREE_CHILDREN (*this->m_node)[1] =
628     vect_build_swap_evenodd_node (children[1]);
629 
630   SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[0])++;
631   SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (*this->m_node)[1])++;
632   vect_free_slp_tree (this->m_ops[0]);
633   vect_free_slp_tree (this->m_ops[1]);
634 
635   complex_pattern::build (vinfo);
636 }
637 
638 /* Pattern matcher for trying to match complex addition pattern in SLP tree.
639 
640    If no match is found then IFN is set to IFN_LAST.
641    This function matches the patterns shaped as:
642 
643    c[i] = a[i] - b[i+1];
644    c[i+1] = a[i+1] + b[i];
645 
646    If a match occurred then TRUE is returned, else FALSE.  The initial match is
647    expected to be in OP1 and the initial match operands in args0.  */
648 
649 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t * perm_cache,slp_compat_nodes_map_t *,slp_tree * node,vec<slp_tree> * ops)650 complex_add_pattern::matches (complex_operation_t op,
651 			      slp_tree_to_load_perm_map_t *perm_cache,
652 			      slp_compat_nodes_map_t * /* compat_cache */,
653 			      slp_tree *node, vec<slp_tree> *ops)
654 {
655   internal_fn ifn = IFN_LAST;
656 
657   /* Find the two components.  Rotation in the complex plane will modify
658      the operations:
659 
660       * Rotation  0: + +
661       * Rotation 90: - +
662       * Rotation 180: - -
663       * Rotation 270: + -
664 
665       Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
666       to care about them here.  */
667   if (op == MINUS_PLUS)
668     ifn = IFN_COMPLEX_ADD_ROT90;
669   else if (op == PLUS_MINUS)
670     ifn = IFN_COMPLEX_ADD_ROT270;
671   else
672     return ifn;
673 
674   /* verify that there is a permute, otherwise this isn't a pattern we
675      we support.  */
676   gcc_assert (ops->length () == 2);
677 
678   vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
679 
680   /* First node must be unpermuted.  */
681   if (linear_loads_p (perm_cache, children[0]) != PERM_EVENODD)
682     return IFN_LAST;
683 
684   /* Second node must be permuted.  */
685   if (linear_loads_p (perm_cache, children[1]) != PERM_ODDEVEN)
686     return IFN_LAST;
687 
688   if (!vect_pattern_validate_optab (ifn, *node))
689     return IFN_LAST;
690 
691   return ifn;
692 }
693 
694 /* Attempt to recognize a complex add pattern.  */
695 
696 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_compat_nodes_map_t * compat_cache,slp_tree * node)697 complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
698 				slp_compat_nodes_map_t *compat_cache,
699 				slp_tree *node)
700 {
701   auto_vec<slp_tree> ops;
702   complex_operation_t op
703     = vect_detect_pair_op (*node, true, &ops);
704   internal_fn ifn
705     = complex_add_pattern::matches (op, perm_cache, compat_cache, node, &ops);
706   if (ifn == IFN_LAST)
707     return NULL;
708 
709   return new complex_add_pattern (node, &ops, ifn);
710 }
711 
712 /*******************************************************************************
713  * complex_mul_pattern
714  ******************************************************************************/
715 
716 /* Helper function to check if PERM is KIND or PERM_TOP.  */
717 
718 static inline bool
is_eq_or_top(slp_tree_to_load_perm_map_t * perm_cache,slp_tree op1,complex_perm_kinds_t kind1,slp_tree op2,complex_perm_kinds_t kind2)719 is_eq_or_top (slp_tree_to_load_perm_map_t *perm_cache,
720 	      slp_tree op1, complex_perm_kinds_t kind1,
721 	      slp_tree op2, complex_perm_kinds_t kind2)
722 {
723   complex_perm_kinds_t perm1 = linear_loads_p (perm_cache, op1);
724   if (perm1 != kind1 && perm1 != PERM_TOP)
725     return false;
726 
727   complex_perm_kinds_t perm2 = linear_loads_p (perm_cache, op2);
728   if (perm2 != kind2 && perm2 != PERM_TOP)
729     return false;
730 
731   return true;
732 }
733 
734 enum _conj_status { CONJ_NONE, CONJ_FST, CONJ_SND };
735 
736 static inline bool
compatible_complex_nodes_p(slp_compat_nodes_map_t * compat_cache,slp_tree a,int * pa,slp_tree b,int * pb)737 compatible_complex_nodes_p (slp_compat_nodes_map_t *compat_cache,
738 			    slp_tree a, int *pa, slp_tree b, int *pb)
739 {
740   bool *tmp;
741   std::pair<slp_tree, slp_tree> key = std::make_pair(a, b);
742   if ((tmp = compat_cache->get (key)) != NULL)
743     return *tmp;
744 
745    compat_cache->put (key, false);
746 
747   if (SLP_TREE_CHILDREN (a).length () != SLP_TREE_CHILDREN (b).length ())
748     return false;
749 
750   if (SLP_TREE_DEF_TYPE (a) != SLP_TREE_DEF_TYPE (b))
751     return false;
752 
753   /* Only internal nodes can be loads, as such we can't check further if they
754      are externals.  */
755   if (SLP_TREE_DEF_TYPE (a) != vect_internal_def)
756     {
757       for (unsigned i = 0; i < SLP_TREE_SCALAR_OPS (a).length (); i++)
758 	{
759 	  tree op1 = SLP_TREE_SCALAR_OPS (a)[pa[i % 2]];
760 	  tree op2 = SLP_TREE_SCALAR_OPS (b)[pb[i % 2]];
761 	  if (!operand_equal_p (op1, op2, 0))
762 	    return false;
763 	}
764 
765       compat_cache->put (key, true);
766       return true;
767     }
768 
769   auto a_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (a));
770   auto b_stmt = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (b));
771 
772   if (gimple_code (a_stmt) != gimple_code (b_stmt))
773     return false;
774 
775   /* code, children, type, externals, loads, constants  */
776   if (gimple_num_args (a_stmt) != gimple_num_args (b_stmt))
777     return false;
778 
779   /* At this point, a and b are known to be the same gimple operations.  */
780   if (is_gimple_call (a_stmt))
781     {
782 	if (!compatible_calls_p (dyn_cast <gcall *> (a_stmt),
783 				 dyn_cast <gcall *> (b_stmt)))
784 	  return false;
785     }
786   else if (!is_gimple_assign (a_stmt))
787     return false;
788   else
789     {
790       tree_code acode = gimple_assign_rhs_code (a_stmt);
791       tree_code bcode = gimple_assign_rhs_code (b_stmt);
792       if ((acode == REALPART_EXPR || acode == IMAGPART_EXPR)
793 	  && (bcode == REALPART_EXPR || bcode == IMAGPART_EXPR))
794 	return true;
795 
796       if (acode != bcode)
797 	return false;
798     }
799 
800   if (!SLP_TREE_LOAD_PERMUTATION (a).exists ()
801       || !SLP_TREE_LOAD_PERMUTATION (b).exists ())
802     {
803       for (unsigned i = 0; i < gimple_num_args (a_stmt); i++)
804 	{
805 	  tree t1 = gimple_arg (a_stmt, i);
806 	  tree t2 = gimple_arg (b_stmt, i);
807 	  if (TREE_CODE (t1) != TREE_CODE (t2))
808 	    return false;
809 
810 	  /* If SSA name then we will need to inspect the children
811 	     so we can punt here.  */
812 	  if (TREE_CODE (t1) == SSA_NAME)
813 	    continue;
814 
815 	  if (!operand_equal_p (t1, t2, 0))
816 	    return false;
817 	}
818     }
819   else
820     {
821       auto dr1 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (a));
822       auto dr2 = STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (b));
823       /* Don't check the last dimension as that's checked by the lineary
824 	 checks.  This check is also much stricter than what we need
825 	 because it doesn't consider loading from adjacent elements
826 	 in the same struct as loading from the same base object.
827 	 But for now, I'll play it safe.  */
828       if (!same_data_refs (dr1, dr2, 1))
829 	return false;
830     }
831 
832   for (unsigned i = 0; i < SLP_TREE_CHILDREN (a).length (); i++)
833     {
834       if (!compatible_complex_nodes_p (compat_cache,
835 				       SLP_TREE_CHILDREN (a)[i], pa,
836 				       SLP_TREE_CHILDREN (b)[i], pb))
837 	return false;
838     }
839 
840   compat_cache->put (key, true);
841   return true;
842 }
843 
844 static inline bool
vect_validate_multiplication(slp_tree_to_load_perm_map_t * perm_cache,slp_compat_nodes_map_t * compat_cache,vec<slp_tree> & left_op,vec<slp_tree> & right_op,bool subtract,enum _conj_status * _status)845 vect_validate_multiplication (slp_tree_to_load_perm_map_t *perm_cache,
846 			      slp_compat_nodes_map_t *compat_cache,
847 			      vec<slp_tree> &left_op,
848 			      vec<slp_tree> &right_op,
849 			      bool subtract,
850 			      enum _conj_status *_status)
851 {
852   auto_vec<slp_tree> ops;
853   enum _conj_status stats = CONJ_NONE;
854 
855   /* The complex operations can occur in two layouts and two permute sequences
856      so declare them and re-use them.  */
857   int styles[][4] = { { 0, 2, 1, 3} /* {L1, R1} + {L2, R2}.  */
858 		    , { 0, 3, 1, 2} /* {L1, R2} + {L2, R1}.  */
859 		    };
860 
861   /* Now for the corresponding permutes that go with these values.  */
862   complex_perm_kinds_t perms[][4]
863     = { { PERM_EVENEVEN, PERM_ODDODD, PERM_EVENODD, PERM_ODDEVEN }
864       , { PERM_EVENODD, PERM_ODDEVEN, PERM_EVENEVEN, PERM_ODDODD }
865       };
866 
867   /* These permutes are used during comparisons of externals on which
868      we require strict equality.  */
869   int cq[][4][2]
870     = { { { 0, 0 }, { 1, 1 }, { 0, 1 }, { 1, 0 } }
871       , { { 0, 1 }, { 1, 0 }, { 0, 0 }, { 1, 1 } }
872       };
873 
874   /* Default to style and perm 0, most operations use this one.  */
875   int style = 0;
876   int perm = subtract ? 1 : 0;
877 
878   /* Check if we have a negate operation, if so absorb the node and continue
879      looking.  */
880   bool neg0 = vect_match_expression_p (right_op[0], NEGATE_EXPR);
881   bool neg1 = vect_match_expression_p (right_op[1], NEGATE_EXPR);
882 
883   /* Determine which style we're looking at.  We only have different ones
884      whenever a conjugate is involved.  */
885   if (neg0 && neg1)
886     ;
887   else if (neg0)
888     {
889       right_op[0] = SLP_TREE_CHILDREN (right_op[0])[0];
890       stats = CONJ_FST;
891       if (subtract)
892 	perm = 0;
893     }
894   else if (neg1)
895     {
896       right_op[1] = SLP_TREE_CHILDREN (right_op[1])[0];
897       stats = CONJ_SND;
898       perm = 1;
899     }
900 
901   *_status = stats;
902 
903   /* Flatten the inputs after we've remapped them.  */
904   ops.create (4);
905   ops.safe_splice (left_op);
906   ops.safe_splice (right_op);
907 
908   /* Extract out the elements to check.  */
909   slp_tree op0 = ops[styles[style][0]];
910   slp_tree op1 = ops[styles[style][1]];
911   slp_tree op2 = ops[styles[style][2]];
912   slp_tree op3 = ops[styles[style][3]];
913 
914   /* Do cheapest test first.  If failed no need to analyze further.  */
915   if (linear_loads_p (perm_cache, op0) != perms[perm][0]
916       || linear_loads_p (perm_cache, op1) != perms[perm][1]
917       || !is_eq_or_top (perm_cache, op2, perms[perm][2], op3, perms[perm][3]))
918     return false;
919 
920   return compatible_complex_nodes_p (compat_cache, op0, cq[perm][0], op1,
921 				     cq[perm][1])
922 	 && compatible_complex_nodes_p (compat_cache, op2, cq[perm][2], op3,
923 					cq[perm][3]);
924 }
925 
926 /* This function combines two nodes containing only even and only odd lanes
927    together into a single node which contains the nodes in even/odd order
928    by using a lane permute.
929 
930    The lanes in EVEN and ODD are duplicated 2 times inside the vectors.
931    So for a lanes = 4 EVEN contains {EVEN1, EVEN1, EVEN2, EVEN2}.
932 
933    The tree REPRESENTATION is taken from the supplied REP along with the
934    vectype which must be the same between all three nodes.
935 */
936 
937 static slp_tree
vect_build_combine_node(slp_tree even,slp_tree odd,slp_tree rep)938 vect_build_combine_node (slp_tree even, slp_tree odd, slp_tree rep)
939 {
940   vec<std::pair<unsigned, unsigned> > perm;
941   perm.create (SLP_TREE_LANES (rep));
942 
943   for (unsigned x = 0; x < SLP_TREE_LANES (rep); x+=2)
944     {
945       perm.quick_push (std::make_pair (0, x));
946       perm.quick_push (std::make_pair (1, x+1));
947     }
948 
949   slp_tree vnode = vect_create_new_slp_node (2, SLP_TREE_CODE (even));
950   SLP_TREE_CODE (vnode) = VEC_PERM_EXPR;
951   SLP_TREE_LANE_PERMUTATION (vnode) = perm;
952 
953   SLP_TREE_CHILDREN (vnode).create (2);
954   SLP_TREE_CHILDREN (vnode).quick_push (even);
955   SLP_TREE_CHILDREN (vnode).quick_push (odd);
956   SLP_TREE_REF_COUNT (even)++;
957   SLP_TREE_REF_COUNT (odd)++;
958   SLP_TREE_REF_COUNT (vnode) = 1;
959 
960   SLP_TREE_LANES (vnode) = SLP_TREE_LANES (rep);
961   gcc_assert (perm.length () == SLP_TREE_LANES (vnode));
962   /* Representation is set to that of the current node as the vectorizer
963      can't deal with VEC_PERMs with no representation, as would be the
964      case with invariants.  */
965   SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (rep);
966   SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (rep);
967   return vnode;
968 }
969 
970 class complex_mul_pattern : public complex_pattern
971 {
972   protected:
complex_mul_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)973     complex_mul_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
974       : complex_pattern (node, m_ops, ifn)
975     {
976       this->m_num_args = 2;
977     }
978 
979   public:
980     void build (vec_info *);
981     static internal_fn
982     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
983 	     slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
984 
985     static vect_pattern*
986     recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
987 	       slp_tree *);
988 
989     static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)990     mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
991     {
992       return new complex_mul_pattern (node, m_ops, ifn);
993     }
994 
995 };
996 
997 /* Pattern matcher for trying to match complex multiply and complex multiply
998    and accumulate pattern in SLP tree.  If the operation matches then IFN
999    is set to the operation it matched and the arguments to the two
1000    replacement statements are put in m_ops.
1001 
1002    If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1003 
1004    This function matches the patterns shaped as:
1005 
1006    double ax = (b[i+1] * a[i]);
1007    double bx = (a[i+1] * b[i]);
1008 
1009    c[i] = c[i] - ax;
1010    c[i+1] = c[i+1] + bx;
1011 
1012    If a match occurred then TRUE is returned, else FALSE.  The initial match is
1013    expected to be in OP1 and the initial match operands in args0.  */
1014 
1015 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t * perm_cache,slp_compat_nodes_map_t * compat_cache,slp_tree * node,vec<slp_tree> * ops)1016 complex_mul_pattern::matches (complex_operation_t op,
1017 			      slp_tree_to_load_perm_map_t *perm_cache,
1018 			      slp_compat_nodes_map_t *compat_cache,
1019 			      slp_tree *node, vec<slp_tree> *ops)
1020 {
1021   internal_fn ifn = IFN_LAST;
1022 
1023   if (op != MINUS_PLUS)
1024     return IFN_LAST;
1025 
1026   auto childs = *ops;
1027   auto l0node = SLP_TREE_CHILDREN (childs[0]);
1028 
1029   bool mul0 = vect_match_expression_p (l0node[0], MULT_EXPR);
1030   bool mul1 = vect_match_expression_p (l0node[1], MULT_EXPR);
1031   if (!mul0 && !mul1)
1032     return IFN_LAST;
1033 
1034   /* Now operand2+4 may lead to another expression.  */
1035   auto_vec<slp_tree> left_op, right_op;
1036   slp_tree add0 = NULL;
1037 
1038   /* Check if we may be a multiply add.  It's only valid to form FMAs
1039      with -ffp-contract=fast.  */
1040   if (!mul0
1041       && (flag_fp_contract_mode == FP_CONTRACT_FAST
1042 	  || !FLOAT_TYPE_P (SLP_TREE_VECTYPE (*node)))
1043       && vect_match_expression_p (l0node[0], PLUS_EXPR))
1044     {
1045       auto vals = SLP_TREE_CHILDREN (l0node[0]);
1046       /* Check if it's a multiply, otherwise no idea what this is.  */
1047       if (!(mul0 = vect_match_expression_p (vals[1], MULT_EXPR)))
1048 	return IFN_LAST;
1049 
1050       /* Check if the ADD is linear, otherwise it's not valid complex FMA.  */
1051       if (linear_loads_p (perm_cache, vals[0]) != PERM_EVENODD)
1052 	return IFN_LAST;
1053 
1054       left_op.safe_splice (SLP_TREE_CHILDREN (vals[1]));
1055       add0 = vals[0];
1056     }
1057   else
1058     left_op.safe_splice (SLP_TREE_CHILDREN (l0node[0]));
1059 
1060   right_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1061 
1062   if (left_op.length () != 2
1063       || right_op.length () != 2
1064       || !mul0
1065       || !mul1
1066       || linear_loads_p (perm_cache, left_op[1]) == PERM_ODDEVEN)
1067     return IFN_LAST;
1068 
1069   enum _conj_status status;
1070   if (!vect_validate_multiplication (perm_cache, compat_cache, left_op,
1071 				     right_op, false, &status))
1072     return IFN_LAST;
1073 
1074   if (status == CONJ_NONE)
1075     {
1076       if (add0)
1077 	ifn = IFN_COMPLEX_FMA;
1078       else
1079 	ifn = IFN_COMPLEX_MUL;
1080     }
1081   else
1082     {
1083       if(add0)
1084 	ifn = IFN_COMPLEX_FMA_CONJ;
1085       else
1086 	ifn = IFN_COMPLEX_MUL_CONJ;
1087     }
1088 
1089   if (!vect_pattern_validate_optab (ifn, *node))
1090     return IFN_LAST;
1091 
1092   ops->truncate (0);
1093   ops->create (add0 ? 4 : 3);
1094 
1095   if (add0)
1096     ops->quick_push (add0);
1097 
1098   complex_perm_kinds_t kind = linear_loads_p (perm_cache, left_op[0]);
1099   if (kind == PERM_EVENODD || kind == PERM_TOP)
1100     {
1101       ops->quick_push (left_op[1]);
1102       ops->quick_push (right_op[1]);
1103       ops->quick_push (left_op[0]);
1104     }
1105   else if (kind == PERM_EVENEVEN && status != CONJ_SND)
1106     {
1107       ops->quick_push (left_op[0]);
1108       ops->quick_push (right_op[0]);
1109       ops->quick_push (left_op[1]);
1110     }
1111   else
1112     {
1113       ops->quick_push (left_op[0]);
1114       ops->quick_push (right_op[1]);
1115       ops->quick_push (left_op[1]);
1116     }
1117 
1118   return ifn;
1119 }
1120 
1121 /* Attempt to recognize a complex mul pattern.  */
1122 
1123 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_compat_nodes_map_t * compat_cache,slp_tree * node)1124 complex_mul_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1125 				slp_compat_nodes_map_t *compat_cache,
1126 				slp_tree *node)
1127 {
1128   auto_vec<slp_tree> ops;
1129   complex_operation_t op
1130     = vect_detect_pair_op (*node, true, &ops);
1131   internal_fn ifn
1132     = complex_mul_pattern::matches (op, perm_cache, compat_cache, node, &ops);
1133   if (ifn == IFN_LAST)
1134     return NULL;
1135 
1136   return new complex_mul_pattern (node, &ops, ifn);
1137 }
1138 
1139 /* Perform a replacement of the detected complex mul pattern with the new
1140    instruction sequences.  */
1141 
1142 void
build(vec_info * vinfo)1143 complex_mul_pattern::build (vec_info *vinfo)
1144 {
1145   slp_tree node;
1146   unsigned i;
1147   switch (this->m_ifn)
1148   {
1149     case IFN_COMPLEX_MUL:
1150     case IFN_COMPLEX_MUL_CONJ:
1151       {
1152 	slp_tree newnode
1153 	  = vect_build_combine_node (this->m_ops[0], this->m_ops[1],
1154 				     *this->m_node);
1155 	SLP_TREE_REF_COUNT (this->m_ops[2])++;
1156 
1157 	FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1158 	  vect_free_slp_tree (node);
1159 
1160 	/* First re-arrange the children.  */
1161 	SLP_TREE_CHILDREN (*this->m_node).reserve_exact (2);
1162 	SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[2];
1163 	SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1164 	break;
1165       }
1166     case IFN_COMPLEX_FMA:
1167     case IFN_COMPLEX_FMA_CONJ:
1168       {
1169 	SLP_TREE_REF_COUNT (this->m_ops[0])++;
1170 	slp_tree newnode
1171 	  = vect_build_combine_node (this->m_ops[1], this->m_ops[2],
1172 				     *this->m_node);
1173 	SLP_TREE_REF_COUNT (this->m_ops[3])++;
1174 
1175 	FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1176 	  vect_free_slp_tree (node);
1177 
1178 	/* First re-arrange the children.  */
1179 	SLP_TREE_CHILDREN (*this->m_node).safe_grow (3);
1180 	SLP_TREE_CHILDREN (*this->m_node)[0] = this->m_ops[3];
1181 	SLP_TREE_CHILDREN (*this->m_node)[1] = newnode;
1182 	SLP_TREE_CHILDREN (*this->m_node)[2] = this->m_ops[0];
1183 
1184 	/* Tell the builder to expect an extra argument.  */
1185 	this->m_num_args++;
1186 	break;
1187       }
1188     default:
1189       gcc_unreachable ();
1190   }
1191 
1192   /* And then rewrite the node itself.  */
1193   complex_pattern::build (vinfo);
1194 }
1195 
1196 /*******************************************************************************
1197  * complex_fms_pattern class
1198  ******************************************************************************/
1199 
1200 class complex_fms_pattern : public complex_pattern
1201 {
1202   protected:
complex_fms_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1203     complex_fms_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1204       : complex_pattern (node, m_ops, ifn)
1205     {
1206       this->m_num_args = 3;
1207     }
1208 
1209   public:
1210     void build (vec_info *);
1211     static internal_fn
1212     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1213 	     slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1214 
1215     static vect_pattern*
1216     recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1217 	       slp_tree *);
1218 
1219     static vect_pattern*
mkInstance(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1220     mkInstance (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
1221     {
1222       return new complex_fms_pattern (node, m_ops, ifn);
1223     }
1224 };
1225 
1226 
1227 /* Pattern matcher for trying to match complex multiply and subtract pattern
1228    in SLP tree.  If the operation matches then IFN is set to the operation
1229    it matched and the arguments to the two replacement statements are put in
1230    m_ops.
1231 
1232    If no match is found then IFN is set to IFN_LAST and m_ops is unchanged.
1233 
1234    This function matches the patterns shaped as:
1235 
1236    double ax = (b[i+1] * a[i]) + (b[i] * a[i]);
1237    double bx = (a[i+1] * b[i]) - (a[i+1] * b[i+1]);
1238 
1239    c[i] = c[i] - ax;
1240    c[i+1] = c[i+1] + bx;
1241 
1242    If a match occurred then TRUE is returned, else FALSE.  The initial match is
1243    expected to be in OP1 and the initial match operands in args0.  */
1244 
1245 internal_fn
matches(complex_operation_t op,slp_tree_to_load_perm_map_t * perm_cache,slp_compat_nodes_map_t * compat_cache,slp_tree * ref_node,vec<slp_tree> * ops)1246 complex_fms_pattern::matches (complex_operation_t op,
1247 			      slp_tree_to_load_perm_map_t *perm_cache,
1248 			      slp_compat_nodes_map_t *compat_cache,
1249 			      slp_tree * ref_node, vec<slp_tree> *ops)
1250 {
1251   internal_fn ifn = IFN_LAST;
1252 
1253   /* We need to ignore the two_operands nodes that may also match,
1254      for that we can check if they have any scalar statements and also
1255      check that it's not a permute node as we're looking for a normal
1256      MINUS_EXPR operation.  */
1257   if (op != CMPLX_NONE)
1258     return IFN_LAST;
1259 
1260   slp_tree root = *ref_node;
1261   if (!vect_match_expression_p (root, MINUS_EXPR))
1262     return IFN_LAST;
1263 
1264   /* TODO: Support invariants here, with the new layout CADD now
1265 	   can match before we get a chance to try CFMS.  */
1266   auto nodes = SLP_TREE_CHILDREN (root);
1267   if (!vect_match_expression_p (nodes[1], MULT_EXPR)
1268       || vect_detect_pair_op (nodes[0]) != PLUS_MINUS)
1269     return IFN_LAST;
1270 
1271   auto childs = SLP_TREE_CHILDREN (nodes[0]);
1272   auto l0node = SLP_TREE_CHILDREN (childs[0]);
1273 
1274   /* Now operand2+4 may lead to another expression.  */
1275   auto_vec<slp_tree> left_op, right_op;
1276   left_op.safe_splice (SLP_TREE_CHILDREN (l0node[1]));
1277   right_op.safe_splice (SLP_TREE_CHILDREN (nodes[1]));
1278 
1279   /* If these nodes don't have any children then they're
1280      not ones we're interested in.  */
1281   if (left_op.length () != 2
1282       || right_op.length () != 2
1283       || !vect_match_expression_p (l0node[1], MULT_EXPR))
1284     return IFN_LAST;
1285 
1286   enum _conj_status status;
1287   if (!vect_validate_multiplication (perm_cache, compat_cache, right_op,
1288 				     left_op, true, &status))
1289     return IFN_LAST;
1290 
1291   if (status == CONJ_NONE)
1292     ifn = IFN_COMPLEX_FMS;
1293   else
1294     ifn = IFN_COMPLEX_FMS_CONJ;
1295 
1296   if (!vect_pattern_validate_optab (ifn, *ref_node))
1297     return IFN_LAST;
1298 
1299   ops->truncate (0);
1300   ops->create (4);
1301 
1302   complex_perm_kinds_t kind = linear_loads_p (perm_cache, right_op[0]);
1303   if (kind == PERM_EVENODD)
1304     {
1305       ops->quick_push (l0node[0]);
1306       ops->quick_push (right_op[0]);
1307       ops->quick_push (right_op[1]);
1308       ops->quick_push (left_op[1]);
1309     }
1310   else
1311     {
1312       ops->quick_push (l0node[0]);
1313       ops->quick_push (right_op[1]);
1314       ops->quick_push (right_op[0]);
1315       ops->quick_push (left_op[0]);
1316     }
1317 
1318   return ifn;
1319 }
1320 
1321 /* Attempt to recognize a complex mul pattern.  */
1322 
1323 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_compat_nodes_map_t * compat_cache,slp_tree * node)1324 complex_fms_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1325 				slp_compat_nodes_map_t *compat_cache,
1326 				slp_tree *node)
1327 {
1328   auto_vec<slp_tree> ops;
1329   complex_operation_t op
1330     = vect_detect_pair_op (*node, true, &ops);
1331   internal_fn ifn
1332     = complex_fms_pattern::matches (op, perm_cache, compat_cache, node, &ops);
1333   if (ifn == IFN_LAST)
1334     return NULL;
1335 
1336   return new complex_fms_pattern (node, &ops, ifn);
1337 }
1338 
1339 /* Perform a replacement of the detected complex mul pattern with the new
1340    instruction sequences.  */
1341 
1342 void
build(vec_info * vinfo)1343 complex_fms_pattern::build (vec_info *vinfo)
1344 {
1345   slp_tree node;
1346   unsigned i;
1347   slp_tree newnode =
1348     vect_build_combine_node (this->m_ops[2], this->m_ops[3], *this->m_node);
1349   SLP_TREE_REF_COUNT (this->m_ops[0])++;
1350   SLP_TREE_REF_COUNT (this->m_ops[1])++;
1351 
1352   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (*this->m_node), i, node)
1353     vect_free_slp_tree (node);
1354 
1355   SLP_TREE_CHILDREN (*this->m_node).release ();
1356   SLP_TREE_CHILDREN (*this->m_node).create (3);
1357 
1358   /* First re-arrange the children.  */
1359   SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[1]);
1360   SLP_TREE_CHILDREN (*this->m_node).quick_push (newnode);
1361   SLP_TREE_CHILDREN (*this->m_node).quick_push (this->m_ops[0]);
1362 
1363   /* And then rewrite the node itself.  */
1364   complex_pattern::build (vinfo);
1365 }
1366 
1367 /*******************************************************************************
1368  * complex_operations_pattern class
1369  ******************************************************************************/
1370 
1371 /* This function combines all the existing pattern matchers above into one class
1372    that shares the functionality between them.  The initial match is shared
1373    between all complex operations.  */
1374 
1375 class complex_operations_pattern : public complex_pattern
1376 {
1377   protected:
complex_operations_pattern(slp_tree * node,vec<slp_tree> * m_ops,internal_fn ifn)1378     complex_operations_pattern (slp_tree *node, vec<slp_tree> *m_ops,
1379 				internal_fn ifn)
1380       : complex_pattern (node, m_ops, ifn)
1381     {
1382       this->m_num_args = 0;
1383     }
1384 
1385   public:
1386     void build (vec_info *);
1387     static internal_fn
1388     matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
1389 	     slp_compat_nodes_map_t *, slp_tree *, vec<slp_tree> *);
1390 
1391     static vect_pattern*
1392     recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1393 	       slp_tree *);
1394 };
1395 
1396 /* Dummy matches implementation for proxy object.  */
1397 
1398 internal_fn
1399 complex_operations_pattern::
matches(complex_operation_t,slp_tree_to_load_perm_map_t *,slp_compat_nodes_map_t *,slp_tree *,vec<slp_tree> *)1400 matches (complex_operation_t /* op */,
1401 	 slp_tree_to_load_perm_map_t * /* perm_cache */,
1402 	 slp_compat_nodes_map_t * /* compat_cache */,
1403 	 slp_tree * /* ref_node */, vec<slp_tree> * /* ops */)
1404 {
1405   return IFN_LAST;
1406 }
1407 
1408 /* Attempt to recognize a complex mul pattern.  */
1409 
1410 vect_pattern*
recognize(slp_tree_to_load_perm_map_t * perm_cache,slp_compat_nodes_map_t * ccache,slp_tree * node)1411 complex_operations_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
1412 				       slp_compat_nodes_map_t *ccache,
1413 				       slp_tree *node)
1414 {
1415   auto_vec<slp_tree> ops;
1416   complex_operation_t op
1417     = vect_detect_pair_op (*node, true, &ops);
1418   internal_fn ifn = IFN_LAST;
1419 
1420   ifn  = complex_fms_pattern::matches (op, perm_cache, ccache, node, &ops);
1421   if (ifn != IFN_LAST)
1422     return complex_fms_pattern::mkInstance (node, &ops, ifn);
1423 
1424   ifn  = complex_mul_pattern::matches (op, perm_cache, ccache, node, &ops);
1425   if (ifn != IFN_LAST)
1426     return complex_mul_pattern::mkInstance (node, &ops, ifn);
1427 
1428   ifn  = complex_add_pattern::matches (op, perm_cache, ccache, node, &ops);
1429   if (ifn != IFN_LAST)
1430     return complex_add_pattern::mkInstance (node, &ops, ifn);
1431 
1432   return NULL;
1433 }
1434 
1435 /* Dummy implementation of build.  */
1436 
1437 void
build(vec_info *)1438 complex_operations_pattern::build (vec_info * /* vinfo */)
1439 {
1440   gcc_unreachable ();
1441 }
1442 
1443 
1444 /* The addsub_pattern.  */
1445 
1446 class addsub_pattern : public vect_pattern
1447 {
1448   public:
addsub_pattern(slp_tree * node,internal_fn ifn)1449     addsub_pattern (slp_tree *node, internal_fn ifn)
1450 	: vect_pattern (node, NULL, ifn) {};
1451 
1452     void build (vec_info *);
1453 
1454     static vect_pattern*
1455     recognize (slp_tree_to_load_perm_map_t *, slp_compat_nodes_map_t *,
1456 	       slp_tree *);
1457 };
1458 
1459 vect_pattern *
recognize(slp_tree_to_load_perm_map_t *,slp_compat_nodes_map_t *,slp_tree * node_)1460 addsub_pattern::recognize (slp_tree_to_load_perm_map_t *,
1461 			   slp_compat_nodes_map_t *, slp_tree *node_)
1462 {
1463   slp_tree node = *node_;
1464   if (SLP_TREE_CODE (node) != VEC_PERM_EXPR
1465       || SLP_TREE_CHILDREN (node).length () != 2
1466       || SLP_TREE_LANE_PERMUTATION (node).length () % 2)
1467     return NULL;
1468 
1469   /* Match a blend of a plus and a minus op with the same number of plus and
1470      minus lanes on the same operands.  */
1471   unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1472   unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1473   if (l0 == l1)
1474     return NULL;
1475   bool l0add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0],
1476 					  PLUS_EXPR);
1477   if (!l0add_p
1478       && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l0], MINUS_EXPR))
1479     return NULL;
1480   bool l1add_p = vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1],
1481 					  PLUS_EXPR);
1482   if (!l1add_p
1483       && !vect_match_expression_p (SLP_TREE_CHILDREN (node)[l1], MINUS_EXPR))
1484     return NULL;
1485 
1486   slp_tree l0node = SLP_TREE_CHILDREN (node)[l0];
1487   slp_tree l1node = SLP_TREE_CHILDREN (node)[l1];
1488   if (!((SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[0]
1489 	 && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[1])
1490 	|| (SLP_TREE_CHILDREN (l0node)[0] == SLP_TREE_CHILDREN (l1node)[1]
1491 	    && SLP_TREE_CHILDREN (l0node)[1] == SLP_TREE_CHILDREN (l1node)[0])))
1492     return NULL;
1493 
1494   for (unsigned i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
1495     {
1496       std::pair<unsigned, unsigned> perm = SLP_TREE_LANE_PERMUTATION (node)[i];
1497       /* It has to be alternating -, +, -,
1498 	 While we could permute the .ADDSUB inputs and the .ADDSUB output
1499 	 that's only profitable over the add + sub + blend if at least
1500 	 one of the permute is optimized which we can't determine here.  */
1501       if (perm.first != ((i & 1) ? l1 : l0)
1502 	  || perm.second != i)
1503 	return NULL;
1504     }
1505 
1506   /* Now we have either { -, +, -, + ... } (!l0add_p) or { +, -, +, - ... }
1507      (l0add_p), see whether we have FMA variants.  We can only form FMAs
1508      if allowed via -ffp-contract=fast.  */
1509   if (flag_fp_contract_mode != FP_CONTRACT_FAST
1510       && FLOAT_TYPE_P (SLP_TREE_VECTYPE (l0node)))
1511     ;
1512   else if (!l0add_p
1513 	   && vect_match_expression_p (SLP_TREE_CHILDREN (l0node)[0], MULT_EXPR))
1514     {
1515       /* (c * d) -+ a */
1516       if (vect_pattern_validate_optab (IFN_VEC_FMADDSUB, node))
1517 	return new addsub_pattern (node_, IFN_VEC_FMADDSUB);
1518     }
1519   else if (l0add_p
1520 	   && vect_match_expression_p (SLP_TREE_CHILDREN (l1node)[0], MULT_EXPR))
1521     {
1522       /* (c * d) +- a */
1523       if (vect_pattern_validate_optab (IFN_VEC_FMSUBADD, node))
1524 	return new addsub_pattern (node_, IFN_VEC_FMSUBADD);
1525     }
1526 
1527   if (!l0add_p && vect_pattern_validate_optab (IFN_VEC_ADDSUB, node))
1528     return new addsub_pattern (node_, IFN_VEC_ADDSUB);
1529 
1530   return NULL;
1531 }
1532 
1533 void
build(vec_info * vinfo)1534 addsub_pattern::build (vec_info *vinfo)
1535 {
1536   slp_tree node = *m_node;
1537 
1538   unsigned l0 = SLP_TREE_LANE_PERMUTATION (node)[0].first;
1539   unsigned l1 = SLP_TREE_LANE_PERMUTATION (node)[1].first;
1540 
1541   switch (m_ifn)
1542     {
1543     case IFN_VEC_ADDSUB:
1544       {
1545 	slp_tree sub = SLP_TREE_CHILDREN (node)[l0];
1546 	slp_tree add = SLP_TREE_CHILDREN (node)[l1];
1547 
1548 	/* Modify the blend node in-place.  */
1549 	SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (sub)[0];
1550 	SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (sub)[1];
1551 	SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1552 	SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1553 
1554 	/* Build IFN_VEC_ADDSUB from the sub representative operands.  */
1555 	stmt_vec_info rep = SLP_TREE_REPRESENTATIVE (sub);
1556 	gcall *call = gimple_build_call_internal (IFN_VEC_ADDSUB, 2,
1557 						  gimple_assign_rhs1 (rep->stmt),
1558 						  gimple_assign_rhs2 (rep->stmt));
1559 	gimple_call_set_lhs (call, make_ssa_name
1560 			     (TREE_TYPE (gimple_assign_lhs (rep->stmt))));
1561 	gimple_call_set_nothrow (call, true);
1562 	gimple_set_bb (call, gimple_bb (rep->stmt));
1563 	stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, rep);
1564 	SLP_TREE_REPRESENTATIVE (node) = new_rep;
1565 	STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1566 	STMT_SLP_TYPE (new_rep) = pure_slp;
1567 	STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1568 	STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1569 	STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (rep));
1570 	SLP_TREE_CODE (node) = ERROR_MARK;
1571 	SLP_TREE_LANE_PERMUTATION (node).release ();
1572 
1573 	vect_free_slp_tree (sub);
1574 	vect_free_slp_tree (add);
1575 	break;
1576       }
1577     case IFN_VEC_FMADDSUB:
1578     case IFN_VEC_FMSUBADD:
1579       {
1580 	slp_tree sub, add;
1581 	if (m_ifn == IFN_VEC_FMADDSUB)
1582 	  {
1583 	    sub = SLP_TREE_CHILDREN (node)[l0];
1584 	    add = SLP_TREE_CHILDREN (node)[l1];
1585 	  }
1586 	else /* m_ifn == IFN_VEC_FMSUBADD */
1587 	  {
1588 	    sub = SLP_TREE_CHILDREN (node)[l1];
1589 	    add = SLP_TREE_CHILDREN (node)[l0];
1590 	  }
1591 	slp_tree mul = SLP_TREE_CHILDREN (sub)[0];
1592 	/* Modify the blend node in-place.  */
1593 	SLP_TREE_CHILDREN (node).safe_grow (3, true);
1594 	SLP_TREE_CHILDREN (node)[0] = SLP_TREE_CHILDREN (mul)[0];
1595 	SLP_TREE_CHILDREN (node)[1] = SLP_TREE_CHILDREN (mul)[1];
1596 	SLP_TREE_CHILDREN (node)[2] = SLP_TREE_CHILDREN (sub)[1];
1597 	SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[0])++;
1598 	SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[1])++;
1599 	SLP_TREE_REF_COUNT (SLP_TREE_CHILDREN (node)[2])++;
1600 
1601 	/* Build IFN_VEC_FMADDSUB from the mul/sub representative operands.  */
1602 	stmt_vec_info srep = SLP_TREE_REPRESENTATIVE (sub);
1603 	stmt_vec_info mrep = SLP_TREE_REPRESENTATIVE (mul);
1604 	gcall *call = gimple_build_call_internal (m_ifn, 3,
1605 						  gimple_assign_rhs1 (mrep->stmt),
1606 						  gimple_assign_rhs2 (mrep->stmt),
1607 						  gimple_assign_rhs2 (srep->stmt));
1608 	gimple_call_set_lhs (call, make_ssa_name
1609 			     (TREE_TYPE (gimple_assign_lhs (srep->stmt))));
1610 	gimple_call_set_nothrow (call, true);
1611 	gimple_set_bb (call, gimple_bb (srep->stmt));
1612 	stmt_vec_info new_rep = vinfo->add_pattern_stmt (call, srep);
1613 	SLP_TREE_REPRESENTATIVE (node) = new_rep;
1614 	STMT_VINFO_RELEVANT (new_rep) = vect_used_in_scope;
1615 	STMT_SLP_TYPE (new_rep) = pure_slp;
1616 	STMT_VINFO_VECTYPE (new_rep) = SLP_TREE_VECTYPE (node);
1617 	STMT_VINFO_SLP_VECT_ONLY_PATTERN (new_rep) = true;
1618 	STMT_VINFO_REDUC_DEF (new_rep) = STMT_VINFO_REDUC_DEF (vect_orig_stmt (srep));
1619 	SLP_TREE_CODE (node) = ERROR_MARK;
1620 	SLP_TREE_LANE_PERMUTATION (node).release ();
1621 
1622 	vect_free_slp_tree (sub);
1623 	vect_free_slp_tree (add);
1624 	break;
1625       }
1626     default:;
1627     }
1628 }
1629 
1630 /*******************************************************************************
1631  * Pattern matching definitions
1632  ******************************************************************************/
1633 
1634 #define SLP_PATTERN(x) &x::recognize
1635 vect_pattern_decl_t slp_patterns[]
1636 {
1637   /* For least amount of back-tracking and more efficient matching
1638      order patterns from the largest to the smallest.  Especially if they
1639      overlap in what they can detect.  */
1640 
1641   SLP_PATTERN (complex_operations_pattern),
1642   SLP_PATTERN (addsub_pattern)
1643 };
1644 #undef SLP_PATTERN
1645 
1646 /* Set the number of SLP pattern matchers available.  */
1647 size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t);
1648