xref: /netbsd-src/external/gpl3/gcc.old/dist/gcc/config/tilepro/gen-mul-tables.cc (revision b2c35e17b976cf7ccd7250c86c6f5e95090ed636)
1 /* Multiply table generator for tile.
2    Copyright (C) 2011-2020 Free Software Foundation, Inc.
3    Contributed by Walter Lee (walt@tilera.com)
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published
9    by the Free Software Foundation; either version 3, or (at your
10    option) any later version.
11 
12    GCC is distributed in the hope that it will be useful, but WITHOUT
13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15    License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 /* This program creates a table used to compile multiply by constant
22    efficiently.
23 
24    This program should be compiled by a c++ compiler.  If it's
25    compiled with -DTILEPRO, it generates the multiply table for
26    TILEPro; otherwise it generates the multiply table for TILE-Gx.
27    Running the program produces the table in stdout.
28 
29    The program works by generating every possible combination of up to
30    MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left
31    shift) and computing the multiplier computed by those instructions.
32    For example,
33 
34    s2a r2,r1,r1
35    s2a r3,r2,r2
36 
37    multiplies r1 by 25.
38 
39    There are usually multiple instruction sequences to multiply by a
40    given constant. This program keeps only the cheapest one.
41    "Cheapest" is defined first by the minimum theoretical schedule
42    length, and if those are equal then by the number of instructions,
43    and if those are equal then by which instructions we "prefer"
44    (e.g. because we think one might take infinitesimally less power
45    than another, or simply because we arbitrarily pick one to be more
46    canonical).
47 
48    Once this program has determined the best instruction sequence for
49    each multiplier, it emits them in a table sorted by the multiplier
50    so the user can binary-search it to look for a match.  The table is
51    pruned based on various criteria to keep its sizes reasonable.  */
52 
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <assert.h>
56 #include <string.h>
57 #define __STDC_LIMIT_MACROS
58 #include <stdint.h>
59 
60 #include <map>
61 
62 #ifdef TILEPRO
63 
64 /* The string representing the architecture.  */
65 #define ARCH "tilepro"
66 
67 /* The type for the multiplication.  */
68 typedef int MUL_TYPE;
69 
70 #else
71 
72 /* The string representing the architecture.  */
73 #define ARCH "tilegx"
74 
75 /* The type for the multiplication.  */
76 typedef long long MUL_TYPE;
77 
78 #endif
79 
80 /* Longest instruction sequence this will produce. With the current
81    stupid algorithm runtime grows very quickly with this number.  */
82 #define MAX_INSTRUCTIONS 4
83 
84 /* Maximum number of subexpressions in the expression DAG being
85    generated.  This is the same as the number of instructions, except
86    that zero and the original register we'd like to multiply by a
87    constant are also thrown into the mix.  */
88 #define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS)
89 
90 #define MIN(x, y)  ((x) <= (y) ? (x) : (y))
91 #define MAX(x, y)  ((x) >= (y) ? (x) : (y))
92 
93 /* For this program a unary op is one which has only one nonconstant
94    operand.  So shift left by 5 is considered unary.  */
95 typedef MUL_TYPE (*unary_op_func) (MUL_TYPE);
96 typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE);
97 
98 /* This describes an operation like 'add two registers' or 'left-shift
99    by 7'.
100 
101    We call something a 'unary' operator if it only takes in one
102    register as input, even though it might have an implicit second
103    constant operand.  Currently this is used for left-shift by
104    constant.  */
105 class Operator
106 {
107 public:
108   /* Construct for a binary operator.  */
109   Operator (const char *pattern, const char *name, binary_op_func func,
110 	    int cost)
111     : m_pattern (pattern), m_name (name), m_top_index (-1),
112       m_unary_func (0), m_binary_func (func), m_cost (cost),
113       m_rhs_if_unary (0)
114   {
115   }
116 
117   /* Construct for a unary operator.  */
118   Operator (const char *pattern, const char *name, unary_op_func func,
119 	    int rhs_if_unary, int cost)
120     : m_pattern (pattern), m_name (name), m_top_index (-1),
121       m_unary_func (func), m_binary_func (0), m_cost (cost),
122       m_rhs_if_unary (rhs_if_unary)
123   {
124   }
125 
126   bool is_unary () const
127   {
128     return m_binary_func == NULL;
129   }
130 
131   /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3.  */
132   const char *m_pattern;
133 
134   /* Name of the opcode for this operation, e.g. add.  */
135   const char *m_name;
136 
137   /* We don't have enough bits in our output representation to store
138      the original insn_code value, so we store a compressed form
139      instead.  These values are decoded back into insn_code via the
140      machine-generated multiply_insn_seq_decode_opcode lookup
141      table.  */
142   int m_top_index;
143 
144   /* Unary operator to apply, or NULL if this is a binary operator.  */
145   unary_op_func m_unary_func;
146 
147   /* Binary operator to apply, or NULL if this is a unary operator.  */
148   binary_op_func m_binary_func;
149 
150   /* Function of how expensive we consider this operator. Higher is
151      worse.  */
152   int m_cost;
153 
154   /* the RHS value to write into the C file if unary; used for shift
155      count.  */
156   int m_rhs_if_unary;
157 };
158 
159 
160 /* An entry in an expression DAG.  */
161 class Expr
162 {
163 public:
164   Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0),
165     m_critical_path_length (0)
166   {
167   }
168 
169   /* The operator being applied to the operands.  */
170   const Operator *m_op;
171 
172   /* The index of the left-hand operand in the array of subexpressions
173      already computed.  */
174   int m_lhs;
175 
176   /* For binary ops ,this is the index of the left-hand operand in the
177      array of subexpressions already computed. For unary ops, it is
178      specific to the op (e.g. it might hold a constant shift
179      count).  */
180   int m_rhs;
181 
182   /* The multiplier produced by this expression tree. For example, for
183      the tree ((x << 5) + x), the value would be 33.  */
184   MUL_TYPE m_produced_val;
185 
186   /* How far is this expression from the root, i.e. how many cycles
187      minimum will it take to compute this?  */
188   int m_critical_path_length;
189 };
190 
191 
192 /* Make function pointers for the various linear operators we can
193    apply to compute a multiplicative value.  */
194 
195 static MUL_TYPE
196 add (MUL_TYPE n1, MUL_TYPE n2)
197 {
198   return n1 + n2;
199 }
200 
201 static MUL_TYPE
202 sub (MUL_TYPE n1, MUL_TYPE n2)
203 {
204   return n1 - n2;
205 }
206 
207 static MUL_TYPE
208 s1a (MUL_TYPE n1, MUL_TYPE n2)
209 {
210   return n1 * 2 + n2;
211 }
212 
213 static MUL_TYPE
214 s2a (MUL_TYPE n1, MUL_TYPE n2)
215 {
216   return n1 * 4 + n2;
217 }
218 
219 static MUL_TYPE
220 s3a (MUL_TYPE n1, MUL_TYPE n2)
221 {
222   return n1 * 8 + n2;
223 }
224 
225 #define SHIFT(count)                            \
226 static MUL_TYPE                                 \
227 shift##count(MUL_TYPE n)                        \
228 {                                               \
229   return n << (count);                          \
230 }
231 
232 SHIFT (1);
233 SHIFT (2);
234 SHIFT (3);
235 SHIFT (4);
236 SHIFT (5);
237 SHIFT (6);
238 SHIFT (7);
239 SHIFT (8);
240 SHIFT (9);
241 SHIFT (10);
242 SHIFT (11);
243 SHIFT (12);
244 SHIFT (13);
245 SHIFT (14);
246 SHIFT (15);
247 SHIFT (16);
248 SHIFT (17);
249 SHIFT (18);
250 SHIFT (19);
251 SHIFT (20);
252 SHIFT (21);
253 SHIFT (22);
254 SHIFT (23);
255 SHIFT (24);
256 SHIFT (25);
257 SHIFT (26);
258 SHIFT (27);
259 SHIFT (28);
260 SHIFT (29);
261 SHIFT (30);
262 SHIFT (31);
263 #ifndef TILEPRO
264 SHIFT (32);
265 SHIFT (33);
266 SHIFT (34);
267 SHIFT (35);
268 SHIFT (36);
269 SHIFT (37);
270 SHIFT (38);
271 SHIFT (39);
272 SHIFT (40);
273 SHIFT (41);
274 SHIFT (42);
275 SHIFT (43);
276 SHIFT (44);
277 SHIFT (45);
278 SHIFT (46);
279 SHIFT (47);
280 SHIFT (48);
281 SHIFT (49);
282 SHIFT (50);
283 SHIFT (51);
284 SHIFT (52);
285 SHIFT (53);
286 SHIFT (54);
287 SHIFT (55);
288 SHIFT (56);
289 SHIFT (57);
290 SHIFT (58);
291 SHIFT (59);
292 SHIFT (60);
293 SHIFT (61);
294 SHIFT (62);
295 SHIFT (63);
296 #endif
297 
298 #ifdef TILEPRO
299 static Operator ops[] = {
300   Operator ("CODE_FOR_addsi3", "add", add, 1040),
301   Operator ("CODE_FOR_subsi3", "sub", sub, 1041),
302   Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042),
303   Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043),
304   Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044),
305   /* Note: shl by 1 is not necessary, since adding a value to itself
306      produces the same result. But the architecture team thinks
307      left-shift may use slightly less power, so we might as well
308      prefer it.  */
309   Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001),
310   Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002),
311   Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003),
312   Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004),
313   Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005),
314   Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006),
315   Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007),
316   Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008),
317   Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009),
318   Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010),
319   Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011),
320   Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012),
321   Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013),
322   Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014),
323   Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015),
324   Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016),
325   Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017),
326   Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018),
327   Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019),
328   Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020),
329   Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021),
330   Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022),
331   Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023),
332   Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024),
333   Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025),
334   Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026),
335   Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027),
336   Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028),
337   Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029),
338   Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030),
339   Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031)
340 };
341 #else
342 static Operator ops[] = {
343   Operator ("CODE_FOR_adddi3", "add", add, 1070),
344   Operator ("CODE_FOR_subdi3", "sub", sub, 1071),
345   Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072),
346   Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073),
347   Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074),
348   // Note: shl by 1 is not necessary, since adding a value to itself
349   // produces the same result. But the architecture team thinks left-shift
350   // may use slightly less power, so we might as well prefer it.
351   Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001),
352   Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002),
353   Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003),
354   Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004),
355   Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005),
356   Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006),
357   Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007),
358   Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008),
359   Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009),
360   Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010),
361   Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011),
362   Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012),
363   Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013),
364   Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014),
365   Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015),
366   Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016),
367   Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017),
368   Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018),
369   Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019),
370   Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020),
371   Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021),
372   Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022),
373   Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023),
374   Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024),
375   Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025),
376   Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026),
377   Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027),
378   Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028),
379   Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029),
380   Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030),
381   Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031),
382   Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032),
383   Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033),
384   Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034),
385   Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035),
386   Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036),
387   Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037),
388   Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038),
389   Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039),
390   Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040),
391   Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041),
392   Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042),
393   Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043),
394   Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044),
395   Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045),
396   Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046),
397   Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047),
398   Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048),
399   Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049),
400   Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050),
401   Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051),
402   Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052),
403   Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053),
404   Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054),
405   Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055),
406   Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056),
407   Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057),
408   Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058),
409   Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059),
410   Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060),
411   Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061),
412   Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062),
413   Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063)
414 };
415 #endif
416 
417 /* An ExpressionTree is an expression DAG.  */
418 class ExpressionTree
419 {
420 public:
421   ExpressionTree () : m_num_vals (2)
422   {
423     m_exprs[0].m_produced_val = 0;
424     m_exprs[1].m_produced_val = 1;
425   }
426 
427   void copy_technique_from (ExpressionTree * other)
428   {
429     /* TODO: write this; other can compute the same products with less
430        cost.  We update this ExpressionTree object because some int is
431        already mapped to it.  */
432   }
433 
434   int m_num_vals;
435   Expr m_exprs[MAX_SUBEXPRS];
436 
437   int cost () const
438   {
439     int cost = 0;
440     for (int j = 2; j < m_num_vals; j++)
441         cost += m_exprs[j].m_op->m_cost;
442       return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000;
443   }
444 };
445 
446 
447 typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap;
448 
449 
450 static void
451 find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution)
452 {
453   /* Don't look more if we can't add any new values to the expression
454      tree.  */
455   const int num_vals = s.m_num_vals;
456   if (num_vals == MAX_SUBEXPRS)
457     return;
458 
459   /* Grow array to make room for new values added as we loop.  */
460   s.m_num_vals = num_vals + 1;
461 
462   const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op;
463   const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1;
464 
465   for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++)
466     {
467       const Operator *const op = &ops[f];
468 
469       for (int i = 0; i < num_vals; i++)
470 	{
471 	  /* Only allow zero as the first operand to sub, otherwise
472 	     it is useless.  */
473 	  if (i == 0 && op->m_binary_func != sub)
474 	    continue;
475 
476 	  /* Unary ops don't actually use the second operand, so as a
477 	     big hack we trick it into only looping once in the inner
478 	     loop.  */
479 	  const int j_end = op->is_unary () ? 2 : num_vals;
480 
481 	  /* We never allow zero as the second operand, as it is
482 	     always useless.  */
483 	  for (int j = 1; j < j_end; j++)
484 	    {
485 	      /* Does this expression use the immediately previous
486 		 expression?  */
487 	      const bool uses_prev_value =
488 		(i == num_vals - 1
489 		 || (!op->is_unary () && j == num_vals - 1));
490 
491 	      if (!uses_prev_value)
492 		{
493 		  /* For search efficiency, prune redundant
494 		     instruction orderings.
495 
496 		     This op does not take the immediately preceding
497 		     value as input, which means we could have done it
498 		     in the previous slot. If its opcode is less than
499 		     the previous instruction's, we'll declare this
500 		     instruction order non-canonical and not pursue
501 		     this branch of the search.  */
502 		  if (op->m_top_index < prev_top_index)
503 		    continue;
504 		}
505 
506 	      MUL_TYPE n;
507 	      if (op->is_unary ())
508 		{
509 		  n = op->m_unary_func (s.m_exprs[i].m_produced_val);
510 		}
511 	      else
512 		{
513 		  n = op->m_binary_func (s.m_exprs[i].m_produced_val,
514 					 s.m_exprs[j].m_produced_val);
515 		}
516 
517 	      bool duplicate = false;
518 	      for (int k = 0; k < num_vals; k++)
519 		if (n == s.m_exprs[j].m_produced_val)
520 		  {
521 		    duplicate = true;
522 		    break;
523 		  }
524 
525 	      if (duplicate)
526 		continue;
527 
528 	      /* See if we found the best solution for n.  */
529 	      Expr *e = &s.m_exprs[num_vals];
530 	      e->m_op = op;
531 	      e->m_lhs = i;
532 	      e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j;
533 	      e->m_produced_val = n;
534 	      e->m_critical_path_length =
535 		1 + MAX (s.m_exprs[i].m_critical_path_length,
536 			 s.m_exprs[j].m_critical_path_length);
537 
538 	      ExpressionTreeMap::iterator best (best_solution.find (n));
539 	      if (best == best_solution.end ()
540 		  || (*best).second->cost () > s.cost ())
541 		best_solution[n] = new ExpressionTree (s);
542 
543 	      /* Recurse and find more.  */
544 	      find_sequences (s, best_solution);
545 	    }
546 	}
547     }
548 
549   /* Restore old size.  */
550   s.m_num_vals = num_vals;
551 }
552 
553 
554 /* For each insn_code used by an operator, choose a compact number so
555    it takes less space in the output data structure. This prints out a
556    lookup table used to map the compactified number back to an
557    insn_code.  */
558 static void
559 create_insn_code_compression_table ()
560 {
561   int next_index = 1;
562 
563   /* Entry 0 must hold CODE_FOR_nothing to mean "end of array".  */
564   printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n"
565 	  "  CODE_FOR_nothing /* must be first */ ", ARCH);
566 
567   for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++)
568     {
569       Operator *op = &ops[i];
570       int index = -1;
571 
572       /* See if some previous Operator was using the same insn_code.
573 	 If so, reuse its table entry.  */
574       for (size_t j = 0; j < i; j++)
575 	{
576 	  Operator *old = &ops[j];
577 	  if (strcmp (old->m_pattern, op->m_pattern) == 0)
578 	    {
579 	      index = old->m_top_index;
580 	      break;
581 	    }
582 	}
583 
584       if (index == -1)
585 	{
586 	  /* We need to make a new entry in the table.  */
587 	  printf (",\n  %s", op->m_pattern);
588 	  index = next_index++;
589 	}
590 
591       op->m_top_index = index;
592     }
593 
594   printf ("\n};\n\n");
595 }
596 
597 
598 /* These are constants we've seen in code, that we want to keep table
599    entries for.  */
600 static int multiply_constants_used[] = {
601   -11796480,
602   -27439,
603   -25148,
604   -22820,
605   -21709,
606   -20995,
607   -20284,
608   -20239,
609   -19595,
610   -19447,
611   -19183,
612   -19165,
613   -18730,
614   -17828,
615   -17799,
616   -17237,
617   -17036,
618   -16549,
619   -16423,
620   -16294,
621   -16244,
622   -16069,
623   -15137,
624   -15083,
625   -15038,
626   -14924,
627   -14905,
628   -14752,
629   -14731,
630   -14529,
631   -14273,
632   -14090,
633   -14084,
634   -14043,
635   -13850,
636   -13802,
637   -13631,
638   -13455,
639   -13275,
640   -12879,
641   -12700,
642   -12534,
643   -12399,
644   -12131,
645   -12112,
646   -12054,
647   -12019,
648   -11759,
649   -11585,
650   -11467,
651   -11395,
652   -11295,
653   -11248,
654   -11148,
655   -11116,
656   -11086,
657   -11059,
658   -11018,
659   -10811,
660   -10538,
661   -10258,
662   -10217,
663   -10033,
664   -9766,
665   -9754,
666   -9534,
667   -9527,
668   -9467,
669   -9262,
670   -9232,
671   -9222,
672   -9198,
673   -9191,
674   -9113,
675   -8825,
676   -8756,
677   -8697,
678   -8693,
679   -8565,
680   -8342,
681   -8208,
682   -8200,
683   -8170,
684   -8102,
685   -7770,
686   -7678,
687   -7562,
688   -7376,
689   -7373,
690   -7221,
691   -7121,
692   -6835,
693   -6810,
694   -6626,
695   -6581,
696   -6461,
697   -6278,
698   -6263,
699   -6163,
700   -6029,
701   -5816,
702   -5540,
703   -5461,
704   -5384,
705   -5329,
706   -4985,
707   -4926,
708   -4815,
709   -4788,
710   -4758,
711   -4433,
712   -4229,
713   -4209,
714   -4176,
715   -4104,
716   -4095,
717   -4078,
718   -3941,
719   -3818,
720   -3600,
721   -3474,
722   -3314,
723   -3264,
724   -3196,
725   -3072,
726   -2912,
727   -2836,
728   -2773,
729   -2269,
730   -2184,
731   -2100,
732   -1730,
733   -1512,
734   -1500,
735   -1396,
736   -1344,
737   -1312,
738   -1297,
739   -1059,
740   -1058,
741   1027,
742   1049,
743   1059,
744   1100,
745   1104,
746   1108,
747   1136,
748   1200,
749   1204,
750   1242,
751   1292,
752   1304,
753   1312,
754   1320,
755   1336,
756   1344,
757   1348,
758   1360,
759   1364,
760   1395,
761   1448,
762   1460,
763   1461,
764   1472,
765   1488,
766   1500,
767   1512,
768   1568,
769   1576,
770   1649,
771   1664,
772   1684,
773   1696,
774   1744,
775   1812,
776   1822,
777   1884,
778   1963,
779   1978,
780   2000,
781   2012,
782   2014,
783   2037,
784   2039,
785   2100,
786   2139,
787   2144,
788   2184,
789   2237,
790   2260,
791   2320,
792   2408,
793   2446,
794   2447,
795   2499,
796   2531,
797   2578,
798   2592,
799   2611,
800   2633,
801   2704,
802   2730,
803   2773,
804   2880,
805   2896,
806   2998,
807   3000,
808   3001,
809   3021,
810   3079,
811   3112,
812   3150,
813   3179,
814   3192,
815   3240,
816   3264,
817   3271,
818   3283,
819   3328,
820   3363,
821   3367,
822   3453,
823   3529,
824   3570,
825   3580,
826   3600,
827   3624,
828   3707,
829   3783,
830   3826,
831   3897,
832   3941,
833   3962,
834   3989,
835   4000,
836   4025,
837   4073,
838   4093,
839   4099,
840   4108,
841   4184,
842   4209,
843   4369,
844   4376,
845   4416,
846   4433,
847   4434,
848   4482,
849   4582,
850   4712,
851   4717,
852   4813,
853   4815,
854   4864,
855   5000,
856   5027,
857   5040,
858   5091,
859   5195,
860   5243,
861   5260,
862   5285,
863   5329,
864   5331,
865   5350,
866   5361,
867   5387,
868   5461,
869   5492,
870   5529,
871   5573,
872   5793,
873   5819,
874   5915,
875   5946,
876   5992,
877   6000,
878   6164,
879   6205,
880   6262,
881   6263,
882   6269,
883   6270,
884   6387,
885   6400,
886   6406,
887   6476,
888   6541,
889   6565,
890   6568,
891   6626,
892   6656,
893   6732,
894   6810,
895   6817,
896   6859,
897   7040,
898   7053,
899   7141,
900   7169,
901   7221,
902   7223,
903   7274,
904   7282,
905   7350,
906   7369,
907   7373,
908   7442,
909   7447,
910   7471,
911   7518,
912   7542,
913   7566,
914   7587,
915   7663,
916   7678,
917   7682,
918   7748,
919   7752,
920   7791,
921   8000,
922   8026,
923   8048,
924   8170,
925   8203,
926   8204,
927   8290,
928   8368,
929   8520,
930   8640,
931   8666,
932   8672,
933   8697,
934   8716,
935   8728,
936   8756,
937   8820,
938   8875,
939   8918,
940   8956,
941   9058,
942   9154,
943   9175,
944   9191,
945   9217,
946   9262,
947   9321,
948   9373,
949   9434,
950   9465,
951   9514,
952   9534,
953   9633,
954   9746,
955   9810,
956   9850,
957   9947,
958   9973,
959   10000,
960   10009,
961   10033,
962   10055,
963   10217,
964   10248,
965   10298,
966   10310,
967   10323,
968   10368,
969   10438,
970   10456,
971   10486,
972   10538,
973   10664,
974   10695,
975   10700,
976   10703,
977   10832,
978   10887,
979   10935,
980   10958,
981   11018,
982   11059,
983   11061,
984   11086,
985   11116,
986   11148,
987   11190,
988   11249,
989   11314,
990   11332,
991   11363,
992   11409,
993   11415,
994   11443,
995   11467,
996   11512,
997   11522,
998   11529,
999   11585,
1000   11759,
1001   11768,
1002   11795,
1003   11893,
1004   11997,
1005   12131,
1006   12299,
1007   12536,
1008   12543,
1009   12893,
1010   12945,
1011   12998,
1012   13109,
1013   13213,
1014   13685,
1015   13930,
1016   14023,
1017   14024,
1018   14271,
1019   14564,
1020   14647,
1021   15326,
1022   15850,
1023   15855,
1024   15929,
1025   16000,
1026   16154,
1027   16496,
1028   16807,
1029   16819,
1030   16984,
1031   17203,
1032   17223,
1033   17333,
1034   17760,
1035   17799,
1036   17837,
1037   18029,
1038   18068,
1039   18336,
1040   18515,
1041   19595,
1042   20017,
1043   20131,
1044   20862,
1045   20995,
1046   21709,
1047   22554,
1048   25000,
1049   25172,
1050   25600,
1051   25733,
1052   27439,
1053   38470,
1054   46802,
1055   50000,
1056   11796480,
1057   16843009,
1058   23592960,
1059 };
1060 
1061 
1062 const int num_mult_constants_used =
1063   (int)(sizeof multiply_constants_used
1064 	/ sizeof multiply_constants_used[0]);
1065 
1066 
1067 #define XSIZE (sizeof multiply_constants_used / \
1068 	       sizeof multiply_constants_used[0] + 32) / 32
1069 unsigned multiply_constants_avail[XSIZE];
1070 #undef XSIZE
1071 
1072 
1073 /* bsearch helper function.  */
1074 static int
1075 compare_constants (const void *key, const void *t)
1076 {
1077   return (*(int*)key) - *((int*)t);
1078 }
1079 
1080 
1081 static int *
1082 find_mult_constants_used (int multiplier)
1083 {
1084   return (int *) bsearch (&multiplier, multiply_constants_used,
1085 			  num_mult_constants_used,
1086 			  sizeof multiply_constants_used[0],
1087 			  compare_constants);
1088 }
1089 
1090 
1091 int num_ops (ExpressionTree *s)
1092 {
1093   int n = 0;
1094   for (int i = 0; i < s->m_num_vals; i++)
1095     {
1096       Expr *e = &s->m_exprs[i];
1097       if (e->m_op != NULL)
1098 	n++;
1099     }
1100   return n;
1101 }
1102 
1103 
1104 #ifdef TILEPRO
1105 bool
1106 tilepro_emit (int multiplier, int num_ops)
1107 {
1108   int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier;
1109 
1110   /* Keep constants in range [-1024, 1024].  */
1111   if (abs_multiplier <= 1024)
1112     return true;
1113 
1114   /* Keep constants near powers of two.  */
1115   int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier));
1116   int next_pow2 = prev_pow2 << 1;
1117 
1118   if ((abs_multiplier - prev_pow2 <= 10)
1119       || (next_pow2 - abs_multiplier <= 10))
1120     return true;
1121 
1122   /* Keep constants near powers of ten.  */
1123   {
1124     long long j = 1;
1125     long long prev_pow10;
1126     long long next_pow10;
1127 
1128     while (((j * 10) < abs_multiplier)
1129 	   && (j < (j * 10)))
1130       j = j * 10;
1131 
1132     prev_pow10 = j;
1133     next_pow10 = j * 10;
1134 
1135     if ((abs_multiplier - prev_pow10 <= 10)
1136 	|| (next_pow10 - abs_multiplier <= 10))
1137       return true;
1138   }
1139 
1140   /* Keep short sequences that have two or fewer ops.  */
1141   if (num_ops <= 2)
1142     return true;
1143 
1144   /* Keep constants that are mostly 0's or mostly 1's.  */
1145   if (__builtin_popcount (multiplier) <= 2 ||
1146       __builtin_popcount (multiplier) >= 30)
1147     return true;
1148 
1149   /* Keep constants seen in actual code.  */
1150   if ((find_mult_constants_used (multiplier)))
1151     return true;
1152 
1153   return false;
1154 }
1155 #else
1156 bool
1157 tilegx_emit (long long multiplier, int num_ops)
1158 {
1159   long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier;
1160 
1161   /* Keep constants in range [-1024, 1024].  */
1162   if (abs_multiplier <= 1024)
1163     return true;
1164 
1165   /* Otherwise exclude sequences with four ops.  */
1166   if (num_ops > 3)
1167     return false;
1168 
1169   /* Keep constants near powers of two.  */
1170   {
1171     unsigned long long prev_pow2 =
1172       1LL << (63 - __builtin_clzll (abs_multiplier));
1173     unsigned long long next_pow2 = prev_pow2 << 1;
1174 
1175     /* handle overflow case. */
1176     if (next_pow2 == 0)
1177       {
1178 	if (prev_pow2 - abs_multiplier <= 10)
1179 	  return true;
1180       }
1181     else if ((abs_multiplier - prev_pow2 <= 10)
1182 	     || (next_pow2 - abs_multiplier <= 10))
1183       return true;
1184   }
1185 
1186   /* Keep constants near powers of ten.  */
1187   {
1188     long long j = 1;
1189     long long prev_pow10;
1190     long long next_pow10;
1191 
1192     while (((j * 10) < abs_multiplier)
1193 	   && (j < (j * 10)))
1194       j = j * 10;
1195 
1196     prev_pow10 = j;
1197     next_pow10 = j * 10;
1198 
1199     if ((abs_multiplier - prev_pow10 <= 100)
1200 	|| (next_pow10
1201 	    && (next_pow10 - abs_multiplier <= 100)))
1202       return true;
1203   }
1204 
1205   if (num_ops <= 2)
1206     return true;
1207 
1208   /* Keep constants that are mostly 0's or mostly 1's.  */
1209   if (__builtin_popcountll (multiplier) <= 2 ||
1210       __builtin_popcountll (multiplier) >= 62)
1211     return true;
1212 
1213   /* Keep constants seen in actual code.  */
1214   if (find_mult_constants_used (multiplier))
1215     return true;
1216 
1217   return false;
1218 }
1219 #endif
1220 
1221 
1222 int
1223 main ()
1224 {
1225   ExpressionTreeMap best_solution;
1226   ExpressionTree s;
1227 
1228 #ifdef TILEPRO
1229   printf ("/* Constant multiply table for TILEPro.\n");
1230 #else
1231   printf ("/* Constant multiply table for TILE-Gx.\n");
1232 #endif
1233   printf ("   Copyright (C) 2011-2020 Free Software Foundation, Inc.\n");
1234   printf ("   Contributed by Walter Lee (walt@tilera.com)\n");
1235   printf ("\n");
1236   printf ("   This file is part of GCC.\n");
1237   printf ("\n");
1238   printf ("   GCC is free software; you can redistribute it and/or modify it\n");
1239   printf ("   under the terms of the GNU General Public License as published\n");
1240   printf ("   by the Free Software Foundation; either version 3, or (at your\n");
1241   printf ("   option) any later version.\n");
1242   printf ("\n");
1243   printf ("   GCC is distributed in the hope that it will be useful, but WITHOUT\n");
1244   printf ("   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n");
1245   printf ("   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public\n");
1246   printf ("   License for more details.\n");
1247   printf ("\n");
1248   printf ("   You should have received a copy of the GNU General Public License\n");
1249   printf ("   along with GCC; see the file COPYING3.  If not see\n");
1250   printf ("   <http://www.gnu.org/licenses/>.  */\n");
1251   printf ("\n");
1252   printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n");
1253   printf ("   Make any required changes there.  */\n");
1254   printf ("\n");
1255   printf ("#include \"config.h\"\n");
1256   printf ("#include \"system.h\"\n");
1257   printf ("#include \"coretypes.h\"\n");
1258   printf ("#include \"backend.h\"\n");
1259   printf ("#include \"rtl.h\"\n");
1260   printf ("#include \"expmed.h\"\n");
1261   printf ("#include \"%s-multiply.h\"\n\n", ARCH);
1262   create_insn_code_compression_table ();
1263 
1264   /* Try all combinations of operators and see what constants we
1265      produce.  For each possible constant, record the most efficient
1266      way to generate it.  */
1267   find_sequences (s, best_solution);
1268 
1269   printf ("const struct %s_multiply_insn_seq "
1270 	  "%s_multiply_insn_seq_table[] = {\n",
1271 	  ARCH, ARCH);
1272 
1273   const char *comma_separator = "";
1274 
1275   ExpressionTreeMap::iterator i (best_solution.begin ());
1276   for (; i != best_solution.end (); ++i)
1277     {
1278       ExpressionTree *s = (*i).second;
1279       const MUL_TYPE n = (*i).first;
1280 
1281       if (n == 0 || n == 1)
1282 	{
1283 	  /* Both of these require zero operations, so these entries
1284 	     are bogus and should never be used.  */
1285 	  continue;
1286 	}
1287 
1288       /* Prune the list of entries to keep the table to a reasonable
1289 	 size.  */
1290 #ifdef TILEPRO
1291       if (!tilepro_emit (n, num_ops (s)))
1292 	continue;
1293 #else
1294       if (!tilegx_emit (n, num_ops (s)))
1295 	continue;
1296 #endif
1297 
1298       printf ("%s", comma_separator);
1299 
1300 #ifdef TILEPRO
1301       const MUL_TYPE int_min = INT32_MIN;
1302 #else
1303       const MUL_TYPE int_min = INT64_MIN;
1304 #endif
1305       if (n == int_min)
1306 	{
1307 	  /* Handle C's woeful lack of unary negation. Without this,
1308 	     printing out INT_MIN in decimal will yield an unsigned
1309 	     int which could generate a compiler warning.  */
1310 #ifdef TILEPRO
1311 	  printf ("  {%d - 1 /* 0x%x */ ,\n   {", n + 1,
1312 		  (unsigned) n);
1313 #else
1314 	  printf ("  {%lldll - 1 /* 0x%llx */ ,\n   {", n + 1,
1315 		  (unsigned MUL_TYPE) n);
1316 #endif
1317 	}
1318       else
1319 	{
1320 #ifdef TILEPRO
1321 	  printf ("  {%d /* 0x%x */ ,\n   {", n, (unsigned) n);
1322 #else
1323 	  printf ("  {%lldll /* 0x%llx */ ,\n   {", n, (unsigned MUL_TYPE) n);
1324 #endif
1325 	}
1326 
1327       bool first = true;
1328       for (int j = 0; j < s->m_num_vals; j++)
1329 	{
1330 	  Expr *e = &s->m_exprs[j];
1331 
1332 	  const Operator *op = e->m_op;
1333 	  if (op == NULL)
1334 	    continue;
1335 
1336 	  char buf[1024];
1337 	  snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s",
1338 		    first ? "" : "    ",
1339 		    op->m_top_index,
1340 		    e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ",");
1341 
1342 	  char opnd0[10];
1343 	  if (e->m_lhs)
1344 	    snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs);
1345 	  else
1346 	    snprintf (opnd0, sizeof opnd0, "zero");
1347 
1348 	  printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n",
1349 		  buf, op->m_name, j, opnd0,
1350 		  op->is_unary () ? "" : "r", e->m_rhs);
1351 
1352 	  first = false;
1353 	}
1354       printf ("   }");
1355       comma_separator = ",\n";
1356     }
1357 
1358   printf ("\n};\n\n");
1359   printf ("const int %s_multiply_insn_seq_table_size =\n"
1360 	  "  (int) (sizeof %s_multiply_insn_seq_table\n"
1361 	  "         / sizeof %s_multiply_insn_seq_table[0]);\n",
1362 	  ARCH, ARCH, ARCH);
1363 
1364   return EXIT_SUCCESS;
1365 }
1366