xref: /netbsd-src/external/gpl3/gcc/dist/gcc/gimple-ssa-store-merging.cc (revision 0a3071956a3a9fdebdbf7f338cf2d439b45fc728)
1 /* GIMPLE store merging and byte swapping passes.
2    Copyright (C) 2009-2022 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 /* The purpose of the store merging pass is to combine multiple memory stores
22    of constant values, values loaded from memory, bitwise operations on those,
23    or bit-field values, to consecutive locations, into fewer wider stores.
24 
25    For example, if we have a sequence performing four byte stores to
26    consecutive memory locations:
27    [p     ] := imm1;
28    [p + 1B] := imm2;
29    [p + 2B] := imm3;
30    [p + 3B] := imm4;
31    we can transform this into a single 4-byte store if the target supports it:
32    [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33 
34    Or:
35    [p     ] := [q     ];
36    [p + 1B] := [q + 1B];
37    [p + 2B] := [q + 2B];
38    [p + 3B] := [q + 3B];
39    if there is no overlap, this can be transformed into a single 4-byte
40    load followed by a single 4-byte store.
41 
42    Or:
43    [p     ] := [q     ] ^ imm1;
44    [p + 1B] := [q + 1B] ^ imm2;
45    [p + 2B] := [q + 2B] ^ imm3;
46    [p + 3B] := [q + 3B] ^ imm4;
47    if there is no overlap, this can be transformed into a single 4-byte
48    load, XORed with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49 
50    Or:
51    [p:1 ] := imm;
52    [p:31] := val & 0x7FFFFFFF;
53    we can transform this into a single 4-byte store if the target supports it:
54    [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55 
56    The algorithm is applied to each basic block in three phases:
57 
58    1) Scan through the basic block and record assignments to destinations
59    that can be expressed as a store to memory of a certain size at a certain
60    bit offset from base expressions we can handle.  For bit-fields we also
61    record the surrounding bit region, i.e. bits that could be stored in
62    a read-modify-write operation when storing the bit-field.  Record store
63    chains to different bases in a hash_map (m_stores) and make sure to
64    terminate such chains when appropriate (for example when the stored
65    values get used subsequently).
66    These stores can be a result of structure element initializers, array stores
67    etc.  A store_immediate_info object is recorded for every such store.
68    Record as many such assignments to a single base as possible until a
69    statement that interferes with the store sequence is encountered.
70    Each store has up to 2 operands, each of which can be either a constant,
71    a memory load or an SSA name, from which the value to be stored can be
72    computed.  At most one of the operands can be a constant.  The operands
73    are recorded in a store_operand_info struct.
74 
75    2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76    store_immediate_info objects) and coalesce contiguous stores into
77    merged_store_group objects.  For bit-field stores the stores themselves
78    need not be contiguous; only their surrounding bit regions have to be.
79    If the kind of expression being stored differs between adjacent stores,
80    such as one store storing a constant and the following one storing a
81    value loaded from memory, or if the loaded memory objects are not
82    adjacent, a new merged_store_group is created as well.
83 
84    For example, given the stores:
85    [p     ] := 0;
86    [p + 1B] := 1;
87    [p + 3B] := 0;
88    [p + 4B] := 1;
89    [p + 5B] := 0;
90    [p + 6B] := 0;
91    This phase would produce two merged_store_group objects, one recording the
92    two bytes stored in the memory region [p : p + 1] and another
93    recording the four bytes stored in the memory region [p + 3 : p + 6].
94 
95    3) The merged_store_group objects produced in phase 2) are processed
96    to generate the sequence of wider stores that set the contiguous memory
97    regions to the sequence of bytes that corresponds to them.  This may emit
98    multiple stores per store group to handle contiguous stores that are not
99    of a size that is a power of 2.  For example it can try to emit a 40-bit
100    store as a 32-bit store followed by an 8-bit store.
101    We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT
102    or TARGET_SLOW_UNALIGNED_ACCESS settings.
103 
104    Note on endianness and example:
105    Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106    [p     ] := 0x1234;
107    [p + 2B] := 0x5678;
108    [p + 4B] := 0xab;
109    [p + 5B] := 0xcd;
110 
111    The memory layout for little-endian (LE) and big-endian (BE) must be:
112   p |LE|BE|
113   ---------
114   0 |34|12|
115   1 |12|34|
116   2 |78|56|
117   3 |56|78|
118   4 |ab|ab|
119   5 |cd|cd|
120 
121   To merge these into a single 48-bit merged value 'val' in phase 2)
122   on little-endian we insert stores to higher (consecutive) bitpositions
123   into the most significant bits of the merged value.
124   The final merged value would be: 0xcdab56781234
125 
126   For big-endian we insert stores to higher bitpositions into the least
127   significant bits of the merged value.
128   The final merged value would be: 0x12345678abcd
129 
130   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131   followed by a 16-bit store.  Again, we must consider endianness when
132   breaking down the 48-bit value 'val' computed above.
133   For little endian we emit:
134   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135   [p + 4B] (16-bit) := 0xcdab;    // (val & 0xffff00000000) >> 32;
136 
137   Whereas for big-endian we emit:
138   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */
140 
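/* Illustrative sketch (editor's example, not part of the pass): the
   little-endian merged value and its phase 3 split from the example above,
   redone with plain integer arithmetic as a standalone program.  Names and
   constants are purely for illustration.

     #include <stdint.h>
     #include <assert.h>

     int
     main (void)
     {
       // On little-endian, stores to higher addresses land in more
       // significant bits of the merged value.
       uint64_t val = 0;
       val |= (uint64_t) 0x1234;               // [p    ], bits  0..15
       val |= (uint64_t) 0x5678 << 16;         // [p + 2], bits 16..31
       val |= (uint64_t) 0xab << 32;           // [p + 4], bits 32..39
       val |= (uint64_t) 0xcd << 40;           // [p + 5], bits 40..47
       assert (val == 0xcdab56781234ULL);

       // Phase 3: emit as a 32-bit store followed by a 16-bit store.
       uint32_t lo = val & 0xffffffffULL;      // 0x56781234 -> [p]
       uint16_t hi = (val >> 32) & 0xffffULL;  // 0xcdab     -> [p + 4]
       assert (lo == 0x56781234 && hi == 0xcdab);
       return 0;
     }  */
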
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h"	/* For get_bit_range.  */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
173 
174 /* The maximum size (in bits) of the stores this pass should generate.  */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177 
178 /* Limit to bound the number of aliasing checks for loads with the same
179    vuse as the corresponding store.  */
180 #define MAX_STORE_ALIAS_CHECKS 64
181 
182 namespace {
183 
184 struct bswap_stat
185 {
186   /* Number of hand-written 16-bit nop / bswaps found.  */
187   int found_16bit;
188 
189   /* Number of hand-written 32-bit nop / bswaps found.  */
190   int found_32bit;
191 
192   /* Number of hand-written 64-bit nop / bswaps found.  */
193   int found_64bit;
194 } nop_stats, bswap_stats;
195 
196 /* A symbolic number structure is used to detect byte permutation and selection
197    patterns of a source.  To achieve that, its field N contains an artificial
198    number consisting of BITS_PER_MARKER-sized markers tracking where each
199    byte comes from in the source:
200 
201    0	   - target byte has the value 0
202    FF	   - target byte has an unknown value (e.g. due to sign extension)
203    1..size - marker value minus one is the byte index in the source (0 for lsb).
204 
205    To detect permutations on memory sources (arrays and structures), a symbolic
206    number is also associated:
207    - a base address BASE_ADDR and an OFFSET giving the address of the source;
208    - a range which gives the difference between the highest and lowest accessed
209      memory location to make such a symbolic number;
210    - the address SRC of the source element of lowest address as a convenience
211      to easily get BASE_ADDR + offset + lowest bytepos;
212    - the number of expressions N_OPS bitwise ORed together, used as an
213      approximate cost of the computation.
214 
215    Note 1: the range is different from size as size reflects the size of the
216    type of the current expression.  For instance, for an array char a[],
217    (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218    (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219    time a range of 1.
220 
221    Note 2: for non-memory sources, range holds the same value as size.
222 
223    Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
224 
225 struct symbolic_number {
226   uint64_t n;
227   tree type;
228   tree base_addr;
229   tree offset;
230   poly_int64_pod bytepos;
231   tree src;
232   tree alias_set;
233   tree vuse;
234   unsigned HOST_WIDE_INT range;
235   int n_ops;
236 };
237 
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242   ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243 
244 /* The number which the find_bswap_or_nop_1 result should match in
245    order to have a nop.  The number is masked according to the size of
246    the symbolic number before using it.  */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248   (uint64_t)0x08070605 << 32 | 0x04030201)
249 
250 /* The number which the find_bswap_or_nop_1 result should match in
251    order to have a byte swap.  The number is masked according to the
252    size of the symbolic number before using it.  */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254   (uint64_t)0x01020304 << 32 | 0x05060708)
255 
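/* For illustration (editor's sketch, not used by the pass): what the nop
   and bswap targets look like once narrowed to a 4-byte symbolic number,
   as done later in find_bswap_or_nop_finalize.

     #include <stdint.h>
     #include <assert.h>

     int
     main (void)
     {
       const uint64_t cmpnop = (uint64_t) 0x08070605 << 32 | 0x04030201;
       const uint64_t cmpxchg = (uint64_t) 0x01020304 << 32 | 0x05060708;
       // Narrow both to four markers (a 32-bit symbolic number).
       uint64_t nop32 = cmpnop & 0xffffffffULL;   // keep the low markers
       uint64_t xchg32 = cmpxchg >> 32;           // drop the high markers
       assert (nop32 == 0x04030201ULL);   // identity: byte i stays in slot i
       assert (xchg32 == 0x01020304ULL);  // bswap: byte order reversed
       return 0;
     }  */
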
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257    number N.  Return false if the requested operation is not permitted
258    on a symbolic number.  */
259 
260 inline bool
261 do_shift_rotate (enum tree_code code,
262 		 struct symbolic_number *n,
263 		 int count)
264 {
265   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266   uint64_t head_marker;
267 
268   if (count < 0
269       || count >= TYPE_PRECISION (n->type)
270       || count % BITS_PER_UNIT != 0)
271     return false;
272   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273 
274   /* Zero out the extra bits of N in order to avoid them being shifted
275      into the significant bits.  */
276   if (size < 64 / BITS_PER_MARKER)
277     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278 
279   switch (code)
280     {
281     case LSHIFT_EXPR:
282       n->n <<= count;
283       break;
284     case RSHIFT_EXPR:
285       head_marker = HEAD_MARKER (n->n, size);
286       n->n >>= count;
287       /* Arithmetic shift of signed type: result is dependent on the value.  */
288       if (!TYPE_UNSIGNED (n->type) && head_marker)
289 	for (i = 0; i < count / BITS_PER_MARKER; i++)
290 	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 		  << ((size - 1 - i) * BITS_PER_MARKER);
292       break;
293     case LROTATE_EXPR:
294       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295       break;
296     case RROTATE_EXPR:
297       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298       break;
299     default:
300       return false;
301     }
302   /* Zero unused bits for size.  */
303   if (size < 64 / BITS_PER_MARKER)
304     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305   return true;
306 }
307 
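/* Worked example (illustrative, assuming 8-bit markers): starting from the
   32-bit identity number 0x04030201 in an unsigned type, a shift by 8 bits
   moves whole markers:

     LSHIFT_EXPR by 8:  0x04030201 -> 0x03020100   // low byte becomes 0
     RSHIFT_EXPR by 8:  0x04030201 -> 0x00040302   // high byte becomes 0

   For a signed type, RSHIFT_EXPR instead fills the vacated high markers
   with MARKER_BYTE_UNKNOWN (0xff) whenever the head marker is non-zero,
   because an arithmetic shift's result depends on the sign bit.  */
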
308 /* Perform sanity checking for the symbolic number N and the gimple
309    statement STMT.  */
310 
311 inline bool
312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313 {
314   tree lhs_type;
315 
316   lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
317 
318   if (TREE_CODE (lhs_type) != INTEGER_TYPE
319       && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320     return false;
321 
322   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323     return false;
324 
325   return true;
326 }
327 
328 /* Initialize the symbolic number N for the bswap pass from the base element
329    SRC manipulated by the bitwise OR expression.  */
330 
331 bool
332 init_symbolic_number (struct symbolic_number *n, tree src)
333 {
334   int size;
335 
336   if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
337     return false;
338 
339   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340   n->src = src;
341 
342   /* Set up the symbolic number N by setting each byte to a value between 1 and
343      the byte size of SRC.  The highest order byte is set to that size and the
344      lowest order byte to 1.  */
345   n->type = TREE_TYPE (src);
346   size = TYPE_PRECISION (n->type);
347   if (size % BITS_PER_UNIT != 0)
348     return false;
349   size /= BITS_PER_UNIT;
350   if (size > 64 / BITS_PER_MARKER)
351     return false;
352   n->range = size;
353   n->n = CMPNOP;
354   n->n_ops = 1;
355 
356   if (size < 64 / BITS_PER_MARKER)
357     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358 
359   return true;
360 }
361 
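/* Example (illustrative): for a 16-bit unsigned SRC, init_symbolic_number
   leaves

     n->range == 2         // size of SRC in bytes
     n->n     == 0x0201    // CMPNOP masked down to two markers
     n->n_ops == 1

   i.e. marker 1 for the least significant source byte and marker 2 for
   the most significant one.  */
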
362 /* Check if STMT might be a byte swap or a nop from a memory source and return
363    the answer. If so, REF is that memory source and the base of the memory area
364    accessed and the offset of the access from that base are recorded in N.  */
365 
366 bool
367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368 {
369   /* Leaf node is an array or component ref. Memorize its base and
370      offset from base to compare to other such leaf nodes.  */
371   poly_int64 bitsize, bitpos, bytepos;
372   machine_mode mode;
373   int unsignedp, reversep, volatilep;
374   tree offset, base_addr;
375 
376   /* Not prepared to handle PDP endian.  */
377   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378     return false;
379 
380   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381     return false;
382 
383   base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 				   &unsignedp, &reversep, &volatilep);
385 
386   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387     /* Do not rewrite TARGET_MEM_REF.  */
388     return false;
389   else if (TREE_CODE (base_addr) == MEM_REF)
390     {
391       poly_offset_int bit_offset = 0;
392       tree off = TREE_OPERAND (base_addr, 1);
393 
394       if (!integer_zerop (off))
395 	{
396 	  poly_offset_int boff = mem_ref_offset (base_addr);
397 	  boff <<= LOG2_BITS_PER_UNIT;
398 	  bit_offset += boff;
399 	}
400 
401       base_addr = TREE_OPERAND (base_addr, 0);
402 
403       /* Avoid returning a negative bitpos as this may wreak havoc later.  */
404       if (maybe_lt (bit_offset, 0))
405 	{
406 	  tree byte_offset = wide_int_to_tree
407 	    (sizetype, bits_to_bytes_round_down (bit_offset));
408 	  bit_offset = num_trailing_bits (bit_offset);
409 	  if (offset)
410 	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 	  else
412 	    offset = byte_offset;
413 	}
414 
415       bitpos += bit_offset.force_shwi ();
416     }
417   else
418     base_addr = build_fold_addr_expr (base_addr);
419 
420   if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421     return false;
422   if (!multiple_p (bitsize, BITS_PER_UNIT))
423     return false;
424   if (reversep)
425     return false;
426 
427   if (!init_symbolic_number (n, ref))
428     return false;
429   n->base_addr = base_addr;
430   n->offset = offset;
431   n->bytepos = bytepos;
432   n->alias_set = reference_alias_ptr_type (ref);
433   n->vuse = gimple_vuse (stmt);
434   return true;
435 }
436 
437 /* Compute the symbolic number N representing the result of a bitwise OR,
438    bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
439    are respectively SOURCE_STMT1 and SOURCE_STMT2.  CODE is the operation.  */
440 
441 gimple *
442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 			gimple *source_stmt2, struct symbolic_number *n2,
444 			struct symbolic_number *n, enum tree_code code)
445 {
446   int i, size;
447   uint64_t mask;
448   gimple *source_stmt;
449   struct symbolic_number *n_start;
450 
451   tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452   if (TREE_CODE (rhs1) == BIT_FIELD_REF
453       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454     rhs1 = TREE_OPERAND (rhs1, 0);
455   tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456   if (TREE_CODE (rhs2) == BIT_FIELD_REF
457       && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458     rhs2 = TREE_OPERAND (rhs2, 0);
459 
460   /* Sources are different, cancel bswap if they are not memory locations with
461      the same base (array, structure, ...).  */
462   if (rhs1 != rhs2)
463     {
464       uint64_t inc;
465       HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466       struct symbolic_number *toinc_n_ptr, *n_end;
467       basic_block bb1, bb2;
468 
469       if (!n1->base_addr || !n2->base_addr
470 	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 	return NULL;
472 
473       if (!n1->offset != !n2->offset
474 	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 	return NULL;
476 
477       start1 = 0;
478       if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 	return NULL;
480 
481       if (start1 < start2)
482 	{
483 	  n_start = n1;
484 	  start_sub = start2 - start1;
485 	}
486       else
487 	{
488 	  n_start = n2;
489 	  start_sub = start1 - start2;
490 	}
491 
492       bb1 = gimple_bb (source_stmt1);
493       bb2 = gimple_bb (source_stmt2);
494       if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 	source_stmt = source_stmt1;
496       else
497 	source_stmt = source_stmt2;
498 
499       /* Find the highest address at which a load is performed and
500 	 compute related info.  */
501       end1 = start1 + (n1->range - 1);
502       end2 = start2 + (n2->range - 1);
503       if (end1 < end2)
504 	{
505 	  end = end2;
506 	  end_sub = end2 - end1;
507 	}
508       else
509 	{
510 	  end = end1;
511 	  end_sub = end1 - end2;
512 	}
513       n_end = (end2 > end1) ? n2 : n1;
514 
515       /* Find symbolic number whose lsb is the most significant.  */
516       if (BYTES_BIG_ENDIAN)
517 	toinc_n_ptr = (n_end == n1) ? n2 : n1;
518       else
519 	toinc_n_ptr = (n_start == n1) ? n2 : n1;
520 
521       n->range = end - MIN (start1, start2) + 1;
522 
523       /* Check that the range of memory covered can be represented by
524 	 a symbolic number.  */
525       if (n->range > 64 / BITS_PER_MARKER)
526 	return NULL;
527 
528       /* Reinterpret byte marks in symbolic number holding the value of
529 	 bigger weight according to target endianness.  */
530       inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533 	{
534 	  unsigned marker
535 	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 	  if (marker && marker != MARKER_BYTE_UNKNOWN)
537 	    toinc_n_ptr->n += inc;
538 	}
539     }
540   else
541     {
542       n->range = n1->range;
543       n_start = n1;
544       source_stmt = source_stmt1;
545     }
546 
547   if (!n1->alias_set
548       || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549     n->alias_set = n1->alias_set;
550   else
551     n->alias_set = ptr_type_node;
552   n->vuse = n_start->vuse;
553   n->base_addr = n_start->base_addr;
554   n->offset = n_start->offset;
555   n->src = n_start->src;
556   n->bytepos = n_start->bytepos;
557   n->type = n_start->type;
558   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559   uint64_t res_n = n1->n | n2->n;
560 
561   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
562     {
563       uint64_t masked1, masked2;
564 
565       masked1 = n1->n & mask;
566       masked2 = n2->n & mask;
567       /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x.  */
568       if (masked1 && masked2)
569 	{
570 	  /* + can carry into upper bits, just punt.  */
571 	  if (code == PLUS_EXPR)
572 	    return NULL;
573 	  /* x | x is still x.  */
574 	  if (code == BIT_IOR_EXPR && masked1 == masked2)
575 	    continue;
576 	  if (code == BIT_XOR_EXPR)
577 	    {
578 	      /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
579 		 unknown values and unknown ^ unknown is unknown.  */
580 	      if (masked1 == masked2
581 		  && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
582 				 << i * BITS_PER_MARKER))
583 		{
584 		  res_n &= ~mask;
585 		  continue;
586 		}
587 	    }
588 	  /* Otherwise set the byte to unknown, it might still be
589 	     later masked off.  */
590 	  res_n |= mask;
591 	}
592     }
593   n->n = res_n;
594   n->n_ops = n1->n_ops + n2->n_ops;
595 
596   return source_stmt;
597 }
598 
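/* Worked example (illustrative, little-endian, integer promotions ignored):
   merging the two loads of

     uint16_t v = p[0] | (p[1] << 8);

   The symbolic number of p[0] is 0x01; the one of p[1] has already been
   left-shifted by find_bswap_or_nop_1 to 0x0100.  Since p[1] sits one byte
   above p[0], the non-zero markers of the higher-addressed load are
   incremented by start_sub == 1, giving 0x0200, and the OR of the two
   numbers is 0x0201: a 2-byte identity (nop) load covering range == 2.  */
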
599 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
600    the operation given by the rhs of STMT on the result.  If the operation
601    can be successfully performed, the function returns a gimple stmt whose
602    rhs's first tree is the expression of the source operand, and NULL
603    otherwise.  */
604 
605 gimple *
606 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
607 {
608   enum tree_code code;
609   tree rhs1, rhs2 = NULL;
610   gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
611   enum gimple_rhs_class rhs_class;
612 
613   if (!limit || !is_gimple_assign (stmt))
614     return NULL;
615 
616   rhs1 = gimple_assign_rhs1 (stmt);
617 
618   if (find_bswap_or_nop_load (stmt, rhs1, n))
619     return stmt;
620 
621   /* Handle BIT_FIELD_REF.  */
622   if (TREE_CODE (rhs1) == BIT_FIELD_REF
623       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
624     {
625       if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
626 	  || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
627 	return NULL;
628 
629       unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
630       unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
631       if (bitpos % BITS_PER_UNIT == 0
632 	  && bitsize % BITS_PER_UNIT == 0
633 	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
634 	{
635 	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
636 	  if (BYTES_BIG_ENDIAN)
637 	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
638 
639 	  /* Shift.  */
640 	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
641 	    return NULL;
642 
643 	  /* Mask.  */
644 	  uint64_t mask = 0;
645 	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
646 	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
647 	       i++, tmp <<= BITS_PER_UNIT)
648 	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
649 	  n->n &= mask;
650 
651 	  /* Convert.  */
652 	  n->type = TREE_TYPE (rhs1);
653 	  if (!n->base_addr)
654 	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
655 
656 	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
657 	}
658 
659       return NULL;
660     }
661 
662   if (TREE_CODE (rhs1) != SSA_NAME)
663     return NULL;
664 
665   code = gimple_assign_rhs_code (stmt);
666   rhs_class = gimple_assign_rhs_class (stmt);
667   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
668 
669   if (rhs_class == GIMPLE_BINARY_RHS)
670     rhs2 = gimple_assign_rhs2 (stmt);
671 
672   /* Handle unary rhs and binary rhs with integer constants as second
673      operand.  */
674 
675   if (rhs_class == GIMPLE_UNARY_RHS
676       || (rhs_class == GIMPLE_BINARY_RHS
677 	  && TREE_CODE (rhs2) == INTEGER_CST))
678     {
679       if (code != BIT_AND_EXPR
680 	  && code != LSHIFT_EXPR
681 	  && code != RSHIFT_EXPR
682 	  && code != LROTATE_EXPR
683 	  && code != RROTATE_EXPR
684 	  && !CONVERT_EXPR_CODE_P (code))
685 	return NULL;
686 
687       source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
688 
689       /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
690 	 we have to initialize the symbolic number.  */
691       if (!source_stmt1)
692 	{
693 	  if (gimple_assign_load_p (stmt)
694 	      || !init_symbolic_number (n, rhs1))
695 	    return NULL;
696 	  source_stmt1 = stmt;
697 	}
698 
699       switch (code)
700 	{
701 	case BIT_AND_EXPR:
702 	  {
703 	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
704 	    uint64_t val = int_cst_value (rhs2), mask = 0;
705 	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
706 
707 	    /* Only constants masking full bytes are allowed.  */
708 	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
709 	      if ((val & tmp) != 0 && (val & tmp) != tmp)
710 		return NULL;
711 	      else if (val & tmp)
712 		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
713 
714 	    n->n &= mask;
715 	  }
716 	  break;
717 	case LSHIFT_EXPR:
718 	case RSHIFT_EXPR:
719 	case LROTATE_EXPR:
720 	case RROTATE_EXPR:
721 	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
722 	    return NULL;
723 	  break;
724 	CASE_CONVERT:
725 	  {
726 	    int i, type_size, old_type_size;
727 	    tree type;
728 
729 	    type = TREE_TYPE (gimple_assign_lhs (stmt));
730 	    type_size = TYPE_PRECISION (type);
731 	    if (type_size % BITS_PER_UNIT != 0)
732 	      return NULL;
733 	    type_size /= BITS_PER_UNIT;
734 	    if (type_size > 64 / BITS_PER_MARKER)
735 	      return NULL;
736 
737 	    /* Sign extension: result is dependent on the value.  */
738 	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
739 	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
740 		&& HEAD_MARKER (n->n, old_type_size))
741 	      for (i = 0; i < type_size - old_type_size; i++)
742 		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
743 			<< ((type_size - 1 - i) * BITS_PER_MARKER);
744 
745 	    if (type_size < 64 / BITS_PER_MARKER)
746 	      {
747 		/* If STMT casts to a smaller type mask out the bits not
748 		   belonging to the target type.  */
749 		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
750 	      }
751 	    n->type = type;
752 	    if (!n->base_addr)
753 	      n->range = type_size;
754 	  }
755 	  break;
756 	default:
757 	  return NULL;
758 	};
759       return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
760     }
761 
762   /* Handle binary rhs.  */
763 
764   if (rhs_class == GIMPLE_BINARY_RHS)
765     {
766       struct symbolic_number n1, n2;
767       gimple *source_stmt, *source_stmt2;
768 
769       if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
770 	return NULL;
771 
772       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
773 
774       switch (code)
775 	{
776 	case BIT_IOR_EXPR:
777 	case BIT_XOR_EXPR:
778 	case PLUS_EXPR:
779 	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
780 
781 	  if (!source_stmt1)
782 	    return NULL;
783 
784 	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
785 
786 	  if (!source_stmt2)
787 	    return NULL;
788 
789 	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
790 	    return NULL;
791 
792 	  if (n1.vuse != n2.vuse)
793 	    return NULL;
794 
795 	  source_stmt
796 	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
797 				      code);
798 
799 	  if (!source_stmt)
800 	    return NULL;
801 
802 	  if (!verify_symbolic_number_p (n, stmt))
803 	    return NULL;
804 
805 	  break;
806 	default:
807 	  return NULL;
808 	}
809       return source_stmt;
810     }
811   return NULL;
812 }
813 
814 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
815    *CMPXCHG, *CMPNOP and adjust *N.  */
816 
817 void
818 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
819 			    uint64_t *cmpnop, bool *cast64_to_32)
820 {
821   unsigned rsize;
822   uint64_t tmpn, mask;
823 
824   /* The number which the find_bswap_or_nop_1 result should match in order
825      to have a full byte swap.  The number is shifted to the right
826      according to the size of the symbolic number before using it.  */
827   *cmpxchg = CMPXCHG;
828   *cmpnop = CMPNOP;
829   *cast64_to_32 = false;
830 
831   /* Find real size of result (highest non-zero byte).  */
832   if (n->base_addr)
833     for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
834   else
835     rsize = n->range;
836 
837   /* Zero out the bits corresponding to untouched bytes in original gimple
838      expression.  */
839   if (n->range < (int) sizeof (int64_t))
840     {
841       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
842       if (n->base_addr == NULL
843 	  && n->range == 4
844 	  && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
845 	{
846 	  /* If all bytes in n->n are either 0 or in [5..8] range, this
847 	     might be a candidate for (unsigned) __builtin_bswap64 (src).
848 	     It is not worth it for (unsigned short) __builtin_bswap64 (src)
849 	     or (unsigned short) __builtin_bswap32 (src).  */
850 	  *cast64_to_32 = true;
851 	  for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
852 	    if ((tmpn & MARKER_MASK)
853 		&& ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
854 	      {
855 		*cast64_to_32 = false;
856 		break;
857 	      }
858 	}
859       if (*cast64_to_32)
860 	*cmpxchg &= mask;
861       else
862 	*cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
863       *cmpnop &= mask;
864     }
865 
866   /* Zero out the bits corresponding to unused bytes in the result of the
867      gimple expression.  */
868   if (rsize < n->range)
869     {
870       if (BYTES_BIG_ENDIAN)
871 	{
872 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
873 	  *cmpxchg &= mask;
874 	  if (n->range - rsize == sizeof (int64_t))
875 	    *cmpnop = 0;
876 	  else
877 	    *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
878 	}
879       else
880 	{
881 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
882 	  if (n->range - rsize == sizeof (int64_t))
883 	    *cmpxchg = 0;
884 	  else
885 	    *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
886 	  *cmpnop &= mask;
887 	}
888       n->range = rsize;
889     }
890 
891   if (*cast64_to_32)
892     n->range = 8;
893   n->range *= BITS_PER_UNIT;
894 }
895 
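/* Example (illustrative): for a 2-byte symbolic number the targets become

     *cmpnop  == CMPNOP & 0xffff                          == 0x0201
     *cmpxchg == CMPXCHG >> ((8 - 2) * BITS_PER_MARKER)   == 0x0102

   and on return N->range is converted from bytes to bits (2 -> 16).  */
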
896 /* Check if STMT completes a bswap implementation or a read in a given
897    endianness consisting of ORs, SHIFTs and ANDs, and set *BSWAP
898    accordingly.  It also sets N to represent the kind of operations
899    performed: the size of the resulting expression and whether it works on
900    a memory source, and if so its alias set and vuse.  Finally, the
901    function returns a stmt whose rhs's first tree is the source
902    expression.  */
903 
904 gimple *
905 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
906 		   bool *cast64_to_32, uint64_t *mask)
907 {
908   tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
909   if (!tree_fits_uhwi_p (type_size))
910     return NULL;
911 
912   /* The last parameter determines the search depth limit.  It usually
913      correlates directly to the number n of bytes to be touched.  We
914      increase that number by 2 * (log2(n) + 1) here in order to also
915      cover signed -> unsigned conversions of the src operand as can be seen
916      in libgcc, and for initial shift/and operation of the src operand.  */
917   int limit = tree_to_uhwi (type_size);
918   limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
919   gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
920 
921   if (!ins_stmt)
922     {
923       if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
924 	  || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
925 	return NULL;
926       unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
927       if (sz != 16 && sz != 32 && sz != 64)
928 	return NULL;
929       tree rhs = gimple_assign_rhs1 (stmt);
930       if (CONSTRUCTOR_NELTS (rhs) == 0)
931 	return NULL;
932       tree eltype = TREE_TYPE (TREE_TYPE (rhs));
933       unsigned HOST_WIDE_INT eltsz
934 	= int_size_in_bytes (eltype) * BITS_PER_UNIT;
935       if (TYPE_PRECISION (eltype) != eltsz)
936 	return NULL;
937       constructor_elt *elt;
938       unsigned int i;
939       tree type = build_nonstandard_integer_type (sz, 1);
940       FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
941 	{
942 	  if (TREE_CODE (elt->value) != SSA_NAME
943 	      || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
944 	    return NULL;
945 	  struct symbolic_number n1;
946 	  gimple *source_stmt
947 	    = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
948 				   limit - 1);
949 
950 	  if (!source_stmt)
951 	    return NULL;
952 
953 	  n1.type = type;
954 	  if (!n1.base_addr)
955 	    n1.range = sz / BITS_PER_UNIT;
956 
957 	  if (i == 0)
958 	    {
959 	      ins_stmt = source_stmt;
960 	      *n = n1;
961 	    }
962 	  else
963 	    {
964 	      if (n->vuse != n1.vuse)
965 		return NULL;
966 
967 	      struct symbolic_number n0 = *n;
968 
969 	      if (!BYTES_BIG_ENDIAN)
970 		{
971 		  if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
972 		    return NULL;
973 		}
974 	      else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
975 		return NULL;
976 	      ins_stmt
977 		= perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n,
978 					  BIT_IOR_EXPR);
979 
980 	      if (!ins_stmt)
981 		return NULL;
982 	    }
983 	}
984     }
985 
986   uint64_t cmpxchg, cmpnop;
987   find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
988 
989   /* A complete byte swap should make the symbolic number start with
990      the largest digit in the highest order byte.  An unchanged symbolic
991      number indicates a read with the same endianness as the target architecture.  */
992   *mask = ~(uint64_t) 0;
993   if (n->n == cmpnop)
994     *bswap = false;
995   else if (n->n == cmpxchg)
996     *bswap = true;
997   else
998     {
999       int set = 0;
1000       for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
1001 	if ((n->n & msk) == 0)
1002 	  *mask &= ~msk;
1003 	else if ((n->n & msk) == (cmpxchg & msk))
1004 	  set++;
1005 	else
1006 	  return NULL;
1007       if (set < 2)
1008 	return NULL;
1009       *bswap = true;
1010     }
1011 
1012   /* Useless bit manipulation performed by code.  */
1013   if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
1014     return NULL;
1015 
1016   return ins_stmt;
1017 }
1018 
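/* Illustrative sketch (editor's example, not part of the pass): a
   hand-written 32-bit byte swap of the kind find_bswap_or_nop is intended
   to recognize, so that the whole expression can be replaced by a single
   __builtin_bswap32 on targets that provide one.

     #include <stdint.h>

     uint32_t
     my_bswap32 (uint32_t x)
     {
       return ((x & 0x000000ffu) << 24)
              | ((x & 0x0000ff00u) << 8)
              | ((x & 0x00ff0000u) >> 8)
              | ((x & 0xff000000u) >> 24);
     }  */
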
1019 const pass_data pass_data_optimize_bswap =
1020 {
1021   GIMPLE_PASS, /* type */
1022   "bswap", /* name */
1023   OPTGROUP_NONE, /* optinfo_flags */
1024   TV_NONE, /* tv_id */
1025   PROP_ssa, /* properties_required */
1026   0, /* properties_provided */
1027   0, /* properties_destroyed */
1028   0, /* todo_flags_start */
1029   0, /* todo_flags_finish */
1030 };
1031 
1032 class pass_optimize_bswap : public gimple_opt_pass
1033 {
1034 public:
1035   pass_optimize_bswap (gcc::context *ctxt)
1036     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
1037   {}
1038 
1039   /* opt_pass methods: */
1040   virtual bool gate (function *)
1041     {
1042       return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
1043     }
1044 
1045   virtual unsigned int execute (function *);
1046 
1047 }; // class pass_optimize_bswap
1048 
1049 /* Helper function for bswap_replace.  Build VIEW_CONVERT_EXPR from
1050    VAL to TYPE.  If VAL has a different type size, emit a NOP_EXPR cast
1051    first.  */
1052 
1053 static tree
1054 bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
1055 		    bool before)
1056 {
1057   gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
1058 	      || POINTER_TYPE_P (TREE_TYPE (val)));
1059   if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
1060     {
1061       HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
1062       if (POINTER_TYPE_P (TREE_TYPE (val)))
1063 	{
1064 	  gimple *g
1065 	    = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
1066 				   NOP_EXPR, val);
1067 	  if (before)
1068 	    gsi_insert_before (gsi, g, GSI_SAME_STMT);
1069 	  else
1070 	    gsi_insert_after (gsi, g, GSI_NEW_STMT);
1071 	  val = gimple_assign_lhs (g);
1072 	}
1073       tree itype = build_nonstandard_integer_type (prec, 1);
1074       gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
1075       if (before)
1076 	gsi_insert_before (gsi, g, GSI_SAME_STMT);
1077       else
1078 	gsi_insert_after (gsi, g, GSI_NEW_STMT);
1079       val = gimple_assign_lhs (g);
1080     }
1081   return build1 (VIEW_CONVERT_EXPR, type, val);
1082 }
1083 
1084 /* Perform the bswap optimization: replace the expression computed in the rhs
1085    of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
1086    bswap, load or load + bswap expression.
1087    Which of these alternatives replaces the rhs is given by N->base_addr (non
1088    null if a load is needed) and BSWAP.  The type, VUSE and alias set of the
1089    load to perform are also given in N, while the builtin bswap to invoke is
1090    given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of the
1091    load statements involved to construct the rhs in gsi_stmt (GSI) and
1092    N->range gives the size of the rhs expression for maintaining some
1093    statistics.
1094 
1095    Note that if the replacement involves a load and gsi_stmt (GSI) is
1096    non-NULL, that stmt is moved just before INS_STMT to do the load with the
1097    same VUSE, which can lead to gsi_stmt (GSI) changing basic block.  */
1098 
1099 tree
1100 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
1101 	       tree bswap_type, tree load_type, struct symbolic_number *n,
1102 	       bool bswap, uint64_t mask)
1103 {
1104   tree src, tmp, tgt = NULL_TREE;
1105   gimple *bswap_stmt, *mask_stmt = NULL;
1106   tree_code conv_code = NOP_EXPR;
1107 
1108   gimple *cur_stmt = gsi_stmt (gsi);
1109   src = n->src;
1110   if (cur_stmt)
1111     {
1112       tgt = gimple_assign_lhs (cur_stmt);
1113       if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
1114 	  && tgt
1115 	  && VECTOR_TYPE_P (TREE_TYPE (tgt)))
1116 	conv_code = VIEW_CONVERT_EXPR;
1117     }
1118 
1119   /* Need to load the value from memory first.  */
1120   if (n->base_addr)
1121     {
1122       gimple_stmt_iterator gsi_ins = gsi;
1123       if (ins_stmt)
1124 	gsi_ins = gsi_for_stmt (ins_stmt);
1125       tree addr_expr, addr_tmp, val_expr, val_tmp;
1126       tree load_offset_ptr, aligned_load_type;
1127       gimple *load_stmt;
1128       unsigned align = get_object_alignment (src);
1129       poly_int64 load_offset = 0;
1130 
1131       if (cur_stmt)
1132 	{
1133 	  basic_block ins_bb = gimple_bb (ins_stmt);
1134 	  basic_block cur_bb = gimple_bb (cur_stmt);
1135 	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
1136 	    return NULL_TREE;
1137 
1138 	  /* Move cur_stmt just before one of the loads of the original
1139 	     to ensure it has the same VUSE.  See PR61517 for what could
1140 	     go wrong.  */
1141 	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
1142 	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
1143 	  gsi_move_before (&gsi, &gsi_ins);
1144 	  gsi = gsi_for_stmt (cur_stmt);
1145 	}
1146       else
1147 	gsi = gsi_ins;
1148 
1149       /* Compute address to load from and cast according to the size
1150 	 of the load.  */
1151       addr_expr = build_fold_addr_expr (src);
1152       if (is_gimple_mem_ref_addr (addr_expr))
1153 	addr_tmp = unshare_expr (addr_expr);
1154       else
1155 	{
1156 	  addr_tmp = unshare_expr (n->base_addr);
1157 	  if (!is_gimple_mem_ref_addr (addr_tmp))
1158 	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
1159 						   is_gimple_mem_ref_addr,
1160 						   NULL_TREE, true,
1161 						   GSI_SAME_STMT);
1162 	  load_offset = n->bytepos;
1163 	  if (n->offset)
1164 	    {
1165 	      tree off
1166 		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
1167 					    true, NULL_TREE, true,
1168 					    GSI_SAME_STMT);
1169 	      gimple *stmt
1170 		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
1171 				       POINTER_PLUS_EXPR, addr_tmp, off);
1172 	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1173 	      addr_tmp = gimple_assign_lhs (stmt);
1174 	    }
1175 	}
1176 
1177       /* Perform the load.  */
1178       aligned_load_type = load_type;
1179       if (align < TYPE_ALIGN (load_type))
1180 	aligned_load_type = build_aligned_type (load_type, align);
1181       load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1182       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1183 			      load_offset_ptr);
1184 
1185       if (!bswap)
1186 	{
1187 	  if (n->range == 16)
1188 	    nop_stats.found_16bit++;
1189 	  else if (n->range == 32)
1190 	    nop_stats.found_32bit++;
1191 	  else
1192 	    {
1193 	      gcc_assert (n->range == 64);
1194 	      nop_stats.found_64bit++;
1195 	    }
1196 
1197 	  /* Convert the result of load if necessary.  */
1198 	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1199 	    {
1200 	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1201 					    "load_dst");
1202 	      load_stmt = gimple_build_assign (val_tmp, val_expr);
1203 	      gimple_set_vuse (load_stmt, n->vuse);
1204 	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1205 	      if (conv_code == VIEW_CONVERT_EXPR)
1206 		val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
1207 					      true);
1208 	      gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
1209 	      update_stmt (cur_stmt);
1210 	    }
1211 	  else if (cur_stmt)
1212 	    {
1213 	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1214 	      gimple_set_vuse (cur_stmt, n->vuse);
1215 	      update_stmt (cur_stmt);
1216 	    }
1217 	  else
1218 	    {
1219 	      tgt = make_ssa_name (load_type);
1220 	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1221 	      gimple_set_vuse (cur_stmt, n->vuse);
1222 	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1223 	    }
1224 
1225 	  if (dump_file)
1226 	    {
1227 	      fprintf (dump_file,
1228 		       "%d bit load in target endianness found at: ",
1229 		       (int) n->range);
1230 	      print_gimple_stmt (dump_file, cur_stmt, 0);
1231 	    }
1232 	  return tgt;
1233 	}
1234       else
1235 	{
1236 	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1237 	  load_stmt = gimple_build_assign (val_tmp, val_expr);
1238 	  gimple_set_vuse (load_stmt, n->vuse);
1239 	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1240 	}
1241       src = val_tmp;
1242     }
1243   else if (!bswap)
1244     {
1245       gimple *g = NULL;
1246       if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1247 	{
1248 	  if (!is_gimple_val (src))
1249 	    return NULL_TREE;
1250 	  if (conv_code == VIEW_CONVERT_EXPR)
1251 	    src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
1252 	  g = gimple_build_assign (tgt, conv_code, src);
1253 	}
1254       else if (cur_stmt)
1255 	g = gimple_build_assign (tgt, src);
1256       else
1257 	tgt = src;
1258       if (n->range == 16)
1259 	nop_stats.found_16bit++;
1260       else if (n->range == 32)
1261 	nop_stats.found_32bit++;
1262       else
1263 	{
1264 	  gcc_assert (n->range == 64);
1265 	  nop_stats.found_64bit++;
1266 	}
1267       if (dump_file)
1268 	{
1269 	  fprintf (dump_file,
1270 		   "%d bit reshuffle in target endianness found at: ",
1271 		   (int) n->range);
1272 	  if (cur_stmt)
1273 	    print_gimple_stmt (dump_file, cur_stmt, 0);
1274 	  else
1275 	    {
1276 	      print_generic_expr (dump_file, tgt, TDF_NONE);
1277 	      fprintf (dump_file, "\n");
1278 	    }
1279 	}
1280       if (cur_stmt)
1281 	gsi_replace (&gsi, g, true);
1282       return tgt;
1283     }
1284   else if (TREE_CODE (src) == BIT_FIELD_REF)
1285     src = TREE_OPERAND (src, 0);
1286 
1287   if (n->range == 16)
1288     bswap_stats.found_16bit++;
1289   else if (n->range == 32)
1290     bswap_stats.found_32bit++;
1291   else
1292     {
1293       gcc_assert (n->range == 64);
1294       bswap_stats.found_64bit++;
1295     }
1296 
1297   tmp = src;
1298 
1299   /* Convert the src expression if necessary.  */
1300   if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1301     {
1302       gimple *convert_stmt;
1303 
1304       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1305       convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1306       gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1307     }
1308 
1309   /* The canonical form for a 16-bit bswap is a rotate expression.  Only 16-bit
1310      values are considered, as rotation of 2N-bit values by N bits is generally
1311      not equivalent to a bswap.  Consider for instance 0x01020304 r>> 16, which
1312      gives 0x03040102 while the bswap of that value is 0x04030201.  */
1313   if (bswap && n->range == 16)
1314     {
1315       tree count = build_int_cst (NULL, BITS_PER_UNIT);
1316       src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1317       bswap_stmt = gimple_build_assign (NULL, src);
1318     }
1319   else
1320     bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1321 
1322   if (tgt == NULL_TREE)
1323     tgt = make_ssa_name (bswap_type);
1324   tmp = tgt;
1325 
1326   if (mask != ~(uint64_t) 0)
1327     {
1328       tree m = build_int_cst (bswap_type, mask);
1329       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1330       gimple_set_lhs (bswap_stmt, tmp);
1331       mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
1332       tmp = tgt;
1333     }
1334 
1335   /* Convert the result if necessary.  */
1336   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1337     {
1338       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1339       tree atmp = tmp;
1340       gimple_stmt_iterator gsi2 = gsi;
1341       if (conv_code == VIEW_CONVERT_EXPR)
1342 	atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
1343       gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
1344       gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
1345     }
1346 
1347   gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
1348 
1349   if (dump_file)
1350     {
1351       fprintf (dump_file, "%d bit bswap implementation found at: ",
1352 	       (int) n->range);
1353       if (cur_stmt)
1354 	print_gimple_stmt (dump_file, cur_stmt, 0);
1355       else
1356 	{
1357 	  print_generic_expr (dump_file, tgt, TDF_NONE);
1358 	  fprintf (dump_file, "\n");
1359 	}
1360     }
1361 
1362   if (cur_stmt)
1363     {
1364       if (mask_stmt)
1365 	gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
1366       gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1367       gsi_remove (&gsi, true);
1368     }
1369   else
1370     {
1371       gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1372       if (mask_stmt)
1373 	gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
1374     }
1375   return tgt;
1376 }
1377 
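/* Example (schematic, not actual pass output): on a little-endian target,
   a big-endian 32-bit read such as

     uint32_t v = (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3];

   is rewritten by bswap_replace into roughly

     tmp = 32-bit load from p (using the object's actual alignment);
     v   = __builtin_bswap32 (tmp);

   while the same source on a big-endian target needs no bswap and is
   replaced by the plain 32-bit load.  */
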
1378 /* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1379    using bswap optimizations.  Dominance info (CDI_DOMINATORS) needs to be
1380    computed on entry.  Return true if it has been optimized and
1381    TODO_update_ssa is needed.  */
1382 
1383 static bool
1384 maybe_optimize_vector_constructor (gimple *cur_stmt)
1385 {
1386   tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1387   struct symbolic_number n;
1388   bool bswap;
1389 
1390   gcc_assert (is_gimple_assign (cur_stmt)
1391 	      && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
1392 
1393   tree rhs = gimple_assign_rhs1 (cur_stmt);
1394   if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
1395       || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
1396       || gimple_assign_lhs (cur_stmt) == NULL_TREE)
1397     return false;
1398 
1399   HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
1400   switch (sz)
1401     {
1402     case 16:
1403       load_type = bswap_type = uint16_type_node;
1404       break;
1405     case 32:
1406       if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1407 	  && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
1408 	{
1409 	  load_type = uint32_type_node;
1410 	  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1411 	  bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1412 	}
1413       else
1414 	return false;
1415       break;
1416     case 64:
1417       if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1418 	  && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1419 	      || (word_mode == SImode
1420 		  && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1421 		  && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
1422 	{
1423 	  load_type = uint64_type_node;
1424 	  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1425 	  bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1426 	}
1427       else
1428 	return false;
1429       break;
1430     default:
1431       return false;
1432     }
1433 
1434   bool cast64_to_32;
1435   uint64_t mask;
1436   gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1437 					&cast64_to_32, &mask);
1438   if (!ins_stmt
1439       || n.range != (unsigned HOST_WIDE_INT) sz
1440       || cast64_to_32
1441       || mask != ~(uint64_t) 0)
1442     return false;
1443 
1444   if (bswap && !fndecl && n.range != 16)
1445     return false;
1446 
1447   memset (&nop_stats, 0, sizeof (nop_stats));
1448   memset (&bswap_stats, 0, sizeof (bswap_stats));
1449   return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1450 			bswap_type, load_type, &n, bswap, mask) != NULL_TREE;
1451 }
1452 
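/* Illustrative sketch (hypothetical source, not from the pass): the kind
   of vector CONSTRUCTOR maybe_optimize_vector_constructor targets, here a
   4-byte vector built from the bytes of one scalar.

     typedef unsigned char v4qi __attribute__ ((vector_size (4)));

     v4qi
     split_be (unsigned int x)   // on a little-endian target
     {
       return (v4qi) { x >> 24, x >> 16, x >> 8, x };
     }

   The four byte extractions can be replaced by a single
   __builtin_bswap32 (x) whose result is view-converted to v4qi.  */
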
1453 /* Find manual byte swap implementations as well as loads in a given
1454    endianness.  Byte swaps are turned into a bswap builtin invocation,
1455    while endian loads are converted to a bswap builtin invocation or a
1456    simple load according to the target endianness.  */
1457 
1458 unsigned int
1459 pass_optimize_bswap::execute (function *fun)
1460 {
1461   basic_block bb;
1462   bool bswap32_p, bswap64_p;
1463   bool changed = false;
1464   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1465 
1466   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1467 	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1468   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1469 	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1470 		   || (bswap32_p && word_mode == SImode)));
1471 
1472   /* Determine the argument type of the builtins.  The code later on
1473      assumes that the return and argument type are the same.  */
1474   if (bswap32_p)
1475     {
1476       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1477       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1478     }
1479 
1480   if (bswap64_p)
1481     {
1482       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1483       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1484     }
1485 
1486   memset (&nop_stats, 0, sizeof (nop_stats));
1487   memset (&bswap_stats, 0, sizeof (bswap_stats));
1488   calculate_dominance_info (CDI_DOMINATORS);
1489 
1490   FOR_EACH_BB_FN (bb, fun)
1491     {
1492       gimple_stmt_iterator gsi;
1493 
1494       /* We do a reverse scan for bswap patterns to make sure we get the
1495 	 widest match. As bswap pattern matching doesn't handle previously
1496 	 inserted smaller bswap replacements as sub-patterns, the wider
1497 	 variant wouldn't be detected.  */
1498       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1499 	{
1500 	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1501 	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1502 	  enum tree_code code;
1503 	  struct symbolic_number n;
1504 	  bool bswap, cast64_to_32;
1505 	  uint64_t mask;
1506 
1507 	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1508 	     might be moved to a different basic block by bswap_replace and gsi
1509 	     must not point to it in that case.  Doing the gsi_prev here
1510 	     makes sure that gsi points to the statement previous to
1511 	     cur_stmt while still making sure that all statements are
1512 	     considered in this basic block.  */
1513 	  gsi_prev (&gsi);
1514 
1515 	  if (!is_gimple_assign (cur_stmt))
1516 	    continue;
1517 
1518 	  code = gimple_assign_rhs_code (cur_stmt);
1519 	  switch (code)
1520 	    {
1521 	    case LROTATE_EXPR:
1522 	    case RROTATE_EXPR:
1523 	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1524 		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1525 		     % BITS_PER_UNIT)
1526 		continue;
1527 	      /* Fall through.  */
1528 	    case BIT_IOR_EXPR:
1529 	    case BIT_XOR_EXPR:
1530 	    case PLUS_EXPR:
1531 	      break;
1532 	    case CONSTRUCTOR:
1533 	      {
1534 		tree rhs = gimple_assign_rhs1 (cur_stmt);
1535 		if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1536 		    && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
1537 		  break;
1538 	      }
1539 	      continue;
1540 	    default:
1541 	      continue;
1542 	    }
1543 
1544 	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1545 					&cast64_to_32, &mask);
1546 
1547 	  if (!ins_stmt)
1548 	    continue;
1549 
1550 	  switch (n.range)
1551 	    {
1552 	    case 16:
1553 	      /* Already in canonical form, nothing to do.  */
1554 	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1555 		continue;
1556 	      load_type = bswap_type = uint16_type_node;
1557 	      break;
1558 	    case 32:
1559 	      load_type = uint32_type_node;
1560 	      if (bswap32_p)
1561 		{
1562 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1563 		  bswap_type = bswap32_type;
1564 		}
1565 	      break;
1566 	    case 64:
1567 	      load_type = uint64_type_node;
1568 	      if (bswap64_p)
1569 		{
1570 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1571 		  bswap_type = bswap64_type;
1572 		}
1573 	      break;
1574 	    default:
1575 	      continue;
1576 	    }
1577 
1578 	  if (bswap && !fndecl && n.range != 16)
1579 	    continue;
1580 
1581 	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1582 			     bswap_type, load_type, &n, bswap, mask))
1583 	    changed = true;
1584 	}
1585     }
1586 
1587   statistics_counter_event (fun, "16-bit nop implementations found",
1588 			    nop_stats.found_16bit);
1589   statistics_counter_event (fun, "32-bit nop implementations found",
1590 			    nop_stats.found_32bit);
1591   statistics_counter_event (fun, "64-bit nop implementations found",
1592 			    nop_stats.found_64bit);
1593   statistics_counter_event (fun, "16-bit bswap implementations found",
1594 			    bswap_stats.found_16bit);
1595   statistics_counter_event (fun, "32-bit bswap implementations found",
1596 			    bswap_stats.found_32bit);
1597   statistics_counter_event (fun, "64-bit bswap implementations found",
1598 			    bswap_stats.found_64bit);
1599 
1600   return (changed ? TODO_update_ssa : 0);
1601 }
1602 
1603 } // anon namespace
1604 
1605 gimple_opt_pass *
1606 make_pass_optimize_bswap (gcc::context *ctxt)
1607 {
1608   return new pass_optimize_bswap (ctxt);
1609 }
1610 
1611 namespace {
1612 
1613 /* Struct recording one operand for the store, which is either a constant
1614    (in which case VAL holds the constant and all the other fields are zero),
1615    a memory load (VAL holds the reference, BASE_ADDR is non-NULL and the
1616    other fields also describe the load), or an SSA name (VAL holds the
1617    SSA name and all the other fields are zero).  */
1618 
1619 class store_operand_info
1620 {
1621 public:
1622   tree val;
1623   tree base_addr;
1624   poly_uint64 bitsize;
1625   poly_uint64 bitpos;
1626   poly_uint64 bitregion_start;
1627   poly_uint64 bitregion_end;
1628   gimple *stmt;
1629   bool bit_not_p;
1630   store_operand_info ();
1631 };
1632 
1633 store_operand_info::store_operand_info ()
1634   : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1635     bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1636 {
1637 }
1638 
1639 /* Struct recording the information about a single store of an immediate
1640    to memory.  These are created in the first phase and coalesced into
1641    merged_store_group objects in the second phase.  */
1642 
1643 class store_immediate_info
1644 {
1645 public:
1646   unsigned HOST_WIDE_INT bitsize;
1647   unsigned HOST_WIDE_INT bitpos;
1648   unsigned HOST_WIDE_INT bitregion_start;
1649   /* This is one past the last bit of the bit region.  */
1650   unsigned HOST_WIDE_INT bitregion_end;
1651   gimple *stmt;
1652   unsigned int order;
1653   /* INTEGER_CST for constant store, STRING_CST for string store,
1654      MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1655      BIT_INSERT_EXPR for bit insertion.
1656      LROTATE_EXPR if it can only be bswap-optimized and the
1657      ops are not really meaningful.
1658      NOP_EXPR if the bswap optimization detected an identity; the ops
1659      are not meaningful.  */
1660   enum tree_code rhs_code;
1661   /* Two fields for bswap optimization purposes.  */
1662   struct symbolic_number n;
1663   gimple *ins_stmt;
1664   /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
1665   bool bit_not_p;
1666   /* True if ops have been swapped and thus ops[1] represents
1667      rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
1668   bool ops_swapped_p;
1669   /* The index number of the landing pad, or 0 if there is none.  */
1670   int lp_nr;
1671   /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
1672      just the first one.  */
1673   store_operand_info ops[2];
1674   store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1675 			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1676 			gimple *, unsigned int, enum tree_code,
1677 			struct symbolic_number &, gimple *, bool, int,
1678 			const store_operand_info &,
1679 			const store_operand_info &);
1680 };
1681 
1682 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1683 					    unsigned HOST_WIDE_INT bp,
1684 					    unsigned HOST_WIDE_INT brs,
1685 					    unsigned HOST_WIDE_INT bre,
1686 					    gimple *st,
1687 					    unsigned int ord,
1688 					    enum tree_code rhscode,
1689 					    struct symbolic_number &nr,
1690 					    gimple *ins_stmtp,
1691 					    bool bitnotp,
1692 					    int nr2,
1693 					    const store_operand_info &op0r,
1694 					    const store_operand_info &op1r)
1695   : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1696     stmt (st), order (ord), rhs_code (rhscode), n (nr),
1697     ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1698     lp_nr (nr2), ops { op0r, op1r }
1699 {
1700 }
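
/* Purely illustrative example: a store such as
     p[2] = 0x34;
   with char-sized elements would typically be recorded with bitsize == 8,
   bitpos == 16 relative to the base p, rhs_code == INTEGER_CST and
   ops[0].val holding the constant, with the bit region covering just that
   byte.  */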
1701 
1702 /* Struct representing a group of stores to contiguous memory locations.
1703    These are produced by the second phase (coalescing) and consumed in the
1704    third phase that outputs the widened stores.  */
1705 
1706 class merged_store_group
1707 {
1708 public:
1709   unsigned HOST_WIDE_INT start;
1710   unsigned HOST_WIDE_INT width;
1711   unsigned HOST_WIDE_INT bitregion_start;
1712   unsigned HOST_WIDE_INT bitregion_end;
1713   /* The size of the allocated memory for val and mask.  */
1714   unsigned HOST_WIDE_INT buf_size;
1715   unsigned HOST_WIDE_INT align_base;
1716   poly_uint64 load_align_base[2];
1717 
1718   unsigned int align;
1719   unsigned int load_align[2];
1720   unsigned int first_order;
1721   unsigned int last_order;
1722   bool bit_insertion;
1723   bool string_concatenation;
1724   bool only_constants;
1725   bool consecutive;
1726   unsigned int first_nonmergeable_order;
1727   int lp_nr;
1728 
1729   auto_vec<store_immediate_info *> stores;
1730   /* We record the first and last original statements in the sequence because
1731      we'll need their vuse/vdef and replacement position.  It's easier to keep
1732      track of them separately as 'stores' is reordered by apply_stores.  */
1733   gimple *last_stmt;
1734   gimple *first_stmt;
1735   unsigned char *val;
1736   unsigned char *mask;
1737 
1738   merged_store_group (store_immediate_info *);
1739   ~merged_store_group ();
1740   bool can_be_merged_into (store_immediate_info *);
1741   void merge_into (store_immediate_info *);
1742   void merge_overlapping (store_immediate_info *);
1743   bool apply_stores ();
1744 private:
1745   void do_merge (store_immediate_info *);
1746 };
1747 
1748 /* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */
1749 
1750 static void
1751 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1752 {
1753   if (!fd)
1754     return;
1755 
1756   for (unsigned int i = 0; i < len; i++)
1757     fprintf (fd, "%02x ", ptr[i]);
1758   fprintf (fd, "\n");
1759 }
1760 
1761 /* Clear out LEN bits starting from bit START in the byte array
1762    PTR.  This clears the bits to the *right* from START.
1763    START must be within [0, BITS_PER_UNIT) and counts starting from
1764    the least significant bit.  */
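
/* Worked example, assuming BITS_PER_UNIT == 8: clear_bit_region_be (p, 6, 3)
   on a byte holding 0xff clears bits 6, 5 and 4 (the region extends towards
   the least significant bit), leaving 0x8f.  */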
1765 
1766 static void
1767 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1768 		     unsigned int len)
1769 {
1770   if (len == 0)
1771     return;
1772   /* Clear len bits to the right of start.  */
1773   else if (len <= start + 1)
1774     {
1775       unsigned char mask = (~(~0U << len));
1776       mask = mask << (start + 1U - len);
1777       ptr[0] &= ~mask;
1778     }
1779   else if (start != BITS_PER_UNIT - 1)
1780     {
1781       clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1782       clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1783 			   len - (start % BITS_PER_UNIT) - 1);
1784     }
1785   else if (start == BITS_PER_UNIT - 1
1786 	   && len > BITS_PER_UNIT)
1787     {
1788       unsigned int nbytes = len / BITS_PER_UNIT;
1789       memset (ptr, 0, nbytes);
1790       if (len % BITS_PER_UNIT != 0)
1791 	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1792 			     len % BITS_PER_UNIT);
1793     }
1794   else
1795     gcc_unreachable ();
1796 }
1797 
1798 /* In the byte array PTR clear the bit region that starts at bit
1799    START and is LEN bits wide.
1800    For regions spanning multiple bytes do this recursively until we reach
1801    a LEN of zero or a region contained within a single byte.  */
1802 
1803 static void
1804 clear_bit_region (unsigned char *ptr, unsigned int start,
1805 		  unsigned int len)
1806 {
1807   /* Degenerate base case.  */
1808   if (len == 0)
1809     return;
1810   else if (start >= BITS_PER_UNIT)
1811     clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1812   /* Second base case.  */
1813   else if ((start + len) <= BITS_PER_UNIT)
1814     {
1815       unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1816       mask >>= BITS_PER_UNIT - (start + len);
1817 
1818       ptr[0] &= ~mask;
1819 
1820       return;
1821     }
1822   /* Clear most significant bits in a byte and proceed with the next byte.  */
1823   else if (start != 0)
1824     {
1825       clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1826       clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1827     }
1828   /* Whole bytes need to be cleared.  */
1829   else if (start == 0 && len > BITS_PER_UNIT)
1830     {
1831       unsigned int nbytes = len / BITS_PER_UNIT;
1832       /* We could recurse on each byte but we clear whole bytes, so a simple
1833 	 memset will do.  */
1834       memset (ptr, '\0', nbytes);
1835       /* Clear the remaining sub-byte region if there is one.  */
1836       if (len % BITS_PER_UNIT != 0)
1837 	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1838     }
1839   else
1840     gcc_unreachable ();
1841 }
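
/* Worked example, assuming BITS_PER_UNIT == 8: clear_bit_region (p, 3, 7)
   spans two bytes and clears bits 3..7 of p[0] and bits 0..1 of p[1],
   counting bits from the least significant bit of each byte.  */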
1842 
1843 /* Write BITLEN bits of EXPR to the byte array PTR at
1844    bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
1845    Return true if the operation succeeded.  */
1846 
1847 static bool
1848 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1849 		       unsigned int total_bytes)
1850 {
1851   unsigned int first_byte = bitpos / BITS_PER_UNIT;
1852   bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1853 			|| (bitpos % BITS_PER_UNIT)
1854 			|| !int_mode_for_size (bitlen, 0).exists ());
1855   bool empty_ctor_p
1856     = (TREE_CODE (expr) == CONSTRUCTOR
1857        && CONSTRUCTOR_NELTS (expr) == 0
1858        && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1859 		       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1860 
1861   if (!sub_byte_op_p)
1862     {
1863       if (first_byte >= total_bytes)
1864 	return false;
1865       total_bytes -= first_byte;
1866       if (empty_ctor_p)
1867 	{
1868 	  unsigned HOST_WIDE_INT rhs_bytes
1869 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1870 	  if (rhs_bytes > total_bytes)
1871 	    return false;
1872 	  memset (ptr + first_byte, '\0', rhs_bytes);
1873 	  return true;
1874 	}
1875       return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1876     }
1877 
1878   /* LITTLE-ENDIAN
1879      We are writing a non byte-sized quantity or at a position that is not
1880      at a byte boundary.
1881      |--------|--------|--------| ptr + first_byte
1882            ^              ^
1883            xxx xxxxxxxx xxx< bp>
1884            |______EXPR____|
1885 
1886      First native_encode_expr EXPR into a temporary buffer and shift each
1887      byte in the buffer by 'bp' (carrying the bits over as necessary).
1888      |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1889                                               <------bitlen---->< bp>
1890     Then we clear the destination bits:
1891     |---00000|00000000|000-----| ptr + first_byte
1892         <-------bitlen--->< bp>
1893 
1894     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1895     |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1896 
1897    BIG-ENDIAN
1898    We are writing a non byte-sized quantity or at a position that is not
1899    at a byte boundary.
1900      ptr + first_byte |--------|--------|--------|
1901                             ^              ^
1902                        <bp >xxx xxxxxxxx xxx
1903                             |_____EXPR_____|
1904 
1905      First native_encode_expr EXPR into a temporary buffer and shift each
1906      byte in the buffer to the right (carrying the bits over as necessary).
1907      We shift by as much as needed to align the most significant bit of EXPR
1908      with bitpos:
1909      |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1910         <---bitlen---->          <bp ><-----bitlen----->
1911     Then we clear the destination bits:
1912     ptr + first_byte |-----000||00000000||00000---|
1913                       <bp ><-------bitlen----->
1914 
1915     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1916     ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1917     The awkwardness comes from the fact that bitpos is counted from the
1918     most significant bit of a byte.  */
1919 
1920   /* We must be dealing with fixed-size data at this point, since the
1921      total size is also fixed.  */
1922   unsigned int byte_size;
1923   if (empty_ctor_p)
1924     {
1925       unsigned HOST_WIDE_INT rhs_bytes
1926 	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1927       if (rhs_bytes > total_bytes)
1928 	return false;
1929       byte_size = rhs_bytes;
1930     }
1931   else
1932     {
1933       fixed_size_mode mode
1934 	= as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1935       byte_size
1936 	= mode == BLKmode
1937 	? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1938 	: GET_MODE_SIZE (mode);
1939     }
1940   /* Allocate an extra byte so that we have space to shift into.  */
1941   byte_size++;
1942   unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1943   memset (tmpbuf, '\0', byte_size);
1944   /* The store detection code should only have allowed constants that are
1945      accepted by native_encode_expr or empty ctors.  */
1946   if (!empty_ctor_p
1947       && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1948     gcc_unreachable ();
1949 
1950   /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1951      bytes to write.  This means it can write more than
1952      ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1953      write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
1954      bitlen and zero out the bits that are not relevant as well (that may
1955      contain a sign bit due to sign-extension).  */
1956   unsigned int padding
1957     = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1958   /* On big-endian the padding is at the 'front' so just skip the initial
1959      bytes.  */
1960   if (BYTES_BIG_ENDIAN)
1961     tmpbuf += padding;
1962 
1963   byte_size -= padding;
1964 
1965   if (bitlen % BITS_PER_UNIT != 0)
1966     {
1967       if (BYTES_BIG_ENDIAN)
1968 	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1969 			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1970       else
1971 	clear_bit_region (tmpbuf, bitlen,
1972 			  byte_size * BITS_PER_UNIT - bitlen);
1973     }
1974   /* Left shifting relies on the last byte being clear if bitlen is
1975      a multiple of BITS_PER_UNIT, which might not be clear if
1976      there are padding bytes.  */
1977   else if (!BYTES_BIG_ENDIAN)
1978     tmpbuf[byte_size - 1] = '\0';
1979 
1980   /* Clear the bit region in PTR where the bits from TMPBUF will be
1981      inserted into.  */
1982   if (BYTES_BIG_ENDIAN)
1983     clear_bit_region_be (ptr + first_byte,
1984 			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1985   else
1986     clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1987 
1988   int shift_amnt;
1989   int bitlen_mod = bitlen % BITS_PER_UNIT;
1990   int bitpos_mod = bitpos % BITS_PER_UNIT;
1991 
1992   bool skip_byte = false;
1993   if (BYTES_BIG_ENDIAN)
1994     {
1995       /* BITPOS and BITLEN are exactly aligned and no shifting
1996 	 is necessary.  */
1997       if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1998 	  || (bitpos_mod == 0 && bitlen_mod == 0))
1999 	shift_amnt = 0;
2000       /* |. . . . . . . .|
2001 	  <bp >   <blen >.
2002 	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
2003 	 of the value until it aligns with 'bp' in the next byte over.  */
2004       else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
2005 	{
2006 	  shift_amnt = bitlen_mod + bitpos_mod;
2007 	  skip_byte = bitlen_mod != 0;
2008 	}
2009       /* |. . . . . . . .|
2010 	  <----bp--->
2011 	    <---blen---->.
2012 	 Shift the value right within the same byte so it aligns with 'bp'.  */
2013       else
2014 	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
2015     }
2016   else
2017     shift_amnt = bitpos % BITS_PER_UNIT;
2018 
2019   /* Create the shifted version of EXPR.  */
2020   if (!BYTES_BIG_ENDIAN)
2021     {
2022       shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
2023       if (shift_amnt == 0)
2024 	byte_size--;
2025     }
2026   else
2027     {
2028       gcc_assert (BYTES_BIG_ENDIAN);
2029       shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
2030       /* If shifting right forced us to move into the next byte skip the now
2031 	 empty byte.  */
2032       if (skip_byte)
2033 	{
2034 	  tmpbuf++;
2035 	  byte_size--;
2036 	}
2037     }
2038 
2039   /* Insert the bits from TMPBUF.  */
2040   for (unsigned int i = 0; i < byte_size; i++)
2041     ptr[first_byte + i] |= tmpbuf[i];
2042 
2043   return true;
2044 }
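
/* Worked little-endian example, assuming BITS_PER_UNIT == 8: writing the
   4-bit constant 0xa at BITPOS == 2 encodes 0x0a into the temporary buffer,
   clears everything above bit 4, shifts left by 2 to get 0x28, clears
   bits 2..5 of ptr[0] and ORs 0x28 in, so bits 5 and 3 of ptr[0] end up
   set.  */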
2045 
2046 /* Sorting function for store_immediate_info objects.
2047    Sorts them by bitposition.  */
2048 
2049 static int
2050 sort_by_bitpos (const void *x, const void *y)
2051 {
2052   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2053   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2054 
2055   if ((*tmp)->bitpos < (*tmp2)->bitpos)
2056     return -1;
2057   else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2058     return 1;
2059   else
2060     /* If they are the same let's use the order which is guaranteed to
2061        be different.  */
2062     return (*tmp)->order - (*tmp2)->order;
2063 }
2064 
2065 /* Sorting function for store_immediate_info objects.
2066    Sorts them by the order field.  */
2067 
2068 static int
2069 sort_by_order (const void *x, const void *y)
2070 {
2071   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2072   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2073 
2074   if ((*tmp)->order < (*tmp2)->order)
2075     return -1;
2076   else if ((*tmp)->order > (*tmp2)->order)
2077     return 1;
2078 
2079   gcc_unreachable ();
2080 }
2081 
2082 /* Initialize a merged_store_group object from a store_immediate_info
2083    object.  */
2084 
2085 merged_store_group::merged_store_group (store_immediate_info *info)
2086 {
2087   start = info->bitpos;
2088   width = info->bitsize;
2089   bitregion_start = info->bitregion_start;
2090   bitregion_end = info->bitregion_end;
2091   /* VAL has memory allocated for it in apply_stores once the group
2092      width has been finalized.  */
2093   val = NULL;
2094   mask = NULL;
2095   bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2096   string_concatenation = info->rhs_code == STRING_CST;
2097   only_constants = info->rhs_code == INTEGER_CST;
2098   consecutive = true;
2099   first_nonmergeable_order = ~0U;
2100   lp_nr = info->lp_nr;
2101   unsigned HOST_WIDE_INT align_bitpos = 0;
2102   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2103 			  &align, &align_bitpos);
2104   align_base = start - align_bitpos;
2105   for (int i = 0; i < 2; ++i)
2106     {
2107       store_operand_info &op = info->ops[i];
2108       if (op.base_addr == NULL_TREE)
2109 	{
2110 	  load_align[i] = 0;
2111 	  load_align_base[i] = 0;
2112 	}
2113       else
2114 	{
2115 	  get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2116 	  load_align_base[i] = op.bitpos - align_bitpos;
2117 	}
2118     }
2119   stores.create (1);
2120   stores.safe_push (info);
2121   last_stmt = info->stmt;
2122   last_order = info->order;
2123   first_stmt = last_stmt;
2124   first_order = last_order;
2125   buf_size = 0;
2126 }
2127 
2128 merged_store_group::~merged_store_group ()
2129 {
2130   if (val)
2131     XDELETEVEC (val);
2132 }
2133 
2134 /* Return true if the store described by INFO can be merged into the group.  */
2135 
2136 bool
2137 merged_store_group::can_be_merged_into (store_immediate_info *info)
2138 {
2139   /* Do not merge bswap patterns.  */
2140   if (info->rhs_code == LROTATE_EXPR)
2141     return false;
2142 
2143   if (info->lp_nr != lp_nr)
2144     return false;
2145 
2146   /* The canonical case.  */
2147   if (info->rhs_code == stores[0]->rhs_code)
2148     return true;
2149 
2150   /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST.  */
2151   if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
2152     return !string_concatenation;
2153 
2154   if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
2155     return !string_concatenation;
2156 
2157   /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2158      only for small regions since this can generate a lot of instructions.  */
2159   if (info->rhs_code == MEM_REF
2160       && (stores[0]->rhs_code == INTEGER_CST
2161 	  || stores[0]->rhs_code == BIT_INSERT_EXPR)
2162       && info->bitregion_start == stores[0]->bitregion_start
2163       && info->bitregion_end == stores[0]->bitregion_end
2164       && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2165     return !string_concatenation;
2166 
2167   if (stores[0]->rhs_code == MEM_REF
2168       && (info->rhs_code == INTEGER_CST
2169 	  || info->rhs_code == BIT_INSERT_EXPR)
2170       && info->bitregion_start == stores[0]->bitregion_start
2171       && info->bitregion_end == stores[0]->bitregion_end
2172       && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2173     return !string_concatenation;
2174 
2175   /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR.  */
2176   if (info->rhs_code == STRING_CST
2177       && stores[0]->rhs_code == INTEGER_CST
2178       && stores[0]->bitsize == CHAR_BIT)
2179     return !bit_insertion;
2180 
2181   if (stores[0]->rhs_code == STRING_CST
2182       && info->rhs_code == INTEGER_CST
2183       && info->bitsize == CHAR_BIT)
2184     return !bit_insertion;
2185 
2186   return false;
2187 }
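
/* For instance, in a sequence like
     struct S { char a; int b : 7; char c; } s;
     s.a = 1;     <-- recorded as an INTEGER_CST store
     s.b = x_1;   <-- recorded as a BIT_INSERT_EXPR store
   the bit-field store can join a group started by the constant store,
   provided no STRING_CST store is already part of the group.  */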
2188 
2189 /* Helper method for merge_into and merge_overlapping to do
2190    the common part.  */
2191 
2192 void
2193 merged_store_group::do_merge (store_immediate_info *info)
2194 {
2195   bitregion_start = MIN (bitregion_start, info->bitregion_start);
2196   bitregion_end = MAX (bitregion_end, info->bitregion_end);
2197 
2198   unsigned int this_align;
2199   unsigned HOST_WIDE_INT align_bitpos = 0;
2200   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2201 			  &this_align, &align_bitpos);
2202   if (this_align > align)
2203     {
2204       align = this_align;
2205       align_base = info->bitpos - align_bitpos;
2206     }
2207   for (int i = 0; i < 2; ++i)
2208     {
2209       store_operand_info &op = info->ops[i];
2210       if (!op.base_addr)
2211 	continue;
2212 
2213       get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2214       if (this_align > load_align[i])
2215 	{
2216 	  load_align[i] = this_align;
2217 	  load_align_base[i] = op.bitpos - align_bitpos;
2218 	}
2219     }
2220 
2221   gimple *stmt = info->stmt;
2222   stores.safe_push (info);
2223   if (info->order > last_order)
2224     {
2225       last_order = info->order;
2226       last_stmt = stmt;
2227     }
2228   else if (info->order < first_order)
2229     {
2230       first_order = info->order;
2231       first_stmt = stmt;
2232     }
2233 
2234   if (info->bitpos != start + width)
2235     consecutive = false;
2236 
2237   /* We need to use extraction if there is any bit-field.  */
2238   if (info->rhs_code == BIT_INSERT_EXPR)
2239     {
2240       bit_insertion = true;
2241       gcc_assert (!string_concatenation);
2242     }
2243 
2244   /* We want to use concatenation if there is any string.  */
2245   if (info->rhs_code == STRING_CST)
2246     {
2247       string_concatenation = true;
2248       gcc_assert (!bit_insertion);
2249     }
2250 
2251   /* But we cannot use it if we don't have consecutive stores.  */
2252   if (!consecutive)
2253     string_concatenation = false;
2254 
2255   if (info->rhs_code != INTEGER_CST)
2256     only_constants = false;
2257 }
2258 
2259 /* Merge a store recorded by INFO into this merged store.
2260    The store is not overlapping with the existing recorded
2261    stores.  */
2262 
2263 void
2264 merged_store_group::merge_into (store_immediate_info *info)
2265 {
2266   do_merge (info);
2267 
2268   /* Make sure we're inserting in the position we think we're inserting.  */
2269   gcc_assert (info->bitpos >= start + width
2270 	      && info->bitregion_start <= bitregion_end);
2271 
2272   width = info->bitpos + info->bitsize - start;
2273 }
2274 
2275 /* Merge a store described by INFO into this merged store.
2276    INFO overlaps in some way with the current store (i.e. it is not the
2277    contiguous case, which is handled by merged_store_group::merge_into).  */
2278 
2279 void
2280 merged_store_group::merge_overlapping (store_immediate_info *info)
2281 {
2282   do_merge (info);
2283 
2284   /* If the store extends the size of the group, extend the width.  */
2285   if (info->bitpos + info->bitsize > start + width)
2286     width = info->bitpos + info->bitsize - start;
2287 }
2288 
2289 /* Go through all the recorded stores in this group in program order and
2290    apply their values to the VAL byte array to create the final merged
2291    value.  Return true if the operation succeeded.  */
2292 
2293 bool
2294 merged_store_group::apply_stores ()
2295 {
2296   store_immediate_info *info;
2297   unsigned int i;
2298 
2299   /* Make sure we have more than one store in the group, otherwise we cannot
2300      merge anything.  */
2301   if (bitregion_start % BITS_PER_UNIT != 0
2302       || bitregion_end % BITS_PER_UNIT != 0
2303       || stores.length () == 1)
2304     return false;
2305 
2306   buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2307 
2308   /* Really do string concatenation for large strings only.  */
2309   if (buf_size <= MOVE_MAX)
2310     string_concatenation = false;
2311 
2312   /* String concatenation only works for byte aligned start and end.  */
2313   if (start % BITS_PER_UNIT != 0 || width % BITS_PER_UNIT != 0)
2314     string_concatenation = false;
2315 
2316   /* Create a power-of-2-sized buffer for native_encode_expr.  */
2317   if (!string_concatenation)
2318     buf_size = 1 << ceil_log2 (buf_size);
2319 
2320   val = XNEWVEC (unsigned char, 2 * buf_size);
2321   mask = val + buf_size;
2322   memset (val, 0, buf_size);
2323   memset (mask, ~0U, buf_size);
2324 
2325   stores.qsort (sort_by_order);
2326 
2327   FOR_EACH_VEC_ELT (stores, i, info)
2328     {
2329       unsigned int pos_in_buffer = info->bitpos - bitregion_start;
2330       tree cst;
2331       if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2332 	cst = info->ops[0].val;
2333       else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2334 	cst = info->ops[1].val;
2335       else
2336 	cst = NULL_TREE;
2337       bool ret = true;
2338       if (cst && info->rhs_code != BIT_INSERT_EXPR)
2339 	ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2340 				     buf_size);
2341       unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2342       if (BYTES_BIG_ENDIAN)
2343 	clear_bit_region_be (m, (BITS_PER_UNIT - 1
2344 				 - (pos_in_buffer % BITS_PER_UNIT)),
2345 			     info->bitsize);
2346       else
2347 	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2348       if (cst && dump_file && (dump_flags & TDF_DETAILS))
2349 	{
2350 	  if (ret)
2351 	    {
2352 	      fputs ("After writing ", dump_file);
2353 	      print_generic_expr (dump_file, cst, TDF_NONE);
2354 	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2355 		       " at position %d\n", info->bitsize, pos_in_buffer);
2356 	      fputs ("  the merged value contains ", dump_file);
2357 	      dump_char_array (dump_file, val, buf_size);
2358 	      fputs ("  the merged mask contains  ", dump_file);
2359 	      dump_char_array (dump_file, mask, buf_size);
2360 	      if (bit_insertion)
2361 		fputs ("  bit insertion is required\n", dump_file);
2362 	      if (string_concatenation)
2363 		fputs ("  string concatenation is required\n", dump_file);
2364 	    }
2365 	  else
2366 	    fprintf (dump_file, "Failed to merge stores\n");
2367 	}
2368       if (!ret)
2369 	return false;
2370     }
2371   stores.qsort (sort_by_bitpos);
2372   return true;
2373 }
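
/* Small illustration: merging the two byte stores p[0] = 0x12 and
   p[1] = 0x34 into a 4-byte bit region yields
     val:  12 34 00 00
     mask: 00 00 ff ff
   i.e. VAL holds the merged constant bytes and MASK has zero bits exactly
   where some store in the group provided a value.  */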
2374 
2375 /* Structure describing the store chain.  */
2376 
2377 class imm_store_chain_info
2378 {
2379 public:
2380   /* Doubly-linked list that imposes an order on chain processing.
2381      PNXP (prev's next pointer) points to the head of a list, or to
2382      the next field in the previous chain in the list.
2383      See pass_store_merging::m_stores_head for more rationale.  */
2384   imm_store_chain_info *next, **pnxp;
2385   tree base_addr;
2386   auto_vec<store_immediate_info *> m_store_info;
2387   auto_vec<merged_store_group *> m_merged_store_groups;
2388 
2389   imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2390   : next (inspt), pnxp (&inspt), base_addr (b_a)
2391   {
2392     inspt = this;
2393     if (next)
2394       {
2395 	gcc_checking_assert (pnxp == next->pnxp);
2396 	next->pnxp = &next;
2397       }
2398   }
2399   ~imm_store_chain_info ()
2400   {
2401     *pnxp = next;
2402     if (next)
2403       {
2404 	gcc_checking_assert (&next == next->pnxp);
2405 	next->pnxp = pnxp;
2406       }
2407   }
2408   bool terminate_and_process_chain ();
2409   bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2410 			   unsigned int);
2411   bool coalesce_immediate_stores ();
2412   bool output_merged_store (merged_store_group *);
2413   bool output_merged_stores ();
2414 };
2415 
2416 const pass_data pass_data_tree_store_merging = {
2417   GIMPLE_PASS,     /* type */
2418   "store-merging", /* name */
2419   OPTGROUP_NONE,   /* optinfo_flags */
2420   TV_GIMPLE_STORE_MERGING,	 /* tv_id */
2421   PROP_ssa,	/* properties_required */
2422   0,		   /* properties_provided */
2423   0,		   /* properties_destroyed */
2424   0,		   /* todo_flags_start */
2425   TODO_update_ssa, /* todo_flags_finish */
2426 };
2427 
2428 class pass_store_merging : public gimple_opt_pass
2429 {
2430 public:
2431   pass_store_merging (gcc::context *ctxt)
2432     : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2433       m_n_chains (0), m_n_stores (0)
2434   {
2435   }
2436 
2437   /* Pass not supported for PDP-endian, nor for insane hosts or
2438      target character sizes where native_{encode,interpret}_expr
2439      doesn't work properly.  */
2440   virtual bool
2441   gate (function *)
2442   {
2443     return flag_store_merging
2444 	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2445 	   && CHAR_BIT == 8
2446 	   && BITS_PER_UNIT == 8;
2447   }
2448 
2449   virtual unsigned int execute (function *);
2450 
2451 private:
2452   hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2453 
2454   /* Form a doubly-linked stack of the elements of m_stores, so that
2455      we can iterate over them in a predictable way.  Using this order
2456      avoids extraneous differences in the compiler output just because
2457      of tree pointer variations (e.g. different chains end up in
2458      different positions of m_stores, so they are handled in different
2459      orders, so they allocate or release SSA names in different
2460      orders, and when they get reused, subsequent passes end up
2461      getting different SSA names, which may ultimately change
2462      decisions when going out of SSA).  */
2463   imm_store_chain_info *m_stores_head;
2464 
2465   /* The number of store chains currently tracked.  */
2466   unsigned m_n_chains;
2467   /* The number of stores currently tracked.  */
2468   unsigned m_n_stores;
2469 
2470   bool process_store (gimple *);
2471   bool terminate_and_process_chain (imm_store_chain_info *);
2472   bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2473   bool terminate_and_process_all_chains ();
2474 }; // class pass_store_merging
2475 
2476 /* Terminate and process all recorded chains.  Return true if any changes
2477    were made.  */
2478 
2479 bool
2480 pass_store_merging::terminate_and_process_all_chains ()
2481 {
2482   bool ret = false;
2483   while (m_stores_head)
2484     ret |= terminate_and_process_chain (m_stores_head);
2485   gcc_assert (m_stores.is_empty ());
2486   return ret;
2487 }
2488 
2489 /* Terminate all chains that are affected by the statement STMT.
2490    CHAIN_INFO is the chain we should ignore from the checks if
2491    non-NULL.  Return true if any changes were made.  */
2492 
2493 bool
2494 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2495 						     **chain_info,
2496 						   gimple *stmt)
2497 {
2498   bool ret = false;
2499 
2500   /* If the statement doesn't touch memory it can't alias.  */
2501   if (!gimple_vuse (stmt))
2502     return false;
2503 
2504   tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2505   ao_ref store_lhs_ref;
2506   ao_ref_init (&store_lhs_ref, store_lhs);
2507   for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2508     {
2509       next = cur->next;
2510 
2511       /* We already checked all the stores in chain_info and terminated the
2512 	 chain if necessary.  Skip it here.  */
2513       if (chain_info && *chain_info == cur)
2514 	continue;
2515 
2516       store_immediate_info *info;
2517       unsigned int i;
2518       FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2519 	{
2520 	  tree lhs = gimple_assign_lhs (info->stmt);
2521 	  ao_ref lhs_ref;
2522 	  ao_ref_init (&lhs_ref, lhs);
2523 	  if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2524 	      || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2525 	      || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2526 						   &lhs_ref, false)))
2527 	    {
2528 	      if (dump_file && (dump_flags & TDF_DETAILS))
2529 		{
2530 		  fprintf (dump_file, "stmt causes chain termination:\n");
2531 		  print_gimple_stmt (dump_file, stmt, 0);
2532 		}
2533 	      ret |= terminate_and_process_chain (cur);
2534 	      break;
2535 	    }
2536 	}
2537     }
2538 
2539   return ret;
2540 }
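
/* For example, in
     p[0] = 1;
     p[1] = 2;
     foo (p);
     p[2] = 3;
   the call to foo may read or clobber *p, so it terminates the chain that
   recorded the first two stores; the store p[2] = 3 then starts a new
   chain.  */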
2541 
2542 /* Helper function.  Terminate the recorded chain storing to base object
2543    BASE.  Return true if the merging and output was successful.  The m_stores
2544    entry is removed after the processing in any case.  */
2545 
2546 bool
2547 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2548 {
2549   m_n_stores -= chain_info->m_store_info.length ();
2550   m_n_chains--;
2551   bool ret = chain_info->terminate_and_process_chain ();
2552   m_stores.remove (chain_info->base_addr);
2553   delete chain_info;
2554   return ret;
2555 }
2556 
2557 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2558    may clobber REF.  FIRST and LAST must have non-NULL vdef.  We want to
2559    be able to sink the load of REF across the stores between FIRST and LAST, up
2560    to right before LAST.  */
2561 
2562 bool
2563 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2564 {
2565   ao_ref r;
2566   ao_ref_init (&r, ref);
2567   unsigned int count = 0;
2568   tree vop = gimple_vdef (last);
2569   gimple *stmt;
2570 
2571   /* Return true conservatively if the basic blocks are different.  */
2572   if (gimple_bb (first) != gimple_bb (last))
2573     return true;
2574 
2575   do
2576     {
2577       stmt = SSA_NAME_DEF_STMT (vop);
2578       if (stmt_may_clobber_ref_p_1 (stmt, &r))
2579 	return true;
2580       if (gimple_store_p (stmt)
2581 	  && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2582 	return true;
2583       /* Avoid quadratic compile time by bounding the number of checks
2584 	 we perform.  */
2585       if (++count > MAX_STORE_ALIAS_CHECKS)
2586 	return true;
2587       vop = gimple_vuse (stmt);
2588     }
2589   while (stmt != first);
2590 
2591   return false;
2592 }
2593 
2594 /* Return true if INFO->ops[IDX] is mergeable with the
2595    corresponding loads already in MERGED_STORE group.
2596    BASE_ADDR is the base address of the whole store group.  */
2597 
2598 bool
2599 compatible_load_p (merged_store_group *merged_store,
2600 		   store_immediate_info *info,
2601 		   tree base_addr, int idx)
2602 {
2603   store_immediate_info *infof = merged_store->stores[0];
2604   if (!info->ops[idx].base_addr
2605       || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2606 		   info->bitpos - infof->bitpos)
2607       || !operand_equal_p (info->ops[idx].base_addr,
2608 			   infof->ops[idx].base_addr, 0))
2609     return false;
2610 
2611   store_immediate_info *infol = merged_store->stores.last ();
2612   tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2613   /* In this case all vuses should be the same, e.g.
2614      _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2615      or
2616      _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2617      and we can emit the coalesced load next to any of those loads.  */
2618   if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2619       && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2620     return true;
2621 
2622   /* Otherwise, at least for now require that the load has the same
2623      vuse as the store.  See following examples.  */
2624   if (gimple_vuse (info->stmt) != load_vuse)
2625     return false;
2626 
2627   if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2628       || (infof != infol
2629 	  && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2630     return false;
2631 
2632   /* If the load is from the same location as the store, already
2633      the construction of the immediate chain info guarantees no intervening
2634      stores, so no further checks are needed.  Example:
2635      _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
2636   if (known_eq (info->ops[idx].bitpos, info->bitpos)
2637       && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2638     return true;
2639 
2640   /* Otherwise, we need to punt if any of the loads can be clobbered by any
2641      of the stores in the group, or any other stores in between those.
2642      Previous calls to compatible_load_p ensured that for all the
2643      merged_store->stores IDX loads, no stmts starting with
2644      merged_store->first_stmt and ending right before merged_store->last_stmt
2645      clobbers those loads.  */
2646   gimple *first = merged_store->first_stmt;
2647   gimple *last = merged_store->last_stmt;
2648   /* The stores are sorted by increasing store bitpos, so if the info->stmt
2649      store comes before the so far first load, we'll be changing
2650      merged_store->first_stmt.  In that case we need to give up if
2651      any of the earlier processed loads could be clobbered by the stmts
2652      in the new range.  */
2653   if (info->order < merged_store->first_order)
2654     {
2655       for (store_immediate_info *infoc : merged_store->stores)
2656 	if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2657 	  return false;
2658       first = info->stmt;
2659     }
2660   /* Similarly, we could change merged_store->last_stmt, so ensure
2661      in that case no stmts in the new range clobber any of the earlier
2662      processed loads.  */
2663   else if (info->order > merged_store->last_order)
2664     {
2665       for (store_immediate_info *infoc : merged_store->stores)
2666 	if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2667 	  return false;
2668       last = info->stmt;
2669     }
2670   /* And finally, we'd be adding a new load to the set, ensure it isn't
2671      clobbered in the new range.  */
2672   if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2673     return false;
2674 
2675   /* Otherwise, we are looking for:
2676      _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2677      or
2678      _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
2679   return true;
2680 }
2681 
2682 /* Add all refs loaded to compute VAL to REFS vector.  */
2683 
2684 void
2685 gather_bswap_load_refs (vec<tree> *refs, tree val)
2686 {
2687   if (TREE_CODE (val) != SSA_NAME)
2688     return;
2689 
2690   gimple *stmt = SSA_NAME_DEF_STMT (val);
2691   if (!is_gimple_assign (stmt))
2692     return;
2693 
2694   if (gimple_assign_load_p (stmt))
2695     {
2696       refs->safe_push (gimple_assign_rhs1 (stmt));
2697       return;
2698     }
2699 
2700   switch (gimple_assign_rhs_class (stmt))
2701     {
2702     case GIMPLE_BINARY_RHS:
2703       gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2704       /* FALLTHRU */
2705     case GIMPLE_UNARY_RHS:
2706       gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2707       break;
2708     default:
2709       gcc_unreachable ();
2710     }
2711 }
2712 
2713 /* Check if there are any stores in M_STORE_INFO after index I
2714    (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2715    a potential group ending with END that have their order
2716    smaller than LAST_ORDER.  ALL_INTEGER_CST_P is true if
2717    all the stores already merged and the one under consideration
2718    have rhs_code of INTEGER_CST.  Return true if there are no such stores.
2719    Consider:
2720      MEM[(long long int *)p_28] = 0;
2721      MEM[(long long int *)p_28 + 8B] = 0;
2722      MEM[(long long int *)p_28 + 16B] = 0;
2723      MEM[(long long int *)p_28 + 24B] = 0;
2724      _129 = (int) _130;
2725      MEM[(int *)p_28 + 8B] = _129;
2726      MEM[(int *)p_28].a = -1;
2727    We already have
2728      MEM[(long long int *)p_28] = 0;
2729      MEM[(int *)p_28].a = -1;
2730    stmts in the current group and need to consider if it is safe to
2731    add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2732    There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2733    store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2734    into the group and merging of those 3 stores is successful, merged
2735    stmts will be emitted at the latest store from that group, i.e.
2736    LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2737    The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2738    the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2739    so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2740    into the group.  That way it will be its own store group and will
2741    not be touched.  If ALL_INTEGER_CST_P and there are overlapping
2742    INTEGER_CST stores, those are mergeable using merge_overlapping,
2743    so don't return false for those.
2744 
2745    Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2746    (exclusive), whether they don't overlap the bitrange START to END
2747    and have order in between FIRST_ORDER and LAST_ORDER.  This is to
2748    prevent merging in cases like:
2749      MEM <char[12]> [&b + 8B] = {};
2750      MEM[(short *) &b] = 5;
2751      _5 = *x_4(D);
2752      MEM <long long unsigned int> [&b + 2B] = _5;
2753      MEM[(char *)&b + 16B] = 88;
2754      MEM[(int *)&b + 20B] = 1;
2755    The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2756    be merged with it, because the = _5 store overlaps these and is in between
2757    them in sort_by_order ordering.  If it was merged, the merged store would
2758    go after the = _5 store and thus change behavior.  */
2759 
2760 static bool
2761 check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2762 		  unsigned int i,
2763 		  bool all_integer_cst_p, unsigned int first_order,
2764 		  unsigned int last_order, unsigned HOST_WIDE_INT start,
2765 		  unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2766 		  unsigned end_earlier)
2767 {
2768   unsigned int len = m_store_info.length ();
2769   for (unsigned int j = first_earlier; j < end_earlier; j++)
2770     {
2771       store_immediate_info *info = m_store_info[j];
2772       if (info->order > first_order
2773 	  && info->order < last_order
2774 	  && info->bitpos + info->bitsize > start)
2775 	return false;
2776     }
2777   for (++i; i < len; ++i)
2778     {
2779       store_immediate_info *info = m_store_info[i];
2780       if (info->bitpos >= end)
2781 	break;
2782       if (info->order < last_order
2783 	  && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2784 	return false;
2785     }
2786   return true;
2787 }
2788 
2789 /* Return true if m_store_info[first] and at least one following store
2790    form a group that stores a try_size-bit value which is byte swapped
2791    from a memory load or from some other value, or is an identity of some
2792    value.  This uses the bswap pass APIs.  */
2793 
2794 bool
2795 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2796 					  unsigned int first,
2797 					  unsigned int try_size,
2798 					  unsigned int first_earlier)
2799 {
2800   unsigned int len = m_store_info.length (), last = first;
2801   unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2802   if (width >= try_size)
2803     return false;
2804   for (unsigned int i = first + 1; i < len; ++i)
2805     {
2806       if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2807 	  || m_store_info[i]->lp_nr != merged_store->lp_nr
2808 	  || m_store_info[i]->ins_stmt == NULL)
2809 	return false;
2810       width += m_store_info[i]->bitsize;
2811       if (width >= try_size)
2812 	{
2813 	  last = i;
2814 	  break;
2815 	}
2816     }
2817   if (width != try_size)
2818     return false;
2819 
2820   bool allow_unaligned
2821     = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2822   /* Punt if the combined store would not be aligned and we need alignment.  */
2823   if (!allow_unaligned)
2824     {
2825       unsigned int align = merged_store->align;
2826       unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2827       for (unsigned int i = first + 1; i <= last; ++i)
2828 	{
2829 	  unsigned int this_align;
2830 	  unsigned HOST_WIDE_INT align_bitpos = 0;
2831 	  get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2832 				  &this_align, &align_bitpos);
2833 	  if (this_align > align)
2834 	    {
2835 	      align = this_align;
2836 	      align_base = m_store_info[i]->bitpos - align_bitpos;
2837 	    }
2838 	}
2839       unsigned HOST_WIDE_INT align_bitpos
2840 	= (m_store_info[first]->bitpos - align_base) & (align - 1);
2841       if (align_bitpos)
2842 	align = least_bit_hwi (align_bitpos);
2843       if (align < try_size)
2844 	return false;
2845     }
2846 
2847   tree type;
2848   switch (try_size)
2849     {
2850     case 16: type = uint16_type_node; break;
2851     case 32: type = uint32_type_node; break;
2852     case 64: type = uint64_type_node; break;
2853     default: gcc_unreachable ();
2854     }
2855   struct symbolic_number n;
2856   gimple *ins_stmt = NULL;
2857   int vuse_store = -1;
2858   unsigned int first_order = merged_store->first_order;
2859   unsigned int last_order = merged_store->last_order;
2860   gimple *first_stmt = merged_store->first_stmt;
2861   gimple *last_stmt = merged_store->last_stmt;
2862   unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2863   store_immediate_info *infof = m_store_info[first];
2864 
2865   for (unsigned int i = first; i <= last; ++i)
2866     {
2867       store_immediate_info *info = m_store_info[i];
2868       struct symbolic_number this_n = info->n;
2869       this_n.type = type;
2870       if (!this_n.base_addr)
2871 	this_n.range = try_size / BITS_PER_UNIT;
2872       else
2873 	/* Update vuse in case it has been changed by output_merged_stores.  */
2874 	this_n.vuse = gimple_vuse (info->ins_stmt);
2875       unsigned int bitpos = info->bitpos - infof->bitpos;
2876       if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2877 			    BYTES_BIG_ENDIAN
2878 			    ? try_size - info->bitsize - bitpos
2879 			    : bitpos))
2880 	return false;
2881       if (this_n.base_addr && vuse_store)
2882 	{
2883 	  unsigned int j;
2884 	  for (j = first; j <= last; ++j)
2885 	    if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2886 	      break;
2887 	  if (j > last)
2888 	    {
2889 	      if (vuse_store == 1)
2890 		return false;
2891 	      vuse_store = 0;
2892 	    }
2893 	}
2894       if (i == first)
2895 	{
2896 	  n = this_n;
2897 	  ins_stmt = info->ins_stmt;
2898 	}
2899       else
2900 	{
2901 	  if (n.base_addr && n.vuse != this_n.vuse)
2902 	    {
2903 	      if (vuse_store == 0)
2904 		return false;
2905 	      vuse_store = 1;
2906 	    }
2907 	  if (info->order > last_order)
2908 	    {
2909 	      last_order = info->order;
2910 	      last_stmt = info->stmt;
2911 	    }
2912 	  else if (info->order < first_order)
2913 	    {
2914 	      first_order = info->order;
2915 	      first_stmt = info->stmt;
2916 	    }
2917 	  end = MAX (end, info->bitpos + info->bitsize);
2918 
2919 	  ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2920 					     &this_n, &n, BIT_IOR_EXPR);
2921 	  if (ins_stmt == NULL)
2922 	    return false;
2923 	}
2924     }
2925 
2926   uint64_t cmpxchg, cmpnop;
2927   bool cast64_to_32;
2928   find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
2929 
2930   /* A complete byte swap should make the symbolic number start with
2931      the largest digit in the highest order byte.  An unchanged symbolic
2932      number indicates a read with the same endianness as the target.  */
2933   if (n.n != cmpnop && n.n != cmpxchg)
2934     return false;
2935 
2936   /* For now.  */
2937   if (cast64_to_32)
2938     return false;
2939 
2940   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2941     return false;
2942 
2943   if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2944 			 merged_store->start, end, first_earlier, first))
2945     return false;
2946 
2947   /* Don't handle memory copy this way if normal non-bswap processing
2948      would handle it too.  */
2949   if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2950     {
2951       unsigned int i;
2952       for (i = first; i <= last; ++i)
2953 	if (m_store_info[i]->rhs_code != MEM_REF)
2954 	  break;
2955       if (i == last + 1)
2956 	return false;
2957     }
2958 
2959   if (n.n == cmpxchg)
2960     switch (try_size)
2961       {
2962       case 16:
2963 	/* Will emit LROTATE_EXPR.  */
2964 	break;
2965       case 32:
2966 	if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2967 	    && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2968 	  break;
2969 	return false;
2970       case 64:
2971 	if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2972 	    && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2973 	  break;
2974 	return false;
2975       default:
2976 	gcc_unreachable ();
2977       }
2978 
2979   if (!allow_unaligned && n.base_addr)
2980     {
2981       unsigned int align = get_object_alignment (n.src);
2982       if (align < try_size)
2983 	return false;
2984     }
2985 
2986   /* If each load has vuse of the corresponding store, need to verify
2987      the loads can be sunk right before the last store.  */
2988   if (vuse_store == 1)
2989     {
2990       auto_vec<tree, 64> refs;
2991       for (unsigned int i = first; i <= last; ++i)
2992 	gather_bswap_load_refs (&refs,
2993 				gimple_assign_rhs1 (m_store_info[i]->stmt));
2994 
2995       for (tree ref : refs)
2996 	if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2997 	  return false;
2998       n.vuse = NULL_TREE;
2999     }
3000 
3001   infof->n = n;
3002   infof->ins_stmt = ins_stmt;
3003   for (unsigned int i = first; i <= last; ++i)
3004     {
3005       m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
3006       m_store_info[i]->ops[0].base_addr = NULL_TREE;
3007       m_store_info[i]->ops[1].base_addr = NULL_TREE;
3008       if (i != first)
3009 	merged_store->merge_into (m_store_info[i]);
3010     }
3011 
3012   return true;
3013 }
3014 
3015 /* Go through the candidate stores recorded in m_store_info and merge them
3016    into merged_store_group objects recorded into m_merged_store_groups
3017    representing the widened stores.  Return true if coalescing was successful
3018    and the number of widened stores is fewer than the original number
3019    of stores.  */
3020 
3021 bool
3022 imm_store_chain_info::coalesce_immediate_stores ()
3023 {
3024   /* Anything less can't be processed.  */
3025   if (m_store_info.length () < 2)
3026     return false;
3027 
3028   if (dump_file && (dump_flags & TDF_DETAILS))
3029     fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
3030 	     m_store_info.length ());
3031 
3032   store_immediate_info *info;
3033   unsigned int i, ignore = 0;
3034   unsigned int first_earlier = 0;
3035   unsigned int end_earlier = 0;
3036 
3037   /* Order the stores by the bitposition they write to.  */
3038   m_store_info.qsort (sort_by_bitpos);
3039 
3040   info = m_store_info[0];
3041   merged_store_group *merged_store = new merged_store_group (info);
3042   if (dump_file && (dump_flags & TDF_DETAILS))
3043     fputs ("New store group\n", dump_file);
3044 
3045   FOR_EACH_VEC_ELT (m_store_info, i, info)
3046     {
3047       unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3048 
3049       if (i <= ignore)
3050 	goto done;
3051 
3052       while (first_earlier < end_earlier
3053 	     && (m_store_info[first_earlier]->bitpos
3054 		 + m_store_info[first_earlier]->bitsize
3055 		 <= merged_store->start))
3056 	first_earlier++;
3057 
3058       /* First try to handle group of stores like:
3059 	 p[0] = data >> 24;
3060 	 p[1] = data >> 16;
3061 	 p[2] = data >> 8;
3062 	 p[3] = data;
3063 	 using the bswap framework.  */
3064       if (info->bitpos == merged_store->start + merged_store->width
3065 	  && merged_store->stores.length () == 1
3066 	  && merged_store->stores[0]->ins_stmt != NULL
3067 	  && info->lp_nr == merged_store->lp_nr
3068 	  && info->ins_stmt != NULL)
3069 	{
3070 	  unsigned int try_size;
3071 	  for (try_size = 64; try_size >= 16; try_size >>= 1)
3072 	    if (try_coalesce_bswap (merged_store, i - 1, try_size,
3073 				    first_earlier))
3074 	      break;
3075 
3076 	  if (try_size >= 16)
3077 	    {
3078 	      ignore = i + merged_store->stores.length () - 1;
3079 	      m_merged_store_groups.safe_push (merged_store);
3080 	      if (ignore < m_store_info.length ())
3081 		{
3082 		  merged_store = new merged_store_group (m_store_info[ignore]);
3083 		  end_earlier = ignore;
3084 		}
3085 	      else
3086 		merged_store = NULL;
3087 	      goto done;
3088 	    }
3089 	}
3090 
3091       new_bitregion_start
3092 	= MIN (merged_store->bitregion_start, info->bitregion_start);
3093       new_bitregion_end
3094 	= MAX (merged_store->bitregion_end, info->bitregion_end);
3095 
3096       if (info->order >= merged_store->first_nonmergeable_order
3097 	  || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
3098 	      > (unsigned) param_store_merging_max_size))
3099 	;
3100 
3101       /* |---store 1---|
3102 	       |---store 2---|
3103 	 Overlapping stores.  */
3104       else if (IN_RANGE (info->bitpos, merged_store->start,
3105 			 merged_store->start + merged_store->width - 1)
3106 	       /* |---store 1---||---store 2---|
3107 		  Handle also the consecutive INTEGER_CST stores case here,
3108 		  as we have here the code to deal with overlaps.  */
3109 	       || (info->bitregion_start <= merged_store->bitregion_end
3110 		   && info->rhs_code == INTEGER_CST
3111 		   && merged_store->only_constants
3112 		   && merged_store->can_be_merged_into (info)))
3113 	{
3114 	  /* Only allow overlapping stores of constants.  */
3115 	  if (info->rhs_code == INTEGER_CST
3116 	      && merged_store->only_constants
3117 	      && info->lp_nr == merged_store->lp_nr)
3118 	    {
3119 	      unsigned int first_order
3120 		= MIN (merged_store->first_order, info->order);
3121 	      unsigned int last_order
3122 		= MAX (merged_store->last_order, info->order);
3123 	      unsigned HOST_WIDE_INT end
3124 		= MAX (merged_store->start + merged_store->width,
3125 		       info->bitpos + info->bitsize);
3126 	      if (check_no_overlap (m_store_info, i, true, first_order,
3127 				    last_order, merged_store->start, end,
3128 				    first_earlier, end_earlier))
3129 		{
3130 		  /* check_no_overlap call above made sure there are no
3131 		     overlapping stores with non-INTEGER_CST rhs_code
3132 		     in between the first and last of the stores we've
3133 		     just merged.  If there are any INTEGER_CST rhs_code
3134 		     stores in between, we need to merge_overlapping them
3135 		     even if in the sort_by_bitpos order there are other
3136 		     overlapping stores in between.  Keep those stores as is.
3137 		     Example:
3138 			MEM[(int *)p_28] = 0;
3139 			MEM[(char *)p_28 + 3B] = 1;
3140 			MEM[(char *)p_28 + 1B] = 2;
3141 			MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3142 		     We can't merge the zero store with the store of two and
3143 		     not merge anything else, because the store of one is
3144 		     in the original order in between those two, but in
3145 		     sort_by_bitpos order it comes after the last store that
3146 		     we can't merge with them.  We can merge the first 3 stores
3147 		     and keep the last store as is though.  */
3148 		  unsigned int len = m_store_info.length ();
3149 		  unsigned int try_order = last_order;
3150 		  unsigned int first_nonmergeable_order;
3151 		  unsigned int k;
3152 		  bool last_iter = false;
3153 		  int attempts = 0;
3154 		  do
3155 		    {
3156 		      unsigned int max_order = 0;
3157 		      unsigned int min_order = first_order;
3158 		      unsigned first_nonmergeable_int_order = ~0U;
3159 		      unsigned HOST_WIDE_INT this_end = end;
3160 		      k = i;
3161 		      first_nonmergeable_order = ~0U;
3162 		      for (unsigned int j = i + 1; j < len; ++j)
3163 			{
3164 			  store_immediate_info *info2 = m_store_info[j];
3165 			  if (info2->bitpos >= this_end)
3166 			    break;
3167 			  if (info2->order < try_order)
3168 			    {
3169 			      if (info2->rhs_code != INTEGER_CST
3170 				  || info2->lp_nr != merged_store->lp_nr)
3171 				{
3172 				  /* Normally check_no_overlap makes sure this
3173 				     doesn't happen, but if end grows below,
3174 				     then we need to process more stores than
3175 				     check_no_overlap verified.  Example:
3176 				      MEM[(int *)p_5] = 0;
3177 				      MEM[(short *)p_5 + 3B] = 1;
3178 				      MEM[(char *)p_5 + 4B] = _9;
3179 				      MEM[(char *)p_5 + 2B] = 2;  */
3180 				  k = 0;
3181 				  break;
3182 				}
3183 			      k = j;
3184 			      min_order = MIN (min_order, info2->order);
3185 			      this_end = MAX (this_end,
3186 					      info2->bitpos + info2->bitsize);
3187 			    }
3188 			  else if (info2->rhs_code == INTEGER_CST
3189 				   && info2->lp_nr == merged_store->lp_nr
3190 				   && !last_iter)
3191 			    {
3192 			      max_order = MAX (max_order, info2->order + 1);
3193 			      first_nonmergeable_int_order
3194 				= MIN (first_nonmergeable_int_order,
3195 				       info2->order);
3196 			    }
3197 			  else
3198 			    first_nonmergeable_order
3199 			      = MIN (first_nonmergeable_order, info2->order);
3200 			}
3201 		      if (k > i
3202 			  && !check_no_overlap (m_store_info, len - 1, true,
3203 						min_order, try_order,
3204 						merged_store->start, this_end,
3205 						first_earlier, end_earlier))
3206 			k = 0;
3207 		      if (k == 0)
3208 			{
3209 			  if (last_order == try_order)
3210 			    break;
3211 			  /* If this failed, but only because we grew
3212 			     try_order, retry with the last working one,
3213 			     so that we merge at least something.  */
3214 			  try_order = last_order;
3215 			  last_iter = true;
3216 			  continue;
3217 			}
3218 		      last_order = try_order;
3219 		      /* Retry with a larger try_order to see if we could
3220 			 merge some further INTEGER_CST stores.  */
3221 		      if (max_order
3222 			  && (first_nonmergeable_int_order
3223 			      < first_nonmergeable_order))
3224 			{
3225 			  try_order = MIN (max_order,
3226 					   first_nonmergeable_order);
3227 			  try_order
3228 			    = MIN (try_order,
3229 				   merged_store->first_nonmergeable_order);
3230 			  if (try_order > last_order && ++attempts < 16)
3231 			    continue;
3232 			}
3233 		      first_nonmergeable_order
3234 			= MIN (first_nonmergeable_order,
3235 			       first_nonmergeable_int_order);
3236 		      end = this_end;
3237 		      break;
3238 		    }
3239 		  while (1);
3240 
3241 		  if (k != 0)
3242 		    {
3243 		      merged_store->merge_overlapping (info);
3244 
3245 		      merged_store->first_nonmergeable_order
3246 			= MIN (merged_store->first_nonmergeable_order,
3247 			       first_nonmergeable_order);
3248 
3249 		      for (unsigned int j = i + 1; j <= k; j++)
3250 			{
3251 			  store_immediate_info *info2 = m_store_info[j];
3252 			  gcc_assert (info2->bitpos < end);
3253 			  if (info2->order < last_order)
3254 			    {
3255 			      gcc_assert (info2->rhs_code == INTEGER_CST);
3256 			      if (info != info2)
3257 				merged_store->merge_overlapping (info2);
3258 			    }
3259 			  /* Other stores are kept and not merged in any
3260 			     way.  */
3261 			}
3262 		      ignore = k;
3263 		      goto done;
3264 		    }
3265 		}
3266 	    }
3267 	}
3268       /* |---store 1---||---store 2---|
3269 	 This store is consecutive to the previous one.
3270 	 Merge it into the current store group.  There can be gaps in between
3271 	 the stores, but there can't be gaps in between bitregions.  */
3272       else if (info->bitregion_start <= merged_store->bitregion_end
3273 	       && merged_store->can_be_merged_into (info))
3274 	{
3275 	  store_immediate_info *infof = merged_store->stores[0];
3276 
3277 	  /* All the rhs_code ops that take 2 operands are commutative;
3278 	     swap the operands if that could make them compatible.  */
3279 	  if (infof->ops[0].base_addr
3280 	      && infof->ops[1].base_addr
3281 	      && info->ops[0].base_addr
3282 	      && info->ops[1].base_addr
3283 	      && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3284 			   info->bitpos - infof->bitpos)
3285 	      && operand_equal_p (info->ops[1].base_addr,
3286 				  infof->ops[0].base_addr, 0))
3287 	    {
3288 	      std::swap (info->ops[0], info->ops[1]);
3289 	      info->ops_swapped_p = true;
3290 	    }
3291 	  if (check_no_overlap (m_store_info, i, false,
3292 				MIN (merged_store->first_order, info->order),
3293 				MAX (merged_store->last_order, info->order),
3294 				merged_store->start,
3295 				MAX (merged_store->start + merged_store->width,
3296 				     info->bitpos + info->bitsize),
3297 				first_earlier, end_earlier))
3298 	    {
3299 	      /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
3300 	      if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3301 		{
3302 		  info->rhs_code = BIT_INSERT_EXPR;
3303 		  info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3304 		  info->ops[0].base_addr = NULL_TREE;
3305 		}
3306 	      else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3307 		{
3308 		  for (store_immediate_info *infoj : merged_store->stores)
3309 		    {
3310 		      infoj->rhs_code = BIT_INSERT_EXPR;
3311 		      infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3312 		      infoj->ops[0].base_addr = NULL_TREE;
3313 		    }
3314 		  merged_store->bit_insertion = true;
3315 		}
3316 	      if ((infof->ops[0].base_addr
3317 		   ? compatible_load_p (merged_store, info, base_addr, 0)
3318 		   : !info->ops[0].base_addr)
3319 		  && (infof->ops[1].base_addr
3320 		      ? compatible_load_p (merged_store, info, base_addr, 1)
3321 		      : !info->ops[1].base_addr))
3322 		{
3323 		  merged_store->merge_into (info);
3324 		  goto done;
3325 		}
3326 	    }
3327 	}
3328 
3329       /* |---store 1---| <gap> |---store 2---|.
3330 	 Gap between stores or the rhs not compatible.  Start a new group.  */
3331 
3332       /* Try to apply all the stores recorded for the group to determine
3333 	 the bitpattern they write and discard it if that fails.
3334 	 This will also reject single-store groups.  */
3335       if (merged_store->apply_stores ())
3336 	m_merged_store_groups.safe_push (merged_store);
3337       else
3338 	delete merged_store;
3339 
3340       merged_store = new merged_store_group (info);
3341       end_earlier = i;
3342       if (dump_file && (dump_flags & TDF_DETAILS))
3343 	fputs ("New store group\n", dump_file);
3344 
3345     done:
3346       if (dump_file && (dump_flags & TDF_DETAILS))
3347 	{
3348 	  fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3349 			      " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3350 		   i, info->bitsize, info->bitpos);
3351 	  print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3352 	  fputc ('\n', dump_file);
3353 	}
3354     }
3355 
3356   /* Record or discard the last store group.  */
3357   if (merged_store)
3358     {
3359       if (merged_store->apply_stores ())
3360 	m_merged_store_groups.safe_push (merged_store);
3361       else
3362 	delete merged_store;
3363     }
3364 
3365   gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3366 
3367   bool success
3368     = !m_merged_store_groups.is_empty ()
3369       && m_merged_store_groups.length () < m_store_info.length ();
3370 
3371   if (success && dump_file)
3372     fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3373 	     m_merged_store_groups.length ());
3374 
3375   return success;
3376 }
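
/* Illustrative example (not from the GCC sources): the kind of coalescing
   performed above.  Given a chain of four 1-byte constant stores to
   adjacent bytes of a buffer p

     p[0] = 1;
     p[1] = 2;
     p[2] = 3;
     p[3] = 4;

   all four store_immediate_info records end up in one merged_store_group
   covering 4 consecutive bytes, which output_merged_store can later emit
   as a single 32-bit store of 0x04030201 on a little-endian target that
   supports such a store.  Overlapping stores are only merged when every
   one of them stores a constant (the only_constants path above).  */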
3377 
3378 /* Return the type to use for the merged stores or loads described by STMTS.
3379    This is needed to get the alias sets right.  If IS_LOAD, look at the rhs
3380    of each statement, otherwise at the lhs.  Additionally set *CLIQUEP and
3381    *BASEP to MR_DEPENDENCE_* of the MEM_REFs if any.  */
3382 
3383 static tree
3384 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3385 			  unsigned short *cliquep, unsigned short *basep)
3386 {
3387   gimple *stmt;
3388   unsigned int i;
3389   tree type = NULL_TREE;
3390   tree ret = NULL_TREE;
3391   *cliquep = 0;
3392   *basep = 0;
3393 
3394   FOR_EACH_VEC_ELT (stmts, i, stmt)
3395     {
3396       tree ref = is_load ? gimple_assign_rhs1 (stmt)
3397 			 : gimple_assign_lhs (stmt);
3398       tree type1 = reference_alias_ptr_type (ref);
3399       tree base = get_base_address (ref);
3400 
3401       if (i == 0)
3402 	{
3403 	  if (TREE_CODE (base) == MEM_REF)
3404 	    {
3405 	      *cliquep = MR_DEPENDENCE_CLIQUE (base);
3406 	      *basep = MR_DEPENDENCE_BASE (base);
3407 	    }
3408 	  ret = type = type1;
3409 	  continue;
3410 	}
3411       if (!alias_ptr_types_compatible_p (type, type1))
3412 	ret = ptr_type_node;
3413       if (TREE_CODE (base) != MEM_REF
3414 	  || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3415 	  || *basep != MR_DEPENDENCE_BASE (base))
3416 	{
3417 	  *cliquep = 0;
3418 	  *basep = 0;
3419 	}
3420     }
3421   return ret;
3422 }
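
/* Illustrative note (not from the GCC sources): if the statements passed in
   access memory through incompatible alias pointer types, e.g. some stores
   through a char * and others through an int *, the function above falls
   back to ptr_type_node so that the widened access gets a conservative
   alias type, and the MR_DEPENDENCE_CLIQUE/BASE information is dropped
   unless it is identical for every MEM_REF.  */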
3423 
3424 /* Return the location_t information we can find among the statements
3425    in STMTS.  */
3426 
3427 static location_t
3428 get_location_for_stmts (vec<gimple *> &stmts)
3429 {
3430   for (gimple *stmt : stmts)
3431     if (gimple_has_location (stmt))
3432       return gimple_location (stmt);
3433 
3434   return UNKNOWN_LOCATION;
3435 }
3436 
3437 /* Used to describe a store resulting from splitting a wide store into
3438    smaller regularly-sized stores in split_group.  */
3439 
3440 class split_store
3441 {
3442 public:
3443   unsigned HOST_WIDE_INT bytepos;
3444   unsigned HOST_WIDE_INT size;
3445   unsigned HOST_WIDE_INT align;
3446   auto_vec<store_immediate_info *> orig_stores;
3447   /* True if there is a single orig stmt covering the whole split store.  */
3448   bool orig;
3449   split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3450 	       unsigned HOST_WIDE_INT);
3451 };
3452 
3453 /* Simple constructor.  */
3454 
3455 split_store::split_store (unsigned HOST_WIDE_INT bp,
3456 			  unsigned HOST_WIDE_INT sz,
3457 			  unsigned HOST_WIDE_INT al)
3458 			  : bytepos (bp), size (sz), align (al), orig (false)
3459 {
3460   orig_stores.create (0);
3461 }
3462 
3463 /* Record all stores in GROUP that write to the region that starts at BITPOS
3464    and is of size BITSIZE.  Record infos for such statements in STORES if
3465    non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
3466    if there is exactly one original store in the range (in that case ignore
3467    clobber stmts, unless there are only clobber stmts).  */
3468 
3469 static store_immediate_info *
3470 find_constituent_stores (class merged_store_group *group,
3471 			 vec<store_immediate_info *> *stores,
3472 			 unsigned int *first,
3473 			 unsigned HOST_WIDE_INT bitpos,
3474 			 unsigned HOST_WIDE_INT bitsize)
3475 {
3476   store_immediate_info *info, *ret = NULL;
3477   unsigned int i;
3478   bool second = false;
3479   bool update_first = true;
3480   unsigned HOST_WIDE_INT end = bitpos + bitsize;
3481   for (i = *first; group->stores.iterate (i, &info); ++i)
3482     {
3483       unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3484       unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3485       if (stmt_end <= bitpos)
3486 	{
3487 	  /* BITPOS passed to this function never decreases from within the
3488 	     same split_group call, so optimize and don't scan info records
3489 	     which are known to end before or at BITPOS next time.
3490 	     Only do it if all stores before this one also pass this.  */
3491 	  if (update_first)
3492 	    *first = i + 1;
3493 	  continue;
3494 	}
3495       else
3496 	update_first = false;
3497 
3498       /* The stores in GROUP are ordered by bitposition so if we're past
3499 	 the region for this group return early.  */
3500       if (stmt_start >= end)
3501 	return ret;
3502 
3503       if (gimple_clobber_p (info->stmt))
3504 	{
3505 	  if (stores)
3506 	    stores->safe_push (info);
3507 	  if (ret == NULL)
3508 	    ret = info;
3509 	  continue;
3510 	}
3511       if (stores)
3512 	{
3513 	  stores->safe_push (info);
3514 	  if (ret && !gimple_clobber_p (ret->stmt))
3515 	    {
3516 	      ret = NULL;
3517 	      second = true;
3518 	    }
3519 	}
3520       else if (ret && !gimple_clobber_p (ret->stmt))
3521 	return NULL;
3522       if (!second)
3523 	ret = info;
3524     }
3525   return ret;
3526 }
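
/* Usage sketch (illustrative, with hypothetical values): split_group below
   queries a bit range of the group roughly like

     unsigned int first = 0;
     store_immediate_info *info
       = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);

   which returns the single original non-clobber store intersecting
   [try_bitpos, try_bitpos + try_size) if there is exactly one, and NULL if
   there are several; passing a vector instead of NULL additionally collects
   every store that intersects the range.  FIRST is advanced past stores
   ending at or before try_bitpos, so later queries at higher bit positions
   do not rescan them.  */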
3527 
3528 /* Return how many SSA_NAMEs used to compute the value stored by the INFO
3529    store have multiple uses.  If any SSA_NAME has multiple uses, also
3530    count the statements needed to compute it.  */
3531 
3532 static unsigned
3533 count_multiple_uses (store_immediate_info *info)
3534 {
3535   gimple *stmt = info->stmt;
3536   unsigned ret = 0;
3537   switch (info->rhs_code)
3538     {
3539     case INTEGER_CST:
3540     case STRING_CST:
3541       return 0;
3542     case BIT_AND_EXPR:
3543     case BIT_IOR_EXPR:
3544     case BIT_XOR_EXPR:
3545       if (info->bit_not_p)
3546 	{
3547 	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
3548 	    ret = 1; /* Fall through below to return
3549 			the BIT_NOT_EXPR stmt and then
3550 			BIT_{AND,IOR,XOR}_EXPR and anything it
3551 			uses.  */
3552 	  else
3553 	    /* After this, stmt is the BIT_NOT_EXPR.  */
3554 	    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3555 	}
3556       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3557 	{
3558 	  ret += 1 + info->ops[0].bit_not_p;
3559 	  if (info->ops[1].base_addr)
3560 	    ret += 1 + info->ops[1].bit_not_p;
3561 	  return ret + 1;
3562 	}
3563       stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3564       /* stmt is now the BIT_*_EXPR.  */
3565       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3566 	ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3567       else if (info->ops[info->ops_swapped_p].bit_not_p)
3568 	{
3569 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3570 	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3571 	    ++ret;
3572 	}
3573       if (info->ops[1].base_addr == NULL_TREE)
3574 	{
3575 	  gcc_checking_assert (!info->ops_swapped_p);
3576 	  return ret;
3577 	}
3578       if (!has_single_use (gimple_assign_rhs2 (stmt)))
3579 	ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3580       else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3581 	{
3582 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3583 	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3584 	    ++ret;
3585 	}
3586       return ret;
3587     case MEM_REF:
3588       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3589 	return 1 + info->ops[0].bit_not_p;
3590       else if (info->ops[0].bit_not_p)
3591 	{
3592 	  stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3593 	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
3594 	    return 1;
3595 	}
3596       return 0;
3597     case BIT_INSERT_EXPR:
3598       return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3599     default:
3600       gcc_unreachable ();
3601     }
3602 }
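
/* Illustrative example (not from the GCC sources): for a recorded store of
   the form

     _1 = *q;        // _1 also has a use elsewhere
     _2 = _1 ^ 42;
     *p = _2;

   with rhs_code BIT_XOR_EXPR, _2 has a single use but _1 does not, so
   count_multiple_uses returns 1: the statement computing _1 cannot become
   dead even if the store gets merged away.  split_group uses this estimate
   when comparing the original statement count against the new one.  */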
3603 
3604 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3605    vector (if non-NULL) with split_store structs describing the byte offset
3606    (from the base), the bit size and alignment of each store as well as the
3607    original statements involved in each such split group.
3608    This is to separate the splitting strategy from the statement
3609    building/emission/linking done in output_merged_store.
3610    Return number of new stores.
3611    If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3612    If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3613    BZERO_FIRST may be true only when the first store covers the whole group
3614    and clears it; if BZERO_FIRST is true, keep that first store in the set
3615    unmodified and emit further stores for the overrides only.
3616    If SPLIT_STORES is NULL, it is just a dry run to count number of
3617    new stores.  */
3618 
3619 static unsigned int
3620 split_group (merged_store_group *group, bool allow_unaligned_store,
3621 	     bool allow_unaligned_load, bool bzero_first,
3622 	     vec<split_store *> *split_stores,
3623 	     unsigned *total_orig,
3624 	     unsigned *total_new)
3625 {
3626   unsigned HOST_WIDE_INT pos = group->bitregion_start;
3627   unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3628   unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3629   unsigned HOST_WIDE_INT group_align = group->align;
3630   unsigned HOST_WIDE_INT align_base = group->align_base;
3631   unsigned HOST_WIDE_INT group_load_align = group_align;
3632   bool any_orig = false;
3633 
3634   gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3635 
3636   /* For bswap framework using sets of stores, all the checking has been done
3637      earlier in try_coalesce_bswap and the result always needs to be emitted
3638      as a single store.  Likewise for string concatenation.  */
3639   if (group->stores[0]->rhs_code == LROTATE_EXPR
3640       || group->stores[0]->rhs_code == NOP_EXPR
3641       || group->string_concatenation)
3642     {
3643       gcc_assert (!bzero_first);
3644       if (total_orig)
3645 	{
3646 	  /* Avoid the old/new stmt count heuristics.  It should
3647 	     always be beneficial.  */
3648 	  total_new[0] = 1;
3649 	  total_orig[0] = 2;
3650 	}
3651 
3652       if (split_stores)
3653 	{
3654 	  unsigned HOST_WIDE_INT align_bitpos
3655 	    = (group->start - align_base) & (group_align - 1);
3656 	  unsigned HOST_WIDE_INT align = group_align;
3657 	  if (align_bitpos)
3658 	    align = least_bit_hwi (align_bitpos);
3659 	  bytepos = group->start / BITS_PER_UNIT;
3660 	  split_store *store
3661 	    = new split_store (bytepos, group->width, align);
3662 	  unsigned int first = 0;
3663 	  find_constituent_stores (group, &store->orig_stores,
3664 				   &first, group->start, group->width);
3665 	  split_stores->safe_push (store);
3666 	}
3667 
3668       return 1;
3669     }
3670 
3671   unsigned int ret = 0, first = 0;
3672   unsigned HOST_WIDE_INT try_pos = bytepos;
3673 
3674   if (total_orig)
3675     {
3676       unsigned int i;
3677       store_immediate_info *info = group->stores[0];
3678 
3679       total_new[0] = 0;
3680       total_orig[0] = 1; /* The orig store.  */
3681       info = group->stores[0];
3682       if (info->ops[0].base_addr)
3683 	total_orig[0]++;
3684       if (info->ops[1].base_addr)
3685 	total_orig[0]++;
3686       switch (info->rhs_code)
3687 	{
3688 	case BIT_AND_EXPR:
3689 	case BIT_IOR_EXPR:
3690 	case BIT_XOR_EXPR:
3691 	  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
3692 	  break;
3693 	default:
3694 	  break;
3695 	}
3696       total_orig[0] *= group->stores.length ();
3697 
3698       FOR_EACH_VEC_ELT (group->stores, i, info)
3699 	{
3700 	  total_new[0] += count_multiple_uses (info);
3701 	  total_orig[0] += (info->bit_not_p
3702 			    + info->ops[0].bit_not_p
3703 			    + info->ops[1].bit_not_p);
3704 	}
3705     }
3706 
3707   if (!allow_unaligned_load)
3708     for (int i = 0; i < 2; ++i)
3709       if (group->load_align[i])
3710 	group_load_align = MIN (group_load_align, group->load_align[i]);
3711 
3712   if (bzero_first)
3713     {
3714       store_immediate_info *gstore;
3715       FOR_EACH_VEC_ELT (group->stores, first, gstore)
3716 	if (!gimple_clobber_p (gstore->stmt))
3717 	  break;
3718       ++first;
3719       ret = 1;
3720       if (split_stores)
3721 	{
3722 	  split_store *store
3723 	    = new split_store (bytepos, gstore->bitsize, align_base);
3724 	  store->orig_stores.safe_push (gstore);
3725 	  store->orig = true;
3726 	  any_orig = true;
3727 	  split_stores->safe_push (store);
3728 	}
3729     }
3730 
3731   while (size > 0)
3732     {
3733       if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3734 	  && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3735 	      || (bzero_first && group->val[try_pos - bytepos] == 0)))
3736 	{
3737 	  /* Skip padding bytes.  */
3738 	  ++try_pos;
3739 	  size -= BITS_PER_UNIT;
3740 	  continue;
3741 	}
3742 
3743       unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3744       unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3745       unsigned HOST_WIDE_INT align_bitpos
3746 	= (try_bitpos - align_base) & (group_align - 1);
3747       unsigned HOST_WIDE_INT align = group_align;
3748       bool found_orig = false;
3749       if (align_bitpos)
3750 	align = least_bit_hwi (align_bitpos);
3751       if (!allow_unaligned_store)
3752 	try_size = MIN (try_size, align);
3753       if (!allow_unaligned_load)
3754 	{
3755 	  /* If we can't do or don't want to do unaligned stores
3756 	     as well as loads, we need to take the loads into account
3757 	     as well.  */
3758 	  unsigned HOST_WIDE_INT load_align = group_load_align;
3759 	  align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3760 	  if (align_bitpos)
3761 	    load_align = least_bit_hwi (align_bitpos);
3762 	  for (int i = 0; i < 2; ++i)
3763 	    if (group->load_align[i])
3764 	      {
3765 		align_bitpos
3766 		  = known_alignment (try_bitpos
3767 				     - group->stores[0]->bitpos
3768 				     + group->stores[0]->ops[i].bitpos
3769 				     - group->load_align_base[i]);
3770 		if (align_bitpos & (group_load_align - 1))
3771 		  {
3772 		    unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3773 		    load_align = MIN (load_align, a);
3774 		  }
3775 	      }
3776 	  try_size = MIN (try_size, load_align);
3777 	}
3778       store_immediate_info *info
3779 	= find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3780       if (info && !gimple_clobber_p (info->stmt))
3781 	{
3782 	  /* If there is just one original statement for the range, see if
3783 	     we can just reuse the original store which could be even larger
3784 	     than try_size.  */
3785 	  unsigned HOST_WIDE_INT stmt_end
3786 	    = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3787 	  info = find_constituent_stores (group, NULL, &first, try_bitpos,
3788 					  stmt_end - try_bitpos);
3789 	  if (info && info->bitpos >= try_bitpos)
3790 	    {
3791 	      store_immediate_info *info2 = NULL;
3792 	      unsigned int first_copy = first;
3793 	      if (info->bitpos > try_bitpos
3794 		  && stmt_end - try_bitpos <= try_size)
3795 		{
3796 		  info2 = find_constituent_stores (group, NULL, &first_copy,
3797 						   try_bitpos,
3798 						   info->bitpos - try_bitpos);
3799 		  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3800 		}
3801 	      if (info2 == NULL && stmt_end - try_bitpos < try_size)
3802 		{
3803 		  info2 = find_constituent_stores (group, NULL, &first_copy,
3804 						   stmt_end,
3805 						   (try_bitpos + try_size)
3806 						   - stmt_end);
3807 		  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3808 		}
3809 	      if (info2 == NULL)
3810 		{
3811 		  try_size = stmt_end - try_bitpos;
3812 		  found_orig = true;
3813 		  goto found;
3814 		}
3815 	    }
3816 	}
3817 
3818       /* Approximate store bitsize for the case when there are no padding
3819 	 bits.  */
3820       while (try_size > size)
3821 	try_size /= 2;
3822       /* Now look for whole padding bytes at the end of that bitsize.  */
3823       for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3824 	if (group->mask[try_pos - bytepos + nonmasked - 1]
3825 	    != (unsigned char) ~0U
3826 	    && (!bzero_first
3827 		|| group->val[try_pos - bytepos + nonmasked - 1] != 0))
3828 	  break;
3829       if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3830 	{
3831 	  /* If entire try_size range is padding, skip it.  */
3832 	  try_pos += try_size / BITS_PER_UNIT;
3833 	  size -= try_size;
3834 	  continue;
3835 	}
3836       /* Otherwise try to decrease try_size if second half, last 3 quarters
3837 	 etc. are padding.  */
3838       nonmasked *= BITS_PER_UNIT;
3839       while (nonmasked <= try_size / 2)
3840 	try_size /= 2;
3841       if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3842 	{
3843 	  /* Now look for whole padding bytes at the start of that bitsize.  */
3844 	  unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3845 	  for (masked = 0; masked < try_bytesize; ++masked)
3846 	    if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3847 		&& (!bzero_first
3848 		    || group->val[try_pos - bytepos + masked] != 0))
3849 	      break;
3850 	  masked *= BITS_PER_UNIT;
3851 	  gcc_assert (masked < try_size);
3852 	  if (masked >= try_size / 2)
3853 	    {
3854 	      while (masked >= try_size / 2)
3855 		{
3856 		  try_size /= 2;
3857 		  try_pos += try_size / BITS_PER_UNIT;
3858 		  size -= try_size;
3859 		  masked -= try_size;
3860 		}
3861 	      /* Need to recompute the alignment, so just retry at the new
3862 		 position.  */
3863 	      continue;
3864 	    }
3865 	}
3866 
3867     found:
3868       ++ret;
3869 
3870       if (split_stores)
3871 	{
3872 	  split_store *store
3873 	    = new split_store (try_pos, try_size, align);
3874 	  info = find_constituent_stores (group, &store->orig_stores,
3875 					  &first, try_bitpos, try_size);
3876 	  if (info
3877 	      && !gimple_clobber_p (info->stmt)
3878 	      && info->bitpos >= try_bitpos
3879 	      && info->bitpos + info->bitsize <= try_bitpos + try_size
3880 	      && (store->orig_stores.length () == 1
3881 		  || found_orig
3882 		  || (info->bitpos == try_bitpos
3883 		      && (info->bitpos + info->bitsize
3884 			  == try_bitpos + try_size))))
3885 	    {
3886 	      store->orig = true;
3887 	      any_orig = true;
3888 	    }
3889 	  split_stores->safe_push (store);
3890 	}
3891 
3892       try_pos += try_size / BITS_PER_UNIT;
3893       size -= try_size;
3894     }
3895 
3896   if (total_orig)
3897     {
3898       unsigned int i;
3899       split_store *store;
3900       /* If we are reusing some original stores and any of the
3901 	 original SSA_NAMEs had multiple uses, we need to subtract
3902 	 those now before we add the new ones.  */
3903       if (total_new[0] && any_orig)
3904 	{
3905 	  FOR_EACH_VEC_ELT (*split_stores, i, store)
3906 	    if (store->orig)
3907 	      total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3908 	}
3909       total_new[0] += ret; /* The new store.  */
3910       store_immediate_info *info = group->stores[0];
3911       if (info->ops[0].base_addr)
3912 	total_new[0] += ret;
3913       if (info->ops[1].base_addr)
3914 	total_new[0] += ret;
3915       switch (info->rhs_code)
3916 	{
3917 	case BIT_AND_EXPR:
3918 	case BIT_IOR_EXPR:
3919 	case BIT_XOR_EXPR:
3920 	  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
3921 	  break;
3922 	default:
3923 	  break;
3924 	}
3925       FOR_EACH_VEC_ELT (*split_stores, i, store)
3926 	{
3927 	  unsigned int j;
3928 	  bool bit_not_p[3] = { false, false, false };
3929 	  /* If all orig_stores have certain bit_not_p set, then
3930 	     we'd use a BIT_NOT_EXPR stmt and need to account for it.
3931 	     If some orig_stores have certain bit_not_p set, then
3932 	     we'd use a BIT_XOR_EXPR with a mask and need to account for
3933 	     it.  */
3934 	  FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3935 	    {
3936 	      if (info->ops[0].bit_not_p)
3937 		bit_not_p[0] = true;
3938 	      if (info->ops[1].bit_not_p)
3939 		bit_not_p[1] = true;
3940 	      if (info->bit_not_p)
3941 		bit_not_p[2] = true;
3942 	    }
3943 	  total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3944 	}
3945 
3946     }
3947 
3948   return ret;
3949 }
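
/* Illustrative example (not from the GCC sources): how the splitting loop
   above behaves for a 7-byte constant region whose start is only known to
   be 4-byte aligned, with ALLOW_UNALIGNED_STORE false, no padding bytes and
   no original store that could be reused whole.  Assuming MAX_STORE_BITSIZE
   is at least 32, try_size starts at MIN (MAX_STORE_BITSIZE, align) = 32
   bits and is halved while it exceeds the remaining size, so the region is
   emitted as a 4-byte, a 2-byte and a 1-byte store (3 split stores), each
   naturally aligned.  */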
3950 
3951 /* Return the operation through which the operand IDX (if < 2) or
3952    result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
3953    is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3954    the bits should be xored with mask.  */
3955 
3956 static enum tree_code
3957 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3958 {
3959   unsigned int i;
3960   store_immediate_info *info;
3961   unsigned int cnt = 0;
3962   bool any_paddings = false;
3963   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3964     {
3965       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3966       if (bit_not_p)
3967 	{
3968 	  ++cnt;
3969 	  tree lhs = gimple_assign_lhs (info->stmt);
3970 	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3971 	      && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3972 	    any_paddings = true;
3973 	}
3974     }
3975   mask = NULL_TREE;
3976   if (cnt == 0)
3977     return NOP_EXPR;
3978   if (cnt == split_store->orig_stores.length () && !any_paddings)
3979     return BIT_NOT_EXPR;
3980 
3981   unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3982   unsigned buf_size = split_store->size / BITS_PER_UNIT;
3983   unsigned char *buf
3984     = XALLOCAVEC (unsigned char, buf_size);
3985   memset (buf, ~0U, buf_size);
3986   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3987     {
3988       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3989       if (!bit_not_p)
3990 	continue;
3991       /* Clear regions with bit_not_p and invert afterwards, rather than
3992 	 clear regions with !bit_not_p, so that gaps in between stores aren't
3993 	 set in the mask.  */
3994       unsigned HOST_WIDE_INT bitsize = info->bitsize;
3995       unsigned HOST_WIDE_INT prec = bitsize;
3996       unsigned int pos_in_buffer = 0;
3997       if (any_paddings)
3998 	{
3999 	  tree lhs = gimple_assign_lhs (info->stmt);
4000 	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4001 	      && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
4002 	    prec = TYPE_PRECISION (TREE_TYPE (lhs));
4003 	}
4004       if (info->bitpos < try_bitpos)
4005 	{
4006 	  gcc_assert (info->bitpos + bitsize > try_bitpos);
4007 	  if (!BYTES_BIG_ENDIAN)
4008 	    {
4009 	      if (prec <= try_bitpos - info->bitpos)
4010 		continue;
4011 	      prec -= try_bitpos - info->bitpos;
4012 	    }
4013 	  bitsize -= try_bitpos - info->bitpos;
4014 	  if (BYTES_BIG_ENDIAN && prec > bitsize)
4015 	    prec = bitsize;
4016 	}
4017       else
4018 	pos_in_buffer = info->bitpos - try_bitpos;
4019       if (prec < bitsize)
4020 	{
4021 	  /* If this is a bool inversion, invert just the least significant
4022 	     prec bits rather than all bits of it.  */
4023 	  if (BYTES_BIG_ENDIAN)
4024 	    {
4025 	      pos_in_buffer += bitsize - prec;
4026 	      if (pos_in_buffer >= split_store->size)
4027 		continue;
4028 	    }
4029 	  bitsize = prec;
4030 	}
4031       if (pos_in_buffer + bitsize > split_store->size)
4032 	bitsize = split_store->size - pos_in_buffer;
4033       unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
4034       if (BYTES_BIG_ENDIAN)
4035 	clear_bit_region_be (p, (BITS_PER_UNIT - 1
4036 				 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
4037       else
4038 	clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
4039     }
4040   for (unsigned int i = 0; i < buf_size; ++i)
4041     buf[i] = ~buf[i];
4042   mask = native_interpret_expr (int_type, buf, buf_size);
4043   return BIT_XOR_EXPR;
4044 }
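
/* Illustrative example (not from the GCC sources): consider a 16-bit split
   store built from two 8-bit constituent stores where only the first one
   had its value bitwise-inverted (bit_not_p set).  BIT_NOT_EXPR cannot be
   returned because it would also invert the second byte, so invert_op
   builds a mask with all bits set over the inverted store only and returns
   BIT_XOR_EXPR; on a little-endian target that mask is 0x00ff.  Only when
   every constituent store has bit_not_p set and none of them stores a value
   whose type is narrower than the accessed bits (the any_paddings case) is
   BIT_NOT_EXPR returned, with no mask needed.  */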
4045 
4046 /* Given a merged store group GROUP output the widened version of it.
4047    The store chain is against the base object BASE.
4048    Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4049    unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4050    Make sure that the number of statements output is less than the number of
4051    original statements.  If a better sequence is possible emit it and
4052    return true.  */
4053 
4054 bool
4055 imm_store_chain_info::output_merged_store (merged_store_group *group)
4056 {
4057   const unsigned HOST_WIDE_INT start_byte_pos
4058     = group->bitregion_start / BITS_PER_UNIT;
4059   unsigned int orig_num_stmts = group->stores.length ();
4060   if (orig_num_stmts < 2)
4061     return false;
4062 
4063   bool allow_unaligned_store
4064     = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4065   bool allow_unaligned_load = allow_unaligned_store;
4066   bool bzero_first = false;
4067   store_immediate_info *store;
4068   unsigned int num_clobber_stmts = 0;
4069   if (group->stores[0]->rhs_code == INTEGER_CST)
4070     {
4071       unsigned int i;
4072       FOR_EACH_VEC_ELT (group->stores, i, store)
4073 	if (gimple_clobber_p (store->stmt))
4074 	  num_clobber_stmts++;
4075 	else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4076 		 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4077 		 && group->start == store->bitpos
4078 		 && group->width == store->bitsize
4079 		 && (group->start % BITS_PER_UNIT) == 0
4080 		 && (group->width % BITS_PER_UNIT) == 0)
4081 	  {
4082 	    bzero_first = true;
4083 	    break;
4084 	  }
4085 	else
4086 	  break;
4087       FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4088 	if (gimple_clobber_p (store->stmt))
4089 	  num_clobber_stmts++;
4090       if (num_clobber_stmts == orig_num_stmts)
4091 	return false;
4092       orig_num_stmts -= num_clobber_stmts;
4093     }
4094   if (allow_unaligned_store || bzero_first)
4095     {
4096       /* If unaligned stores are allowed, see how many stores we'd emit
4097 	 for unaligned and how many stores we'd emit for aligned stores.
4098 	 Only use unaligned stores if it allows fewer stores than aligned.
4099 	 Similarly, if there is a whole region clear first, prefer expanding
4100 	 it together compared to expanding clear first followed by merged
4101 	 further stores.  */
4102       unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
4103       int pass_min = 0;
4104       for (int pass = 0; pass < 4; ++pass)
4105 	{
4106 	  if (!allow_unaligned_store && (pass & 1) != 0)
4107 	    continue;
4108 	  if (!bzero_first && (pass & 2) != 0)
4109 	    continue;
4110 	  cnt[pass] = split_group (group, (pass & 1) != 0,
4111 				   allow_unaligned_load, (pass & 2) != 0,
4112 				   NULL, NULL, NULL);
4113 	  if (cnt[pass] < cnt[pass_min])
4114 	    pass_min = pass;
4115 	}
4116       if ((pass_min & 1) == 0)
4117 	allow_unaligned_store = false;
4118       if ((pass_min & 2) == 0)
4119 	bzero_first = false;
4120     }
4121 
4122   auto_vec<class split_store *, 32> split_stores;
4123   split_store *split_store;
4124   unsigned total_orig, total_new, i;
4125   split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
4126 	       &split_stores, &total_orig, &total_new);
4127 
4128   /* Determine if there is a clobber covering the whole group at the start,
4129      followed by proposed split stores that cover the whole group.  In that
4130      case, prefer the transformation even if
4131      split_stores.length () == orig_num_stmts.  */
4132   bool clobber_first = false;
4133   if (num_clobber_stmts
4134       && gimple_clobber_p (group->stores[0]->stmt)
4135       && group->start == group->stores[0]->bitpos
4136       && group->width == group->stores[0]->bitsize
4137       && (group->start % BITS_PER_UNIT) == 0
4138       && (group->width % BITS_PER_UNIT) == 0)
4139     {
4140       clobber_first = true;
4141       unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4142       FOR_EACH_VEC_ELT (split_stores, i, split_store)
4143 	if (split_store->bytepos != pos)
4144 	  {
4145 	    clobber_first = false;
4146 	    break;
4147 	  }
4148 	else
4149 	  pos += split_store->size / BITS_PER_UNIT;
4150       if (pos != (group->start + group->width) / BITS_PER_UNIT)
4151 	clobber_first = false;
4152     }
4153 
4154   if (split_stores.length () >= orig_num_stmts + clobber_first)
4155     {
4156 
4157       /* We didn't manage to reduce the number of statements.  Bail out.  */
4158       if (dump_file && (dump_flags & TDF_DETAILS))
4159 	fprintf (dump_file, "Exceeded original number of stmts (%u)."
4160 			    "  Not profitable to emit new sequence.\n",
4161 		 orig_num_stmts);
4162       FOR_EACH_VEC_ELT (split_stores, i, split_store)
4163 	delete split_store;
4164       return false;
4165     }
4166   if (total_orig <= total_new)
4167     {
4168       /* If number of estimated new statements is above estimated original
4169 	 statements, bail out too.  */
4170       if (dump_file && (dump_flags & TDF_DETAILS))
4171 	fprintf (dump_file, "Estimated number of original stmts (%u)"
4172 			    " not larger than estimated number of new"
4173 			    " stmts (%u).\n",
4174 		 total_orig, total_new);
4175       FOR_EACH_VEC_ELT (split_stores, i, split_store)
4176 	delete split_store;
4177       return false;
4178     }
4179   if (group->stores[0]->rhs_code == INTEGER_CST)
4180     {
4181       bool all_orig = true;
4182       FOR_EACH_VEC_ELT (split_stores, i, split_store)
4183 	if (!split_store->orig)
4184 	  {
4185 	    all_orig = false;
4186 	    break;
4187 	  }
4188       if (all_orig)
4189 	{
4190 	  unsigned int cnt = split_stores.length ();
4191 	  store_immediate_info *store;
4192 	  FOR_EACH_VEC_ELT (group->stores, i, store)
4193 	    if (gimple_clobber_p (store->stmt))
4194 	      ++cnt;
4195 	  /* Punt if we wouldn't make any real changes, i.e. keep all
4196 	     orig stmts + all clobbers.  */
4197 	  if (cnt == group->stores.length ())
4198 	    {
4199 	      if (dump_file && (dump_flags & TDF_DETAILS))
4200 		fprintf (dump_file, "Exceeded original number of stmts (%u)."
4201 				    "  Not profitable to emit new sequence.\n",
4202 			 orig_num_stmts);
4203 	      FOR_EACH_VEC_ELT (split_stores, i, split_store)
4204 		delete split_store;
4205 	      return false;
4206 	    }
4207 	}
4208     }
4209 
4210   gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4211   gimple_seq seq = NULL;
4212   tree last_vdef, new_vuse;
4213   last_vdef = gimple_vdef (group->last_stmt);
4214   new_vuse = gimple_vuse (group->last_stmt);
4215   tree bswap_res = NULL_TREE;
4216 
4217   /* Clobbers are not removed.  */
4218   if (gimple_clobber_p (group->last_stmt))
4219     {
4220       new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4221       gimple_set_vdef (group->last_stmt, new_vuse);
4222     }
4223 
4224   if (group->stores[0]->rhs_code == LROTATE_EXPR
4225       || group->stores[0]->rhs_code == NOP_EXPR)
4226     {
4227       tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4228       gimple *ins_stmt = group->stores[0]->ins_stmt;
4229       struct symbolic_number *n = &group->stores[0]->n;
4230       bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4231 
4232       switch (n->range)
4233 	{
4234 	case 16:
4235 	  load_type = bswap_type = uint16_type_node;
4236 	  break;
4237 	case 32:
4238 	  load_type = uint32_type_node;
4239 	  if (bswap)
4240 	    {
4241 	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4242 	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4243 	    }
4244 	  break;
4245 	case 64:
4246 	  load_type = uint64_type_node;
4247 	  if (bswap)
4248 	    {
4249 	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4250 	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4251 	    }
4252 	  break;
4253 	default:
4254 	  gcc_unreachable ();
4255 	}
4256 
4257       /* If the loads each have the vuse of the corresponding store,
4258 	 we've already checked the aliasing in try_coalesce_bswap and
4259 	 we want to sink the needed load into seq.  So we need to use
4260 	 new_vuse on the load.  */
4261       if (n->base_addr)
4262 	{
4263 	  if (n->vuse == NULL)
4264 	    {
4265 	      n->vuse = new_vuse;
4266 	      ins_stmt = NULL;
4267 	    }
4268 	  else
4269 	    /* Update vuse in case it has changed by output_merged_stores.  */
4270 	    n->vuse = gimple_vuse (ins_stmt);
4271 	}
4272       bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
4273 				 bswap_type, load_type, n, bswap,
4274 				 ~(uint64_t) 0);
4275       gcc_assert (bswap_res);
4276     }
4277 
4278   gimple *stmt = NULL;
4279   auto_vec<gimple *, 32> orig_stmts;
4280   gimple_seq this_seq;
4281   tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
4282 				      is_gimple_mem_ref_addr, NULL_TREE);
4283   gimple_seq_add_seq_without_update (&seq, this_seq);
4284 
4285   tree load_addr[2] = { NULL_TREE, NULL_TREE };
4286   gimple_seq load_seq[2] = { NULL, NULL };
4287   gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4288   for (int j = 0; j < 2; ++j)
4289     {
4290       store_operand_info &op = group->stores[0]->ops[j];
4291       if (op.base_addr == NULL_TREE)
4292 	continue;
4293 
4294       store_immediate_info *infol = group->stores.last ();
4295       if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4296 	{
4297 	  /* We can't pick the location randomly; while we've verified
4298 	     all the loads have the same vuse, they can still be in different
4299 	     basic blocks and we need to pick the one from the last bb:
4300 	     int x = q[0];
4301 	     if (x == N) return;
4302 	     int y = q[1];
4303 	     p[0] = x;
4304 	     p[1] = y;
4305 	     otherwise if we put the wider load at the q[0] load, we might
4306 	     segfault if q[1] is not mapped.  */
4307 	  basic_block bb = gimple_bb (op.stmt);
4308 	  gimple *ostmt = op.stmt;
4309 	  store_immediate_info *info;
4310 	  FOR_EACH_VEC_ELT (group->stores, i, info)
4311 	    {
4312 	      gimple *tstmt = info->ops[j].stmt;
4313 	      basic_block tbb = gimple_bb (tstmt);
4314 	      if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4315 		{
4316 		  ostmt = tstmt;
4317 		  bb = tbb;
4318 		}
4319 	    }
4320 	  load_gsi[j] = gsi_for_stmt (ostmt);
4321 	  load_addr[j]
4322 	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
4323 				      &load_seq[j], is_gimple_mem_ref_addr,
4324 				      NULL_TREE);
4325 	}
4326       else if (operand_equal_p (base_addr, op.base_addr, 0))
4327 	load_addr[j] = addr;
4328       else
4329 	{
4330 	  load_addr[j]
4331 	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
4332 				      &this_seq, is_gimple_mem_ref_addr,
4333 				      NULL_TREE);
4334 	  gimple_seq_add_seq_without_update (&seq, this_seq);
4335 	}
4336     }
4337 
4338   FOR_EACH_VEC_ELT (split_stores, i, split_store)
4339     {
4340       const unsigned HOST_WIDE_INT try_size = split_store->size;
4341       const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4342       const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4343       const unsigned HOST_WIDE_INT try_align = split_store->align;
4344       const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
4345       tree dest, src;
4346       location_t loc;
4347 
4348       if (split_store->orig)
4349 	{
4350 	  /* If there is just a single non-clobber constituent store
4351 	     which covers the whole area, just reuse the lhs and rhs.  */
4352 	  gimple *orig_stmt = NULL;
4353 	  store_immediate_info *store;
4354 	  unsigned int j;
4355 	  FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4356 	    if (!gimple_clobber_p (store->stmt))
4357 	      {
4358 		orig_stmt = store->stmt;
4359 		break;
4360 	      }
4361 	  dest = gimple_assign_lhs (orig_stmt);
4362 	  src = gimple_assign_rhs1 (orig_stmt);
4363 	  loc = gimple_location (orig_stmt);
4364 	}
4365       else
4366 	{
4367 	  store_immediate_info *info;
4368 	  unsigned short clique, base;
4369 	  unsigned int k;
4370 	  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4371 	    orig_stmts.safe_push (info->stmt);
4372 	  tree offset_type
4373 	    = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4374 	  tree dest_type;
4375 	  loc = get_location_for_stmts (orig_stmts);
4376 	  orig_stmts.truncate (0);
4377 
4378 	  if (group->string_concatenation)
4379 	    dest_type
4380 	      = build_array_type_nelts (char_type_node,
4381 					try_size / BITS_PER_UNIT);
4382 	  else
4383 	    {
4384 	      dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4385 	      dest_type = build_aligned_type (dest_type, try_align);
4386 	    }
4387 	  dest = fold_build2 (MEM_REF, dest_type, addr,
4388 			      build_int_cst (offset_type, try_pos));
4389 	  if (TREE_CODE (dest) == MEM_REF)
4390 	    {
4391 	      MR_DEPENDENCE_CLIQUE (dest) = clique;
4392 	      MR_DEPENDENCE_BASE (dest) = base;
4393 	    }
4394 
4395 	  tree mask;
4396 	  if (bswap_res || group->string_concatenation)
4397 	    mask = integer_zero_node;
4398 	  else
4399 	    mask = native_interpret_expr (dest_type,
4400 					  group->mask + try_offset,
4401 					  group->buf_size);
4402 
4403 	  tree ops[2];
4404 	  for (int j = 0;
4405 	       j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4406 	       ++j)
4407 	    {
4408 	      store_operand_info &op = split_store->orig_stores[0]->ops[j];
4409 	      if (bswap_res)
4410 		ops[j] = bswap_res;
4411 	      else if (group->string_concatenation)
4412 		{
4413 		  ops[j] = build_string (try_size / BITS_PER_UNIT,
4414 					 (const char *) group->val + try_offset);
4415 		  TREE_TYPE (ops[j]) = dest_type;
4416 		}
4417 	      else if (op.base_addr)
4418 		{
4419 		  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4420 		    orig_stmts.safe_push (info->ops[j].stmt);
4421 
4422 		  offset_type = get_alias_type_for_stmts (orig_stmts, true,
4423 							  &clique, &base);
4424 		  location_t load_loc = get_location_for_stmts (orig_stmts);
4425 		  orig_stmts.truncate (0);
4426 
4427 		  unsigned HOST_WIDE_INT load_align = group->load_align[j];
4428 		  unsigned HOST_WIDE_INT align_bitpos
4429 		    = known_alignment (try_bitpos
4430 				       - split_store->orig_stores[0]->bitpos
4431 				       + op.bitpos);
4432 		  if (align_bitpos & (load_align - 1))
4433 		    load_align = least_bit_hwi (align_bitpos);
4434 
4435 		  tree load_int_type
4436 		    = build_nonstandard_integer_type (try_size, UNSIGNED);
4437 		  load_int_type
4438 		    = build_aligned_type (load_int_type, load_align);
4439 
4440 		  poly_uint64 load_pos
4441 		    = exact_div (try_bitpos
4442 				 - split_store->orig_stores[0]->bitpos
4443 				 + op.bitpos,
4444 				 BITS_PER_UNIT);
4445 		  ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4446 					build_int_cst (offset_type, load_pos));
4447 		  if (TREE_CODE (ops[j]) == MEM_REF)
4448 		    {
4449 		      MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4450 		      MR_DEPENDENCE_BASE (ops[j]) = base;
4451 		    }
4452 		  if (!integer_zerop (mask))
4453 		    {
4454 		      /* The load might load some bits (that will be masked
4455 			 off later on) uninitialized, avoid -W*uninitialized
4456 			 warnings in that case.  */
4457 		      suppress_warning (ops[j], OPT_Wuninitialized);
4458 		    }
4459 
4460 		  stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
4461 		  gimple_set_location (stmt, load_loc);
4462 		  if (gsi_bb (load_gsi[j]))
4463 		    {
4464 		      gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4465 		      gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4466 		    }
4467 		  else
4468 		    {
4469 		      gimple_set_vuse (stmt, new_vuse);
4470 		      gimple_seq_add_stmt_without_update (&seq, stmt);
4471 		    }
4472 		  ops[j] = gimple_assign_lhs (stmt);
4473 		  tree xor_mask;
4474 		  enum tree_code inv_op
4475 		    = invert_op (split_store, j, dest_type, xor_mask);
4476 		  if (inv_op != NOP_EXPR)
4477 		    {
4478 		      stmt = gimple_build_assign (make_ssa_name (dest_type),
4479 						  inv_op, ops[j], xor_mask);
4480 		      gimple_set_location (stmt, load_loc);
4481 		      ops[j] = gimple_assign_lhs (stmt);
4482 
4483 		      if (gsi_bb (load_gsi[j]))
4484 			gimple_seq_add_stmt_without_update (&load_seq[j],
4485 							    stmt);
4486 		      else
4487 			gimple_seq_add_stmt_without_update (&seq, stmt);
4488 		    }
4489 		}
4490 	      else
4491 		ops[j] = native_interpret_expr (dest_type,
4492 						group->val + try_offset,
4493 						group->buf_size);
4494 	    }
4495 
4496 	  switch (split_store->orig_stores[0]->rhs_code)
4497 	    {
4498 	    case BIT_AND_EXPR:
4499 	    case BIT_IOR_EXPR:
4500 	    case BIT_XOR_EXPR:
4501 	      FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4502 		{
4503 		  tree rhs1 = gimple_assign_rhs1 (info->stmt);
4504 		  orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4505 		}
4506 	      location_t bit_loc;
4507 	      bit_loc = get_location_for_stmts (orig_stmts);
4508 	      orig_stmts.truncate (0);
4509 
4510 	      stmt
4511 		= gimple_build_assign (make_ssa_name (dest_type),
4512 				       split_store->orig_stores[0]->rhs_code,
4513 				       ops[0], ops[1]);
4514 	      gimple_set_location (stmt, bit_loc);
4515 	      /* If there is just one load and there is a separate
4516 		 load_seq[0], emit the bitwise op right after it.  */
4517 	      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4518 		gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4519 	      /* Otherwise, if at least one load is in seq, we need to
4520 		 emit the bitwise op right before the store.  If there
4521 		 are two loads and they are emitted somewhere else, it would
4522 		 be better to emit the bitwise op as early as possible;
4523 		 we don't track where that would be possible right now
4524 		 though.  */
4525 	      else
4526 		gimple_seq_add_stmt_without_update (&seq, stmt);
4527 	      src = gimple_assign_lhs (stmt);
4528 	      tree xor_mask;
4529 	      enum tree_code inv_op;
4530 	      inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4531 	      if (inv_op != NOP_EXPR)
4532 		{
4533 		  stmt = gimple_build_assign (make_ssa_name (dest_type),
4534 					      inv_op, src, xor_mask);
4535 		  gimple_set_location (stmt, bit_loc);
4536 		  if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4537 		    gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4538 		  else
4539 		    gimple_seq_add_stmt_without_update (&seq, stmt);
4540 		  src = gimple_assign_lhs (stmt);
4541 		}
4542 	      break;
4543 	    case LROTATE_EXPR:
4544 	    case NOP_EXPR:
4545 	      src = ops[0];
4546 	      if (!is_gimple_val (src))
4547 		{
4548 		  stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4549 					      src);
4550 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4551 		  src = gimple_assign_lhs (stmt);
4552 		}
4553 	      if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4554 		{
4555 		  stmt = gimple_build_assign (make_ssa_name (dest_type),
4556 					      NOP_EXPR, src);
4557 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4558 		  src = gimple_assign_lhs (stmt);
4559 		}
4560 	      inv_op = invert_op (split_store, 2, dest_type, xor_mask);
4561 	      if (inv_op != NOP_EXPR)
4562 		{
4563 		  stmt = gimple_build_assign (make_ssa_name (dest_type),
4564 					      inv_op, src, xor_mask);
4565 		  gimple_set_location (stmt, loc);
4566 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4567 		  src = gimple_assign_lhs (stmt);
4568 		}
4569 	      break;
4570 	    default:
4571 	      src = ops[0];
4572 	      break;
4573 	    }
4574 
4575 	  /* If bit insertion is required, we use the source as an accumulator
4576 	     into which the successive bit-field values are manually inserted.
4577 	     FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
4578 	  if (group->bit_insertion)
4579 	    FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4580 	      if (info->rhs_code == BIT_INSERT_EXPR
4581 		  && info->bitpos < try_bitpos + try_size
4582 		  && info->bitpos + info->bitsize > try_bitpos)
4583 		{
4584 		  /* Mask, truncate, convert to final type, shift and ior into
4585 		     the accumulator.  Note that every step can be a no-op.  */
4586 		  const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4587 		  const HOST_WIDE_INT end_gap
4588 		    = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4589 		  tree tem = info->ops[0].val;
4590 		  if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4591 		    {
4592 		      const unsigned HOST_WIDE_INT size
4593 			= tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4594 		      tree integer_type
4595 			= build_nonstandard_integer_type (size, UNSIGNED);
4596 		      tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4597 					  integer_type, tem);
4598 		    }
4599 		  if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4600 		    {
4601 		      tree bitfield_type
4602 			= build_nonstandard_integer_type (info->bitsize,
4603 							  UNSIGNED);
4604 		      tem = gimple_convert (&seq, loc, bitfield_type, tem);
4605 		    }
4606 		  else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4607 		    {
4608 		      wide_int imask
4609 			= wi::mask (info->bitsize, false,
4610 				    TYPE_PRECISION (TREE_TYPE (tem)));
4611 		      tem = gimple_build (&seq, loc,
4612 					  BIT_AND_EXPR, TREE_TYPE (tem), tem,
4613 					  wide_int_to_tree (TREE_TYPE (tem),
4614 							    imask));
4615 		    }
4616 		  const HOST_WIDE_INT shift
4617 		    = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4618 		  if (shift < 0)
4619 		    tem = gimple_build (&seq, loc,
4620 					RSHIFT_EXPR, TREE_TYPE (tem), tem,
4621 					build_int_cst (NULL_TREE, -shift));
4622 		  tem = gimple_convert (&seq, loc, dest_type, tem);
4623 		  if (shift > 0)
4624 		    tem = gimple_build (&seq, loc,
4625 					LSHIFT_EXPR, dest_type, tem,
4626 					build_int_cst (NULL_TREE, shift));
4627 		  src = gimple_build (&seq, loc,
4628 				      BIT_IOR_EXPR, dest_type, tem, src);
4629 		}
4630 
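	  /* When MASK is not all-zero the widened access only partially
	     covers the destination, so build a read-modify-write sequence,
	     roughly (illustrative):
	       tem = *dest;  tem &= mask;  src &= ~mask;  src = tem | src;
	     where the bits set in MASK are the ones preserved from memory.  */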
4631 	  if (!integer_zerop (mask))
4632 	    {
4633 	      tree tem = make_ssa_name (dest_type);
4634 	      tree load_src = unshare_expr (dest);
4635 	      /* The load might load some or all bits uninitialized;
4636 		 avoid -W*uninitialized warnings in that case.
4637 		 As an optimization, it would be nice if, when all the bits
4638 		 are provably uninitialized (no stores at all yet, or the
4639 		 previous store was a CLOBBER), we could optimize away the
4640 		 load and replace it e.g. with 0.  */
4641 	      suppress_warning (load_src, OPT_Wuninitialized);
4642 	      stmt = gimple_build_assign (tem, load_src);
4643 	      gimple_set_location (stmt, loc);
4644 	      gimple_set_vuse (stmt, new_vuse);
4645 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4646 
4647 	      /* FIXME: If there is a single chunk of zero bits in mask,
4648 		 perhaps use BIT_INSERT_EXPR instead?  */
4649 	      stmt = gimple_build_assign (make_ssa_name (dest_type),
4650 					  BIT_AND_EXPR, tem, mask);
4651 	      gimple_set_location (stmt, loc);
4652 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4653 	      tem = gimple_assign_lhs (stmt);
4654 
4655 	      if (TREE_CODE (src) == INTEGER_CST)
4656 		src = wide_int_to_tree (dest_type,
4657 					wi::bit_and_not (wi::to_wide (src),
4658 							 wi::to_wide (mask)));
4659 	      else
4660 		{
4661 		  tree nmask
4662 		    = wide_int_to_tree (dest_type,
4663 					wi::bit_not (wi::to_wide (mask)));
4664 		  stmt = gimple_build_assign (make_ssa_name (dest_type),
4665 					      BIT_AND_EXPR, src, nmask);
4666 		  gimple_set_location (stmt, loc);
4667 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4668 		  src = gimple_assign_lhs (stmt);
4669 		}
4670 	      stmt = gimple_build_assign (make_ssa_name (dest_type),
4671 					  BIT_IOR_EXPR, tem, src);
4672 	      gimple_set_location (stmt, loc);
4673 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4674 	      src = gimple_assign_lhs (stmt);
4675 	    }
4676 	}
4677 
4678       stmt = gimple_build_assign (dest, src);
4679       gimple_set_location (stmt, loc);
4680       gimple_set_vuse (stmt, new_vuse);
4681       gimple_seq_add_stmt_without_update (&seq, stmt);
4682 
4683       if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4684 	add_stmt_to_eh_lp (stmt, group->lp_nr);
4685 
4686       tree new_vdef;
4687       if (i < split_stores.length () - 1)
4688 	new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4689       else
4690 	new_vdef = last_vdef;
4691 
4692       gimple_set_vdef (stmt, new_vdef);
4693       SSA_NAME_DEF_STMT (new_vdef) = stmt;
4694       new_vuse = new_vdef;
4695     }
4696 
4697   FOR_EACH_VEC_ELT (split_stores, i, split_store)
4698     delete split_store;
4699 
4700   gcc_assert (seq);
4701   if (dump_file)
4702     {
4703       fprintf (dump_file,
4704 	       "New sequence of %u stores to replace old one of %u stores\n",
4705 	       split_stores.length (), orig_num_stmts);
4706       if (dump_flags & TDF_DETAILS)
4707 	print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4708     }
4709 
4710   if (gimple_clobber_p (group->last_stmt))
4711     update_stmt (group->last_stmt);
4712 
4713   if (group->lp_nr > 0)
4714     {
4715       /* We're going to insert a sequence of (potentially) throwing stores
4716 	 into an active EH region.  This means that we're going to create
4717 	 new basic blocks with EH edges pointing to the post landing pad
4718 	 and, therefore, will have to update its PHI nodes, if any.  For the
4719 	 virtual PHI node, we're going to use the VDEFs created above, but
4720 	 for the other nodes, we need to record the original reaching defs.  */
4721       eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4722       basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4723       basic_block last_bb = gimple_bb (group->last_stmt);
4724       edge last_edge = find_edge (last_bb, lp_bb);
4725       auto_vec<tree, 16> last_defs;
4726       gphi_iterator gpi;
4727       for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4728 	{
4729 	  gphi *phi = gpi.phi ();
4730 	  tree last_def;
4731 	  if (virtual_operand_p (gimple_phi_result (phi)))
4732 	    last_def = NULL_TREE;
4733 	  else
4734 	    last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4735 	  last_defs.safe_push (last_def);
4736 	}
4737 
4738       /* Do the insertion.  Then, if new basic blocks have been created in the
4739 	 process, rewind the chain of VDEFs created above to walk the new basic
4740 	 blocks and update the corresponding arguments of the PHI nodes.  */
4741       update_modified_stmts (seq);
4742       if (gimple_find_sub_bbs (seq, &last_gsi))
4743 	while (last_vdef != gimple_vuse (group->last_stmt))
4744 	  {
4745 	    gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4746 	    if (stmt_could_throw_p (cfun, stmt))
4747 	      {
4748 		edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4749 		unsigned int i;
4750 		for (gpi = gsi_start_phis (lp_bb), i = 0;
4751 		     !gsi_end_p (gpi);
4752 		     gsi_next (&gpi), i++)
4753 		  {
4754 		    gphi *phi = gpi.phi ();
4755 		    tree new_def;
4756 		    if (virtual_operand_p (gimple_phi_result (phi)))
4757 		      new_def = last_vdef;
4758 		    else
4759 		      new_def = last_defs[i];
4760 		    add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4761 		  }
4762 	      }
4763 	    last_vdef = gimple_vuse (stmt);
4764 	  }
4765     }
4766   else
4767     gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4768 
4769   for (int j = 0; j < 2; ++j)
4770     if (load_seq[j])
4771       gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4772 
4773   return true;
4774 }
4775 
4776 /* Process the merged_store_group objects created in the coalescing phase.
4777    The stores are all against the base object BASE.
4778    Try to output the widened stores and delete the original statements if
4779    successful.  Return true iff any changes were made.  */
4780 
4781 bool
4782 imm_store_chain_info::output_merged_stores ()
4783 {
4784   unsigned int i;
4785   merged_store_group *merged_store;
4786   bool ret = false;
4787   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4788     {
4789       if (dbg_cnt (store_merging)
4790 	  && output_merged_store (merged_store))
4791 	{
4792 	  unsigned int j;
4793 	  store_immediate_info *store;
4794 	  FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4795 	    {
4796 	      gimple *stmt = store->stmt;
4797 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4798 	      /* Don't remove clobbers; they are still useful even if
4799 		 everything is overwritten afterwards.  */
4800 	      if (gimple_clobber_p (stmt))
4801 		continue;
4802 	      gsi_remove (&gsi, true);
4803 	      if (store->lp_nr)
4804 		remove_stmt_from_eh_lp (stmt);
4805 	      if (stmt != merged_store->last_stmt)
4806 		{
4807 		  unlink_stmt_vdef (stmt);
4808 		  release_defs (stmt);
4809 		}
4810 	    }
4811 	  ret = true;
4812 	}
4813     }
4814   if (ret && dump_file)
4815     fprintf (dump_file, "Merging successful!\n");
4816 
4817   return ret;
4818 }
4819 
4820 /* Coalesce the store_immediate_info objects recorded against the base object
4821    BASE in the first phase and output them.
4822    Delete the allocated structures.
4823    Return true if any changes were made.  */
4824 
4825 bool
4826 imm_store_chain_info::terminate_and_process_chain ()
4827 {
4828   if (dump_file && (dump_flags & TDF_DETAILS))
4829     fprintf (dump_file, "Terminating chain with %u stores\n",
4830 	     m_store_info.length ());
4831   /* Process store chain.  */
4832   bool ret = false;
4833   if (m_store_info.length () > 1)
4834     {
4835       ret = coalesce_immediate_stores ();
4836       if (ret)
4837 	ret = output_merged_stores ();
4838     }
4839 
4840   /* Delete all the entries we allocated ourselves.  */
4841   store_immediate_info *info;
4842   unsigned int i;
4843   FOR_EACH_VEC_ELT (m_store_info, i, info)
4844     delete info;
4845 
4846   merged_store_group *merged_info;
4847   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4848     delete merged_info;
4849 
4850   return ret;
4851 }
4852 
4853 /* Return true iff LHS is a destination potentially interesting for
4854    store merging.  In practice these are the codes that get_inner_reference
4855    can process.  */
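/* For example (illustrative), destinations such as
     x = ...;        plain DECL
     x.f = ...;      COMPONENT_REF
     a[i] = ...;     ARRAY_REF
     *p = ...;       MEM_REF
   qualify, while e.g. a TARGET_MEM_REF destination does not.  */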
4856 
4857 static bool
4858 lhs_valid_for_store_merging_p (tree lhs)
4859 {
4860   if (DECL_P (lhs))
4861     return true;
4862 
4863   switch (TREE_CODE (lhs))
4864     {
4865     case ARRAY_REF:
4866     case ARRAY_RANGE_REF:
4867     case BIT_FIELD_REF:
4868     case COMPONENT_REF:
4869     case MEM_REF:
4870     case VIEW_CONVERT_EXPR:
4871       return true;
4872     default:
4873       return false;
4874     }
4875 }
4876 
4877 /* Return true if the tree RHS is a constant we want to consider
4878    during store merging.  In practice accept all codes that
4879    native_encode_expr accepts.  */
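/* E.g. (illustrative) integer, real or vector literals are accepted, as is
   an empty CONSTRUCTOR of known fixed size such as "= (struct S){}"; SSA
   names and non-empty CONSTRUCTORs are not.  */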
4880 
4881 static bool
4882 rhs_valid_for_store_merging_p (tree rhs)
4883 {
4884   unsigned HOST_WIDE_INT size;
4885   if (TREE_CODE (rhs) == CONSTRUCTOR
4886       && CONSTRUCTOR_NELTS (rhs) == 0
4887       && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4888       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4889     return true;
4890   return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4891 	  && native_encode_expr (rhs, NULL, size) != 0);
4892 }
4893 
4894 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4895    and return true on success or false on failure.  */
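/* For instance (illustrative), BYTE_OFF == 2 advances *PBITPOS and both
   bit-region bounds by 16 bits; failure means the adjusted bit position
   would be negative or would not fit the output type.  */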
4896 
4897 static bool
4898 adjust_bit_pos (poly_offset_int byte_off,
4899 		poly_int64  *pbitpos,
4900 		poly_uint64 *pbitregion_start,
4901 		poly_uint64 *pbitregion_end)
4902 {
4903   poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4904   bit_off += *pbitpos;
4905 
4906   if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4907     {
4908       if (maybe_ne (*pbitregion_end, 0U))
4909 	{
4910 	  bit_off = byte_off << LOG2_BITS_PER_UNIT;
4911 	  bit_off += *pbitregion_start;
4912 	  if (bit_off.to_uhwi (pbitregion_start))
4913 	    {
4914 	      bit_off = byte_off << LOG2_BITS_PER_UNIT;
4915 	      bit_off += *pbitregion_end;
4916 	      if (!bit_off.to_uhwi (pbitregion_end))
4917 		*pbitregion_end = 0;
4918 	    }
4919 	  else
4920 	    *pbitregion_end = 0;
4921 	}
4922       return true;
4923     }
4924   else
4925     return false;
4926 }
4927 
4928 /* If MEM is a memory reference usable for store merging (either as
4929    store destination or for loads), return the non-NULL base_addr
4930    and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4931    Otherwise return NULL; *PBITSIZE is still valid even in that
4932    case.  */
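/* E.g. (illustrative) for a 4-byte store to MEM[p_1 + 4B], the returned
   base is the pointer p_1 itself and *PBITPOS is increased by 32, so that
   stores through p_1 at different constant offsets land in the same
   chain.  */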
4933 
4934 static tree
4935 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4936 			     poly_uint64 *pbitpos,
4937 			     poly_uint64 *pbitregion_start,
4938 			     poly_uint64 *pbitregion_end)
4939 {
4940   poly_int64 bitsize, bitpos;
4941   poly_uint64 bitregion_start = 0, bitregion_end = 0;
4942   machine_mode mode;
4943   int unsignedp = 0, reversep = 0, volatilep = 0;
4944   tree offset;
4945   tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4946 					&unsignedp, &reversep, &volatilep);
4947   *pbitsize = bitsize;
4948   if (known_le (bitsize, 0))
4949     return NULL_TREE;
4950 
4951   if (TREE_CODE (mem) == COMPONENT_REF
4952       && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4953     {
4954       get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4955       if (maybe_ne (bitregion_end, 0U))
4956 	bitregion_end += 1;
4957     }
4958 
4959   if (reversep)
4960     return NULL_TREE;
4961 
4962   /* We do not want to rewrite TARGET_MEM_REFs.  */
4963   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4964     return NULL_TREE;
4965   /* In some cases get_inner_reference may return a
4966      MEM_REF [ptr + byteoffset].  For the purposes of this pass
4967      canonicalize the base_addr to MEM_REF [ptr] and take
4968      byteoffset into account in the bitpos.  This occurs in
4969      PR 23684 and this way we can catch more chains.  */
4970   else if (TREE_CODE (base_addr) == MEM_REF)
4971     {
4972       if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4973 			   &bitregion_start, &bitregion_end))
4974 	return NULL_TREE;
4975       base_addr = TREE_OPERAND (base_addr, 0);
4976     }
4977   /* get_inner_reference returns the base object, get at its
4978      address now.  */
4979   else
4980     {
4981       if (maybe_lt (bitpos, 0))
4982 	return NULL_TREE;
4983       base_addr = build_fold_addr_expr (base_addr);
4984     }
4985 
4986   if (offset)
4987     {
4988       /* If the access has a variable offset then a base decl has to be
4989 	 address-taken to be able to emit pointer-based stores to it.
4990 	 ???  We might be able to get away with re-using the original
4991 	 base up to the first variable part and then wrapping that inside
4992 	 a BIT_FIELD_REF.  */
4993       tree base = get_base_address (base_addr);
4994       if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4995 	return NULL_TREE;
4996 
4997       /* Similarly to above for the base, remove constant from the offset.  */
4998       if (TREE_CODE (offset) == PLUS_EXPR
4999 	  && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
5000 	  && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
5001 			     &bitpos, &bitregion_start, &bitregion_end))
5002 	offset = TREE_OPERAND (offset, 0);
5003 
5004       base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
5005 			  base_addr, offset);
5006     }
5007 
5008   if (known_eq (bitregion_end, 0U))
5009     {
5010       bitregion_start = round_down_to_byte_boundary (bitpos);
5011       bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
5012     }
5013 
5014   *pbitsize = bitsize;
5015   *pbitpos = bitpos;
5016   *pbitregion_start = bitregion_start;
5017   *pbitregion_end = bitregion_end;
5018   return base_addr;
5019 }
5020 
5021 /* Return true if STMT is a load that can be used for store merging.
5022    In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
5023    BITREGION_END are properties of the corresponding store.  */
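/* E.g. (illustrative), for
     _1 = MEM[q_2 + 4B];
     MEM[p_3 + 4B] = _1;
   the definition of _1 is a handled load, provided (among other checks)
   its size equals the store's and it fits within the store's bit region;
   a BIT_NOT_EXPR of such a load is also handled and recorded via
   OP->bit_not_p.  */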
5024 
5025 static bool
5026 handled_load (gimple *stmt, store_operand_info *op,
5027 	      poly_uint64 bitsize, poly_uint64 bitpos,
5028 	      poly_uint64 bitregion_start, poly_uint64 bitregion_end)
5029 {
5030   if (!is_gimple_assign (stmt))
5031     return false;
5032   if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
5033     {
5034       tree rhs1 = gimple_assign_rhs1 (stmt);
5035       if (TREE_CODE (rhs1) == SSA_NAME
5036 	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
5037 			   bitregion_start, bitregion_end))
5038 	{
5039 	  /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
5040 	     been optimized earlier, but if allowed here, would confuse the
5041 	     multiple uses counting.  */
5042 	  if (op->bit_not_p)
5043 	    return false;
5044 	  op->bit_not_p = !op->bit_not_p;
5045 	  return true;
5046 	}
5047       return false;
5048     }
5049   if (gimple_vuse (stmt)
5050       && gimple_assign_load_p (stmt)
5051       && !stmt_can_throw_internal (cfun, stmt)
5052       && !gimple_has_volatile_ops (stmt))
5053     {
5054       tree mem = gimple_assign_rhs1 (stmt);
5055       op->base_addr
5056 	= mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
5057 				       &op->bitregion_start,
5058 				       &op->bitregion_end);
5059       if (op->base_addr != NULL_TREE
5060 	  && known_eq (op->bitsize, bitsize)
5061 	  && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
5062 	  && known_ge (op->bitpos - op->bitregion_start,
5063 		       bitpos - bitregion_start)
5064 	  && known_ge (op->bitregion_end - op->bitpos,
5065 		       bitregion_end - bitpos))
5066 	{
5067 	  op->stmt = stmt;
5068 	  op->val = mem;
5069 	  op->bit_not_p = false;
5070 	  return true;
5071 	}
5072     }
5073   return false;
5074 }
5075 
5076 /* Return the index number of the landing pad for STMT, if any.  */
5077 
5078 static int
5079 lp_nr_for_store (gimple *stmt)
5080 {
5081   if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
5082     return 0;
5083 
5084   if (!stmt_could_throw_p (cfun, stmt))
5085     return 0;
5086 
5087   return lookup_stmt_eh_lp (stmt);
5088 }
5089 
5090 /* Record the store STMT for store merging optimization if it can be
5091    optimized.  Return true if any changes were made.  */
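/* The right-hand sides recognized below are roughly (illustrative):
     *p = CST;                          INTEGER_CST / STRING_CST
     *p = *q;                           MEM_REF
     *p = *q OP CST;  *p = *q OP *r;    BIT_{AND,IOR,XOR}_EXPR
     *p = bswap-like value of *q;       recorded as LROTATE_EXPR
     s.bit_field = x;                   BIT_INSERT_EXPR
   possibly with a BIT_NOT_EXPR applied to a loaded operand.  */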
5092 
5093 bool
5094 pass_store_merging::process_store (gimple *stmt)
5095 {
5096   tree lhs = gimple_assign_lhs (stmt);
5097   tree rhs = gimple_assign_rhs1 (stmt);
5098   poly_uint64 bitsize, bitpos = 0;
5099   poly_uint64 bitregion_start = 0, bitregion_end = 0;
5100   tree base_addr
5101     = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
5102 				   &bitregion_start, &bitregion_end);
5103   if (known_eq (bitsize, 0U))
5104     return false;
5105 
5106   bool invalid = (base_addr == NULL_TREE
5107 		  || (maybe_gt (bitsize,
5108 				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
5109 		      && TREE_CODE (rhs) != INTEGER_CST
5110 		      && (TREE_CODE (rhs) != CONSTRUCTOR
5111 			  || CONSTRUCTOR_NELTS (rhs) != 0)));
5112   enum tree_code rhs_code = ERROR_MARK;
5113   bool bit_not_p = false;
5114   struct symbolic_number n;
5115   gimple *ins_stmt = NULL;
5116   store_operand_info ops[2];
5117   if (invalid)
5118     ;
5119   else if (TREE_CODE (rhs) == STRING_CST)
5120     {
5121       rhs_code = STRING_CST;
5122       ops[0].val = rhs;
5123     }
5124   else if (rhs_valid_for_store_merging_p (rhs))
5125     {
5126       rhs_code = INTEGER_CST;
5127       ops[0].val = rhs;
5128     }
5129   else if (TREE_CODE (rhs) == SSA_NAME)
5130     {
5131       gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
5132       if (!is_gimple_assign (def_stmt))
5133 	invalid = true;
5134       else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
5135 			     bitregion_start, bitregion_end))
5136 	rhs_code = MEM_REF;
5137       else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
5138 	{
5139 	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
5140 	  if (TREE_CODE (rhs1) == SSA_NAME
5141 	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
5142 	    {
5143 	      bit_not_p = true;
5144 	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
5145 	    }
5146 	}
5147 
5148       if (rhs_code == ERROR_MARK && !invalid)
5149 	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
5150 	  {
5151 	  case BIT_AND_EXPR:
5152 	  case BIT_IOR_EXPR:
5153 	  case BIT_XOR_EXPR:
5154 	    tree rhs1, rhs2;
5155 	    rhs1 = gimple_assign_rhs1 (def_stmt);
5156 	    rhs2 = gimple_assign_rhs2 (def_stmt);
5157 	    invalid = true;
5158 	    if (TREE_CODE (rhs1) != SSA_NAME)
5159 	      break;
5160 	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
5161 	    if (!is_gimple_assign (def_stmt1)
5162 		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
5163 				  bitregion_start, bitregion_end))
5164 	      break;
5165 	    if (rhs_valid_for_store_merging_p (rhs2))
5166 	      ops[1].val = rhs2;
5167 	    else if (TREE_CODE (rhs2) != SSA_NAME)
5168 	      break;
5169 	    else
5170 	      {
5171 		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
5172 		if (!is_gimple_assign (def_stmt2))
5173 		  break;
5174 		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
5175 					bitregion_start, bitregion_end))
5176 		  break;
5177 	      }
5178 	    invalid = false;
5179 	    break;
5180 	  default:
5181 	    invalid = true;
5182 	    break;
5183 	  }
5184 
5185       unsigned HOST_WIDE_INT const_bitsize;
5186       if (bitsize.is_constant (&const_bitsize)
5187 	  && (const_bitsize % BITS_PER_UNIT) == 0
5188 	  && const_bitsize <= 64
5189 	  && multiple_p (bitpos, BITS_PER_UNIT))
5190 	{
5191 	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
5192 	  if (ins_stmt)
5193 	    {
5194 	      uint64_t nn = n.n;
5195 	      for (unsigned HOST_WIDE_INT i = 0;
5196 		   i < const_bitsize;
5197 		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
5198 		if ((nn & MARKER_MASK) == 0
5199 		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
5200 		  {
5201 		    ins_stmt = NULL;
5202 		    break;
5203 		  }
5204 	      if (ins_stmt)
5205 		{
5206 		  if (invalid)
5207 		    {
5208 		      rhs_code = LROTATE_EXPR;
5209 		      ops[0].base_addr = NULL_TREE;
5210 		      ops[1].base_addr = NULL_TREE;
5211 		    }
5212 		  invalid = false;
5213 		}
5214 	    }
5215 	}
5216 
5217       if (invalid
5218 	  && bitsize.is_constant (&const_bitsize)
5219 	  && ((const_bitsize % BITS_PER_UNIT) != 0
5220 	      || !multiple_p (bitpos, BITS_PER_UNIT))
5221 	  && const_bitsize <= MAX_FIXED_MODE_SIZE)
5222 	{
5223 	  /* Bypass a conversion to the bit-field type.  */
5224 	  if (!bit_not_p
5225 	      && is_gimple_assign (def_stmt)
5226 	      && CONVERT_EXPR_CODE_P (rhs_code))
5227 	    {
5228 	      tree rhs1 = gimple_assign_rhs1 (def_stmt);
5229 	      if (TREE_CODE (rhs1) == SSA_NAME
5230 		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
5231 		rhs = rhs1;
5232 	    }
5233 	  rhs_code = BIT_INSERT_EXPR;
5234 	  bit_not_p = false;
5235 	  ops[0].val = rhs;
5236 	  ops[0].base_addr = NULL_TREE;
5237 	  ops[1].base_addr = NULL_TREE;
5238 	  invalid = false;
5239 	}
5240     }
5241   else
5242     invalid = true;
5243 
5244   unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
5245   unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
5246   if (invalid
5247       || !bitsize.is_constant (&const_bitsize)
5248       || !bitpos.is_constant (&const_bitpos)
5249       || !bitregion_start.is_constant (&const_bitregion_start)
5250       || !bitregion_end.is_constant (&const_bitregion_end))
5251     return terminate_all_aliasing_chains (NULL, stmt);
5252 
5253   if (!ins_stmt)
5254     memset (&n, 0, sizeof (n));
5255 
5256   class imm_store_chain_info **chain_info = NULL;
5257   bool ret = false;
5258   if (base_addr)
5259     chain_info = m_stores.get (base_addr);
5260 
5261   store_immediate_info *info;
5262   if (chain_info)
5263     {
5264       unsigned int ord = (*chain_info)->m_store_info.length ();
5265       info = new store_immediate_info (const_bitsize, const_bitpos,
5266 				       const_bitregion_start,
5267 				       const_bitregion_end,
5268 				       stmt, ord, rhs_code, n, ins_stmt,
5269 				       bit_not_p, lp_nr_for_store (stmt),
5270 				       ops[0], ops[1]);
5271       if (dump_file && (dump_flags & TDF_DETAILS))
5272 	{
5273 	  fprintf (dump_file, "Recording immediate store from stmt:\n");
5274 	  print_gimple_stmt (dump_file, stmt, 0);
5275 	}
5276       (*chain_info)->m_store_info.safe_push (info);
5277       m_n_stores++;
5278       ret |= terminate_all_aliasing_chains (chain_info, stmt);
5279       /* If we reach the limit of stores to merge in a chain, terminate and
5280 	 process the chain now.  */
5281       if ((*chain_info)->m_store_info.length ()
5282 	  == (unsigned int) param_max_stores_to_merge)
5283 	{
5284 	  if (dump_file && (dump_flags & TDF_DETAILS))
5285 	    fprintf (dump_file,
5286 		     "Reached maximum number of statements to merge:\n");
5287 	  ret |= terminate_and_process_chain (*chain_info);
5288 	}
5289     }
5290   else
5291     {
5292       /* Store aliases any existing chain?  */
5293       ret |= terminate_all_aliasing_chains (NULL, stmt);
5294 
5295       /* Start a new chain.  */
5296       class imm_store_chain_info *new_chain
5297 	  = new imm_store_chain_info (m_stores_head, base_addr);
5298       info = new store_immediate_info (const_bitsize, const_bitpos,
5299 				       const_bitregion_start,
5300 				       const_bitregion_end,
5301 				       stmt, 0, rhs_code, n, ins_stmt,
5302 				       bit_not_p, lp_nr_for_store (stmt),
5303 				       ops[0], ops[1]);
5304       new_chain->m_store_info.safe_push (info);
5305       m_n_stores++;
5306       m_stores.put (base_addr, new_chain);
5307       m_n_chains++;
5308       if (dump_file && (dump_flags & TDF_DETAILS))
5309 	{
5310 	  fprintf (dump_file, "Starting active chain number %u with statement:\n",
5311 		   m_n_chains);
5312 	  print_gimple_stmt (dump_file, stmt, 0);
5313 	  fprintf (dump_file, "The base object is:\n");
5314 	  print_generic_expr (dump_file, base_addr);
5315 	  fprintf (dump_file, "\n");
5316 	}
5317     }
5318 
5319   /* Prune oldest chains so that after adding the chain or store above
5320      we're again within the limits set by the params.  */
5321   if (m_n_chains > (unsigned)param_max_store_chains_to_track
5322       || m_n_stores > (unsigned)param_max_stores_to_track)
5323     {
5324       if (dump_file && (dump_flags & TDF_DETAILS))
5325 	fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
5326 		 "terminating oldest chain(s).\n", m_n_chains,
5327 		 param_max_store_chains_to_track, m_n_stores,
5328 		 param_max_stores_to_track);
5329       imm_store_chain_info **e = &m_stores_head;
5330       unsigned idx = 0;
5331       unsigned n_stores = 0;
5332       while (*e)
5333 	{
5334 	  if (idx >= (unsigned)param_max_store_chains_to_track
5335 	      || (n_stores + (*e)->m_store_info.length ()
5336 		  > (unsigned)param_max_stores_to_track))
5337 	    ret |= terminate_and_process_chain (*e);
5338 	  else
5339 	    {
5340 	      n_stores += (*e)->m_store_info.length ();
5341 	      e = &(*e)->next;
5342 	      ++idx;
5343 	    }
5344 	}
5345     }
5346 
5347   return ret;
5348 }
5349 
5350 /* Return true if STMT is a store valid for store merging.  */
5351 
5352 static bool
5353 store_valid_for_store_merging_p (gimple *stmt)
5354 {
5355   return gimple_assign_single_p (stmt)
5356 	 && gimple_vdef (stmt)
5357 	 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5358 	 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
5359 }
5360 
5361 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
5362 
5363 /* Return the status of basic block BB wrt store merging.  */
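/* Roughly (summary of the logic below): BB_VALID means the block has at
   least two mergeable stores, or a small integral vector CONSTRUCTOR worth
   rewriting; BB_EXTENDED_VALID means the block ends with a mergeable store
   and falls through to the next block, so open chains may be extended
   across it when non-call exceptions are enabled; BB_INVALID means there
   is nothing to do here.  */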
5364 
5365 static enum basic_block_status
5366 get_status_for_store_merging (basic_block bb)
5367 {
5368   unsigned int num_statements = 0;
5369   unsigned int num_constructors = 0;
5370   gimple_stmt_iterator gsi;
5371   edge e;
5372   gimple *last_stmt = NULL;
5373 
5374   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5375     {
5376       gimple *stmt = gsi_stmt (gsi);
5377 
5378       if (is_gimple_debug (stmt))
5379 	continue;
5380 
5381       last_stmt = stmt;
5382 
5383       if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
5384 	break;
5385 
5386       if (is_gimple_assign (stmt)
5387 	  && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
5388 	{
5389 	  tree rhs = gimple_assign_rhs1 (stmt);
5390 	  if (VECTOR_TYPE_P (TREE_TYPE (rhs))
5391 	      && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
5392 	      && gimple_assign_lhs (stmt) != NULL_TREE)
5393 	    {
5394 	      HOST_WIDE_INT sz
5395 		= int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
5396 	      if (sz == 16 || sz == 32 || sz == 64)
5397 		{
5398 		  num_constructors = 1;
5399 		  break;
5400 		}
5401 	    }
5402 	}
5403     }
5404 
5405   if (num_statements == 0 && num_constructors == 0)
5406     return BB_INVALID;
5407 
5408   if (cfun->can_throw_non_call_exceptions && cfun->eh
5409       && store_valid_for_store_merging_p (last_stmt)
5410       && (e = find_fallthru_edge (bb->succs))
5411       && e->dest == bb->next_bb)
5412     return BB_EXTENDED_VALID;
5413 
5414   return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
5415 }
5416 
5417 /* Entry point for the pass.  Go over each basic block recording chains of
5418    immediate stores.  Upon encountering a terminating statement (as defined
5419    by stmt_terminates_chain_p), process the recorded stores and emit the widened
5420    variants.  */
5421 
5422 unsigned int
5423 pass_store_merging::execute (function *fun)
5424 {
5425   basic_block bb;
5426   hash_set<gimple *> orig_stmts;
5427   bool changed = false, open_chains = false;
5428 
5429   /* If the function can throw and catch non-call exceptions, we'll be trying
5430      to merge stores across different basic blocks, so we need to first unsplit
5431      the EH edges in order to streamline the CFG of the function.  */
5432   if (cfun->can_throw_non_call_exceptions && cfun->eh)
5433     unsplit_eh_edges ();
5434 
5435   calculate_dominance_info (CDI_DOMINATORS);
5436 
5437   FOR_EACH_BB_FN (bb, fun)
5438     {
5439       const basic_block_status bb_status = get_status_for_store_merging (bb);
5440       gimple_stmt_iterator gsi;
5441 
5442       if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5443 	{
5444 	  changed |= terminate_and_process_all_chains ();
5445 	  open_chains = false;
5446 	}
5447 
5448       if (bb_status == BB_INVALID)
5449 	continue;
5450 
5451       if (dump_file && (dump_flags & TDF_DETAILS))
5452 	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5453 
5454       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
5455 	{
5456 	  gimple *stmt = gsi_stmt (gsi);
5457 	  gsi_next (&gsi);
5458 
5459 	  if (is_gimple_debug (stmt))
5460 	    continue;
5461 
5462 	  if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5463 	    {
5464 	      /* Terminate all chains.  */
5465 	      if (dump_file && (dump_flags & TDF_DETAILS))
5466 		fprintf (dump_file, "Volatile access terminates "
5467 				    "all chains\n");
5468 	      changed |= terminate_and_process_all_chains ();
5469 	      open_chains = false;
5470 	      continue;
5471 	    }
5472 
5473 	  if (is_gimple_assign (stmt)
5474 	      && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
5475 	      && maybe_optimize_vector_constructor (stmt))
5476 	    continue;
5477 
5478 	  if (store_valid_for_store_merging_p (stmt))
5479 	    changed |= process_store (stmt);
5480 	  else
5481 	    changed |= terminate_all_aliasing_chains (NULL, stmt);
5482 	}
5483 
5484       if (bb_status == BB_EXTENDED_VALID)
5485 	open_chains = true;
5486       else
5487 	{
5488 	  changed |= terminate_and_process_all_chains ();
5489 	  open_chains = false;
5490 	}
5491     }
5492 
5493   if (open_chains)
5494     changed |= terminate_and_process_all_chains ();
5495 
5496   /* If the function can throw and catch non-call exceptions and something
5497      changed during the pass, then the CFG has (very likely) changed too.  */
5498   if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5499     {
5500       free_dominance_info (CDI_DOMINATORS);
5501       return TODO_cleanup_cfg;
5502     }
5503 
5504   return 0;
5505 }
5506 
5507 } // anon namespace
5508 
5509 /* Construct and return a store merging pass object.  */
5510 
5511 gimple_opt_pass *
5512 make_pass_store_merging (gcc::context *ctxt)
5513 {
5514   return new pass_store_merging (ctxt);
5515 }
5516 
5517 #if CHECKING_P
5518 
5519 namespace selftest {
5520 
5521 /* Selftests for store merging helpers.  */
5522 
5523 /* Assert that all elements of the byte arrays X and Y, both of length N,
5524    are equal.  */
5525 
5526 static void
5527 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5528 {
5529   for (unsigned int i = 0; i < n; i++)
5530     {
5531       if (x[i] != y[i])
5532 	{
5533 	  fprintf (stderr, "Arrays do not match.  X:\n");
5534 	  dump_char_array (stderr, x, n);
5535 	  fprintf (stderr, "Y:\n");
5536 	  dump_char_array (stderr, y, n);
5537 	}
5538       ASSERT_EQ (x[i], y[i]);
5539     }
5540 }
5541 
5542 /* Test shift_bytes_in_array_left, in particular that it carries bits
5543    across bytes correctly.  */
5544 
5545 static void
5546 verify_shift_bytes_in_array_left (void)
5547 {
5548    /* byte 1   | byte 0
5549       00011111 | 11100000.  */
5550   unsigned char orig[2] = { 0xe0, 0x1f };
5551   unsigned char in[2];
5552   memcpy (in, orig, sizeof orig);
5553 
5554   unsigned char expected[2] = { 0x80, 0x7f };
5555   shift_bytes_in_array_left (in, sizeof (in), 2);
5556   verify_array_eq (in, expected, sizeof (in));
5557 
5558   memcpy (in, orig, sizeof orig);
5559   memcpy (expected, orig, sizeof orig);
5560   /* Check that shifting by zero doesn't change anything.  */
5561   shift_bytes_in_array_left (in, sizeof (in), 0);
5562   verify_array_eq (in, expected, sizeof (in));
5563 
5564 }
5565 
5566 /* Test shift_bytes_in_array_right, in particular that it carries bits
5567    across bytes correctly.  */
5568 
5569 static void
5570 verify_shift_bytes_in_array_right (void)
5571 {
5572    /* byte 1   | byte 0
5573       00011111 | 11100000.  */
5574   unsigned char orig[2] = { 0x1f, 0xe0};
5575   unsigned char in[2];
5576   memcpy (in, orig, sizeof orig);
5577   unsigned char expected[2] = { 0x07, 0xf8};
5578   shift_bytes_in_array_right (in, sizeof (in), 2);
5579   verify_array_eq (in, expected, sizeof (in));
5580 
5581   memcpy (in, orig, sizeof orig);
5582   memcpy (expected, orig, sizeof orig);
5583   /* Check that shifting by zero doesn't change anything.  */
5584   shift_bytes_in_array_right (in, sizeof (in), 0);
5585   verify_array_eq (in, expected, sizeof (in));
5586 }
5587 
5588 /* Test that clear_bit_region clears exactly the bits asked for and
5589    nothing more.  */
5590 
5591 static void
5592 verify_clear_bit_region (void)
5593 {
5594   /* Start with all bits set and test clearing various patterns in them.  */
5595   unsigned char orig[3] = { 0xff, 0xff, 0xff};
5596   unsigned char in[3];
5597   unsigned char expected[3];
5598   memcpy (in, orig, sizeof in);
5599 
5600   /* Check zeroing out all the bits.  */
5601   clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5602   expected[0] = expected[1] = expected[2] = 0;
5603   verify_array_eq (in, expected, sizeof in);
5604 
5605   memcpy (in, orig, sizeof in);
5606   /* Leave the first and last bits intact.  */
5607   clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5608   expected[0] = 0x1;
5609   expected[1] = 0;
5610   expected[2] = 0x80;
5611   verify_array_eq (in, expected, sizeof in);
5612 }
5613 
5614 /* Test that clear_bit_region_be clears exactly the bits asked for and
5615    nothing more.  */
5616 
5617 static void
5618 verify_clear_bit_region_be (void)
5619 {
5620   /* Start with all bits set and test clearing various patterns in them.  */
5621   unsigned char orig[3] = { 0xff, 0xff, 0xff};
5622   unsigned char in[3];
5623   unsigned char expected[3];
5624   memcpy (in, orig, sizeof in);
5625 
5626   /* Check zeroing out all the bits.  */
5627   clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5628   expected[0] = expected[1] = expected[2] = 0;
5629   verify_array_eq (in, expected, sizeof in);
5630 
5631   memcpy (in, orig, sizeof in);
5632   /* Leave the first and last bits intact.  */
5633   clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5634   expected[0] = 0x80;
5635   expected[1] = 0;
5636   expected[2] = 0x1;
5637   verify_array_eq (in, expected, sizeof in);
5638 }
5639 
5640 
5641 /* Run all of the selftests within this file.  */
5642 
5643 void
5644 store_merging_cc_tests (void)
5645 {
5646   verify_shift_bytes_in_array_left ();
5647   verify_shift_bytes_in_array_right ();
5648   verify_clear_bit_region ();
5649   verify_clear_bit_region_be ();
5650 }
5651 
5652 } // namespace selftest
5653 #endif /* CHECKING_P.  */
5654