1 /* GIMPLE store merging and byte swapping passes.
2    Copyright (C) 2009-2020 Free Software Foundation, Inc.
3    Contributed by ARM Ltd.
4 
5    This file is part of GCC.
6 
7    GCC is free software; you can redistribute it and/or modify it
8    under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 3, or (at your option)
10    any later version.
11 
12    GCC is distributed in the hope that it will be useful, but
13    WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15    General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with GCC; see the file COPYING3.  If not see
19    <http://www.gnu.org/licenses/>.  */
20 
21 /* The purpose of the store merging pass is to combine multiple memory stores
22    of constant values, values loaded from memory, bitwise operations on those,
23    or bit-field values, to consecutive locations, into fewer wider stores.
24 
25    For example, if we have a sequence performing four byte stores to
26    consecutive memory locations:
27    [p     ] := imm1;
28    [p + 1B] := imm2;
29    [p + 2B] := imm3;
30    [p + 3B] := imm4;
31    we can transform this into a single 4-byte store if the target supports it:
32    [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
33 
34    Or:
35    [p     ] := [q     ];
36    [p + 1B] := [q + 1B];
37    [p + 2B] := [q + 2B];
38    [p + 3B] := [q + 3B];
39    if there is no overlap, this can be transformed into a single 4-byte
40    load followed by a single 4-byte store.
41 
42    Or:
43    [p     ] := [q     ] ^ imm1;
44    [p + 1B] := [q + 1B] ^ imm2;
45    [p + 2B] := [q + 2B] ^ imm3;
46    [p + 3B] := [q + 3B] ^ imm4;
47    if there is no overlap, this can be transformed into a single 4-byte
48    load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49 
50    Or:
51    [p:1 ] := imm;
52    [p:31] := val & 0x7FFFFFFF;
53    we can transform this into a single 4-byte store if the target supports it:
54    [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55 
56    The algorithm is applied to each basic block in three phases:
57 
58    1) Scan through the basic block and record assignments to destinations
59    that can be expressed as a store to memory of a certain size at a certain
60    bit offset from base expressions we can handle.  For bit-fields we also
61    record the surrounding bit region, i.e. bits that could be stored in
62    a read-modify-write operation when storing the bit-field.  Record store
63    chains to different bases in a hash_map (m_stores) and make sure to
64    terminate such chains when appropriate (for example when the stored
65    values get used subsequently).
66    These stores can be a result of structure element initializers, array stores
67    etc.  A store_immediate_info object is recorded for every such store.
68    Record as many such assignments to a single base as possible until a
69    statement that interferes with the store sequence is encountered.
70    Each store has up to 2 operands, which can be either a constant, a memory
71    load or an SSA name, from which the value to be stored can be computed.
72    At most one of the operands can be a constant.  The operands are recorded
73    in store_operand_info struct.
74 
75    2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
76    store_immediate_info objects) and coalesce contiguous stores into
77    merged_store_group objects.  For bit-field stores, the stores themselves
78    need not be contiguous; only their surrounding bit regions have to be
79    contiguous.  If the expression being stored is different
80    between adjacent stores, such as one store storing a constant and the
81    following one storing a value loaded from memory, or if the loaded memory
82    objects are not adjacent, a new merged_store_group is created as well.
83 
84    For example, given the stores:
85    [p     ] := 0;
86    [p + 1B] := 1;
87    [p + 3B] := 0;
88    [p + 4B] := 1;
89    [p + 5B] := 0;
90    [p + 6B] := 0;
91    This phase would produce two merged_store_group objects, one recording the
92    two bytes stored in the memory region [p : p + 1] and another
93    recording the four bytes stored in the memory region [p + 3 : p + 6].
94 
95    3) The merged_store_group objects produced in phase 2) are processed
96    to generate the sequence of wider stores that set the contiguous memory
97    regions to the sequence of bytes that correspond to it.  This may emit
98    multiple stores per store group to handle contiguous stores that are not
99    of a size that is a power of 2.  For example it can try to emit a 40-bit
100    store as a 32-bit store followed by an 8-bit store.
101    We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT
102    or TARGET_SLOW_UNALIGNED_ACCESS settings.
103 
104    Note on endianness and example:
105    Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106    [p     ] := 0x1234;
107    [p + 2B] := 0x5678;
108    [p + 4B] := 0xab;
109    [p + 5B] := 0xcd;
110 
111    The memory layout for little-endian (LE) and big-endian (BE) must be:
112   p |LE|BE|
113   ---------
114   0 |34|12|
115   1 |12|34|
116   2 |78|56|
117   3 |56|78|
118   4 |ab|ab|
119   5 |cd|cd|
120 
121   To merge these into a single 48-bit merged value 'val' in phase 2)
122   on little-endian we insert stores to higher (consecutive) bitpositions
123   into the most significant bits of the merged value.
124   The final merged value would be: 0xcdab56781234
125 
126   For big-endian we insert stores to higher bitpositions into the least
127   significant bits of the merged value.
128   The final merged value would be: 0x12345678abcd
129 
130   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131   followed by a 16-bit store.  Again, we must consider endianness when
132   breaking down the 48-bit value 'val' computed above.
133   For little endian we emit:
134   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135   [p + 4B] (16-bit) := 0xcdab;    // (val & 0xffff00000000) >> 32;
136 
137   Whereas for big-endian we emit:
138   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */
140 
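/* As a minimal source-level sketch of the first transformation above (the
   function name and constants below are hypothetical, chosen to match the
   little-endian layout of the endianness example), a sequence of four byte
   stores such as

     void
     set_magic (unsigned char *p)
     {
       p[0] = 0x34;
       p[1] = 0x12;
       p[2] = 0x78;
       p[3] = 0x56;
     }

   can, on a little-endian target that allows unaligned word accesses, be
   merged into a single 32-bit store of the constant 0x56781234, i.e. the
   four immediates concatenated according to the target byte order.  */
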
141 #include "config.h"
142 #include "system.h"
143 #include "coretypes.h"
144 #include "backend.h"
145 #include "tree.h"
146 #include "gimple.h"
147 #include "builtins.h"
148 #include "fold-const.h"
149 #include "tree-pass.h"
150 #include "ssa.h"
151 #include "gimple-pretty-print.h"
152 #include "alias.h"
153 #include "fold-const.h"
154 #include "print-tree.h"
155 #include "tree-hash-traits.h"
156 #include "gimple-iterator.h"
157 #include "gimplify.h"
158 #include "gimple-fold.h"
159 #include "stor-layout.h"
160 #include "timevar.h"
161 #include "cfganal.h"
162 #include "cfgcleanup.h"
163 #include "tree-cfg.h"
164 #include "except.h"
165 #include "tree-eh.h"
166 #include "target.h"
167 #include "gimplify-me.h"
168 #include "rtl.h"
169 #include "expr.h"	/* For get_bit_range.  */
170 #include "optabs-tree.h"
171 #include "dbgcnt.h"
172 #include "selftest.h"
173 
174 /* The maximum size (in bits) of the stores this pass should generate.  */
175 #define MAX_STORE_BITSIZE (BITS_PER_WORD)
176 #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177 
178 /* Limit to bound the number of aliasing checks for loads with the same
179    vuse as the corresponding store.  */
180 #define MAX_STORE_ALIAS_CHECKS 64
181 
182 namespace {
183 
184 struct bswap_stat
185 {
186   /* Number of hand-written 16-bit nop / bswaps found.  */
187   int found_16bit;
188 
189   /* Number of hand-written 32-bit nop / bswaps found.  */
190   int found_32bit;
191 
192   /* Number of hand-written 64-bit nop / bswaps found.  */
193   int found_64bit;
194 } nop_stats, bswap_stats;
195 
196 /* A symbolic number structure is used to detect byte permutation and selection
197    patterns of a source.  To achieve that, its field N contains an artificial
198    number consisting of BITS_PER_MARKER sized markers tracking where each
199    byte comes from in the source:
200 
201    0	   - target byte has the value 0
202    FF	   - target byte has an unknown value (eg. due to sign extension)
203    1..size - marker value is the byte index in the source (0 for lsb).
204 
205    To detect permutations on memory sources (arrays and structures), a symbolic
206    number is also associated:
207    - a base address BASE_ADDR and an OFFSET giving the address of the source;
208    - a range which gives the difference between the highest and lowest accessed
209      memory locations used to build the symbolic number;
210    - the address SRC of the source element of lowest address, as a convenience
211      to easily get BASE_ADDR + offset + lowest bytepos;
212    - the number N_OPS of expressions bitwise ORed together, representing an
213      approximate cost of the computation.
214 
215    Note 1: the range is different from size as size reflects the size of the
216    type of the current expression.  For instance, for an array char a[],
217    (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218    (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219    time a range of 1.
220 
221    Note 2: for non-memory sources, range holds the same value as size.
222 
223    Note 3: SRC points to the SSA_NAME in case of non-memory source.  */
224 
225 struct symbolic_number {
226   uint64_t n;
227   tree type;
228   tree base_addr;
229   tree offset;
230   poly_int64_pod bytepos;
231   tree src;
232   tree alias_set;
233   tree vuse;
234   unsigned HOST_WIDE_INT range;
235   int n_ops;
236 };
237 
238 #define BITS_PER_MARKER 8
239 #define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240 #define MARKER_BYTE_UNKNOWN MARKER_MASK
241 #define HEAD_MARKER(n, size) \
242   ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243 
244 /* The number which the find_bswap_or_nop_1 result should match in
245    order to have a nop.  The number is masked according to the size of
246    the symbolic number before using it.  */
247 #define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248   (uint64_t)0x08070605 << 32 | 0x04030201)
249 
250 /* The number which the find_bswap_or_nop_1 result should match in
251    order to have a byte swap.  The number is masked according to the
252    size of the symbolic number before using it.  */
253 #define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254   (uint64_t)0x01020304 << 32 | 0x05060708)
255 
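/* Worked example (a sketch, for an unsigned 32-bit source X): the initial
   symbolic number for X is 0x04030201, where each marker records the
   1-based position, counting from the lsb, of the source byte that
   produces that byte of the result.  The classic byte swap idiom

     (X >> 24) | ((X >> 8) & 0xff00) | ((X << 8) & 0xff0000) | (X << 24)

   permutes the markers into 0x01020304.  Masked to four bytes, CMPNOP is
   0x04030201 and CMPXCHG is 0x01020304, so an expression whose markers end
   up as the former is recognized as a native-endian read and one ending up
   as the latter as a full byte swap.  */
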
256 /* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257    number N.  Return false if the requested operation is not permitted
258    on a symbolic number.  */
259 
260 inline bool
261 do_shift_rotate (enum tree_code code,
262 		 struct symbolic_number *n,
263 		 int count)
264 {
265   int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266   uint64_t head_marker;
267 
268   if (count < 0
269       || count >= TYPE_PRECISION (n->type)
270       || count % BITS_PER_UNIT != 0)
271     return false;
272   count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273 
274   /* Zero out the extra bits of N in order to avoid them being shifted
275      into the significant bits.  */
276   if (size < 64 / BITS_PER_MARKER)
277     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278 
279   switch (code)
280     {
281     case LSHIFT_EXPR:
282       n->n <<= count;
283       break;
284     case RSHIFT_EXPR:
285       head_marker = HEAD_MARKER (n->n, size);
286       n->n >>= count;
287       /* Arithmetic shift of signed type: result is dependent on the value.  */
288       if (!TYPE_UNSIGNED (n->type) && head_marker)
289 	for (i = 0; i < count / BITS_PER_MARKER; i++)
290 	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 		  << ((size - 1 - i) * BITS_PER_MARKER);
292       break;
293     case LROTATE_EXPR:
294       n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295       break;
296     case RROTATE_EXPR:
297       n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298       break;
299     default:
300       return false;
301     }
302   /* Zero unused bits for size.  */
303   if (size < 64 / BITS_PER_MARKER)
304     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305   return true;
306 }
307 
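/* For instance (a sketch with a 4-byte type): right-shifting the symbolic
   number 0x04030201 by 16 bits gives 0x00000403 for an unsigned type.  For
   a signed type the head marker is set, so the two top markers become
   MARKER_BYTE_UNKNOWN instead (0xffff0403), since an arithmetic shift
   makes those bytes depend on the sign of the value.  */
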
308 /* Perform sanity checking for the symbolic number N and the gimple
309    statement STMT.  */
310 
311 inline bool
312 verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313 {
314   tree lhs_type;
315 
316   lhs_type = gimple_expr_type (stmt);
317 
318   if (TREE_CODE (lhs_type) != INTEGER_TYPE
319       && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
320     return false;
321 
322   if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323     return false;
324 
325   return true;
326 }
327 
328 /* Initialize the symbolic number N for the bswap pass from the base element
329    SRC manipulated by the bitwise OR expression.  */
330 
331 bool
332 init_symbolic_number (struct symbolic_number *n, tree src)
333 {
334   int size;
335 
336   if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
337     return false;
338 
339   n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340   n->src = src;
341 
342   /* Set up the symbolic number N by setting each byte to a value between 1 and
343      the byte size of rhs1.  The highest order byte is set to that size and the
344      lowest order byte to 1.  */
345   n->type = TREE_TYPE (src);
346   size = TYPE_PRECISION (n->type);
347   if (size % BITS_PER_UNIT != 0)
348     return false;
349   size /= BITS_PER_UNIT;
350   if (size > 64 / BITS_PER_MARKER)
351     return false;
352   n->range = size;
353   n->n = CMPNOP;
354   n->n_ops = 1;
355 
356   if (size < 64 / BITS_PER_MARKER)
357     n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358 
359   return true;
360 }
361 
362 /* Check if STMT might be a byte swap or a nop from a memory source and return
363    the answer. If so, REF is that memory source and the base of the memory area
364    accessed and the offset of the access from that base are recorded in N.  */
365 
366 bool
367 find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368 {
369   /* Leaf node is an array or component ref. Memorize its base and
370      offset from base to compare to other such leaf nodes.  */
371   poly_int64 bitsize, bitpos, bytepos;
372   machine_mode mode;
373   int unsignedp, reversep, volatilep;
374   tree offset, base_addr;
375 
376   /* Not prepared to handle PDP endian.  */
377   if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378     return false;
379 
380   if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381     return false;
382 
383   base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 				   &unsignedp, &reversep, &volatilep);
385 
386   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387     /* Do not rewrite TARGET_MEM_REF.  */
388     return false;
389   else if (TREE_CODE (base_addr) == MEM_REF)
390     {
391       poly_offset_int bit_offset = 0;
392       tree off = TREE_OPERAND (base_addr, 1);
393 
394       if (!integer_zerop (off))
395 	{
396 	  poly_offset_int boff = mem_ref_offset (base_addr);
397 	  boff <<= LOG2_BITS_PER_UNIT;
398 	  bit_offset += boff;
399 	}
400 
401       base_addr = TREE_OPERAND (base_addr, 0);
402 
403       /* Avoid returning a negative bitpos as this may wreak havoc later.  */
404       if (maybe_lt (bit_offset, 0))
405 	{
406 	  tree byte_offset = wide_int_to_tree
407 	    (sizetype, bits_to_bytes_round_down (bit_offset));
408 	  bit_offset = num_trailing_bits (bit_offset);
409 	  if (offset)
410 	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
411 	  else
412 	    offset = byte_offset;
413 	}
414 
415       bitpos += bit_offset.force_shwi ();
416     }
417   else
418     base_addr = build_fold_addr_expr (base_addr);
419 
420   if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
421     return false;
422   if (!multiple_p (bitsize, BITS_PER_UNIT))
423     return false;
424   if (reversep)
425     return false;
426 
427   if (!init_symbolic_number (n, ref))
428     return false;
429   n->base_addr = base_addr;
430   n->offset = offset;
431   n->bytepos = bytepos;
432   n->alias_set = reference_alias_ptr_type (ref);
433   n->vuse = gimple_vuse (stmt);
434   return true;
435 }
436 
437 /* Compute the symbolic number N representing the result of a bitwise OR on 2
438    symbolic numbers N1 and N2 whose source statements are respectively
439    SOURCE_STMT1 and SOURCE_STMT2.  */
440 
441 gimple *
442 perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 			gimple *source_stmt2, struct symbolic_number *n2,
444 			struct symbolic_number *n)
445 {
446   int i, size;
447   uint64_t mask;
448   gimple *source_stmt;
449   struct symbolic_number *n_start;
450 
451   tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452   if (TREE_CODE (rhs1) == BIT_FIELD_REF
453       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454     rhs1 = TREE_OPERAND (rhs1, 0);
455   tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456   if (TREE_CODE (rhs2) == BIT_FIELD_REF
457       && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458     rhs2 = TREE_OPERAND (rhs2, 0);
459 
460   /* Sources are different, cancel bswap if they are not memory locations with
461      the same base (array, structure, ...).  */
462   if (rhs1 != rhs2)
463     {
464       uint64_t inc;
465       HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
466       struct symbolic_number *toinc_n_ptr, *n_end;
467       basic_block bb1, bb2;
468 
469       if (!n1->base_addr || !n2->base_addr
470 	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 	return NULL;
472 
473       if (!n1->offset != !n2->offset
474 	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 	return NULL;
476 
477       start1 = 0;
478       if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 	return NULL;
480 
481       if (start1 < start2)
482 	{
483 	  n_start = n1;
484 	  start_sub = start2 - start1;
485 	}
486       else
487 	{
488 	  n_start = n2;
489 	  start_sub = start1 - start2;
490 	}
491 
492       bb1 = gimple_bb (source_stmt1);
493       bb2 = gimple_bb (source_stmt2);
494       if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 	source_stmt = source_stmt1;
496       else
497 	source_stmt = source_stmt2;
498 
499       /* Find the highest address at which a load is performed and
500 	 compute related info.  */
501       end1 = start1 + (n1->range - 1);
502       end2 = start2 + (n2->range - 1);
503       if (end1 < end2)
504 	{
505 	  end = end2;
506 	  end_sub = end2 - end1;
507 	}
508       else
509 	{
510 	  end = end1;
511 	  end_sub = end1 - end2;
512 	}
513       n_end = (end2 > end1) ? n2 : n1;
514 
515       /* Find symbolic number whose lsb is the most significant.  */
516       if (BYTES_BIG_ENDIAN)
517 	toinc_n_ptr = (n_end == n1) ? n2 : n1;
518       else
519 	toinc_n_ptr = (n_start == n1) ? n2 : n1;
520 
521       n->range = end - MIN (start1, start2) + 1;
522 
523       /* Check that the range of memory covered can be represented by
524 	 a symbolic number.  */
525       if (n->range > 64 / BITS_PER_MARKER)
526 	return NULL;
527 
528       /* Reinterpret byte marks in symbolic number holding the value of
529 	 bigger weight according to target endianness.  */
530       inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531       size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532       for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533 	{
534 	  unsigned marker
535 	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 	  if (marker && marker != MARKER_BYTE_UNKNOWN)
537 	    toinc_n_ptr->n += inc;
538 	}
539     }
540   else
541     {
542       n->range = n1->range;
543       n_start = n1;
544       source_stmt = source_stmt1;
545     }
546 
547   if (!n1->alias_set
548       || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549     n->alias_set = n1->alias_set;
550   else
551     n->alias_set = ptr_type_node;
552   n->vuse = n_start->vuse;
553   n->base_addr = n_start->base_addr;
554   n->offset = n_start->offset;
555   n->src = n_start->src;
556   n->bytepos = n_start->bytepos;
557   n->type = n_start->type;
558   size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559 
560   for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
561     {
562       uint64_t masked1, masked2;
563 
564       masked1 = n1->n & mask;
565       masked2 = n2->n & mask;
566       if (masked1 && masked2 && masked1 != masked2)
567 	return NULL;
568     }
569   n->n = n1->n | n2->n;
570   n->n_ops = n1->n_ops + n2->n_ops;
571 
572   return source_stmt;
573 }
574 
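/* As a little-endian sketch of the merge (C promotions glossed over): for
   P[0] | ((uint32_t) P[1] << 16) with P a uint16_t pointer, the two
   operands carry the markers 0x00000201 (bytepos 0) and 0x02010000
   (bytepos 2).  The markers of the higher-addressed load are incremented
   by its byte distance from the lower one, i.e. 2, giving 0x04030000, and
   the bitwise OR of the two yields 0x04030201 with a range of 4, which
   later matches CMPNOP, i.e. a native-endian 32-bit load.  */
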
575 /* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576    the operation given by the rhs of STMT on the result.  If the operation
577    could successfully be executed the function returns a gimple stmt whose
578    rhs's first tree is the expression of the source operand, and NULL
579    otherwise.  */
580 
581 gimple *
582 find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
583 {
584   enum tree_code code;
585   tree rhs1, rhs2 = NULL;
586   gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
587   enum gimple_rhs_class rhs_class;
588 
589   if (!limit || !is_gimple_assign (stmt))
590     return NULL;
591 
592   rhs1 = gimple_assign_rhs1 (stmt);
593 
594   if (find_bswap_or_nop_load (stmt, rhs1, n))
595     return stmt;
596 
597   /* Handle BIT_FIELD_REF.  */
598   if (TREE_CODE (rhs1) == BIT_FIELD_REF
599       && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
600     {
601       unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
602       unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
603       if (bitpos % BITS_PER_UNIT == 0
604 	  && bitsize % BITS_PER_UNIT == 0
605 	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
606 	{
607 	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
608 	  if (BYTES_BIG_ENDIAN)
609 	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
610 
611 	  /* Shift.  */
612 	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
613 	    return NULL;
614 
615 	  /* Mask.  */
616 	  uint64_t mask = 0;
617 	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
618 	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
619 	       i++, tmp <<= BITS_PER_UNIT)
620 	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
621 	  n->n &= mask;
622 
623 	  /* Convert.  */
624 	  n->type = TREE_TYPE (rhs1);
625 	  if (!n->base_addr)
626 	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
627 
628 	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
629 	}
630 
631       return NULL;
632     }
633 
634   if (TREE_CODE (rhs1) != SSA_NAME)
635     return NULL;
636 
637   code = gimple_assign_rhs_code (stmt);
638   rhs_class = gimple_assign_rhs_class (stmt);
639   rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
640 
641   if (rhs_class == GIMPLE_BINARY_RHS)
642     rhs2 = gimple_assign_rhs2 (stmt);
643 
644   /* Handle unary rhs and binary rhs with integer constants as second
645      operand.  */
646 
647   if (rhs_class == GIMPLE_UNARY_RHS
648       || (rhs_class == GIMPLE_BINARY_RHS
649 	  && TREE_CODE (rhs2) == INTEGER_CST))
650     {
651       if (code != BIT_AND_EXPR
652 	  && code != LSHIFT_EXPR
653 	  && code != RSHIFT_EXPR
654 	  && code != LROTATE_EXPR
655 	  && code != RROTATE_EXPR
656 	  && !CONVERT_EXPR_CODE_P (code))
657 	return NULL;
658 
659       source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
660 
661       /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
662 	 we have to initialize the symbolic number.  */
663       if (!source_stmt1)
664 	{
665 	  if (gimple_assign_load_p (stmt)
666 	      || !init_symbolic_number (n, rhs1))
667 	    return NULL;
668 	  source_stmt1 = stmt;
669 	}
670 
671       switch (code)
672 	{
673 	case BIT_AND_EXPR:
674 	  {
675 	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
676 	    uint64_t val = int_cst_value (rhs2), mask = 0;
677 	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
678 
679 	    /* Only constants masking full bytes are allowed.  */
680 	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
681 	      if ((val & tmp) != 0 && (val & tmp) != tmp)
682 		return NULL;
683 	      else if (val & tmp)
684 		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
685 
686 	    n->n &= mask;
687 	  }
688 	  break;
689 	case LSHIFT_EXPR:
690 	case RSHIFT_EXPR:
691 	case LROTATE_EXPR:
692 	case RROTATE_EXPR:
693 	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
694 	    return NULL;
695 	  break;
696 	CASE_CONVERT:
697 	  {
698 	    int i, type_size, old_type_size;
699 	    tree type;
700 
701 	    type = gimple_expr_type (stmt);
702 	    type_size = TYPE_PRECISION (type);
703 	    if (type_size % BITS_PER_UNIT != 0)
704 	      return NULL;
705 	    type_size /= BITS_PER_UNIT;
706 	    if (type_size > 64 / BITS_PER_MARKER)
707 	      return NULL;
708 
709 	    /* Sign extension: result is dependent on the value.  */
710 	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
711 	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
712 		&& HEAD_MARKER (n->n, old_type_size))
713 	      for (i = 0; i < type_size - old_type_size; i++)
714 		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
715 			<< ((type_size - 1 - i) * BITS_PER_MARKER);
716 
717 	    if (type_size < 64 / BITS_PER_MARKER)
718 	      {
719 		/* If STMT casts to a smaller type mask out the bits not
720 		   belonging to the target type.  */
721 		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
722 	      }
723 	    n->type = type;
724 	    if (!n->base_addr)
725 	      n->range = type_size;
726 	  }
727 	  break;
728 	default:
729 	  return NULL;
730 	};
731       return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
732     }
733 
734   /* Handle binary rhs.  */
735 
736   if (rhs_class == GIMPLE_BINARY_RHS)
737     {
738       struct symbolic_number n1, n2;
739       gimple *source_stmt, *source_stmt2;
740 
741       if (code != BIT_IOR_EXPR)
742 	return NULL;
743 
744       if (TREE_CODE (rhs2) != SSA_NAME)
745 	return NULL;
746 
747       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
748 
749       switch (code)
750 	{
751 	case BIT_IOR_EXPR:
752 	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
753 
754 	  if (!source_stmt1)
755 	    return NULL;
756 
757 	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
758 
759 	  if (!source_stmt2)
760 	    return NULL;
761 
762 	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
763 	    return NULL;
764 
765 	  if (n1.vuse != n2.vuse)
766 	    return NULL;
767 
768 	  source_stmt
769 	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
770 
771 	  if (!source_stmt)
772 	    return NULL;
773 
774 	  if (!verify_symbolic_number_p (n, stmt))
775 	    return NULL;
776 
777 	  break;
778 	default:
779 	  return NULL;
780 	}
781       return source_stmt;
782     }
783   return NULL;
784 }
785 
786 /* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
787    *CMPXCHG, *CMPNOP and adjust *N.  */
788 
789 void
790 find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
791 			    uint64_t *cmpnop)
792 {
793   unsigned rsize;
794   uint64_t tmpn, mask;
795 
796   /* The number which the find_bswap_or_nop_1 result should match in order
797      to have a full byte swap.  The number is shifted to the right
798      according to the size of the symbolic number before using it.  */
799   *cmpxchg = CMPXCHG;
800   *cmpnop = CMPNOP;
801 
802   /* Find real size of result (highest non-zero byte).  */
803   if (n->base_addr)
804     for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
805   else
806     rsize = n->range;
807 
808   /* Zero out the bits corresponding to untouched bytes in original gimple
809      expression.  */
810   if (n->range < (int) sizeof (int64_t))
811     {
812       mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
813       *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
814       *cmpnop &= mask;
815     }
816 
817   /* Zero out the bits corresponding to unused bytes in the result of the
818      gimple expression.  */
819   if (rsize < n->range)
820     {
821       if (BYTES_BIG_ENDIAN)
822 	{
823 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
824 	  *cmpxchg &= mask;
825 	  if (n->range - rsize == sizeof (int64_t))
826 	    *cmpnop = 0;
827 	  else
828 	    *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
829 	}
830       else
831 	{
832 	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
833 	  if (n->range - rsize == sizeof (int64_t))
834 	    *cmpxchg = 0;
835 	  else
836 	    *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
837 	  *cmpnop &= mask;
838 	}
839       n->range = rsize;
840     }
841 
842   n->range *= BITS_PER_UNIT;
843 }
844 
845 /* Check if STMT completes a bswap implementation or a read in a given
846    endianness consisting of ORs, SHIFTs and ANDs and set *BSWAP
847    accordingly.  It also sets N to represent the kind of operations
848    performed: size of the resulting expression and whether it works on
849    a memory source, and if so alias-set and vuse.  Finally, the
850    function returns a stmt whose rhs's first tree is the source
851    expression.  */
852 
853 gimple *
854 find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
855 {
856   /* The last parameter determines the search depth limit.  It usually
857      correlates directly to the number n of bytes to be touched.  We
858      increase that number by 2 * (log2(n) + 1) here in order to also
859      cover signed -> unsigned conversions of the src operand as can be seen
860      in libgcc, and for initial shift/and operation of the src operand.  */
861   int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
862   limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
863   gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
864 
865   if (!ins_stmt)
866     return NULL;
867 
868   uint64_t cmpxchg, cmpnop;
869   find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
870 
871   /* A complete byte swap should make the symbolic number start with
872      the largest digit in the highest order byte.  An unchanged symbolic
873      number indicates a read with the same endianness as the target
874      architecture.  */
874   if (n->n == cmpnop)
875     *bswap = false;
876   else if (n->n == cmpxchg)
877     *bswap = true;
878   else
879     return NULL;
880 
881   /* Useless bit manipulation performed by code.  */
882   if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
883     return NULL;
884 
885   return ins_stmt;
886 }
887 
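/* For example (a sketch, with P an unsigned char pointer): on a
   little-endian target P[0] | (P[1] << 8) produces the markers 0x0201,
   which match CMPNOP, so it is treated as a plain 16-bit load, while
   (P[0] << 8) | P[1] produces 0x0102, which matches CMPXCHG, so it is
   treated as a 16-bit byte swap (emitted later in its canonical form, a
   rotate by 8).  */
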
888 const pass_data pass_data_optimize_bswap =
889 {
890   GIMPLE_PASS, /* type */
891   "bswap", /* name */
892   OPTGROUP_NONE, /* optinfo_flags */
893   TV_NONE, /* tv_id */
894   PROP_ssa, /* properties_required */
895   0, /* properties_provided */
896   0, /* properties_destroyed */
897   0, /* todo_flags_start */
898   0, /* todo_flags_finish */
899 };
900 
901 class pass_optimize_bswap : public gimple_opt_pass
902 {
903 public:
904   pass_optimize_bswap (gcc::context *ctxt)
905     : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
906   {}
907 
908   /* opt_pass methods: */
909   virtual bool gate (function *)
910     {
911       return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
912     }
913 
914   virtual unsigned int execute (function *);
915 
916 }; // class pass_optimize_bswap
917 
918 /* Perform the bswap optimization: replace the expression computed in the rhs
919    of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
920    bswap, load or load + bswap expression.
921    Which of these alternatives replaces the rhs is given by N->base_addr (non
922    null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
923    load to perform are also given in N, while the bswap builtin to invoke is
924    given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
925    the load statements involved to construct the rhs in gsi_stmt (GSI) and
926    N->range gives the size of the rhs expression for maintaining some
927    statistics.
928 
929    Note that if the replacement involves a load and if gsi_stmt (GSI) is
930    non-NULL, that stmt is moved just after INS_STMT to do the load with the
931    same VUSE, which can lead to gsi_stmt (GSI) changing basic block.  */
932 
933 tree
934 bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
935 	       tree bswap_type, tree load_type, struct symbolic_number *n,
936 	       bool bswap)
937 {
938   tree src, tmp, tgt = NULL_TREE;
939   gimple *bswap_stmt;
940 
941   gimple *cur_stmt = gsi_stmt (gsi);
942   src = n->src;
943   if (cur_stmt)
944     tgt = gimple_assign_lhs (cur_stmt);
945 
946   /* Need to load the value from memory first.  */
947   if (n->base_addr)
948     {
949       gimple_stmt_iterator gsi_ins = gsi;
950       if (ins_stmt)
951 	gsi_ins = gsi_for_stmt (ins_stmt);
952       tree addr_expr, addr_tmp, val_expr, val_tmp;
953       tree load_offset_ptr, aligned_load_type;
954       gimple *load_stmt;
955       unsigned align = get_object_alignment (src);
956       poly_int64 load_offset = 0;
957 
958       if (cur_stmt)
959 	{
960 	  basic_block ins_bb = gimple_bb (ins_stmt);
961 	  basic_block cur_bb = gimple_bb (cur_stmt);
962 	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
963 	    return NULL_TREE;
964 
965 	  /* Move cur_stmt just before one of the loads of the original
966 	     to ensure it has the same VUSE.  See PR61517 for what could
967 	     go wrong.  */
968 	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
969 	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
970 	  gsi_move_before (&gsi, &gsi_ins);
971 	  gsi = gsi_for_stmt (cur_stmt);
972 	}
973       else
974 	gsi = gsi_ins;
975 
976       /* Compute address to load from and cast according to the size
977 	 of the load.  */
978       addr_expr = build_fold_addr_expr (src);
979       if (is_gimple_mem_ref_addr (addr_expr))
980 	addr_tmp = unshare_expr (addr_expr);
981       else
982 	{
983 	  addr_tmp = unshare_expr (n->base_addr);
984 	  if (!is_gimple_mem_ref_addr (addr_tmp))
985 	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
986 						   is_gimple_mem_ref_addr,
987 						   NULL_TREE, true,
988 						   GSI_SAME_STMT);
989 	  load_offset = n->bytepos;
990 	  if (n->offset)
991 	    {
992 	      tree off
993 		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
994 					    true, NULL_TREE, true,
995 					    GSI_SAME_STMT);
996 	      gimple *stmt
997 		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
998 				       POINTER_PLUS_EXPR, addr_tmp, off);
999 	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1000 	      addr_tmp = gimple_assign_lhs (stmt);
1001 	    }
1002 	}
1003 
1004       /* Perform the load.  */
1005       aligned_load_type = load_type;
1006       if (align < TYPE_ALIGN (load_type))
1007 	aligned_load_type = build_aligned_type (load_type, align);
1008       load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1009       val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1010 			      load_offset_ptr);
1011 
1012       if (!bswap)
1013 	{
1014 	  if (n->range == 16)
1015 	    nop_stats.found_16bit++;
1016 	  else if (n->range == 32)
1017 	    nop_stats.found_32bit++;
1018 	  else
1019 	    {
1020 	      gcc_assert (n->range == 64);
1021 	      nop_stats.found_64bit++;
1022 	    }
1023 
1024 	  /* Convert the result of load if necessary.  */
1025 	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1026 	    {
1027 	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1028 					    "load_dst");
1029 	      load_stmt = gimple_build_assign (val_tmp, val_expr);
1030 	      gimple_set_vuse (load_stmt, n->vuse);
1031 	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1032 	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
1033 	      update_stmt (cur_stmt);
1034 	    }
1035 	  else if (cur_stmt)
1036 	    {
1037 	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1038 	      gimple_set_vuse (cur_stmt, n->vuse);
1039 	      update_stmt (cur_stmt);
1040 	    }
1041 	  else
1042 	    {
1043 	      tgt = make_ssa_name (load_type);
1044 	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1045 	      gimple_set_vuse (cur_stmt, n->vuse);
1046 	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
1047 	    }
1048 
1049 	  if (dump_file)
1050 	    {
1051 	      fprintf (dump_file,
1052 		       "%d bit load in target endianness found at: ",
1053 		       (int) n->range);
1054 	      print_gimple_stmt (dump_file, cur_stmt, 0);
1055 	    }
1056 	  return tgt;
1057 	}
1058       else
1059 	{
1060 	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1061 	  load_stmt = gimple_build_assign (val_tmp, val_expr);
1062 	  gimple_set_vuse (load_stmt, n->vuse);
1063 	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1064 	}
1065       src = val_tmp;
1066     }
1067   else if (!bswap)
1068     {
1069       gimple *g = NULL;
1070       if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1071 	{
1072 	  if (!is_gimple_val (src))
1073 	    return NULL_TREE;
1074 	  g = gimple_build_assign (tgt, NOP_EXPR, src);
1075 	}
1076       else if (cur_stmt)
1077 	g = gimple_build_assign (tgt, src);
1078       else
1079 	tgt = src;
1080       if (n->range == 16)
1081 	nop_stats.found_16bit++;
1082       else if (n->range == 32)
1083 	nop_stats.found_32bit++;
1084       else
1085 	{
1086 	  gcc_assert (n->range == 64);
1087 	  nop_stats.found_64bit++;
1088 	}
1089       if (dump_file)
1090 	{
1091 	  fprintf (dump_file,
1092 		   "%d bit reshuffle in target endianness found at: ",
1093 		   (int) n->range);
1094 	  if (cur_stmt)
1095 	    print_gimple_stmt (dump_file, cur_stmt, 0);
1096 	  else
1097 	    {
1098 	      print_generic_expr (dump_file, tgt, TDF_NONE);
1099 	      fprintf (dump_file, "\n");
1100 	    }
1101 	}
1102       if (cur_stmt)
1103 	gsi_replace (&gsi, g, true);
1104       return tgt;
1105     }
1106   else if (TREE_CODE (src) == BIT_FIELD_REF)
1107     src = TREE_OPERAND (src, 0);
1108 
1109   if (n->range == 16)
1110     bswap_stats.found_16bit++;
1111   else if (n->range == 32)
1112     bswap_stats.found_32bit++;
1113   else
1114     {
1115       gcc_assert (n->range == 64);
1116       bswap_stats.found_64bit++;
1117     }
1118 
1119   tmp = src;
1120 
1121   /* Convert the src expression if necessary.  */
1122   if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1123     {
1124       gimple *convert_stmt;
1125 
1126       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1127       convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1128       gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1129     }
1130 
1131   /* Canonical form for a 16-bit bswap is a rotate expression.  Only 16-bit
1132      values are handled this way, as rotating a 2N-bit value by N bits is
1133      generally not equivalent to a bswap.  Consider for instance 0x01020304
1134      r>> 16 which gives 0x03040102 while a bswap for that value is 0x04030201.  */
1135   if (bswap && n->range == 16)
1136     {
1137       tree count = build_int_cst (NULL, BITS_PER_UNIT);
1138       src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1139       bswap_stmt = gimple_build_assign (NULL, src);
1140     }
1141   else
1142     bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1143 
1144   if (tgt == NULL_TREE)
1145     tgt = make_ssa_name (bswap_type);
1146   tmp = tgt;
1147 
1148   /* Convert the result if necessary.  */
1149   if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1150     {
1151       gimple *convert_stmt;
1152 
1153       tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1154       convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1155       gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1156     }
1157 
1158   gimple_set_lhs (bswap_stmt, tmp);
1159 
1160   if (dump_file)
1161     {
1162       fprintf (dump_file, "%d bit bswap implementation found at: ",
1163 	       (int) n->range);
1164       if (cur_stmt)
1165 	print_gimple_stmt (dump_file, cur_stmt, 0);
1166       else
1167 	{
1168 	  print_generic_expr (dump_file, tgt, TDF_NONE);
1169 	  fprintf (dump_file, "\n");
1170 	}
1171     }
1172 
1173   if (cur_stmt)
1174     {
1175       gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1176       gsi_remove (&gsi, true);
1177     }
1178   else
1179     gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1180   return tgt;
1181 }
1182 
1183 /* Find manual byte swap implementations as well as loads in a given
1184    endianness.  Byte swaps are turned into a bswap builtin invocation,
1185    while endian loads are converted to a bswap builtin invocation or a
1186    simple load according to the target endianness.  */
1187 
1188 unsigned int
1189 pass_optimize_bswap::execute (function *fun)
1190 {
1191   basic_block bb;
1192   bool bswap32_p, bswap64_p;
1193   bool changed = false;
1194   tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1195 
1196   bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1197 	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1198   bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1199 	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1200 		   || (bswap32_p && word_mode == SImode)));
1201 
1202   /* Determine the argument type of the builtins.  The code later on
1203      assumes that the return and argument type are the same.  */
1204   if (bswap32_p)
1205     {
1206       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1207       bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1208     }
1209 
1210   if (bswap64_p)
1211     {
1212       tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1213       bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1214     }
1215 
1216   memset (&nop_stats, 0, sizeof (nop_stats));
1217   memset (&bswap_stats, 0, sizeof (bswap_stats));
1218   calculate_dominance_info (CDI_DOMINATORS);
1219 
1220   FOR_EACH_BB_FN (bb, fun)
1221     {
1222       gimple_stmt_iterator gsi;
1223 
1224       /* We do a reverse scan for bswap patterns to make sure we get the
1225 	 widest match. As bswap pattern matching doesn't handle previously
1226 	 inserted smaller bswap replacements as sub-patterns, the wider
1227 	 variant wouldn't be detected.  */
1228       for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1229 	{
1230 	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1231 	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1232 	  enum tree_code code;
1233 	  struct symbolic_number n;
1234 	  bool bswap;
1235 
1236 	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1237 	     might be moved to a different basic block by bswap_replace and gsi
1238 	     must not point to it if that's the case.  Moving the gsi_prev
1239 	     there makes sure that gsi points to the statement previous to
1240 	     cur_stmt while still making sure that all statements are
1241 	     considered in this basic block.  */
1242 	  gsi_prev (&gsi);
1243 
1244 	  if (!is_gimple_assign (cur_stmt))
1245 	    continue;
1246 
1247 	  code = gimple_assign_rhs_code (cur_stmt);
1248 	  switch (code)
1249 	    {
1250 	    case LROTATE_EXPR:
1251 	    case RROTATE_EXPR:
1252 	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1253 		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1254 		     % BITS_PER_UNIT)
1255 		continue;
1256 	      /* Fall through.  */
1257 	    case BIT_IOR_EXPR:
1258 	      break;
1259 	    default:
1260 	      continue;
1261 	    }
1262 
1263 	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1264 
1265 	  if (!ins_stmt)
1266 	    continue;
1267 
1268 	  switch (n.range)
1269 	    {
1270 	    case 16:
1271 	      /* Already in canonical form, nothing to do.  */
1272 	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1273 		continue;
1274 	      load_type = bswap_type = uint16_type_node;
1275 	      break;
1276 	    case 32:
1277 	      load_type = uint32_type_node;
1278 	      if (bswap32_p)
1279 		{
1280 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1281 		  bswap_type = bswap32_type;
1282 		}
1283 	      break;
1284 	    case 64:
1285 	      load_type = uint64_type_node;
1286 	      if (bswap64_p)
1287 		{
1288 		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1289 		  bswap_type = bswap64_type;
1290 		}
1291 	      break;
1292 	    default:
1293 	      continue;
1294 	    }
1295 
1296 	  if (bswap && !fndecl && n.range != 16)
1297 	    continue;
1298 
1299 	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1300 			     bswap_type, load_type, &n, bswap))
1301 	    changed = true;
1302 	}
1303     }
1304 
1305   statistics_counter_event (fun, "16-bit nop implementations found",
1306 			    nop_stats.found_16bit);
1307   statistics_counter_event (fun, "32-bit nop implementations found",
1308 			    nop_stats.found_32bit);
1309   statistics_counter_event (fun, "64-bit nop implementations found",
1310 			    nop_stats.found_64bit);
1311   statistics_counter_event (fun, "16-bit bswap implementations found",
1312 			    bswap_stats.found_16bit);
1313   statistics_counter_event (fun, "32-bit bswap implementations found",
1314 			    bswap_stats.found_32bit);
1315   statistics_counter_event (fun, "64-bit bswap implementations found",
1316 			    bswap_stats.found_64bit);
1317 
1318   return (changed ? TODO_update_ssa : 0);
1319 }
1320 
1321 } // anon namespace
1322 
1323 gimple_opt_pass *
1324 make_pass_optimize_bswap (gcc::context *ctxt)
1325 {
1326   return new pass_optimize_bswap (ctxt);
1327 }
1328 
1329 namespace {
1330 
1331 /* Struct recording one operand for the store, which is either a constant,
1332    then VAL represents the constant and all the other fields are zero, or
1333    a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1334    and the other fields also reflect the memory load, or an SSA name, then
1335    VAL represents the SSA name and all the other fields are zero.  */
1336 
1337 class store_operand_info
1338 {
1339 public:
1340   tree val;
1341   tree base_addr;
1342   poly_uint64 bitsize;
1343   poly_uint64 bitpos;
1344   poly_uint64 bitregion_start;
1345   poly_uint64 bitregion_end;
1346   gimple *stmt;
1347   bool bit_not_p;
1348   store_operand_info ();
1349 };
1350 
1351 store_operand_info::store_operand_info ()
1352   : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
1353     bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1354 {
1355 }
1356 
1357 /* Struct recording the information about a single store of an immediate
1358    to memory.  These are created in the first phase and coalesced into
1359    merged_store_group objects in the second phase.  */
1360 
1361 class store_immediate_info
1362 {
1363 public:
1364   unsigned HOST_WIDE_INT bitsize;
1365   unsigned HOST_WIDE_INT bitpos;
1366   unsigned HOST_WIDE_INT bitregion_start;
1367   /* This is one past the last bit of the bit region.  */
1368   unsigned HOST_WIDE_INT bitregion_end;
1369   gimple *stmt;
1370   unsigned int order;
1371   /* INTEGER_CST for constant stores, MEM_REF for memory copy,
1372      BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
1373      for bit insertion.
1374      LROTATE_EXPR if it can only be bswap optimized and
1375      ops are not really meaningful.
1376      NOP_EXPR if bswap optimization detected identity, ops
1377      are not meaningful.  */
1378   enum tree_code rhs_code;
1379   /* Two fields for bswap optimization purposes.  */
1380   struct symbolic_number n;
1381   gimple *ins_stmt;
1382   /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
1383   bool bit_not_p;
1384   /* True if ops have been swapped and thus ops[1] represents
1385      rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
1386   bool ops_swapped_p;
1387   /* The index number of the landing pad, or 0 if there is none.  */
1388   int lp_nr;
1389   /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
1390      just the first one.  */
1391   store_operand_info ops[2];
1392   store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1393 			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1394 			gimple *, unsigned int, enum tree_code,
1395 			struct symbolic_number &, gimple *, bool, int,
1396 			const store_operand_info &,
1397 			const store_operand_info &);
1398 };
1399 
1400 store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
1401 					    unsigned HOST_WIDE_INT bp,
1402 					    unsigned HOST_WIDE_INT brs,
1403 					    unsigned HOST_WIDE_INT bre,
1404 					    gimple *st,
1405 					    unsigned int ord,
1406 					    enum tree_code rhscode,
1407 					    struct symbolic_number &nr,
1408 					    gimple *ins_stmtp,
1409 					    bool bitnotp,
1410 					    int nr2,
1411 					    const store_operand_info &op0r,
1412 					    const store_operand_info &op1r)
1413   : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
1414     stmt (st), order (ord), rhs_code (rhscode), n (nr),
1415     ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1416     lp_nr (nr2)
1417 #if __cplusplus >= 201103L
1418     , ops { op0r, op1r }
1419 {
1420 }
1421 #else
1422 {
1423   ops[0] = op0r;
1424   ops[1] = op1r;
1425 }
1426 #endif
1427 
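/* A sketch tying this back to the example at the top of the file: the two
   constant byte stores [p] := 0; [p + 1B] := 1; are recorded in phase 1 as
   two store_immediate_info objects with bitsize 8, bitpos 0 and 8
   respectively (bit offsets from the base expression p) and rhs_code
   INTEGER_CST; phase 2 then coalesces them into a single
   merged_store_group covering [p : p + 1].  */
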
1428 /* Struct representing a group of stores to contiguous memory locations.
1429    These are produced by the second phase (coalescing) and consumed in the
1430    third phase that outputs the widened stores.  */
1431 
1432 class merged_store_group
1433 {
1434 public:
1435   unsigned HOST_WIDE_INT start;
1436   unsigned HOST_WIDE_INT width;
1437   unsigned HOST_WIDE_INT bitregion_start;
1438   unsigned HOST_WIDE_INT bitregion_end;
1439   /* The size of the allocated memory for val and mask.  */
1440   unsigned HOST_WIDE_INT buf_size;
1441   unsigned HOST_WIDE_INT align_base;
1442   poly_uint64 load_align_base[2];
1443 
1444   unsigned int align;
1445   unsigned int load_align[2];
1446   unsigned int first_order;
1447   unsigned int last_order;
1448   bool bit_insertion;
1449   bool only_constants;
1450   unsigned int first_nonmergeable_order;
1451   int lp_nr;
1452 
1453   auto_vec<store_immediate_info *> stores;
1454   /* We record the first and last original statements in the sequence because
1455      we'll need their vuse/vdef and replacement position.  It's easier to keep
1456      track of them separately as 'stores' is reordered by apply_stores.  */
1457   gimple *last_stmt;
1458   gimple *first_stmt;
1459   unsigned char *val;
1460   unsigned char *mask;
1461 
1462   merged_store_group (store_immediate_info *);
1463   ~merged_store_group ();
1464   bool can_be_merged_into (store_immediate_info *);
1465   void merge_into (store_immediate_info *);
1466   void merge_overlapping (store_immediate_info *);
1467   bool apply_stores ();
1468 private:
1469   void do_merge (store_immediate_info *);
1470 };
1471 
1472 /* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */
1473 
1474 static void
1475 dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1476 {
1477   if (!fd)
1478     return;
1479 
1480   for (unsigned int i = 0; i < len; i++)
1481     fprintf (fd, "%02x ", ptr[i]);
1482   fprintf (fd, "\n");
1483 }
1484 
1485 /* Clear out LEN bits starting from bit START in the byte array
1486    PTR.  This clears the bits to the *right* from START.
1487    START must be within [0, BITS_PER_UNIT) and counts starting from
1488    the least significant bit.  */
1489 
1490 static void
1491 clear_bit_region_be (unsigned char *ptr, unsigned int start,
1492 		     unsigned int len)
1493 {
1494   if (len == 0)
1495     return;
1496   /* Clear len bits to the right of start.  */
1497   else if (len <= start + 1)
1498     {
1499       unsigned char mask = (~(~0U << len));
1500       mask = mask << (start + 1U - len);
1501       ptr[0] &= ~mask;
1502     }
1503   else if (start != BITS_PER_UNIT - 1)
1504     {
1505       clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1506       clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1507 			   len - (start % BITS_PER_UNIT) - 1);
1508     }
1509   else if (start == BITS_PER_UNIT - 1
1510 	   && len > BITS_PER_UNIT)
1511     {
1512       unsigned int nbytes = len / BITS_PER_UNIT;
1513       memset (ptr, 0, nbytes);
1514       if (len % BITS_PER_UNIT != 0)
1515 	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1516 			     len % BITS_PER_UNIT);
1517     }
1518   else
1519     gcc_unreachable ();
1520 }
1521 
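/* For instance, with BITS_PER_UNIT == 8, clear_bit_region_be (ptr, 5, 3)
   on a byte holding 0xff clears bits 5, 4 and 3 (mask 0x38), leaving
   0xc7.  */
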
1522 /* In the byte array PTR clear the bit region starting at bit
1523    START that is LEN bits wide.
1524    For regions spanning multiple bytes do this recursively until we reach
1525    zero LEN or a region contained within a single byte.  */
1526 
1527 static void
1528 clear_bit_region (unsigned char *ptr, unsigned int start,
1529 		  unsigned int len)
1530 {
1531   /* Degenerate base case.  */
1532   if (len == 0)
1533     return;
1534   else if (start >= BITS_PER_UNIT)
1535     clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1536   /* Second base case.  */
1537   else if ((start + len) <= BITS_PER_UNIT)
1538     {
1539       unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1540       mask >>= BITS_PER_UNIT - (start + len);
1541 
1542       ptr[0] &= ~mask;
1543 
1544       return;
1545     }
1546   /* Clear most significant bits in a byte and proceed with the next byte.  */
1547   else if (start != 0)
1548     {
1549       clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1550       clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1551     }
1552   /* Whole bytes need to be cleared.  */
1553   else if (start == 0 && len > BITS_PER_UNIT)
1554     {
1555       unsigned int nbytes = len / BITS_PER_UNIT;
1556       /* We could recurse on each byte but we clear whole bytes, so a simple
1557 	 memset will do.  */
1558       memset (ptr, '\0', nbytes);
1559       /* Clear the remaining sub-byte region if there is one.  */
1560       if (len % BITS_PER_UNIT != 0)
1561 	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1562     }
1563   else
1564     gcc_unreachable ();
1565 }
1566 
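/* For instance, with BITS_PER_UNIT == 8, clear_bit_region (buf, 3, 7) on
   buf[] = { 0xff, 0xff } clears bits 3..7 of buf[0] and bits 0..1 of
   buf[1], leaving { 0x07, 0xfc }: the region is split at the byte boundary
   by the recursion above.  */
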
1567 /* Write BITLEN bits of EXPR to the byte array PTR at
1568    bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
1569    Return true if the operation succeeded.  */
1570 
1571 static bool
1572 encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
1573 		       unsigned int total_bytes)
1574 {
1575   unsigned int first_byte = bitpos / BITS_PER_UNIT;
1576   bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1577 			|| (bitpos % BITS_PER_UNIT)
1578 			|| !int_mode_for_size (bitlen, 0).exists ());
1579   bool empty_ctor_p
1580     = (TREE_CODE (expr) == CONSTRUCTOR
1581        && CONSTRUCTOR_NELTS (expr) == 0
1582        && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1583        && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1584 
1585   if (!sub_byte_op_p)
1586     {
1587       if (first_byte >= total_bytes)
1588 	return false;
1589       total_bytes -= first_byte;
1590       if (empty_ctor_p)
1591 	{
1592 	  unsigned HOST_WIDE_INT rhs_bytes
1593 	    = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1594 	  if (rhs_bytes > total_bytes)
1595 	    return false;
1596 	  memset (ptr + first_byte, '\0', rhs_bytes);
1597 	  return true;
1598 	}
1599       return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1600     }
1601 
1602   /* LITTLE-ENDIAN
1603      We are writing a non byte-sized quantity or at a position that is not
1604      at a byte boundary.
1605      |--------|--------|--------| ptr + first_byte
1606            ^              ^
1607            xxx xxxxxxxx xxx< bp>
1608            |______EXPR____|
1609 
1610      First native_encode_expr EXPR into a temporary buffer and shift each
1611      byte in the buffer by 'bp' (carrying the bits over as necessary).
1612      |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1613                                               <------bitlen---->< bp>
1614     Then we clear the destination bits:
1615     |---00000|00000000|000-----| ptr + first_byte
1616         <-------bitlen--->< bp>
1617 
1618     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1619     |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1620 
1621    BIG-ENDIAN
1622    We are writing a non byte-sized quantity or at a position that is not
1623    at a byte boundary.
1624      ptr + first_byte |--------|--------|--------|
1625                             ^              ^
1626                        <bp >xxx xxxxxxxx xxx
1627                             |_____EXPR_____|
1628 
1629      First native_encode_expr EXPR into a temporary buffer and shift each
1630      byte in the buffer to the right (carrying the bits over as necessary).
1631      We shift by as much as needed to align the most significant bit of EXPR
1632      with bitpos:
1633      |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1634         <---bitlen---->          <bp ><-----bitlen----->
1635     Then we clear the destination bits:
1636     ptr + first_byte |-----000||00000000||00000---|
1637                       <bp ><-------bitlen----->
1638 
1639     Finally we ORR the bytes of the shifted EXPR into the cleared region:
1640     ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1641     The awkwardness comes from the fact that bitpos is counted from the
1642     most significant bit of a byte.  */
1643 
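  /* A minimal worked example of the little-endian case above (illustration
     only, assuming BITS_PER_UNIT == 8): storing the 12-bit constant 0xabc,
     encoded in a 16-bit type, at BITPOS == 4 gives first_byte == 0 and
     bp == 4.
       1) native_encode_expr fills TMPBUF with { 0xbc, 0x0a, 0x00 }
          (one extra byte is allocated to shift into).
       2) shift_bytes_in_array_left (tmpbuf, 3, 4) yields
          { 0xc0, 0xab, 0x00 }.
       3) clear_bit_region (ptr, 4, 12) zeroes bits 4..7 of ptr[0] and all
          of ptr[1].
       4) The final loop ORs TMPBUF into PTR, so the high nibble of ptr[0]
          becomes 0xc and ptr[1] becomes 0xab, while bits 0..3 of ptr[0]
          and everything from ptr[2] onwards are left untouched.  */
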
1644   /* We must be dealing with fixed-size data at this point, since the
1645      total size is also fixed.  */
1646   unsigned int byte_size;
1647   if (empty_ctor_p)
1648     {
1649       unsigned HOST_WIDE_INT rhs_bytes
1650 	= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1651       if (rhs_bytes > total_bytes)
1652 	return false;
1653       byte_size = rhs_bytes;
1654     }
1655   else
1656     {
1657       fixed_size_mode mode
1658 	= as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1659       byte_size = GET_MODE_SIZE (mode);
1660     }
1661   /* Allocate an extra byte so that we have space to shift into.  */
1662   byte_size++;
1663   unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
1664   memset (tmpbuf, '\0', byte_size);
1665   /* The store detection code should only have allowed constants that are
1666      accepted by native_encode_expr or empty ctors.  */
1667   if (!empty_ctor_p
1668       && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1669     gcc_unreachable ();
1670 
1671   /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1672      bytes to write.  This means it can write more than
1673      ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1674      write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
1675      bitlen and zero out the bits that are not relevant as well (that may
1676      contain a sign bit due to sign-extension).  */
1677   unsigned int padding
1678     = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1679   /* On big-endian the padding is at the 'front' so just skip the initial
1680      bytes.  */
1681   if (BYTES_BIG_ENDIAN)
1682     tmpbuf += padding;
1683 
1684   byte_size -= padding;
1685 
1686   if (bitlen % BITS_PER_UNIT != 0)
1687     {
1688       if (BYTES_BIG_ENDIAN)
1689 	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1690 			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1691       else
1692 	clear_bit_region (tmpbuf, bitlen,
1693 			  byte_size * BITS_PER_UNIT - bitlen);
1694     }
1695   /* Left shifting relies on the last byte being clear if bitlen is
1696      a multiple of BITS_PER_UNIT, which might not be clear if
1697      there are padding bytes.  */
1698   else if (!BYTES_BIG_ENDIAN)
1699     tmpbuf[byte_size - 1] = '\0';
1700 
1701   /* Clear the bit region in PTR where the bits from TMPBUF will be
1702      inserted into.  */
1703   if (BYTES_BIG_ENDIAN)
1704     clear_bit_region_be (ptr + first_byte,
1705 			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1706   else
1707     clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1708 
1709   int shift_amnt;
1710   int bitlen_mod = bitlen % BITS_PER_UNIT;
1711   int bitpos_mod = bitpos % BITS_PER_UNIT;
1712 
1713   bool skip_byte = false;
1714   if (BYTES_BIG_ENDIAN)
1715     {
1716       /* BITPOS and BITLEN are exactly aligned and no shifting
1717 	 is necessary.  */
1718       if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1719 	  || (bitpos_mod == 0 && bitlen_mod == 0))
1720 	shift_amnt = 0;
1721       /* |. . . . . . . .|
1722 	  <bp >   <blen >.
1723 	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1724 	 of the value until it aligns with 'bp' in the next byte over.  */
1725       else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1726 	{
1727 	  shift_amnt = bitlen_mod + bitpos_mod;
1728 	  skip_byte = bitlen_mod != 0;
1729 	}
1730       /* |. . . . . . . .|
1731 	  <----bp--->
1732 	    <---blen---->.
1733 	 Shift the value right within the same byte so it aligns with 'bp'.  */
1734       else
1735 	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1736     }
1737   else
1738     shift_amnt = bitpos % BITS_PER_UNIT;
1739 
1740   /* Create the shifted version of EXPR.  */
1741   if (!BYTES_BIG_ENDIAN)
1742     {
1743       shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
1744       if (shift_amnt == 0)
1745 	byte_size--;
1746     }
1747   else
1748     {
1749       gcc_assert (BYTES_BIG_ENDIAN);
1750       shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1751       /* If shifting right forced us to move into the next byte skip the now
1752 	 empty byte.  */
1753       if (skip_byte)
1754 	{
1755 	  tmpbuf++;
1756 	  byte_size--;
1757 	}
1758     }
1759 
1760   /* Insert the bits from TMPBUF.  */
1761   for (unsigned int i = 0; i < byte_size; i++)
1762     ptr[first_byte + i] |= tmpbuf[i];
1763 
1764   return true;
1765 }
1766 
1767 /* Sorting function for store_immediate_info objects.
1768    Sorts them by bitposition.  */
1769 
1770 static int
1771 sort_by_bitpos (const void *x, const void *y)
1772 {
1773   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1774   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1775 
1776   if ((*tmp)->bitpos < (*tmp2)->bitpos)
1777     return -1;
1778   else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1779     return 1;
1780   else
1781     /* If they are the same let's use the order which is guaranteed to
1782        be different.  */
1783     return (*tmp)->order - (*tmp2)->order;
1784 }
1785 
1786 /* Sorting function for store_immediate_info objects.
1787    Sorts them by the order field.  */
1788 
1789 static int
1790 sort_by_order (const void *x, const void *y)
1791 {
1792   store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1793   store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1794 
1795   if ((*tmp)->order < (*tmp2)->order)
1796     return -1;
1797   else if ((*tmp)->order > (*tmp2)->order)
1798     return 1;
1799 
1800   gcc_unreachable ();
1801 }
1802 
1803 /* Initialize a merged_store_group object from a store_immediate_info
1804    object.  */
1805 
1806 merged_store_group::merged_store_group (store_immediate_info *info)
1807 {
1808   start = info->bitpos;
1809   width = info->bitsize;
1810   bitregion_start = info->bitregion_start;
1811   bitregion_end = info->bitregion_end;
1812   /* VAL has memory allocated for it in apply_stores once the group
1813      width has been finalized.  */
1814   val = NULL;
1815   mask = NULL;
1816   bit_insertion = false;
1817   only_constants = info->rhs_code == INTEGER_CST;
1818   first_nonmergeable_order = ~0U;
1819   lp_nr = info->lp_nr;
1820   unsigned HOST_WIDE_INT align_bitpos = 0;
1821   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1822 			  &align, &align_bitpos);
1823   align_base = start - align_bitpos;
1824   for (int i = 0; i < 2; ++i)
1825     {
1826       store_operand_info &op = info->ops[i];
1827       if (op.base_addr == NULL_TREE)
1828 	{
1829 	  load_align[i] = 0;
1830 	  load_align_base[i] = 0;
1831 	}
1832       else
1833 	{
1834 	  get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1835 	  load_align_base[i] = op.bitpos - align_bitpos;
1836 	}
1837     }
1838   stores.create (1);
1839   stores.safe_push (info);
1840   last_stmt = info->stmt;
1841   last_order = info->order;
1842   first_stmt = last_stmt;
1843   first_order = last_order;
1844   buf_size = 0;
1845 }
1846 
1847 merged_store_group::~merged_store_group ()
1848 {
1849   if (val)
1850     XDELETEVEC (val);
1851 }
1852 
1853 /* Return true if the store described by INFO can be merged into the group.  */
1854 
1855 bool
1856 merged_store_group::can_be_merged_into (store_immediate_info *info)
1857 {
1858   /* Do not merge bswap patterns.  */
1859   if (info->rhs_code == LROTATE_EXPR)
1860     return false;
1861 
1862   if (info->lp_nr != lp_nr)
1863     return false;
1864 
1865   /* The canonical case.  */
1866   if (info->rhs_code == stores[0]->rhs_code)
1867     return true;
1868 
1869   /* BIT_INSERT_EXPR is compatible with INTEGER_CST.  */
1870   if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1871     return true;
1872 
1873   if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1874     return true;
1875 
1876   /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
1877   if (info->rhs_code == MEM_REF
1878       && (stores[0]->rhs_code == INTEGER_CST
1879 	  || stores[0]->rhs_code == BIT_INSERT_EXPR)
1880       && info->bitregion_start == stores[0]->bitregion_start
1881       && info->bitregion_end == stores[0]->bitregion_end)
1882     return true;
1883 
1884   if (stores[0]->rhs_code == MEM_REF
1885       && (info->rhs_code == INTEGER_CST
1886 	  || info->rhs_code == BIT_INSERT_EXPR)
1887       && info->bitregion_start == stores[0]->bitregion_start
1888       && info->bitregion_end == stores[0]->bitregion_end)
1889     return true;
1890 
1891   return false;
1892 }
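
/* For instance (illustration only), with two bit-fields that share one
   storage unit and therefore the same bit region:

     s.a = 5;        // INTEGER_CST store
     s.b = t.b;      // MEM_REF store of a loaded bit-field value

   the MEM_REF store is still accepted here; it is rewritten into a
   BIT_INSERT_EXPR later, in coalesce_immediate_stores.  */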
1893 
1894 /* Helper method for merge_into and merge_overlapping to do
1895    the common part.  */
1896 
1897 void
1898 merged_store_group::do_merge (store_immediate_info *info)
1899 {
1900   bitregion_start = MIN (bitregion_start, info->bitregion_start);
1901   bitregion_end = MAX (bitregion_end, info->bitregion_end);
1902 
1903   unsigned int this_align;
1904   unsigned HOST_WIDE_INT align_bitpos = 0;
1905   get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1906 			  &this_align, &align_bitpos);
1907   if (this_align > align)
1908     {
1909       align = this_align;
1910       align_base = info->bitpos - align_bitpos;
1911     }
1912   for (int i = 0; i < 2; ++i)
1913     {
1914       store_operand_info &op = info->ops[i];
1915       if (!op.base_addr)
1916 	continue;
1917 
1918       get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1919       if (this_align > load_align[i])
1920 	{
1921 	  load_align[i] = this_align;
1922 	  load_align_base[i] = op.bitpos - align_bitpos;
1923 	}
1924     }
1925 
1926   gimple *stmt = info->stmt;
1927   stores.safe_push (info);
1928   if (info->order > last_order)
1929     {
1930       last_order = info->order;
1931       last_stmt = stmt;
1932     }
1933   else if (info->order < first_order)
1934     {
1935       first_order = info->order;
1936       first_stmt = stmt;
1937     }
1938   if (info->rhs_code != INTEGER_CST)
1939     only_constants = false;
1940 }
1941 
1942 /* Merge a store recorded by INFO into this merged store.
1943    The store is not overlapping with the existing recorded
1944    stores.  */
1945 
1946 void
1947 merged_store_group::merge_into (store_immediate_info *info)
1948 {
1949   /* Make sure we're inserting in the position we think we're inserting.  */
1950   gcc_assert (info->bitpos >= start + width
1951 	      && info->bitregion_start <= bitregion_end);
1952 
1953   width = info->bitpos + info->bitsize - start;
1954   do_merge (info);
1955 }
1956 
1957 /* Merge a store described by INFO into this merged store.
1958    INFO overlaps in some way with the current store (i.e. it's not contiguous
1959    which is handled by merged_store_group::merge_into).  */
1960 
1961 void
1962 merged_store_group::merge_overlapping (store_immediate_info *info)
1963 {
1964   /* If the store extends the size of the group, extend the width.  */
1965   if (info->bitpos + info->bitsize > start + width)
1966     width = info->bitpos + info->bitsize - start;
1967 
1968   do_merge (info);
1969 }
1970 
1971 /* Go through all the recorded stores in this group in program order and
1972    apply their values to the VAL byte array to create the final merged
1973    value.  Return true if the operation succeeded.  */
1974 
1975 bool
1976 merged_store_group::apply_stores ()
1977 {
1978   /* Make sure we have more than one store in the group, otherwise we cannot
1979      merge anything.  */
1980   if (bitregion_start % BITS_PER_UNIT != 0
1981       || bitregion_end % BITS_PER_UNIT != 0
1982       || stores.length () == 1)
1983     return false;
1984 
1985   stores.qsort (sort_by_order);
1986   store_immediate_info *info;
1987   unsigned int i;
1988   /* Create a power-of-2-sized buffer for native_encode_expr.  */
1989   buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
1990   val = XNEWVEC (unsigned char, 2 * buf_size);
1991   mask = val + buf_size;
1992   memset (val, 0, buf_size);
1993   memset (mask, ~0U, buf_size);
1994 
1995   FOR_EACH_VEC_ELT (stores, i, info)
1996     {
1997       unsigned int pos_in_buffer = info->bitpos - bitregion_start;
1998       tree cst;
1999       if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2000 	cst = info->ops[0].val;
2001       else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2002 	cst = info->ops[1].val;
2003       else
2004 	cst = NULL_TREE;
2005       bool ret = true;
2006       if (cst)
2007 	{
2008 	  if (info->rhs_code == BIT_INSERT_EXPR)
2009 	    bit_insertion = true;
2010 	  else
2011 	    ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2012 					 pos_in_buffer, buf_size);
2013 	}
2014       unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2015       if (BYTES_BIG_ENDIAN)
2016 	clear_bit_region_be (m, (BITS_PER_UNIT - 1
2017 				 - (pos_in_buffer % BITS_PER_UNIT)),
2018 			     info->bitsize);
2019       else
2020 	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
2021       if (cst && dump_file && (dump_flags & TDF_DETAILS))
2022 	{
2023 	  if (ret)
2024 	    {
2025 	      fputs ("After writing ", dump_file);
2026 	      print_generic_expr (dump_file, cst, TDF_NONE);
2027 	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2028 		       " at position %d\n", info->bitsize, pos_in_buffer);
2029 	      fputs ("  the merged value contains ", dump_file);
2030 	      dump_char_array (dump_file, val, buf_size);
2031 	      fputs ("  the merged mask contains  ", dump_file);
2032 	      dump_char_array (dump_file, mask, buf_size);
2033 	      if (bit_insertion)
2034 		fputs ("  bit insertion is required\n", dump_file);
2035 	    }
2036 	  else
2037 	    fprintf (dump_file, "Failed to merge stores\n");
2038 	}
2039       if (!ret)
2040 	return false;
2041     }
2042   stores.qsort (sort_by_bitpos);
2043   return true;
2044 }
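
/* Illustration (not part of the pass) of the VAL/MASK encoding built above:
   for a group consisting of

     s.a = 0x12;     // one-byte store at bit region offset 0
     s.b = 0x34;     // one-byte store at bit region offset 8

   buf_size is rounded up to a power of two (here 2) and after the loop

     val  == { 0x12, 0x34 }
     mask == { 0x00, 0x00 }

   A set bit in MASK marks a position within the bit region that no store
   in the group has written.  */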
2045 
2046 /* Structure describing the store chain.  */
2047 
2048 class imm_store_chain_info
2049 {
2050 public:
2051   /* Doubly-linked list that imposes an order on chain processing.
2052      PNXP (prev's next pointer) points to the head of a list, or to
2053      the next field in the previous chain in the list.
2054      See pass_store_merging::m_stores_head for more rationale.  */
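  /* As an illustration (not normative): after creating chain A and then
     chain B on an initially empty m_stores_head,
       m_stores_head --> B --> A --> NULL
     with B->pnxp == &m_stores_head and A->pnxp == &B->next, so the
     destructor below can unlink a chain in O(1) by storing its NEXT
     through its PNXP pointer and fixing up the successor's PNXP.  */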
2055   imm_store_chain_info *next, **pnxp;
2056   tree base_addr;
2057   auto_vec<store_immediate_info *> m_store_info;
2058   auto_vec<merged_store_group *> m_merged_store_groups;
2059 
2060   imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2061   : next (inspt), pnxp (&inspt), base_addr (b_a)
2062   {
2063     inspt = this;
2064     if (next)
2065       {
2066 	gcc_checking_assert (pnxp == next->pnxp);
2067 	next->pnxp = &next;
2068       }
2069   }
2070   ~imm_store_chain_info ()
2071   {
2072     *pnxp = next;
2073     if (next)
2074       {
2075 	gcc_checking_assert (&next == next->pnxp);
2076 	next->pnxp = pnxp;
2077       }
2078   }
2079   bool terminate_and_process_chain ();
2080   bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2081 			   unsigned int);
2082   bool coalesce_immediate_stores ();
2083   bool output_merged_store (merged_store_group *);
2084   bool output_merged_stores ();
2085 };
2086 
2087 const pass_data pass_data_tree_store_merging = {
2088   GIMPLE_PASS,     /* type */
2089   "store-merging", /* name */
2090   OPTGROUP_NONE,   /* optinfo_flags */
2091   TV_GIMPLE_STORE_MERGING,	 /* tv_id */
2092   PROP_ssa,	/* properties_required */
2093   0,		   /* properties_provided */
2094   0,		   /* properties_destroyed */
2095   0,		   /* todo_flags_start */
2096   TODO_update_ssa, /* todo_flags_finish */
2097 };
2098 
2099 class pass_store_merging : public gimple_opt_pass
2100 {
2101 public:
2102   pass_store_merging (gcc::context *ctxt)
2103     : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
2104   {
2105   }
2106 
2107   /* Pass not supported for PDP-endian, nor for insane hosts or
2108      target character sizes where native_{encode,interpret}_expr
2109      doesn't work properly.  */
2110   virtual bool
2111   gate (function *)
2112   {
2113     return flag_store_merging
2114 	   && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2115 	   && CHAR_BIT == 8
2116 	   && BITS_PER_UNIT == 8;
2117   }
2118 
2119   virtual unsigned int execute (function *);
2120 
2121 private:
2122   hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
2123 
2124   /* Form a doubly-linked stack of the elements of m_stores, so that
2125      we can iterate over them in a predictable way.  Using this order
2126      avoids extraneous differences in the compiler output just because
2127      of tree pointer variations (e.g. different chains end up in
2128      different positions of m_stores, so they are handled in different
2129      orders, so they allocate or release SSA names in different
2130      orders, and when they get reused, subsequent passes end up
2131      getting different SSA names, which may ultimately change
2132      decisions when going out of SSA).  */
2133   imm_store_chain_info *m_stores_head;
2134 
2135   bool process_store (gimple *);
2136   bool terminate_and_process_chain (imm_store_chain_info *);
2137   bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
2138   bool terminate_and_process_all_chains ();
2139 }; // class pass_store_merging
2140 
2141 /* Terminate and process all recorded chains.  Return true if any changes
2142    were made.  */
2143 
2144 bool
2145 pass_store_merging::terminate_and_process_all_chains ()
2146 {
2147   bool ret = false;
2148   while (m_stores_head)
2149     ret |= terminate_and_process_chain (m_stores_head);
2150   gcc_assert (m_stores.is_empty ());
2151   return ret;
2152 }
2153 
2154 /* Terminate all chains that are affected by the statement STMT.
2155    CHAIN_INFO is the chain we should ignore from the checks if
2156    non-NULL.  Return true if any changes were made.  */
2157 
2158 bool
2159 pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
2160 						     **chain_info,
2161 						   gimple *stmt)
2162 {
2163   bool ret = false;
2164 
2165   /* If the statement doesn't touch memory it can't alias.  */
2166   if (!gimple_vuse (stmt))
2167     return false;
2168 
2169   tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2170   ao_ref store_lhs_ref;
2171   ao_ref_init (&store_lhs_ref, store_lhs);
2172   for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
2173     {
2174       next = cur->next;
2175 
2176       /* We already checked all the stores in chain_info and terminated the
2177 	 chain if necessary.  Skip it here.  */
2178       if (chain_info && *chain_info == cur)
2179 	continue;
2180 
2181       store_immediate_info *info;
2182       unsigned int i;
2183       FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
2184 	{
2185 	  tree lhs = gimple_assign_lhs (info->stmt);
2186 	  ao_ref lhs_ref;
2187 	  ao_ref_init (&lhs_ref, lhs);
2188 	  if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2189 	      || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2190 	      || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2191 						   &lhs_ref, false)))
2192 	    {
2193 	      if (dump_file && (dump_flags & TDF_DETAILS))
2194 		{
2195 		  fprintf (dump_file, "stmt causes chain termination:\n");
2196 		  print_gimple_stmt (dump_file, stmt, 0);
2197 		}
2198 	      ret |= terminate_and_process_chain (cur);
2199 	      break;
2200 	    }
2201 	}
2202     }
2203 
2204   return ret;
2205 }
2206 
2207 /* Helper function.  Terminate the chain recorded in CHAIN_INFO, which stores
2208    to its base object.  Return true if the merging and output was successful.
2209    The m_stores entry is removed after the processing in any case.  */
2210 
2211 bool
2212 pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
2213 {
2214   bool ret = chain_info->terminate_and_process_chain ();
2215   m_stores.remove (chain_info->base_addr);
2216   delete chain_info;
2217   return ret;
2218 }
2219 
2220 /* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2221    may clobber REF.  FIRST and LAST must have non-NULL vdef.  We want to
2222    be able to sink load of REF across stores between FIRST and LAST, up
2223    to right before LAST.  */
2224 
2225 bool
2226 stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2227 {
2228   ao_ref r;
2229   ao_ref_init (&r, ref);
2230   unsigned int count = 0;
2231   tree vop = gimple_vdef (last);
2232   gimple *stmt;
2233 
2234   /* Return true conservatively if the basic blocks are different.  */
2235   if (gimple_bb (first) != gimple_bb (last))
2236     return true;
2237 
2238   do
2239     {
2240       stmt = SSA_NAME_DEF_STMT (vop);
2241       if (stmt_may_clobber_ref_p_1 (stmt, &r))
2242 	return true;
2243       if (gimple_store_p (stmt)
2244 	  && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2245 	return true;
2246       /* Avoid quadratic compile time by bounding the number of checks
2247 	 we perform.  */
2248       if (++count > MAX_STORE_ALIAS_CHECKS)
2249 	return true;
2250       vop = gimple_vuse (stmt);
2251     }
2252   while (stmt != first);
2253 
2254   return false;
2255 }
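
/* For example (illustration only), with

     first:  t.a = _1;
             s.c = 2;
     last:   t.b = _2;

   and REF == s.b, the walk follows the virtual use-def chain backwards
   from LAST towards FIRST, asking for each store on the way whether it
   may clobber s.b.  Here none does, so a load of s.b could be sunk to
   just before LAST.  */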
2256 
2257 /* Return true if INFO->ops[IDX] is mergeable with the
2258    corresponding loads already in MERGED_STORE group.
2259    BASE_ADDR is the base address of the whole store group.  */
2260 
2261 bool
2262 compatible_load_p (merged_store_group *merged_store,
2263 		   store_immediate_info *info,
2264 		   tree base_addr, int idx)
2265 {
2266   store_immediate_info *infof = merged_store->stores[0];
2267   if (!info->ops[idx].base_addr
2268       || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2269 		   info->bitpos - infof->bitpos)
2270       || !operand_equal_p (info->ops[idx].base_addr,
2271 			   infof->ops[idx].base_addr, 0))
2272     return false;
2273 
2274   store_immediate_info *infol = merged_store->stores.last ();
2275   tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2276   /* In this case all vuses should be the same, e.g.
2277      _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2278      or
2279      _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2280      and we can emit the coalesced load next to any of those loads.  */
2281   if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2282       && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2283     return true;
2284 
2285   /* Otherwise, at least for now require that the load has the same
2286      vuse as the store.  See following examples.  */
2287   if (gimple_vuse (info->stmt) != load_vuse)
2288     return false;
2289 
2290   if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2291       || (infof != infol
2292 	  && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2293     return false;
2294 
2295   /* If the load is from the same location as the store, already
2296      the construction of the immediate chain info guarantees no intervening
2297      stores, so no further checks are needed.  Example:
2298      _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4;  */
2299   if (known_eq (info->ops[idx].bitpos, info->bitpos)
2300       && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2301     return true;
2302 
2303   /* Otherwise, we need to punt if any of the loads can be clobbered by any
2304      of the stores in the group, or any other stores in between those.
2305      Previous calls to compatible_load_p ensured that for all the
2306      merged_store->stores IDX loads, no stmts starting with
2307      merged_store->first_stmt and ending right before merged_store->last_stmt
2308      clobbers those loads.  */
2309   gimple *first = merged_store->first_stmt;
2310   gimple *last = merged_store->last_stmt;
2311   unsigned int i;
2312   store_immediate_info *infoc;
2313   /* The stores are sorted by increasing store bitpos, so if info->stmt store
2314      comes before the so far first load, we'll be changing
2315      merged_store->first_stmt.  In that case we need to give up if
2316      any of the earlier processed loads are clobbered by the stmts in the new
2317      range.  */
2318   if (info->order < merged_store->first_order)
2319     {
2320       FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2321 	if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2322 	  return false;
2323       first = info->stmt;
2324     }
2325   /* Similarly, we could change merged_store->last_stmt, so ensure
2326      in that case no stmts in the new range clobber any of the earlier
2327      processed loads.  */
2328   else if (info->order > merged_store->last_order)
2329     {
2330       FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2331 	if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2332 	  return false;
2333       last = info->stmt;
2334     }
2335   /* And finally, we'd be adding a new load to the set, ensure it isn't
2336      clobbered in the new range.  */
2337   if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2338     return false;
2339 
2340   /* Otherwise, we are looking for:
2341      _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2342      or
2343      _1 = s.a; t.a = _1; _2 = s.b; t.b = _2;  */
2344   return true;
2345 }
2346 
2347 /* Add all refs loaded to compute VAL to REFS vector.  */
2348 
2349 void
2350 gather_bswap_load_refs (vec<tree> *refs, tree val)
2351 {
2352   if (TREE_CODE (val) != SSA_NAME)
2353     return;
2354 
2355   gimple *stmt = SSA_NAME_DEF_STMT (val);
2356   if (!is_gimple_assign (stmt))
2357     return;
2358 
2359   if (gimple_assign_load_p (stmt))
2360     {
2361       refs->safe_push (gimple_assign_rhs1 (stmt));
2362       return;
2363     }
2364 
2365   switch (gimple_assign_rhs_class (stmt))
2366     {
2367     case GIMPLE_BINARY_RHS:
2368       gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2369       /* FALLTHRU */
2370     case GIMPLE_UNARY_RHS:
2371       gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2372       break;
2373     default:
2374       gcc_unreachable ();
2375     }
2376 }
2377 
2378 /* Check if there are any stores in M_STORE_INFO after index I
2379    (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2380    a potential group ending with END and have their order
2381    smaller than LAST_ORDER.  ALL_INTEGER_CST_P is true if
2382    all the stores already merged and the one under consideration
2383    have rhs_code of INTEGER_CST.  Return true if there are no such stores.
2384    Consider:
2385      MEM[(long long int *)p_28] = 0;
2386      MEM[(long long int *)p_28 + 8B] = 0;
2387      MEM[(long long int *)p_28 + 16B] = 0;
2388      MEM[(long long int *)p_28 + 24B] = 0;
2389      _129 = (int) _130;
2390      MEM[(int *)p_28 + 8B] = _129;
2391      MEM[(int *)p_28].a = -1;
2392    We already have
2393      MEM[(long long int *)p_28] = 0;
2394      MEM[(int *)p_28].a = -1;
2395    stmts in the current group and need to consider if it is safe to
2396    add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2397    There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2398    store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2399    into the group and merging of those 3 stores is successful, merged
2400    stmts will be emitted at the latest store from that group, i.e.
2401    LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2402    The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2403    the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2404    so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2405    into the group.  That way it will be its own store group and will
2406    not be touched.  If ALL_INTEGER_CST_P and there are overlapping
2407    INTEGER_CST stores, those are mergeable using merge_overlapping,
2408    so don't return false for those.
2409 
2410    Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2411    (exclusive), whether they don't overlap the bitrange START to END
2412    and have order in between FIRST_ORDER and LAST_ORDER.  This is to
2413    prevent merging in cases like:
2414      MEM <char[12]> [&b + 8B] = {};
2415      MEM[(short *) &b] = 5;
2416      _5 = *x_4(D);
2417      MEM <long long unsigned int> [&b + 2B] = _5;
2418      MEM[(char *)&b + 16B] = 88;
2419      MEM[(int *)&b + 20B] = 1;
2420    The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2421    be merged with it, because the = _5 store overlaps these and is in between
2422    them in sort_by_order ordering.  If it was merged, the merged store would
2423    go after the = _5 store and thus change behavior.  */
2424 
2425 static bool
2426 check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2427 		  bool all_integer_cst_p, unsigned int first_order,
2428 		  unsigned int last_order, unsigned HOST_WIDE_INT start,
2429 		  unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2430 		  unsigned end_earlier)
2431 {
2432   unsigned int len = m_store_info.length ();
2433   for (unsigned int j = first_earlier; j < end_earlier; j++)
2434     {
2435       store_immediate_info *info = m_store_info[j];
2436       if (info->order > first_order
2437 	  && info->order < last_order
2438 	  && info->bitpos + info->bitsize > start)
2439 	return false;
2440     }
2441   for (++i; i < len; ++i)
2442     {
2443       store_immediate_info *info = m_store_info[i];
2444       if (info->bitpos >= end)
2445 	break;
2446       if (info->order < last_order
2447 	  && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2448 	return false;
2449     }
2450   return true;
2451 }
2452 
2453 /* Return true if m_store_info[first] and at least one following store
2454    form a group which store try_size bitsize value which is byte swapped
2455    from a memory load or some value, or identity from some value.
2456    This uses the bswap pass APIs.  */
2457 
2458 bool
2459 imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2460 					  unsigned int first,
2461 					  unsigned int try_size,
2462 					  unsigned int first_earlier)
2463 {
2464   unsigned int len = m_store_info.length (), last = first;
2465   unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2466   if (width >= try_size)
2467     return false;
2468   for (unsigned int i = first + 1; i < len; ++i)
2469     {
2470       if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2471 	  || m_store_info[i]->lp_nr != merged_store->lp_nr
2472 	  || m_store_info[i]->ins_stmt == NULL)
2473 	return false;
2474       width += m_store_info[i]->bitsize;
2475       if (width >= try_size)
2476 	{
2477 	  last = i;
2478 	  break;
2479 	}
2480     }
2481   if (width != try_size)
2482     return false;
2483 
2484   bool allow_unaligned
2485     = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2486   /* Punt if the combined store would not be aligned and we need alignment.  */
2487   if (!allow_unaligned)
2488     {
2489       unsigned int align = merged_store->align;
2490       unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2491       for (unsigned int i = first + 1; i <= last; ++i)
2492 	{
2493 	  unsigned int this_align;
2494 	  unsigned HOST_WIDE_INT align_bitpos = 0;
2495 	  get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2496 				  &this_align, &align_bitpos);
2497 	  if (this_align > align)
2498 	    {
2499 	      align = this_align;
2500 	      align_base = m_store_info[i]->bitpos - align_bitpos;
2501 	    }
2502 	}
2503       unsigned HOST_WIDE_INT align_bitpos
2504 	= (m_store_info[first]->bitpos - align_base) & (align - 1);
2505       if (align_bitpos)
2506 	align = least_bit_hwi (align_bitpos);
2507       if (align < try_size)
2508 	return false;
2509     }
2510 
2511   tree type;
2512   switch (try_size)
2513     {
2514     case 16: type = uint16_type_node; break;
2515     case 32: type = uint32_type_node; break;
2516     case 64: type = uint64_type_node; break;
2517     default: gcc_unreachable ();
2518     }
2519   struct symbolic_number n;
2520   gimple *ins_stmt = NULL;
2521   int vuse_store = -1;
2522   unsigned int first_order = merged_store->first_order;
2523   unsigned int last_order = merged_store->last_order;
2524   gimple *first_stmt = merged_store->first_stmt;
2525   gimple *last_stmt = merged_store->last_stmt;
2526   unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2527   store_immediate_info *infof = m_store_info[first];
2528 
2529   for (unsigned int i = first; i <= last; ++i)
2530     {
2531       store_immediate_info *info = m_store_info[i];
2532       struct symbolic_number this_n = info->n;
2533       this_n.type = type;
2534       if (!this_n.base_addr)
2535 	this_n.range = try_size / BITS_PER_UNIT;
2536       else
2537 	/* Update vuse in case it has changed by output_merged_stores.  */
2538 	this_n.vuse = gimple_vuse (info->ins_stmt);
2539       unsigned int bitpos = info->bitpos - infof->bitpos;
2540       if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2541 			    BYTES_BIG_ENDIAN
2542 			    ? try_size - info->bitsize - bitpos
2543 			    : bitpos))
2544 	return false;
2545       if (this_n.base_addr && vuse_store)
2546 	{
2547 	  unsigned int j;
2548 	  for (j = first; j <= last; ++j)
2549 	    if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2550 	      break;
2551 	  if (j > last)
2552 	    {
2553 	      if (vuse_store == 1)
2554 		return false;
2555 	      vuse_store = 0;
2556 	    }
2557 	}
2558       if (i == first)
2559 	{
2560 	  n = this_n;
2561 	  ins_stmt = info->ins_stmt;
2562 	}
2563       else
2564 	{
2565 	  if (n.base_addr && n.vuse != this_n.vuse)
2566 	    {
2567 	      if (vuse_store == 0)
2568 		return false;
2569 	      vuse_store = 1;
2570 	    }
2571 	  if (info->order > last_order)
2572 	    {
2573 	      last_order = info->order;
2574 	      last_stmt = info->stmt;
2575 	    }
2576 	  else if (info->order < first_order)
2577 	    {
2578 	      first_order = info->order;
2579 	      first_stmt = info->stmt;
2580 	    }
2581 	  end = MAX (end, info->bitpos + info->bitsize);
2582 
2583 	  ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2584 					     &this_n, &n);
2585 	  if (ins_stmt == NULL)
2586 	    return false;
2587 	}
2588     }
2589 
2590   uint64_t cmpxchg, cmpnop;
2591   find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2592 
2593   /* A complete byte swap should make the symbolic number to start with
2594      the largest digit in the highest order byte.  Unchanged symbolic
2595      number indicates a read with same endianness as target architecture.  */
2596   if (n.n != cmpnop && n.n != cmpxchg)
2597     return false;
2598 
2599   if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2600     return false;
2601 
2602   if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2603 			 merged_store->start, end, first_earlier, first))
2604     return false;
2605 
2606   /* Don't handle memory copy this way if normal non-bswap processing
2607      would handle it too.  */
2608   if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2609     {
2610       unsigned int i;
2611       for (i = first; i <= last; ++i)
2612 	if (m_store_info[i]->rhs_code != MEM_REF)
2613 	  break;
2614       if (i == last + 1)
2615 	return false;
2616     }
2617 
2618   if (n.n == cmpxchg)
2619     switch (try_size)
2620       {
2621       case 16:
2622 	/* Will emit LROTATE_EXPR.  */
2623 	break;
2624       case 32:
2625 	if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2626 	    && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2627 	  break;
2628 	return false;
2629       case 64:
2630 	if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2631 	    && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2632 	  break;
2633 	return false;
2634       default:
2635 	gcc_unreachable ();
2636       }
2637 
2638   if (!allow_unaligned && n.base_addr)
2639     {
2640       unsigned int align = get_object_alignment (n.src);
2641       if (align < try_size)
2642 	return false;
2643     }
2644 
2645   /* If each load has vuse of the corresponding store, need to verify
2646      the loads can be sunk right before the last store.  */
2647   if (vuse_store == 1)
2648     {
2649       auto_vec<tree, 64> refs;
2650       for (unsigned int i = first; i <= last; ++i)
2651 	gather_bswap_load_refs (&refs,
2652 				gimple_assign_rhs1 (m_store_info[i]->stmt));
2653 
2654       unsigned int i;
2655       tree ref;
2656       FOR_EACH_VEC_ELT (refs, i, ref)
2657 	if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2658 	  return false;
2659       n.vuse = NULL_TREE;
2660     }
2661 
2662   infof->n = n;
2663   infof->ins_stmt = ins_stmt;
2664   for (unsigned int i = first; i <= last; ++i)
2665     {
2666       m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2667       m_store_info[i]->ops[0].base_addr = NULL_TREE;
2668       m_store_info[i]->ops[1].base_addr = NULL_TREE;
2669       if (i != first)
2670 	merged_store->merge_into (m_store_info[i]);
2671     }
2672 
2673   return true;
2674 }
2675 
2676 /* Go through the candidate stores recorded in m_store_info and merge them
2677    into merged_store_group objects recorded into m_merged_store_groups
2678    representing the widened stores.  Return true if coalescing was successful
2679    and the number of widened stores is fewer than the original number
2680    of stores.  */
2681 
2682 bool
2683 imm_store_chain_info::coalesce_immediate_stores ()
2684 {
2685   /* Anything less can't be processed.  */
2686   if (m_store_info.length () < 2)
2687     return false;
2688 
2689   if (dump_file && (dump_flags & TDF_DETAILS))
2690     fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
2691 	     m_store_info.length ());
2692 
2693   store_immediate_info *info;
2694   unsigned int i, ignore = 0;
2695   unsigned int first_earlier = 0;
2696   unsigned int end_earlier = 0;
2697 
2698   /* Order the stores by the bitposition they write to.  */
2699   m_store_info.qsort (sort_by_bitpos);
2700 
2701   info = m_store_info[0];
2702   merged_store_group *merged_store = new merged_store_group (info);
2703   if (dump_file && (dump_flags & TDF_DETAILS))
2704     fputs ("New store group\n", dump_file);
2705 
2706   FOR_EACH_VEC_ELT (m_store_info, i, info)
2707     {
2708       unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2709 
2710       if (i <= ignore)
2711 	goto done;
2712 
2713       while (first_earlier < end_earlier
2714 	     && (m_store_info[first_earlier]->bitpos
2715 		 + m_store_info[first_earlier]->bitsize
2716 		 <= merged_store->start))
2717 	first_earlier++;
2718 
2719       /* First try to handle group of stores like:
2720 	 p[0] = data >> 24;
2721 	 p[1] = data >> 16;
2722 	 p[2] = data >> 8;
2723 	 p[3] = data;
2724 	 using the bswap framework.  */
2725       if (info->bitpos == merged_store->start + merged_store->width
2726 	  && merged_store->stores.length () == 1
2727 	  && merged_store->stores[0]->ins_stmt != NULL
2728 	  && info->lp_nr == merged_store->lp_nr
2729 	  && info->ins_stmt != NULL)
2730 	{
2731 	  unsigned int try_size;
2732 	  for (try_size = 64; try_size >= 16; try_size >>= 1)
2733 	    if (try_coalesce_bswap (merged_store, i - 1, try_size,
2734 				    first_earlier))
2735 	      break;
2736 
2737 	  if (try_size >= 16)
2738 	    {
2739 	      ignore = i + merged_store->stores.length () - 1;
2740 	      m_merged_store_groups.safe_push (merged_store);
2741 	      if (ignore < m_store_info.length ())
2742 		{
2743 		  merged_store = new merged_store_group (m_store_info[ignore]);
2744 		  end_earlier = ignore;
2745 		}
2746 	      else
2747 		merged_store = NULL;
2748 	      goto done;
2749 	    }
2750 	}
2751 
2752       new_bitregion_start
2753 	= MIN (merged_store->bitregion_start, info->bitregion_start);
2754       new_bitregion_end
2755 	= MAX (merged_store->bitregion_end, info->bitregion_end);
2756 
2757       if (info->order >= merged_store->first_nonmergeable_order
2758 	  || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
2759 	      > (unsigned) param_store_merging_max_size))
2760 	;
2761 
2762       /* |---store 1---|
2763 	       |---store 2---|
2764 	 Overlapping stores.  */
2765       else if (IN_RANGE (info->bitpos, merged_store->start,
2766 			 merged_store->start + merged_store->width - 1)
2767 	       /* |---store 1---||---store 2---|
2768 		  Handle also the consecutive INTEGER_CST stores case here,
2769 		  as we have here the code to deal with overlaps.  */
2770 	       || (info->bitregion_start <= merged_store->bitregion_end
2771 		   && info->rhs_code == INTEGER_CST
2772 		   && merged_store->only_constants
2773 		   && merged_store->can_be_merged_into (info)))
2774 	{
2775 	  /* Only allow overlapping stores of constants.  */
2776 	  if (info->rhs_code == INTEGER_CST
2777 	      && merged_store->only_constants
2778 	      && info->lp_nr == merged_store->lp_nr)
2779 	    {
2780 	      unsigned int first_order
2781 		= MIN (merged_store->first_order, info->order);
2782 	      unsigned int last_order
2783 		= MAX (merged_store->last_order, info->order);
2784 	      unsigned HOST_WIDE_INT end
2785 		= MAX (merged_store->start + merged_store->width,
2786 		       info->bitpos + info->bitsize);
2787 	      if (check_no_overlap (m_store_info, i, true, first_order,
2788 				    last_order, merged_store->start, end,
2789 				    first_earlier, end_earlier))
2790 		{
2791 		  /* check_no_overlap call above made sure there are no
2792 		     overlapping stores with non-INTEGER_CST rhs_code
2793 		     in between the first and last of the stores we've
2794 		     just merged.  If there are any INTEGER_CST rhs_code
2795 		     stores in between, we need to merge_overlapping them
2796 		     even if in the sort_by_bitpos order there are other
2797 		     overlapping stores in between.  Keep those stores as is.
2798 		     Example:
2799 			MEM[(int *)p_28] = 0;
2800 			MEM[(char *)p_28 + 3B] = 1;
2801 			MEM[(char *)p_28 + 1B] = 2;
2802 			MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2803 		     We can't merge the zero store with the store of two and
2804 		     not merge anything else, because the store of one is
2805 		     in the original order in between those two, but in
2806 		     store_by_bitpos order it comes after the last store that
2807 		     we can't merge with them.  We can merge the first 3 stores
2808 		     and keep the last store as is though.  */
2809 		  unsigned int len = m_store_info.length ();
2810 		  unsigned int try_order = last_order;
2811 		  unsigned int first_nonmergeable_order;
2812 		  unsigned int k;
2813 		  bool last_iter = false;
2814 		  int attempts = 0;
2815 		  do
2816 		    {
2817 		      unsigned int max_order = 0;
2818 		      unsigned int min_order = first_order;
2819 		      unsigned first_nonmergeable_int_order = ~0U;
2820 		      unsigned HOST_WIDE_INT this_end = end;
2821 		      k = i;
2822 		      first_nonmergeable_order = ~0U;
2823 		      for (unsigned int j = i + 1; j < len; ++j)
2824 			{
2825 			  store_immediate_info *info2 = m_store_info[j];
2826 			  if (info2->bitpos >= this_end)
2827 			    break;
2828 			  if (info2->order < try_order)
2829 			    {
2830 			      if (info2->rhs_code != INTEGER_CST
2831 				  || info2->lp_nr != merged_store->lp_nr)
2832 				{
2833 				  /* Normally check_no_overlap makes sure this
2834 				     doesn't happen, but if end grows below,
2835 				     then we need to process more stores than
2836 				     check_no_overlap verified.  Example:
2837 				      MEM[(int *)p_5] = 0;
2838 				      MEM[(short *)p_5 + 3B] = 1;
2839 				      MEM[(char *)p_5 + 4B] = _9;
2840 				      MEM[(char *)p_5 + 2B] = 2;  */
2841 				  k = 0;
2842 				  break;
2843 				}
2844 			      k = j;
2845 			      min_order = MIN (min_order, info2->order);
2846 			      this_end = MAX (this_end,
2847 					      info2->bitpos + info2->bitsize);
2848 			    }
2849 			  else if (info2->rhs_code == INTEGER_CST
2850 				   && info2->lp_nr == merged_store->lp_nr
2851 				   && !last_iter)
2852 			    {
2853 			      max_order = MAX (max_order, info2->order + 1);
2854 			      first_nonmergeable_int_order
2855 				= MIN (first_nonmergeable_int_order,
2856 				       info2->order);
2857 			    }
2858 			  else
2859 			    first_nonmergeable_order
2860 			      = MIN (first_nonmergeable_order, info2->order);
2861 			}
2862 		      if (k > i
2863 			  && !check_no_overlap (m_store_info, len - 1, true,
2864 						min_order, try_order,
2865 						merged_store->start, this_end,
2866 						first_earlier, end_earlier))
2867 			k = 0;
2868 		      if (k == 0)
2869 			{
2870 			  if (last_order == try_order)
2871 			    break;
2872 			  /* If this failed, but only because we grew
2873 			     try_order, retry with the last working one,
2874 			     so that we merge at least something.  */
2875 			  try_order = last_order;
2876 			  last_iter = true;
2877 			  continue;
2878 			}
2879 		      last_order = try_order;
2880 		      /* Retry with a larger try_order to see if we could
2881 			 merge some further INTEGER_CST stores.  */
2882 		      if (max_order
2883 			  && (first_nonmergeable_int_order
2884 			      < first_nonmergeable_order))
2885 			{
2886 			  try_order = MIN (max_order,
2887 					   first_nonmergeable_order);
2888 			  try_order
2889 			    = MIN (try_order,
2890 				   merged_store->first_nonmergeable_order);
2891 			  if (try_order > last_order && ++attempts < 16)
2892 			    continue;
2893 			}
2894 		      first_nonmergeable_order
2895 			= MIN (first_nonmergeable_order,
2896 			       first_nonmergeable_int_order);
2897 		      end = this_end;
2898 		      break;
2899 		    }
2900 		  while (1);
2901 
2902 		  if (k != 0)
2903 		    {
2904 		      merged_store->merge_overlapping (info);
2905 
2906 		      merged_store->first_nonmergeable_order
2907 			= MIN (merged_store->first_nonmergeable_order,
2908 			       first_nonmergeable_order);
2909 
2910 		      for (unsigned int j = i + 1; j <= k; j++)
2911 			{
2912 			  store_immediate_info *info2 = m_store_info[j];
2913 			  gcc_assert (info2->bitpos < end);
2914 			  if (info2->order < last_order)
2915 			    {
2916 			      gcc_assert (info2->rhs_code == INTEGER_CST);
2917 			      if (info != info2)
2918 				merged_store->merge_overlapping (info2);
2919 			    }
2920 			  /* Other stores are kept and not merged in any
2921 			     way.  */
2922 			}
2923 		      ignore = k;
2924 		      goto done;
2925 		    }
2926 		}
2927 	    }
2928 	}
2929       /* |---store 1---||---store 2---|
2930 	 This store is consecutive to the previous one.
2931 	 Merge it into the current store group.  There can be gaps in between
2932 	 the stores, but there can't be gaps in between bitregions.  */
2933       else if (info->bitregion_start <= merged_store->bitregion_end
2934 	       && merged_store->can_be_merged_into (info))
2935 	{
2936 	  store_immediate_info *infof = merged_store->stores[0];
2937 
2938 	  /* All the rhs_code ops that take 2 operands are commutative,
2939 	     swap the operands if it could make the operands compatible.  */
2940 	  if (infof->ops[0].base_addr
2941 	      && infof->ops[1].base_addr
2942 	      && info->ops[0].base_addr
2943 	      && info->ops[1].base_addr
2944 	      && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2945 			   info->bitpos - infof->bitpos)
2946 	      && operand_equal_p (info->ops[1].base_addr,
2947 				  infof->ops[0].base_addr, 0))
2948 	    {
2949 	      std::swap (info->ops[0], info->ops[1]);
2950 	      info->ops_swapped_p = true;
2951 	    }
2952 	  if (check_no_overlap (m_store_info, i, false,
2953 				MIN (merged_store->first_order, info->order),
2954 				MAX (merged_store->last_order, info->order),
2955 				merged_store->start,
2956 				MAX (merged_store->start + merged_store->width,
2957 				     info->bitpos + info->bitsize),
2958 				first_earlier, end_earlier))
2959 	    {
2960 	      /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
2961 	      if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2962 		{
2963 		  info->rhs_code = BIT_INSERT_EXPR;
2964 		  info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2965 		  info->ops[0].base_addr = NULL_TREE;
2966 		}
2967 	      else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2968 		{
2969 		  store_immediate_info *infoj;
2970 		  unsigned int j;
2971 		  FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2972 		    {
2973 		      infoj->rhs_code = BIT_INSERT_EXPR;
2974 		      infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2975 		      infoj->ops[0].base_addr = NULL_TREE;
2976 		    }
2977 		}
2978 	      if ((infof->ops[0].base_addr
2979 		   ? compatible_load_p (merged_store, info, base_addr, 0)
2980 		   : !info->ops[0].base_addr)
2981 		  && (infof->ops[1].base_addr
2982 		      ? compatible_load_p (merged_store, info, base_addr, 1)
2983 		      : !info->ops[1].base_addr))
2984 		{
2985 		  merged_store->merge_into (info);
2986 		  goto done;
2987 		}
2988 	    }
2989 	}
2990 
2991       /* |---store 1---| <gap> |---store 2---|.
2992 	 Gap between stores or the rhs not compatible.  Start a new group.  */
2993 
2994       /* Try to apply all the stores recorded for the group to determine
2995 	 the bitpattern they write and discard it if that fails.
2996 	 This will also reject single-store groups.  */
2997       if (merged_store->apply_stores ())
2998 	m_merged_store_groups.safe_push (merged_store);
2999       else
3000 	delete merged_store;
3001 
3002       merged_store = new merged_store_group (info);
3003       end_earlier = i;
3004       if (dump_file && (dump_flags & TDF_DETAILS))
3005 	fputs ("New store group\n", dump_file);
3006 
3007     done:
3008       if (dump_file && (dump_flags & TDF_DETAILS))
3009 	{
3010 	  fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3011 			      " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3012 		   i, info->bitsize, info->bitpos);
3013 	  print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3014 	  fputc ('\n', dump_file);
3015 	}
3016     }
3017 
3018   /* Record or discard the last store group.  */
3019   if (merged_store)
3020     {
3021       if (merged_store->apply_stores ())
3022 	m_merged_store_groups.safe_push (merged_store);
3023       else
3024 	delete merged_store;
3025     }
3026 
3027   gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
3028 
3029   bool success
3030     = !m_merged_store_groups.is_empty ()
3031       && m_merged_store_groups.length () < m_store_info.length ();
3032 
3033   if (success && dump_file)
3034     fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
3035 	     m_merged_store_groups.length ());
3036 
3037   return success;
3038 }
3039 
3040 /* Return the type to use for the merged stores or loads described by STMTS.
3041    This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
3042    otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3043    of the MEM_REFs if any.  */
3044 
3045 static tree
3046 get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3047 			  unsigned short *cliquep, unsigned short *basep)
3048 {
3049   gimple *stmt;
3050   unsigned int i;
3051   tree type = NULL_TREE;
3052   tree ret = NULL_TREE;
3053   *cliquep = 0;
3054   *basep = 0;
3055 
3056   FOR_EACH_VEC_ELT (stmts, i, stmt)
3057     {
3058       tree ref = is_load ? gimple_assign_rhs1 (stmt)
3059 			 : gimple_assign_lhs (stmt);
3060       tree type1 = reference_alias_ptr_type (ref);
3061       tree base = get_base_address (ref);
3062 
3063       if (i == 0)
3064 	{
3065 	  if (TREE_CODE (base) == MEM_REF)
3066 	    {
3067 	      *cliquep = MR_DEPENDENCE_CLIQUE (base);
3068 	      *basep = MR_DEPENDENCE_BASE (base);
3069 	    }
3070 	  ret = type = type1;
3071 	  continue;
3072 	}
3073       if (!alias_ptr_types_compatible_p (type, type1))
3074 	ret = ptr_type_node;
3075       if (TREE_CODE (base) != MEM_REF
3076 	  || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3077 	  || *basep != MR_DEPENDENCE_BASE (base))
3078 	{
3079 	  *cliquep = 0;
3080 	  *basep = 0;
3081 	}
3082     }
3083   return ret;
3084 }
3085 
3086 /* Return the location_t information we can find among the statements
3087    in STMTS.  */
3088 
3089 static location_t
3090 get_location_for_stmts (vec<gimple *> &stmts)
3091 {
3092   gimple *stmt;
3093   unsigned int i;
3094 
3095   FOR_EACH_VEC_ELT (stmts, i, stmt)
3096     if (gimple_has_location (stmt))
3097       return gimple_location (stmt);
3098 
3099   return UNKNOWN_LOCATION;
3100 }
3101 
3102 /* Used to describe a store resulting from splitting a wide store into
3103    smaller regularly-sized stores in split_group.  */
3104 
3105 class split_store
3106 {
3107 public:
3108   unsigned HOST_WIDE_INT bytepos;
3109   unsigned HOST_WIDE_INT size;
3110   unsigned HOST_WIDE_INT align;
3111   auto_vec<store_immediate_info *> orig_stores;
3112   /* True if there is a single orig stmt covering the whole split store.  */
3113   bool orig;
3114   split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3115 	       unsigned HOST_WIDE_INT);
3116 };
3117 
3118 /* Simple constructor.  */
3119 
3120 split_store::split_store (unsigned HOST_WIDE_INT bp,
3121 			  unsigned HOST_WIDE_INT sz,
3122 			  unsigned HOST_WIDE_INT al)
3123 			  : bytepos (bp), size (sz), align (al), orig (false)
3124 {
3125   orig_stores.create (0);
3126 }
3127 
3128 /* Record all stores in GROUP that write to the region of size BITSIZE
3129    starting at BITPOS.  Record infos for such statements in STORES if
3130    non-NULL.  The stores in GROUP must be sorted by bitposition.  Return INFO
3131    if there is exactly one original store in the range (in that case ignore
3132    clobber stmts, unless there are only clobber stmts).  */
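
/* For example (illustrative only): if GROUP contains non-clobber stores
   covering bit ranges [0, 8), [8, 16) and [32, 40), a query with BITPOS 0
   and BITSIZE 16 pushes the first two infos into STORES (when non-NULL) and
   returns NULL because more than one original store writes the range, while
   a query with BITPOS 32 and BITSIZE 8 returns just the third info.  */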
3133 
3134 static store_immediate_info *
3135 find_constituent_stores (class merged_store_group *group,
3136 			 vec<store_immediate_info *> *stores,
3137 			 unsigned int *first,
3138 			 unsigned HOST_WIDE_INT bitpos,
3139 			 unsigned HOST_WIDE_INT bitsize)
3140 {
3141   store_immediate_info *info, *ret = NULL;
3142   unsigned int i;
3143   bool second = false;
3144   bool update_first = true;
3145   unsigned HOST_WIDE_INT end = bitpos + bitsize;
3146   for (i = *first; group->stores.iterate (i, &info); ++i)
3147     {
3148       unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3149       unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
3150       if (stmt_end <= bitpos)
3151 	{
3152 	  /* BITPOS passed to this function never decreases from within the
3153 	     same split_group call, so optimize and don't scan info records
3154 	     which are known to end before or at BITPOS next time.
3155 	     Only do it if all stores before this one also pass this.  */
3156 	  if (update_first)
3157 	    *first = i + 1;
3158 	  continue;
3159 	}
3160       else
3161 	update_first = false;
3162 
3163       /* The stores in GROUP are ordered by bitposition so if we're past
3164 	 the region for this group return early.  */
3165       if (stmt_start >= end)
3166 	return ret;
3167 
3168       if (gimple_clobber_p (info->stmt))
3169 	{
3170 	  if (stores)
3171 	    stores->safe_push (info);
3172 	  if (ret == NULL)
3173 	    ret = info;
3174 	  continue;
3175 	}
3176       if (stores)
3177 	{
3178 	  stores->safe_push (info);
3179 	  if (ret && !gimple_clobber_p (ret->stmt))
3180 	    {
3181 	      ret = NULL;
3182 	      second = true;
3183 	    }
3184 	}
3185       else if (ret && !gimple_clobber_p (ret->stmt))
3186 	return NULL;
3187       if (!second)
3188 	ret = info;
3189     }
3190   return ret;
3191 }
3192 
3193 /* Return how many SSA_NAMEs used to compute the value to store in the INFO
3194    store have multiple uses.  If any SSA_NAME has multiple uses, also
3195    count statements needed to compute it.  */
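
/* A sketch of the intent (hypothetical GIMPLE): for
     _1 = MEM[q];
     _2 = _1 ^ 123;
     MEM[p] = _2;
   the function returns 0 when _1 and _2 are single-use, because the whole
   computation can be dropped together with the narrow store, whereas if _1
   also feeds some other statement the load has to be kept and is counted.  */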
3196 
3197 static unsigned
3198 count_multiple_uses (store_immediate_info *info)
3199 {
3200   gimple *stmt = info->stmt;
3201   unsigned ret = 0;
3202   switch (info->rhs_code)
3203     {
3204     case INTEGER_CST:
3205       return 0;
3206     case BIT_AND_EXPR:
3207     case BIT_IOR_EXPR:
3208     case BIT_XOR_EXPR:
3209       if (info->bit_not_p)
3210 	{
3211 	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
3212 	    ret = 1; /* Fall through below to return
3213 			the BIT_NOT_EXPR stmt and then
3214 			BIT_{AND,IOR,XOR}_EXPR and anything it
3215 			uses.  */
3216 	  else
3217 	    /* After this, stmt is the BIT_NOT_EXPR.  */
3218 	    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3219 	}
3220       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3221 	{
3222 	  ret += 1 + info->ops[0].bit_not_p;
3223 	  if (info->ops[1].base_addr)
3224 	    ret += 1 + info->ops[1].bit_not_p;
3225 	  return ret + 1;
3226 	}
3227       stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3228       /* stmt is now the BIT_*_EXPR.  */
3229       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3230 	ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3231       else if (info->ops[info->ops_swapped_p].bit_not_p)
3232 	{
3233 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3234 	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3235 	    ++ret;
3236 	}
3237       if (info->ops[1].base_addr == NULL_TREE)
3238 	{
3239 	  gcc_checking_assert (!info->ops_swapped_p);
3240 	  return ret;
3241 	}
3242       if (!has_single_use (gimple_assign_rhs2 (stmt)))
3243 	ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3244       else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
3245 	{
3246 	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3247 	  if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3248 	    ++ret;
3249 	}
3250       return ret;
3251     case MEM_REF:
3252       if (!has_single_use (gimple_assign_rhs1 (stmt)))
3253 	return 1 + info->ops[0].bit_not_p;
3254       else if (info->ops[0].bit_not_p)
3255 	{
3256 	  stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3257 	  if (!has_single_use (gimple_assign_rhs1 (stmt)))
3258 	    return 1;
3259 	}
3260       return 0;
3261     case BIT_INSERT_EXPR:
3262       return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
3263     default:
3264       gcc_unreachable ();
3265     }
3266 }
3267 
3268 /* Split a merged store described by GROUP by populating the SPLIT_STORES
3269    vector (if non-NULL) with split_store structs describing the byte offset
3270    (from the base), the bit size and alignment of each store as well as the
3271    original statements involved in each such split group.
3272    This is to separate the splitting strategy from the statement
3273    building/emission/linking done in output_merged_store.
3274    Return number of new stores.
3275    If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3276    If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3277    BZERO_FIRST may be true only when the first store covers the whole group
3278    and clears it; if BZERO_FIRST is true, keep that first store in the set
3279    unmodified and emit further stores for the overrides only.
3280    If SPLIT_STORES is NULL, it is just a dry run to count the number of
3281    new stores.  */
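
/* As a rough illustration (hypothetical numbers): for a 6-byte bit region
   whose start is 4-byte aligned, with unaligned stores disallowed and no
   padding bytes, the loop below ends up emitting one 4-byte store followed
   by one 2-byte store.  */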
3282 
3283 static unsigned int
3284 split_group (merged_store_group *group, bool allow_unaligned_store,
3285 	     bool allow_unaligned_load, bool bzero_first,
3286 	     vec<split_store *> *split_stores,
3287 	     unsigned *total_orig,
3288 	     unsigned *total_new)
3289 {
3290   unsigned HOST_WIDE_INT pos = group->bitregion_start;
3291   unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3292   unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
3293   unsigned HOST_WIDE_INT group_align = group->align;
3294   unsigned HOST_WIDE_INT align_base = group->align_base;
3295   unsigned HOST_WIDE_INT group_load_align = group_align;
3296   bool any_orig = false;
3297 
3298   gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3299 
3300   if (group->stores[0]->rhs_code == LROTATE_EXPR
3301       || group->stores[0]->rhs_code == NOP_EXPR)
3302     {
3303       gcc_assert (!bzero_first);
3304       /* For the bswap framework using sets of stores, all the checking
3305 	 has been done earlier in try_coalesce_bswap and the result needs
3306 	 to be emitted as a single store.  */
3307       if (total_orig)
3308 	{
3309 	  /* Avoid the old/new stmt count heuristics.  It should
3310 	     always be beneficial.  */
3311 	  total_new[0] = 1;
3312 	  total_orig[0] = 2;
3313 	}
3314 
3315       if (split_stores)
3316 	{
3317 	  unsigned HOST_WIDE_INT align_bitpos
3318 	    = (group->start - align_base) & (group_align - 1);
3319 	  unsigned HOST_WIDE_INT align = group_align;
3320 	  if (align_bitpos)
3321 	    align = least_bit_hwi (align_bitpos);
3322 	  bytepos = group->start / BITS_PER_UNIT;
3323 	  split_store *store
3324 	    = new split_store (bytepos, group->width, align);
3325 	  unsigned int first = 0;
3326 	  find_constituent_stores (group, &store->orig_stores,
3327 				   &first, group->start, group->width);
3328 	  split_stores->safe_push (store);
3329 	}
3330 
3331       return 1;
3332     }
3333 
3334   unsigned int ret = 0, first = 0;
3335   unsigned HOST_WIDE_INT try_pos = bytepos;
3336 
3337   if (total_orig)
3338     {
3339       unsigned int i;
3340       store_immediate_info *info = group->stores[0];
3341 
3342       total_new[0] = 0;
3343       total_orig[0] = 1; /* The orig store.  */
3344       info = group->stores[0];
3345       if (info->ops[0].base_addr)
3346 	total_orig[0]++;
3347       if (info->ops[1].base_addr)
3348 	total_orig[0]++;
3349       switch (info->rhs_code)
3350 	{
3351 	case BIT_AND_EXPR:
3352 	case BIT_IOR_EXPR:
3353 	case BIT_XOR_EXPR:
3354 	  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
3355 	  break;
3356 	default:
3357 	  break;
3358 	}
3359       total_orig[0] *= group->stores.length ();
3360 
3361       FOR_EACH_VEC_ELT (group->stores, i, info)
3362 	{
3363 	  total_new[0] += count_multiple_uses (info);
3364 	  total_orig[0] += (info->bit_not_p
3365 			    + info->ops[0].bit_not_p
3366 			    + info->ops[1].bit_not_p);
3367 	}
3368     }
3369 
3370   if (!allow_unaligned_load)
3371     for (int i = 0; i < 2; ++i)
3372       if (group->load_align[i])
3373 	group_load_align = MIN (group_load_align, group->load_align[i]);
3374 
3375   if (bzero_first)
3376     {
3377       store_immediate_info *gstore;
3378       FOR_EACH_VEC_ELT (group->stores, first, gstore)
3379 	if (!gimple_clobber_p (gstore->stmt))
3380 	  break;
3381       ++first;
3382       ret = 1;
3383       if (split_stores)
3384 	{
3385 	  split_store *store
3386 	    = new split_store (bytepos, gstore->bitsize, align_base);
3387 	  store->orig_stores.safe_push (gstore);
3388 	  store->orig = true;
3389 	  any_orig = true;
3390 	  split_stores->safe_push (store);
3391 	}
3392     }
3393 
3394   while (size > 0)
3395     {
3396       if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3397 	  && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3398 	      || (bzero_first && group->val[try_pos - bytepos] == 0)))
3399 	{
3400 	  /* Skip padding bytes.  */
3401 	  ++try_pos;
3402 	  size -= BITS_PER_UNIT;
3403 	  continue;
3404 	}
3405 
3406       unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3407       unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3408       unsigned HOST_WIDE_INT align_bitpos
3409 	= (try_bitpos - align_base) & (group_align - 1);
3410       unsigned HOST_WIDE_INT align = group_align;
3411       bool found_orig = false;
3412       if (align_bitpos)
3413 	align = least_bit_hwi (align_bitpos);
3414       if (!allow_unaligned_store)
3415 	try_size = MIN (try_size, align);
3416       if (!allow_unaligned_load)
3417 	{
3418 	  /* If we can't do or don't want to do unaligned stores
3419 	     as well as loads, we need to take the loads into account
3420 	     as well.  */
3421 	  unsigned HOST_WIDE_INT load_align = group_load_align;
3422 	  align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3423 	  if (align_bitpos)
3424 	    load_align = least_bit_hwi (align_bitpos);
3425 	  for (int i = 0; i < 2; ++i)
3426 	    if (group->load_align[i])
3427 	      {
3428 		align_bitpos
3429 		  = known_alignment (try_bitpos
3430 				     - group->stores[0]->bitpos
3431 				     + group->stores[0]->ops[i].bitpos
3432 				     - group->load_align_base[i]);
3433 		if (align_bitpos & (group_load_align - 1))
3434 		  {
3435 		    unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3436 		    load_align = MIN (load_align, a);
3437 		  }
3438 	      }
3439 	  try_size = MIN (try_size, load_align);
3440 	}
3441       store_immediate_info *info
3442 	= find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
3443       if (info && !gimple_clobber_p (info->stmt))
3444 	{
3445 	  /* If there is just one original statement for the range, see if
3446 	     we can just reuse the original store which could be even larger
3447 	     than try_size.  */
3448 	  unsigned HOST_WIDE_INT stmt_end
3449 	    = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
3450 	  info = find_constituent_stores (group, NULL, &first, try_bitpos,
3451 					  stmt_end - try_bitpos);
3452 	  if (info && info->bitpos >= try_bitpos)
3453 	    {
3454 	      store_immediate_info *info2 = NULL;
3455 	      unsigned int first_copy = first;
3456 	      if (info->bitpos > try_bitpos
3457 		  && stmt_end - try_bitpos <= try_size)
3458 		{
3459 		  info2 = find_constituent_stores (group, NULL, &first_copy,
3460 						   try_bitpos,
3461 						   info->bitpos - try_bitpos);
3462 		  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3463 		}
3464 	      if (info2 == NULL && stmt_end - try_bitpos < try_size)
3465 		{
3466 		  info2 = find_constituent_stores (group, NULL, &first_copy,
3467 						   stmt_end,
3468 						   (try_bitpos + try_size)
3469 						   - stmt_end);
3470 		  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3471 		}
3472 	      if (info2 == NULL)
3473 		{
3474 		  try_size = stmt_end - try_bitpos;
3475 		  found_orig = true;
3476 		  goto found;
3477 		}
3478 	    }
3479 	}
3480 
3481       /* Approximate store bitsize for the case when there are no padding
3482 	 bits.  */
3483       while (try_size > size)
3484 	try_size /= 2;
3485       /* Now look for whole padding bytes at the end of that bitsize.  */
3486       for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3487 	if (group->mask[try_pos - bytepos + nonmasked - 1]
3488 	    != (unsigned char) ~0U
3489 	    && (!bzero_first
3490 		|| group->val[try_pos - bytepos + nonmasked - 1] != 0))
3491 	  break;
3492       if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
3493 	{
3494 	  /* If entire try_size range is padding, skip it.  */
3495 	  try_pos += try_size / BITS_PER_UNIT;
3496 	  size -= try_size;
3497 	  continue;
3498 	}
3499       /* Otherwise try to decrease try_size if the second half, the last 3
3500 	 quarters, etc. are padding.  */
3501       nonmasked *= BITS_PER_UNIT;
3502       while (nonmasked <= try_size / 2)
3503 	try_size /= 2;
3504       if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
3505 	{
3506 	  /* Now look for whole padding bytes at the start of that bitsize.  */
3507 	  unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3508 	  for (masked = 0; masked < try_bytesize; ++masked)
3509 	    if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3510 		&& (!bzero_first
3511 		    || group->val[try_pos - bytepos + masked] != 0))
3512 	      break;
3513 	  masked *= BITS_PER_UNIT;
3514 	  gcc_assert (masked < try_size);
3515 	  if (masked >= try_size / 2)
3516 	    {
3517 	      while (masked >= try_size / 2)
3518 		{
3519 		  try_size /= 2;
3520 		  try_pos += try_size / BITS_PER_UNIT;
3521 		  size -= try_size;
3522 		  masked -= try_size;
3523 		}
3524 	      /* Need to recompute the alignment, so just retry at the new
3525 		 position.  */
3526 	      continue;
3527 	    }
3528 	}
3529 
3530     found:
3531       ++ret;
3532 
3533       if (split_stores)
3534 	{
3535 	  split_store *store
3536 	    = new split_store (try_pos, try_size, align);
3537 	  info = find_constituent_stores (group, &store->orig_stores,
3538 					  &first, try_bitpos, try_size);
3539 	  if (info
3540 	      && !gimple_clobber_p (info->stmt)
3541 	      && info->bitpos >= try_bitpos
3542 	      && info->bitpos + info->bitsize <= try_bitpos + try_size
3543 	      && (store->orig_stores.length () == 1
3544 		  || found_orig
3545 		  || (info->bitpos == try_bitpos
3546 		      && (info->bitpos + info->bitsize
3547 			  == try_bitpos + try_size))))
3548 	    {
3549 	      store->orig = true;
3550 	      any_orig = true;
3551 	    }
3552 	  split_stores->safe_push (store);
3553 	}
3554 
3555       try_pos += try_size / BITS_PER_UNIT;
3556       size -= try_size;
3557     }
3558 
3559   if (total_orig)
3560     {
3561       unsigned int i;
3562       split_store *store;
3563       /* If we are reusing some original stores and any of the
3564 	 original SSA_NAMEs had multiple uses, we need to subtract
3565 	 those now before we add the new ones.  */
3566       if (total_new[0] && any_orig)
3567 	{
3568 	  FOR_EACH_VEC_ELT (*split_stores, i, store)
3569 	    if (store->orig)
3570 	      total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3571 	}
3572       total_new[0] += ret; /* The new store.  */
3573       store_immediate_info *info = group->stores[0];
3574       if (info->ops[0].base_addr)
3575 	total_new[0] += ret;
3576       if (info->ops[1].base_addr)
3577 	total_new[0] += ret;
3578       switch (info->rhs_code)
3579 	{
3580 	case BIT_AND_EXPR:
3581 	case BIT_IOR_EXPR:
3582 	case BIT_XOR_EXPR:
3583 	  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
3584 	  break;
3585 	default:
3586 	  break;
3587 	}
3588       FOR_EACH_VEC_ELT (*split_stores, i, store)
3589 	{
3590 	  unsigned int j;
3591 	  bool bit_not_p[3] = { false, false, false };
3592 	  /* If all orig_stores have certain bit_not_p set, then
3593 	     we'd use a BIT_NOT_EXPR stmt and need to account for it.
3594 	     If some orig_stores have certain bit_not_p set, then
3595 	     we'd use a BIT_XOR_EXPR with a mask and need to account for
3596 	     it.  */
3597 	  FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3598 	    {
3599 	      if (info->ops[0].bit_not_p)
3600 		bit_not_p[0] = true;
3601 	      if (info->ops[1].bit_not_p)
3602 		bit_not_p[1] = true;
3603 	      if (info->bit_not_p)
3604 		bit_not_p[2] = true;
3605 	    }
3606 	  total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3607 	}
3608 
3609     }
3610 
3611   return ret;
3612 }
3613 
3614 /* Return the operation through which the operand IDX (if < 2) or
3615    result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
3616    is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3617    the bits should be xored with mask.  */
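
/* For instance (illustrative): if every constituent store of SPLIT_STORE has
   the relevant bit_not_p flag set and none of them stores a narrower
   bool-like value, BIT_NOT_EXPR is returned and MASK stays NULL_TREE; if
   only some of them have it set, MASK is built with 1 bits exactly over the
   inverted sub-regions and BIT_XOR_EXPR is returned.  */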
3618 
3619 static enum tree_code
3620 invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3621 {
3622   unsigned int i;
3623   store_immediate_info *info;
3624   unsigned int cnt = 0;
3625   bool any_paddings = false;
3626   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3627     {
3628       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3629       if (bit_not_p)
3630 	{
3631 	  ++cnt;
3632 	  tree lhs = gimple_assign_lhs (info->stmt);
3633 	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3634 	      && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3635 	    any_paddings = true;
3636 	}
3637     }
3638   mask = NULL_TREE;
3639   if (cnt == 0)
3640     return NOP_EXPR;
3641   if (cnt == split_store->orig_stores.length () && !any_paddings)
3642     return BIT_NOT_EXPR;
3643 
3644   unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3645   unsigned buf_size = split_store->size / BITS_PER_UNIT;
3646   unsigned char *buf
3647     = XALLOCAVEC (unsigned char, buf_size);
3648   memset (buf, ~0U, buf_size);
3649   FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3650     {
3651       bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3652       if (!bit_not_p)
3653 	continue;
3654       /* Clear regions with bit_not_p and invert afterwards, rather than
3655 	 clear regions with !bit_not_p, so that gaps in between stores aren't
3656 	 set in the mask.  */
3657       unsigned HOST_WIDE_INT bitsize = info->bitsize;
3658       unsigned HOST_WIDE_INT prec = bitsize;
3659       unsigned int pos_in_buffer = 0;
3660       if (any_paddings)
3661 	{
3662 	  tree lhs = gimple_assign_lhs (info->stmt);
3663 	  if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3664 	      && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3665 	    prec = TYPE_PRECISION (TREE_TYPE (lhs));
3666 	}
3667       if (info->bitpos < try_bitpos)
3668 	{
3669 	  gcc_assert (info->bitpos + bitsize > try_bitpos);
3670 	  if (!BYTES_BIG_ENDIAN)
3671 	    {
3672 	      if (prec <= try_bitpos - info->bitpos)
3673 		continue;
3674 	      prec -= try_bitpos - info->bitpos;
3675 	    }
3676 	  bitsize -= try_bitpos - info->bitpos;
3677 	  if (BYTES_BIG_ENDIAN && prec > bitsize)
3678 	    prec = bitsize;
3679 	}
3680       else
3681 	pos_in_buffer = info->bitpos - try_bitpos;
3682       if (prec < bitsize)
3683 	{
3684 	  /* If this is a bool inversion, invert just the least significant
3685 	     prec bits rather than all bits of it.  */
3686 	  if (BYTES_BIG_ENDIAN)
3687 	    {
3688 	      pos_in_buffer += bitsize - prec;
3689 	      if (pos_in_buffer >= split_store->size)
3690 		continue;
3691 	    }
3692 	  bitsize = prec;
3693 	}
3694       if (pos_in_buffer + bitsize > split_store->size)
3695 	bitsize = split_store->size - pos_in_buffer;
3696       unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3697       if (BYTES_BIG_ENDIAN)
3698 	clear_bit_region_be (p, (BITS_PER_UNIT - 1
3699 				 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3700       else
3701 	clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3702     }
3703   for (unsigned int i = 0; i < buf_size; ++i)
3704     buf[i] = ~buf[i];
3705   mask = native_interpret_expr (int_type, buf, buf_size);
3706   return BIT_XOR_EXPR;
3707 }
3708 
3709 /* Given a merged store group GROUP output the widened version of it.
3710    The store chain is against the base object BASE.
3711    Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3712    unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3713    Make sure that the number of statements output is less than the number of
3714    original statements.  If a better sequence is possible emit it and
3715    return true.  */
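
/* Sketch of the flow (descriptive only): split_group may first be run as a
   dry run to choose between the aligned/unaligned and bzero-first
   strategies, is then run for real to fill split_stores, and finally one
   store statement (plus any loads, masks and bitwise ops it needs) is built
   per split_store and the sequence is inserted after the last original
   statement.  */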
3716 
3717 bool
3718 imm_store_chain_info::output_merged_store (merged_store_group *group)
3719 {
3720   split_store *split_store;
3721   unsigned int i;
3722   unsigned HOST_WIDE_INT start_byte_pos
3723     = group->bitregion_start / BITS_PER_UNIT;
3724 
3725   unsigned int orig_num_stmts = group->stores.length ();
3726   if (orig_num_stmts < 2)
3727     return false;
3728 
3729   auto_vec<class split_store *, 32> split_stores;
3730   bool allow_unaligned_store
3731     = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
3732   bool allow_unaligned_load = allow_unaligned_store;
3733   bool bzero_first = false;
3734   store_immediate_info *store;
3735   unsigned int num_clobber_stmts = 0;
3736   if (group->stores[0]->rhs_code == INTEGER_CST)
3737     {
3738       FOR_EACH_VEC_ELT (group->stores, i, store)
3739 	if (gimple_clobber_p (store->stmt))
3740 	  num_clobber_stmts++;
3741 	else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
3742 		 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
3743 		 && group->start == store->bitpos
3744 		 && group->width == store->bitsize
3745 		 && (group->start % BITS_PER_UNIT) == 0
3746 		 && (group->width % BITS_PER_UNIT) == 0)
3747 	  {
3748 	    bzero_first = true;
3749 	    break;
3750 	  }
3751 	else
3752 	  break;
3753       FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
3754 	if (gimple_clobber_p (store->stmt))
3755 	  num_clobber_stmts++;
3756       if (num_clobber_stmts == orig_num_stmts)
3757 	return false;
3758       orig_num_stmts -= num_clobber_stmts;
3759     }
3760   if (allow_unaligned_store || bzero_first)
3761     {
3762       /* If unaligned stores are allowed, see how many stores we'd emit
3763 	 for unaligned and how many stores we'd emit for aligned stores.
3764 	 Only use unaligned stores if that results in fewer stores than aligned.
3765 	 Similarly, if the whole region is cleared first, prefer expanding
3766 	 it together with the subsequent stores rather than emitting the
3767 	 clear first followed by further merged stores.  */
3768       unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3769       int pass_min = 0;
3770       for (int pass = 0; pass < 4; ++pass)
3771 	{
3772 	  if (!allow_unaligned_store && (pass & 1) != 0)
3773 	    continue;
3774 	  if (!bzero_first && (pass & 2) != 0)
3775 	    continue;
3776 	  cnt[pass] = split_group (group, (pass & 1) != 0,
3777 				   allow_unaligned_load, (pass & 2) != 0,
3778 				   NULL, NULL, NULL);
3779 	  if (cnt[pass] < cnt[pass_min])
3780 	    pass_min = pass;
3781 	}
3782       if ((pass_min & 1) == 0)
3783 	allow_unaligned_store = false;
3784       if ((pass_min & 2) == 0)
3785 	bzero_first = false;
3786     }
3787   unsigned total_orig, total_new;
3788   split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
3789 	       &split_stores, &total_orig, &total_new);
3790 
3791   /* Determine if there is a clobber covering the whole group at the start,
3792      followed by proposed split stores that cover the whole group.  In that
3793      case, prefer the transformation even if
3794      split_stores.length () == orig_num_stmts.  */
3795   bool clobber_first = false;
3796   if (num_clobber_stmts
3797       && gimple_clobber_p (group->stores[0]->stmt)
3798       && group->start == group->stores[0]->bitpos
3799       && group->width == group->stores[0]->bitsize
3800       && (group->start % BITS_PER_UNIT) == 0
3801       && (group->width % BITS_PER_UNIT) == 0)
3802     {
3803       clobber_first = true;
3804       unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
3805       FOR_EACH_VEC_ELT (split_stores, i, split_store)
3806 	if (split_store->bytepos != pos)
3807 	  {
3808 	    clobber_first = false;
3809 	    break;
3810 	  }
3811 	else
3812 	  pos += split_store->size / BITS_PER_UNIT;
3813       if (pos != (group->start + group->width) / BITS_PER_UNIT)
3814 	clobber_first = false;
3815     }
3816 
3817   if (split_stores.length () >= orig_num_stmts + clobber_first)
3818     {
3819 
3820       /* We didn't manage to reduce the number of statements.  Bail out.  */
3821       if (dump_file && (dump_flags & TDF_DETAILS))
3822 	fprintf (dump_file, "Exceeded original number of stmts (%u)."
3823 			    "  Not profitable to emit new sequence.\n",
3824 		 orig_num_stmts);
3825       FOR_EACH_VEC_ELT (split_stores, i, split_store)
3826 	delete split_store;
3827       return false;
3828     }
3829   if (total_orig <= total_new)
3830     {
3831       /* If number of estimated new statements is above estimated original
3832 	 statements, bail out too.  */
3833       if (dump_file && (dump_flags & TDF_DETAILS))
3834 	fprintf (dump_file, "Estimated number of original stmts (%u)"
3835 			    " not larger than estimated number of new"
3836 			    " stmts (%u).\n",
3837 		 total_orig, total_new);
3838       FOR_EACH_VEC_ELT (split_stores, i, split_store)
3839 	delete split_store;
3840       return false;
3841     }
3842   if (group->stores[0]->rhs_code == INTEGER_CST)
3843     {
3844       bool all_orig = true;
3845       FOR_EACH_VEC_ELT (split_stores, i, split_store)
3846 	if (!split_store->orig)
3847 	  {
3848 	    all_orig = false;
3849 	    break;
3850 	  }
3851       if (all_orig)
3852 	{
3853 	  unsigned int cnt = split_stores.length ();
3854 	  store_immediate_info *store;
3855 	  FOR_EACH_VEC_ELT (group->stores, i, store)
3856 	    if (gimple_clobber_p (store->stmt))
3857 	      ++cnt;
3858 	  /* Punt if we wouldn't make any real changes, i.e. keep all
3859 	     orig stmts + all clobbers.  */
3860 	  if (cnt == group->stores.length ())
3861 	    {
3862 	      if (dump_file && (dump_flags & TDF_DETAILS))
3863 		fprintf (dump_file, "Exceeded original number of stmts (%u)."
3864 				    "  Not profitable to emit new sequence.\n",
3865 			 orig_num_stmts);
3866 	      FOR_EACH_VEC_ELT (split_stores, i, split_store)
3867 		delete split_store;
3868 	      return false;
3869 	    }
3870 	}
3871     }
3872 
3873   gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3874   gimple_seq seq = NULL;
3875   tree last_vdef, new_vuse;
3876   last_vdef = gimple_vdef (group->last_stmt);
3877   new_vuse = gimple_vuse (group->last_stmt);
3878   tree bswap_res = NULL_TREE;
3879 
3880   /* Clobbers are not removed.  */
3881   if (gimple_clobber_p (group->last_stmt))
3882     {
3883       new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
3884       gimple_set_vdef (group->last_stmt, new_vuse);
3885     }
3886 
3887   if (group->stores[0]->rhs_code == LROTATE_EXPR
3888       || group->stores[0]->rhs_code == NOP_EXPR)
3889     {
3890       tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3891       gimple *ins_stmt = group->stores[0]->ins_stmt;
3892       struct symbolic_number *n = &group->stores[0]->n;
3893       bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3894 
3895       switch (n->range)
3896 	{
3897 	case 16:
3898 	  load_type = bswap_type = uint16_type_node;
3899 	  break;
3900 	case 32:
3901 	  load_type = uint32_type_node;
3902 	  if (bswap)
3903 	    {
3904 	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3905 	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3906 	    }
3907 	  break;
3908 	case 64:
3909 	  load_type = uint64_type_node;
3910 	  if (bswap)
3911 	    {
3912 	      fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3913 	      bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3914 	    }
3915 	  break;
3916 	default:
3917 	  gcc_unreachable ();
3918 	}
3919 
3920       /* If the loads each have the vuse of the corresponding store,
3921 	 we've checked the aliasing already in try_coalesce_bswap and
3922 	 we want to sink the needed load into seq.  So we need to use
3923 	 new_vuse on the load.  */
3924       if (n->base_addr)
3925 	{
3926 	  if (n->vuse == NULL)
3927 	    {
3928 	      n->vuse = new_vuse;
3929 	      ins_stmt = NULL;
3930 	    }
3931 	  else
3932 	    /* Update vuse in case it has been changed by output_merged_stores.  */
3933 	    n->vuse = gimple_vuse (ins_stmt);
3934 	}
3935       bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3936 				 bswap_type, load_type, n, bswap);
3937       gcc_assert (bswap_res);
3938     }
3939 
3940   gimple *stmt = NULL;
3941   auto_vec<gimple *, 32> orig_stmts;
3942   gimple_seq this_seq;
3943   tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
3944 				      is_gimple_mem_ref_addr, NULL_TREE);
3945   gimple_seq_add_seq_without_update (&seq, this_seq);
3946 
3947   tree load_addr[2] = { NULL_TREE, NULL_TREE };
3948   gimple_seq load_seq[2] = { NULL, NULL };
3949   gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3950   for (int j = 0; j < 2; ++j)
3951     {
3952       store_operand_info &op = group->stores[0]->ops[j];
3953       if (op.base_addr == NULL_TREE)
3954 	continue;
3955 
3956       store_immediate_info *infol = group->stores.last ();
3957       if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3958 	{
3959 	  /* We can't pick the location randomly; while we've verified
3960 	     all the loads have the same vuse, they can still be in different
3961 	     basic blocks and we need to pick the one from the last bb:
3962 	     int x = q[0];
3963 	     if (x == N) return;
3964 	     int y = q[1];
3965 	     p[0] = x;
3966 	     p[1] = y;
3967 	     otherwise if we put the wider load at the q[0] load, we might
3968 	     segfault if q[1] is not mapped.  */
3969 	  basic_block bb = gimple_bb (op.stmt);
3970 	  gimple *ostmt = op.stmt;
3971 	  store_immediate_info *info;
3972 	  FOR_EACH_VEC_ELT (group->stores, i, info)
3973 	    {
3974 	      gimple *tstmt = info->ops[j].stmt;
3975 	      basic_block tbb = gimple_bb (tstmt);
3976 	      if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3977 		{
3978 		  ostmt = tstmt;
3979 		  bb = tbb;
3980 		}
3981 	    }
3982 	  load_gsi[j] = gsi_for_stmt (ostmt);
3983 	  load_addr[j]
3984 	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
3985 				      &load_seq[j], is_gimple_mem_ref_addr,
3986 				      NULL_TREE);
3987 	}
3988       else if (operand_equal_p (base_addr, op.base_addr, 0))
3989 	load_addr[j] = addr;
3990       else
3991 	{
3992 	  load_addr[j]
3993 	    = force_gimple_operand_1 (unshare_expr (op.base_addr),
3994 				      &this_seq, is_gimple_mem_ref_addr,
3995 				      NULL_TREE);
3996 	  gimple_seq_add_seq_without_update (&seq, this_seq);
3997 	}
3998     }
3999 
4000   FOR_EACH_VEC_ELT (split_stores, i, split_store)
4001     {
4002       unsigned HOST_WIDE_INT try_size = split_store->size;
4003       unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4004       unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4005       unsigned HOST_WIDE_INT align = split_store->align;
4006       tree dest, src;
4007       location_t loc;
4008       if (split_store->orig)
4009 	{
4010 	  /* If there is just a single non-clobber constituent store
4011 	     which covers the whole area, just reuse the lhs and rhs.  */
4012 	  gimple *orig_stmt = NULL;
4013 	  store_immediate_info *store;
4014 	  unsigned int j;
4015 	  FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4016 	    if (!gimple_clobber_p (store->stmt))
4017 	      {
4018 		orig_stmt = store->stmt;
4019 		break;
4020 	      }
4021 	  dest = gimple_assign_lhs (orig_stmt);
4022 	  src = gimple_assign_rhs1 (orig_stmt);
4023 	  loc = gimple_location (orig_stmt);
4024 	}
4025       else
4026 	{
4027 	  store_immediate_info *info;
4028 	  unsigned short clique, base;
4029 	  unsigned int k;
4030 	  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4031 	    orig_stmts.safe_push (info->stmt);
4032 	  tree offset_type
4033 	    = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
4034 	  loc = get_location_for_stmts (orig_stmts);
4035 	  orig_stmts.truncate (0);
4036 
4037 	  tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4038 	  int_type = build_aligned_type (int_type, align);
4039 	  dest = fold_build2 (MEM_REF, int_type, addr,
4040 			      build_int_cst (offset_type, try_pos));
4041 	  if (TREE_CODE (dest) == MEM_REF)
4042 	    {
4043 	      MR_DEPENDENCE_CLIQUE (dest) = clique;
4044 	      MR_DEPENDENCE_BASE (dest) = base;
4045 	    }
4046 
4047 	  tree mask;
4048 	  if (bswap_res)
4049 	    mask = integer_zero_node;
4050 	  else
4051 	    mask = native_interpret_expr (int_type,
4052 					  group->mask + try_pos
4053 					  - start_byte_pos,
4054 					  group->buf_size);
4055 
4056 	  tree ops[2];
4057 	  for (int j = 0;
4058 	       j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4059 	       ++j)
4060 	    {
4061 	      store_operand_info &op = split_store->orig_stores[0]->ops[j];
4062 	      if (bswap_res)
4063 		ops[j] = bswap_res;
4064 	      else if (op.base_addr)
4065 		{
4066 		  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4067 		    orig_stmts.safe_push (info->ops[j].stmt);
4068 
4069 		  offset_type = get_alias_type_for_stmts (orig_stmts, true,
4070 							  &clique, &base);
4071 		  location_t load_loc = get_location_for_stmts (orig_stmts);
4072 		  orig_stmts.truncate (0);
4073 
4074 		  unsigned HOST_WIDE_INT load_align = group->load_align[j];
4075 		  unsigned HOST_WIDE_INT align_bitpos
4076 		    = known_alignment (try_bitpos
4077 				       - split_store->orig_stores[0]->bitpos
4078 				       + op.bitpos);
4079 		  if (align_bitpos & (load_align - 1))
4080 		    load_align = least_bit_hwi (align_bitpos);
4081 
4082 		  tree load_int_type
4083 		    = build_nonstandard_integer_type (try_size, UNSIGNED);
4084 		  load_int_type
4085 		    = build_aligned_type (load_int_type, load_align);
4086 
4087 		  poly_uint64 load_pos
4088 		    = exact_div (try_bitpos
4089 				 - split_store->orig_stores[0]->bitpos
4090 				 + op.bitpos,
4091 				 BITS_PER_UNIT);
4092 		  ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4093 					build_int_cst (offset_type, load_pos));
4094 		  if (TREE_CODE (ops[j]) == MEM_REF)
4095 		    {
4096 		      MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4097 		      MR_DEPENDENCE_BASE (ops[j]) = base;
4098 		    }
4099 		  if (!integer_zerop (mask))
4100 		    /* The load might load some bits (that will be masked off
4101 		       later on) uninitialized, avoid -W*uninitialized
4102 		       warnings in that case.  */
4103 		    TREE_NO_WARNING (ops[j]) = 1;
4104 
4105 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4106 					      ops[j]);
4107 		  gimple_set_location (stmt, load_loc);
4108 		  if (gsi_bb (load_gsi[j]))
4109 		    {
4110 		      gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4111 		      gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4112 		    }
4113 		  else
4114 		    {
4115 		      gimple_set_vuse (stmt, new_vuse);
4116 		      gimple_seq_add_stmt_without_update (&seq, stmt);
4117 		    }
4118 		  ops[j] = gimple_assign_lhs (stmt);
4119 		  tree xor_mask;
4120 		  enum tree_code inv_op
4121 		    = invert_op (split_store, j, int_type, xor_mask);
4122 		  if (inv_op != NOP_EXPR)
4123 		    {
4124 		      stmt = gimple_build_assign (make_ssa_name (int_type),
4125 						  inv_op, ops[j], xor_mask);
4126 		      gimple_set_location (stmt, load_loc);
4127 		      ops[j] = gimple_assign_lhs (stmt);
4128 
4129 		      if (gsi_bb (load_gsi[j]))
4130 			gimple_seq_add_stmt_without_update (&load_seq[j],
4131 							    stmt);
4132 		      else
4133 			gimple_seq_add_stmt_without_update (&seq, stmt);
4134 		    }
4135 		}
4136 	      else
4137 		ops[j] = native_interpret_expr (int_type,
4138 						group->val + try_pos
4139 						- start_byte_pos,
4140 						group->buf_size);
4141 	    }
4142 
4143 	  switch (split_store->orig_stores[0]->rhs_code)
4144 	    {
4145 	    case BIT_AND_EXPR:
4146 	    case BIT_IOR_EXPR:
4147 	    case BIT_XOR_EXPR:
4148 	      FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4149 		{
4150 		  tree rhs1 = gimple_assign_rhs1 (info->stmt);
4151 		  orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4152 		}
4153 	      location_t bit_loc;
4154 	      bit_loc = get_location_for_stmts (orig_stmts);
4155 	      orig_stmts.truncate (0);
4156 
4157 	      stmt
4158 		= gimple_build_assign (make_ssa_name (int_type),
4159 				       split_store->orig_stores[0]->rhs_code,
4160 				       ops[0], ops[1]);
4161 	      gimple_set_location (stmt, bit_loc);
4162 	      /* If there is just one load and there is a separate
4163 		 load_seq[0], emit the bitwise op right after it.  */
4164 	      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4165 		gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4166 	      /* Otherwise, if at least one load is in seq, we need to
4167 		 emit the bitwise op right before the store.  If there
4168 		 are two loads and are emitted somewhere else, it would
4169 		 be better to emit the bitwise op as early as possible;
4170 		 we don't track where that would be possible right now
4171 		 though.  */
4172 	      else
4173 		gimple_seq_add_stmt_without_update (&seq, stmt);
4174 	      src = gimple_assign_lhs (stmt);
4175 	      tree xor_mask;
4176 	      enum tree_code inv_op;
4177 	      inv_op = invert_op (split_store, 2, int_type, xor_mask);
4178 	      if (inv_op != NOP_EXPR)
4179 		{
4180 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4181 					      inv_op, src, xor_mask);
4182 		  gimple_set_location (stmt, bit_loc);
4183 		  if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4184 		    gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4185 		  else
4186 		    gimple_seq_add_stmt_without_update (&seq, stmt);
4187 		  src = gimple_assign_lhs (stmt);
4188 		}
4189 	      break;
4190 	    case LROTATE_EXPR:
4191 	    case NOP_EXPR:
4192 	      src = ops[0];
4193 	      if (!is_gimple_val (src))
4194 		{
4195 		  stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4196 					      src);
4197 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4198 		  src = gimple_assign_lhs (stmt);
4199 		}
4200 	      if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4201 		{
4202 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4203 					      NOP_EXPR, src);
4204 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4205 		  src = gimple_assign_lhs (stmt);
4206 		}
4207 	      inv_op = invert_op (split_store, 2, int_type, xor_mask);
4208 	      if (inv_op != NOP_EXPR)
4209 		{
4210 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4211 					      inv_op, src, xor_mask);
4212 		  gimple_set_location (stmt, loc);
4213 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4214 		  src = gimple_assign_lhs (stmt);
4215 		}
4216 	      break;
4217 	    default:
4218 	      src = ops[0];
4219 	      break;
4220 	    }
4221 
4222 	  /* If bit insertion is required, we use the source as an accumulator
4223 	     into which the successive bit-field values are manually inserted.
4224 	     FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
4225 	  if (group->bit_insertion)
4226 	    FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4227 	      if (info->rhs_code == BIT_INSERT_EXPR
4228 		  && info->bitpos < try_bitpos + try_size
4229 		  && info->bitpos + info->bitsize > try_bitpos)
4230 		{
4231 		  /* Mask, truncate, convert to final type, shift and ior into
4232 		     the accumulator.  Note that every step can be a no-op.  */
4233 		  const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4234 		  const HOST_WIDE_INT end_gap
4235 		    = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4236 		  tree tem = info->ops[0].val;
4237 		  if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4238 		    {
4239 		      tree bitfield_type
4240 			= build_nonstandard_integer_type (info->bitsize,
4241 							  UNSIGNED);
4242 		      tem = gimple_convert (&seq, loc, bitfield_type, tem);
4243 		    }
4244 		  else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
4245 		    {
4246 		      const unsigned HOST_WIDE_INT imask
4247 			= (HOST_WIDE_INT_1U << info->bitsize) - 1;
4248 		      tem = gimple_build (&seq, loc,
4249 					  BIT_AND_EXPR, TREE_TYPE (tem), tem,
4250 					  build_int_cst (TREE_TYPE (tem),
4251 							 imask));
4252 		    }
4253 		  const HOST_WIDE_INT shift
4254 		    = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4255 		  if (shift < 0)
4256 		    tem = gimple_build (&seq, loc,
4257 					RSHIFT_EXPR, TREE_TYPE (tem), tem,
4258 					build_int_cst (NULL_TREE, -shift));
4259 		  tem = gimple_convert (&seq, loc, int_type, tem);
4260 		  if (shift > 0)
4261 		    tem = gimple_build (&seq, loc,
4262 					LSHIFT_EXPR, int_type, tem,
4263 					build_int_cst (NULL_TREE, shift));
4264 		  src = gimple_build (&seq, loc,
4265 				      BIT_IOR_EXPR, int_type, tem, src);
4266 		}
4267 
4268 	  if (!integer_zerop (mask))
4269 	    {
4270 	      tree tem = make_ssa_name (int_type);
4271 	      tree load_src = unshare_expr (dest);
4272 	      /* The load might load some or all bits uninitialized,
4273 		 avoid -W*uninitialized warnings in that case.
4274 		 As an optimization, it would be nice if, when all the bits
4275 		 are provably uninitialized (no stores at all yet or the
4276 		 previous store is a CLOBBER), we optimized away the load
4277 		 and replaced it e.g. with 0.  */
4278 	      TREE_NO_WARNING (load_src) = 1;
4279 	      stmt = gimple_build_assign (tem, load_src);
4280 	      gimple_set_location (stmt, loc);
4281 	      gimple_set_vuse (stmt, new_vuse);
4282 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4283 
4284 	      /* FIXME: If there is a single chunk of zero bits in mask,
4285 		 perhaps use BIT_INSERT_EXPR instead?  */
4286 	      stmt = gimple_build_assign (make_ssa_name (int_type),
4287 					  BIT_AND_EXPR, tem, mask);
4288 	      gimple_set_location (stmt, loc);
4289 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4290 	      tem = gimple_assign_lhs (stmt);
4291 
4292 	      if (TREE_CODE (src) == INTEGER_CST)
4293 		src = wide_int_to_tree (int_type,
4294 					wi::bit_and_not (wi::to_wide (src),
4295 							 wi::to_wide (mask)));
4296 	      else
4297 		{
4298 		  tree nmask
4299 		    = wide_int_to_tree (int_type,
4300 					wi::bit_not (wi::to_wide (mask)));
4301 		  stmt = gimple_build_assign (make_ssa_name (int_type),
4302 					      BIT_AND_EXPR, src, nmask);
4303 		  gimple_set_location (stmt, loc);
4304 		  gimple_seq_add_stmt_without_update (&seq, stmt);
4305 		  src = gimple_assign_lhs (stmt);
4306 		}
4307 	      stmt = gimple_build_assign (make_ssa_name (int_type),
4308 					  BIT_IOR_EXPR, tem, src);
4309 	      gimple_set_location (stmt, loc);
4310 	      gimple_seq_add_stmt_without_update (&seq, stmt);
4311 	      src = gimple_assign_lhs (stmt);
4312 	    }
4313 	}
4314 
4315       stmt = gimple_build_assign (dest, src);
4316       gimple_set_location (stmt, loc);
4317       gimple_set_vuse (stmt, new_vuse);
4318       gimple_seq_add_stmt_without_update (&seq, stmt);
4319 
4320       if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4321 	add_stmt_to_eh_lp (stmt, group->lp_nr);
4322 
4323       tree new_vdef;
4324       if (i < split_stores.length () - 1)
4325 	new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
4326       else
4327 	new_vdef = last_vdef;
4328 
4329       gimple_set_vdef (stmt, new_vdef);
4330       SSA_NAME_DEF_STMT (new_vdef) = stmt;
4331       new_vuse = new_vdef;
4332     }
4333 
4334   FOR_EACH_VEC_ELT (split_stores, i, split_store)
4335     delete split_store;
4336 
4337   gcc_assert (seq);
4338   if (dump_file)
4339     {
4340       fprintf (dump_file,
4341 	       "New sequence of %u stores to replace old one of %u stores\n",
4342 	       split_stores.length (), orig_num_stmts);
4343       if (dump_flags & TDF_DETAILS)
4344 	print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4345     }
4346 
4347   if (gimple_clobber_p (group->last_stmt))
4348     update_stmt (group->last_stmt);
4349 
4350   if (group->lp_nr > 0)
4351     {
4352       /* We're going to insert a sequence of (potentially) throwing stores
4353 	 into an active EH region.  This means that we're going to create
4354 	 new basic blocks with EH edges pointing to the post landing pad
4355 	 and, therefore, we'll have to update its PHI nodes, if any.  For the
4356 	 virtual PHI node, we're going to use the VDEFs created above, but
4357 	 for the other nodes, we need to record the original reaching defs.  */
4358       eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4359       basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4360       basic_block last_bb = gimple_bb (group->last_stmt);
4361       edge last_edge = find_edge (last_bb, lp_bb);
4362       auto_vec<tree, 16> last_defs;
4363       gphi_iterator gpi;
4364       for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4365 	{
4366 	  gphi *phi = gpi.phi ();
4367 	  tree last_def;
4368 	  if (virtual_operand_p (gimple_phi_result (phi)))
4369 	    last_def = NULL_TREE;
4370 	  else
4371 	    last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4372 	  last_defs.safe_push (last_def);
4373 	}
4374 
4375       /* Do the insertion.  Then, if new basic blocks have been created in the
4376 	 process, rewind the chain of VDEFs created above to walk the new basic
4377 	 blocks and update the corresponding arguments of the PHI nodes.  */
4378       update_modified_stmts (seq);
4379       if (gimple_find_sub_bbs (seq, &last_gsi))
4380 	while (last_vdef != gimple_vuse (group->last_stmt))
4381 	  {
4382 	    gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4383 	    if (stmt_could_throw_p (cfun, stmt))
4384 	      {
4385 		edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4386 		unsigned int i;
4387 		for (gpi = gsi_start_phis (lp_bb), i = 0;
4388 		     !gsi_end_p (gpi);
4389 		     gsi_next (&gpi), i++)
4390 		  {
4391 		    gphi *phi = gpi.phi ();
4392 		    tree new_def;
4393 		    if (virtual_operand_p (gimple_phi_result (phi)))
4394 		      new_def = last_vdef;
4395 		    else
4396 		      new_def = last_defs[i];
4397 		    add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4398 		  }
4399 	      }
4400 	    last_vdef = gimple_vuse (stmt);
4401 	  }
4402     }
4403   else
4404     gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4405 
4406   for (int j = 0; j < 2; ++j)
4407     if (load_seq[j])
4408       gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
4409 
4410   return true;
4411 }
4412 
4413 /* Process the merged_store_group objects created in the coalescing phase.
4414    The stores are all against the base object BASE.
4415    Try to output the widened stores and delete the original statements if
4416    successful.  Return true iff any changes were made.  */
4417 
4418 bool
4419 imm_store_chain_info::output_merged_stores ()
4420 {
4421   unsigned int i;
4422   merged_store_group *merged_store;
4423   bool ret = false;
4424   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4425     {
4426       if (dbg_cnt (store_merging)
4427 	  && output_merged_store (merged_store))
4428 	{
4429 	  unsigned int j;
4430 	  store_immediate_info *store;
4431 	  FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4432 	    {
4433 	      gimple *stmt = store->stmt;
4434 	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4435 	      /* Don't remove clobbers, they are still useful even if
4436 		 everything is overwritten afterwards.  */
4437 	      if (gimple_clobber_p (stmt))
4438 		continue;
4439 	      gsi_remove (&gsi, true);
4440 	      if (store->lp_nr)
4441 		remove_stmt_from_eh_lp (stmt);
4442 	      if (stmt != merged_store->last_stmt)
4443 		{
4444 		  unlink_stmt_vdef (stmt);
4445 		  release_defs (stmt);
4446 		}
4447 	    }
4448 	  ret = true;
4449 	}
4450     }
4451   if (ret && dump_file)
4452     fprintf (dump_file, "Merging successful!\n");
4453 
4454   return ret;
4455 }
4456 
4457 /* Coalesce the store_immediate_info objects recorded against the base object
4458    BASE in the first phase and output them.
4459    Delete the allocated structures.
4460    Return true if any changes were made.  */
4461 
4462 bool
4463 imm_store_chain_info::terminate_and_process_chain ()
4464 {
4465   /* Process store chain.  */
4466   bool ret = false;
4467   if (m_store_info.length () > 1)
4468     {
4469       ret = coalesce_immediate_stores ();
4470       if (ret)
4471 	ret = output_merged_stores ();
4472     }
4473 
4474   /* Delete all the entries we allocated ourselves.  */
4475   store_immediate_info *info;
4476   unsigned int i;
4477   FOR_EACH_VEC_ELT (m_store_info, i, info)
4478     delete info;
4479 
4480   merged_store_group *merged_info;
4481   FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4482     delete merged_info;
4483 
4484   return ret;
4485 }
4486 
4487 /* Return true iff LHS is a destination potentially interesting for
4488    store merging.  In practice these are the codes that get_inner_reference
4489    can process.  */
4490 
4491 static bool
4492 lhs_valid_for_store_merging_p (tree lhs)
4493 {
4494   if (DECL_P (lhs))
4495     return true;
4496 
4497   switch (TREE_CODE (lhs))
4498     {
4499     case ARRAY_REF:
4500     case ARRAY_RANGE_REF:
4501     case BIT_FIELD_REF:
4502     case COMPONENT_REF:
4503     case MEM_REF:
4504       return true;
4505     default:
4506       return false;
4507     }
4508 
4509   gcc_unreachable ();
4510 }
4511 
4512 /* Return true if the tree RHS is a constant we want to consider
4513    during store merging.  In practice accept all codes that
4514    native_encode_expr accepts.  */
4515 
4516 static bool
4517 rhs_valid_for_store_merging_p (tree rhs)
4518 {
4519   unsigned HOST_WIDE_INT size;
4520   if (TREE_CODE (rhs) == CONSTRUCTOR
4521       && CONSTRUCTOR_NELTS (rhs) == 0
4522       && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4523       && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4524     return true;
4525   return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4526 	  && native_encode_expr (rhs, NULL, size) != 0);
4527 }
4528 
4529 /* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4530    and return true on success or false on failure.  */
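
/* E.g. (hypothetical values) a BYTE_OFF of 4 adds 4 * BITS_PER_UNIT bits to
   *PBITPOS and, when a bit region is recorded, to *PBITREGION_START and
   *PBITREGION_END as well.  Failure means the adjusted *PBITPOS would be
   negative or would not fit; a region that no longer fits is instead
   dropped by zeroing *PBITREGION_END.  */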
4531 
4532 static bool
4533 adjust_bit_pos (poly_offset_int byte_off,
4534 		poly_int64  *pbitpos,
4535 		poly_uint64 *pbitregion_start,
4536 		poly_uint64 *pbitregion_end)
4537 {
4538   poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4539   bit_off += *pbitpos;
4540 
4541   if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4542     {
4543       if (maybe_ne (*pbitregion_end, 0U))
4544 	{
4545 	  bit_off = byte_off << LOG2_BITS_PER_UNIT;
4546 	  bit_off += *pbitregion_start;
4547 	  if (bit_off.to_uhwi (pbitregion_start))
4548 	    {
4549 	      bit_off = byte_off << LOG2_BITS_PER_UNIT;
4550 	      bit_off += *pbitregion_end;
4551 	      if (!bit_off.to_uhwi (pbitregion_end))
4552 		*pbitregion_end = 0;
4553 	    }
4554 	  else
4555 	    *pbitregion_end = 0;
4556 	}
4557       return true;
4558     }
4559   else
4560     return false;
4561 }
4562 
4563 /* If MEM is a memory reference usable for store merging (either as
4564    store destination or for loads), return the non-NULL base_addr
4565    and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4566    Otherwise return NULL, *PBITPOS should be still valid even for that
4567    case.  */
4568 
4569 static tree
4570 mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4571 			     poly_uint64 *pbitpos,
4572 			     poly_uint64 *pbitregion_start,
4573 			     poly_uint64 *pbitregion_end)
4574 {
4575   poly_int64 bitsize, bitpos;
4576   poly_uint64 bitregion_start = 0, bitregion_end = 0;
4577   machine_mode mode;
4578   int unsignedp = 0, reversep = 0, volatilep = 0;
4579   tree offset;
4580   tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4581 					&unsignedp, &reversep, &volatilep);
4582   *pbitsize = bitsize;
4583   if (known_le (bitsize, 0))
4584     return NULL_TREE;
4585 
4586   if (TREE_CODE (mem) == COMPONENT_REF
4587       && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4588     {
4589       get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
4590       if (maybe_ne (bitregion_end, 0U))
4591 	bitregion_end += 1;
4592     }
4593 
4594   if (reversep)
4595     return NULL_TREE;
4596 
4597   /* We do not want to rewrite TARGET_MEM_REFs.  */
4598   if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4599     return NULL_TREE;
4600   /* In some cases get_inner_reference may return a
4601      MEM_REF [ptr + byteoffset].  For the purposes of this pass
4602      canonicalize the base_addr to MEM_REF [ptr] and take
4603      byteoffset into account in the bitpos.  This occurs in
4604      PR 23684 and this way we can catch more chains.  */
4605   else if (TREE_CODE (base_addr) == MEM_REF)
4606     {
4607       if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4608 			   &bitregion_start, &bitregion_end))
4609 	return NULL_TREE;
4610       base_addr = TREE_OPERAND (base_addr, 0);
4611     }
4612   /* get_inner_reference returns the base object, get at its
4613      address now.  */
4614   else
4615     {
4616       if (maybe_lt (bitpos, 0))
4617 	return NULL_TREE;
4618       base_addr = build_fold_addr_expr (base_addr);
4619     }
4620 
4621   if (offset)
4622     {
4623       /* If the access has a variable offset then a base decl has to be
4624 	 address-taken for us to be able to emit pointer-based stores to it.
4625 	 ???  We might be able to get away with re-using the original
4626 	 base up to the first variable part and then wrapping that inside
4627 	 a BIT_FIELD_REF.  */
4628       tree base = get_base_address (base_addr);
4629       if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
4630 	return NULL_TREE;
4631 
4632       /* As with the base above, remove the constant term from the offset.  */
4633       if (TREE_CODE (offset) == PLUS_EXPR
4634 	  && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4635 	  && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4636 			     &bitpos, &bitregion_start, &bitregion_end))
4637 	offset = TREE_OPERAND (offset, 0);
4638 
4639       base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4640 			  base_addr, offset);
4641     }
4642 
4643   if (known_eq (bitregion_end, 0U))
4644     {
4645       bitregion_start = round_down_to_byte_boundary (bitpos);
4646       bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4647     }
4648 
4649   *pbitsize = bitsize;
4650   *pbitpos = bitpos;
4651   *pbitregion_start = bitregion_start;
4652   *pbitregion_end = bitregion_end;
4653   return base_addr;
4654 }
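
/* Illustrative sketch only: when no bit-field region was recorded, the code
   above defaults the region to the smallest byte-aligned window containing
   the access, e.g. a 4-bit access at bit position 13 gets the region [8, 24).
   The helper below is hypothetical and assumes BITS_PER_UNIT is 8.  */
#if 0
static void
default_bit_region_sketch (unsigned long bitpos, unsigned long bitsize,
			   unsigned long *region_start, unsigned long *region_end)
{
  *region_start = bitpos & ~7UL;			/* Round down to a byte boundary.  */
  *region_end = (bitpos + bitsize + 7UL) & ~7UL;	/* Round up to a byte boundary.  */
}
#endif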
4655 
4656 /* Return true if STMT is a load that can be used for store merging.
4657    In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
4658    BITREGION_END are properties of the corresponding store.  */
4659 
4660 static bool
4661 handled_load (gimple *stmt, store_operand_info *op,
4662 	      poly_uint64 bitsize, poly_uint64 bitpos,
4663 	      poly_uint64 bitregion_start, poly_uint64 bitregion_end)
4664 {
4665   if (!is_gimple_assign (stmt))
4666     return false;
4667   if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4668     {
4669       tree rhs1 = gimple_assign_rhs1 (stmt);
4670       if (TREE_CODE (rhs1) == SSA_NAME
4671 	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4672 			   bitregion_start, bitregion_end))
4673 	{
4674 	  /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4675 	     been optimized earlier, but if allowed here, would confuse the
4676 	     multiple uses counting.  */
4677 	  if (op->bit_not_p)
4678 	    return false;
4679 	  op->bit_not_p = !op->bit_not_p;
4680 	  return true;
4681 	}
4682       return false;
4683     }
4684   if (gimple_vuse (stmt)
4685       && gimple_assign_load_p (stmt)
4686       && !stmt_can_throw_internal (cfun, stmt)
4687       && !gimple_has_volatile_ops (stmt))
4688     {
4689       tree mem = gimple_assign_rhs1 (stmt);
4690       op->base_addr
4691 	= mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4692 				       &op->bitregion_start,
4693 				       &op->bitregion_end);
4694       if (op->base_addr != NULL_TREE
4695 	  && known_eq (op->bitsize, bitsize)
4696 	  && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4697 	  && known_ge (op->bitpos - op->bitregion_start,
4698 		       bitpos - bitregion_start)
4699 	  && known_ge (op->bitregion_end - op->bitpos,
4700 		       bitregion_end - bitpos))
4701 	{
4702 	  op->stmt = stmt;
4703 	  op->val = mem;
4704 	  op->bit_not_p = false;
4705 	  return true;
4706 	}
4707     }
4708   return false;
4709 }
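
/* Illustrative sketch only: the acceptance test above requires the load to
   mirror the store, i.e. the same width, a whole-byte distance between the
   two bit positions, and at least as much slack inside the load's bit region
   on both sides as the store has inside its own.  A hypothetical scalar
   rendering of those checks, assuming BITS_PER_UNIT is 8:  */
#if 0
static bool
load_mirrors_store_sketch (unsigned long ld_size, unsigned long ld_pos,
			   unsigned long ld_start, unsigned long ld_end,
			   unsigned long st_size, unsigned long st_pos,
			   unsigned long st_start, unsigned long st_end)
{
  return (ld_size == st_size
	  && (ld_pos - st_pos) % 8 == 0
	  && ld_pos - ld_start >= st_pos - st_start
	  && ld_end - ld_pos >= st_end - st_pos);
}
#endif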
4710 
4711 /* Return the index number of the landing pad for STMT, if any.  */
4712 
4713 static int
4714 lp_nr_for_store (gimple *stmt)
4715 {
4716   if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4717     return 0;
4718 
4719   if (!stmt_could_throw_p (cfun, stmt))
4720     return 0;
4721 
4722   return lookup_stmt_eh_lp (stmt);
4723 }
4724 
4725 /* Record the store STMT for store merging optimization if it can be
4726    optimized.  Return true if any changes were made.  */
4727 
4728 bool
4729 pass_store_merging::process_store (gimple *stmt)
4730 {
4731   tree lhs = gimple_assign_lhs (stmt);
4732   tree rhs = gimple_assign_rhs1 (stmt);
4733   poly_uint64 bitsize, bitpos;
4734   poly_uint64 bitregion_start, bitregion_end;
4735   tree base_addr
4736     = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4737 				   &bitregion_start, &bitregion_end);
4738   if (known_eq (bitsize, 0U))
4739     return false;
4740 
4741   bool invalid = (base_addr == NULL_TREE
4742 		  || (maybe_gt (bitsize,
4743 				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
4744 		      && TREE_CODE (rhs) != INTEGER_CST
4745 		      && (TREE_CODE (rhs) != CONSTRUCTOR
4746 			  || CONSTRUCTOR_NELTS (rhs) != 0)));
4747   enum tree_code rhs_code = ERROR_MARK;
4748   bool bit_not_p = false;
4749   struct symbolic_number n;
4750   gimple *ins_stmt = NULL;
4751   store_operand_info ops[2];
4752   if (invalid)
4753     ;
4754   else if (rhs_valid_for_store_merging_p (rhs))
4755     {
4756       rhs_code = INTEGER_CST;
4757       ops[0].val = rhs;
4758     }
4759   else if (TREE_CODE (rhs) != SSA_NAME)
4760     invalid = true;
4761   else
4762     {
4763       gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4764       if (!is_gimple_assign (def_stmt))
4765 	invalid = true;
4766       else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4767 			     bitregion_start, bitregion_end))
4768 	rhs_code = MEM_REF;
4769       else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4770 	{
4771 	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
4772 	  if (TREE_CODE (rhs1) == SSA_NAME
4773 	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4774 	    {
4775 	      bit_not_p = true;
4776 	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
4777 	    }
4778 	}
4779 
4780       if (rhs_code == ERROR_MARK && !invalid)
4781 	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4782 	  {
4783 	  case BIT_AND_EXPR:
4784 	  case BIT_IOR_EXPR:
4785 	  case BIT_XOR_EXPR:
4786 	    tree rhs1, rhs2;
4787 	    rhs1 = gimple_assign_rhs1 (def_stmt);
4788 	    rhs2 = gimple_assign_rhs2 (def_stmt);
4789 	    invalid = true;
4790 	    if (TREE_CODE (rhs1) != SSA_NAME)
4791 	      break;
4792 	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4793 	    if (!is_gimple_assign (def_stmt1)
4794 		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4795 				  bitregion_start, bitregion_end))
4796 	      break;
4797 	    if (rhs_valid_for_store_merging_p (rhs2))
4798 	      ops[1].val = rhs2;
4799 	    else if (TREE_CODE (rhs2) != SSA_NAME)
4800 	      break;
4801 	    else
4802 	      {
4803 		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4804 		if (!is_gimple_assign (def_stmt2))
4805 		  break;
4806 		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4807 					bitregion_start, bitregion_end))
4808 		  break;
4809 	      }
4810 	    invalid = false;
4811 	    break;
4812 	  default:
4813 	    invalid = true;
4814 	    break;
4815 	  }
4816 
4817       unsigned HOST_WIDE_INT const_bitsize;
4818       if (bitsize.is_constant (&const_bitsize)
4819 	  && (const_bitsize % BITS_PER_UNIT) == 0
4820 	  && const_bitsize <= 64
4821 	  && multiple_p (bitpos, BITS_PER_UNIT))
4822 	{
4823 	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4824 	  if (ins_stmt)
4825 	    {
4826 	      uint64_t nn = n.n;
4827 	      for (unsigned HOST_WIDE_INT i = 0;
4828 		   i < const_bitsize;
4829 		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4830 		if ((nn & MARKER_MASK) == 0
4831 		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4832 		  {
4833 		    ins_stmt = NULL;
4834 		    break;
4835 		  }
4836 	      if (ins_stmt)
4837 		{
4838 		  if (invalid)
4839 		    {
4840 		      rhs_code = LROTATE_EXPR;
4841 		      ops[0].base_addr = NULL_TREE;
4842 		      ops[1].base_addr = NULL_TREE;
4843 		    }
4844 		  invalid = false;
4845 		}
4846 	    }
4847 	}
4848 
4849       if (invalid
4850 	  && bitsize.is_constant (&const_bitsize)
4851 	  && ((const_bitsize % BITS_PER_UNIT) != 0
4852 	      || !multiple_p (bitpos, BITS_PER_UNIT))
4853 	  && const_bitsize <= 64)
4854 	{
4855 	  /* Bypass a conversion to the bit-field type.  */
4856 	  if (!bit_not_p
4857 	      && is_gimple_assign (def_stmt)
4858 	      && CONVERT_EXPR_CODE_P (rhs_code))
4859 	    {
4860 	      tree rhs1 = gimple_assign_rhs1 (def_stmt);
4861 	      if (TREE_CODE (rhs1) == SSA_NAME
4862 		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
4863 		rhs = rhs1;
4864 	    }
4865 	  rhs_code = BIT_INSERT_EXPR;
4866 	  bit_not_p = false;
4867 	  ops[0].val = rhs;
4868 	  ops[0].base_addr = NULL_TREE;
4869 	  ops[1].base_addr = NULL_TREE;
4870 	  invalid = false;
4871 	}
4872     }
4873 
4874   unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4875   unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4876   if (invalid
4877       || !bitsize.is_constant (&const_bitsize)
4878       || !bitpos.is_constant (&const_bitpos)
4879       || !bitregion_start.is_constant (&const_bitregion_start)
4880       || !bitregion_end.is_constant (&const_bitregion_end))
4881     return terminate_all_aliasing_chains (NULL, stmt);
4882 
4883   if (!ins_stmt)
4884     memset (&n, 0, sizeof (n));
4885 
4886   class imm_store_chain_info **chain_info = NULL;
4887   bool ret = false;
4888   if (base_addr)
4889     chain_info = m_stores.get (base_addr);
4890 
4891   store_immediate_info *info;
4892   if (chain_info)
4893     {
4894       unsigned int ord = (*chain_info)->m_store_info.length ();
4895       info = new store_immediate_info (const_bitsize, const_bitpos,
4896 				       const_bitregion_start,
4897 				       const_bitregion_end,
4898 				       stmt, ord, rhs_code, n, ins_stmt,
4899 				       bit_not_p, lp_nr_for_store (stmt),
4900 				       ops[0], ops[1]);
4901       if (dump_file && (dump_flags & TDF_DETAILS))
4902 	{
4903 	  fprintf (dump_file, "Recording immediate store from stmt:\n");
4904 	  print_gimple_stmt (dump_file, stmt, 0);
4905 	}
4906       (*chain_info)->m_store_info.safe_push (info);
4907       ret |= terminate_all_aliasing_chains (chain_info, stmt);
4908       /* If we reach the limit of stores to merge in a chain, terminate and
4909 	 process the chain now.  */
4910       if ((*chain_info)->m_store_info.length ()
4911 	  == (unsigned int) param_max_stores_to_merge)
4912 	{
4913 	  if (dump_file && (dump_flags & TDF_DETAILS))
4914 	    fprintf (dump_file,
4915 		     "Reached maximum number of statements to merge:\n");
4916 	  ret |= terminate_and_process_chain (*chain_info);
4917 	}
4918       return ret;
4919     }
4920 
4921   /* Does the store alias any existing chain?  */
4922   ret |= terminate_all_aliasing_chains (NULL, stmt);
4923   /* Start a new chain.  */
4924   class imm_store_chain_info *new_chain
4925     = new imm_store_chain_info (m_stores_head, base_addr);
4926   info = new store_immediate_info (const_bitsize, const_bitpos,
4927 				   const_bitregion_start,
4928 				   const_bitregion_end,
4929 				   stmt, 0, rhs_code, n, ins_stmt,
4930 				   bit_not_p, lp_nr_for_store (stmt),
4931 				   ops[0], ops[1]);
4932   new_chain->m_store_info.safe_push (info);
4933   m_stores.put (base_addr, new_chain);
4934   if (dump_file && (dump_flags & TDF_DETAILS))
4935     {
4936       fprintf (dump_file, "Starting new chain with statement:\n");
4937       print_gimple_stmt (dump_file, stmt, 0);
4938       fprintf (dump_file, "The base object is:\n");
4939       print_generic_expr (dump_file, base_addr);
4940       fprintf (dump_file, "\n");
4941     }
4942   return ret;
4943 }
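
/* Illustrative sketch only: the byte-marker scan in process_store above walks
   one marker per stored byte of the symbolic number and rejects the
   bswap/rotate candidate as soon as a byte is either unpopulated or unknown.
   A hypothetical standalone version, with 8-bit markers and 0xff standing for
   an unknown byte as in the bswap machinery:  */
#if 0
static bool
all_bytes_tracked_sketch (uint64_t markers, unsigned int nbytes)
{
  for (unsigned int i = 0; i < nbytes; i++, markers >>= 8)
    if ((markers & 0xff) == 0 || (markers & 0xff) == 0xff)
      return false;		/* Byte not produced by the tracked value.  */
  return true;
}
#endif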
4944 
4945 /* Return true if STMT is a store valid for store merging.  */
4946 
4947 static bool
4948 store_valid_for_store_merging_p (gimple *stmt)
4949 {
4950   return gimple_assign_single_p (stmt)
4951 	 && gimple_vdef (stmt)
4952 	 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
4953 	 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
4954 }
4955 
4956 enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4957 
4958 /* Return the status of basic block BB wrt store merging.  */
4959 
4960 static enum basic_block_status
4961 get_status_for_store_merging (basic_block bb)
4962 {
4963   unsigned int num_statements = 0;
4964   gimple_stmt_iterator gsi;
4965   edge e;
4966   gimple *last_stmt = NULL;
4967 
4968   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4969     {
4970       gimple *stmt = gsi_stmt (gsi);
4971 
4972       if (is_gimple_debug (stmt))
4973 	continue;
4974 
4975       last_stmt = stmt;
4976 
4977       if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4978 	break;
4979     }
4980 
4981   if (num_statements == 0)
4982     return BB_INVALID;
4983 
4984   if (cfun->can_throw_non_call_exceptions && cfun->eh
4985       && store_valid_for_store_merging_p (last_stmt)
4986       && (e = find_fallthru_edge (bb->succs))
4987       && e->dest == bb->next_bb)
4988     return BB_EXTENDED_VALID;
4989 
4990   return num_statements >= 2 ? BB_VALID : BB_INVALID;
4991 }
4992 
4993 /* Entry point for the pass.  Go over each basic block recording chains of
4994    immediate stores.  Upon encountering a terminating statement (as defined
4995    by stmt_terminates_chain_p), process the recorded stores and emit the widened
4996    variants.  */
4997 
4998 unsigned int
4999 pass_store_merging::execute (function *fun)
5000 {
5001   basic_block bb;
5002   hash_set<gimple *> orig_stmts;
5003   bool changed = false, open_chains = false;
5004 
5005   /* If the function can throw and catch non-call exceptions, we'll be trying
5006      to merge stores across different basic blocks so we need to first unsplit
5007      the EH edges in order to streamline the CFG of the function.  */
5008   if (cfun->can_throw_non_call_exceptions && cfun->eh)
5009     unsplit_eh_edges ();
5010 
5011   calculate_dominance_info (CDI_DOMINATORS);
5012 
5013   FOR_EACH_BB_FN (bb, fun)
5014     {
5015       const basic_block_status bb_status = get_status_for_store_merging (bb);
5016       gimple_stmt_iterator gsi;
5017 
5018       if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5019 	{
5020 	  changed |= terminate_and_process_all_chains ();
5021 	  open_chains = false;
5022 	}
5023 
5024       if (bb_status == BB_INVALID)
5025 	continue;
5026 
5027       if (dump_file && (dump_flags & TDF_DETAILS))
5028 	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5029 
5030       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5031 	{
5032 	  gimple *stmt = gsi_stmt (gsi);
5033 
5034 	  if (is_gimple_debug (stmt))
5035 	    continue;
5036 
5037 	  if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
5038 	    {
5039 	      /* Terminate all chains.  */
5040 	      if (dump_file && (dump_flags & TDF_DETAILS))
5041 		fprintf (dump_file, "Volatile access terminates "
5042 				    "all chains\n");
5043 	      changed |= terminate_and_process_all_chains ();
5044 	      open_chains = false;
5045 	      continue;
5046 	    }
5047 
5048 	  if (store_valid_for_store_merging_p (stmt))
5049 	    changed |= process_store (stmt);
5050 	  else
5051 	    changed |= terminate_all_aliasing_chains (NULL, stmt);
5052 	}
5053 
5054       if (bb_status == BB_EXTENDED_VALID)
5055 	open_chains = true;
5056       else
5057 	{
5058 	  changed |= terminate_and_process_all_chains ();
5059 	  open_chains = false;
5060 	}
5061     }
5062 
5063   if (open_chains)
5064     changed |= terminate_and_process_all_chains ();
5065 
5066   /* If the function can throw and catch non-call exceptions and something
5067      changed during the pass, then the CFG has (very likely) changed too.  */
5068   if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5069     {
5070       free_dominance_info (CDI_DOMINATORS);
5071       return TODO_cleanup_cfg;
5072     }
5073 
5074   return 0;
5075 }
5076 
5077 } // anon namespace
5078 
5079 /* Construct and return a store merging pass object.  */
5080 
5081 gimple_opt_pass *
5082 make_pass_store_merging (gcc::context *ctxt)
5083 {
5084   return new pass_store_merging (ctxt);
5085 }
5086 
5087 #if CHECKING_P
5088 
5089 namespace selftest {
5090 
5091 /* Selftests for store merging helpers.  */
5092 
5093 /* Assert that all elements of the byte arrays X and Y, both of length N,
5094    are equal.  */
5095 
5096 static void
5097 verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5098 {
5099   for (unsigned int i = 0; i < n; i++)
5100     {
5101       if (x[i] != y[i])
5102 	{
5103 	  fprintf (stderr, "Arrays do not match.  X:\n");
5104 	  dump_char_array (stderr, x, n);
5105 	  fprintf (stderr, "Y:\n");
5106 	  dump_char_array (stderr, y, n);
5107 	}
5108       ASSERT_EQ (x[i], y[i]);
5109     }
5110 }
5111 
5112 /* Test that shift_bytes_in_array_left carries bits across byte
5113    boundaries correctly.  */
5114 
5115 static void
5116 verify_shift_bytes_in_array_left (void)
5117 {
5118    /* byte 1   | byte 0
5119       00011111 | 11100000.  */
5120   unsigned char orig[2] = { 0xe0, 0x1f };
5121   unsigned char in[2];
5122   memcpy (in, orig, sizeof orig);
5123 
5124   unsigned char expected[2] = { 0x80, 0x7f };
5125   shift_bytes_in_array_left (in, sizeof (in), 2);
5126   verify_array_eq (in, expected, sizeof (in));
5127 
5128   memcpy (in, orig, sizeof orig);
5129   memcpy (expected, orig, sizeof orig);
5130   /* Check that shifting by zero doesn't change anything.  */
5131   shift_bytes_in_array_left (in, sizeof (in), 0);
5132   verify_array_eq (in, expected, sizeof (in));
5133 
5134 }
5135 
5136 /* Test that shift_bytes_in_array_right carries bits across byte
5137    boundaries correctly.  */
5138 
5139 static void
5140 verify_shift_bytes_in_array_right (void)
5141 {
5142    /* byte 0   | byte 1
5143       00011111 | 11100000.  */
5144   unsigned char orig[2] = { 0x1f, 0xe0};
5145   unsigned char in[2];
5146   memcpy (in, orig, sizeof orig);
5147   unsigned char expected[2] = { 0x07, 0xf8};
5148   shift_bytes_in_array_right (in, sizeof (in), 2);
5149   verify_array_eq (in, expected, sizeof (in));
5150 
5151   memcpy (in, orig, sizeof orig);
5152   memcpy (expected, orig, sizeof orig);
5153   /* Check that shifting by zero doesn't change anything.  */
5154   shift_bytes_in_array_right (in, sizeof (in), 0);
5155   verify_array_eq (in, expected, sizeof (in));
5156 }
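
/* Illustrative check only: viewing the two-byte arrays as 16-bit values, the
   left-shift test above computes 0x1fe0 << 2 == 0x7f80 with the array in
   little-endian byte order, while the right-shift test computes
   0x1fe0 >> 2 == 0x07f8 with the array in big-endian byte order.  */
#if 0
static void
shift_arithmetic_sketch (void)
{
  gcc_assert ((0x1fe0u << 2) == 0x7f80u);
  gcc_assert ((0x1fe0u >> 2) == 0x07f8u);
}
#endif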
5157 
5158 /* Test that clear_bit_region clears exactly the bits asked for and
5159    nothing more.  */
5160 
5161 static void
5162 verify_clear_bit_region (void)
5163 {
5164   /* Start with all bits set and test clearing various patterns in them.  */
5165   unsigned char orig[3] = { 0xff, 0xff, 0xff};
5166   unsigned char in[3];
5167   unsigned char expected[3];
5168   memcpy (in, orig, sizeof in);
5169 
5170   /* Check zeroing out all the bits.  */
5171   clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5172   expected[0] = expected[1] = expected[2] = 0;
5173   verify_array_eq (in, expected, sizeof in);
5174 
5175   memcpy (in, orig, sizeof in);
5176   /* Leave the first and last bits intact.  */
5177   clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5178   expected[0] = 0x1;
5179   expected[1] = 0;
5180   expected[2] = 0x80;
5181   verify_array_eq (in, expected, sizeof in);
5182 }
5183 
5184 /* Test that clear_bit_region_be clears exactly the bits asked for and
5185    nothing more.  */
5186 
5187 static void
5188 verify_clear_bit_region_be (void)
5189 {
5190   /* Start with all bits set and test clearing various patterns in them.  */
5191   unsigned char orig[3] = { 0xff, 0xff, 0xff};
5192   unsigned char in[3];
5193   unsigned char expected[3];
5194   memcpy (in, orig, sizeof in);
5195 
5196   /* Check zeroing out all the bits.  */
5197   clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5198   expected[0] = expected[1] = expected[2] = 0;
5199   verify_array_eq (in, expected, sizeof in);
5200 
5201   memcpy (in, orig, sizeof in);
5202   /* Leave the first and last bits intact.  */
5203   clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5204   expected[0] = 0x80;
5205   expected[1] = 0;
5206   expected[2] = 0x1;
5207   verify_array_eq (in, expected, sizeof in);
5208 }
5209 
5210 
5211 /* Run all of the selftests within this file.  */
5212 
5213 void
5214 store_merging_c_tests (void)
5215 {
5216   verify_shift_bytes_in_array_left ();
5217   verify_shift_bytes_in_array_right ();
5218   verify_clear_bit_region ();
5219   verify_clear_bit_region_be ();
5220 }
5221 
5222 } // namespace selftest
5223 #endif /* CHECKING_P.  */
5224