/* GIMPLE store merging pass.
   Copyright (C) 2016-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* The purpose of this pass is to combine multiple memory stores of
   constant values to consecutive memory locations into fewer wider stores.
   For example, if we have a sequence performing four single-byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 // Immediates concatenated according to endianness.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block recording constant assignments to
   destinations that can be expressed as a store to memory of a certain size
   at a certain bit offset.  Record store chains to different bases in a
   hash_map (m_stores) and make sure to terminate such chains when appropriate
   (for example when the stored values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.

   2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT
   or SLOW_UNALIGNED_ACCESS rules.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
  p |LE|BE|
  ---------
  0 |34|12|
  1 |12|34|
  2 |78|56|
  3 |56|78|
  4 |ab|ab|
  5 |cd|cd|

  To merge these into a single 48-bit merged value 'val' in phase 2)
  on little-endian we insert stores to higher (consecutive) bitpositions
  into the most significant bits of the merged value.
  The final merged value would be: 0xcdab56781234

  For big-endian we insert stores to higher bitpositions into the least
  significant bits of the merged value.
  The final merged value would be: 0x12345678abcd

  Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
  followed by a 16-bit store.  Again, we must consider endianness when
  breaking down the 48-bit value 'val' computed above.
  For little-endian we emit:
  [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
  [p + 4B] (16-bit) := 0xcdab;     // (val & 0xffff00000000) >> 32;

  Whereas for big-endian we emit:
  [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
  [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */

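/* As a concrete illustration (hypothetical code, not taken from the GCC
   testsuite), a C fragment such as:

     struct S { unsigned char a, b; unsigned short c; };

     void
     init (struct S *p)
     {
       p->a = 1;   //  8-bit store at bit offset 0
       p->b = 2;   //  8-bit store at bit offset 8
       p->c = 3;   // 16-bit store at bit offset 16
     }

   has a chain of three store_immediate_info objects recorded against the
   base 'p' in phase 1), coalesced into a single 32-bit merged_store_group
   in phase 2).  On a little-endian target that permits such an access,
   phase 3) can then replace the three stores with the equivalent of:

     *(unsigned int *) p = 0x00030201;

   The struct layout and constants are illustrative only; the exact result
   depends on the target's endianness and alignment rules.  */
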
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "params.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "stor-layout.h"
#include "timevar.h"
#include "tree-cfg.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "selftest.h"

/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
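
/* Note: on a typical 64-bit target BITS_PER_WORD is 64 and BITS_PER_UNIT
   is 8, so the widest store generated would be 8 bytes.  These values are
   target-defined; the numbers here are only an illustrative assumption.  */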

namespace {

/* Struct recording the information about a single store of an immediate
   to memory.  These are created in the first phase and coalesced into
   merged_store_group objects in the second phase.  */

struct store_immediate_info
{
  unsigned HOST_WIDE_INT bitsize;
  unsigned HOST_WIDE_INT bitpos;
  gimple *stmt;
  unsigned int order;
  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			gimple *, unsigned int);
};

store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
					    unsigned HOST_WIDE_INT bp,
					    gimple *st,
					    unsigned int ord)
  : bitsize (bs), bitpos (bp), stmt (st), order (ord)
{
}

/* Struct representing a group of stores to contiguous memory locations.
   These are produced by the second phase (coalescing) and consumed in the
   third phase that outputs the widened stores.  */

struct merged_store_group
{
  unsigned HOST_WIDE_INT start;
  unsigned HOST_WIDE_INT width;
  /* The size of the allocated memory for val.  */
  unsigned HOST_WIDE_INT buf_size;

  unsigned int align;
  unsigned int first_order;
  unsigned int last_order;

  auto_vec<struct store_immediate_info *> stores;
  /* We record the first and last original statements in the sequence because
     we'll need their vuse/vdef and replacement position.  It's easier to keep
     track of them separately as 'stores' is reordered by apply_stores.  */
  gimple *last_stmt;
  gimple *first_stmt;
  unsigned char *val;

  merged_store_group (store_immediate_info *);
  ~merged_store_group ();
  void merge_into (store_immediate_info *);
  void merge_overlapping (store_immediate_info *);
  bool apply_stores ();
};

/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (!fd)
    return;

  for (unsigned int i = 0; i < len; i++)
    fprintf (fd, "%x ", ptr[i]);
  fprintf (fd, "\n");
}

/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
   bits between adjacent elements.  AMNT should be within
   [0, BITS_PER_UNIT).
   Example, AMNT = 2:
   00011111|11100000 << 2 = 01111111|10000000
   PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */

static void
shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
{
  if (amnt == 0)
    return;

  unsigned char carry_over = 0U;
  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
  unsigned char clear_mask = (~0U) << amnt;

  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned prev_carry_over = carry_over;
      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);

      ptr[i] <<= amnt;
      if (i != 0)
	{
	  ptr[i] &= clear_mask;
	  ptr[i] |= prev_carry_over;
	}
    }
}

/* Like shift_bytes_in_array but for big-endian.
   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
   bits between adjacent elements.  AMNT should be within
   [0, BITS_PER_UNIT).
   Example, AMNT = 2:
   00011111|11100000 >> 2 = 00000111|11111000
   PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */

static void
shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
			    unsigned int amnt)
{
  if (amnt == 0)
    return;

  unsigned char carry_over = 0U;
  unsigned char carry_mask = ~(~0U << amnt);

  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned prev_carry_over = carry_over;
      carry_over = ptr[i] & carry_mask;

      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
      ptr[i] >>= amnt;
      ptr[i] |= prev_carry_over;
    }
}

/* Clear out LEN bits starting from bit START in the byte array
   PTR.  This clears the bits to the *right* from START.
   START must be within [0, BITS_PER_UNIT) and counts starting from
   the least significant bit.  */

static void
clear_bit_region_be (unsigned char *ptr, unsigned int start,
		     unsigned int len)
{
  if (len == 0)
    return;
  /* Clear len bits to the right of start.  */
  else if (len <= start + 1)
    {
      unsigned char mask = (~(~0U << len));
      mask = mask << (start + 1U - len);
      ptr[0] &= ~mask;
    }
  else if (start != BITS_PER_UNIT - 1)
    {
      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
			   len - (start % BITS_PER_UNIT) - 1);
    }
  else if (start == BITS_PER_UNIT - 1
	   && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      for (unsigned int i = 0; i < nbytes; i++)
	ptr[i] = 0U;
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
			     len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
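
/* A worked example (an illustration, not part of the original code):
   with BITS_PER_UNIT == 8, calling clear_bit_region_be on a single byte
   PTR[0] == 0xff with START == 6 and LEN == 4 takes the LEN <= START + 1
   path and builds the mask 0x0f << (6 + 1 - 4) == 0x78, so bits 6..3 are
   cleared and PTR[0] becomes 0x87.  */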

/* In the byte array PTR clear the bit region that starts at bit
   START and is LEN bits wide.
   For regions spanning multiple bytes do this recursively until we reach
   zero LEN or a region contained within a single byte.  */

static void
clear_bit_region (unsigned char *ptr, unsigned int start,
		  unsigned int len)
{
  /* Degenerate base case.  */
  if (len == 0)
    return;
  else if (start >= BITS_PER_UNIT)
    clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
  /* Second base case.  */
  else if ((start + len) <= BITS_PER_UNIT)
    {
      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
      mask >>= BITS_PER_UNIT - (start + len);

      ptr[0] &= ~mask;

      return;
    }
  /* Clear most significant bits in a byte and proceed with the next byte.  */
  else if (start != 0)
    {
      clear_bit_region (ptr, start, BITS_PER_UNIT - start);
      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
    }
  /* Whole bytes need to be cleared.  */
  else if (start == 0 && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      /* We could recurse on each byte but do the loop here to avoid
	 recursing too deep.  */
      memset (ptr, '\0', nbytes);
      /* Clear the remaining sub-byte region if there is one.  */
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
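
/* A worked example (illustrative only): with BITS_PER_UNIT == 8, calling
   clear_bit_region on PTR == { 0xff, 0xff } with START == 3 and LEN == 7
   first clears bits 7..3 of PTR[0] (leaving 0x07) and then recurses to
   clear bits 1..0 of PTR[1] (leaving 0xfc), i.e. the 7 bits starting at
   little-endian bit position 3 of the two-byte region.  */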

/* Write BITLEN bits of EXPR to the byte array PTR at
   bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
   Return true if the operation succeeded.  */

static bool
encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
		       unsigned int total_bytes)
{
  unsigned int first_byte = bitpos / BITS_PER_UNIT;
  tree tmp_int = expr;
  bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
			|| (bitpos % BITS_PER_UNIT)
			|| mode_for_size (bitlen, MODE_INT, 0) == BLKmode);

  if (!sub_byte_op_p)
    return (native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
	    != 0);

  /* LITTLE-ENDIAN
     We are writing a non byte-sized quantity or at a position that is not
     at a byte boundary.
     |--------|--------|--------| ptr + first_byte
           ^              ^
           xxx xxxxxxxx xxx< bp>
           |______EXPR____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer by 'bp' (carrying the bits over as necessary).
     |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
                                              <------bitlen---->< bp>
    Then we clear the destination bits:
    |---00000|00000000|000-----| ptr + first_byte
        <-------bitlen--->< bp>

    Finally we OR the bytes of the shifted EXPR into the cleared region:
    |---xxxxx|xxxxxxxx|xxx-----| ptr + first_byte.

   BIG-ENDIAN
   We are writing a non byte-sized quantity or at a position that is not
   at a byte boundary.
     ptr + first_byte |--------|--------|--------|
                            ^              ^
                       <bp >xxx xxxxxxxx xxx
                            |_____EXPR_____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer to the right by the appropriate amount (carrying
     the bits over as necessary).  We shift by as much as needed to align
     the most significant bit of EXPR with bitpos:
     |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
        <---bitlen---->          <bp ><-----bitlen----->
    Then we clear the destination bits:
    ptr + first_byte |-----000|00000000|00000---|
                      <bp ><-------bitlen----->

    Finally we OR the bytes of the shifted EXPR into the cleared region:
    ptr + first_byte |---xxxxx|xxxxxxxx|xxx-----|.
    The awkwardness comes from the fact that bitpos is counted from the
    most significant bit of a byte.  */

  /* Allocate an extra byte so that we have space to shift into.  */
  unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1;
  unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
  memset (tmpbuf, '\0', byte_size);
  /* The store detection code should only have allowed constants that are
     accepted by native_encode_expr.  */
  if (native_encode_expr (expr, tmpbuf, byte_size - 1, 0) == 0)
    gcc_unreachable ();

  /* The native_encode_expr machinery uses TYPE_MODE to determine how many
     bytes to write.  This means it can write more than
     ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
     write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
     bitlen and zero out the bits that are not relevant as well (that may
     contain a sign bit due to sign-extension).  */
  unsigned int padding
    = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
  /* On big-endian the padding is at the 'front' so just skip the initial
     bytes.  */
  if (BYTES_BIG_ENDIAN)
    tmpbuf += padding;

  byte_size -= padding;

  if (bitlen % BITS_PER_UNIT != 0)
    {
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
      else
	clear_bit_region (tmpbuf, bitlen,
			  byte_size * BITS_PER_UNIT - bitlen);
    }
  /* Left shifting relies on the last byte being clear if bitlen is
     a multiple of BITS_PER_UNIT, which might not be clear if
     there are padding bytes.  */
  else if (!BYTES_BIG_ENDIAN)
    tmpbuf[byte_size - 1] = '\0';

  /* Clear the bit region in PTR where the bits from TMPBUF will be
     inserted into.  */
  if (BYTES_BIG_ENDIAN)
    clear_bit_region_be (ptr + first_byte,
			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
  else
    clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);

  int shift_amnt;
  int bitlen_mod = bitlen % BITS_PER_UNIT;
  int bitpos_mod = bitpos % BITS_PER_UNIT;

  bool skip_byte = false;
  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS and BITLEN are exactly aligned and no shifting
	 is necessary.  */
      if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
	  || (bitpos_mod == 0 && bitlen_mod == 0))
	shift_amnt = 0;
      /* |. . . . . . . .|
	  <bp >   <blen >.
	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
	 of the value until it aligns with 'bp' in the next byte over.  */
      else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
	{
	  shift_amnt = bitlen_mod + bitpos_mod;
	  skip_byte = bitlen_mod != 0;
	}
      /* |. . . . . . . .|
	  <----bp--->
	    <---blen---->.
	 Shift the value right within the same byte so it aligns with 'bp'.  */
      else
	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
    }
  else
    shift_amnt = bitpos % BITS_PER_UNIT;

  /* Create the shifted version of EXPR.  */
  if (!BYTES_BIG_ENDIAN)
    {
      shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
      if (shift_amnt == 0)
	byte_size--;
    }
  else
    {
      gcc_assert (BYTES_BIG_ENDIAN);
      shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
      /* If shifting right forced us to move into the next byte skip the now
	 empty byte.  */
      if (skip_byte)
	{
	  tmpbuf++;
	  byte_size--;
	}
    }

  /* Insert the bits from TMPBUF.  */
  for (unsigned int i = 0; i < byte_size; i++)
    ptr[first_byte + i] |= tmpbuf[i];

  return true;
}
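
/* As an illustration (not taken from the original sources): on a
   little-endian target with BITS_PER_UNIT == 8, writing a 4-bit constant
   with value 0xe at BITPOS == 10, and assuming the constant's mode is a
   single byte, encodes EXPR into tmpbuf as { 0x0e, ... }, clears bits
   5..2 of PTR[1] (first_byte == 1, bitpos % 8 == 2), shifts tmpbuf left
   by 2 to get { 0x38, ... } and ORs that into PTR starting at PTR[1].  */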

/* Sorting function for store_immediate_info objects.
   Sorts them by bitposition.  Never returns zero: qsort requires a
   consistent comparator, so equal bitpositions (overlapping stores) are
   tie-broken by the order field, which is unique within a chain.  */

static int
sort_by_bitpos (const void *x, const void *y)
{
  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;

  if ((*tmp)->bitpos < (*tmp2)->bitpos)
    return -1;
  else if ((*tmp)->bitpos > (*tmp2)->bitpos)
    return 1;
  else if ((*tmp)->order < (*tmp2)->order)
    return -1;
  else if ((*tmp)->order > (*tmp2)->order)
    return 1;

  gcc_unreachable ();
}

/* Sorting function for store_immediate_info objects.
   Sorts them by the order field.  */

static int
sort_by_order (const void *x, const void *y)
{
  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;

  if ((*tmp)->order < (*tmp2)->order)
    return -1;
  else if ((*tmp)->order > (*tmp2)->order)
    return 1;

  gcc_unreachable ();
}

/* Initialize a merged_store_group object from a store_immediate_info
   object.  */

merged_store_group::merged_store_group (store_immediate_info *info)
{
  start = info->bitpos;
  width = info->bitsize;
  /* VAL has memory allocated for it in apply_stores once the group
     width has been finalized.  */
  val = NULL;
  align = get_object_alignment (gimple_assign_lhs (info->stmt));
  stores.create (1);
  stores.safe_push (info);
  last_stmt = info->stmt;
  last_order = info->order;
  first_stmt = last_stmt;
  first_order = last_order;
  buf_size = 0;
}

merged_store_group::~merged_store_group ()
{
  if (val)
    XDELETEVEC (val);
}

/* Merge a store recorded by INFO into this merged store.
   The store does not overlap with the existing recorded
   stores.  */

void
merged_store_group::merge_into (store_immediate_info *info)
{
  unsigned HOST_WIDE_INT wid = info->bitsize;
  /* Make sure we're inserting in the position we think we're inserting.  */
  gcc_assert (info->bitpos == start + width);

  width += wid;
  gimple *stmt = info->stmt;
  stores.safe_push (info);
  if (info->order > last_order)
    {
      last_order = info->order;
      last_stmt = stmt;
    }
  else if (info->order < first_order)
    {
      first_order = info->order;
      first_stmt = stmt;
    }
}

/* Merge a store described by INFO into this merged store.
   INFO overlaps in some way with the current store (i.e. it's not
   contiguous; the contiguous case is handled by
   merged_store_group::merge_into).  */

void
merged_store_group::merge_overlapping (store_immediate_info *info)
{
  gimple *stmt = info->stmt;
  stores.safe_push (info);

  /* If the store extends the size of the group, extend the width.  */
  if ((info->bitpos + info->bitsize) > (start + width))
    width += info->bitpos + info->bitsize - (start + width);

  if (info->order > last_order)
    {
      last_order = info->order;
      last_stmt = stmt;
    }
  else if (info->order < first_order)
    {
      first_order = info->order;
      first_stmt = stmt;
    }
}

/* Go through all the recorded stores in this group in program order and
   apply their values to the VAL byte array to create the final merged
   value.  Return true if the operation succeeded.  */

bool
merged_store_group::apply_stores ()
{
  /* The total width of the stores must add up to a whole number of bytes
     and start at a byte boundary.  We don't support emitting bitfield
     references for now.  Also, make sure we have more than one store
     in the group, otherwise we cannot merge anything.  */
  if (width % BITS_PER_UNIT != 0
      || start % BITS_PER_UNIT != 0
      || stores.length () == 1)
    return false;

  stores.qsort (sort_by_order);
  struct store_immediate_info *info;
  unsigned int i;
  /* Create a buffer of a size that is 2 times the number of bytes we're
     storing.  That way native_encode_expr can write power-of-2-sized
     chunks without overrunning.  */
  buf_size = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT);
  val = XCNEWVEC (unsigned char, buf_size);

  FOR_EACH_VEC_ELT (stores, i, info)
    {
      unsigned int pos_in_buffer = info->bitpos - start;
      bool ret = encode_tree_to_bitpos (gimple_assign_rhs1 (info->stmt),
					val, info->bitsize,
					pos_in_buffer, buf_size);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (ret)
	    {
	      fprintf (dump_file, "After writing ");
	      print_generic_expr (dump_file,
				  gimple_assign_rhs1 (info->stmt), 0);
	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
		       " at position %d the merged region contains:\n",
		       info->bitsize, pos_in_buffer);
	      dump_char_array (dump_file, val, buf_size);
	    }
	  else
	    fprintf (dump_file, "Failed to merge stores\n");
	}
      if (!ret)
	return false;
    }
  return true;
}

/* Structure describing the store chain.  */

struct imm_store_chain_info
{
  /* Doubly-linked list that imposes an order on chain processing.
     PNXP (prev's next pointer) points to the head of a list, or to
     the next field in the previous chain in the list.
     See pass_store_merging::m_stores_head for more rationale.  */
  imm_store_chain_info *next, **pnxp;
  tree base_addr;
  auto_vec<struct store_immediate_info *> m_store_info;
  auto_vec<merged_store_group *> m_merged_store_groups;

  imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
  : next (inspt), pnxp (&inspt), base_addr (b_a)
  {
    inspt = this;
    if (next)
      {
	gcc_checking_assert (pnxp == next->pnxp);
	next->pnxp = &next;
      }
  }
  ~imm_store_chain_info ()
  {
    *pnxp = next;
    if (next)
      {
	gcc_checking_assert (&next == next->pnxp);
	next->pnxp = pnxp;
      }
  }
  bool terminate_and_process_chain ();
  bool coalesce_immediate_stores ();
  bool output_merged_store (merged_store_group *);
  bool output_merged_stores ();
};

const pass_data pass_data_tree_store_merging = {
  GIMPLE_PASS,     /* type */
  "store-merging", /* name */
  OPTGROUP_NONE,   /* optinfo_flags */
  TV_GIMPLE_STORE_MERGING, /* tv_id */
  PROP_ssa,	   /* properties_required */
  0,		   /* properties_provided */
  0,		   /* properties_destroyed */
  0,		   /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_store_merging : public gimple_opt_pass
{
public:
  pass_store_merging (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
  {
  }

  /* Pass not supported for PDP-endianness.  */
  virtual bool
  gate (function *)
  {
    return flag_store_merging && (WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN);
  }

  virtual unsigned int execute (function *);

private:
  hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;

  /* Form a doubly-linked stack of the elements of m_stores, so that
     we can iterate over them in a predictable way.  Using this order
     avoids extraneous differences in the compiler output just because
     of tree pointer variations (e.g. different chains end up in
     different positions of m_stores, so they are handled in different
     orders, so they allocate or release SSA names in different
     orders, and when they get reused, subsequent passes end up
     getting different SSA names, which may ultimately change
     decisions when going out of SSA).  */
  imm_store_chain_info *m_stores_head;

  bool terminate_and_process_all_chains ();
  bool terminate_all_aliasing_chains (imm_store_chain_info **,
				      bool, gimple *);
  bool terminate_and_release_chain (imm_store_chain_info *);
}; // class pass_store_merging

/* Terminate and process all recorded chains.  Return true if any changes
   were made.  */

bool
pass_store_merging::terminate_and_process_all_chains ()
{
  bool ret = false;
  while (m_stores_head)
    ret |= terminate_and_release_chain (m_stores_head);
  gcc_assert (m_stores.elements () == 0);
  gcc_assert (m_stores_head == NULL);

  return ret;
}

/* Terminate all chains that are affected by the assignment to DEST,
   appearing in statement STMT and ultimately pointing to the object BASE.
   Return true if at least one aliasing chain was terminated.  BASE and
   DEST are allowed to be NULL_TREE.  In that case the aliasing checks are
   performed on the whole statement rather than a particular operand in it.
   VAR_OFFSET_P signifies whether STMT represents a store to BASE offset by
   a variable amount.  If that is the case we have to terminate any chain
   anchored at BASE.  */

bool
pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
						     **chain_info,
						   bool var_offset_p,
						   gimple *stmt)
{
  bool ret = false;

  /* If the statement doesn't touch memory it can't alias.  */
  if (!gimple_vuse (stmt))
    return false;

  /* Check if the assignment destination (BASE) is part of a store chain.
     This is to catch non-constant stores to destinations that may be part
     of a chain.  */
  if (chain_info)
    {
      /* We have a chain at BASE and we're writing to [BASE + <variable>].
	 This can interfere with any of the stores so terminate
	 the chain.  */
      if (var_offset_p)
	{
	  terminate_and_release_chain (*chain_info);
	  ret = true;
	}
      /* Otherwise go through every store in the chain to see if it
	 aliases with any of them.  */
      else
	{
	  struct store_immediate_info *info;
	  unsigned int i;
	  tree store_lhs
	    = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
	  FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
	    {
	      tree lhs = gimple_assign_lhs (info->stmt);
	      if (ref_maybe_used_by_stmt_p (stmt, lhs)
		  || stmt_may_clobber_ref_p (stmt, lhs)
		  || (store_lhs && refs_output_dependent_p (store_lhs, lhs)))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file,
			       "stmt causes chain termination:\n");
		      print_gimple_stmt (dump_file, stmt, 0, 0);
		    }
		  terminate_and_release_chain (*chain_info);
		  ret = true;
		  break;
		}
	    }
	}
    }

  /* Check for aliasing with all other store chains.  */
  for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
    {
      next = cur->next;

      /* We already checked all the stores in chain_info and terminated the
	 chain if necessary.  Skip it here.  */
      if (chain_info && (*chain_info) == cur)
	continue;

      /* We can't use the base object here as that does not reliably exist.
	 Build an ao_ref from the base object address (if we knew the
	 minimum and maximum offset and the maximum size we could improve
	 things here).  */
      ao_ref chain_ref;
      ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE);
      if (ref_maybe_used_by_stmt_p (stmt, &chain_ref)
	  || stmt_may_clobber_ref_p_1 (stmt, &chain_ref))
	{
	  terminate_and_release_chain (cur);
	  ret = true;
	}
    }

  return ret;
}

/* Helper function.  Terminate the recorded chain CHAIN_INFO storing to its
   base object.  Return true if the merging and output was successful.  The
   m_stores entry is removed after the processing in any case.  */

bool
pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_info)
{
  bool ret = chain_info->terminate_and_process_chain ();
  m_stores.remove (chain_info->base_addr);
  delete chain_info;
  return ret;
}

/* Go through the candidate stores recorded in m_store_info and merge them
   into merged_store_group objects recorded into m_merged_store_groups
   representing the widened stores.  Return true if coalescing was successful
   and the number of widened stores is fewer than the original number
   of stores.  */

bool
imm_store_chain_info::coalesce_immediate_stores ()
{
  /* Anything less can't be processed.  */
  if (m_store_info.length () < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
	     m_store_info.length ());

  store_immediate_info *info;
  unsigned int i;

  /* Order the stores by the bitposition they write to.  */
  m_store_info.qsort (sort_by_bitpos);

  info = m_store_info[0];
  merged_store_group *merged_store = new merged_store_group (info);

  FOR_EACH_VEC_ELT (m_store_info, i, info)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
			      " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
		   i, info->bitsize, info->bitpos);
	  print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt), 0);
	  fprintf (dump_file, "\n------------\n");
	}

      if (i == 0)
	continue;

      /* |---store 1---|
	       |---store 2---|
	 Overlapping stores.  */
      unsigned HOST_WIDE_INT start = info->bitpos;
      if (IN_RANGE (start, merged_store->start,
		    merged_store->start + merged_store->width - 1))
	{
	  merged_store->merge_overlapping (info);
	  continue;
	}

      /* |---store 1---| <gap> |---store 2---|.
	 Gap between stores.  Start a new group.  */
      if (start != merged_store->start + merged_store->width)
	{
	  /* Try to apply all the stores recorded for the group to determine
	     the bitpattern they write and discard it if that fails.
	     This will also reject single-store groups.  */
	  if (!merged_store->apply_stores ())
	    delete merged_store;
	  else
	    m_merged_store_groups.safe_push (merged_store);

	  merged_store = new merged_store_group (info);

	  continue;
	}

      /* |---store 1---||---store 2---|
	 This store is consecutive to the previous one.
	 Merge it into the current store group.  */
      merged_store->merge_into (info);
    }

  /* Record or discard the last store group.  */
  if (!merged_store->apply_stores ())
    delete merged_store;
  else
    m_merged_store_groups.safe_push (merged_store);

  gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
  bool success
    = !m_merged_store_groups.is_empty ()
      && m_merged_store_groups.length () < m_store_info.length ();

  if (success && dump_file)
    fprintf (dump_file, "Coalescing successful!\n"
			"Merged into %u stores\n",
	     m_merged_store_groups.length ());

  return success;
}

/* Return the type to use for the merged stores described by STMTS.
   This is needed to get the alias sets right.  */

static tree
get_alias_type_for_stmts (auto_vec<gimple *> &stmts)
{
  gimple *stmt;
  unsigned int i;
  tree lhs = gimple_assign_lhs (stmts[0]);
  tree type = reference_alias_ptr_type (lhs);

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      if (i == 0)
	continue;

      lhs = gimple_assign_lhs (stmt);
      tree type1 = reference_alias_ptr_type (lhs);
      if (!alias_ptr_types_compatible_p (type, type1))
	return ptr_type_node;
    }
  return type;
}

/* Return the location_t information we can find among the statements
   in STMTS.  */

static location_t
get_location_for_stmts (auto_vec<gimple *> &stmts)
{
  gimple *stmt;
  unsigned int i;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    if (gimple_has_location (stmt))
      return gimple_location (stmt);

  return UNKNOWN_LOCATION;
}

/* Used to describe a store resulting from splitting a wide store into
   smaller regularly-sized stores in split_group.  */

struct split_store
{
  unsigned HOST_WIDE_INT bytepos;
  unsigned HOST_WIDE_INT size;
  unsigned HOST_WIDE_INT align;
  auto_vec<gimple *> orig_stmts;
  split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT);
};

/* Simple constructor.  */

split_store::split_store (unsigned HOST_WIDE_INT bp,
			  unsigned HOST_WIDE_INT sz,
			  unsigned HOST_WIDE_INT al)
			  : bytepos (bp), size (sz), align (al)
{
  orig_stmts.create (0);
}

/* Record in STMTS all statements corresponding to stores in GROUP that
   write to the region that starts at BITPOS and is BITSIZE bits wide.
   The stores in GROUP must be sorted by bitposition.  */

static void
find_constituent_stmts (struct merged_store_group *group,
			auto_vec<gimple *> &stmts,
			unsigned HOST_WIDE_INT bitpos,
			unsigned HOST_WIDE_INT bitsize)
{
  struct store_immediate_info *info;
  unsigned int i;
  unsigned HOST_WIDE_INT end = bitpos + bitsize;
  FOR_EACH_VEC_ELT (group->stores, i, info)
    {
      unsigned HOST_WIDE_INT stmt_start = info->bitpos;
      unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
      if (stmt_end < bitpos)
	continue;
      /* The stores in GROUP are ordered by bitposition so if we're past
	 the region for this group return early.  */
      if (stmt_start > end)
	return;

      if (IN_RANGE (stmt_start, bitpos, end)
	  || IN_RANGE (stmt_end, bitpos, end)
	  /* The statement writes a region that completely encloses the region
	     that this group writes.  Unlikely to occur but let's
	     handle it.  */
	  || IN_RANGE (bitpos, stmt_start, stmt_end))
	stmts.safe_push (info->stmt);
    }
}

/* Split a merged store described by GROUP by populating the SPLIT_STORES
   vector with split_store structs describing the byte offset (from the base),
   the bit size and alignment of each store as well as the original statements
   involved in each such split group.
   This is to separate the splitting strategy from the statement
   building/emission/linking done in output_merged_store.
   At the moment just start with the widest possible size and keep emitting
   the widest we can until we have emitted all the bytes, halving the size
   when appropriate.  */

static bool
split_group (merged_store_group *group,
	     auto_vec<struct split_store *> &split_stores)
{
  unsigned HOST_WIDE_INT pos = group->start;
  unsigned HOST_WIDE_INT size = group->width;
  unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
  unsigned HOST_WIDE_INT align = group->align;

  /* We don't handle partial bitfields for now.  We shouldn't have
     reached this far.  */
  gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));

  bool allow_unaligned
    = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);

  unsigned int try_size = MAX_STORE_BITSIZE;
  while (try_size > size
	 || (!allow_unaligned
	     && try_size > align))
    {
      try_size /= 2;
      if (try_size < BITS_PER_UNIT)
	return false;
    }

  unsigned HOST_WIDE_INT try_pos = bytepos;
  group->stores.qsort (sort_by_bitpos);

  while (size > 0)
    {
      struct split_store *store = new split_store (try_pos, try_size, align);
      unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
      find_constituent_stmts (group, store->orig_stmts, try_bitpos, try_size);
      split_stores.safe_push (store);

      try_pos += try_size / BITS_PER_UNIT;

      size -= try_size;
      align = try_size;
      while (size < try_size)
	try_size /= 2;
    }
  return true;
}
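
/* To illustrate (example numbers, not from the sources): for the 48-bit
   group from the comment at the top of the file, with MAX_STORE_BITSIZE
   == 64 and unaligned stores allowed, try_size starts at 64, is halved
   to 32 (64 > 48), a 32-bit split_store is emitted at byte 0, then 16
   bits remain so try_size is halved to 16 and a 16-bit split_store is
   emitted at byte 4, giving the 32-bit + 16-bit sequence from the
   phase 3) example.  */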

/* Given a merged store group GROUP output the widened version of it.
   The store chain is against the base object BASE.
   Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
   unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
   Make sure that the number of statements output is less than the number of
   original statements.  If a better sequence is possible emit it and
   return true.  */

bool
imm_store_chain_info::output_merged_store (merged_store_group *group)
{
  unsigned HOST_WIDE_INT start_byte_pos = group->start / BITS_PER_UNIT;

  unsigned int orig_num_stmts = group->stores.length ();
  if (orig_num_stmts < 2)
    return false;

  auto_vec<struct split_store *> split_stores;
  split_stores.create (0);
  if (!split_group (group, split_stores))
    return false;

  gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
  gimple_seq seq = NULL;
  unsigned int num_stmts = 0;
  tree last_vdef, new_vuse;
  last_vdef = gimple_vdef (group->last_stmt);
  new_vuse = gimple_vuse (group->last_stmt);

  gimple *stmt = NULL;
  /* The new SSA names created.  Keep track of them so that we can free them
     if we decide to not use the new sequence.  */
  auto_vec<tree> new_ssa_names;
  split_store *split_store;
  unsigned int i;
  bool fail = false;

  tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &seq,
				      is_gimple_mem_ref_addr, NULL_TREE);
  FOR_EACH_VEC_ELT (split_stores, i, split_store)
    {
      unsigned HOST_WIDE_INT try_size = split_store->size;
      unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
      unsigned HOST_WIDE_INT align = split_store->align;
      tree offset_type = get_alias_type_for_stmts (split_store->orig_stmts);
      location_t loc = get_location_for_stmts (split_store->orig_stmts);

      tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
      int_type = build_aligned_type (int_type, align);
      tree dest = fold_build2 (MEM_REF, int_type, addr,
			       build_int_cst (offset_type, try_pos));

      tree src = native_interpret_expr (int_type,
					group->val + try_pos - start_byte_pos,
					group->buf_size);

      stmt = gimple_build_assign (dest, src);
      gimple_set_location (stmt, loc);
      gimple_set_vuse (stmt, new_vuse);
      gimple_seq_add_stmt_without_update (&seq, stmt);

      /* We didn't manage to reduce the number of statements.  Bail out.  */
      if (++num_stmts == orig_num_stmts)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Exceeded original number of stmts (%u)."
				  "  Not profitable to emit new sequence.\n",
		       orig_num_stmts);
	    }
	  unsigned int ssa_count;
	  tree ssa_name;
	  /* Don't forget to cleanup the temporary SSA names.  */
	  FOR_EACH_VEC_ELT (new_ssa_names, ssa_count, ssa_name)
	    release_ssa_name (ssa_name);

	  fail = true;
	  break;
	}

      tree new_vdef;
      if (i < split_stores.length () - 1)
	{
	  new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
	  new_ssa_names.safe_push (new_vdef);
	}
      else
	new_vdef = last_vdef;

      gimple_set_vdef (stmt, new_vdef);
      SSA_NAME_DEF_STMT (new_vdef) = stmt;
      new_vuse = new_vdef;
    }

  FOR_EACH_VEC_ELT (split_stores, i, split_store)
    delete split_store;

  if (fail)
    return false;

  gcc_assert (seq);
  if (dump_file)
    {
      fprintf (dump_file,
	       "New sequence of %u stmts to replace old one of %u stmts\n",
	       num_stmts, orig_num_stmts);
      if (dump_flags & TDF_DETAILS)
	print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
    }
  gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);

  return true;
}

/* Process the merged_store_group objects created in the coalescing phase.
   The stores are all against the base object BASE.
   Try to output the widened stores and delete the original statements if
   successful.  Return true iff any changes were made.  */

bool
imm_store_chain_info::output_merged_stores ()
{
  unsigned int i;
  merged_store_group *merged_store;
  bool ret = false;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
    {
      if (output_merged_store (merged_store))
	{
	  unsigned int j;
	  store_immediate_info *store;
	  FOR_EACH_VEC_ELT (merged_store->stores, j, store)
	    {
	      gimple *stmt = store->stmt;
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_remove (&gsi, true);
	      if (stmt != merged_store->last_stmt)
		{
		  unlink_stmt_vdef (stmt);
		  release_defs (stmt);
		}
	    }
	  ret = true;
	}
    }
  if (ret && dump_file)
    fprintf (dump_file, "Merging successful!\n");

  return ret;
}

/* Coalesce the store_immediate_info objects recorded against the base object
   BASE in the first phase and output them.
   Delete the allocated structures.
   Return true if any changes were made.  */

bool
imm_store_chain_info::terminate_and_process_chain ()
{
  /* Process store chain.  */
  bool ret = false;
  if (m_store_info.length () > 1)
    {
      ret = coalesce_immediate_stores ();
      if (ret)
	ret = output_merged_stores ();
    }

  /* Delete all the entries we allocated ourselves.  */
  store_immediate_info *info;
  unsigned int i;
  FOR_EACH_VEC_ELT (m_store_info, i, info)
    delete info;

  merged_store_group *merged_info;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
    delete merged_info;

  return ret;
}

/* Return true iff LHS is a destination potentially interesting for
   store merging.  In practice these are the codes that get_inner_reference
   can process.  */

static bool
lhs_valid_for_store_merging_p (tree lhs)
{
  tree_code code = TREE_CODE (lhs);

  if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
      || code == COMPONENT_REF || code == BIT_FIELD_REF)
    return true;

  return false;
}

/* Return true if the tree RHS is a constant we want to consider
   during store merging.  In practice accept all codes that
   native_encode_expr accepts.  */

static bool
rhs_valid_for_store_merging_p (tree rhs)
{
  tree type = TREE_TYPE (rhs);
  if (TREE_CODE_CLASS (TREE_CODE (rhs)) != tcc_constant
      || !can_native_encode_type_p (type))
    return false;

  return true;
}

/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (one that
   interferes with or may alias a recorded chain) process the recorded
   stores and emit the widened variants.  */

unsigned int
pass_store_merging::execute (function *fun)
{
  basic_block bb;
  hash_set<gimple *> orig_stmts;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      unsigned HOST_WIDE_INT num_statements = 0;
      /* Record the original statements so that we can keep track of
	 statements emitted in this pass and not re-process new
	 statements.  */
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  if (is_gimple_debug (gsi_stmt (gsi)))
	    continue;

	  if (++num_statements > 2)
	    break;
	}

      if (num_statements < 2)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_debug (stmt))
	    continue;

	  if (gimple_has_volatile_ops (stmt))
	    {
	      /* Terminate all chains.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Volatile access terminates "
				    "all chains\n");
	      terminate_and_process_all_chains ();
	      continue;
	    }

	  if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
	      && !stmt_can_throw_internal (stmt)
	      && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
	    {
	      tree lhs = gimple_assign_lhs (stmt);
	      tree rhs = gimple_assign_rhs1 (stmt);

	      HOST_WIDE_INT bitsize, bitpos;
	      machine_mode mode;
	      int unsignedp = 0, reversep = 0, volatilep = 0;
	      tree offset, base_addr;
	      base_addr
		= get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode,
				       &unsignedp, &reversep, &volatilep);
	      /* As a future enhancement we could handle stores with the same
		 base and offset.  */
	      bool invalid = reversep
			     || ((bitsize > MAX_BITSIZE_MODE_ANY_INT)
				  && (TREE_CODE (rhs) != INTEGER_CST))
			     || !rhs_valid_for_store_merging_p (rhs);

	      /* We do not want to rewrite TARGET_MEM_REFs.  */
	      if (TREE_CODE (base_addr) == TARGET_MEM_REF)
		invalid = true;
	      /* In some cases get_inner_reference may return a
		 MEM_REF [ptr + byteoffset].  For the purposes of this pass
		 canonicalize the base_addr to MEM_REF [ptr] and take
		 byteoffset into account in the bitpos.  This occurs in
		 PR 23684 and this way we can catch more chains.  */
	      else if (TREE_CODE (base_addr) == MEM_REF)
		{
		  offset_int bit_off, byte_off = mem_ref_offset (base_addr);
		  bit_off = byte_off << LOG2_BITS_PER_UNIT;
		  bit_off += bitpos;
		  if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off))
		    bitpos = bit_off.to_shwi ();
		  else
		    invalid = true;
		  base_addr = TREE_OPERAND (base_addr, 0);
		}
	      /* get_inner_reference returns the base object, get at its
		 address now.  */
	      else
		{
		  if (bitpos < 0)
		    invalid = true;
		  base_addr = build_fold_addr_expr (base_addr);
		}

	      if (! invalid
		  && offset != NULL_TREE)
		{
		  /* If the access is variable offset then a base
		     decl has to be address-taken to be able to
		     emit pointer-based stores to it.
		     ???  We might be able to get away with
		     re-using the original base up to the first
		     variable part and then wrapping that inside
		     a BIT_FIELD_REF.  */
		  tree base = get_base_address (base_addr);
		  if (! base
		      || (DECL_P (base)
			  && ! TREE_ADDRESSABLE (base)))
		    invalid = true;
		  else
		    base_addr = build2 (POINTER_PLUS_EXPR,
					TREE_TYPE (base_addr),
					base_addr, offset);
		}

	      struct imm_store_chain_info **chain_info
		= m_stores.get (base_addr);

	      if (!invalid)
		{
		  store_immediate_info *info;
		  if (chain_info)
		    {
		      info = new store_immediate_info (
			bitsize, bitpos, stmt,
			(*chain_info)->m_store_info.length ());
		      if (dump_file && (dump_flags & TDF_DETAILS))
			{
			  fprintf (dump_file,
				   "Recording immediate store from stmt:\n");
			  print_gimple_stmt (dump_file, stmt, 0, 0);
			}
		      (*chain_info)->m_store_info.safe_push (info);
		      /* If we reach the limit of stores to merge in a chain
			 terminate and process the chain now.  */
		      if ((*chain_info)->m_store_info.length ()
			  == (unsigned int)
			     PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
			{
			  if (dump_file && (dump_flags & TDF_DETAILS))
			    fprintf (dump_file,
				     "Reached maximum number of statements"
				     " to merge:\n");
			  terminate_and_release_chain (*chain_info);
			}
		      continue;
		    }

		  /* Does this store alias any existing chain?  */
		  terminate_all_aliasing_chains (chain_info, false, stmt);
		  /* Start a new chain.  */
		  struct imm_store_chain_info *new_chain
		    = new imm_store_chain_info (m_stores_head, base_addr);
		  info = new store_immediate_info (bitsize, bitpos,
						   stmt, 0);
		  new_chain->m_store_info.safe_push (info);
		  m_stores.put (base_addr, new_chain);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file,
			       "Starting new chain with statement:\n");
		      print_gimple_stmt (dump_file, stmt, 0, 0);
		      fprintf (dump_file, "The base object is:\n");
		      print_generic_expr (dump_file, base_addr, 0);
		      fprintf (dump_file, "\n");
		    }
		}
	      else
		terminate_all_aliasing_chains (chain_info,
					       offset != NULL_TREE, stmt);

	      continue;
	    }

	  terminate_all_aliasing_chains (NULL, false, stmt);
	}
      terminate_and_process_all_chains ();
    }
  return 0;
}

} // anon namespace

/* Construct and return a store merging pass object.  */

gimple_opt_pass *
make_pass_store_merging (gcc::context *ctxt)
{
  return new pass_store_merging (ctxt);
}

#if CHECKING_P

namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

static void
verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    {
      if (x[i] != y[i])
	{
	  fprintf (stderr, "Arrays do not match.  X:\n");
	  dump_char_array (stderr, x, n);
	  fprintf (stderr, "Y:\n");
	  dump_char_array (stderr, y, n);
	}
      ASSERT_EQ (x[i], y[i]);
    }
}

/* Test shift_bytes_in_array and check that it carries bits across
   adjacent bytes correctly.  */

static void
verify_shift_bytes_in_array (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
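
  /* Additional check (not in the original tests): a set bit carried out
     of the most significant bit of byte 0 must reappear as the least
     significant bit of byte 1.  */
  unsigned char in2[2] = { 0x80, 0x00 };
  unsigned char expected2[2] = { 0x00, 0x01 };
  shift_bytes_in_array (in2, sizeof (in2), 1);
  verify_array_eq (in2, expected2, sizeof (in2));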
}

/* Test shift_bytes_in_array_right and check that it carries bits across
   adjacent bytes correctly.  */

static void
verify_shift_bytes_in_array_right (void)
{
  /* byte 0   | byte 1
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0x1f, 0xe0};
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8};
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test that clear_bit_region clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff};
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x1;
  expected[1] = 0;
  expected[2] = 0x80;
  verify_array_eq (in, expected, sizeof in);
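
  memcpy (in, orig, sizeof in);
  /* Additional check (not in the original tests): clear a sub-byte region
     confined to the middle byte, assuming BITS_PER_UNIT == 8 as the other
     cases here already do.  Bits 5..2 of byte 1 are cleared, leaving
     0xc3.  */
  clear_bit_region (in, BITS_PER_UNIT + 2, 4);
  expected[0] = 0xff;
  expected[1] = 0xc3;
  expected[2] = 0xff;
  verify_array_eq (in, expected, sizeof in);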
}

/* Test that clear_bit_region_be clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region_be (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff};
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x80;
  expected[1] = 0;
  expected[2] = 0x1;
  verify_array_eq (in, expected, sizeof in);
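
  memcpy (in, orig, sizeof in);
  /* Additional check (not in the original tests): clear a sub-byte region
     within the first byte only.  Clearing 3 bits starting at big-endian
     position 5 clears bits 5..3, leaving 0xc7.  */
  clear_bit_region_be (in, 5, 3);
  expected[0] = 0xc7;
  expected[1] = 0xff;
  expected[2] = 0xff;
  verify_array_eq (in, expected, sizeof in);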
}

/* Run all of the selftests within this file.  */

void
store_merging_c_tests (void)
{
  verify_shift_bytes_in_array ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}

} // namespace selftest
#endif /* CHECKING_P.  */